service.py 2.02 KB
Newer Older
1
from datetime import datetime
2
from urllib.request import urlopen
3
from scrapyd_api import ScrapydAPI
4
from .models import Job
5

6 7 8 9
# Base URL of the Scrapyd instance (used to build log URLs).
SCRAPYD_HOST = 'http://127.0.0.1:6800'
# Scrapyd project name passed to the list/cancel/schedule API calls.
PROJECT = 'exa'
# URL path prefix under which this project's job logs are requested.
LOG_PATH = '/logs/' + PROJECT

10 11 12

class Service:
    """Drive crawl jobs on Scrapyd and keep the matching ``Job`` rows in sync.

    Every public method that takes ``job`` expects the primary key of a
    ``Job`` row; the row is loaded with ``Job.objects.get(pk=job)``, so a
    missing row propagates whatever exception that lookup raises.
    """

    def __init__(self):
        # Use the same host that get_log() reads from, so API calls and log
        # downloads always talk to one Scrapyd instance.
        self.api = ScrapydAPI(target=SCRAPYD_HOST)

    def update_jobs_status(self):
        """Mark locally RUNNING jobs as FINISHED when Scrapyd reports them finished.

        This is a best-effort sync: any failure (Scrapyd unreachable,
        unexpected response shape, database error) is logged and swallowed so
        that callers are never interrupted by it. Returns ``None``.
        """
        import logging

        try:
            jobs = self.api.list_jobs(PROJECT)
            # A missing or empty 'finished' entry simply means nothing to do.
            for entry in jobs.get('finished') or ():
                Job.objects.filter(
                    job_uuid=entry['id'],
                    status=Job.RUNNING,
                ).update(status=Job.FINISHED, end_time=entry['end_time'])
        except Exception:
            # Deliberately non-fatal, but leave a trace instead of hiding it.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            logging.getLogger(__name__).exception(
                'Failed to update job statuses from Scrapyd')

    def run_job(self, job):
        """Schedule the job on Scrapyd and mark it RUNNING.

        :param job: primary key of the ``Job`` row to start.
        :return: the result of ``Job.json()`` for the updated row.
        """
        record = Job.objects.get(pk=job)
        record.job_uuid = self._run(record)
        record.start_time = datetime.now()
        record.status = Job.RUNNING
        record.save()
        return record.json()

    def cancel_job(self, job):
        """Cancel the job on Scrapyd and mark it FINISHED with the current time.

        :param job: primary key of the ``Job`` row to cancel.
        :return: the result of ``Job.json()`` for the updated row.
        """
        record = Job.objects.get(pk=job)
        self.api.cancel(PROJECT, record.job_uuid)
        record.end_time = datetime.now()
        record.status = Job.FINISHED
        record.save()
        return record.json()

    def delete_job(self, job):
        """Flag the job as deleted (status ``Job.DELETE``); the row is kept.

        :param job: primary key of the ``Job`` row to flag.
        :return: the result of ``Job.json()`` for the updated row.
        """
        record = Job.objects.get(pk=job)
        record.status = Job.DELETE
        record.save()
        return record.json()

    def activate_job(self, job):
        """Reset the job to PENDING and clear its start and end times.

        :param job: primary key of the ``Job`` row to reset.
        :return: the result of ``Job.json()`` for the updated row.
        """
        record = Job.objects.get(pk=job)
        record.start_time = None
        record.end_time = None
        record.status = Job.PENDING
        record.save()
        return record.json()

    def restart_job(self, job):
        """Schedule the job again on Scrapyd and mark it RUNNING.

        Unlike :meth:`run_job`, this also clears ``end_time`` left over from
        the previous run.

        :param job: primary key of the ``Job`` row to restart.
        :return: the result of ``Job.json()`` for the updated row.
        """
        record = Job.objects.get(pk=job)
        record.job_uuid = self._run(record)
        record.start_time = datetime.now()
        record.end_time = None
        record.status = Job.RUNNING
        record.save()
        return record.json()

    def get_log(self, job):
        """Download the job's log file from Scrapyd.

        :param job: primary key of the ``Job`` row whose log is wanted.
        :return: the response body exactly as read from the HTTP response.
        :raises urllib.error.URLError: if the log cannot be fetched
            (``urlopen`` failures are not caught here).
        """
        record = Job.objects.get(pk=job)
        url = SCRAPYD_HOST + LOG_PATH + '/{}/{}.log'.format(
            record.spider.sp_name, record.job_uuid)
        # Context manager guarantees the HTTP connection is released.
        with urlopen(url) as response:
            return response.read()

    def _run(self, job):
        """Schedule ``job``'s spider on Scrapyd and return the API's result.

        :param job: a loaded ``Job`` instance (not a primary key).
        :return: whatever ``ScrapydAPI.schedule`` returns; callers store it
            as the job's ``job_uuid``.
        """
        return self.api.schedule(
            PROJECT,
            job.spider.sp_name,
            query=job.query,
            fresh=not job.scrap_old_news,
        )