import logging
from datetime import datetime
from urllib.request import urlopen

from scrapyd_api import ScrapydAPI

from .models import Job

PROJECT = 'exa'
SCRAPYD_HOST = 'http://127.0.0.1:6800'
LOG_PATH = '/logs/' + PROJECT

logger = logging.getLogger(__name__)


class Service:

    def __init__(self):
        # Point the client at the configured Scrapyd instance instead of
        # relying on the library's default target.
        self.api = ScrapydAPI(SCRAPYD_HOST)

    def update_jobs_status(self):
        """Mark locally RUNNING jobs as FINISHED once Scrapyd reports them done."""
        try:
            jobs = self.api.list_jobs(PROJECT)
        except Exception:
            # Scrapyd may be unreachable; log and retry on the next call
            # rather than swallowing the error silently.
            logger.exception('Could not fetch job list from Scrapyd')
            return
        for item in jobs.get('finished') or []:
            Job.objects.filter(job_uuid=item['id'], status=Job.RUNNING).update(
                status=Job.FINISHED, end_time=item['end_time'])

    def run_job(self, pk):
        job = Job.objects.get(pk=pk)
        job.job_uuid = self._run(job)
        job.start_time = datetime.now()
        job.status = Job.RUNNING
        job.save()
        return job.json()

    def cancel_job(self, pk):
        job = Job.objects.get(pk=pk)
        self.api.cancel(PROJECT, job.job_uuid)
        job.end_time = datetime.now()
        job.status = Job.FINISHED
        job.save()
        return job.json()

    def delete_job(self, pk):
        job = Job.objects.get(pk=pk)
        job.status = Job.DELETE
        job.save()
        return job.json()

    def activate_job(self, pk):
        # Reset timestamps so the job can be scheduled again from scratch.
        job = Job.objects.get(pk=pk)
        job.start_time = None
        job.end_time = None
        job.status = Job.PENDING
        job.save()
        return job.json()

    def restart_job(self, pk):
        job = Job.objects.get(pk=pk)
        job.job_uuid = self._run(job)
        job.start_time = datetime.now()
        job.end_time = None
        job.status = Job.RUNNING
        job.save()
        return job.json()

    def get_log(self, pk):
        """Fetch the raw Scrapyd log for the job's spider run."""
        job = Job.objects.get(pk=pk)
        url = SCRAPYD_HOST + LOG_PATH + '/{}/{}.log'.format(
            job.spider.sp_name, job.job_uuid)
        with urlopen(url) as response:
            return response.read()

    def _run(self, job):
        # Schedule the spider on Scrapyd; extra keyword arguments are
        # passed through to the spider as arguments.
        return self.api.schedule(PROJECT, job.spider.sp_name, query=job.query)
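
# Minimal usage sketch (assumptions: a Job row with pk=1 exists, Scrapyd is
# reachable at SCRAPYD_HOST, and the Job.json() serializer is defined on the
# model in .models; none of that is shown in this module):
#
#     service = Service()
#     service.update_jobs_status()   # sync FINISHED state from Scrapyd
#     data = service.run_job(1)      # schedule the spider, mark it RUNNING
#     log = service.get_log(1)       # raw log bytes from Scrapyd's /logs/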