Commit 50b27c03 authored by Vasyl Bodnaruk

Add mixin to spider

parent 5a897b66
......@@ -2,31 +2,23 @@
import scrapy
import dateparser
from scrapy.utils.project import get_project_settings
from ..helpers import CompanyMaker, Database
from ..helpers import CompanyMaker, Database, QueryMixin
from ..items import ExaItem
db_settings = get_project_settings().get('DB')
db = Database(**db_settings)
class AitopSpider(scrapy.Spider):
class AitopSpider(QueryMixin, scrapy.Spider):
name = "aitop"
allowed_domains = ["aitopics.org"]
def __init__(self, *args, **kwargs):
    """Prepare the SQL statement that selects the companies to crawl.

    The optional ``query`` keyword argument is stored on ``self.condition``.
    When it is truthy it is echoed to stdout and appended to the base
    statement with ``or``; the resulting SQL is stored on ``self.query``.
    The parent initializer is then called without any arguments.
    """
    extra_condition = kwargs.get('query')
    self.condition = extra_condition

    sql = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=13"
    if extra_condition:
        print(extra_condition)
        sql = sql + ' or {}'.format(extra_condition)
    self.query = sql

    super(AitopSpider, self).__init__()
def start_requests(self):
    """Yield the initial requests for every company selected by ``self.query``.

    The rows returned by ``db.select(self.query)`` are wrapped in a
    ``CompanyMaker`` and ``make_companies`` is called with the spider name.
    Each resulting company is requested twice: once with a trailing slash
    appended to its URL and once with the URL as stored.  Both requests are
    routed to ``self.parse`` and carry the company plus a ``post_id`` of 0
    in ``meta``.

    A company whose requests cannot be built is logged and skipped so that
    it does not abort the crawl for the remaining companies.
    """
    companies = CompanyMaker(db.select(self.query))
    companies.make_companies(self.name)
    for company in companies.get_companies():
        try:
            yield scrapy.Request(company.url + '/', callback=self.parse,
                                 meta={'company': company, 'post_id': 0})
            yield scrapy.Request(company.url, callback=self.parse,
                                 meta={'company': company, 'post_id': 0})
        except Exception:
            # Deliberately not a bare ``except``: a bare clause would also
            # trap GeneratorExit (raised at the ``yield`` when the generator
            # is closed) and KeyboardInterrupt, and the loop would then try
            # to keep yielding.  Failures are logged instead of being
            # silently discarded.
            self.logger.exception(
                'Skipping company %r: could not build start requests', company)
......@@ -102,6 +94,4 @@ class AitopSpider(scrapy.Spider):
else:
return None
def get_common_items(self, company):
    """Return the item fields that are copied straight from *company*.

    The result maps ``region_id``, ``type_id`` and ``media_id`` to the
    attributes of the same name, and ``company_id`` to ``company.id``.
    """
    common = dict(
        region_id=company.region_id,
        type_id=company.type_id,
        media_id=company.media_id,
    )
    common['company_id'] = company.id
    return common
\ No newline at end of file
......@@ -3,7 +3,7 @@ import dateparser
import scrapy
import traceback
from scrapy.utils.project import get_project_settings
from ..helpers import CompanyMaker, Database
from ..helpers import CompanyMaker, Database, QueryMixin
from ..items import ExaItem
......@@ -11,19 +11,10 @@ db_settings = get_project_settings().get('DB')
db = Database(**db_settings)
class MobiHealthNewsSpider(scrapy.Spider):
class MobiHealthNewsSpider(QueryMixin, scrapy.Spider):
name = "mhn"
allowed_domains = ["www.mobihealthnews.com"]
# Build the SQL statement used to select the companies this spider crawls.
# The optional 'query' keyword argument is stored on self.condition; the base
# statement selects the entity with id=3 and a truthy condition is appended
# to it with " or ".
# NOTE(review): the condition is interpolated into the SQL text verbatim --
# confirm that only trusted values are ever passed as 'query'.
def __init__(self, *args, **kwargs):
self.condition = kwargs.get('query')
self.query = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=3"
if self.condition:
# Debug output of the raw condition.
print(self.condition)
self.query += ' or {}'.format(self.condition)
# NOTE(review): indentation is lost in this view, so it cannot be told whether
# this print runs only when a condition was given or unconditionally -- confirm.
print(self.query)
# The parent initializer is called without forwarding *args / **kwargs.
super(MobiHealthNewsSpider, self).__init__()
def start_requests(self):
companies = CompanyMaker(db.select(self.query))
companies.make_companies(self.name)
......
......@@ -2,7 +2,7 @@
import scrapy
import traceback
from scrapy.utils.project import get_project_settings
from ..helpers import CompanyMaker, Database
from ..helpers import CompanyMaker, Database, QueryMixin
from ..items import ExaItem
......@@ -10,18 +10,10 @@ db_settings = get_project_settings().get('DB')
db = Database(**db_settings)
class TechcrunchSpider(scrapy.Spider):
class TechcrunchSpider(QueryMixin, scrapy.Spider):
name = "tc"
allowed_domains = ["techcrunch.com"]
def __init__(self, *args, **kwargs):
    """Prepare the SQL statement that selects the companies to crawl.

    ``self.condition`` receives the optional ``query`` keyword argument.
    A truthy condition is printed and joined onto the base statement with
    ``or``; the final SQL ends up on ``self.query``.  The parent
    initializer is invoked afterwards with no arguments.
    """
    self.condition = kwargs.get('query')

    statement = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=13"
    if self.condition:
        print(self.condition)
        statement = statement + ' or {}'.format(self.condition)
    self.query = statement

    super(TechcrunchSpider, self).__init__()
def start_requests(self):
companies = CompanyMaker(db.select(self.query))
companies.make_companies(self.name)
......@@ -32,16 +24,11 @@ class TechcrunchSpider(scrapy.Spider):
pass
def parse(self, response):
    """Dispatch *response* to the matching page parser based on its URL.

    The request headers are printed first.  A URL containing ``tag`` is
    handed to ``parse_tag``; otherwise a URL containing ``company`` is
    handed to ``parse_company``.  Any other URL yields ``None``.
    """
    print(response.request.headers)

    handler = None
    if 'tag' in response.url:
        handler = self.parse_tag
    elif 'company' in response.url:
        handler = self.parse_company

    if handler is None:
        return None
    return handler(response)
def get_common_items(self, company):
    """Collect the identifiers every scraped item inherits from *company*.

    Returns a dict with ``region_id``, ``type_id`` and ``media_id`` taken
    from the attributes of the same name and ``company_id`` taken from
    ``company.id``.
    """
    fields = {}
    fields['region_id'] = company.region_id
    fields['type_id'] = company.type_id
    fields['media_id'] = company.media_id
    fields['company_id'] = company.id
    return fields
def parse_tag(self, response):
try:
news_list = response.xpath("..//div[contains(@class, 'block block-thumb ')]")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment