Commit 98bf7249 authored by Vasyl Bodnaruk

Change saving news to DB

parent 76f763cb
......@@ -17,25 +17,35 @@ class ExaPipeline(object):
self.urls = {i[0] for i in self.db.select('select url from wp_esi_news_accept')}
super(ExaPipeline, self).__init__()
def open_spider(self, spider):
    """Expose this pipeline on ``spider`` as its ``pipeline`` attribute.

    NOTE(review): presumably called by Scrapy when the spider is opened --
    confirm against the project's pipeline settings.

    :param spider: object that receives the ``pipeline`` attribute.
    """
    spider.pipeline = self
def process_item(self, item, spider):
    """Collapse the scraped text fields of ``item`` into single-line strings.

    * ``title`` is always concatenated and has newlines replaced by spaces.
    * ``description`` gets the same treatment when it is truthy.
    * ``tags`` is comma-joined when truthy, with newlines and tabs removed.
      A value that is already a string is treated as one finished tag
      string instead of being split into comma-separated characters.

    :param item: mapping with ``title``, ``description`` and ``tags`` keys.
    :param spider: not used by this method.
    :return: the same ``item`` object, modified in place.
    """
    item['title'] = ''.join(item['title']).replace('\n', ' ')

    description = item['description']
    if description:
        item['description'] = ''.join(description).replace('\n', ' ')

    tags = item['tags']
    if tags:
        # Joining a plain string would insert a comma between every
        # character, so only real collections of tags are joined.
        joined = tags if isinstance(tags, str) else ','.join(tags)
        item['tags'] = joined.replace('\n', '').replace('\t', '')

    return item
def insert_news(self, item):
    """Insert ``item`` into ``wp_esi_news_accept`` unless its URL is known.

    The URL is looked up through ``self.check_url``. A known URL is only
    reported as ``DUPLICATE``; a new one is reported as ``UNIQUE``, written
    with ``self.db.insert`` and then added to ``self.urls`` so that a later
    item with the same URL is recognised as a duplicate.

    :param item: mapping with the keys ``title``, ``description``, ``url``,
        ``media_id``, ``type_id``, ``region_id``, ``post_id``, ``date``,
        ``company_id`` and ``tags``.
    :return: the ``item`` that was passed in.
    :raises KeyError: if ``item`` is a dict lacking one of the keys above.
    """
    # Values are listed in the column order of the INSERT statement below;
    # record_date is today's date and is_accepted is stored as 0.
    data = (
        item['title'],
        item['description'],
        item['url'],
        item['media_id'],
        item['type_id'],
        item['region_id'],
        item['post_id'],
        item['date'],
        datetime.now().date(),
        item['company_id'],
        0,
        item['tags'],
    )
    query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id,
    publish_date, record_date, company_id, is_accepted, temp_tags) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n"""

    if self.check_url(item['url']):
        print("DUPLICATE", item)
    else:
        print("UNIQUE", item)
        self.db.insert(query, data)
        self.urls.add(item['url'])
    return item
def check_url(self, url):
    """Tell whether ``url`` is already present in ``self.urls``.

    :param url: URL to look up.
    :return: ``True`` if ``url`` is in ``self.urls``, otherwise ``False``.
    """
    # The membership test already yields a bool, so no branching is needed.
    return url in self.urls
def _insert_news_entiry(self, news, entity):
    """Insert one ``(news, entity)`` pair into ``wp_esi_news_entity``.

    :param news: value stored in the ``news_id`` column.
    :param entity: value stored in the ``entity_id`` column.
    """
    link_row = (news, entity)
    self.db.insert(
        'INSERT INTO wp_esi_news_entity (news_id, entity_id) VALUES(%s, %s)',
        link_row,
    )
......@@ -23,3 +23,6 @@ class BaseSpider(scrapy.Spider):
companies = CompanyMaker(db.select(self.query))
companies.make_companies(name)
return companies.get_companies()
def check_buffer(self, url):
    """Placeholder hook that accepts ``url`` and currently does nothing.

    :param url: not used yet.
    :return: ``None``.
    """
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment