Commit 227cd025 authored by Vasyl Bodnaruk

Add functionality to store only unique news items (skip duplicates)

parent eed7af2e
...@@ -13,9 +13,9 @@ db = get_project_settings().get('DB') ...@@ -13,9 +13,9 @@ db = get_project_settings().get('DB')
class ExaPipeline(object): class ExaPipeline(object):
def __init__(self): def __init__(self):
# self.out = open('out/out{}.txt'.format(datetime.now()), 'w', newline='\n')
self.db = Database(**db) self.db = Database(**db)
self.urls = set(self.db.select('select url from wp_esi_accept')) self.urls = {i[0] for i in self.db.select('select url from wp_esi_news_accept')}
print(self.urls)
super(ExaPipeline, self).__init__() super(ExaPipeline, self).__init__()
def process_item(self, item, spider): def process_item(self, item, spider):
...@@ -26,11 +26,10 @@ class ExaPipeline(object): ...@@ -26,11 +26,10 @@ class ExaPipeline(object):
query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id, query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id,
publish_date, record_date, company_id, is_accepted) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n""" publish_date, record_date, company_id, is_accepted) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n"""
print(item) print(item)
n = self.db.select("select url from wp_esi_news_accept where url={}".format(item['url'])) if item['url'] in self.urls:
if len(n) == 0: print("DUPLICATE")
news = self.db.insert(query, data)
else: else:
print('Duplicate') self.db.insert(query, data)
# self._insert_news_entiry(news, item['company_id']) # self._insert_news_entiry(news, item['company_id'])
# self.out.write(query) # self.out.write(query)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment