Commit b17357f1 authored by Vasyl Bodnaruk

Add the URLs that we have already scraped from the DB to Redis

parent d23cdbbe
...@@ -16,7 +16,7 @@ class ExaPipeline(object): ...@@ -16,7 +16,7 @@ class ExaPipeline(object):
def __init__(self): def __init__(self):
self.db = Database(**db) self.db = Database(**db)
self.buffer = redis.StrictRedis() self.buffer = redis.StrictRedis()
for i in (for i in self.db.select('select url from wp_esi_news_accept')): for i in self.db.select('select url from wp_esi_news_accept'):
self.buffer.set(i, True) self.buffer.set(i, True)
self.urls = {i[0] for i in self.db.select('select url from wp_esi_news_accept')} self.urls = {i[0] for i in self.db.select('select url from wp_esi_news_accept')}
super(ExaPipeline, self).__init__() super(ExaPipeline, self).__init__()
...@@ -50,10 +50,11 @@ class ExaPipeline(object): ...@@ -50,10 +50,11 @@ class ExaPipeline(object):
self.add_url_to_block(item['url']) self.add_url_to_block(item['url'])
def check_url(self, url): def check_url(self, url):
if url in self.urls: if bool(self.buffer.get(url)):
print("NOT")
return True return True
else: else:
return False return False
def add_url_to_block(self, url): def add_url_to_block(self, url):
self.urls.add(url) self.buffer.set(url, True)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment