Commit bfc6c7da authored by Vasyl Bodnaruk's avatar Vasyl Bodnaruk

Add documentations for pipeline

parent 68235128
......@@ -18,6 +18,12 @@ db = get_project_settings().get('DB')
class ExaPipeline(object):
def __init__(self):
'''
This pipeline:
connects to the DB,
creates a classifier for making tags,
creates a Redis buffer for collecting all news from the DB
'''
self.db = Database(**db)
self.classifier = Classifier()
self.classifier.teach_model()
......@@ -58,6 +64,11 @@ class ExaPipeline(object):
self.add_url_to_block(item['url'])
def check_url(self, url):
'''
Check whether the url is already present in the Redis buffer
:param url: url taken from the item
:return: True if the url already exists in the buffer
'''
if bool(self.buffer.get(url)):
print("IN REDIS")
return True
......@@ -68,12 +79,22 @@ class ExaPipeline(object):
self.buffer.set(url, True)
def get_article(self, url):
    '''
    Fetch the article located at the given url and hand back its text
    :param url: address of the article to download and parse
    :return: the text attribute of the parsed article
    '''
    page = Article(url)
    # Same call order as before: download first, then parse.
    page.download()
    page.parse()
    extracted_text = page.text
    return extracted_text
def get_tags(self, text):
'''
Make tags for the text using the ML classifier
:param text: text to search for tags
:return: json object with the tags that are relevant to the text
'''
self.classifier.classify(text)
tags = list()
for i in self.classifier.teg_accordance:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment