Commit c3c8d3c2 authored by Vasyl Bodnaruk

Add text file for exists

parent 280e6936
...@@ -45,13 +45,14 @@ class ExaPipeline(object): ...@@ -45,13 +45,14 @@ class ExaPipeline(object):
return item return item
def insert_news(self, item): def insert_news(self, item):
tags = self.get_tags(item['url']) article = self.get_article(item['url'])
tags = self.get_tags(article)
data = (item['title'], item['description'], item['url'], item['media_id'], item['type_id'], data = (item['title'], item['description'], item['url'], item['media_id'], item['type_id'],
item['region_id'], item['post_id'], item['date'], datetime.now().date(), item['company_id'], 0, item['region_id'], item['post_id'], item['date'], datetime.now().date(), item['company_id'], 0,
item['tags'], tags) item['tags'], tags, article)
query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id, query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id,
publish_date, record_date, company_id, is_accepted, temp_tags, tags_id) publish_date, record_date, company_id, is_accepted, temp_tags, tags_id, text)
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n""" VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n"""
self.db.insert(query, data) self.db.insert(query, data)
self.add_url_to_block(item['url']) self.add_url_to_block(item['url'])
...@@ -66,11 +67,14 @@ class ExaPipeline(object): ...@@ -66,11 +67,14 @@ class ExaPipeline(object):
def add_url_to_block(self, url): def add_url_to_block(self, url):
self.buffer.set(url, True) self.buffer.set(url, True)
def get_tags(self, url): def get_article(self, url):
article = Article(url) article = Article(url)
article.download() article.download()
article.parse() article.parse()
self.classifier.classify(article.text) return article.text
def get_tags(self, text):
self.classifier.classify(text)
tags = list() tags = list()
for i in self.classifier.teg_accordance: for i in self.classifier.teg_accordance:
tags.append(i[0]) tags.append(i[0])
......
...@@ -33,8 +33,8 @@ class NewsUpdater: ...@@ -33,8 +33,8 @@ class NewsUpdater:
self.db.update(query) self.db.update(query)
# this bad way # this bad way
def update_all(self): def update_all_tags(self):
for i in self.select_news('select id, url from wp_esi_news_accept where id> 80 and id<100'): for i in self.select_news('select id, url from wp_esi_news_accept where 1'):
try: try:
text = self.load_text(i[1]) text = self.load_text(i[1])
tags = self.get_tags(text) tags = self.get_tags(text)
...@@ -43,7 +43,16 @@ class NewsUpdater: ...@@ -43,7 +43,16 @@ class NewsUpdater:
except BaseException as e: except BaseException as e:
print(e.with_traceback()) print(e.with_traceback())
def update_all_text(self):
    """Re-download article text and store it in the ``text`` column.

    Iterates over the ``(id, url)`` rows returned by ``self.select_news``
    for ``wp_esi_news_accept`` rows with ``id > 26500``, loads the article
    text for each URL with ``self.load_text`` and issues one UPDATE per
    row through ``self.update_news``.

    Non-ASCII characters are dropped from the text before it is stored.
    Exceptions raised while loading or updating a row are not caught, so
    the first failure stops the run.
    """
    rows = self.select_news('select id, url from wp_esi_news_accept where id>26500')
    for news_id, url in rows:
        text = self.load_text(url)

        # Strip non-ASCII characters but keep a real ``str``: calling
        # ``str()`` on the encoded bytes would store the ``b'...'`` repr
        # (wrapper and backslash escapes included) instead of the text.
        ascii_text = text.encode('ascii', 'ignore').decode('ascii')

        # The statement is assembled as a plain string, so the value must
        # be escaped for the double-quoted literal it is placed in.
        # Backslashes first, otherwise the escapes added for quotes would
        # be doubled again.
        # NOTE(review): assumes MySQL-style backslash escaping inside
        # double-quoted literals -- confirm against the server sql_mode.
        # Prefer bound parameters if the DB helper supports them.
        escaped_text = ascii_text.replace('\\', '\\\\').replace('"', '\\"')

        self.update_news(
            'update wp_esi_news_accept set text="{}" where id={}'.format(escaped_text, news_id)
        )
        print('News id={} was updated'.format(news_id))
if __name__ == '__main__': if __name__ == '__main__':
ml = NewsUpdater() ml = NewsUpdater()
ml.update_all() ml.update_all_text()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment