Commit 7058f8c2 authored by Vasyl Bodnaruk

Add data from alias table to MLData creator for creating more and better data

parent b577fc65
...@@ -7,9 +7,11 @@ MLData = namedtuple('MLData', 'news_id, tags_id') ...@@ -7,9 +7,11 @@ MLData = namedtuple('MLData', 'news_id, tags_id')
class MLDataMaker: class MLDataMaker:
def __init__(self, tags_list): def __init__(self, db):
self.db = db
self.tags = [Tags(id=i[0], tags=self.split_tags(i[1], '/() ')) for i in tags_list] self.tags = [Tags(id=i[0], tags=self.split_tags(i[1], '/() ')) for i in self.db.select('select id, name from wp_esi_tag')]
self.alias = self._make_alias_dict(self.db.select('select tag_id, alias from wp_esi_tag_alias'))
def split_tags(self, tags, char, replace=' '): def split_tags(self, tags, char, replace=' '):
if len(char) > 1: if len(char) > 1:
...@@ -35,7 +37,22 @@ class MLDataMaker: ...@@ -35,7 +37,22 @@ class MLDataMaker:
for i in self.tags: for i in self.tags:
if self.is_tag_similar(i.tags, news_tag): if self.is_tag_similar(i.tags, news_tag):
ml_data.append(MLData(news.id, i.id)) ml_data.append(MLData(news.id, i.id))
for k, v in self.alias.items():
if self.is_tag_similar(v, news_tag):
print('MLA')
ml_data.append(MLData(news.id, k))
print(ml_data)
if len(ml_data) > 0: if len(ml_data) > 0:
return ml_data return ml_data
else: else:
return None return None
def _make_alias_dict(self, alias):
    """Group alias rows by tag id.

    Args:
        alias: iterable of rows where index 0 is the tag id and index 1 is
            an alias for that tag (the constructor passes the result of
            ``select tag_id, alias from wp_esi_tag_alias``).

    Returns:
        A plain dict mapping each tag id to the list of its aliases, in the
        order the rows were seen. An empty input yields an empty dict.
    """
    result = {}
    for row in alias:
        # setdefault creates the list the first time a tag id is seen,
        # replacing the manual "key in dict" branch.
        result.setdefault(row[0], []).append(row[1])
    # The leftover debug print of the whole mapping was removed: it dumped
    # every alias to stdout each time the maker was constructed.
    return result
...@@ -29,7 +29,7 @@ class ExaPipeline(object): ...@@ -29,7 +29,7 @@ class ExaPipeline(object):
self.classifier = Classifier() self.classifier = Classifier()
self.classifier.teach_model() self.classifier.teach_model()
self.buffer = redis.StrictRedis() self.buffer = redis.StrictRedis()
self.ml_data = MLDataMaker(self.db.select('select id, name from wp_esi_tag')) self.ml_data = MLDataMaker(self.db)
for i in self.db.select('select url from wp_esi_news_accept'): for i in self.db.select('select url from wp_esi_news_accept'):
self.buffer.set(i[0], True) self.buffer.set(i[0], True)
super(ExaPipeline, self).__init__() super(ExaPipeline, self).__init__()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment