Commit 7058f8c2 authored by Vasyl Bodnaruk

Add data from alias table to MLData creator for creating more and better data

parent b577fc65
......@@ -7,9 +7,11 @@ MLData = namedtuple('MLData', 'news_id, tags_id')
class MLDataMaker:
def __init__(self, tags_list):
def __init__(self, db):
self.db = db
self.tags = [Tags(id=i[0], tags=self.split_tags(i[1], '/() ')) for i in tags_list]
self.tags = [Tags(id=i[0], tags=self.split_tags(i[1], '/() ')) for i in self.db.select('select id, name from wp_esi_tag')]
self.alias = self._make_alias_dict(self.db.select('select tag_id, alias from wp_esi_tag_alias'))
def split_tags(self, tags, char, replace=' '):
if len(char) > 1:
......@@ -35,7 +37,22 @@ class MLDataMaker:
for i in self.tags:
if self.is_tag_similar(i.tags, news_tag):
ml_data.append(MLData(news.id, i.id))
for k, v in self.alias.items():
if self.is_tag_similar(v, news_tag):
print('MLA')
ml_data.append(MLData(news.id, k))
print(ml_data)
if len(ml_data) > 0:
return ml_data
else:
return None
def _make_alias_dict(self, alias):
result = dict()
for i in alias:
if i[0] in result:
result[i[0]].append(i[1])
else:
result[i[0]] = [i[1]]
print(result)
return result
......@@ -29,7 +29,7 @@ class ExaPipeline(object):
self.classifier = Classifier()
self.classifier.teach_model()
self.buffer = redis.StrictRedis()
self.ml_data = MLDataMaker(self.db.select('select id, name from wp_esi_tag'))
self.ml_data = MLDataMaker(self.db)
for i in self.db.select('select url from wp_esi_news_accept'):
self.buffer.set(i[0], True)
super(ExaPipeline, self).__init__()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment