Commit 8992ba05 authored by Vasyl Bodnaruk

Fix bug in News tuple, tags and add lower

parent b06dcee8
...@@ -2,7 +2,7 @@ from collections import namedtuple ...@@ -2,7 +2,7 @@ from collections import namedtuple
Tags = namedtuple('Tag', 'id, tags') Tags = namedtuple('Tag', 'id, tags')
News = namedtuple('News', 'id', 'tags') News = namedtuple('News', 'id, tags')
MLData = namedtuple('MLData', 'news_id, tags_id') MLData = namedtuple('MLData', 'news_id, tags_id')
...@@ -22,7 +22,7 @@ class MLDataMaker: ...@@ -22,7 +22,7 @@ class MLDataMaker:
def is_tag_similar(self, tags, news_tags): def is_tag_similar(self, tags, news_tags):
count = 0 count = 0
for i in tags: for i in tags:
if i in news_tags: if i.lower() in news_tags.lower():
count += 1 count += 1
if count == 0 or count < len(tags) // 2: if count == 0 or count < len(tags) // 2:
return False return False
...@@ -32,16 +32,6 @@ class MLDataMaker: ...@@ -32,16 +32,6 @@ class MLDataMaker:
def make_ml_data(self, news): def make_ml_data(self, news):
ml_data = list() ml_data = list()
for i in self.tags: for i in self.tags:
if self.is_tag_similar(i, news.tags): if self.is_tag_similar(i.tags, news.tags):
ml_data.append([news.id, i.id]) ml_data.append([news.id, i.id])
return ml_data return ml_data
\ No newline at end of file
if __name__ == '__main__':
from scrapy.utils.project import get_project_settings
from .db import Database
db_settings = get_project_settings().get('DB')
db = Database(**db_settings)
tags = db.select('select id, name from wp_esi_tag')
print(tags)
ml = MLDataMaker(tags)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment