Commit 4b39d48e authored by Tags

Change model from 'frequency matrix model' into 'Direct model'

parent 1b3a12f2
...@@ -27,11 +27,11 @@ class Classifier: ...@@ -27,11 +27,11 @@ class Classifier:
self.db = _mysql.connect (host=host, port=port, user=user, passwd=password, db=db) self.db = _mysql.connect (host=host, port=port, user=user, passwd=password, db=db)
# geting tags # geting tags
self.db.query("SELECT id, name FROM wp_esi_tag") self.db.query ("SELECT id, name FROM wp_esi_tag")
rez = self.db.store_result() rez = self.db.store_result ()
# result = # result =
tags = list () tags = list ()
for id, description in rez.fetch_row(maxrows=0): for id, description in rez.fetch_row (maxrows=0):
tags.append ((id, description)) tags.append ((id, description))
self.tags = tags self.tags = tags
# print (len(tags), tags) # print (len(tags), tags)
...@@ -43,9 +43,9 @@ class Classifier: ...@@ -43,9 +43,9 @@ class Classifier:
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id LIMIT 1000 ''' ORDER BY wp_esi_tag_news.tag_id LIMIT 1000 '''
self.db.query (sql1) self.db.query (sql1)
result = self.db.store_result() result = self.db.store_result ()
data = list () data = list ()
for tag_id, title, description in result.fetch_row(maxrows=0): for tag_id, title, description in result.fetch_row (maxrows=0):
data.append ((tag_id, title, description)) data.append ((tag_id, title, description))
for tag_id, _ in self.tags: for tag_id, _ in self.tags:
...@@ -93,7 +93,7 @@ class Classifier: ...@@ -93,7 +93,7 @@ class Classifier:
for i in range (columns): for i in range (columns):
for j in range (rows): for j in range (rows):
if trained_matrix[i, j] != 0: if trained_matrix[i, j] != 0:
frequency_matrix[i, j] = 1 / trained_matrix[i, j] frequency_matrix[i, j] = trained_matrix[i, j]
# normalise rowsfrequency matrix # normalise rowsfrequency matrix
for j in range (columns): for j in range (columns):
...@@ -128,7 +128,7 @@ class Classifier: ...@@ -128,7 +128,7 @@ class Classifier:
temp_matrix = self.tag_frequency_matrix[row] temp_matrix = self.tag_frequency_matrix[row]
rez_summ = temp_matrix[self.matrix_test_data.toarray ()[0] > 0].sum () rez_summ = temp_matrix[self.matrix_test_data.toarray ()[0] > 0].sum ()
if rez_summ > (persantage / 100): if rez_summ > (persantage / 100):
vector_accordance.append ((row, int (rez_summ * 1000) / 10, self.tags[row][1])) vector_accordance.append ((int(self.tags[row][0]), int (rez_summ * 1000) / 10, self.tags[row][1]))
vector_accordance.sort (key=lambda tup: tup[1], reverse=True) vector_accordance.sort (key=lambda tup: tup[1], reverse=True)
return vector_accordance return vector_accordance
...@@ -223,35 +223,36 @@ if __name__ == "__main__": ...@@ -223,35 +223,36 @@ if __name__ == "__main__":
FROM ( FROM (
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) as rez ORDER BY wp_esi_tag_news.tag_id ) AS rez
ORDER BY rand() LIMIT 15''' ORDER BY rand() LIMIT 15'''
tags_classif.db.query(sql1)
news_results = tags_classif.db.store_result() tags_classif.db.query (sql1)
news_results = news_results.fetch_row(maxrows=0) news_results = tags_classif.db.store_result ()
news_results = news_results.fetch_row (maxrows=0)
data = list () data = list ()
for i, (id_news, title, description) in enumerate (news_results): for i, (id_news, title, description) in enumerate (news_results):
text_for_analis = title.decode('ascii', 'ignore') + '\n' + description.decode('ascii', 'ignore') text_for_analis = title.decode ('ascii', 'ignore') + '\n' + description.decode ('ascii', 'ignore')
# print ("\n#", str (i)) # print ("\n#", str (i))
print ("\nNews title: " + title.decode('ascii', 'ignore')) print ("\nNews title: " + title.decode ('ascii', 'ignore'))
tags_classif.classify (text_for_analis) tags_classif.classify (text_for_analis)
print ("Model calculated Accordance (#tag, %-accordance, tag_description): ", end=" ") print ("Model calculated Accordance (#tag, %-accordance, tag_description): ", end=" ")
print (tags_classif.teg_accordance) print (tags_classif.teg_accordance)
sql = " select tag_id from wp_esi_tag_news where news_id =" + str(id_news) sql = " SELECT tag_id FROM wp_esi_tag_news WHERE news_id =" + str (id_news)
# print(sql) # print(sql)
results = tags_classif.db.query(sql) results = tags_classif.db.query (sql)
tags = tags_classif.db.store_result() tags = tags_classif.db.store_result ()
tags = tags.fetch_row(maxrows=0) tags = tags.fetch_row (maxrows=0)
# print(tags) # print(tags)
# print(tags_classif.tags) # print(tags_classif.tags)
print ("User classified tags for present news:") print ("User classified tags for present news:")
for (tag, ) in tags: for (tag,) in tags:
tag = int(tag) tag = int (tag)
# print(tag) # print(tag)
print (str(tag - 1) + " " + tags_classif.tags[tag - 1][1].decode('ascii', 'ignore')) print (str (tag) + " " + tags_classif.tags[tag-1][1].decode ('ascii', 'ignore'))
exit (0) exit (0)
tags_classif.db.query("SELECT title, description FROM wp_esi_news ORDER BY RAND() LIMIT 25") tags_classif.db.query ("SELECT title, description FROM wp_esi_news ORDER BY RAND() LIMIT 25")
result = tags_classif.db.store_result() result = tags_classif.db.store_result ()
for i, (title, description) in enumerate (result.fetch_row (maxrows=0)): for i, (title, description) in enumerate (result.fetch_row (maxrows=0)):
text_for_analis = title + '\n' + description text_for_analis = title + '\n' + description
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment