Commit 1b3a12f2 authored by Tags

Make changes according to the requirements.

parent d531aef4
import sys # import sys
from _mysql import OperationalError # from _mysql import OperationalError
from pprint import pprint # from pprint import pprint
import numpy as np import numpy as np
import _mysql
# noinspection PyUnresolvedReferences # noinspection PyUnresolvedReferences
...@@ -11,7 +12,6 @@ class Classifier: ...@@ -11,7 +12,6 @@ class Classifier:
try: try:
import nltk import nltk
import operator import operator
from MySQLdb import connect
from stemming.porter2 import stem from stemming.porter2 import stem
except ImportError: except ImportError:
print ('You have import flowing packages: sklearn & nltk & re.') print ('You have import flowing packages: sklearn & nltk & re.')
...@@ -24,27 +24,29 @@ class Classifier: ...@@ -24,27 +24,29 @@ class Classifier:
return return
# Extract data form DataBase # Extract data form DataBase
conn = connect (host=host, port=port, user=user, password=password, db=db) self.db = _mysql.connect (host=host, port=port, user=user, passwd=password, db=db)
self.cursor = conn.cursor ()
#geting tags # geting tags
result = self.cursor.execute ("select id, name from wp_esi_tag") self.db.query("SELECT id, name FROM wp_esi_tag")
rez = self.db.store_result()
# result =
tags = list () tags = list ()
for id, description in self.cursor.fetchall (): for id, description in rez.fetch_row(maxrows=0):
tags.append((id, description)) tags.append ((id, description))
self.tags = tags self.tags = tags
# print (tags) # print (len(tags), tags)
del tags del tags
train_data = [] train_data = []
# text_id = [] # text_id = []
sql1 = '''select wp_esi_tag_news.tag_id, wp_esi_news.title, wp_esi_news.description sql1 = '''SELECT wp_esi_tag_news.tag_id, wp_esi_news.title, wp_esi_news.description
from wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
order by wp_esi_tag_news.tag_id LIMIT 1000 ''' ORDER BY wp_esi_tag_news.tag_id LIMIT 1000 '''
result = self.cursor.execute (sql1) self.db.query (sql1)
data = list() result = self.db.store_result()
for tag_id, title, description in self.cursor.fetchall(): data = list ()
data.append((tag_id, title, description)) for tag_id, title, description in result.fetch_row(maxrows=0):
data.append ((tag_id, title, description))
for tag_id, _ in self.tags: for tag_id, _ in self.tags:
texts = '' texts = ''
...@@ -95,13 +97,13 @@ class Classifier: ...@@ -95,13 +97,13 @@ class Classifier:
# normalise rowsfrequency matrix # normalise rowsfrequency matrix
for j in range (columns): for j in range (columns):
suma=0 suma = 0
suma = frequency_matrix[j].sum () suma = frequency_matrix[j].sum ()
if suma == 0: if suma == 0:
continue continue
for i in range (rows): for i in range (rows):
frequency_matrix[j, i] /= suma frequency_matrix[j, i] /= suma
self.tag_frequency_matrix= frequency_matrix self.tag_frequency_matrix = frequency_matrix
return frequency_matrix return frequency_matrix
def teach_model(self, data_text=''): def teach_model(self, data_text=''):
...@@ -122,12 +124,12 @@ class Classifier: ...@@ -122,12 +124,12 @@ class Classifier:
:return: tuple of % af tag accordance :return: tuple of % af tag accordance
""" """
vector_accordance = [] vector_accordance = []
for row in range(self.tag_frequency_matrix.shape[0]): for row in range (self.tag_frequency_matrix.shape[0]):
temp_matrix = self.tag_frequency_matrix[row] temp_matrix = self.tag_frequency_matrix[row]
rez_summ = temp_matrix[self.matrix_test_data.toarray()[0] > 0].sum() rez_summ = temp_matrix[self.matrix_test_data.toarray ()[0] > 0].sum ()
if rez_summ > (persantage / 100): if rez_summ > (persantage / 100):
vector_accordance.append((row, int(rez_summ*1000)/10, self.tags[row][1])) vector_accordance.append ((row, int (rez_summ * 1000) / 10, self.tags[row][1]))
vector_accordance.sort(key=lambda tup: tup[1], reverse=True) vector_accordance.sort (key=lambda tup: tup[1], reverse=True)
return vector_accordance return vector_accordance
...@@ -168,15 +170,16 @@ class Classifier: ...@@ -168,15 +170,16 @@ class Classifier:
def save(self): def save(self):
import os import os
file_tag_frequency= 'data_tag_frequency.csv' file_tag_frequency = 'data_tag_frequency.csv'
if os.path.isfile(file_tag_frequency): if os.path.isfile (file_tag_frequency):
os.remove(file_tag_frequency) os.remove (file_tag_frequency)
file_data= open(file_tag_frequency,"w") file_data = open (file_tag_frequency, "w")
if file_data == None: if file_data == None:
print("Can't create data storage file") print ("Can't create data storage file")
return False return False
np.savetxt(file_tag_frequency, self.tag_frequency_matrix, delimiter=';', fmt='%1.4f') np.savetxt (file_tag_frequency, self.tag_frequency_matrix, delimiter=';', fmt='%1.4f')
print("Data is saved into file: "+ file_tag_frequency+" " + str(int(os.stat(file_tag_frequency).st_size/1024)) + 'kB') print ("Data is saved into file: " + file_tag_frequency + " " + str (
int (os.stat (file_tag_frequency).st_size / 1024)) + 'kB')
return True return True
...@@ -206,28 +209,60 @@ if __name__ == "__main__": ...@@ -206,28 +209,60 @@ if __name__ == "__main__":
try: try:
# if localhost database is not available then use server # if localhost database is not available then use server
tags_classif = Classifier () tags_classif = Classifier ()
print('\nI use SERVER DataBase.\n') print ('\nI use SERVER DataBase.\n')
except OperationalError: except _mysql.OperationalError:
# use server DataBase # use server DataBase
print('\nI use local DataBase.\n') print ('\nI use local DataBase.\n')
tags_classif = Classifier (host='localhost', port=8080, user='root', password='password', db='news') tags_classif = Classifier (host='localhost', port=8080, user='root', password='password', db='news')
# the method is not implicated # the method is not implicated
tags_classif.teach_model () tags_classif.teach_model ()
tags_classif.save () # tags_classif.save ()
# exit(0) # exit(0)
result = tags_classif.cursor.execute ("select title, description from wp_esi_news ORDER BY RAND() limit 25") sql1 = '''SELECT rez.news_id , rez.title, rez.description
FROM (
for i, (title, description) in enumerate (tags_classif.cursor.fetchall ()): SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) as rez
ORDER BY rand() LIMIT 15'''
tags_classif.db.query(sql1)
news_results = tags_classif.db.store_result()
news_results = news_results.fetch_row(maxrows=0)
data = list ()
for i, (id_news, title, description) in enumerate (news_results):
text_for_analis = title.decode('ascii', 'ignore') + '\n' + description.decode('ascii', 'ignore')
# print ("\n#", str (i))
print ("\nNews title: " + title.decode('ascii', 'ignore'))
tags_classif.classify (text_for_analis)
print ("Model calculated Accordance (#tag, %-accordance, tag_description): ", end=" ")
print (tags_classif.teg_accordance)
sql = " select tag_id from wp_esi_tag_news where news_id =" + str(id_news)
# print(sql)
results = tags_classif.db.query(sql)
tags = tags_classif.db.store_result()
tags = tags.fetch_row(maxrows=0)
# print(tags)
# print(tags_classif.tags)
print ("User classified tags for present news:")
for (tag, ) in tags:
tag = int(tag)
# print(tag)
print (str(tag - 1) + " " + tags_classif.tags[tag - 1][1].decode('ascii', 'ignore'))
exit (0)
tags_classif.db.query("SELECT title, description FROM wp_esi_news ORDER BY RAND() LIMIT 25")
result = tags_classif.db.store_result()
for i, (title, description) in enumerate (result.fetch_row (maxrows=0)):
text_for_analis = title + '\n' + description text_for_analis = title + '\n' + description
print ("\n\n#", str (i)) print ("\n#", str (i))
pprint (text_for_analis) print (text_for_analis)
tags_results = tags_classif.classify (text_for_analis) tags_results = tags_classif.classify (text_for_analis)
# pprint (tags_results[:5]) # pprint (tags_results[:5])
# accordance % # accordance %
print("Accordance (#tag, %-accordance, tag_description): ") print ("Accordance (#tag, %-accordance, tag_description): ")
print(tags_classif.teg_accordance) print (tags_classif.teg_accordance)
# log_data (i=i, text_to_analise=text_for_analis, tags=tags_results) # log_data (i=i, text_to_analise=text_for_analis, tags=tags_results)
# if i > 10: break # if i > 10: break
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment