Commit 1b3a12f2 authored by Tags's avatar Tags

Make changes according to the requirements.

parent d531aef4
import sys
from _mysql import OperationalError
from pprint import pprint
# import sys
# from _mysql import OperationalError
# from pprint import pprint
import numpy as np
import _mysql
# noinspection PyUnresolvedReferences
......@@ -11,7 +12,6 @@ class Classifier:
try:
import nltk
import operator
from MySQLdb import connect
from stemming.porter2 import stem
except ImportError:
print ('You have import flowing packages: sklearn & nltk & re.')
......@@ -24,27 +24,29 @@ class Classifier:
return
# Extract data from the database
conn = connect (host=host, port=port, user=user, password=password, db=db)
self.cursor = conn.cursor ()
self.db = _mysql.connect (host=host, port=port, user=user, passwd=password, db=db)
#geting tags
result = self.cursor.execute ("select id, name from wp_esi_tag")
# getting tags
self.db.query("SELECT id, name FROM wp_esi_tag")
rez = self.db.store_result()
# result =
tags = list ()
for id, description in self.cursor.fetchall ():
tags.append((id, description))
for id, description in rez.fetch_row(maxrows=0):
tags.append ((id, description))
self.tags = tags
# print (tags)
# print (len(tags), tags)
del tags
train_data = []
# text_id = []
sql1 = '''select wp_esi_tag_news.tag_id, wp_esi_news.title, wp_esi_news.description
from wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
order by wp_esi_tag_news.tag_id LIMIT 1000 '''
result = self.cursor.execute (sql1)
data = list()
for tag_id, title, description in self.cursor.fetchall():
data.append((tag_id, title, description))
sql1 = '''SELECT wp_esi_tag_news.tag_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id LIMIT 1000 '''
self.db.query (sql1)
result = self.db.store_result()
data = list ()
for tag_id, title, description in result.fetch_row(maxrows=0):
data.append ((tag_id, title, description))
for tag_id, _ in self.tags:
texts = ''
......@@ -95,13 +97,13 @@ class Classifier:
# normalise the rows of the frequency matrix
for j in range (columns):
suma=0
suma = 0
suma = frequency_matrix[j].sum ()
if suma == 0:
continue
for i in range (rows):
frequency_matrix[j, i] /= suma
self.tag_frequency_matrix= frequency_matrix
self.tag_frequency_matrix = frequency_matrix
return frequency_matrix
def teach_model(self, data_text=''):
......@@ -122,12 +124,12 @@ class Classifier:
:return: tuple of % af tag accordance
"""
vector_accordance = []
for row in range(self.tag_frequency_matrix.shape[0]):
for row in range (self.tag_frequency_matrix.shape[0]):
temp_matrix = self.tag_frequency_matrix[row]
rez_summ = temp_matrix[self.matrix_test_data.toarray()[0] > 0].sum()
rez_summ = temp_matrix[self.matrix_test_data.toarray ()[0] > 0].sum ()
if rez_summ > (persantage / 100):
vector_accordance.append((row, int(rez_summ*1000)/10, self.tags[row][1]))
vector_accordance.sort(key=lambda tup: tup[1], reverse=True)
vector_accordance.append ((row, int (rez_summ * 1000) / 10, self.tags[row][1]))
vector_accordance.sort (key=lambda tup: tup[1], reverse=True)
return vector_accordance
......@@ -168,15 +170,16 @@ class Classifier:
def save(self):
    """Write ``self.tag_frequency_matrix`` to ``data_tag_frequency.csv``.

    The matrix is stored as ';'-separated text with four decimal places
    per value, in the current working directory. Any previous file of the
    same name is replaced.

    :return: True when the file was written, False when it could not be
             created or written (a message is printed in both cases).
    """
    import os

    file_tag_frequency = 'data_tag_frequency.csv'
    try:
        # Drop a stale copy first so the new file is always created fresh.
        if os.path.isfile(file_tag_frequency):
            os.remove(file_tag_frequency)
        # np.savetxt opens and closes the file itself; no separate handle
        # is needed (the previous explicit open() was never closed).
        np.savetxt(file_tag_frequency, self.tag_frequency_matrix,
                   delimiter=';', fmt='%1.4f')
    except OSError:
        # open() signals failure by raising, never by returning None, so
        # the failure path has to be an exception handler.
        print("Can't create data storage file")
        return False

    size_kb = int(os.stat(file_tag_frequency).st_size / 1024)
    print("Data is saved into file: " + file_tag_frequency + " " + str(size_kb) + 'kB')
    return True
......@@ -206,28 +209,60 @@ if __name__ == "__main__":
try:
# try the default (server) database first; fall back to the local one if the connection fails
tags_classif = Classifier ()
print('\nI use SERVER DataBase.\n')
except OperationalError:
print ('\nI use SERVER DataBase.\n')
except _mysql.OperationalError:
# server connection failed: use the local DataBase
print('\nI use local DataBase.\n')
print ('\nI use local DataBase.\n')
tags_classif = Classifier (host='localhost', port=8080, user='root', password='password', db='news')
# the method is not implicated
tags_classif.teach_model ()
tags_classif.save ()
# tags_classif.save ()
# exit(0)
result = tags_classif.cursor.execute ("select title, description from wp_esi_news ORDER BY RAND() limit 25")
for i, (title, description) in enumerate (tags_classif.cursor.fetchall ()):
sql1 = '''SELECT rez.news_id , rez.title, rez.description
FROM (
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) as rez
ORDER BY rand() LIMIT 15'''
tags_classif.db.query(sql1)
news_results = tags_classif.db.store_result()
news_results = news_results.fetch_row(maxrows=0)
data = list ()
for i, (id_news, title, description) in enumerate (news_results):
text_for_analis = title.decode('ascii', 'ignore') + '\n' + description.decode('ascii', 'ignore')
# print ("\n#", str (i))
print ("\nNews title: " + title.decode('ascii', 'ignore'))
tags_classif.classify (text_for_analis)
print ("Model calculated Accordance (#tag, %-accordance, tag_description): ", end=" ")
print (tags_classif.teg_accordance)
sql = " select tag_id from wp_esi_tag_news where news_id =" + str(id_news)
# print(sql)
results = tags_classif.db.query(sql)
tags = tags_classif.db.store_result()
tags = tags.fetch_row(maxrows=0)
# print(tags)
# print(tags_classif.tags)
print ("User classified tags for present news:")
for (tag, ) in tags:
tag = int(tag)
# print(tag)
print (str(tag - 1) + " " + tags_classif.tags[tag - 1][1].decode('ascii', 'ignore'))
exit (0)
tags_classif.db.query("SELECT title, description FROM wp_esi_news ORDER BY RAND() LIMIT 25")
result = tags_classif.db.store_result()
for i, (title, description) in enumerate (result.fetch_row (maxrows=0)):
text_for_analis = title + '\n' + description
print ("\n\n#", str (i))
pprint (text_for_analis)
print ("\n#", str (i))
print (text_for_analis)
tags_results = tags_classif.classify (text_for_analis)
# pprint (tags_results[:5])
# accordance %
print("Accordance (#tag, %-accordance, tag_description): ")
print(tags_classif.teg_accordance)
print ("Accordance (#tag, %-accordance, tag_description): ")
print (tags_classif.teg_accordance)
# log_data (i=i, text_to_analise=text_for_analis, tags=tags_results)
# if i > 10: break
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment