Commit 5dd6300d authored by Tags

Method Cross validation

parent f9f1cbfb
......@@ -105,9 +105,9 @@ class Classifier:
self.entity_tags[entity] = "" + item_tag
else:
self.entity_tags[entity] += " " + item_tag
# print(self.entity_tags)
# print(len(self.entity_tags))
# exit(0)
# print(self.entity_tags)
# print(len(self.entity_tags))
# exit(0)
def text_clear(self, texts):
import re
......@@ -132,7 +132,7 @@ class Classifier:
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
self.vectorizer = CountVectorizer(min_df=2, stop_words='english', ngram_range=(1, 2))
self.vectorizer = CountVectorizer(min_df=2, stop_words='english', ngram_range=(1, 1))
self.tfidf = TfidfTransformer()
# self.classifier = MultinomialNB()
self.classifier = KNeighborsClassifier()
......@@ -310,6 +310,41 @@ def log_data(i=None, text_to_analise=None, tags=None):
out_file.close()
def cross_validation():
    """Score the tag classifier against news that users have already tagged.

    Fetches (news_id, title, description) rows from the join of
    ``wp_esi_news`` and ``wp_esi_tag_news`` in random order, classifies the
    title plus description of each row with the module-level
    ``tags_classif``, and counts the row as a hit when at least one tag id
    taken from ``tags_classif.tag_accordance`` is also stored for that news
    item in ``wp_esi_tag_news``.  The hit ratio is printed at the end.

    The join has no DISTINCT/GROUP BY, so a news item carrying several tags
    is returned (and scored) once per tag.

    Returns nothing; every result is written to stdout.  When the query
    yields no rows the ratio line is skipped instead of dividing by zero.
    """
    # NOTE(review): the block nesting below was reconstructed from syntax.
    # The per-item detail prints are assumed to belong to the "no common
    # tag" branch only -- confirm against the repository copy.

    ## _______ test on already identified (user-tagged) news ______________
    sql1 = '''SELECT rez.news_id , rez.title, rez.description
FROM(
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) AS rez
ORDER BY rand()'''
    tags_classif.db.query(sql1)
    news_results = tags_classif.db.store_result().fetch_row(maxrows=0)
    news_count = len(news_results)

    total_score = 0
    for i, (id_news, title, description) in enumerate(news_results):
        # Decode once; the same text is used for classification and reporting.
        title_text = title.decode('ascii', 'ignore')
        text_for_analis = title_text + '\n' + description.decode('ascii', 'ignore')

        tags_classif.classify(text_for_analis)
        rez_accordance = {item[0] for item in tags_classif.tag_accordance}

        # int() guarantees that only digits are concatenated into the query.
        sql = " SELECT tag_id FROM wp_esi_tag_news WHERE news_id =" + str(int(id_news))
        tags_classif.db.query(sql)
        tags_in_article = {tag for (tag, ) in tags_classif.db.store_result().fetch_row(maxrows=0)}

        # NOTE(review): the intersection is only meaningful if the ids in
        # tag_accordance and the tag_id values returned by the driver have
        # the same type -- verify.
        common_tags = rez_accordance & tags_in_article
        print(rez_accordance, tags_in_article, len(common_tags))

        if common_tags:
            total_score += 1
        else:
            # Miss: show what the model proposed next to the stored tags.
            print("\n#", str(i))
            print("\nNews title: " + title_text)
            print("Model calculated Accordance :", end=" ")
            print(rez_accordance)
            print("User classified tags for news:", tags_in_article)

    print("\n\nThe Model was tasted on ", news_count, " news. ")
    if news_count == 0:
        # No rows to average over; the ratio is undefined.
        return
    print(" The total result is:", total_score / news_count)
if __name__ == "__main__":
try:
# if localhost database is not available then use server
......@@ -324,41 +359,12 @@ if __name__ == "__main__":
tags_classif.save()
# exit(0)
## _______ test from indentifyed news ______________
sql1 = '''SELECT rez.news_id , rez.title, rez.description
FROM(
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) AS rez
ORDER BY rand() LIMIT 30'''
tags_classif.db.query(sql1)
news_results = tags_classif.db.store_result()
news_results = news_results.fetch_row(maxrows=0)
data = list()
for i, (id_news, title, description) in enumerate(news_results):
text_for_analis = title.decode('ascii', 'ignore') + '\n' + description.decode('ascii', 'ignore')
# print("\n#", str(i))
print("\nNews title: " + title.decode('ascii', 'ignore'))
tags_classif.classify(text_for_analis)
print("Model calculated Accordance(#tag, %-accordance, tag_description): ", end=" ")
print(tags_classif.tag_accordance)
sql = " SELECT tag_id FROM wp_esi_tag_news WHERE news_id =" + str(id_news)
# print(sql)
results = tags_classif.db.query(sql)
tags = tags_classif.db.store_result()
tags = tags.fetch_row(maxrows=0)
# print(tags)
# print(tags_classif.tags)
print("User classified tags for present news:")
for (tag,) in tags:
tag = int(tag)
# print(tag)
print(str(tag) + " " + tags_classif.tags[tag - 1][1].decode('ascii', 'ignore'))
cross_validation()
exit(0)
# print("\n\n")
tags_classif.db.query("SELECT title, description, text, company_id FROM wp_esi_news_accept ORDER BY RAND() LIMIT 50")
tags_classif.db.query(
"SELECT title, description, text, company_id FROM wp_esi_news_accept ORDER BY RAND() LIMIT 50")
result = tags_classif.db.store_result()
# news analysis with title + description + text + company_id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment