Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
E
exa_news_classificator
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vasyl Bodnaruk
exa_news_classificator
Commits
f9f1cbfb
Commit
f9f1cbfb
authored
Aug 21, 2017
by
Tags
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Improve save Method.
parent
27909e71
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
51 additions
and
44 deletions
+51
-44
model_data_analysis.py
model_data_analysis.py
+1
-1
news_classify_tag.py
news_classify_tag.py
+50
-43
No files found.
model_data_analysis.py
View file @
f9f1cbfb
...
...
@@ -97,8 +97,8 @@ def graph_results(in_data, title=''):
figure
()
hist
(
data
,
bins
=
50
)
show
()
savefig
(
"fig.png"
)
# show()
def
text_clear
(
word
):
...
...
news_classify_tag.py
View file @
f9f1cbfb
...
...
@@ -132,7 +132,7 @@ class Classifier:
from
sklearn.naive_bayes
import
MultinomialNB
from
sklearn.neighbors
import
KNeighborsClassifier
self
.
vectorizer
=
CountVectorizer
(
min_df
=
2
,
stop_words
=
'english'
,
ngram_range
=
(
1
,
1
))
self
.
vectorizer
=
CountVectorizer
(
min_df
=
2
,
stop_words
=
'english'
,
ngram_range
=
(
1
,
2
))
self
.
tfidf
=
TfidfTransformer
()
# self.classifier = MultinomialNB()
self
.
classifier
=
KNeighborsClassifier
()
...
...
@@ -146,9 +146,10 @@ class Classifier:
# _________ ML learning posses ___________
X_data
=
self
.
vectorizer
.
fit_transform
(
self
.
X_text_data
)
#
X_data = self.tfidf.fit_transform(X_data)
X_data
=
self
.
tfidf
.
fit_transform
(
X_data
)
self
.
classifier
.
fit
(
X_data
,
self
.
y_data
)
self
.
links_tags
=
set
(
self
.
y_data
)
self
.
X_data
=
X_data
# ___________________result usage __________________________
# X_test = self.vectorarizer.transform(["Hello In the world from android programmers. My apps are so cool.",])
...
...
@@ -186,8 +187,10 @@ class Classifier:
def
classify
(
self
,
text
,
test_corpus
=
None
,
entity_id
=
0
):
"""
Method which test the :type:text for tags relevancy
:type text: is text for analysis
:type text: is text for analysis.
:type entity_id: is the entity id in the DB.
"""
if
text
is
None
:
...
...
@@ -235,36 +238,40 @@ class Classifier:
return
True
def
save
(
self
):
return
#
return
import
os
# save info data
file_info
=
open
(
"info_model.txt"
,
'w'
)
directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
file_name_dictionary
=
directory
+
'/info_model.txt'
file_info
=
open
(
file_name_dictionary
,
'w'
)
if
file_info
==
None
:
return
False
file_info
.
write
(
"number of features = "
+
str
(
len
(
self
.
vectorizer
.
get_feature_names
()))
+
'
\n
'
)
file_info
.
write
(
"Quantity of tags = "
+
str
(
len
(
self
.
tags
))
+
'
\n
'
)
file_info
.
write
(
"
\n
data_tag_frequency.csv ---- Model data matrix --- "
+
\
str
(
self
.
trained_model
.
shape
)
+
"
\n
"
)
str
(
self
.
X_data
.
shape
)
+
"
\n
"
)
file_info
.
write
(
"
\n
Dictionary.txt -- Total number words in is---"
+
str
(
len
(
self
.
vectorizer
.
get_feature_names
()))
+
'
\n
'
)
file_info
.
write
(
"
\n
tags.txt --- Total number is ---"
+
str
(
len
(
self
.
tags
))
+
'
\n
'
)
file_info
.
close
()
# save model matrix
file_tag_frequency
=
'
data_tag_frequency.csv'
file_tag_frequency
=
directory
+
'/
data_tag_frequency.csv'
if
os
.
path
.
isfile
(
file_tag_frequency
):
os
.
remove
(
file_tag_frequency
)
file_data
=
open
(
file_tag_frequency
,
"w"
)
if
file_data
==
None
:
print
(
"Can't create data storage file"
)
return
False
np
.
savetxt
(
file_tag_frequency
,
self
.
tag_frequency_matrix
,
delimiter
=
';'
,
fmt
=
'
%1.4
f'
)
np
.
savetxt
(
file_tag_frequency
,
self
.
X_data
.
toarray
()
,
delimiter
=
';'
,
fmt
=
'
%1.4
f'
)
print
(
"Data is saved into file: "
+
file_tag_frequency
+
" "
+
str
(
int
(
os
.
stat
(
file_tag_frequency
)
.
st_size
/
1024
))
+
'kB'
)
# save dictionary
file_info_dictionaries
=
open
(
"Dictionary.txt"
,
"w"
)
file_info_dictionaries
=
open
(
directory
+
"/Dictionary.txt"
,
"w"
)
if
file_info_dictionaries
==
None
:
return
False
for
line
in
self
.
vectorizer
.
get_feature_names
():
...
...
@@ -272,7 +279,7 @@ class Classifier:
file_info_dictionaries
.
close
()
# save tags
file_info_tags
=
open
(
"
tags.txt"
,
"w"
)
file_info_tags
=
open
(
directory
+
"/
tags.txt"
,
"w"
)
if
file_info_tags
==
None
:
return
False
for
text1
,
text2
in
self
.
tags
:
...
...
@@ -314,41 +321,41 @@ if __name__ == "__main__":
tags_classif
=
Classifier
(
host
=
'localhost'
,
port
=
8080
,
user
=
'root'
,
password
=
'password'
,
db
=
'news'
)
# the method is not implicated
tags_classif
.
teach_model
()
#
tags_classif.save()
tags_classif
.
save
()
# exit(0)
#
#
# _______ test from indentifyed news ______________
#
sql1 = '''SELECT rez.news_id , rez.title, rez.description
#
FROM(
#
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
#
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
#
ORDER BY wp_esi_tag_news.tag_id ) AS rez
#
ORDER BY rand() LIMIT 30'''
#
#
tags_classif.db.query(sql1)
#
news_results = tags_classif.db.store_result()
#
news_results = news_results.fetch_row(maxrows=0)
#
data = list()
#
for i, (id_news, title, description) in enumerate(news_results):
#
text_for_analis = title.decode('ascii', 'ignore') + '\n' + description.decode('ascii', 'ignore')
#
# print("\n#", str(i))
#
print("\nNews title: " + title.decode('ascii', 'ignore'))
#
tags_classif.classify(text_for_analis)
#
print("Model calculated Accordance(#tag, %-accordance, tag_description): ", end=" ")
#
print(tags_classif.tag_accordance)
#
sql = " SELECT tag_id FROM wp_esi_tag_news WHERE news_id =" + str(id_news)
#
# print(sql)
#
results = tags_classif.db.query(sql)
#
tags = tags_classif.db.store_result()
#
tags = tags.fetch_row(maxrows=0)
#
# print(tags)
#
# print(tags_classif.tags)
#
print("User classified tags for present news:")
#
for (tag,) in tags:
#
tag = int(tag)
#
# print(tag)
#
print(str(tag) + " " + tags_classif.tags[tag - 1][1].decode('ascii', 'ignore'))
# #
exit(0)
## _______ test from indentifyed news ______________
sql1
=
'''SELECT rez.news_id , rez.title, rez.description
FROM(
SELECT wp_esi_tag_news.news_id, wp_esi_news.title, wp_esi_news.description
FROM wp_esi_news, wp_esi_tag_news WHERE wp_esi_tag_news.news_id=wp_esi_news.id
ORDER BY wp_esi_tag_news.tag_id ) AS rez
ORDER BY rand() LIMIT 30'''
tags_classif
.
db
.
query
(
sql1
)
news_results
=
tags_classif
.
db
.
store_result
()
news_results
=
news_results
.
fetch_row
(
maxrows
=
0
)
data
=
list
()
for
i
,
(
id_news
,
title
,
description
)
in
enumerate
(
news_results
):
text_for_analis
=
title
.
decode
(
'ascii'
,
'ignore'
)
+
'
\n
'
+
description
.
decode
(
'ascii'
,
'ignore'
)
# print("\n#", str(i))
print
(
"
\n
News title: "
+
title
.
decode
(
'ascii'
,
'ignore'
))
tags_classif
.
classify
(
text_for_analis
)
print
(
"Model calculated Accordance(#tag,
%-
accordance, tag_description): "
,
end
=
" "
)
print
(
tags_classif
.
tag_accordance
)
sql
=
" SELECT tag_id FROM wp_esi_tag_news WHERE news_id ="
+
str
(
id_news
)
# print(sql)
results
=
tags_classif
.
db
.
query
(
sql
)
tags
=
tags_classif
.
db
.
store_result
()
tags
=
tags
.
fetch_row
(
maxrows
=
0
)
# print(tags)
# print(tags_classif.tags)
print
(
"User classified tags for present news:"
)
for
(
tag
,)
in
tags
:
tag
=
int
(
tag
)
# print(tag)
print
(
str
(
tag
)
+
" "
+
tags_classif
.
tags
[
tag
-
1
][
1
]
.
decode
(
'ascii'
,
'ignore'
))
exit
(
0
)
# print("\n\n")
tags_classif
.
db
.
query
(
"SELECT title, description, text, company_id FROM wp_esi_news_accept ORDER BY RAND() LIMIT 50"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment