Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
E
exa_news_classificator
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vasyl Bodnaruk
exa_news_classificator
Commits
a0879988
Commit
a0879988
authored
Aug 01, 2017
by
Tags
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Make load Method:
Parser & loader.
parent
b66f9757
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
100 additions
and
0 deletions
+100
-0
model_data_analysis.py
model_data_analysis.py
+100
-0
No files found.
model_data_analysis.py
0 → 100644
View file @
a0879988
import
numpy
as
np
import
pprint
import
re
import
csv
def load():
    """Load the saved model described by ``info_model.txt``.

    The info file is read from the current working directory.  Only five of
    its lines are used (0-based numbering):

    * line 0 -- all digits on the line are concatenated and parsed as ``rows``
    * line 1 -- all digits on the line are concatenated and parsed as
      ``columns``
    * line 3 -- must start with the matrix file name (``<word>.csv``)
    * line 5 -- must start with the dictionary file name (``<word>.txt``)
    * line 7 -- must start with the tags file name (``<word>.txt``)

    Returns:
        tuple: ``(rows, columns, dictionary, tags, matrix)`` where ``rows``
        and ``columns`` are ints, ``dictionary`` and ``tags`` are lists with
        one string per line of their files (line terminator removed) and
        ``matrix`` is the array ``np.genfromtxt`` builds from the
        ``;``-delimited matrix file.

    Raises:
        IOError: if the info file or any file it names cannot be opened.
        ValueError: if the info file is too short, a count line holds no
            digits, or a file-name line does not start with a file name.
    """
    try:
        # ``open`` raises instead of returning None, so the "absent" message
        # has to be emitted from the exception path.
        with open("info_model.txt", 'r') as file_info:
            data = file_info.read().split("\n")
    except IOError:
        print("Info File \'info_model.txt\' is absent")
        raise

    # parse info file
    if len(data) < 8:
        raise ValueError(
            "Info File 'info_model.txt' is incomplete: expected at least "
            "8 lines, got " + str(len(data)))

    rows = int(''.join(re.findall(r'\d', data[0])))
    columns = int(''.join(re.findall(r'\d', data[1])))

    file_names = []
    for index, pattern in ((3, r'^\w+\.csv'),
                           (5, r'^\w+\.txt'),
                           (7, r'^\w+\.txt')):
        found = re.match(pattern, data[index])
        if found is None:
            raise ValueError(
                "Info File 'info_model.txt': line " + str(index) +
                " does not start with a file name: " + repr(data[index]))
        file_names.append(found.group(0))
    file_name_matrix, file_name_dictionary, file_name_tags = file_names

    # read matrix
    matrix = np.genfromtxt(file_name_matrix, delimiter=';')

    # read tags -- strip only the newline, so a last line without a trailing
    # newline keeps its final character
    with open(file_name_tags) as file_tags:
        tags = [item.rstrip("\n") for item in file_tags]

    # read dictionary
    with open(file_name_dictionary) as file_dictionary:
        dictionary = [word.rstrip("\n") for word in file_dictionary]

    return rows, columns, dictionary, tags, matrix
def save(model=None):
    """Write a trained model to the files that ``load()`` reads back.

    This was copied from a method in news_classify_tags.py and used to read
    an undefined ``self``; the object is now passed in explicitly.

    Files written in the current working directory:

    * ``info_model.txt`` -- summary lines (feature count, tag count, matrix
      shape, dictionary size, tag count)
    * ``data_tag_frequency.csv`` -- ``model.tag_frequency_matrix`` with ``;``
      as delimiter and four decimals per value
    * ``Dictionary.txt`` -- one feature name per line
    * ``tags.txt`` -- one ``"<text1> <text2>"`` line per tag pair

    Args:
        model: object providing ``vectorizer.get_feature_names()``, ``tags``
            (an iterable of two-item pairs), ``trained_model.shape`` and
            ``tag_frequency_matrix``.  Presumably the classifier from
            news_classify_tags.py -- confirm against that module.

    Returns:
        bool: ``True`` once every file has been written.

    Raises:
        ValueError: if ``model`` is not supplied (raised before any file is
            touched).
        IOError: if one of the output files cannot be written.
    """
    import os

    if model is None:
        raise ValueError(
            "save() needs the model object whose data should be written")

    # Collect everything first so a broken model cannot leave a truncated
    # info file behind.
    feature_names = model.vectorizer.get_feature_names()
    tags = model.tags
    matrix_shape = str(model.trained_model.shape)

    # save info data
    with open("info_model.txt", 'w') as file_info:
        file_info.write(
            "number of features = " + str(len(feature_names)) + '\n')
        file_info.write("Quantity of tags = " + str(len(tags)) + '\n')
        file_info.write(
            "\ndata_tag_frequency.csv ---- Model data matrix --- " +
            matrix_shape + "\n")
        file_info.write(
            "\nDictionary.txt -- Total number words in is---" +
            str(len(feature_names)) + '\n')
        file_info.write(
            "\ntags.txt --- Total number is ---" + str(len(tags)) + '\n')

    # save model matrix
    file_tag_frequency = 'data_tag_frequency.csv'
    if os.path.isfile(file_tag_frequency):
        os.remove(file_tag_frequency)
    # np.savetxt opens and closes the file itself; no extra handle is needed.
    np.savetxt(file_tag_frequency, model.tag_frequency_matrix,
               delimiter=';', fmt='%1.4f')
    print("Data is saved into file: " + file_tag_frequency + " " +
          str(int(os.stat(file_tag_frequency).st_size / 1024)) + 'kB')

    # save dictionary
    with open("Dictionary.txt", "w") as file_info_dictionaries:
        for line in feature_names:
            file_info_dictionaries.write(line + "\n")

    # save tags
    with open("tags.txt", "w") as file_info_tags:
        for text1, text2 in tags:
            # Drop non-ASCII characters whether the second item arrives as
            # bytes or as text.
            if not isinstance(text2, bytes):
                text2 = text2.encode('ascii', "ignore")
            file_info_tags.write(
                text1 + " " + text2.decode('ascii', "ignore") + "\n")

    return True
if __name__ == '__main__':
    # Script entry point: load the saved model and print its matrix.
    rows, columns, data_dictionary, tags, matrix = load()
    if data_dictionary is None:
        # Without a dictionary there is nothing meaningful to show, so the
        # matrix dump is skipped.
        print("Something wrong whit data files. ")
    else:
        np_matrix = np.asarray(matrix)
        print(np_matrix)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment