Commit a0879988 authored by Tags

Make load Method:

Parser & loader.
parent b66f9757
import numpy as np
import pprint
import re
import csv
def _parse_count(line):
    """Return the integer made of every decimal digit found in *line*.

    Raises:
        ValueError: if *line* contains no digit at all.
    """
    digits = ''.join(re.findall(r'\d', line))
    if not digits:
        raise ValueError("No number found in info line: %r" % (line,))
    return int(digits)


def _parse_file_name(line, extension):
    """Return the ``<word>.<extension>`` file name that *line* starts with.

    Raises:
        ValueError: if *line* does not start with such a file name.
    """
    found = re.match(r'^\w+\.' + extension, line)
    if found is None:
        raise ValueError(
            "Expected a .%s file name at the start of info line: %r"
            % (extension, line))
    return found.group(0)


def _read_lines(path):
    """Return the lines of the text file *path* without their line breaks."""
    with open(path) as handle:
        # rstrip("\n") instead of [:-1]: a final line that has no trailing
        # newline must not lose its last character.
        return [line.rstrip("\n") for line in handle]


def load():
    """Load the model files described by ``info_model.txt``.

    The info file is read from the current working directory and parsed by
    line position (0-based):

    * line 0 -- every digit on the line is joined into ``rows``
    * line 1 -- every digit on the line is joined into ``columns``
    * line 3 -- starts with the name of the ``.csv`` matrix file
    * line 5 -- starts with the name of the ``.txt`` dictionary file
    * line 7 -- starts with the name of the ``.txt`` tags file

    Returns:
        ``(rows, columns, dictionary, tags, matrix)`` where ``dictionary``
        and ``tags`` are lists of lines and ``matrix`` is the array read by
        ``np.genfromtxt`` with ``;`` as delimiter.  ``False`` is returned,
        after printing a message, when ``info_model.txt`` does not exist.

    Raises:
        ValueError: if the info file has fewer than 8 lines or one of the
            parsed lines does not have the expected content.
    """
    # open() never returns None; a missing file is reported by an exception.
    try:
        with open("info_model.txt", 'r') as file_info:
            info_lines = file_info.read().split("\n")
    except FileNotFoundError:
        print("Info File 'info_model.txt' is absent")
        return False

    if len(info_lines) < 8:
        raise ValueError(
            "'info_model.txt' is incomplete: expected at least 8 lines, "
            "got %d" % len(info_lines))

    # parse info file
    rows = _parse_count(info_lines[0])
    columns = _parse_count(info_lines[1])
    file_name_matrix = _parse_file_name(info_lines[3], 'csv')
    file_name_dictionary = _parse_file_name(info_lines[5], 'txt')
    file_name_tags = _parse_file_name(info_lines[7], 'txt')

    # read matrix
    matrix = np.genfromtxt(file_name_matrix, delimiter=';')
    # read tags
    tags = _read_lines(file_name_tags)
    # read dictionary
    dictionary = _read_lines(file_name_dictionary)
    return rows, columns, dictionary, tags, matrix
def save(model=None):
    """Write a trained model to the files that ``load()`` reads back.

    Template taken from news_classify_tags.py; nothing in this module calls
    it.  All files are created in the current working directory.

    Args:
        model: object providing ``vectorizer.get_feature_names()``,
            ``tags`` (an iterable of ``(text, bytes)`` pairs -- the second
            item is decoded as ASCII), ``trained_model.shape`` and
            ``tag_frequency_matrix``.  The parameter is optional only so the
            old zero-argument signature keeps working; the function cannot
            do anything without it.

    Returns:
        ``True`` once every file has been written.

    Raises:
        TypeError: if *model* is not given.  Nothing is written in that case.
        OSError: if one of the files cannot be created (``open`` reports
            failure by raising, never by returning ``None``).
    """
    import os

    # Validate first so that a bad call cannot truncate an existing model.
    if model is None:
        raise TypeError("save() needs the trained model object to write out")

    feature_names = model.vectorizer.get_feature_names()

    # save info data
    with open("info_model.txt", 'w') as file_info:
        file_info.write("number of features = " + str(len(feature_names)) + '\n')
        file_info.write("Quantity of tags = " + str(len(model.tags)) + '\n')
        file_info.write("\ndata_tag_frequency.csv ---- Model data matrix --- "
                        + str(model.trained_model.shape) + "\n")
        file_info.write("\nDictionary.txt -- Total number words in is---"
                        + str(len(feature_names)) + '\n')
        file_info.write("\ntags.txt --- Total number is ---"
                        + str(len(model.tags)) + '\n')

    # save model matrix
    file_tag_frequency = 'data_tag_frequency.csv'
    if os.path.isfile(file_tag_frequency):
        os.remove(file_tag_frequency)
    # np.savetxt opens the file by name itself; no separate handle is needed.
    np.savetxt(file_tag_frequency, model.tag_frequency_matrix,
               delimiter=';', fmt='%1.4f')
    print("Data is saved into file: " + file_tag_frequency + " "
          + str(int(os.stat(file_tag_frequency).st_size / 1024)) + 'kB')

    # save dictionary
    with open("Dictionary.txt", "w") as file_info_dictionaries:
        for line in feature_names:
            file_info_dictionaries.write(line + "\n")

    # save tags
    with open("tags.txt", "w") as file_info_tags:
        for text1, text2 in model.tags:
            file_info_tags.write(
                text1 + " " + text2.decode('ascii', "ignore") + "\n")

    return True
if __name__ == '__main__':
    # Script entry point: load the model files and print the matrix.
    loaded = load()
    # Check the result before unpacking it: a failed load may hand back a
    # single falsy value instead of the five-item tuple.
    if not loaded:
        print("Something wrong whit data files. ")
    else:
        rows, columns, data_dictionary, tags, matrix = loaded
        if data_dictionary is None:
            print("Something wrong whit data files. ")
        else:
            # print(rows, columns, data_dictionary, tags, matrix )
            np_matrix = np.asarray(matrix)
            print(np_matrix)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment