Commit eed7af2e authored by Vasyl Bodnaruk

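Changed the DB module for reuse: `Database.select` now returns the raw rows, and `Entity` construction moves into `CompanyMaker`.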

parent 41acec84
...@@ -6,12 +6,17 @@ Company = namedtuple('Company', 'id, url, media_id, type_id, region_id') ...@@ -6,12 +6,17 @@ Company = namedtuple('Company', 'id, url, media_id, type_id, region_id')
# 'http://www.mobihealthnews.com/tag/MedTronic' # 'http://www.mobihealthnews.com/tag/MedTronic'
Entity = namedtuple('Entity', 'id, name, country')
class CompanyMaker: class CompanyMaker:
def __init__(self, companies=None): def __init__(self, companies=None):
# self.in_site = ['http://www.mobihealthnews.com/tag/Twitter', 'http://www.mobihealthnews.com/tag/intel', # self.in_site = ['http://www.mobihealthnews.com/tag/Twitter', 'http://www.mobihealthnews.com/tag/intel',
# 'http://www.mobihealthnews.com/tag/ibm', 'http://www.mobihealthnews.com/tag/Salesforce', # 'http://www.mobihealthnews.com/tag/ibm', 'http://www.mobihealthnews.com/tag/Salesforce',
# 'http://www.mobihealthnews.com/tag/google'] # 'http://www.mobihealthnews.com/tag/google']
self.in_site = companies self.in_site = list()
if companies:
for i in companies:
self.in_site.append(Entity(i[0], i[1], i[2]))
self.companies = list() self.companies = list()
def make_companies(self): def make_companies(self):
......
...@@ -2,7 +2,6 @@ from collections import namedtuple ...@@ -2,7 +2,6 @@ from collections import namedtuple
import traceback import traceback
import MySQLdb import MySQLdb
Entity = namedtuple('Entity', 'id, name, country')
class Database: class Database:
...@@ -36,10 +35,7 @@ class Database: ...@@ -36,10 +35,7 @@ class Database:
def select(self, item): def select(self, item):
try: try:
self.cursor.execute(item) self.cursor.execute(item)
result = list() return self.cursor.fetchall()
for i in self.cursor.fetchall():
result.append(Entity(i[0], i[1], i[2]))
return result
except: except:
self.cursor.rollback() self.cursor.rollback()
......
...@@ -15,12 +15,9 @@ class ExaPipeline(object): ...@@ -15,12 +15,9 @@ class ExaPipeline(object):
def __init__(self): def __init__(self):
# self.out = open('out/out{}.txt'.format(datetime.now()), 'w', newline='\n') # self.out = open('out/out{}.txt'.format(datetime.now()), 'w', newline='\n')
self.db = Database(**db) self.db = Database(**db)
self.urls = set(self.db.select('select url from wp_esi_accept'))
super(ExaPipeline, self).__init__() super(ExaPipeline, self).__init__()
def __del__(self):
pass
# self.out.close()
def process_item(self, item, spider): def process_item(self, item, spider):
print(item) print(item)
item['description'] = ''.join(item['description']).replace('\n', ' ') item['description'] = ''.join(item['description']).replace('\n', ' ')
...@@ -29,7 +26,11 @@ class ExaPipeline(object): ...@@ -29,7 +26,11 @@ class ExaPipeline(object):
query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id, query = """INSERT INTO wp_esi_news_accept (title, description, URL, media_id, type_id, region_id, post_id,
publish_date, record_date, company_id, is_accepted) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n""" publish_date, record_date, company_id, is_accepted) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);\n"""
print(item) print(item)
news = self.db.insert(query, data) n = self.db.select("select url from wp_esi_news_accept where url={}".format(item['url']))
if len(n) == 0:
news = self.db.insert(query, data)
else:
print('Duplicate')
# self._insert_news_entiry(news, item['company_id']) # self._insert_news_entiry(news, item['company_id'])
# self.out.write(query) # self.out.write(query)
......
...@@ -19,7 +19,8 @@ class MobiHealthNewsSpider(scrapy.Spider): ...@@ -19,7 +19,8 @@ class MobiHealthNewsSpider(scrapy.Spider):
self.condition = kwargs.get('query') self.condition = kwargs.get('query')
self.query = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=3" self.query = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=3"
if self.condition: if self.condition:
self.query += ' and {}'.format(self.condition) print(self.condition)
self.query += ' or {}'.format(self.condition)
self.comp = db.select(self.query) self.comp = db.select(self.query)
super(MobiHealthNewsSpider, self).__init__() super(MobiHealthNewsSpider, self).__init__()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment