Commit ed9259da authored by Vasyl Bodnaruk

fix bug in replace

parent afea8191
......@@ -32,5 +32,12 @@ class CompanyMaker:
self.companies.append(Company(i.id, 'http://www.mobihealthnews.com/tag/' + tag, 43, 2, 2, i.name))
def _make_list_for_tc(self):
def name_for_search(name):
comp = name.split(' ')
for i in self.in_site:
self.companies.append(Company(i.id, 'https://techcrunch.com/tag/' + i.name, 81, 2, 2, i.name))
\ No newline at end of file
tag = i.name
if tag.find(' '):
tag = tag.replace(' ', '-')
if tag.find('.'):
tag = tag.replace('.', '-')
self.companies.append(Company(i.id, 'https://techcrunch.com/tag/' + tag, 81, 2, 2, i.name))
\ No newline at end of file
......@@ -57,7 +57,6 @@ class MobiHealthNewsSpider(scrapy.Spider):
print(has_next, next_url)
if has_next:
# pass
yield scrapy.Request(next_url, callback=self.parse, meta={'company': response.meta['company'], 'post_id': 0})
except BaseException as e:
......
......@@ -17,7 +17,7 @@ class TechcrunchSpider(scrapy.Spider):
def __init__(self, *args, **kwargs):
self.condition = kwargs.get('query')
self.query = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=3"
self.query = "SELECT id, name, country FROM wp_esi_entity WHERE 1 and id=13"
if self.condition:
print(self.condition)
self.query += ' or {}'.format(self.condition)
......@@ -55,6 +55,7 @@ class TechcrunchSpider(scrapy.Spider):
item['post_id'] = response.meta['post_id']
if item['title']:
yield item
has_next = response.xpath("//div[contains(@class, 'river-nav')]//li[contains(@class, 'next')]/a/@href").extract_first()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment