Commit dec262cb authored by Vasyl Bodnaruk

Clean parse function

parent 52f4db11
...@@ -63,11 +63,11 @@ class AitopSpider(BaseSpider): ...@@ -63,11 +63,11 @@ class AitopSpider(BaseSpider):
item['post_id'] = response.meta['post_id'] item['post_id'] = response.meta['post_id']
item['tags'] = i.xpath( item['tags'] = i.xpath(
".//div[@class='row hidden-xs']//div[@title='Concept Tags']//a[@class='filter btn btn-link']/text()").extract() ".//div[@class='row hidden-xs']//div[@title='Concept Tags']//a[@class='filter btn btn-link']/text()").extract()
yield item if is_company_in_item(item):
if is_company_in_item(i):
yield i yield i
next_url = self.next_url(response) next_url = self.next_url(response)
if self.can_follow(next_url, is_duplicate): if next_url:
yield scrapy.Request(next_url, callback=self.parse_by_title_description, meta=response.meta) yield scrapy.Request(next_url, callback=self.parse_by_title_description, meta=response.meta)
else: else:
print("DUPLICATE NEWS") print("DUPLICATE NEWS")
...@@ -93,8 +93,7 @@ class AitopSpider(BaseSpider): ...@@ -93,8 +93,7 @@ class AitopSpider(BaseSpider):
next_url = self.next_url(response) next_url = self.next_url(response)
if next_url: if next_url:
print('FOLLOW') yield scrapy.Request(next_url, callback=self.parse_by_tag, meta=response.meta)
# yield scrapy.Request(next_url, callback=self.parse_by_tag, meta=response.meta)
except: except:
pass pass
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment