Commit 73ef5c55 authored by Vasyl Bodnaruk

Add callback for parse tag

parent 47b2e13a
......@@ -31,6 +31,13 @@ class NanaSpider(BaseSpider):
if self.pipeline.check_url(item['url']):
is_duplicate = True
break
else:
yield scrapy.Request(item['url'], callback=self.parse_tag, meta={'item': item})
next_url = response.xpath('.//div[@class="pagination"]/ul//a[text()="Next"]/@href').extract_first()
if self.can_follow(next_url, is_duplicate):
yield scrapy.Request(next_url, callback=self.parse)
\ No newline at end of file
yield scrapy.Request(next_url, callback=self.parse, meta=response.meta)
def parse_tag(self, response):
    """Fill in the ``'tags'`` field of the item carried by *response*.

    The item is read from ``response.meta['item']``. Its ``'tags'`` key is
    set to the list of text values extracted from the links inside
    ``<ul class="tag-list">`` on the fetched page, and the item is then
    yielded.
    """
    article = response.meta['item']
    tag_links = response.xpath('.//ul[@class="tag-list"]/li/a/text()')
    article['tags'] = tag_links.extract()
    yield article
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment