Commit 58b8b305 authored by Vasyl Bodnaruk

Add function for selecting tags on news page

parent e9f6b038
......@@ -18,3 +18,4 @@ class ExaItem(scrapy.Item):
type_id = scrapy.Field()
post_id = scrapy.Field()
company_id = scrapy.Field()
tags = scrapy.Field()
\ No newline at end of file
......@@ -32,7 +32,7 @@ class MobiHealthNewsSpider(BaseSpider):
item['post_id'] = response.meta['post_id']
yield item
yield scrapy.Request(item['url'], callback=self.parse_tags, meta={'item': item})
has_next = response.xpath(
"..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first()
next_url = 'http://www.mobihealthnews.com' + response.xpath(
......@@ -44,3 +44,8 @@ class MobiHealthNewsSpider(BaseSpider):
except BaseException as e:
print('We had error')
traceback.print_exc()
def parse_tags(self, response):
    """Attach the tag labels found on an article page to its item.

    The item is taken from ``response.meta['item']``. The text of every
    link inside the page's bottom tag block is collected into a list,
    stored under ``item['tags']``, and the item is then yielded.

    Args:
        response: the downloaded article page; must carry the item in
            its ``meta`` mapping under the ``'item'`` key.

    Yields:
        The same item object, with its ``'tags'`` entry set.
    """
    # The class attribute is matched as one exact string, so the selector
    # only hits a div whose class list is written precisely like this.
    tags_xpath = (
        ".//div[@class='bottom-tags field field-name-field-tags "
        "field-type-taxonomy-term-reference field-label-inline clearfix']"
        "//a/text()"
    )
    news_item = response.meta['item']
    news_item['tags'] = response.xpath(tags_xpath).extract()
    yield news_item
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment