Commit 58b8b305 authored by Vasyl Bodnaruk

Add function for select tags on news page

parent e9f6b038
...@@ -18,3 +18,4 @@ class ExaItem(scrapy.Item): ...@@ -18,3 +18,4 @@ class ExaItem(scrapy.Item):
type_id = scrapy.Field() type_id = scrapy.Field()
post_id = scrapy.Field() post_id = scrapy.Field()
company_id = scrapy.Field() company_id = scrapy.Field()
tags = scrapy.Field()
\ No newline at end of file
...@@ -32,7 +32,7 @@ class MobiHealthNewsSpider(BaseSpider): ...@@ -32,7 +32,7 @@ class MobiHealthNewsSpider(BaseSpider):
item['post_id'] = response.meta['post_id'] item['post_id'] = response.meta['post_id']
yield item yield scrapy.Request(item['url'], callback=self.parse_tags, meta={'item': item})
has_next = response.xpath( has_next = response.xpath(
"..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first() "..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first()
next_url = 'http://www.mobihealthnews.com' + response.xpath( next_url = 'http://www.mobihealthnews.com' + response.xpath(
...@@ -44,3 +44,8 @@ class MobiHealthNewsSpider(BaseSpider): ...@@ -44,3 +44,8 @@ class MobiHealthNewsSpider(BaseSpider):
except BaseException as e: except BaseException as e:
print('We had error') print('We had error')
traceback.print_exc() traceback.print_exc()
def parse_tags(self, response):
    """Attach the tag names found on a news page to the pending item.

    The item is read from ``response.meta['item']``. Its ``tags`` field is
    set to the list of link texts extracted from the tag container ``div``
    of the response, and the item is then yielded.
    """
    # The container is matched on its full, exact class attribute value.
    tags_xpath = (
        ".//div[@class='bottom-tags field field-name-field-tags "
        "field-type-taxonomy-term-reference field-label-inline clearfix']"
        "//a/text()"
    )
    news_item = response.meta['item']
    tag_selection = response.xpath(tags_xpath)
    news_item['tags'] = tag_selection.extract()
    yield news_item
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment