Add function to select tags on news page

58b8b305 · Vasyl Bodnaruk · e9f6b038 · 58b8b305 · 58b8b305
Commit 58b8b305 authored Jul 18, 2017 by Vasyl Bodnaruk
Show whitespace changes
Inline Side-by-side

Showing with 9 additions and 3 deletions

items.py exa/exa/items.py +2 -1

mobihealthnews.py exa/exa/spiders/mobihealthnews.py +7 -2

No files found.
--- a/exa/exa/items.py
+++ b/exa/exa/items.py
@@ -18,3 +18,4 @@ class ExaItem(scrapy.Item):
    type_id = scrapy.Field()
    post_id = scrapy.Field()
    company_id = scrapy.Field()
+    tags = scrapy.Field()
\ No newline at end of file
--- a/exa/exa/spiders/mobihealthnews.py
+++ b/exa/exa/spiders/mobihealthnews.py
@@ -32,7 +32,7 @@ class MobiHealthNewsSpider(BaseSpider):

                item['post_id'] = response.meta['post_id']

-                yield item
+                yield scrapy.Request(item['url'], callback=self.parse_tags, meta={'item': item})
            has_next = response.xpath(
                "..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first()
            next_url = 'http://www.mobihealthnews.com' + response.xpath(
@@ -44,3 +44,8 @@ class MobiHealthNewsSpider(BaseSpider):
        except BaseException as e:
            print('We had error')
            traceback.print_exc()
+
+    def parse_tags(self, response):  # callback for the per-article request: attaches the article's tags to the item, then emits it
+        item = response.meta['item']  # the item built by the requesting callback, carried over in the request meta
+        item['tags'] = response.xpath(".//div[@class='bottom-tags field field-name-field-tags field-type-taxonomy-term-reference field-label-inline clearfix']//a/text()").extract()  # text of every link inside the tags block; @class is compared as one exact string, so any markup change yields an empty list
+        yield item  # the item is emitted only here, i.e. after the article page has been fetched