Stop parsing news when we find the first duplicate

802fee77 · Vasyl Bodnaruk · 98c9b2f3 · 802fee77
Commit 802fee77 authored Jul 21, 2017 by Vasyl Bodnaruk
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 4 deletions

mobihealthnews.py exa/exa/spiders/mobihealthnews.py +2 -4

No files found.
--- a/exa/exa/spiders/mobihealthnews.py
+++ b/exa/exa/spiders/mobihealthnews.py
@@ -35,11 +35,9 @@ class MobiHealthNewsSpider(BaseSpider):
                if self.pipeline.check_url(item['url']):
                    is_duplicate = True
-                if not is_duplicate:
+                    break
-                    yield scrapy.Request(item['url'], callback=self.parse_tags, meta={'item': item})
                else:
-                    # print(item)
+                    yield scrapy.Request(item['url'], callback=self.parse_tags, meta={'item': item})
-                    pass
            has_next = response.xpath(
                "..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first()
            next_url = 'http://www.mobihealthnews.com' + response.xpath(