add pagination handler

9655cc2e · Vasyl Bodnaruk · 293b7a26 · 9655cc2e · 293b7a26 · 9655cc2e
Commit 9655cc2e authored Jun 09, 2017 by Vasyl Bodnaruk
Show whitespace changes
Inline Side-by-side

Showing 3 changed files with 29 additions and 25 deletions

__init__.py exa/exa/helpers/__init__.py +1 -1

decorators.py exa/exa/helpers/decorators.py +0 -0

techcrunch.py exa/exa/spiders/techcrunch.py +28 -24

No files found.
--- a/exa/exa/helpers/__init__.py
+++ b/exa/exa/helpers/__init__.py
--- a/exa/exa/helpers/decorators.py
+++ b/exa/exa/helpers/decorators.py
--- a/exa/exa/spiders/techcrunch.py
+++ b/exa/exa/spiders/techcrunch.py
 # -*- coding: utf-8 -*-
 import scrapy
+import traceback
 from scrapy.utils.project import get_project_settings
 from ..helpers import CompanyMaker, Database
 from ..items import ExaItem
@@ -27,10 +28,10 @@ class TechcrunchSpider(scrapy.Spider):
        companies = CompanyMaker(self.comp)
        companies.make_companies(self.name)
        for i in companies.get_companies():
-            yield scrapy.Request(i.url, callback=self.parse, meta={'company': i,
+            yield scrapy.Request(i.url, callback=self.parse, meta={'company': i, 'post_id': 0})
-                                                                   'post_id': 0})
    def parse(self, response):
+        try:
            news_list = response.xpath("..//div[contains(@class, 'block block-thumb ')]")
            company = response.meta['company']
            for i in news_list:
@@ -53,3 +54,6 @@ class TechcrunchSpider(scrapy.Spider):
            next_url = 'https://techcrunch.com' + has_next
            if has_next:
                yield scrapy.Request(next_url, callback=self.parse, meta={'company': response.meta['company'], 'post_id': 0})
+        except BaseException as e:
+            print('We had error')
+            traceback.print_exc()
\ No newline at end of file