Commit 7a6d1a92 authored by Vasyl Bodnaruk

Add yield command for build item

parent 7ed0a294
@@ -73,7 +73,7 @@ DOWNLOADER_MIDDLEWARES = {
 # Configure item pipelines
 # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
 ITEM_PIPELINES = {
-    'exa.pipelines.ExaPipeline': 300,
+    # 'exa.pipelines.ExaPipeline': 300,
 }
 # Enable and configure the AutoThrottle extension (disabled by default)
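Commenting out the only ITEM_PIPELINES entry disables item post-processing for the project: items the spider yields are no longer routed through ExaPipeline.process_item. For context, a Scrapy item pipeline is a plain class with a process_item hook; the following is only a minimal sketch, since the actual exa.pipelines.ExaPipeline implementation is not part of this commit.

# Minimal sketch of a Scrapy item pipeline. The real exa.pipelines.ExaPipeline
# is not shown in this commit and may do more (cleaning, deduplication, storage).
class ExaPipeline(object):
    def process_item(self, item, spider):
        # Called once for every item a spider yields; must return the item
        # (or raise DropItem) so it continues to the next pipeline stage.
        return item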
@@ -40,18 +40,18 @@ class AitopSpider(scrapy.Spider):
         for i in self.build_items(response):
             if is_company_in_item(i):
-                print(i)
+                yield i
         next_url = self.next_url(response)
-        # if next_url:
-        #     return scrapy.Request(next_url, callback=self.parse_by_title_description)
+        if next_url:
+            yield scrapy.Request(next_url, callback=self.parse_by_title_description)
 
     def parse_by_tag(self, response):
         try:
             for i in self.build_items(response):
-                print(i)
+                yield i
             next_url = self.next_url(response)
             if next_url:
-                return scrapy.Request(next_url, callback=self.parse_by_tag)
+                yield scrapy.Request(next_url, callback=self.parse_by_tag)
         except:
             pass
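The switch from return to yield is what actually gets the scraped items into the engine: previously these callbacks only printed items and returned a single pagination Request, so nothing reached the pipelines. A Scrapy callback is expected to produce an iterable of items and Requests; once `yield i` appears the method is a generator, and the follow-up Request has to be yielded as well (a `return scrapy.Request(...)` inside a generator is a syntax error on Python 2 and would merely terminate the generator on Python 3). Stripped of the try/except, the pattern the spider now follows looks roughly like this sketch (build_items and next_url are existing helpers on AitopSpider):

# Sketch of the generator-style callback pattern used above (a method of the
# spider class); build_items() and next_url() are helpers defined elsewhere.
def parse_by_tag(self, response):
    # Hand every scraped item to the engine (and any enabled item pipelines).
    for item in self.build_items(response):
        yield item
    # Follow pagination by yielding a new Request with the same callback.
    next_url = self.next_url(response)
    if next_url:
        yield scrapy.Request(next_url, callback=self.parse_by_tag)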
@@ -60,7 +60,7 @@ class AitopSpider(scrapy.Spider):
         items = list()
         rows = response.xpath(".//div[contains(@class, 'summaries')]//div[@class='row']")
         for i in rows:
-            item = dict()
+            item = ExaItem()
             item['date'] = dateparser.parse(i.xpath(".//time/@datetime").extract_first()).replace(tzinfo=None)
             item['title'] = ''.join(i.xpath(".//div[contains(@class, 'col-xs-12')]/h3/a//text()").extract())
             item['description'] = ''.join(i.xpath(".//div[@class='summary-content']/p/text()").extract())
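Replacing the plain dict with ExaItem means the item's fields are declared up front, so misspelled keys raise KeyError immediately and the items integrate with Scrapy's pipelines and feed exporters. The ExaItem class itself is not part of this diff; judging by the fields assigned in build_items it is presumably declared roughly as follows (an assumption, the real class may define more fields):

# Presumed shape of ExaItem (not shown in this commit), inferred from the
# fields assigned in build_items(); the real class may declare more fields.
import scrapy

class ExaItem(scrapy.Item):
    date = scrapy.Field()
    title = scrapy.Field()
    description = scrapy.Field()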