Commit 7a6d1a92 authored by Vasyl Bodnaruk

Add yield command for build item

parent 7ed0a294
@@ -73,7 +73,7 @@ DOWNLOADER_MIDDLEWARES = {
 # Configure item pipelines
 # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
 ITEM_PIPELINES = {
-    'exa.pipelines.ExaPipeline': 300,
+    # 'exa.pipelines.ExaPipeline': 300,
 }
 # Enable and configure the AutoThrottle extension (disabled by default)
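Commenting out the only ITEM_PIPELINES entry disables item post-processing for the project: items the spider yields are no longer routed through ExaPipeline.process_item. For context, a Scrapy item pipeline is a plain class with a process_item hook; the following is only a minimal sketch, since the actual exa.pipelines.ExaPipeline implementation is not part of this commit.

# Minimal sketch of a Scrapy item pipeline. The real exa.pipelines.ExaPipeline
# is not shown in this commit and may do more (cleaning, deduplication, storage).
class ExaPipeline(object):
    def process_item(self, item, spider):
        # Called once for every item a spider yields; must return the item
        # (or raise DropItem) so it continues to the next pipeline stage.
        return item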
@@ -40,18 +40,18 @@ class AitopSpider(scrapy.Spider):
         for i in self.build_items(response):
             if is_company_in_item(i):
-                print(i)
+                yield i
         next_url = self.next_url(response)
-        # if next_url:
-        #     return scrapy.Request(next_url, callback=self.parse_by_title_description)
+        if next_url:
+            yield scrapy.Request(next_url, callback=self.parse_by_title_description)
 
     def parse_by_tag(self, response):
         try:
             for i in self.build_items(response):
-                print(i)
+                yield i
             next_url = self.next_url(response)
             if next_url:
-                return scrapy.Request(next_url, callback=self.parse_by_tag)
+                yield scrapy.Request(next_url, callback=self.parse_by_tag)
         except:
             pass
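The switch from return to yield is what actually gets the scraped items into the engine: previously these callbacks only printed items and returned a single pagination Request, so nothing reached the pipelines. A Scrapy callback is expected to produce an iterable of items and Requests; once `yield i` appears the method is a generator, and the follow-up Request has to be yielded as well (a `return scrapy.Request(...)` inside a generator is a syntax error on Python 2 and would merely terminate the generator on Python 3). Stripped of the try/except, the pattern the spider now follows looks roughly like this sketch (build_items and next_url are existing helpers on AitopSpider):

# Sketch of the generator-style callback pattern used above (a method of the
# spider class); build_items() and next_url() are helpers defined elsewhere.
def parse_by_tag(self, response):
    # Hand every scraped item to the engine (and any enabled item pipelines).
    for item in self.build_items(response):
        yield item
    # Follow pagination by yielding a new Request with the same callback.
    next_url = self.next_url(response)
    if next_url:
        yield scrapy.Request(next_url, callback=self.parse_by_tag)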
@@ -60,7 +60,7 @@ class AitopSpider(scrapy.Spider):
         items = list()
         rows = response.xpath(".//div[contains(@class, 'summaries')]//div[@class='row']")
         for i in rows:
-            item = dict()
+            item = ExaItem()
             item['date'] = dateparser.parse(i.xpath(".//time/@datetime").extract_first()).replace(tzinfo=None)
             item['title'] = ''.join(i.xpath(".//div[contains(@class, 'col-xs-12')]/h3/a//text()").extract())
             item['description'] = ''.join(i.xpath(".//div[@class='summary-content']/p/text()").extract())
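Replacing the plain dict with ExaItem means the item's fields are declared up front, so misspelled keys raise KeyError immediately and the items integrate with Scrapy's pipelines and feed exporters. The ExaItem class itself is not part of this diff; judging by the fields assigned in build_items it is presumably declared roughly as follows (an assumption, the real class may define more fields):

# Presumed shape of ExaItem (not shown in this commit), inferred from the
# fields assigned in build_items(); the real class may declare more fields.
import scrapy

class ExaItem(scrapy.Item):
    date = scrapy.Field()
    title = scrapy.Field()
    description = scrapy.Field()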