Commit 61f1ee4a authored by Vasyl Bodnaruk

Add functionality to handle pagination

parent 60128423
......@@ -19,6 +19,9 @@ class AitopSpider(scrapy.Spider):
item['description'] = i.xpath(".//div[@class='summary-content']/p/text()").extract_first()
item['url'] = i.xpath(".//div[contains(@class, 'col-xs-12')]/h3/a/@href").extract_first()
# print(item)
has_next = response.xpath(".//ul[@class='pagination']//li/a/@href").extract()[-2]
if has_next and has_next != '#':
next_url = 'https://aitopics.org/search' + has_next
return scrapy.Request(next_url, callback=self.parse)
except:
pass
......@@ -57,7 +57,6 @@ class MobiHealthNewsSpider(scrapy.Spider):
"..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/text()").extract_first()
next_url = 'http://www.mobihealthnews.com' + response.xpath(
"..//div[contains(@class, 'text-center')]/ul/li[contains(@class, 'next')]/a/@href").extract_first()
print(has_next, next_url)
if has_next:
yield scrapy.Request(next_url, callback=self.parse, meta={'company': response.meta['company'], 'post_id': 0})
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment