Commit 68235128 authored by Vasyl Bodnaruk

Fix bug with first row on CB

parent 89cd155d
...@@ -27,7 +27,9 @@ class CbSpider(BaseSpider): ...@@ -27,7 +27,9 @@ class CbSpider(BaseSpider):
pass pass
def parse(self, response): def parse(self, response):
rows = response.xpath("//table/tr")[1:] rows = response.xpath("//table/tr")
if 'page=1' in response.url:
rows = rows[1:]
company = response.meta['company'] company = response.meta['company']
is_duplicate = False is_duplicate = False
for i in rows: for i in rows:
...@@ -49,7 +51,6 @@ class CbSpider(BaseSpider): ...@@ -49,7 +51,6 @@ class CbSpider(BaseSpider):
break break
yield item yield item
next_url = self._next_url(response.url) next_url = self._next_url(response.url)
input('Press key to continue')
if len(rows) != 0 and self.can_follow(next_url, is_duplicate): if len(rows) != 0 and self.can_follow(next_url, is_duplicate):
yield scrapy.Request(next_url, callback=self.parse, meta=response.meta) yield scrapy.Request(next_url, callback=self.parse, meta=response.meta)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment