Commit e8b3ad9c authored by Andrii Marynets

Select field from dict

parent 3249334f
......@@ -46,7 +46,6 @@ class CbSpider(BaseSpider):
pass
def parse(self, response):
print(response.body)
body = {
"field_ids": [
"activity_properties",
......@@ -76,8 +75,10 @@ class CbSpider(BaseSpider):
yield scrapy.Request(url='https://www.crunchbase.com/v4/data/searches/activities',
method='POST',
body=json.dumps(body),
headers={'x-requested-with': 'XMLHttpRequest', 'content-type': 'application/json'},
callback=self.parse_news,
meta={'cookiejar': response.meta['cookiejar']})
meta={'cookiejar': response.meta['cookiejar'],
'company': response.meta['company']})
rows = response.xpath(".//div[@class='grid-body']/div")
company = response.meta['company']
......@@ -104,7 +105,17 @@ class CbSpider(BaseSpider):
# if len(rows) != 0 and self.can_follow(next_url, is_duplicate):
# yield scrapy.Request(next_url, callback=self.parse, meta=response.meta)
def parse_news(self, response):
    """Turn the activities-search JSON response into press-reference items.

    The response body is decoded as UTF-8 JSON. For every entry of
    ``entities`` whose ``properties['entity_def_id']`` equals
    ``'press_reference'``, an ``ExaItem`` is filled with the formatted
    activity date, the title and URL taken from ``activity_properties``,
    and the fields returned by ``get_common_items`` for the company that
    was passed along in ``response.meta['company']``.

    Yields:
        One populated ``ExaItem`` per press-reference entity.
    """
    payload = json.loads(response.body.decode('utf8'))
    for entity in payload['entities']:
        prop = entity['properties']
        # Only press references are turned into items; other activity
        # kinds in the same response are skipped.
        if prop['entity_def_id'] != 'press_reference':
            continue
        activity = prop['activity_properties']
        item = ExaItem()
        item['date'] = self.format_date(prop['activity_date'])
        item['title'] = activity['title']
        item['url'] = activity['url']
        item.update(self.get_common_items(response.meta['company']))
        # The item used to be built and then discarded; a callback has to
        # hand it back to the caller for it to be processed at all.
        yield item
def _get_media(self, elem):
media_name = elem.xpath("./td[contains(@class, 'article')]/span/text()").extract_first()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment