Commit ef7f5415 authored by Andrii Marynets

Add company maker and XPath selectors

parent 28ebd1b1
......@@ -22,6 +22,8 @@ class CompanyMaker:
self._make_list_for_aitop()
elif media == 'cb':
self._make_list_for_cb()
elif media == 'nana':
self._make_list_for_nanalyze()
def get_companies(self):
    """Return the ``companies`` collection held by this instance."""
    collected = self.companies
    return collected
......@@ -54,3 +56,8 @@ class CompanyMaker:
name = i.name.replace(' ', '-').replace('.', '-')
self.companies.append(Company(i.id, 'http://www.crunchbase.com/organization/{}/press?page=1'.format(name),
None, 2, 2, i.name))
def _make_list_for_nanalyze(self):
    """Append one ``Company`` per entry of ``self.in_site`` for nanalyze.com.

    Each company's URL is the nanalyze tag page for the entry's lower-cased
    name; the constants 47, 2, 2 are forwarded to ``Company`` positionally.
    """
    tag_url_template = 'http://www.nanalyze.com/tag/{}'
    for site in self.in_site:
        company_id = site.id
        tag_url = tag_url_template.format(site.name.lower())
        company = Company(company_id, tag_url, 47, 2, 2, site.name)
        self.companies.append(company)
\ No newline at end of file
......@@ -6,6 +6,7 @@ from .base import BaseSpider
class NanaSpider(BaseSpider):
name = "nana"
allowed_domains = ["nanalyze.com"]
start_urls = ['http://www.nanalyze.com/tag/google/']
def start_requests(self):
for i in self.companies(self.name):
......@@ -15,4 +16,8 @@ class NanaSpider(BaseSpider):
pass
def parse(self, response):
    """Print the formatted date of every article teaser found in *response*.

    For each ``tease-content`` div under an ``article`` with class
    ``tease tease-post``, the url/title/description strings are extracted
    and the second direct text node of the div is stripped, passed to
    ``self.format_date`` and printed.

    Teasers exposing fewer than two direct text nodes are skipped, so one
    malformed teaser no longer raises ``IndexError`` and aborts the page.

    :param response: object exposing ``xpath()`` for the fetched page.
    """
    teasers = response.xpath(
        './/article[@class="tease tease-post"]/div[@class="tease-content"]'
    )
    for teaser in teasers:
        # url, title, description -- extracted here but not consumed yet.
        d = teaser.xpath('./h2/a/@href | ./h2/a/text() | ./p/text()').extract()
        text_nodes = teaser.xpath('./text()').extract()
        if len(text_nodes) < 2:
            # No second text node to read a date from; skip this teaser
            # instead of failing the whole page.
            continue
        date = self.format_date(text_nodes[1].strip())
        print(date)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment