Add company maker and XPath selectors

ef7f5415 · Andrii Marynets · 28ebd1b1 · ef7f5415 · ef7f5415
Commit ef7f5415 authored Oct 10, 2017 by Andrii Marynets
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 13 additions and 1 deletion

company_maker.py exa/exa/helpers/company_maker.py +7 -0

nana.py exa/exa/spiders/nana.py +6 -1

No files found.
--- a/exa/exa/helpers/company_maker.py
+++ b/exa/exa/helpers/company_maker.py
@@ -22,6 +22,8 @@ class CompanyMaker:
            self._make_list_for_aitop()
        elif media == 'cb':
            self._make_list_for_cb()
+        elif media == 'nana':
+            self._make_list_for_nanalyze()

    def get_companies(self):
        return self.companies
@@ -54,3 +56,8 @@ class CompanyMaker:
            name = i.name.replace(' ', '-').replace('.', '-')
            self.companies.append(Company(i.id, 'http://www.crunchbase.com/organization/{}/press?page=1'.format(name),
                                          None, 2, 2, i.name))
+
+    def _make_list_for_nanalyze(self):
+        for i in self.in_site:
+            self.companies.append(Company(i.id, 'http://www.nanalyze.com/tag/{}'.format(i.name.lower()),
+                                          47, 2, 2, i.name))
\ No newline at end of file
--- a/exa/exa/spiders/nana.py
+++ b/exa/exa/spiders/nana.py
@@ -6,6 +6,7 @@ from .base import BaseSpider
 class NanaSpider(BaseSpider):
    name = "nana"
    allowed_domains = ["nanalyze.com"]
+    start_urls = ['http://www.nanalyze.com/tag/google/']

    def start_requests(self):
        for i in self.companies(self.name):
@@ -15,4 +16,8 @@ class NanaSpider(BaseSpider):
                pass

    def parse(self, response):
-        pass
+        for i in response.xpath('.//article[@class="tease tease-post"]/div[@class="tease-content"]'):
+            # url, title, description
+            d = i.xpath('./h2/a/@href | ./h2/a/text() | ./p/text()').extract()
+            date = self.format_date(i.xpath('./text()').extract()[1].strip())
+            print(date)
\ No newline at end of file