Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
E
esi-table-data
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
esi-data-scrapping
esi-table-data
Commits
9655cc2e
Commit
9655cc2e
authored
Jun 09, 2017
by
Vasyl Bodnaruk
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add pagination handler
parent
293b7a26
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
29 additions
and
25 deletions
+29
-25
__init__.py
exa/exa/helpers/__init__.py
+1
-1
decorators.py
exa/exa/helpers/decorators.py
+0
-0
techcrunch.py
exa/exa/spiders/techcrunch.py
+28
-24
No files found.
exa/exa/helpers/__init__.py
View file @
9655cc2e
exa/exa/helpers/decorators.py
deleted
100644 → 0
View file @
293b7a26
exa/exa/spiders/techcrunch.py
View file @
9655cc2e
# -*- coding: utf-8 -*-
import
scrapy
import
traceback
from
scrapy.utils.project
import
get_project_settings
from
..helpers
import
CompanyMaker
,
Database
from
..items
import
ExaItem
...
...
@@ -27,10 +28,10 @@ class TechcrunchSpider(scrapy.Spider):
companies
=
CompanyMaker
(
self
.
comp
)
companies
.
make_companies
(
self
.
name
)
for
i
in
companies
.
get_companies
():
yield
scrapy
.
Request
(
i
.
url
,
callback
=
self
.
parse
,
meta
=
{
'company'
:
i
,
'post_id'
:
0
})
yield
scrapy
.
Request
(
i
.
url
,
callback
=
self
.
parse
,
meta
=
{
'company'
:
i
,
'post_id'
:
0
})
def
parse
(
self
,
response
):
try
:
news_list
=
response
.
xpath
(
"..//div[contains(@class, 'block block-thumb ')]"
)
company
=
response
.
meta
[
'company'
]
for
i
in
news_list
:
...
...
@@ -53,3 +54,6 @@ class TechcrunchSpider(scrapy.Spider):
next_url
=
'https://techcrunch.com'
+
has_next
if
has_next
:
yield
scrapy
.
Request
(
next_url
,
callback
=
self
.
parse
,
meta
=
{
'company'
:
response
.
meta
[
'company'
],
'post_id'
:
0
})
except
BaseException
as
e
:
print
(
'We had error'
)
traceback
.
print_exc
()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment