Commit 60128423 authored by Vasyl Bodnaruk

Add functionality to extract data from the page

parent 3b2b44ce
# -*- coding: utf-8 -*-
import scrapy
import dateparser
from ..items import ExaItem
class AitopSpider(scrapy.Spider):
    """Spider that extracts article summaries from an aitopics.org search page.

    Each summary row on the listing page is turned into a dict with the
    keys ``date``, ``title``, ``description`` and ``url``.
    """

    name = "aitop"
    allowed_domains = ["aitopics.org"]
    # The original file assigned ``start_urls`` twice; only the last
    # assignment was ever effective, so only that value is kept.
    start_urls = ['https://aitopics.org/search?filters=concept-tagsRaw%3AUber']

    def parse(self, response):
        """Yield one item per summary row found in ``response``.

        Args:
            response: The downloaded search-results page.

        Yields:
            dict: ``date`` (naive ``datetime``), ``title``, ``description``
            and ``url`` for every row whose timestamp could be parsed.
            ``title``, ``description`` and ``url`` are ``None`` when the
            corresponding node is missing from the row.
        """
        rows = response.xpath(
            ".//div[contains(@class, 'summaries')]//div[@class='row']"
        )
        for row in rows:
            raw_date = row.xpath(".//time/@datetime").extract_first()
            # dateparser rejects non-string input and returns None for text
            # it cannot understand; check both explicitly instead of hiding
            # the failure behind a bare ``except``.
            parsed_date = dateparser.parse(raw_date) if raw_date else None
            if parsed_date is None:
                # Skip only this row so one malformed entry does not abort
                # the remaining rows.
                self.logger.debug(
                    "Skipping row without a parsable date: %r", raw_date
                )
                continue

            # NOTE(review): a plain dict is emitted as in the original code;
            # the imported ExaItem is not used here - confirm whether the
            # project's pipelines expect ExaItem instances.
            yield {
                # Drop the timezone info so the stored datetime is naive.
                'date': parsed_date.replace(tzinfo=None),
                'title': row.xpath(
                    ".//div[contains(@class, 'col-xs-12')]/h3/a/text()"
                ).extract_first(),
                'description': row.xpath(
                    ".//div[@class='summary-content']/p/text()"
                ).extract_first(),
                'url': row.xpath(
                    ".//div[contains(@class, 'col-xs-12')]/h3/a/@href"
                ).extract_first(),
            }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment