-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheluniverso.py
28 lines (23 loc) · 930 Bytes
/
eluniverso.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from scrapy.item import Field
from scrapy.item import Item
from scrapy.spiders import Spider
from scrapy.selector import Selector
from scrapy.loader import ItemLoader
from bs4 import BeautifulSoup
class Noticia(Item):
    """Container for one scraped news entry.

    Declares the two fields an entry carries: its headline (``titular``)
    and its description (``descripcion``).
    """

    # Headline of the news entry.
    titular = Field()

    # Description text of the news entry.
    descripcion = Field()
class ElUniversoSpider(Spider):
    """Spider that collects news entries from the El Universo sports page.

    Each ``<li class="relative">`` located beneath the ``<div>`` that is the
    direct parent of an ``<h1>`` is treated as one news entry and loaded
    into a ``Noticia`` item.
    """

    # Name under which this spider is registered.
    name = "MiSegundoSpider"

    # Per-spider settings: override the User-Agent with a Chrome-on-macOS
    # browser string.
    custom_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
    }

    # The single listing page the crawl starts from.
    start_urls = ['https://www.eluniverso.com/deportes']

    def parse(self, response):
        """Yield one ``Noticia`` for every news entry found in ``response``.

        Args:
            response: The response object handed to this callback for a
                fetched page.

        Yields:
            The item built by an ``ItemLoader`` for each matched ``<li>``
            node, with ``titular`` and ``descripcion`` collected from the
            text nodes selected inside that node.
        """
        # Field name -> XPath, evaluated relative to each entry node.
        field_paths = (
            ('titular', './/h2/a/text()'),
            ('descripcion', './/p/text()'),
        )

        entries = Selector(response).xpath(
            '//h1/parent::div//li[@class="relative"]'
        )
        for entry in entries:
            # Scope the loader to this <li> so the relative XPaths above
            # only match content inside the current entry.
            loader = ItemLoader(Noticia(), entry)
            for field_name, path in field_paths:
                loader.add_xpath(field_name, path)
            yield loader.load_item()