-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapy-practice.py
30 lines (25 loc) · 1010 Bytes
/
scrapy-practice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from scrapy.item import Field, Item
from scrapy.spiders import Spider
from scrapy.selector import Selector
from scrapy.loader import ItemLoader
class Pregunta(Item):
    """Scraped question record with a numeric ``id`` and its ``pregunta`` text."""

    # Identifier assigned by the spider that populates this item.
    id = Field()

    # Text of the question as extracted from the page.
    pregunta = Field()

    # Disabled field kept for reference; re-enable together with the
    # matching extraction step in the spider.
    # descripcion = Field()
class StackOverflowSpider(Spider):
    """Spider that extracts question titles from the Stack Overflow question list.

    It requests the single URL in ``start_urls`` and emits one ``Pregunta``
    item per question summary block found on that page.
    """

    name = "MiPrimerSpider"

    # Per-spider override of the USER_AGENT setting with a desktop Chrome string.
    custom_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36'
    }

    start_urls = ['https://stackoverflow.com/questions']

    def parse(self, response):
        """Yield a ``Pregunta`` item for every question summary in ``response``.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            Items loaded through an ``ItemLoader`` with two fields:
            ``pregunta`` (text nodes of the ``h3/a`` link inside the summary)
            and ``id`` (zero-based position of the summary on the page).
        """
        # Query the response directly; wrapping it in a new Selector is redundant.
        preguntas = response.xpath(
            '//div[@id="questions"]//div[@class="question-summary"]'
        )

        # enumerate() supplies the running index that is stored as the item id.
        for i, pregunta in enumerate(preguntas):
            # Scope the loader to this summary node so relative XPaths
            # ('.//...') only match inside the current question.
            loader = ItemLoader(item=Pregunta(), selector=pregunta)
            loader.add_xpath('pregunta', './/h3/a/text()')
            # Disabled: excerpt extraction; needs a `descripcion` field on Pregunta.
            # loader.add_xpath('descripcion', './/div[@class="excerpt"]/text()')
            loader.add_value('id', i)
            yield loader.load_item()