forked from turicas/covid19-br
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcorona_rr_spider.py
29 lines (24 loc) · 1.05 KB
/
corona_rr_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import io
import rows
import scrapy
class RoraimaSpider(scrapy.Spider):
    """Spider that scrapes the per-place case table from the Roraima page.

    The spider is registered under the name ``"rr"``; the upper-cased name
    is also emitted as the ``state`` value of every yielded item.
    """

    name = "rr"
    start_urls = ["https://roraimacontraocorona.rr.gov.br/winner/mapa.xhtml"]

    def parse(self, response):
        """Yield one dict per row of the table found in ``response``.

        The update date is read from the ``h6`` text selected next to the
        'Última atualização' heading, and converted from ``D/M/Y`` to
        ``YYYY-MM-DD``. Rows whose ``cidade`` equals ``"total:"`` (ignoring
        case) are emitted with ``place_type == "state"`` and ``city`` set to
        ``None``; every other row is emitted as a ``"city"``.

        Raises:
            ValueError: if the selected text does not split into exactly two
                whitespace-separated tokens, or the first token is not three
                ``/``-separated integer-like parts.
        """
        # NOTE(review): the inner ``//h4`` is an absolute path, so the
        # predicate does not restrict which ``div`` is matched -- confirm
        # against the live markup before narrowing it.
        fragments = response.xpath(
            "//div[//h4[contains(text(), 'Última atualização')]]//h6//text()"
        ).extract()
        stripped = [fragment.strip() for fragment in fragments]
        header_text = "".join(piece for piece in stripped if piece)

        # Exactly two tokens are expected; the second one is discarded
        # (presumably the time of day -- TODO confirm).
        raw_date, _ = header_text.split()
        day, month, year = raw_date.split("/")
        iso_date = f"{year}-{int(month):02d}-{int(day):02d}"

        state_code = self.name.upper()
        for record in rows.import_from_html(io.BytesIO(response.body)):
            # The summary row is labelled "total:" and stands for the state.
            is_total = record.cidade.lower() == "total:"
            yield {
                "city": None if is_total else record.cidade,
                "confirmed": record.confirmados,
                "date": iso_date,
                "deaths": record.obitos,
                "discarded": record.descartados,
                "place_type": "state" if is_total else "city",
                "recovered": record.curados,
                "state": state_code,
                "suspect": record.suspeitos,
            }