Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat] RJ spider #224

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion covid19br/run_spider.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
from datetime import datetime
import sys
from datetime import datetime
from pathlib import Path

import rows
Expand All @@ -12,6 +12,7 @@
from covid19br.spiders.spider_ba import SpiderBA
from covid19br.spiders.spider_ce import SpiderCE
from covid19br.spiders.spider_pr import SpiderPR
from covid19br.spiders.spider_rj import SpiderRJ
from covid19br.spiders.spider_ro import SpiderRO
from covid19br.spiders.spider_sp import SpiderSP
from covid19br.spiders.spider_to import SpiderTO
Expand All @@ -24,6 +25,7 @@
SpiderRO,
SpiderSP,
SpiderTO,
SpiderRJ
]


Expand Down
104 changes: 104 additions & 0 deletions covid19br/spiders/spider_rj.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import csv
import io

import scrapy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

from covid19br.common.base_spider import BaseCovid19Spider
from covid19br.common.constants import ReportQuality, State
from covid19br.common.models.bulletin_models import CountyBulletinModel, StateTotalBulletinModel


class SpiderRJ(BaseCovid19Spider):
    """Spider for the state of Rio de Janeiro (RJ).

    The data source is an interactive TabNet query form, so a headless
    Chrome instance (Selenium) fills in the form once per requested date,
    reads the "download CSV" link from the result window and hands that
    link to Scrapy. The downloaded CSV is then converted into one bulletin
    per row.
    """

    name = State.RJ.value
    state = State.RJ
    information_delay_in_days = 0
    report_qualities = [ReportQuality.COUNTY_BULLETINS]
    start_urls = ["http://sistemas.saude.rj.gov.br/tabnetbd/dhx.exe?covid19/tf_covid_brasil.def"]

    def pre_init(self):
        """Start the headless Chrome driver and materialize the requested dates."""
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        self.driver = webdriver.Chrome(options=chrome_options)
        # Stored as a list so the dates can be iterated in ``parse``.
        self.requested_dates = list(self.requested_dates)

    def parse(self, response, **kwargs):
        """Drive the TabNet form and yield one CSV request per requested date.

        Args:
            response: Scrapy response for the form page; only its URL is
                used, the page itself is re-opened in the Selenium driver.
            **kwargs: Ignored.

        Yields:
            scrapy.Request: request for the CSV export of each date, handled
            by ``parse_csv`` with the date passed through ``cb_kwargs``.
        """
        # NOTE(review): the ``find_element_by_xpath`` helpers were removed in
        # Selenium 4.3 - confirm the Selenium version pinned by the project.
        try:
            self.driver.get(response.url)

            # Select "Município" (rows are broken down by municipality).
            self.driver.find_element_by_xpath('//*[@value="Município|municipio"]').click()

            # Ctrl+click adds "Obitos confirmados" to the current selection;
            # presumably "Casos confirmados" is already selected by default
            # (``parse_csv`` reads both columns) - TODO confirm.
            element = self.driver.find_element_by_xpath('//*[@value="Obitos confirmados|obitosnovos"]')
            ActionChains(self.driver).key_down(Keys.CONTROL).click(element).key_up(Keys.CONTROL).perform()

            # Open states menu
            self.driver.find_element_by_xpath('//*[@id="fig4"]').click()

            # Select RJ state
            element = self.driver.find_element_by_xpath('//*/option[@value="RJ|RJ,|2"]')
            ActionChains(self.driver).click(element).perform()

            # Open dates menu
            self.driver.find_element_by_xpath('//*[@id="fig1"]').click()

            for date in self.requested_dates:
                # Select start date (fixed first option of the range).
                element = self.driver.find_element_by_xpath(
                    '//*/select[2]/option[@value="2020-01-01|2020-01-01|10"]'
                )
                ActionChains(self.driver).click(element).perform()

                # Shift+click the end date to select the whole range.
                # SHIFT must be released again: pressed-key state persists
                # between ``perform()`` calls, and a still-held SHIFT would
                # turn the next iteration's start-date click into a range
                # selection.
                date_value = str(date)
                element = self.driver.find_element_by_xpath(
                    f'//*/select[2]/option[@value="{date_value}|{date_value}|10"]'
                )
                ActionChains(self.driver).key_down(Keys.SHIFT).click(element).key_up(Keys.SHIFT).perform()

                # Click the "mostrar" button; the result opens in a second window.
                self.driver.find_element_by_xpath("//*/form/div[2]/div[2]/div[3]/input[1]").click()
                self.driver.switch_to.window(self.driver.window_handles[1])

                # Get url from the "download csv" link
                url_csv = self.driver.find_element_by_xpath("//*/div[3]/table/tbody/tr/td[2]/a").get_attribute("href")

                yield scrapy.Request(
                    url_csv,
                    callback=self.parse_csv,
                    cb_kwargs={"date": date},
                )

                # Close the result window
                self.driver.close()

                # Go back to the form window
                self.driver.switch_to.window(self.driver.window_handles[0])
        finally:
            # Always shut the browser down, even when a Selenium call fails,
            # so no headless Chrome process is left behind.
            self.driver.quit()

    def parse_csv(self, response, date):
        """Convert the exported CSV into bulletins for ``date``.

        Args:
            response: Scrapy response whose body is the ``;``-separated CSV,
                decoded here as ISO-8859-15.
            date: The date the CSV was requested for (from ``cb_kwargs``).

        The row whose "Município" is "total" (case-insensitive) becomes a
        ``StateTotalBulletinModel``; every other row becomes a
        ``CountyBulletinModel``.
        """
        report = csv.DictReader(io.StringIO(response.body.decode("iso-8859-15")), delimiter=";")

        for row in report:
            if row["Município"].lower() == "total":
                bulletin = StateTotalBulletinModel(
                    date=date,
                    state=self.state,
                    confirmed_cases=row["Casos confirmados"],
                    deaths=row["Obitos confirmados"],
                    source=response.request.url,
                )
            else:
                bulletin = CountyBulletinModel(
                    date=date,
                    state=self.state,
                    city=row["Município"],
                    confirmed_cases=row["Casos confirmados"],
                    deaths=row["Obitos confirmados"],
                    source=response.request.url,
                )
            self.add_new_bulletin_to_report(bulletin, date)