Skip to content

Commit

Permalink
v0.3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed Mar 23, 2024
1 parent 05af38f commit f192664
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 19 deletions.
6 changes: 3 additions & 3 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Version: 0.3.1
Date: 2024-02-08 15:14:44 UTC
SHA: 028ef793b00ac8f8551a2c051e2dc46c574fa73c
Version: 0.3.2
Date: 2024-03-23 15:56:12 UTC
SHA: 05af38fa8f5eb7aed083b789d0ef9aa35749a361
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: censobr
Title: Download Data from Brazil's Population Census
Version: 0.3.1
Version: 0.3.2
Authors@R:
c(person(given="Rafael H. M.", family="Pereira",
email="[email protected]",
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# censobr v0.3.2

* Minor changes
* Moved {arrow} package back to `Imports`

* New data set and files included in this version:
* 2022 census
* Preliminary aggregate results of census tracts


# censobr v0.3.1

* Minor changes
Expand Down
17 changes: 12 additions & 5 deletions R/read_tracts.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#' @template year
#' @param dataset Character. The dataset to be opened. Options currently include
#' `c("Basico", "Domicilio", "DomicilioRenda", "Responsavel", "ResponsavelRenda", "Pessoa", "PessoaRenda", "Entorno")`.
#' Preliminary results of the 2022 census are available with `"Preliminares"`.
#' @template as_data_frame
#' @template showProgress
#' @template cache
Expand Down Expand Up @@ -39,15 +40,21 @@ read_tracts <- function(year = 2010,


# data available for the years:
years <- c(2010)
years <- c(2010, 2022)
if (isFALSE(year %in% years)) { stop(paste0("Error: Data currently only available for the years ",
paste(years, collapse = " ")))}

# data available for data sets:
data_sets <- c("Basico", "Domicilio", "DomicilioRenda", "Entorno",
# data sets available for 2010:
data_sets_2010 <- c("Basico", "Domicilio", "DomicilioRenda", "Entorno",
"ResponsavelRenda", "Responsavel", "PessoaRenda", "Pessoa")
if (isFALSE(dataset %in% data_sets)) { stop( paste0("Error: Data currently only available for the datasets: ",
paste(data_sets, collapse = ", "))
if (year==2010 & isFALSE(dataset %in% data_sets_2010)) { stop( paste0("Error: Data currently only available for the datasets: ",
paste(data_sets_2010, collapse = ", "))
)}

# data sets available for 2022:
data_sets_2022 <- c("Preliminares")
if (year==2022 & isFALSE(dataset %in% data_sets_2022)) { stop( paste0("Error: Data currently only available for the datasets: ",
paste(data_sets_2022, collapse = ", "))
)}

### Get url
Expand Down
13 changes: 7 additions & 6 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
## R CMD check results

── R CMD check results ──────────────────────────────────────────────────── censobr 0.3.1 ────
Duration: 1h 38m 26.4s
── R CMD check results ────────────────────────────── censobr 0.3.9999999 ────
Duration: 5m 52.9s

0 errors ✔ | 0 warnings ✔ | 0 notes ✔


* Minor changes
* Moved {arrow} package from `Imports` to `Suggests` while the {arrow} team fixes their conflict with CRAN policies related to downloading binary software. [See here](https://github.com/apache/arrow/issues/39806).
* New package contributors:
* Diego Rabatone Oliveira
* Neal Richardson
* Moved {arrow} package back to `Imports`

* New data set and files included in this version:
* 2022 census
* Preliminary aggregate results of census tracts
3 changes: 2 additions & 1 deletion data_prep/R/add_geography_cols.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
add_geography_cols <- function(arrw, year){


# get code_muni from weighting area
# get code_muni
# Pick the name of the source column to read for the given census year.
# Every branch except the last must end with a comma; the 2022 branch was
# missing one, which made the whole call a parse error.
# Years not listed here match no branch, so case_when() yields NA.
# NOTE(review): 2000 maps to 'AREAP', which an earlier comment described as
# the weighting area rather than the municipality code -- confirm downstream
# handling for that year.
col <- case_when(year == 1970 ~ 'code_muni',
                 year == 1980 ~ 'code_muni',
                 year == 1991 ~ 'code_muni',
                 year == 2010 ~ 'V0011',
                 year == 2022 ~ 'CD_MUN',
                 year == 2000 ~ 'AREAP')

if(year %in% c(2000, 2010)){
Expand Down
54 changes: 54 additions & 0 deletions data_prep/R/census_tracts_aggreg_2022_prelim.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Prepare the preliminary census-tract aggregates of the 2022 census:
# read the raw CSV, strip the "P" from the tract code, convert every column
# to character, rename the geography columns, and save the result as parquet.
library(dplyr)
library(data.table)
library(arrow)

# NOTE(review): machine-specific absolute path; adjust before re-running
# this script on another computer.
input_csv <- 'C:/Users/user/Downloads/Agregados_preliminares_por_setores_censitarios_BR/Agregados_preliminares_por_setores_censitarios_BR.csv'

df <- data.table::fread(input_csv)
head(df)


# remove P from code tract ("P" is matched as a literal, not as a regex)
df[, CD_SETOR := gsub("P", "", CD_SETOR, fixed = TRUE)]
head(df)


# make all columns as character
df <- mutate(df, across(everything(), as.character))

# upper-case the column names so the rename() below can refer to them
names(df) <- toupper(names(df))

df2 <- dplyr::rename(
  df,
  code_tract = CD_SETOR,
  code_muni = CD_MUN,
  name_muni = NM_MUN,
  code_subdistrict = CD_SUBDIST,
  name_subdistrict = NM_SUBDIST,
  code_district = CD_DIST,
  name_district = NM_DIST,
  code_urban_concentration = CD_CONCURB,
  name_urban_concentration = NM_CONCURB,
  code_state = CD_UF,
  name_state = NM_UF,
  code_micro = CD_MICRO,
  name_micro = NM_MICRO,
  code_meso = CD_MESO,
  name_meso = NM_MESO,
  code_immediate = CD_RGI,
  name_immediate = NM_RGI,
  code_intermediate = CD_RGINT,
  name_intermediate = NM_RGINT,
  code_region = CD_REGIAO,
  name_region = NM_REGIAO,
  area_km2 = AREA_KM2
)
head(df2)


# save
out_dir <- './data/tracts/2022/'

# only create the directory when it is missing, so that re-running the
# script does not raise an "already exists" warning
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

dest_file <- '2022_tracts_Preliminares.parquet'
arrow::write_parquet(df2, paste0(out_dir, dest_file))

# NOTE(review): second copy written to the working directory; this looks
# like a debugging leftover -- confirm whether it is still needed.
arrow::write_parquet(df2, 'd2.parquet')


3 changes: 2 additions & 1 deletion man/read_tracts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions tests/testthat/test_read_tracts.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ test_that("read_tracts", {
test2 <- read_tracts(year = 2010, dataset = 'Basico', as_data_frame = TRUE)
testthat::expect_true(is(test2, "data.frame"))

# different data sets
# 2010 different data sets
## check that the files have been downloaded
tbls <- c('Basico', 'Domicilio', 'DomicilioRenda', 'Entorno',
'ResponsavelRenda', 'Responsavel', 'PessoaRenda', 'Pessoa')
Expand All @@ -30,6 +30,14 @@ test_that("read_tracts", {
testthat::expect_true( nrow(tmp_d) >= 303000)
} )

# 2022 different data sets
## check that the file has been downloaded
tbls <- c('Preliminares')
lapply(X=tbls, FUN = function(y){ # y = 'Preliminares'
tmp_d <- read_tracts(year = 2022, dataset = y)
testthat::expect_true( nrow(tmp_d) == 452340)
} )


# check whether cache argument is working
time_first <- system.time(
Expand All @@ -54,7 +62,7 @@ test_that("read_tracts", {
testthat::expect_error(read_tracts(year=999, dataset='Basico'))
testthat::expect_error(read_tracts(year=999, dataset='Basico'))
testthat::expect_error(read_tracts(year=2010, dataset='banana'))
testthat::expect_error(read_tracts(year=2010, dataset='banana'))
testthat::expect_error(read_tracts(year=2022, dataset='banana'))

testthat::expect_error(read_tracts(year=2010, dataset='Basico', showProgress = 'banana' ))
testthat::expect_error(read_tracts(year=2010, dataset='Basico', cache = 'banana' ))
Expand Down

0 comments on commit f192664

Please sign in to comment.