Skip to content

Commit

Permalink
resolve lints
Browse files Browse the repository at this point in the history
  • Loading branch information
GregorDeCillia committed Feb 12, 2019
1 parent 6b157e3 commit d5b121d
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 56 deletions.
145 changes: 94 additions & 51 deletions R/ipf.r
Original file line number Diff line number Diff line change
Expand Up @@ -144,41 +144,55 @@ calibP <- function(i, dat, error, valueP, pColNames, bound, verbose, calIter,
if (!is.null(numericalWeightingVar)) {
## numerical variable to be calibrated
## use name of conP list element to define numerical variable
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(dat[[variableKeepingTheCalibWeight]] *
dat[[numericalWeightingVar]], combined_factors, con_current))
set(dat, j = "wValue", value = dat[["value"]]/dat[["fVariableForCalibrationIPF"]])
set(dat, j = "fVariableForCalibrationIPF",
value = ipf_step_f(dat[[variableKeepingTheCalibWeight]] *
dat[[numericalWeightingVar]],
combined_factors, con_current))
set(dat, j = "wValue", value = dat[["value"]] /
dat[["fVariableForCalibrationIPF"]])

# try to divide the weight between units with larger/smaller value in the
# numerical variable linear
dat[, fVariableForCalibrationIPF := numericalWeighting(head(wValue, 1), head(value, 1),
get(numericalWeightingVar), get(variableKeepingTheCalibWeight)),
by = eval(paste0("combined_factors_", i))]

dat[, fVariableForCalibrationIPF := numericalWeighting(
head(wValue, 1), head(value, 1), get(numericalWeightingVar),
get(variableKeepingTheCalibWeight)),
by = eval(paste0("combined_factors_", i))]

} else {
# categorical variable to be calibrated
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(dat[[variableKeepingTheCalibWeight]], combined_factors, con_current))
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(
dat[[variableKeepingTheCalibWeight]], combined_factors, con_current))
}
if (dat[!is.na(fVariableForCalibrationIPF), any(abs(1 / fVariableForCalibrationIPF - 1) > epsPcur)]) {
if (dat[!is.na(fVariableForCalibrationIPF),
any(abs(1 / fVariableForCalibrationIPF - 1) > epsPcur)]) {
## sicherheitshalber abs(epsPcur)? Aber es wird schon niemand negative eps
## Werte uebergeben??
if (verbose && calIter %% 10 == 0) {
message(calIter, ":Not yet converged for P-Constraint", i, "\n")
if (calIter %% 100 == 0) {
tmp <- dat[!is.na(fVariableForCalibrationIPF) & (abs(1 / fVariableForCalibrationIPF - 1) > epsPcur),
list(maxFac = max(abs(1 / fVariableForCalibrationIPF - 1)), .N, head(epsPcur, 1),
sumCalib = sum(get(variableKeepingTheCalibWeight)), head(value, 1)),
by = eval(pColNames[[i]])]
tmp <- dat[
!is.na(fVariableForCalibrationIPF) &
(abs(1 / fVariableForCalibrationIPF - 1) > epsPcur),
list(
maxFac = max(abs(1 / fVariableForCalibrationIPF - 1)), .N,
head(epsPcur, 1),
sumCalib = sum(get(variableKeepingTheCalibWeight)), head(value, 1)),
by = eval(pColNames[[i]])]
print(tmp[order(maxFac, decreasing = TRUE), ])
message("-----------------------------------------\n")
}
}
if (!is.null(bound)) {
dat[!is.na(fVariableForCalibrationIPF), c(variableKeepingTheCalibWeight) := boundsFak(get(variableKeepingTheCalibWeight),
get(variableKeepingTheBaseWeight), fVariableForCalibrationIPF, bound = bound)]
dat[!is.na(fVariableForCalibrationIPF),
c(variableKeepingTheCalibWeight) :=
boundsFak(
get(variableKeepingTheCalibWeight),
get(variableKeepingTheBaseWeight), fVariableForCalibrationIPF,
bound = bound)]
#,by=eval(pColNames[[i]])]
} else {
dat[!is.na(fVariableForCalibrationIPF), c(variableKeepingTheCalibWeight) := fVariableForCalibrationIPF *
dat[!is.na(fVariableForCalibrationIPF),
c(variableKeepingTheCalibWeight) := fVariableForCalibrationIPF *
get(variableKeepingTheCalibWeight),
by = eval(paste0("combined_factors_", i))]
}
Expand Down Expand Up @@ -210,52 +224,68 @@ calibH <- function(i, dat, error, valueH, hColNames, bound, verbose, calIter,
if (!is.null(numericalWeightingVar)) {
## numerical variable to be calibrated
## use name of conH list element to define numerical variable
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(dat[[variableKeepingTheCalibWeight]] * dat[["representativeHouseholdForCalibration"]] *
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(
dat[[variableKeepingTheCalibWeight]] *
dat[["representativeHouseholdForCalibration"]] *
dat[[numericalWeightingVar]], combined_factors, con_current))
set(dat, j = "wValue", value = dat[["value"]] / dat[["fVariableForCalibrationIPF"]])
set(dat, j = "wValue", value = dat[["value"]] /
dat[["fVariableForCalibrationIPF"]])

# try to divide the weight between units with larger/smaller value in the
# numerical variable linear
dat[, fVariableForCalibrationIPF := numericalWeighting(head(wValue, 1), head(value, 1),
get(numericalWeightingVar), get(variableKeepingTheCalibWeight)),
by = eval(paste0("combined_factors_h_", i))]
dat[, fVariableForCalibrationIPF := numericalWeighting(
head(wValue, 1), head(value, 1), get(numericalWeightingVar),
get(variableKeepingTheCalibWeight)),
by = eval(paste0("combined_factors_h_", i))]

} else {
# categorical variable to be calibrated
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(dat[[variableKeepingTheCalibWeight]] * dat[["representativeHouseholdForCalibration"]],
combined_factors, con_current))
set(dat, j = "fVariableForCalibrationIPF", value = ipf_step_f(
dat[[variableKeepingTheCalibWeight]] *
dat[["representativeHouseholdForCalibration"]],
combined_factors, con_current))
}

set(dat, j = "wValue", value = dat[["value"]]/dat[["fVariableForCalibrationIPF"]])
set(dat, j = "wValue", value = dat[["value"]] /
dat[["fVariableForCalibrationIPF"]])

if (dat[!is.na(fVariableForCalibrationIPF), any(abs(1 / fVariableForCalibrationIPF - 1) > epsHcur)]) {
if (dat[!is.na(fVariableForCalibrationIPF),
any(abs(1 / fVariableForCalibrationIPF - 1) > epsHcur)]) {
if (verbose && calIter %% 10 == 0) {
message(calIter, ":Not yet converged for H-Constraint", i, "\n")
if (calIter %% 100 == 0) {
tmp <- dat[!is.na(fVariableForCalibrationIPF) & (abs(1 / fVariableForCalibrationIPF - 1) > epsHcur),
list(maxFac = max(abs(1 / fVariableForCalibrationIPF - 1)), .N, head(epsHcur, 1),
sumCalibWeight = sum(get(variableKeepingTheCalibWeight) * representativeHouseholdForCalibration),
head(value, 1)), by = eval(hColNames[[i]])]
tmp <- dat[
!is.na(fVariableForCalibrationIPF) &
(abs(1 / fVariableForCalibrationIPF - 1) > epsHcur),
list(maxFac = max(abs(1 / fVariableForCalibrationIPF - 1)), .N,
head(epsHcur, 1),
sumCalibWeight = sum(get(variableKeepingTheCalibWeight) *
representativeHouseholdForCalibration),
head(value, 1)),
by = eval(hColNames[[i]])]
print(tmp[order(maxFac, decreasing = TRUE), ])

message("-----------------------------------------\n")
}
}
if (!is.null(bound)) {
if (!looseH) {
set(dat, j = variableKeepingTheCalibWeight, value =
boundsFak(g1 = dat[[variableKeepingTheCalibWeight]], g0 = dat[[variableKeepingTheBaseWeight]],
f = dat[["fVariableForCalibrationIPF"]],
bound = bound))
set(dat, j = variableKeepingTheCalibWeight, value = boundsFak(
g1 = dat[[variableKeepingTheCalibWeight]],
g0 = dat[[variableKeepingTheBaseWeight]],
f = dat[["fVariableForCalibrationIPF"]],
bound = bound))
}else{
set(dat, j = variableKeepingTheCalibWeight, value =
boundsFakHH(g1 = get(variableKeepingTheCalibWeight), g0 = dat[[variableKeepingTheBaseWeight]],
eps = dat[["epsHcur"]], orig = dat[["value"]],
p = dat[["wValue"]], bound = bound)
set(dat, j = variableKeepingTheCalibWeight, value = boundsFakHH(
g1 = get(variableKeepingTheCalibWeight),
g0 = dat[[variableKeepingTheBaseWeight]],
eps = dat[["epsHcur"]], orig = dat[["value"]],
p = dat[["wValue"]], bound = bound)
)
}
} else {
dat[, c(variableKeepingTheCalibWeight) := fVariableForCalibrationIPF * get(variableKeepingTheCalibWeight),
dat[, c(variableKeepingTheCalibWeight) := fVariableForCalibrationIPF *
get(variableKeepingTheCalibWeight),
by = eval(paste0("combined_factors_h_", i))]
}
error <- TRUE
Expand Down Expand Up @@ -296,7 +326,8 @@ addWeightsAndAttributes <- function(dat, conP, conH, epsP, epsH, dat_original,
if ((maxIter < calIter) & returnNA){
outTable[, c(variableKeepingTheCalibWeight) := NA]
} else {
outTable[, c(variableKeepingTheCalibWeight) := dat[[variableKeepingTheCalibWeight]]]
outTable[, c(variableKeepingTheCalibWeight) :=
dat[[variableKeepingTheCalibWeight]]]
}


Expand All @@ -314,7 +345,8 @@ addWeightsAndAttributes <- function(dat, conP, conH, epsP, epsH, dat_original,

# adjusted constraints (conP, conH according to the calibrated weights)
setattr(outTable, "conP_adj", lapply(formP, xtabs, dat))
setattr(outTable, "conH_adj", lapply(formH, xtabs, dat[representativeHouseholdForCalibration == 1]))
setattr(outTable, "conH_adj", lapply(
formH, xtabs, dat[representativeHouseholdForCalibration == 1]))

# tolerances
setattr(outTable, "epsP", epsP)
Expand Down Expand Up @@ -505,7 +537,8 @@ ipf <- function(
variableKeepingTheBaseWeight <- w
variableKeepingTheCalibWeight <- nameCalibWeight
if ("variableKeepingTheBaseWeight" %in% names(dat))
stop("The provided dataset must not have a column called 'variableKeepingTheBaseWeight'")
stop("The provided dataset must not have a column called",
" 'variableKeepingTheBaseWeight'")

OriginalSortingVariable <- V1 <- epsvalue <-
f <- temporary_hvar <-
Expand All @@ -522,7 +555,8 @@ ipf <- function(
###fixed target value, should not be changed in iterations
valueH <- paste0("valueH", seq_along(conH))
###Housekeeping of the varNames used
usedVarNames <- c(valueP, valueH, "value", "representativeHouseholdForCalibration", "wValue")
usedVarNames <- c(valueP, valueH, "value",
"representativeHouseholdForCalibration", "wValue")

if (any(names(dat) %in% usedVarNames)) {
renameVars <- names(dat)[names(dat) %in% usedVarNames]
Expand All @@ -536,7 +570,8 @@ ipf <- function(
dat[, hid := 1:nrow(dat)]
dat[, representativeHouseholdForCalibration := 1]
} else {
dat[, representativeHouseholdForCalibration := as.numeric(!duplicated(get(hid)))]
dat[, representativeHouseholdForCalibration :=
as.numeric(!duplicated(get(hid)))]
}

## Names of the calibration variables for Person and household dimension
Expand Down Expand Up @@ -568,7 +603,8 @@ ipf <- function(
}
combined_factors <- combine_factors(dat, conP[[i]])
set(dat, j = paste0("combined_factors_", i), value = combined_factors)
set(dat, j = paste0("valueP", i), value = as.vector(conP[[i]][combined_factors]))
set(dat, j = paste0("valueP", i),
value = as.vector(conP[[i]][combined_factors]))
}
for (i in seq_along(conH)) {
colnames <- hColNames[[i]]
Expand Down Expand Up @@ -598,15 +634,17 @@ ipf <- function(
combined_factors <- combine_factors(dat, conH[[i]])

set(dat, j = paste0("combined_factors_h_", i), value = combined_factors)
set(dat, j = paste0("valueH", i), value = as.vector(conH[[i]][combined_factors]))
set(dat, j = paste0("valueH", i),
value = as.vector(conH[[i]][combined_factors]))
}

if (is.null(variableKeepingTheBaseWeight)) {
if (!is.null(bound) && is.null(w))
stop("Bounds are only reasonable if base weights are provided")
set(dat, j = variableKeepingTheCalibWeight, value = 1)
} else {
set(dat, j = variableKeepingTheCalibWeight, value = dat[[variableKeepingTheBaseWeight]])
set(dat, j = variableKeepingTheCalibWeight,
value = dat[[variableKeepingTheBaseWeight]])
}

if (check_hh_vars) {
Expand All @@ -628,7 +666,8 @@ ipf <- function(
for (i in seq_along(epsP)) {
if (is.array(epsP[[i]])) {
combined_factors <- dat[[paste0("combined_factors_", i)]]
set(dat, j = paste0("epsP_", i), value = as.vector(epsP[[i]][combined_factors]))
set(dat, j = paste0("epsP_", i),
value = as.vector(epsP[[i]][combined_factors]))
} else {
set(dat, j = paste0("epsP_", i), value = epsP[[i]])
}
Expand All @@ -642,7 +681,8 @@ ipf <- function(
for (i in seq_along(epsH)) {
if (is.array(epsH[[i]])) {
combined_factors <- dat[[paste0("combined_factors_h_", i)]]
set(dat, j = paste0("epsH_", i), value = as.vector(epsH[[i]][combined_factors]))
set(dat, j = paste0("epsH_", i),
value = as.vector(epsH[[i]][combined_factors]))
} else {
set(dat, j = paste0("epsH_", i), value = epsH[[i]])
}
Expand Down Expand Up @@ -675,7 +715,8 @@ ipf <- function(
}

## replace person weight with household average
set(dat, j = variableKeepingTheCalibWeight, value =meanfun(dat[[variableKeepingTheCalibWeight]], dat[[hid]]))
set(dat, j = variableKeepingTheCalibWeight,
value = meanfun(dat[[variableKeepingTheCalibWeight]], dat[[hid]]))

### Household calib
for (i in seq_along(conH)) {
Expand Down Expand Up @@ -708,7 +749,8 @@ ipf <- function(
cw = variableKeepingTheCalibWeight)

## replace person weight with household average
set(dat, j = variableKeepingTheCalibWeight, value = meanfun(dat[[variableKeepingTheCalibWeight]], dat[[hid]]))
set(dat, j = variableKeepingTheCalibWeight,
value = meanfun(dat[[variableKeepingTheCalibWeight]], dat[[hid]]))

### Household calib
for (i in seq_along(conH)) {
Expand All @@ -735,7 +777,8 @@ ipf <- function(
calIter <- calIter + 1
}
# Remove Help Variables
dat[,fVariableForCalibrationIPF:=NULL]
fVariableForCalibrationIPF <- NULL
dat[, fVariableForCalibrationIPF := NULL]
addWeightsAndAttributes(dat, conP, conH, epsP, epsH, dat_original, maxIter,
calIter, returnNA, variableKeepingTheCalibWeight)
}
14 changes: 9 additions & 5 deletions tests/testthat/test_ipf.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ eusilcS[, agegroup := cut(age, c(-Inf, 10 * 1:9, Inf), right = FALSE)]
# some recoding of netIncome for reasons of simplicity
eusilcS[is.na(netIncome), netIncome := 0]
eusilcS[netIncome < 0, netIncome := 0]
eusilcS[, HHnetIncome := sum(netIncome),by=household]
eusilcS[, HHnetIncome := sum(netIncome), by = household]
# set hsize to 1,...,5+
eusilcS[, hsize := cut(hsize, c(0:4, Inf), labels = c(1:4, "5+"))]
# treat households as a factor variable
Expand Down Expand Up @@ -60,7 +60,8 @@ test_that("ipf with a numerical variable works as expected - computeLinear", {
conH = list(conH1),
epsP = list(1e-06, 1e-06, 1e-03),
epsH = 0.01,
bound = NULL, verbose = FALSE, maxIter = 200,numericalWeighting = computeLinear)
bound = NULL, verbose = FALSE, maxIter = 200,
numericalWeighting = computeLinear)
expect_true(abs(calibweights1[, sum(calibWeight * netIncome)] - sum(conP3)) /
sum(conP3) < .01)
expect_true(all(
Expand All @@ -86,7 +87,8 @@ test_that("ipf with a numerical variable works as expected - computeLinearG1", {
conH = list(conH1),
epsP = list(1e-06, 1e-06, 1e-03),
epsH = 0.01,
bound = NULL, verbose = FALSE, maxIter = 200,numericalWeighting = computeLinearG1)
bound = NULL, verbose = FALSE, maxIter = 200,
numericalWeighting = computeLinearG1)
expect_true(abs(calibweights1[, sum(calibWeight * netIncome)] - sum(conP3)) /
sum(conP3) < .01)
expect_true(all(
Expand All @@ -113,7 +115,8 @@ test_that("ipf with a numerical variable in households as expected", {
conH = list(conH1, HHnetIncome = conH2),
epsP = list(1e-06, 1e-06),
epsH = list(0.01, 0.01),
bound = NULL, verbose = FALSE, maxIter = 50, numericalWeighting = computeFrac)
bound = NULL, verbose = FALSE, maxIter = 50,
numericalWeighting = computeFrac)
expect_true(abs(calibweights1[, sum(calibWeight * netIncome)] - sum(conP3)) /
sum(conP3) < .01)
expect_true(all(
Expand Down Expand Up @@ -156,7 +159,8 @@ test_that("ipf works as expected calibWeight renamed", {
err <- max(c(
max(abs(xtabs(calibWeightNew ~ agegroup, data = calibweights2) - conP1) /
conP1),
max(abs(xtabs(calibWeightNew ~ gender + state, data = calibweights2) - conP2) /
max(abs(
xtabs(calibWeightNew ~ gender + state, data = calibweights2) - conP2) /
conP2),
max(abs(xtabs(calibWeightNew ~ hsize + state, data = calibweights2,
subset = !duplicated(household)) - conH1) / conH1)))
Expand Down

1 comment on commit d5b121d

@lintr-bot

This comment was marked as outdated.

Please sign in to comment.