diff --git a/README.md b/README.md
index 55adcf64..70b5f82b 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
 # Survey
 
+[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://xKDR.github.io/Survey.jl/stable)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://xKDR.github.io/Survey.jl/dev)
 ![Build Status](https://github.com/xKDR/Survey.jl/actions/workflows/ci.yml/badge.svg)
 ![Build Status](https://github.com/xKDR/Survey.jl/actions/workflows/documentation.yml/badge.svg)
@@ -171,4 +172,4 @@ We gratefully acknowledge the JuliaLab at MIT for financial support for this pro
 
 ## References
 
-[^1]: [Lumley, Thomas. Complex surveys: a guide to analysis using R. John Wiley & Sons, 2011.](https://books.google.co.in/books?hl=en&lr=&id=L96ludyhFBsC&oi=fnd&pg=PP12&dq=complex+surveys+lumley&ots=ie0y1lnzv1&sig=c4UHI3arjspMJ6OYzlX32E9rNRI#v=onepage&q=complex%20surveys%20lumley&f=false) Page 44
\ No newline at end of file
+[^1]: [Lumley, Thomas. Complex surveys: a guide to analysis using R. John Wiley & Sons, 2011.](https://books.google.co.in/books?hl=en&lr=&id=L96ludyhFBsC&oi=fnd&pg=PP12&dq=complex+surveys+lumley&ots=ie0y1lnzv1&sig=c4UHI3arjspMJ6OYzlX32E9rNRI#v=onepage&q=complex%20surveys%20lumley&f=false) Page 44
diff --git a/src/Survey.jl b/src/Survey.jl
index 62f263a2..de467a8a 100644
--- a/src/Survey.jl
+++ b/src/Survey.jl
@@ -25,6 +25,7 @@ include("boxplot.jl")
 include("show.jl")
 include("ratio.jl")
 include("by.jl")
+include("raking.jl")
 
 export load_data
 export AbstractSurveyDesign, SurveyDesign, ReplicateDesign
@@ -35,5 +36,5 @@ export hist, sturges, freedman_diaconis
 export boxplot
 export bootweights
 export ratio
-
+export raking
 end
diff --git a/src/raking.jl b/src/raking.jl
new file mode 100644
index 00000000..99ca7901
--- /dev/null
+++ b/src/raking.jl
@@ -0,0 +1,75 @@
+"""
+    raking(design, sample_margins_row, sample_margins_col,
+           population_margins_row, population_margins_col, control)
+
+Rake the weights of `design` so that the weighted totals of two categorical columns
+match known population margins (iterative proportional fitting).
+
+# Arguments
+- `design`: survey design; `design.data[!, design.weights]` holds the starting weights.
+- `sample_margins_row`, `sample_margins_col`: columns of `design.data` to rake over.
+- `population_margins_row`, `population_margins_col`: population totals, one per level
+  of the matching column, listed in the sorted order of those levels.
+- `control`: `"maxit"` caps the number of raking passes; raking stops early once every
+  weighted row margin is within `"epsilon"` of its target.
+
+# Returns
+A one-column `DataFrame`, named after `design.weights`, with the raked weights in the
+row order of `design.data`. The design itself is not modified.
+
+# Throws
+- `DimensionMismatch` if a margin vector does not have one entry per level.
+"""
+function raking(design::AbstractSurveyDesign, sample_margins_row::Symbol,
+    sample_margins_col::Symbol, population_margins_row::Vector,
+    population_margins_col::Vector, control=Dict("maxit" => 10, "epsilon" => 1))
+    rowgroups = _raking_groups(design.data[!, sample_margins_row])
+    colgroups = _raking_groups(design.data[!, sample_margins_col])
+    _check_margins(sample_margins_row, rowgroups, population_margins_row)
+    _check_margins(sample_margins_col, colgroups, population_margins_col)
+
+    # Work on a floating-point copy so the weights stored in the design stay untouched.
+    weights = float.(design.data[!, design.weights])
+    passes = 0
+    discrepancy = Inf
+    while passes < control["maxit"] && discrepancy >= control["epsilon"]
+        _rake_margin!(weights, rowgroups, population_margins_row)
+        _rake_margin!(weights, colgroups, population_margins_col)
+        # The column margins are exact after the step above; only the rows can be off.
+        discrepancy = _margin_discrepancy(weights, rowgroups, population_margins_row)
+        passes += 1
+    end
+    if discrepancy >= control["epsilon"]
+        @warn "Raking did not converge after $passes passes"
+    end
+    return DataFrame(design.weights => weights)
+end
+
+# Row indices of every level of `values`, with the levels taken in sorted order.
+function _raking_groups(values::AbstractVector)
+    return [findall(isequal(level), values) for level in sort(unique(values))]
+end
+
+# Reject a margin vector that does not provide exactly one total per level.
+function _check_margins(column::Symbol, groups, margins)
+    length(groups) == length(margins) || throw(DimensionMismatch(
+        "$column has $(length(groups)) levels but $(length(margins)) margins were given"))
+    return nothing
+end
+
+# Rescale the weights of each group so that the group total equals its target.
+function _rake_margin!(weights, groups, targets)
+    for (idx, target) in zip(groups, targets)
+        weights[idx] .*= target / sum(view(weights, idx))
+    end
+    return weights
+end
+
+# Largest absolute gap between a weighted group total and its target (0 if no groups).
+function _margin_discrepancy(weights, groups, targets)
+    worst = 0.0
+    for (idx, target) in zip(groups, targets)
+        worst = max(worst, abs(sum(view(weights, idx)) - target))
+    end
+    return worst
+end
diff --git a/test/raking.jl b/test/raking.jl
new file mode 100644
index 00000000..5c3d11a3
--- /dev/null
+++ b/test/raking.jl
@@ -0,0 +1,22 @@
+@testset "raking.jl" begin
+    apiclus1 = load_data("apiclus1") # Load API dataset
+    apiclus1[!, :pw] = fill(757/15,(size(apiclus1,1),)) # Correct api mistake for pw column
+    apiclus1.schwide = apiclus1[!,"sch.wide"]
+    dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw)
+    poptypes = [4421,755,1018]
+    popschwide = [1072,5122]
+
+    # `control` is a Dict keyed by "maxit" and "epsilon", not a positional vector.
+    raked = raking(dclus1, :stype, :schwide, poptypes, popschwide,
+        Dict("maxit" => 100, "epsilon" => 2))
+    w = raked[!, 1]
+    @test length(w) == size(dclus1.data, 1)
+    # The last raking step fits the :schwide margins, so they are matched exactly.
+    for (level, target) in zip(sort(unique(dclus1.data[!, :schwide])), popschwide)
+        @test sum(w[dclus1.data[!, :schwide] .== level]) ≈ target
+    end
+    # The :stype margins only have to be within the requested epsilon.
+    for (level, target) in zip(sort(unique(dclus1.data[!, :stype])), poptypes)
+        @test abs(sum(w[dclus1.data[!, :stype] .== level]) - target) < 2
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index f2fc89d4..10be1621 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -35,3 +35,4 @@ include("hist.jl")
 include("boxplot.jl")
 include("ratio.jl")
 include("show.jl")
+include("raking.jl")
\ No newline at end of file