generated from hubverse-org/hubTemplate
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #22 from reichlab/Splines_code
Adding code for splines models.
- Loading branch information
Showing
10 changed files
with
728 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
team_name: "UMass Amherst" | ||
team_abbr: "UMass" | ||
model_name: "Hierarchical Multinomial Logistic Regression FDSDN" | ||
model_abbr: "HMLR_FDSDN" | ||
model_version: "1.0" | ||
model_contributors: [ | ||
{ | ||
"name": "Isaac MacArthur", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Maryclare Griffin", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Evan L. Ray", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Nick G. Reich", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Thomas Robacker", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Benjamin W. Rogers", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
] | ||
license: "CC-BY-4.0" | ||
team_funding: "This work has been supported by the National Institutes of General Medical Sciences (R35GM119582) and the US Centers for Disease Control and Prevention’s Center for Forecasting and Outbreak Analytics (NU38FT000008). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS, the National Institutes of Health, or CDC." | ||
methods: "A version of our model that has a shared sd between varaiants (sd_structure = F), uses time from start of dataset (time_treatment = D), uses a splines model for time, technically cubic-polynomials at the second (time_model = S), uses a Dirichlet-multinomial model (distribution = D), and has no spacial elements (spacial_correlation = N), it also assumes that the time coeffients for each variant are drawn from a N(bloc_v, bsd_v^2) distrubution, and bloc_v are idd N(0, 400) and bsd_v are idd draws from a trucated normal(1, 400) distrubution. The intercepts are drawn from a N(aloc_v, asd^2) distrubution, and aloc_v are idd N(0, 400) and asd is a draw from from a trucated normal(1, 400) distrubution. Here the prior on kappa is trucated normal(1,5)." | ||
methods_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/blob/main/Model%20Details.pdf" | ||
data_sources: "NextStrain" | ||
ensemble_of_models: false | ||
ensemble_of_hub_models: false | ||
repo_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/tree/main" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
team_name: "UMass Amherst" | ||
team_abbr: "UMass" | ||
model_name: "Hierarchical Multinomial Logistic Regression FDSMN" | ||
model_abbr: "HMLR_FDSMN" | ||
model_version: "1.0" | ||
model_contributors: [ | ||
{ | ||
"name": "Isaac MacArthur", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Maryclare Griffin", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Evan L. Ray", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Nick G. Reich", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Thomas Robacker", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Benjamin W. Rogers", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
] | ||
license: "CC-BY-4.0" | ||
team_funding: "This work has been supported by the National Institutes of General Medical Sciences (R35GM119582) and the US Centers for Disease Control and Prevention’s Center for Forecasting and Outbreak Analytics (NU38FT000008). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS, the National Institutes of Health, or CDC." | ||
methods: "A version of our model that has a shared sd between varaiants (sd_structure = F), uses time from start of dataset (time_treatment = D), uses a spline model for time, technically cubic-polynomials at the second (time_model = S), uses a multinomial model (distribution = M), and has no spacial elements (spacial_correlation = N), it also assumes that the time coeffients for each variant are drawn from a N(bloc_v, bsd_v^2) distrubution, and bloc_v are idd N(0, 400) and bsd_v are idd draws from a trucated normal(1, 400) distrubution. The intercepts are drawn from a N(aloc_v, asd^2) distrubution, and aloc_v are idd N(0, 400) and asd is a draw from from a trucated normal(1, 400) distrubution." | ||
methods_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/blob/main/Model%20Details.pdf" | ||
data_sources: "NextStrain" | ||
ensemble_of_models: false | ||
ensemble_of_hub_models: false | ||
repo_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/tree/main" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
team_name: "UMass Amherst" | ||
team_abbr: "UMass" | ||
model_name: "Hierarchical Multinomial Logistic Regression TDSDN" | ||
model_abbr: "HMLR_TDSDN" | ||
model_version: "1.0" | ||
model_contributors: [ | ||
{ | ||
"name": "Isaac MacArthur", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Maryclare Griffin", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Evan L. Ray", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Nick G. Reich", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Thomas Robacker", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Benjamin W. Rogers", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachusetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
] | ||
license: "CC-BY-4.0" | ||
team_funding: "This work has been supported by the National Institutes of General Medical Sciences (R35GM119582) and the US Centers for Disease Control and Prevention’s Center for Forecasting and Outbreak Analytics (NU38FT000008). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS, the National Institutes of Health, or CDC." | ||
methods: "A version of our model that has different sd for each varaiant (sd_structure = T), uses time from start of dataset (time_treatment = D), uses a spline model for time, technically cubic-polynomials at the second (time_model = S), uses a Dirichlet-multinomial model (distribution = D), and has no spacial elements (spacial_correlation = N), it also assumes that the time coeffients for each variant are drawn from a N(bloc_v, bsd_v^2) distrubution, and bloc_v are idd N(0, 400) and bsd_v are idd draws from a trucated normal(1, 400) distrubution. The intercepts are drawn from a N(aloc_v, asd_v^2) distrubution, and aloc_v are idd N(0, 400) and asd_v are idd draws from a trucated normal(1, 400) distrubution. Here the prior on kappa is trucated normal(1,5)" | ||
methods_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/blob/main/Model%20Details.pdf" | ||
data_sources: "NextStrain" | ||
ensemble_of_models: false | ||
ensemble_of_hub_models: false | ||
repo_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/tree/main" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
team_name: "UMass Amherst" | ||
team_abbr: "UMass" | ||
model_name: "Hierarchical Multinomial Logistic Regression TDSMN" | ||
model_abbr: "HMLR_TDSMN" | ||
model_version: "1.0" | ||
model_contributors: [ | ||
{ | ||
"name": "Isaac MacArthur", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Maryclare Griffin", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Evan L. Ray", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Nick G. Reich", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Thomas Robacker", | ||
"affiliation": "Department of Mathematics and Statistics, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
{ | ||
"name": "Benjamin W. Rogers", | ||
"affiliation": "Department of Biostatistics and Epidemiology, University of Massachussetts Amherst", | ||
"email": "[email protected]" | ||
}, | ||
] | ||
license: "CC-BY-4.0" | ||
team_funding: "This work has been supported by the National Institutes of General Medical Sciences (R35GM119582) and the US Centers for Disease Control and Prevention’s Center for Forecasting and Outbreak Analytics (NU38FT000008). The content is solely the responsibility of the authors and does not necessarily represent the official views of NIGMS, the National Institutes of Health, or CDC." | ||
methods: "A version of our model that has different sd for each varaiant (sd_structure = T), uses time from start of dataset (time_treatment = D), uses a spline model for time, technically cubic-polynomials at the second (time_model = S), uses a multinomial model (distribution = M), and has no spacial elements (spacial_correlation = N), it also assumes that the time coeffients for each variant are drawn from a N(bloc_v, bsd_v^2) distrubution, and bloc_v are idd N(0, 400) and bsd_v are idd draws from a trucated normal(1, 400) distrubution. The intercepts are drawn from a N(aloc_v, asd_v^2) distrubution, and aloc_v are idd N(0, 400) and asd_v are idd draws from a trucated normal(1, 400) distrubution." | ||
methods_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/blob/main/Model%20Details.pdf" | ||
data_sources: "NextStrain" | ||
ensemble_of_models: false | ||
ensemble_of_hub_models: false | ||
repo_url: "https://github.com/IsaacMacarthur/Umass-Virus_modeling-code/tree/main" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
functions { | ||
// function for dirichlet_multinomial_lpmf comes from https://discourse.mc-stan.org/t/transforming-a-multinomial-model-into-a-dirichlet-multinomial/26399/2 | ||
real dirichlet_multinomial_lpmf(int[] y, vector alpha) { | ||
real sum_alpha = sum(alpha); | ||
return lgamma(sum_alpha) - lgamma(sum(y) + sum_alpha) | ||
+ sum(lgamma(to_vector(y) + alpha)) - sum(lgamma(alpha)); | ||
} | ||
|
||
vector map_rect_likelihood(vector phi,vector params, real[] data_r, int[] data_i) { | ||
int K = data_i[1]; // Number of clades | ||
int B = data_i[2]; // Degrees of freedom for spline | ||
int N = data_i[3]; // Number of days | ||
int y[N, K]; | ||
{ | ||
int idx = 1; | ||
for (n in 1:N) { | ||
for (k in 1:K) { | ||
y[n, k] = data_i[3 + idx]; | ||
idx += 1; | ||
} | ||
} | ||
} | ||
|
||
// Extract shared data | ||
matrix[B, N] x = to_matrix(data_r[1:(B * N)], B, N); | ||
vector[K-1] aloc = phi[1:K-1]; | ||
vector[K-1] bloc = phi[K:(2*K-2)]; | ||
real asd = phi[2*K -1]; | ||
real bsd = phi[2*K]; | ||
real kappa = phi[2*K+1]; | ||
|
||
// Extract location-specific parameters | ||
vector[K-1] alpha_nc = segment(params, 1, K-1); | ||
matrix[K-1, B] beta_nc = to_matrix(segment(params, K, (K-1)*B), K-1, B); | ||
|
||
// Compute raw alpha and beta | ||
vector[K] alpha = append_row(0, aloc + asd * alpha_nc); | ||
matrix[K, B] beta; | ||
for (b in 1:B) { | ||
beta[:, b] = append_row(0, bloc + bsd * beta_nc[:, b]); | ||
} | ||
|
||
// Calculate log-likelihood | ||
real acc = 0; | ||
for (n in 1:N) { | ||
if (sum(y[n, :]) > 0) { | ||
vector[K] lambda = kappa * exp(alpha + beta * x[:, n]) / sum(exp(alpha + beta * x[:, n])); | ||
acc += dirichlet_multinomial_lpmf(y[n, :] | lambda); | ||
} | ||
} | ||
|
||
return [acc]'; // Return log-likelihood as a vector | ||
} | ||
} | ||
|
||
data { | ||
int<lower=0> N; // Number of days of samples | ||
int<lower=1> L; // Number of locations | ||
int<lower=1> K; // Number of clades | ||
int<lower=3> B; // Degrees of freedom for the spline | ||
int<lower=0> y[N, L, K]; // Count data | ||
matrix[B, N] x; // Spline matrix | ||
} | ||
|
||
transformed data{ | ||
// Prepare shared data indices | ||
int shared_data_i[L, 4 + N * K -1]; | ||
real shared_data_r[L,(B * N)]; | ||
for (l in 1:L) { | ||
shared_data_r[l] = to_array_1d(x); | ||
shared_data_i[l,1] = K; | ||
shared_data_i[l,2] = B; | ||
shared_data_i[l, 3] = N; | ||
shared_data_i[l,(4):(4 + (N * K)-1)] = to_array_1d(y[:, l, :]); | ||
} | ||
} | ||
parameters { | ||
real<lower=0> bsd; // Prior sd for betas | ||
real<lower=0> asd; // Prior sd for the alphas | ||
real<lower=0> kappa; // Scale for the Dirichlet | ||
real bloc[K-1]; // Prior means for betas | ||
real aloc[K-1]; // Prior means for the alphas | ||
vector[K-1] alpha_noncentered[L]; // Non-centered alpha parameters | ||
matrix[K-1, B] beta_noncentered[L]; // Non-centered beta parameters | ||
} | ||
|
||
model { | ||
// Priors | ||
bsd ~ normal(1, 400); | ||
asd ~ normal(1, 400); | ||
bloc ~ normal(0, 400); | ||
aloc ~ normal(0, 400); | ||
kappa ~ normal(1, 5); | ||
|
||
for (l in 1:L) { | ||
alpha_noncentered[l] ~ normal(0, 1); | ||
to_vector(beta_noncentered[l]) ~ normal(0, 1); | ||
} | ||
|
||
// Vectorize parameters for map_rect | ||
vector[(K-1) + (K-1)*B] param_shards[L]; | ||
for (l in 1:L) { | ||
param_shards[l] = append_row(alpha_noncentered[l], to_vector(beta_noncentered[l])); | ||
} | ||
|
||
// Prepare shared data | ||
vector[K-1] aloc_vec = to_vector(aloc); | ||
vector[K-1] bloc_vec = to_vector(bloc); | ||
vector[3] scalar_vec = [asd, bsd, kappa]'; | ||
vector[2*K + 1] phi = append_row(aloc_vec, append_row(bloc_vec, scalar_vec)); | ||
|
||
// Call map_rect with the correct arguments | ||
target += sum(map_rect(map_rect_likelihood,phi, param_shards, shared_data_r, shared_data_i)); | ||
|
||
} | ||
generated quantities { | ||
// Declare raw_alpha and raw_beta | ||
vector[K-1] raw_alpha[L]; | ||
matrix[K-1, B] raw_beta[L]; | ||
for (l in 1:L) { | ||
for (k in 1:(K-1)) { | ||
raw_alpha[l, k] = aloc[k] + asd * alpha_noncentered[l, k]; // Reparameterized alpha | ||
for (b in 1:B) { | ||
raw_beta[l, k, b] = bloc[k] + bsd * beta_noncentered[l, k, b]; // Reparameterized beta | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
functions { | ||
real partial_sum_lpmf(int[] slice_y, int start, int end, int[] ll, matrix x, | ||
row_vector weights, vector[] raw_alpha, matrix[] raw_beta, | ||
int K, int B) { | ||
real acc = 0; | ||
for (n in 1:size(slice_y)) { | ||
vector[K] alpha = append_row(0, raw_alpha[ll[start + n - 1]]); | ||
matrix[K, B] beta; | ||
for (b in 1:B) { | ||
beta[:, b] = append_row(0, raw_beta[ll[start + n - 1], :, b]); | ||
} | ||
acc += weights[start + n - 1] * categorical_logit_lpmf(slice_y[n] | alpha + beta * x[:, start + n - 1]); | ||
} | ||
return acc; | ||
} | ||
} | ||
|
||
data { | ||
int<lower=0> N; // the number of samples | ||
int<lower=1> L; // number of locations | ||
int<lower=1> K; // number of clades | ||
int<lower=1> B; // the number of df | ||
array[N] int<lower=1, upper=K> y; // the clades | ||
array[N] int<lower=1, upper=L> ll; // locations | ||
matrix[B, N] x; // the spline over the days of the samples | ||
row_vector[N] weights; // number of seq per location | ||
} | ||
|
||
parameters { | ||
real<lower=0> bsd; // prior sd for betas | ||
real<lower=0> asd; // prior sd for alphas | ||
array[K-1] real aloc; // prior means for the alpha's | ||
array[K-1] real bloc; // prior means for betas | ||
|
||
// Non-centered parameterization | ||
vector[K-1] alpha_nc[L]; // raw alpha's | ||
matrix[K-1, B] beta_nc[L]; // raw betas | ||
} | ||
|
||
transformed parameters { | ||
// Apply non-centered parameterization | ||
vector[K-1] raw_alpha[L]; | ||
matrix[K-1, B] raw_beta[L]; | ||
for (l in 1:L) { | ||
for (k in 1:(K-1)) { | ||
raw_alpha[l, k] = aloc[k] + asd * alpha_nc[l, k]; // Reparameterized alpha | ||
for (b in 1:B) { | ||
raw_beta[l, k, b] = bloc[k] + bsd * beta_nc[l, k, b]; // Reparameterized beta | ||
} | ||
} | ||
} | ||
} | ||
|
||
model { | ||
// Priors | ||
bsd ~ normal(1, 400); // prior for the sd | ||
bloc ~ normal(0, 400); // priors for the betas | ||
asd ~ normal(1, 400); // prior for the alpha sd | ||
aloc ~ normal(0, 400); // prior for the alpha means | ||
|
||
// Priors on raw parameters (standard normal) | ||
for (l in 1:L) { | ||
alpha_nc[l] ~ normal(0, 1); // standard normal for alpha_nc | ||
for (k in 1:(K-1)) { | ||
beta_nc[l, k] ~ normal(0, 1); // standard normal for beta_nc | ||
} | ||
} | ||
|
||
// Likelihood using reduce_sum | ||
target += reduce_sum(partial_sum_lpmf, y, 1, ll, x, weights, raw_alpha, raw_beta, K, B); | ||
} |
Oops, something went wrong.