Skip to content

Commit

Permalink
initialize sampling implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
Apostolos Chalkis committed Oct 12, 2022
1 parent a3d46ee commit a6920ce
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 49 deletions.
1 change: 1 addition & 0 deletions R-proj/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(file_to_polytope)
export(samples_uniform_portfolios)
export(mmc_sampling)
export(psrf_univariate)
export(sample_points)
Expand Down
12 changes: 6 additions & 6 deletions R-proj/R/fast_preprocess_with_mosek.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ fast_preprocess_with_mosek <- function(P) {

m = dim(Aeq)[1]

minFluxes = c()
maxFluxes = c()
minWeights = c()
maxWeights = c()

row_ind = c()
col_ind = c()
Expand Down Expand Up @@ -54,8 +54,8 @@ fast_preprocess_with_mosek <- function(P) {
#print(paste0("mix_dist = ", max_dist))
#print(" ")

minFluxes = c(minFluxes, min_dist)
maxFluxes = c(maxFluxes, max_dist)
minWeights = c(minWeights, min_dist)
maxWeights = c(maxWeights, max_dist)

width = abs(max_dist - min_dist)

Expand All @@ -68,8 +68,8 @@ fast_preprocess_with_mosek <- function(P) {
ret_list = list()
ret_list$Aeq = Aeq
ret_list$beq = beq
ret_list$minFluxes = minFluxes
ret_list$maxFluxes = maxFluxes
ret_list$minWeights = minWeights
ret_list$maxWeights = maxWeights
ret_list$row_ind = row_ind
ret_list$col_ind = col_ind
ret_list$values = values
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
generate_steady_states <- function(path, n = 1000, Recon2D_v04 = FALSE, Recon3D_301 = FALSE) {
samples_uniform_portfolios <- function(A, b, Aeq, beq, ess = 1000) {

P = Hpolytope$new(A = as.matrix(A), b = c(b), Aeq = as.matrix(Aeq), beq = c(beq))

P = metabolic_net_2_polytope(path, Recon2D_v04, Recon3D_301)
print("computing min and max Fluxes")
print("Preprocessing...")
pre_proc_list = fast_preprocess_with_mosek(P)

print("Computing the null space of the Stoichiometric matrix")
print("Computing the full dimensional polytope...")
rr = null_space_and_shift(pre_proc_list$row_ind, pre_proc_list$col_ind, pre_proc_list$values, pre_proc_list$Aeq, pre_proc_list$beq)

A = P$A
Expand All @@ -26,19 +27,20 @@ generate_steady_states <- function(path, n = 1000, Recon2D_v04 = FALSE, Recon3D_
}
print('Full dimensional polytope computed!')

print("Computing the Chebychev ball of the full dimensional polytipe")
print("Computing the Chebychev ball of the full dimensional polytope...")
max_ball = get_max_inner_ball(A, b)
print("Chebychev ball computed!")

d = dim(A)[2]
m = dim(A)[1]

print("Call MMCS method")
print("Sampling with MMCS method...")
tim = system.time({ samples_list = mmc_sampling(A = A, b = b,
max_ball = max_ball, n = n,
num_rounding_samples = 20*d,
max_num_samples = 100*d,
rounding = TRUE) })
print('Steady states computed!')
max_ball = max_ball, n = ess,
num_rounding_samples = 20*d,
max_num_samples = 100*d,
rounding = TRUE) })
print('Sampling completed!')

N = dim(samples_list$samples)[2]
samples = samples_list$samples
Expand All @@ -51,13 +53,14 @@ generate_steady_states <- function(path, n = 1000, Recon2D_v04 = FALSE, Recon3D_
result_list = list()
result_list$HP_rounded = HP
result_list$samples = samples
result_list$steady_states = steady_states
result_list$random_portfolios = steady_states
result_list$N = rr$N
result_list$N_shift = rr$N_shift
result_list$T = samples_list$T
result_list$T_shift = samples_list$T_shift
result_list$minFluxes = pre_proc_list$minFluxes
result_list$maxFluxes = pre_proc_list$maxFluxes
result_list$minWeights = pre_proc_list$minWeights
result_list$maxWeights = pre_proc_list$maxWeights
result_list$run_time = tim[3]

return(result_list)
}
43 changes: 20 additions & 23 deletions R-proj/example.R
Original file line number Diff line number Diff line change
@@ -1,32 +1,29 @@
library(volesti)

## give the path to your mat file that represents the model
## assuming that your working directory is R-proj the following command will work
## alternatively you could set your absolute path to the mat file
## Get Ax<b and Aeq x = beq from a metabolic model just to illustrate an example
path = 'metabolic_mat_files/e_coli_core.mat'

## request effectiveness n = 1000 and sample steady states
## If you would like to sample the Recon2D_v04 or the Recon3D_301
## from https://www.vmh.life/ you have to set TRUE one of the corresponding flags
result_list = generate_steady_states(path, n = 1000, Recon2D_v04 = FALSE, Recon3D_301 = FALSE)
P = metabolic_net_2_polytope(path, FALSE, FALSE)

## compute the PSRF of all marginals
## request effective sample size ess = 1000 and sample
result_list = samples_uniform_portfolios(A=P$A, b=P$b, Aeq=P$Aeq, beq=P$beq, ess = 1000)

## compute the PSRF of all marginals in the full dimensional space
psrfs = psrf_univariate(result_list$samples)

## approximate the flux distribution of the reaction "Acetate kinase"
h1 = hist(result_list$steady_states[12,],
main="Acetate kinase",
xlab="Flux (mmol/gDW/h)",
## approximate the marginal distribution of the 12-th marginal (weight)
h1 = hist(result_list$random_portfolios[12,],
main="Asset name",
xlab="Weight",
border="black",
col="red",
xlim=c(min(result_list$steady_states[12,]), max(result_list$steady_states[12,])),
xlim=c(min(result_list$random_portfolios[12,]), max(result_list$random_portfolios[12,])),
las=1,
breaks=50,
prob = TRUE)


## we could use the polytope of the last phase to sample more steady states as follows

## we could use the polytope of the last phase to sample more portfolios as follows
N = 5000
inner_ball = get_max_inner_ball(result_list$HP_rounded$A, result_list$HP_rounded$b)
more_samples = sample_points(result_list$HP_rounded, n = N,
Expand All @@ -38,20 +35,20 @@ more_samples = sample_points(result_list$HP_rounded, n = N,
samples_in_P0 = result_list$T %*% more_samples +
kronecker(matrix(1, 1, N), matrix(result_list$T_shift, ncol = 1))

## compute the steady states
more_steady_states = result_list$N %*% samples_in_P0 +
## compute the portfolios
more_random_portfolios = result_list$N %*% samples_in_P0 +
kronecker(matrix(1, 1, N), matrix(result_list$N_shift, ncol = 1))


## to compute a better density estimation
## approximate the flux distribution of the reaction "Acetate kinase"
## using the total number of steady states we have generated
h2 = hist(c(result_list$steady_states[12,], more_steady_states[12, ]),
main="Acetate kinase",
xlab="Flux (mmol/gDW/h)",
## approximate the marginal distribution
## using the total number of portfolios that we have generated
h2 = hist(c(result_list$more_random_portfolios[12,], more_random_portfolios[12, ]),
main="Asset name",
xlab="Weight",
border="black",
col="blue",
xlim=c(min(c(result_list$steady_states[12,], more_steady_states[12, ])), max(c(result_list$steady_states[12,], more_steady_states[12, ]))),
xlim=c(min(c(result_list$random_portfolios[12,], more_random_portfolios[12, ])), max(c(result_list$random_portfolios[12,], more_random_portfolios[12, ]))),
las=1,
breaks=50,
prob = TRUE)
42 changes: 36 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
This is an R package based on [VolEsti](https://github.com/GeomScale/volume_approximation) library that serves as supplementary code for the paper [*"Geometric algorithms for sampling the flux space of metabolic networks"*](https://arxiv.org/abs/2012.05503).
This is an R package based on [volesti](https://github.com/GeomScale/volume_approximation) library that serves as supplementary code to sample uniformly distributed portfolios

### Dependencies

Expand All @@ -19,12 +19,42 @@ devtools::install()
library(volesti)
```

### Sample steady states
### Sample uniformly distributed portfolios

First you have to download the mat file of the model you wish to sample from [Bigg](http://bigg.ucsd.edu/models) or the Recon2D_v04, Recon_d_301 from [VMH](https://www.vmh.life/) and save it to the folder `root/R-proj/metabolic_mat_files`.
Request effective sample size ess = 1000

Then follow the script `root/R-proj/example.R`. In that script we sample from the simplest model of the Escherichia Coli (the mat file is already saved in `root/R-proj/metabolic_mat_files`).
```r
result_list = samples_uniform_portfolios(A=A, b=b, Aeq=Aeq, beq=beq, ess = 1000)
```

### Results

If you execute it you shall get the following histogram that approximates the flux distribution of the reaction `Acetate kinase`.
result_list$HP_rounded : The full dimensional polytope of the last phase in MMCS method (use this to sample more portfolios).
result_list$samples : The samples in the full dimensional space.
result_list$random_portfolios : The uniformly distributed portfolios generated.
result_list$N : The matrix of the null space.
result_list$N_shift : Shift for the full dimensional polytope.
result_list$T : The transformation matrix to map samples from the last phase to the initial phase.
result_list$T_shift : The shift of the previous transformation.
result_list$minWeights : The minimum value of each weight.
result_list$maxWeights : The maximum value of each weight.
result_list$run_time : The total run-time.

![histogram](doc/histograms/acetate_kinase.png)
To sample more portfolios, that is how you can use the last phase,

```r
N = 5000 # sample 5000 more portfolios
inner_ball = get_max_inner_ball(result_list$HP_rounded$A, result_list$HP_rounded$b)
more_samples = sample_points(result_list$HP_rounded, n = N,
random_walk = list("walk" = "aBiW", "walk_length" = 1,
"starting_point"=inner_ball$center,
"L" = 6*sqrt(result_list$HP_rounded$dimension)*inner_ball$radius))

## map the points back to P0
samples_in_P0 = result_list$T %*% more_samples +
kronecker(matrix(1, 1, N), matrix(result_list$T_shift, ncol = 1))

## compute the portfolios
more_random_portfolios = result_list$N %*% samples_in_P0 +
kronecker(matrix(1, 1, N), matrix(result_list$N_shift, ncol = 1))
```

0 comments on commit a6920ce

Please sign in to comment.