Commit

Merge branch 'joss-submission' of https://github.com/bandframework/Taweret into joss-submission
asemposki committed Oct 30, 2023
2 parents b015260 + 799663c commit a87eade
Showing 2 changed files with 54 additions and 59 deletions.
67 changes: 31 additions & 36 deletions joss_paper/paper.md
@@ -67,17 +67,15 @@
Mixing (BMM). In general, model mixing techniques are designed to
combine the individual mean predictions or density estimates from the
$K$ models under consideration. For example, *mean-mixing* techniques
predict the underlying system by
$$E[Y \mid x] = \sum_{k = 1}^K w_k(x)\; f_k(x),$$
where $E[Y \mid x]$ denotes the mean of $Y$ given the
vector of input parameters $x$, $f_k(x)$ is the mean
prediction under the $k^\mathrm{th}$ model $\mathcal{M}_k$, and
$w_k(x)$ is the corresponding weight function. The
*density-mixing* approach estimates the underlying predictive density by
$$p(Y_0 \mid x_0, Y) = \sum_{k = 1}^K w_k(x_0)\;p(Y_0 \mid x_0, Y, \mathcal{M}_k),$$
where $p(Y_0 \mid x_0, Y, \mathcal{M}_k)$ represents the predictive density
of a future observation $Y_0$ with respect to the $k^\mathrm{th}$ model
$\mathcal{M}_k$ at a new input $x_0$. In either BMM
setup, a key challenge is defining $w_k(x)$---the functional
relationship between the inputs and the weights.
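
To make the two combination rules above concrete, the short sketch below mixes two toy models with a fixed sigmoid weight function. The model forms, the weight parameterization, and the Gaussian predictive densities are illustrative assumptions for this sketch, not `Taweret`'s interface.

```python
import numpy as np
from scipy.stats import norm

# Toy stand-in models (illustrative assumptions, not Taweret's API): each
# returns a mean prediction f_k(x) and a standard deviation sigma_k(x).
def model_1(x):
    return np.sin(x), 0.10 * np.ones_like(x)

def model_2(x):
    return 1.0 - 0.1 * x**2, 0.20 * np.ones_like(x)

def weight_1(x, x0=2.0, gamma=4.0):
    """Sigmoid weight for model 1; model 2 receives 1 - w_1 so the weights sum to one."""
    return 1.0 / (1.0 + np.exp(gamma * (x - x0)))

x = np.linspace(0.0, 4.0, 9)
f1, s1 = model_1(x)
f2, s2 = model_2(x)
w1 = weight_1(x)
w2 = 1.0 - w1

# Mean mixing: E[Y | x] = sum_k w_k(x) f_k(x)
mixed_mean = w1 * f1 + w2 * f2

# Density mixing: p(Y0 | x0, Y) = sum_k w_k(x0) p(Y0 | x0, Y, M_k),
# here with a Gaussian predictive density assumed for each model.
i = 4       # index of the new input x0 = x[4]
y0 = 0.5    # candidate future observation
mixed_density = (w1[i] * norm.pdf(y0, loc=f1[i], scale=s1[i])
                 + w2[i] * norm.pdf(y0, loc=f2[i], scale=s2[i]))

print(mixed_mean)
print(mixed_density)
```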

This work introduces `Taweret`, a Python package for Bayesian Model
@@ -122,27 +120,24 @@
outputs* details how many observables the models themselves can have
to compute the model likelihood (e.g., in heavy-ion collisions this
can include charge multiplicities, transverse momentum distributions,
transverse momentum fluctuations, etc.); the *Number of models* column
details how many models the mixing method can combine, and the *Weight
functions* column describes the available parameterization of how the
mixing weights depend on the input parameter. []{label="methodcomparison"}

+---------------------+---------+-----------+-----------+-----------+---------------------+
| Method | Type | Number of | Number of | Number of | Weight |
| | | inputs | outputs | models | functions |
+:===================:+:=======:+:=========:+:=========:+:=========:+:===================:+
| Bivariate linear | Mean & | 1 | $\geq 1$ | 2 | - Step, |
| mixing | Density | | | | - Sigmoid, |
| | | | | | - Asymmetric 2-step |
+---------------------+---------+-----------+-----------+-----------+---------------------+
| Multivariate mixing | Mean | 1 | 1 | $K$ | Precision |
| | | | | | weighting |
+---------------------+---------+-----------+-----------+-----------+---------------------+
| BART mixing | Mean | $\geq 1$ | 1 | $K$ | Regression |
| | | | | | trees |
+---------------------+---------+-----------+-----------+-----------+---------------------+


### Bivariate linear mixing
@@ -190,25 +185,25 @@
$f_{k}(x)$ is the mean of the model $k$, and $\sigma^{2}_{k}(x)$ its
variance, both at input parameter $x$.
In this method, the software receives the one-dimensional input space
$X$, the mean of the $K$ models at each point $x \in X$ (hence it is a
mean-based mixing procedure), and the variances of the models at each
point $x \in X$. Each model is assumed to have been calibrated prior to
being included in the mix. The ignorance of this mixing method with
respect to how each model was generated allows for any model to be used,
including Bayesian Machine Learning tools such as Gaussian Processes
[@Semposki:2022gcp] and Bayesian Neural Networks [@Kronheim:2020dmp].
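
A minimal sketch of this mean-based combination is given below, using the standard inverse-variance (precision) weighting rule and hand-written means and variances standing in for the calibrated models; these are illustrative assumptions, not a transcription of `Taweret`'s implementation.

```python
import numpy as np

# Illustrative inputs (assumptions, not Taweret's API): K = 3 pre-calibrated
# models evaluated on a shared one-dimensional grid X, each supplying a mean
# f_k(x) and a variance sigma_k^2(x) at every grid point.
X = np.linspace(0.0, 1.0, 5)
means = np.array([np.sin(X), np.cos(X), 1.0 - X])          # shape (K, len(X))
variances = np.array([0.05, 0.10, 0.20])[:, None] * np.ones_like(means)

# Precision (inverse-variance) weighting:
#   w_k(x) = sigma_k^{-2}(x) / sum_j sigma_j^{-2}(x),
# giving mixed mean sum_k w_k(x) f_k(x) and mixed variance 1 / sum_k sigma_k^{-2}(x).
precisions = 1.0 / variances
weights = precisions / precisions.sum(axis=0)

mixed_mean = (weights * means).sum(axis=0)
mixed_variance = 1.0 / precisions.sum(axis=0)

print(mixed_mean)
print(mixed_variance)
```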

### Model mixing using Bayesian additive regression trees
A third BMM approach implemented in `Taweret` adopts a mean-mixing
strategy which models the weight functions using Bayesian Additive
Regression Trees (BART) conditional on the mean predictions from a set
of $K$ models [@yannotty2023model]. This approach enables the weight
functions to be adaptively learned using tree bases and avoids the need
for user-specified basis functions (such as a generalized linear
model). Formally, the weight functions are defined by
$$w_k(x) = \sum_{j = 1}^m g_k(x; T_j, M_j), \quad \text{for}\ k=1,\ldots,K$$
where $g_k(x;T_j,M_j)$ defines the $k^\text{th}$ output of
the $j^\text{th}$ tree, $T_j$, using the associated set of parameters,
$M_j$. Each weight function is implicitly regularized via a prior to
prefer the interval $[0,1]$. Furthermore, the weight functions are not
@@ -330,7 +325,7 @@
could be the mixing of an arbitrary number of models at the density
level. Complementary to this density mixing method is a stochastic,
mean-mixing method of arbitrary number $K$ of models. An extension of
the Multivariate Mixing method to multi-dimensional input and output
spaces, correlated models, as well as calibration during mixing, is
anticipated in future releases. Lastly, to facilitate the utilization of
this growing framework, we hope to enable continuous integration
routines for individuals contributing and create docker images that will
46 changes: 23 additions & 23 deletions joss_paper/references.bib
@@ -13,10 +13,10 @@
@article{Fragoso2018
}

@inproceedings{loo,
title = "\texttt{loo}: Efficient leave-one-out cross-validation and WAIC for Bayesian models",
title = "\texttt{loo}: Efficient leave-one-out cross-validation and WAIC for {B}ayesian models",
author = "Vehtari, Aki and Gelman, Andrew and Gabry, Jonah",
year = "2017",
url = {\url{https://mc-stan.org/loo/}},
url = {https://mc-stan.org/loo/},
note = {\mbox{https://mc-stan.org/loo/}}
}

@@ -36,11 +36,11 @@
@article{Yao2022


@inproceedings{SAMBA,
author = "Semposki, Alexandra C. and Furnstahl, Richard J. and Phillips, Daniel R.",
author = "Semposki, A. C. and Furnstahl, Richard J. and Phillips, Daniel R.",
title = "{{SAMBA: SAndbox for Mixing using Bayesian Analysis}}",
year="2022",
note = {\url{https://github.com/asemposki/SAMBA}},
url = {https://github.com/asemposki/SAMBA}
}

@techreport{bandframework,
@@ -82,7 +82,7 @@
@article{Semposki:2022gcp
archivePrefix = "arXiv",
primaryClass = "nucl-th",
doi = "10.1103/PhysRevC.106.044002",
journal = "Phys. Rev. C",
journal = "Physical Review C",
volume = "106",
number = "4",
pages = "044002",
@@ -96,7 +96,7 @@
@article{Phillips:2020dmw
archivePrefix = "arXiv",
primaryClass = "nucl-th",
doi = "10.1088/1361-6471/abf1df",
journal = "J. Phys. G",
journal = "Journal of Physics G",
volume = "48",
number = "7",
pages = "072001",
@@ -112,63 +112,63 @@
@inproceedings{Taweret
}

@misc{yannotty2023model,
title={Model Mixing Using {B}ayesian Additive Regression Trees},
author={John C. Yannotty and Thomas J. Santner and Richard J. Furnstahl and Matthew T. Pratola},
year={2023},
eprint={2301.02296},
archivePrefix={arXiv},
primaryClass={stat.ME},
journal = {Technometrics},
pages = {1-12},
publisher = {Taylor \& Francis},
doi = {10.1080/00401706.2023.2257765},
URL = {https://doi.org/10.1080/00401706.2023.2257765}
}

@article{Pan2020,
author = "Pan, Lanfeng and Li, Yehua and He, Kevin and Li, Yanming and Li, Yi",
title = "{Generalized Linear Mixed Models with Gaussian Mixture Random Effects: Inference and Application}",
doi = "10.1016/j.jmva.2019.104555",
journal = "J Multivar Anal.",
journal = "Journal of Multivariate Analysis",
year = "2020"
}

@article{Yao2018,
author = "Yao, Yuling and Vehtari, Aki and Simpson, Daniel and Gelman, Andrew",
title = "Using Stacking to Average Bayesian Predictive Distributions (with Discussion)",
doi = "10.1214/17-BA1091",
journal = "Bayesian Anal. 13 (3) 917 - 1007",
journal = "Bayesian Analysis, 13 (3) 917 - 1007",
year = "2018"
}

@article{Sloughter2007,
author = "Sloughter, J. McLean and Raftery, Adrian E. and Gneiting, Tilmann and Fraley, Chris",
title = "Probabilistic Quantitative Precipitation Forecasting Using Bayesian Model Averaging",
title = "Probabilistic Quantitative Precipitation Forecasting Using {B}ayesian Model Averaging",
doi = "10.1175/MWR3441.1",
journal = "Mon. Wea. Rev., 135, 3209–3220",
journal = "Monthly Weather Review, 135, 3209–3220",
year = "2007"
}

@article{FitzGerald2014,
author = "FitzGerald, Thomas H. B. and Dolan, Raymond J. and Friston, Karl J.",
title = "Model averaging, optimal inference, and habit formation",
doi = "10.3389/fnhum.2014.00457",
journal = "Front. Hum. Neurosci. 8:457",
journal = "Frontiers in Human Neuroscience, 8:457",
year = "2014"
}

@inproceedings{OpenBT_MTP,
author = "{Open Bayesian Trees Project}",
year = "2023",
url = {https://bitbucket.org/mpratola/openbt/wiki/Home}
}


@article{Kronheim:2020dmp,
author = "Kronheim, Braden and Kuchera, Michelle and Prosper, Harrison",
title = "{TensorBNN: Bayesian inference for neural networks using TensorFlow}",
eprint = "2009.14393",
archivePrefix = "arXiv",
primaryClass = "physics.comp-ph",
doi = "10.1016/j.cpc.2021.108168",
journal = "Comput. Phys. Commun.",
journal = "Computer Physics Communications",
volume = "270",
pages = "108168",
year = "2022"
