Skip to content

Commit

Permalink
fix typos, update bibliography
Browse files Browse the repository at this point in the history
  • Loading branch information
Y0dler committed Nov 25, 2024
1 parent bd7723c commit 98731ef
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 31 deletions.
24 changes: 17 additions & 7 deletions docs/source/literature.bib
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,12 @@ @article{RN150
@book{RN162,
author = {Kruschke, John K.},
title = {Doing Bayesian Data Analysis},
edition = {1st Edition},
edition = {Second Edition},
publisher={Academic Press},
isbn = {9780124058880},
year = {2010},
type = {Book}
year = {2015},
type = {Book},
doi = {10.1016/B978-0-12-405888-0.00001-5}
}

@article{RN144,
Expand All @@ -96,7 +97,8 @@ @article{RN144
volume = {12},
pages = {171-178},
year = {1985},
type = {Journal Article}
type = {Journal Article},
url = {http://www.jstor.org/stable/4615982},
}


Expand All @@ -107,7 +109,8 @@ @article{RN152
volume = {7},
number = {4},
year = {1992},
type = {Journal Article}
type = {Journal Article},
doi = {10.1214/ss/1177011136}
}

@article{RN153,
Expand Down Expand Up @@ -176,7 +179,7 @@ @article{RN146
volume = {11},
pages = {3571-3594},
year = {2010},
type = {Journal Article}
type = {Journal Article},
}

@article{RN147,
Expand Down Expand Up @@ -221,6 +224,7 @@ @article{vivo2012bayesian
number={6},
pages={2622--2630},
year={2012},
doi={10.1021/ac202124t},
publisher={ACS Publications}
}

Expand All @@ -232,6 +236,7 @@ @article{woldegebriel2015probabilistic
number={14},
pages={7345--7355},
year={2015},
doi={10.1021/acs.analchem.5b01521},
publisher={ACS Publications}
}

Expand All @@ -242,6 +247,7 @@ @article{briskot2019prediction
volume={1587},
pages={101--110},
year={2019},
doi={10.1016/j.chroma.2018.11.076},
publisher={Elsevier}
}

Expand All @@ -252,6 +258,7 @@ @article{yamamoto2021uncertainty
volume={175},
pages={223--237},
year={2021},
doi={10.1016/j.cherd.2021.09.003},
publisher={Elsevier}
}

Expand All @@ -263,6 +270,7 @@ @article{wiczling2016much
number={1},
pages={997--1002},
year={2016},
doi={10.1021/acs.analchem.5b03859},
publisher={ACS Publications}
}

Expand All @@ -274,16 +282,18 @@ @article{kelly1971estimation
number={10},
pages={1170--1183},
year={1971},
doi={10.1021/ac60304a011},
publisher={ACS Publications}
}

@article{kelly1971application,
title={Application of method of maximum posterior probability to estimation of gas-chromatographic peak parmeters},
title={Application of method of maximum posterior probability to estimation of gas-chromatographic peak parameters},
author={Kelly, PC and Harris, WE},
journal={Analytical Chemistry},
volume={43},
number={10},
pages={1184--1195},
year={1971},
doi={10.1021/ac60304a005},
publisher={ACS Publications}
}
33 changes: 17 additions & 16 deletions paper/literature.bib
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@softmisc{nutpie,
@misc{nutpie,
author = {Seyboldt, Adrian and {PyMC Developers}},
keywords = {Software},
license = {MIT},
Expand Down Expand Up @@ -44,7 +44,7 @@ @article{matplotlib
year = 2007
}

@softmisc{matplotlibzenodo,
@misc{matplotlibzenodo,
author = {{The Matplotlib Development Team}},
title = {Matplotlib: Visualization with Python},
keywords = {software},
Expand All @@ -62,7 +62,6 @@ @article{RN173
journal = {Journal of Machine Learning Research},
volume = {15},
year = {2014},
type = {Journal Article}
}

@article{RN150,
Expand All @@ -76,16 +75,17 @@ @article{RN150
doi = {10.7717/peerj-cs.1516},
url = {https://www.ncbi.nlm.nih.gov/pubmed/37705656},
year = {2023},
type = {Journal Article}
}

@book{RN162,
author = {Kruschke, John K.},
title = {Doing Bayesian Data Analysis},
edition = {1st Edition},
edition = {Second Edition},
publisher={Academic Press},
isbn = {9780124058880},
year = {2010},
type = {Book}
year = {2015},
type = {Book},
doi = {10.1016/B978-0-12-405888-0.00001-5}
}

@article{RN144,
Expand All @@ -95,7 +95,7 @@ @article{RN144
volume = {12},
pages = {171-178},
year = {1985},
type = {Journal Article}
url = {http://www.jstor.org/stable/4615982},
}


Expand All @@ -106,7 +106,7 @@ @article{RN152
volume = {7},
number = {4},
year = {1992},
type = {Journal Article}
doi = {10.1214/ss/1177011136}
}

@article{RN153,
Expand All @@ -121,7 +121,6 @@ @article{RN153
doi = {10.1021/ac60319a011},
url = {https://www.ncbi.nlm.nih.gov/pubmed/22324584},
year = {1972},
type = {Journal Article}
}

@article{RN149,
Expand All @@ -136,7 +135,6 @@ @article{RN149
doi = {10.1002/biot.201700141},
url = {https://www.ncbi.nlm.nih.gov/pubmed/29283217},
year = {2018},
type = {Journal Article}
}

@article{RN148,
Expand All @@ -151,7 +149,6 @@ @article{RN148
doi = {10.1002/1097-0290(20010205)72:3<346::aid-bit12>3.0.co;2-x},
url = {https://www.ncbi.nlm.nih.gov/pubmed/11135205},
year = {2001},
type = {Journal Article}
}

@article{RN145,
Expand All @@ -165,7 +162,6 @@ @article{RN145
1573-1375},
doi = {10.1007/s11222-016-9696-4},
year = {2016},
type = {Journal Article}
}

@article{RN146,
Expand All @@ -175,7 +171,6 @@ @article{RN146
volume = {11},
pages = {3571-3594},
year = {2010},
type = {Journal Article}
}

@article{RN147,
Expand All @@ -187,7 +182,6 @@ @article{RN147
issn = {2475-9066},
doi = {10.21105/joss.01143},
year = {2019},
type = {Journal Article}
}

@article{harris2020array,
Expand Down Expand Up @@ -220,6 +214,7 @@ @article{vivo2012bayesian
number={6},
pages={2622--2630},
year={2012},
doi={10.1021/ac202124t},
publisher={ACS Publications}
}

Expand All @@ -231,6 +226,7 @@ @article{woldegebriel2015probabilistic
number={14},
pages={7345--7355},
year={2015},
doi={10.1021/acs.analchem.5b01521},
publisher={ACS Publications}
}

Expand All @@ -241,6 +237,7 @@ @article{briskot2019prediction
volume={1587},
pages={101--110},
year={2019},
doi={10.1016/j.chroma.2018.11.076},
publisher={Elsevier}
}

Expand All @@ -251,6 +248,7 @@ @article{yamamoto2021uncertainty
volume={175},
pages={223--237},
year={2021},
doi={10.1016/j.cherd.2021.09.003},
publisher={Elsevier}
}

Expand All @@ -262,6 +260,7 @@ @article{wiczling2016much
number={1},
pages={997--1002},
year={2016},
doi={10.1021/acs.analchem.5b03859},
publisher={ACS Publications}
}

Expand All @@ -273,16 +272,18 @@ @article{kelly1971estimation
number={10},
pages={1170--1183},
year={1971},
doi={10.1021/ac60304a011},
publisher={ACS Publications}
}

@article{kelly1971application,
title={Application of method of maximum posterior probability to estimation of gas-chromatographic peak parmeters},
title={Application of method of maximum posterior probability to estimation of gas-chromatographic peak parameters},
author={Kelly, PC and Harris, WE},
journal={Analytical Chemistry},
volume={43},
number={10},
pages={1184--1195},
year={1971},
doi={10.1021/ac60304a005},
publisher={ACS Publications}
}
13 changes: 5 additions & 8 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ Since this is a time-consuming, not to mention tedious, procedure and introduces
The advantage of this approach is the complete integration of all relevant parameters – i.e. baseline, peak area and height, mean, signal-to-noise ratio etc. – into one single model through which all parameters are estimated simultaneously.
Furthermore, Bayesian inference comes with uncertainty quantification for all peak model parameters, and thus does not merely yield a point estimate as would commonly be the case.
It also grants access to novel metrics for avoiding false positives and negatives by rejecting signals where a) a convergence criterion of the peak fitting procedure was not fulfilled or b) the uncertainty of the estimated parameters exceeded a user-defined threshold.
By employing peak fitting to uncover peak parameters – most importantly the area –, this approach thus differs from recent applications of Bayesian statistics to chromatographic peak data which e.g. focussed on peak detection [@vivo2012bayesian; @woldegebriel2015probabilistic], method optimization [@wiczling2016much] and simulations of chromatography [@briskot2019prediction; @yamamoto2021uncertainty].
The first studies to be published about this topic contain perhaps the technique most similar in spirit to the present one since functions made of an idealized peak shape and a noise term are fitted but beyond this common starting point the methodolody is quiet distinct [@kelly1971estimation; @kelly1971application].
By employing peak fitting to uncover peak parameters – most importantly the area – this approach thus differs from recent applications of Bayesian statistics to chromatographic peak data which e.g. focussed on peak detection [@vivo2012bayesian; @woldegebriel2015probabilistic], method optimization [@wiczling2016much] and simulations of chromatography [@briskot2019prediction; @yamamoto2021uncertainty].
The first studies to be published about this topic contain perhaps the technique most similar in spirit to the present one since functions made of an idealized peak shape and a noise term are fitted but beyond this common starting point the methodology is quite distinct [@kelly1971estimation; @kelly1971application].

# Materials and Methods
## Implementation
Expand All @@ -77,22 +77,19 @@ Since the inference data is stored alongside graphs and report sheets, users may
$\texttt{PeakPerformance}$ accommodates the use of a pre-manufactured data pipeline for standard applications (Fig. 1) as well as the creation of custom data pipelines using only its core functions.
The provided data analysis pipeline was designed in a user-friendly way and is covered by multiple example notebooks.

Before using $\texttt{PeakPerformance}$, the user has to supply raw data files containing a NumPy array with time in the first and intensity in the second dimension for each peak according as described in detail in the documentation.
Before using $\texttt{PeakPerformance}$, the user has to supply raw data files containing a NumPy array with time in the first and intensity in the second dimension for each peak as described in detail in the documentation.
Using the $\texttt{prepare\_model\_selection()}$ method, an Excel template file ("Template.xlsx") for inputting user information is prepared and stored in the raw data directory.

Since targeted LC-MS/MS analyses essentially cycle through a list of mass traces for every sample, a model type has to be assigned to each mass trace.
If this is not done by the user, an optional automated model selection step will be performed, where one exemplary peak per mass trace is analyzed with all models to identify the most appropriate one.
The automated model selection can be started using the $\texttt{model\_selection()}$ function from the pipeline module and will be performed successively for each mass trace.
The results for each model are ranked with the $\texttt{compare()}$ function of the ArviZ package based on Pareto-smoothed importance sampling leave-one-out cross-validation (LOO-PIT) [@RN146; @RN145].
Its results for each model are ranked based on Pareto-smoothed importance sampling leave-one-out cross-validation (PSIS-LOO-CV) [@RN146; @RN145].

![](./Fig3_PP-standalone.png)
__Figure 1:__ Overview of the pre-manufactured data analysis pipeline featured in $\texttt{PeakPerformance}$.

Subsequently, the peak analysis pipeline can be started with the function $\texttt{pipeline()}$ from the $\texttt{pipeline}$ module.
Depending on whether the "pre-filtering" option was selected, an optional filtering step will be executed to reject signals where clearly no peak is present before sampling, thus saving computation time.
This filtering step combines the $\texttt{find\_peaks()}$ function from the SciPy package [@scipy] with a user-defined minimum signal-to-noise threshold and may reject a great many signals before sampling, e.g. in the case of isotopic labeling experiments where every theoretically achievable mass isotopomer needs to be investigated, yet depending on the input labeling mixture, the majority of them might not be present in actuality.
Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is conducted using a No-U-Turn Sampler (NUTS) [@RN173], preferably - if installed in the Python environment - the nutpie sampler [@nutpie] due to its highly increased performance compared to the default sampler of PyMC.
Before sampling from the posterior distribution, a prior predictive check is performed.
When a posterior distribution has been obtained, the main filtering step follows, which accepts or rejects the signal based on the convergence of the Markov chains – assessed via the potential scale reduction factor [@RN152], i.e. the $\hat{R}$ statistic – and on the uncertainty of the determined peak parameters.
If a signal was accepted as a peak, a posterior predictive check is conducted and added to the inference data object resulting from the model simulation.
Regarding the performance of the simulation, in our tests an analysis of a single peak took 20 s to 30 s and of a double peak 25 s to 90 s.
Expand Down Expand Up @@ -124,7 +121,7 @@ __Table 2:__ Depiction of the results for the most important peak parameters of
![](./summary_joint.svg){width="100%"}

In this case, the fits were successful and convergence was reached for all parameters.
Most notably and for the first time, the measurement noise was taken into account when determining the peak area as represented by its standard deviation and as can be observed in the posterior predictive plots where the noisy data points fall within the boundary of the 95 % HDI.\\
Most notably and for the first time, the measurement noise was taken into account when determining the peak area as represented by its standard deviation and as can be observed in the posterior predictive plots where the noisy data points fall within the boundary of the 95 % HDI.
In the documentation, there is a study featuring simulated and experimental data to validate $\texttt{PeakPerformance}$'s results against a commercially available vendor software for peak integration showing that comparable results are indeed obtained.


Expand Down

0 comments on commit 98731ef

Please sign in to comment.