From 78fbcea72b9dcc8fb047b50e26fde656aae2fbe2 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 14:41:42 -0400
Subject: [PATCH 01/29] add continuous integration

---
 .github/workflows/ci.yml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..52f1904
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,28 @@
+name: continuous-integration
+
+on: [push]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
+        # Latest pyOpenMS supports Python 3.10, and 3.11
+        python-version: ["3.10", "3.11"]
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest
+        pip install .
+
+    - name: Test
+      run: |
+        python -m pytest tests/ 

From b34300b03e7b85a3baa1fa05376fa460c45afd6e Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 14:44:20 -0400
Subject: [PATCH 02/29] preinstall numpy

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 52f1904..db704f8 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,6 +21,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest
+        pip install numpy
         pip install .
 
     - name: Test

From 955e1f13e095b8325d5e4183cb7a4a1024660830 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 14:53:07 -0400
Subject: [PATCH 03/29] remove numpy from setup

---
 setup.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index d7e7621..ac191ee 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,5 @@
 import sys
-import numpy
+#import numpy
 from setuptools import setup, find_packages
 from distutils.extension import Extension
 
@@ -36,7 +36,6 @@
       url="https://github.com/PyProphet/pyprophet",
       packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
       include_package_data=True,
-      include_dirs=[numpy.get_include()],
       classifiers=[
           'Development Status :: 3 - Alpha',
           'Environment :: Console',

From a6841366bc53cc588dddca8aeb0dbde36419c416 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 14:56:45 -0400
Subject: [PATCH 04/29] install numpy in setup script

---
 setup.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ac191ee..faadc30 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,15 @@
 import sys
-#import numpy
 from setuptools import setup, find_packages
 from distutils.extension import Extension
 
+try:
+    import numpy
+except ImportError:
+    print("Numpy is not installed. Installing it now.")
+    import subprocess
+    subprocess.check_call(['pip', 'install', 'numpy'])
+    import numpy
+
 try:
     from Cython.Build import cythonize
 except ImportError:
@@ -36,6 +43,7 @@
       url="https://github.com/PyProphet/pyprophet",
       packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
       include_package_data=True,
+      include_dirs=[numpy.get_include()],
       classifiers=[
           'Development Status :: 3 - Alpha',
           'Environment :: Console',

From ff5804c0effb5cf7da838926faa54a2289b34a6f Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 15:04:39 -0400
Subject: [PATCH 05/29] convert to .toml setup

convert to .toml setup for building on github
---
 pyproject.toml | 50 ++++++++++++++++++++++++++++
 setup.py       | 89 +++++++++-----------------------------------------
 2 files changed, 66 insertions(+), 73 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4a665c0
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,50 @@
+[build-system]
+requires = ["setuptools", "wheel", "numpy", "cython"]  # Dependencies needed to build the package
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pyprophet"
+version = "2.2.8"
+description = "PyProphet: Semi-supervised learning and scoring of OpenSWATH results."
+readme = { file = "README.md", content-type = "text/markdown" }
+license = { text = "BSD" }
+authors = [{ name = "The PyProphet Developers", email = "rocksportrocker@gmail.com" }]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Environment :: Console",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Chemistry"
+]
+keywords = ["bioinformatics", "openSWATH", "mass spectrometry"]
+
+# Dependencies required for runtime
+dependencies = [
+    "Click",
+    "duckdb",
+    "numpy >= 1.9.0",
+    "scipy",
+    "pandas >= 0.17",
+    "cython",
+    "numexpr >= 2.10.1",
+    "scikit-learn >= 0.17",
+    "xgboost",
+    "hyperopt",
+    "statsmodels >= 0.8.0",
+    "matplotlib",
+    "tabulate",
+    "pyarrow",
+    "pypdf"
+]
+
+# Define console entry points
+[project.scripts]
+pyprophet = "pyprophet.main:cli"
+
+[tool.setuptools]
+packages = ["pyprophet"]
+include-package-data = true
+zip-safe = false
+
diff --git a/setup.py b/setup.py
index faadc30..489ea94 100644
--- a/setup.py
+++ b/setup.py
@@ -1,80 +1,23 @@
 import sys
-from setuptools import setup, find_packages
-from distutils.extension import Extension
+from setuptools import setup, Extension
+from Cython.Build import cythonize
+import numpy
 
-try:
-    import numpy
-except ImportError:
-    print("Numpy is not installed. Installing it now.")
-    import subprocess
-    subprocess.check_call(['pip', 'install', 'numpy'])
-    import numpy
+use_cython = True
+ext = ".pyx" if use_cython else ".c"
 
-try:
-    from Cython.Build import cythonize
-except ImportError:
-    use_cython = False
-else:
-    use_cython = True
-
-cmdclass = {}
-ext_modules = []
+extensions = [
+    Extension(
+        "pyprophet._optimized",
+        [f"pyprophet/_optimized{ext}"],
+        include_dirs=[numpy.get_include()],
+    )
+]
 
 if use_cython:
-    ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.pyx"])]
-    ext_modules = cythonize(ext_modules)
-else:
-    ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.c"])]
+    extensions = cythonize(extensions)
 
-# read the contents of README for PyPI
-from os import path
-this_directory = path.abspath(path.dirname(__file__))
-with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f:
-    long_description = f.read()
+setup(
+    ext_modules=extensions,
+)
 
-setup(name='pyprophet',
-      version="2.2.8",
-      author="The PyProphet Developers",
-      author_email="rocksportrocker@gmail.com",
-      description="PyProphet: Semi-supervised learning and scoring of OpenSWATH results.",
-      long_description=long_description,
-      long_description_content_type='text/markdown',
-      license="BSD",
-      url="https://github.com/PyProphet/pyprophet",
-      packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
-      include_package_data=True,
-      include_dirs=[numpy.get_include()],
-      classifiers=[
-          'Development Status :: 3 - Alpha',
-          'Environment :: Console',
-          'Intended Audience :: Science/Research',
-          'License :: OSI Approved :: BSD License',
-          'Operating System :: OS Independent',
-          'Topic :: Scientific/Engineering :: Bio-Informatics',
-          'Topic :: Scientific/Engineering :: Chemistry',
-      ],
-      zip_safe=False,
-      install_requires=[
-          "Click",
-          "duckdb",
-          "numpy >= 1.9.0",
-          "scipy",
-          "pandas >= 0.17",
-          "cython",
-          "numexpr >= 2.10.1",
-          "scikit-learn >= 0.17",
-          "xgboost",
-          "hyperopt",
-          "statsmodels >= 0.8.0",
-          "matplotlib",
-          "tabulate",
-          "pyarrow",
-          "pypdf"
-      ],
-      entry_points={
-          'console_scripts': [
-              "pyprophet=pyprophet.main:cli",
-              ]
-      },
-      ext_modules=ext_modules,
-      )

From 4b6750ad33585b5c3d8bcca2bf9db2e52cc21244 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 15:12:48 -0400
Subject: [PATCH 06/29] remove numpy from requirement

---
 .github/workflows/ci.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index db704f8..52f1904 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,6 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest
-        pip install numpy
         pip install .
 
     - name: Test

From 122753d3afbd2c0e2c182d0c16538ebaf1463cc0 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 15:13:56 -0400
Subject: [PATCH 07/29] just ubuntu for now

---
 .github/workflows/ci.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 52f1904..a64618f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,7 +7,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
+        os: [ubuntu-latest]
+          #os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
         # Latest pyOpenMS supports Python 3.10, and 3.11
         python-version: ["3.10", "3.11"]
     steps:

From b9f35afb7c064083804bca8fc86bb84b2ff945a4 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 16:31:15 -0400
Subject: [PATCH 08/29] fix setup.py and .toml

both are required because of cython, works locally now
---
 pyproject.toml |  5 ++---
 setup.py       | 29 +++++++++++++----------------
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4a665c0..173a84b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,7 +44,6 @@ dependencies = [
 pyprophet = "pyprophet.main:cli"
 
 [tool.setuptools]
-packages = ["pyprophet"]
+packages = { find = { exclude = ["ez_setup", "examples", "tests"] } }
 include-package-data = true
-zip-safe = false
-
+zip-safe = false
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 489ea94..6dd7624 100644
--- a/setup.py
+++ b/setup.py
@@ -1,23 +1,20 @@
-import sys
-from setuptools import setup, Extension
+from setuptools import setup, Extension, find_packages
 from Cython.Build import cythonize
 import numpy
 
-use_cython = True
-ext = ".pyx" if use_cython else ".c"
-
-extensions = [
-    Extension(
-        "pyprophet._optimized",
-        [f"pyprophet/_optimized{ext}"],
-        include_dirs=[numpy.get_include()],
-    )
-]
+try:
+    from Cython.Build import cythonize
+except ImportError:
+    use_cython = False
+else:
+    use_cython = True
 
+ext_modules = []
 if use_cython:
-    extensions = cythonize(extensions)
+    ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.pyx"])]
+    ext_modules = cythonize(ext_modules)
+else:
+    ext_modules += [Extension("pyprophet._optimized", ["pyprophet/_optimized.c"])]
 
-setup(
-    ext_modules=extensions,
-)
+setup(name='pyprophet', ext_modules=ext_modules, include_dirs=[numpy.get_include()])
 

From be62dfb461a6f4b763ea60b02e4c52a15a2b80ec Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 30 Oct 2024 16:42:07 -0400
Subject: [PATCH 09/29] add line to build extension

---
 .github/workflows/ci.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a64618f..5f0f093 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,6 +24,9 @@ jobs:
         pip install pytest
         pip install .
 
+    - name: Compile cython module
+      run: python setup.py build_ext --inplace 
+
     - name: Test
       run: |
         python -m pytest tests/ 

From 15dd65babb4e1b1b8f1da7bd7dffb1469d6bdc57 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 31 Oct 2024 16:58:30 -0400
Subject: [PATCH 10/29] fix: stats tests

---
 pyprophet/stats.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyprophet/stats.py b/pyprophet/stats.py
index 6934e87..349ef7f 100644
--- a/pyprophet/stats.py
+++ b/pyprophet/stats.py
@@ -233,7 +233,7 @@ def pi0est(p_values, lambda_ = np.arange(0.05,1.0,0.05), pi0_method = "smoother"
 
 @profile
 def qvalue(p_values, pi0, pfdr = False):
-    p = np.array(p_values)
+    p = np.array(p_values).copy()
 
     qvals_out = p
     rm_na = np.isfinite(p)
@@ -277,7 +277,7 @@ def bw_nrd0(x):
 @profile
 def lfdr(p_values, pi0, trunc = True, monotone = True, transf = "probit", adj = 1.5, eps = np.power(10.0,-8)):
     """ Estimate local FDR / posterior error probability from p-values according to bioconductor/qvalue """
-    p = np.array(p_values)
+    p = np.array(p_values).copy()
 
     # Compare to bioconductor/qvalue reference implementation
     # import rpy2

From c99db26a06be12917b0a49343ced8eada1895db1 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 31 Oct 2024 17:27:14 -0400
Subject: [PATCH 11/29] add pytest-regtest to workflow

---
 .github/workflows/ci.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5f0f093..b33d29d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -22,6 +22,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install pytest
+        pip install pytest-regtest
         pip install .
 
     - name: Compile cython module

From fdb5513818b4280d8478e9720dc1c26257cb2ee5 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Fri, 1 Nov 2024 10:42:09 -0400
Subject: [PATCH 12/29] update autotuning so it does not fail

note tests still fail though, possibly because random seed is different?
---
 pyprophet/classifiers.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pyprophet/classifiers.py b/pyprophet/classifiers.py
index 750bc82..0aefd04 100644
--- a/pyprophet/classifiers.py
+++ b/pyprophet/classifiers.py
@@ -110,7 +110,8 @@ def objective(params):
             
             clf = xgb.XGBClassifier(random_state=42, verbosity=0, objective='binary:logitraw', eval_metric='auc', **params)
 
-            score = cross_val_score(clf, X, y, scoring='roc_auc', n_jobs=self.threads, cv=KFold(n_splits=3, shuffle=True, random_state=np.random.RandomState(42))).mean()
+            rng = np.random.default_rng(42)
+            score = cross_val_score(clf, X, y, scoring='roc_auc', n_jobs=self.threads, cv=KFold(n_splits=3, shuffle=True, random_state=42)).mean()
             # click.echo("Info: AUC: {:.3f} hyperparameters: {}".format(score, params))
             return score
 
@@ -129,7 +130,8 @@ def objective(params):
         xgb_params_complexity = self.xgb_params_tuned
         xgb_params_complexity.update({k: self.xgb_params_space[k] for k in ('max_depth', 'min_child_weight')})
 
-        best_complexity = fmin(fn=objective, space=xgb_params_complexity, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42))
+        rng = np.random.default_rng(42)
+        best_complexity = fmin(fn=objective, space=xgb_params_complexity, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng)
         best_complexity['max_depth'] = int(best_complexity['max_depth'])
         best_complexity['min_child_weight'] = int(best_complexity['min_child_weight'])
 
@@ -139,7 +141,7 @@ def objective(params):
         xgb_params_gamma = self.xgb_params_tuned
         xgb_params_gamma['gamma'] = self.xgb_params_space['gamma']
 
-        best_gamma = fmin(fn=objective, space=xgb_params_gamma, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42))
+        best_gamma = fmin(fn=objective, space=xgb_params_gamma, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng)
 
         self.xgb_params_tuned.update(best_gamma)
 
@@ -147,7 +149,7 @@ def objective(params):
         xgb_params_subsampling = self.xgb_params_tuned
         xgb_params_subsampling.update({k: self.xgb_params_space[k] for k in ('subsample', 'colsample_bytree', 'colsample_bylevel', 'colsample_bynode')})
 
-        best_subsampling = fmin(fn=objective, space=xgb_params_subsampling, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42))
+        best_subsampling = fmin(fn=objective, space=xgb_params_subsampling, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng)
 
         self.xgb_params_tuned.update(best_subsampling)
 
@@ -155,7 +157,7 @@ def objective(params):
         xgb_params_regularization = self.xgb_params_tuned
         xgb_params_regularization.update({k: self.xgb_params_space[k] for k in ('lambda', 'alpha')})
 
-        best_regularization = fmin(fn=objective, space=xgb_params_regularization, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42))
+        best_regularization = fmin(fn=objective, space=xgb_params_regularization, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng)
 
         self.xgb_params_tuned.update(best_regularization)
 
@@ -163,7 +165,7 @@ def objective(params):
         xgb_params_learning = self.xgb_params_tuned
         xgb_params_learning['eta'] = self.xgb_params_space['eta']
 
-        best_learning = fmin(fn=objective, space=xgb_params_learning, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=np.random.RandomState(42))
+        best_learning = fmin(fn=objective, space=xgb_params_learning, algo=tpe.suggest, max_evals=self.xgb_hyperparams['autotune_num_rounds'], rstate=rng)
 
         self.xgb_params_tuned.update(best_learning)
         click.echo("Info: Optimal hyperparameters: {}".format(self.xgb_params_tuned))

From 3c8bfbdcbf85bd30a3d6310ecd66512929a875ea Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Fri, 1 Nov 2024 12:47:14 -0400
Subject: [PATCH 13/29] fix: fix level context tests

---
 pyprophet/levels_contexts.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pyprophet/levels_contexts.py b/pyprophet/levels_contexts.py
index c7ce194..edc3602 100644
--- a/pyprophet/levels_contexts.py
+++ b/pyprophet/levels_contexts.py
@@ -33,7 +33,12 @@ def statistics_report(data, outfile, context, analyte, parametric, pfdr, pi0_lam
         outfile = outfile + "_" + str(data['run_id'].unique()[0])
 
     # export PDF report
-    save_report(outfile + "_" + context + "_" + analyte + ".pdf", outfile + ": " + context + " " + analyte + "-level error-rate control", data[data.decoy==1]["score"], data[data.decoy==0]["score"], stat_table["cutoff"], stat_table["svalue"], stat_table["qvalue"], data[data.decoy==0]["p_value"], pi0, color_palette)
+    save_report(outfile + "_" + context + "_" + analyte + ".pdf", 
+                outfile + ": " + context + " " + analyte + "-level error-rate control", 
+                data[data.decoy==1]["score"].values, data[data.decoy==0]["score"].values, stat_table["cutoff"].values, 
+                stat_table["svalue"].values, stat_table["qvalue"].values, data[data.decoy==0]["p_value"].values, 
+                pi0, 
+                color_palette)
 
     return(data)
 
@@ -184,7 +189,7 @@ def infer_proteins(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m
     con.close()
 
     if context == 'run-specific':
-        data = data.groupby('run_id').apply(statistics_report, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette).reset_index()
+        data = data.groupby('run_id').apply(statistics_report, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette)
 
     elif context in ['global', 'experiment-wide']:
         data = statistics_report(data, outfile, context, "protein", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette)
@@ -257,7 +262,7 @@ def infer_peptides(infile, outfile, context, parametric, pfdr, pi0_lambda, pi0_m
     con.close()
 
     if context == 'run-specific':
-        data = data.groupby('run_id').apply(statistics_report, outfile, context, "peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette).reset_index()
+        data = data.groupby('run_id').apply(statistics_report, outfile, context, "peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette)
 
     elif context in ['global', 'experiment-wide']:
         data = statistics_report(data, outfile, context, "peptide", parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, color_palette)

From d320945efd572b562965f5a605173aaf9cba76dc Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Fri, 1 Nov 2024 13:08:44 -0400
Subject: [PATCH 14/29] update export-parquet tests and fix tests

update the tests to remove old parameters and fix tests that were
failing
---
 pyprophet/export_parquet.py            |  2 +-
 tests/test_pyprophet_export_parquet.py | 21 +++------------------
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/pyprophet/export_parquet.py b/pyprophet/export_parquet.py
index fe0d3ed..795a12a 100644
--- a/pyprophet/export_parquet.py
+++ b/pyprophet/export_parquet.py
@@ -172,7 +172,7 @@ def export_to_parquet(infile, outfile, transitionLevel, onlyFeatures=False):
 
     # transition level
     if transitionLevel:
-        columns['FEATURE_TRANSITION'] = ['AREA_INTENSITY', 'TOTAL_AREA_INTENSITY', 'APEX_INTENSITY', 'TOTAL_MI'] + getVarColumnNames(condb, 'FEATURE_TRANSITION')
+        columns['FEATURE_TRANSITION'] = ['AREA_INTENSITY', 'TOTAL_AREA_INTENSITY', 'APEX_INTENSITY', 'TOTAL_MI'] + getVarColumnNames(con, 'FEATURE_TRANSITION')
         columns['TRANSITION'] = ['TRAML_ID', 'PRODUCT_MZ', 'CHARGE', 'TYPE', 'ORDINAL', 'DETECTING', 'IDENTIFYING', 'QUANTIFYING', 'LIBRARY_INTENSITY']
         columns['TRANSITION_PRECURSOR_MAPPING'] = ['TRANSITION_ID']
 
diff --git a/tests/test_pyprophet_export_parquet.py b/tests/test_pyprophet_export_parquet.py
index b4f3dba..6c01696 100644
--- a/tests/test_pyprophet_export_parquet.py
+++ b/tests/test_pyprophet_export_parquet.py
@@ -27,14 +27,14 @@ def _run_cmdline(cmdline):
     return stdout
 
 
-def _run_export_parquet_single_run(temp_folder, transitionLevel=False, threads=1, chunksize=1000, pd_testing_kwargs=dict(check_dtype=False, check_names=False), onlyFeatures=False):
+def _run_export_parquet_single_run(temp_folder, transitionLevel=False, pd_testing_kwargs=dict(check_dtype=False, check_names=False), onlyFeatures=False):
     os.chdir(temp_folder)
     DATA_NAME="dummyOSWScoredData.osw"
     data_path = os.path.join(DATA_FOLDER, DATA_NAME)
     conn = sqlite3.connect(DATA_NAME)
     shutil.copy(data_path, temp_folder)
 
-    cmdline = "pyprophet export-parquet --in={} --threads={} --chunksize={}".format(DATA_NAME, threads, chunksize)
+    cmdline = "pyprophet export-parquet --in={}".format(DATA_NAME)
 
     # if testing transition level add --transitionLevel flag
     if transitionLevel:
@@ -112,19 +112,4 @@ def test_export_parquet_single_run_onlyFeatures(tmpdir):
 
 
 def test_export_parquet_single_run_transitionLevel_onlyFeatures(tmpdir):
-	_run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True)
-
-
-def test_multithread_export_parquet_single_run(tmpdir):
-	_run_export_parquet_single_run(tmpdir, transitionLevel=False, threads=2, chunksize=2)
-
-def test_multithread_export_parquet_single_run_transitionLevel(tmpdir):
-	_run_export_parquet_single_run(tmpdir, transitionLevel=True, threads=2, chunksize=2)
-
-
-def test_multithread_export_parquet_single_run_onlyFeatures(tmpdir):
-	_run_export_parquet_single_run(tmpdir, onlyFeatures=True, threads=2, chunksize=4)
-
-
-def test_multithread_export_parquet_single_run_transitionLevel_onlyFeatures(tmpdir):
-	_run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True, threads=2, chunksize=4)
+	_run_export_parquet_single_run(tmpdir, transitionLevel=True, onlyFeatures=True)
\ No newline at end of file

From 71804e413f1ccb07ddb6abdbf56de0a323c36cce Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 20 Nov 2024 08:26:43 -0500
Subject: [PATCH 15/29] raise error if standard deviation computed is 0

This is appropriate since the mean and standard deviation are always
computed in the context of normalization, and normalization is not
possible when the standard deviation is 0
---
 pyprophet/stats.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pyprophet/stats.py b/pyprophet/stats.py
index 349ef7f..f037e27 100644
--- a/pyprophet/stats.py
+++ b/pyprophet/stats.py
@@ -118,7 +118,10 @@ def posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null):
 
 
 def mean_and_std_dev(values):
-    return np.mean(values), np.std(values, ddof=1)
+    std = np.std(values, ddof=1)
+    if std == 0:
+        raise RuntimeError("Computed standard deviation is 0, cannot perform normalization")
+    return np.mean(values), std
 
 
 def pnorm(stat, stat0):

From 516389e37244439d2081d7a26571b975a21e3aae Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 11:31:21 -0500
Subject: [PATCH 16/29] test: set tree method as exact for tests

---
 pyprophet/main.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyprophet/main.py b/pyprophet/main.py
index b3257ea..24f97ab 100644
--- a/pyprophet/main.py
+++ b/pyprophet/main.py
@@ -106,6 +106,8 @@ def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fracti
     xgb_hyperparams = {'autotune': xgb_autotune, 'autotune_num_rounds': 10, 'num_boost_round': 100, 'early_stopping_rounds': 10, 'test_size': 0.33}
 
     xgb_params = {'eta': 0.3, 'gamma': 0, 'max_depth': 6, 'min_child_weight': 1, 'subsample': 1, 'colsample_bytree': 1, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'lambda': 1, 'alpha': 0, 'scale_pos_weight': 1, 'verbosity': 0, 'objective': 'binary:logitraw', 'nthread': 1, 'eval_metric': 'auc'}
+    if test:
+        xgb_params['tree_method'] = 'exact'
 
     xgb_params_space = {'eta': hp.uniform('eta', 0.0, 0.3), 'gamma': hp.uniform('gamma', 0.0, 0.5), 'max_depth': hp.quniform('max_depth', 2, 8, 1), 'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1), 'subsample': 1, 'colsample_bytree': 1, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'lambda': hp.uniform('lambda', 0.0, 1.0), 'alpha': hp.uniform('alpha', 0.0, 1.0), 'scale_pos_weight': 1.0, 'verbosity': 0, 'objective': 'binary:logitraw', 'nthread': 1, 'eval_metric': 'auc'}
 

From ca4a14eb83487b6ffba583f739e5c474a0258bc1 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 11:38:47 -0500
Subject: [PATCH 17/29] update snapshot tests

---
 .../test_pyprophet_score.test_osw_4.out       | 20 +++++++++----------
 .../test_pyprophet_score.test_osw_5.out       | 20 +++++++++----------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out b/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out
index 8f458fc..bde61a3 100644
--- a/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out
+++ b/tests/_regtest_outputs/test_pyprophet_score.test_osw_4.out
@@ -1,14 +1,14 @@
              feature_id  ms1_precursor_pep  ms2_peakgroup_pep  ms2_precursor_pep
-0  -4409520928686189639             0.0045             0.0045             0.1757
-1  -7771919224870429764             0.0045             0.0045             0.1757
-2   -797725006165535344             0.0045             0.0045             0.1291
-3  -1732939685941081620             0.0045             0.0045             0.1757
-4  -6747816958328369759             0.0045             0.0045             0.1757
+0  -4409520928686189639             0.0008             0.0024             0.8813
+1  -7771919224870429764             0.0008             0.0024             0.8813
+2   -797725006165535344             0.0008             0.0024             0.0883
+3  -1732939685941081620             0.0008             0.0024             0.8611
+4  -6747816958328369759             0.0008             0.0024             0.8813
 ..                  ...                ...                ...                ...
-95   237580321205345393             0.0045             0.0045             0.1291
-96  5416940836005312912             0.0045             0.0045             0.1291
-97 -7541234528799769804             0.0045             0.0045             0.1757
-98  8036548921756545335             0.0045             0.0045             0.1291
-99 -6558503086717676095             0.0045             0.0045             0.1291
+95 -6034887541083502974             0.0008             0.0024             0.8461
+96   483971408708572459             0.0008             0.0024             0.0883
+97  5086440667566053402             0.0008             0.0024             0.9278
+98  7291105701317857435             0.0008             0.0024             0.8813
+99   237580321205345393             0.0008             0.0024             0.8461
 
 [100 rows x 4 columns]
diff --git a/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out b/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out
index e8d7f7c..8b58143 100644
--- a/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out
+++ b/tests/_regtest_outputs/test_pyprophet_score.test_osw_5.out
@@ -1,14 +1,14 @@
              feature_id  ms1_precursor_pep  ms2_peakgroup_pep  ms2_precursor_pep
-0  -4409520928686189639             0.0045             0.0043             0.6924
-1  -7771919224870429764             0.0045             0.0043             0.6924
-2   -797725006165535344             0.0045             0.0043             0.3053
-3  -1732939685941081620             0.0045             0.0043             0.6924
-4  -6747816958328369759             0.0045             0.0043             0.6924
+0  -4409520928686189639             0.0008              0.004             0.2748
+1  -7771919224870429764             0.0008              0.004             0.3580
+2   -797725006165535344             0.0008              0.004             0.2370
+3  -1732939685941081620             0.0008              0.004             0.2748
+4  -6747816958328369759             0.0008              0.004             0.3580
 ..                  ...                ...                ...                ...
-95 -5977524328878179832             0.0045             0.0043             0.6924
-96 -6034887541083502974             0.0045             0.0043             0.6924
-97   483971408708572459             0.0045             0.0043             0.1735
-98  5086440667566053402             0.0045             0.0043             0.6924
-99  7291105701317857435             0.0045             0.0043             0.6924
+95  8943629340769664660             0.0008              0.004             0.2370
+96 -6034887541083502974             0.0008              0.004             0.2748
+97   483971408708572459             0.0008              0.004             0.2370
+98  5086440667566053402             0.0008              0.004             0.3580
+99  7291105701317857435             0.0008              0.004             0.2748
 
 [100 rows x 4 columns]

From 0704475b119455350ddbf5e97cf676778f5f2681 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 12:07:03 -0500
Subject: [PATCH 18/29] update actions to depend on requirements file

This pins the dependency versions so tests do not fail. Dependabot will be
added to keep the requirements file up to date.
---
 .github/workflows/ci.yml |   7 +--
 requirements.txt         | 118 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 4 deletions(-)
 create mode 100644 requirements.txt

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b33d29d..85a120e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,8 +9,8 @@ jobs:
       matrix:
         os: [ubuntu-latest]
           #os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
-        # Latest pyOpenMS supports Python 3.10, and 3.11
-        python-version: ["3.10", "3.11"]
+        # Requirements file generated with python=3.11 
+        python-version: ["3.11"]
     steps:
     - uses: actions/checkout@v4
 
@@ -21,8 +21,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pytest
-        pip install pytest-regtest
+        pip install -r requirements.txt # test with requirements file so can easily bump with dependabot
         pip install .
 
     - name: Compile cython module
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3129b63
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,118 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+#    pip-compile --all-extras --output-file=requirements.txt
+#
+click==8.1.7
+    # via pyprophet (setup.py)
+cloudpickle==3.1.0
+    # via hyperopt
+contourpy==1.3.0
+    # via matplotlib
+cycler==0.12.1
+    # via matplotlib
+cython==3.0.11
+    # via pyprophet (setup.py)
+duckdb==1.1.3
+    # via
+    #   duckdb-extension-sqlite-scanner
+    #   duckdb-extensions
+    #   pyprophet (setup.py)
+duckdb-extension-sqlite-scanner==1.1.3
+    # via pyprophet (setup.py)
+duckdb-extensions==1.1.3
+    # via pyprophet (setup.py)
+fonttools==4.55.0
+    # via matplotlib
+future==1.0.0
+    # via hyperopt
+hyperopt==0.2.7
+    # via pyprophet (setup.py)
+iniconfig==2.0.0
+    # via pytest
+joblib==1.4.2
+    # via scikit-learn
+kiwisolver==1.4.7
+    # via matplotlib
+matplotlib==3.9.2
+    # via pyprophet (setup.py)
+networkx==3.2.1
+    # via hyperopt
+numexpr==2.10.1
+    # via pyprophet (setup.py)
+numpy==2.0.2
+    # via
+    #   contourpy
+    #   hyperopt
+    #   matplotlib
+    #   numexpr
+    #   pandas
+    #   patsy
+    #   pyprophet (setup.py)
+    #   scikit-learn
+    #   scipy
+    #   statsmodels
+    #   xgboost
+nvidia-nccl-cu12==2.23.4
+    # via xgboost
+packaging==24.2
+    # via
+    #   matplotlib
+    #   pytest
+    #   statsmodels
+pandas==2.2.3
+    # via
+    #   pyprophet (setup.py)
+    #   statsmodels
+patsy==1.0.1
+    # via statsmodels
+pillow==11.0.0
+    # via matplotlib
+pluggy==1.5.0
+    # via pytest
+py4j==0.10.9.7
+    # via hyperopt
+pyarrow==18.0.0
+    # via pyprophet (setup.py)
+pyparsing==3.2.0
+    # via matplotlib
+pypdf==5.1.0
+    # via pyprophet (setup.py)
+pytest==8.3.3
+    # via
+    #   pyprophet (setup.py)
+    #   pytest-regtest
+pytest-regtest==2.3.3
+    # via pyprophet (setup.py)
+python-dateutil==2.9.0.post0
+    # via
+    #   matplotlib
+    #   pandas
+pytz==2024.2
+    # via pandas
+scikit-learn==1.5.2
+    # via pyprophet (setup.py)
+scipy==1.13.1
+    # via
+    #   hyperopt
+    #   pyprophet (setup.py)
+    #   scikit-learn
+    #   statsmodels
+    #   xgboost
+six==1.16.0
+    # via
+    #   hyperopt
+    #   python-dateutil
+statsmodels==0.14.4
+    # via pyprophet (setup.py)
+tabulate==0.9.0
+    # via pyprophet (setup.py)
+threadpoolctl==3.5.0
+    # via scikit-learn
+tqdm==4.67.0
+    # via hyperopt
+tzdata==2024.2
+    # via pandas
+xgboost==2.1.2
+    # via pyprophet (setup.py)

From ce075f312f7fec1bcb8b5759fbb4b3af4f6b0ba8 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 12:08:41 -0500
Subject: [PATCH 19/29] add dependabot

---
 .github/workflows/dependabot.yml | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 .github/workflows/dependabot.yml

diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml
new file mode 100644
index 0000000..7e6ee06
--- /dev/null
+++ b/.github/workflows/dependabot.yml
@@ -0,0 +1,9 @@
+version: 2
+updates:
+  - package-ecosystem: "pip"
+    directory: "/"  # Location of your pyproject.toml or requirements.txt
+    schedule:
+      interval: "weekly"  # Checks for updates every week
+    commit-message:
+      prefix: "deps"  # Prefix for pull request titles
+    open-pull-requests-limit: 5  # Limit the number of open PRs at a time

From a0235e04b56b3c76ea393d3cd492416b35e73c5f Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 12:46:12 -0500
Subject: [PATCH 20/29] add tests for windows and mac

---
 .github/workflows/ci.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 85a120e..37edb05 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,8 +7,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest]
-          #os: [ubuntu-latest, windows-latest, macos-latest] # remove mac tests
+        os: [ubuntu-latest, windows-latest, macos-latest]
         # Requirements file generated with python=3.11 
         python-version: ["3.11"]
     steps:

From 88291fa7941a95cbacfc2340e927819a923f0ee6 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 12:49:52 -0500
Subject: [PATCH 21/29] remove default_rng

---
 pyprophet/classifiers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyprophet/classifiers.py b/pyprophet/classifiers.py
index 0aefd04..b23c96c 100644
--- a/pyprophet/classifiers.py
+++ b/pyprophet/classifiers.py
@@ -110,7 +110,6 @@ def objective(params):
             
             clf = xgb.XGBClassifier(random_state=42, verbosity=0, objective='binary:logitraw', eval_metric='auc', **params)
 
-            rng = np.random.default_rng(42)
             score = cross_val_score(clf, X, y, scoring='roc_auc', n_jobs=self.threads, cv=KFold(n_splits=3, shuffle=True, random_state=42)).mean()
             # click.echo("Info: AUC: {:.3f} hyperparameters: {}".format(score, params))
             return score

From 08df7cde31f78f1e70c01d42a5534faa79d9c52d Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 12:53:56 -0500
Subject: [PATCH 22/29] remove mac tests

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 37edb05..a3755dc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-latest]
         # Requirements file generated with python=3.11 
         python-version: ["3.11"]
     steps:

From 0c342e9fc59808c97d3cbf36efea61484506945e Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 13:03:58 -0500
Subject: [PATCH 23/29] remove copy of np arrays

---
 pyprophet/stats.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyprophet/stats.py b/pyprophet/stats.py
index f037e27..752ca52 100644
--- a/pyprophet/stats.py
+++ b/pyprophet/stats.py
@@ -236,7 +236,7 @@ def pi0est(p_values, lambda_ = np.arange(0.05,1.0,0.05), pi0_method = "smoother"
 
 @profile
 def qvalue(p_values, pi0, pfdr = False):
-    p = np.array(p_values).copy()
+    p = np.array(p_values)
 
     qvals_out = p
     rm_na = np.isfinite(p)
@@ -280,7 +280,7 @@ def bw_nrd0(x):
 @profile
 def lfdr(p_values, pi0, trunc = True, monotone = True, transf = "probit", adj = 1.5, eps = np.power(10.0,-8)):
     """ Estimate local FDR / posterior error probability from p-values according to bioconductor/qvalue """
-    p = np.array(p_values).copy()
+    p = np.array(p_values)
 
     # Compare to bioconductor/qvalue reference implementation
     # import rpy2

From 527c365926a51fbc8f1a1d636453917da22ab456 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 13:05:18 -0500
Subject: [PATCH 24/29] remove windows tests

currently both mac and windows tests are failing so just keep ubuntu
---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a3755dc..29617bd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest]
+        os: [ubuntu-latest]
         # Requirements file generated with python=3.11 
         python-version: ["3.11"]
     steps:

From 9c6466596be407542d50cc75b3f1f6f86d756362 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 13:38:19 -0500
Subject: [PATCH 25/29] refactor: new function for normalizing score to decoys

---
 pyprophet/data_handling.py   | 11 +++++++++++
 pyprophet/semi_supervised.py |  6 +-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/pyprophet/data_handling.py b/pyprophet/data_handling.py
index a7c50a1..21e6953 100644
--- a/pyprophet/data_handling.py
+++ b/pyprophet/data_handling.py
@@ -5,6 +5,7 @@
 import sys
 import os
 import multiprocessing
+from sklearn.preprocessing import StandardScaler
 
 from .optimized import find_top_ranked, rank
 
@@ -336,6 +337,16 @@ def get_top_target_peaks(self):
     def get_feature_matrix(self, use_main_score):
         min_col = 5 if use_main_score else 6
         return self.df.iloc[:, min_col:-1].values
+    
+    def normalize_score_by_decoys(self, score_col_name):
+        '''
+        normalize the decoy scores to mean 0 and std 1, scale the targets accordingly
+        Args:
+            score_col_name: str, the name of the score column
+        '''
+        td_scores = self.get_top_decoy_peaks()[score_col_name]
+        transform = StandardScaler().fit(td_scores.values.reshape(-1, 1))
+        self.df.loc[:, score_col_name] = transform.transform(self.df[score_col_name].values.reshape(-1, 1))
 
     def filter_(self, idx):
         return Experiment(self.df[idx])
diff --git a/pyprophet/semi_supervised.py b/pyprophet/semi_supervised.py
index 87b56cb..cbe1099 100644
--- a/pyprophet/semi_supervised.py
+++ b/pyprophet/semi_supervised.py
@@ -64,13 +64,9 @@ def learn_randomized(self, experiment, score_columns, working_thread_number):
 
         # after semi supervised iteration: classify full dataset
         clf_scores = self.score(experiment, params)
-        mu, nu = mean_and_std_dev(clf_scores)
         experiment.set_and_rerank("classifier_score", clf_scores)
 
-        td_scores = experiment.get_top_decoy_peaks()["classifier_score"]
-
-        mu, nu = mean_and_std_dev(td_scores)
-        experiment["classifier_score"] = (experiment["classifier_score"] - mu) / nu
+        experiment.normalize_score_by_decoys('classifier_score')
         experiment.rank_by("classifier_score")
 
         top_test_peaks = experiment.get_top_test_peaks()

From 9be0a9a5fa9d0c0cb175f1a4daa7cc845fd6f816 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 13:52:22 -0500
Subject: [PATCH 26/29] replace semi-supervised learning normalization with
 sklearn

replace semi-supervised learning normalization with sklearn which can
handle cases where std = 0
---
 pyprophet/semi_supervised.py | 8 ++------
 pyprophet/stats.py           | 5 +----
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/pyprophet/semi_supervised.py b/pyprophet/semi_supervised.py
index cbe1099..7735131 100644
--- a/pyprophet/semi_supervised.py
+++ b/pyprophet/semi_supervised.py
@@ -3,7 +3,7 @@
 
 from .data_handling import Experiment, update_chosen_main_score_in_table
 from .classifiers import AbstractLearner, XGBLearner
-from .stats import mean_and_std_dev, find_cutoff
+from .stats import find_cutoff
 
 try:
     profile
@@ -88,13 +88,9 @@ def learn_final(self, experiment):
 
         # after semi supervised iteration: classify full dataset
         clf_scores = self.score(experiment, params)
-        mu, nu = mean_and_std_dev(clf_scores)
         experiment.set_and_rerank("classifier_score", clf_scores)
 
-        td_scores = experiment.get_top_decoy_peaks()["classifier_score"]
-
-        mu, nu = mean_and_std_dev(td_scores)
-        experiment["classifier_score"] = (experiment["classifier_score"] - mu) / nu
+        experiment.normalize_score_by_decoys('classifier_score')
         experiment.rank_by("classifier_score")
 
         return params
diff --git a/pyprophet/stats.py b/pyprophet/stats.py
index 752ca52..6934e87 100644
--- a/pyprophet/stats.py
+++ b/pyprophet/stats.py
@@ -118,10 +118,7 @@ def posterior_chromatogram_hypotheses_fast(experiment, prior_chrom_null):
 
 
 def mean_and_std_dev(values):
-    std = np.std(values, ddof=1)
-    if std == 0:
-        raise RuntimeError("Computed standard deviation is 0, cannot perform normalization")
-    return np.mean(values), std
+    return np.mean(values), np.std(values, ddof=1)
 
 
 def pnorm(stat, stat0):

From aa70dbef89dfd27ad554ceddbb51eac393bd1f21 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 14:00:48 -0500
Subject: [PATCH 27/29] revert to numpy std

The sklearn transformation does not support a degrees-of-freedom (ddof)
correction. Keep the normalize_score_by_decoys method but use the numpy
std() method (ddof=1).
---
 pyprophet/data_handling.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pyprophet/data_handling.py b/pyprophet/data_handling.py
index 21e6953..29c52b7 100644
--- a/pyprophet/data_handling.py
+++ b/pyprophet/data_handling.py
@@ -5,7 +5,7 @@
 import sys
 import os
 import multiprocessing
-from sklearn.preprocessing import StandardScaler
+from .stats import mean_and_std_dev 
 
 from .optimized import find_top_ranked, rank
 
@@ -345,8 +345,13 @@ def normalize_score_by_decoys(self, score_col_name):
             score_col_name: str, the name of the score column
         '''
         td_scores = self.get_top_decoy_peaks()[score_col_name]
-        transform = StandardScaler().fit(td_scores.values.reshape(-1, 1))
-        self.df.loc[:, score_col_name] = transform.transform(self.df[score_col_name].values.reshape(-1, 1))
+        mu, nu = mean_and_std_dev(td_scores)
+
+        if nu == 0:
+            raise Exception("Warning: Standard deviation of decoy scores is zero. Cannot normalize scores.")
+        
+        self.df.loc[:, score_col_name] = (self.df[score_col_name] - mu) / nu
+
 
     def filter_(self, idx):
         return Experiment(self.df[idx])

From 005af6c9c1e67fbb1a3acf976293aa29b824c6b4 Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Thu, 21 Nov 2024 14:05:31 -0500
Subject: [PATCH 28/29] minor updates to pyproject.toml

---
 pyproject.toml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 06fa77d..00f104d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "pyprophet"
-version = "2.2.8"
+version = "2.2.9"
 description = "PyProphet: Semi-supervised learning and scoring of OpenSWATH results."
 readme = { file = "README.md", content-type = "text/markdown" }
 license = { text = "BSD" }
@@ -26,7 +26,7 @@ dependencies = [
     "duckdb",
     "duckdb-extensions",
     "duckdb-extension-sqlite-scanner",
-    "numpy >= 1.9.0",
+    "numpy >= 2.0",
     "scipy",
     "pandas >= 0.17",
     "cython",
@@ -41,6 +41,10 @@ dependencies = [
     "pypdf"
 ]
 
+# Optional dependencies 
+[project.optional-dependencies]
+testing = ["pytest", "pytest-regtest"]
+
 # Define console entry points
 [project.scripts]
 pyprophet = "pyprophet.main:cli"
@@ -48,4 +52,4 @@ pyprophet = "pyprophet.main:cli"
 [tool.setuptools]
 packages = { find = { exclude = ["ez_setup", "examples", "tests"] } }
 include-package-data = true
-zip-safe = false
\ No newline at end of file
+zip-safe = false

From 81f78bf16d437d241852797cb6042f4cd8c8e5db Mon Sep 17 00:00:00 2001
From: Joshua Charkow <joshuacharkow@gmail.com>
Date: Wed, 27 Nov 2024 10:47:30 -0500
Subject: [PATCH 29/29] fix: ValueError: Buffer dtype mismatch, expected
 'DATA_TYPE' but got 'double'

need to cast to np.float32
---
 .gitignore                 | 1 +
 pyprophet/data_handling.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index f374cdc..d9a6697 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,4 @@ nosetests.xml
 
 # vim
 *.sw[opqrs]
+*~
diff --git a/pyprophet/data_handling.py b/pyprophet/data_handling.py
index 29c52b7..23ddd65 100644
--- a/pyprophet/data_handling.py
+++ b/pyprophet/data_handling.py
@@ -360,7 +360,7 @@ def filter_(self, idx):
     def add_peak_group_rank(self):
         ids = self.df.tg_num_id.values
         scores = self.df.d_score.values
-        peak_group_ranks = rank(ids, scores)
+        peak_group_ranks = rank(ids, scores.astype(np.float32, copy=False))
         self.df["peak_group_rank"] = peak_group_ranks
 
     @profile