Skip to content

Commit

Permalink
msi feature detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Philipbear committed Feb 24, 2025
1 parent b4c248c commit 7a2a6b3
Show file tree
Hide file tree
Showing 10 changed files with 497 additions and 229 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ For more options, run:
```bash
ms1_id msi --help
```
Expected runtime <5 min for a single MS imaging dataset.
Expected runtime is ~10 min for a single MS imaging dataset if at least 12 cores are available.

---------

Expand Down
28 changes: 14 additions & 14 deletions src/ms1_id/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def run_lcms(args):
peak_cor_rt_tol=args.peak_cor_rt_tol,
min_ppc=args.min_ppc,
roi_min_length=args.roi_min_length,
library_search_mztol=args.library_search_mztol,
library_search_mztol=args.lib_search_mztol,
ms1id_score_cutoff=args.ms1id_score_cutoff,
ms1id_min_matched_peak=args.ms1id_min_matched_peak,
ms1id_min_spec_usage=args.ms1id_min_spec_usage,
Expand Down Expand Up @@ -144,10 +144,10 @@ def run_msi(args):
min_overlap=args.min_overlap,
min_correlation=args.min_correlation,
max_cor_depth=args.max_cor_depth,
library_search_mztol=args.library_search_mztol,
ms1id_score_cutoff=args.ms1id_score_cutoff,
ms1id_min_matched_peak=args.ms1id_min_matched_peak,
ms1id_min_spec_usage=args.ms1id_min_spec_usage
library_search_mztol=args.lib_search_mztol,
score_cutoff=args.score_cutoff,
min_matched_peak=args.min_matched_peak,
min_spec_usage=args.min_spec_usage
)


Expand Down Expand Up @@ -244,7 +244,7 @@ def main():
help='Minimum peak-peak correlation to form a feature group (default: 0.80)')
lcms_parser.add_argument('--roi_min_length', type=int, default=5,
help='ROI minimum length for a feature (default: 5)')
lcms_parser.add_argument('--library_search_mztol', type=float, default=0.05,
lcms_parser.add_argument('--lib_search_mztol', type=float, default=0.05,
help='Library search m/z tolerance (default: 0.05)')
lcms_parser.add_argument('--ms1id_score_cutoff', type=float, default=0.7,
help='MS1 ID matching score cutoff (default: 0.7)')
Expand All @@ -270,21 +270,21 @@ def main():
help='Number of cores to use (default: None, use all available cores)')
msi_parser.add_argument('--sn_factor', type=float, default=3.0,
help='Signal-to-noise factor for noise removal (default: 3.0)')
msi_parser.add_argument('--mz_ppm_tol', type=float, default=5.0,
help='m/z tolerance in ppm for feature detection (default: 5.0)')
msi_parser.add_argument('--min_overlap', type=int, default=10,
help='Minimum overlap between ion images (default: 10)')
msi_parser.add_argument('--mz_ppm_tol', type=float, default=10.0,
help='m/z tolerance in ppm for feature detection (default: 10.0)')
msi_parser.add_argument('--min_overlap', type=int, default=5,
help='Minimum overlap between ion images (default: 5)')
msi_parser.add_argument('--min_correlation', type=float, default=0.85,
help='Minimum correlation between spectra (default: 0.85)')
msi_parser.add_argument('--max_cor_depth', type=int, default=1,
help='Maximum correlation depth for spatial correlation (default: 1)')
msi_parser.add_argument('--library_search_mztol', type=float, default=0.05,
msi_parser.add_argument('--lib_search_mztol', type=float, default=0.05,
help='Library search m/z tolerance (default: 0.05)')
msi_parser.add_argument('--ms1id_score_cutoff', type=float, default=0.7,
msi_parser.add_argument('--score_cutoff', type=float, default=0.7,
help='MS1 ID matching score cutoff (default: 0.7)')
msi_parser.add_argument('--ms1id_min_matched_peak', type=int, default=3,
msi_parser.add_argument('--min_matched_peak', type=int, default=3,
help='MS1 ID minimum matched peaks (default: 3)')
msi_parser.add_argument('--ms1id_min_spec_usage', type=float, default=0.05,
msi_parser.add_argument('--min_spec_usage', type=float, default=0.05,
help='MS1 ID minimum spectrum usage (default: 0.05)')

args = parser.parse_args()
Expand Down
5 changes: 2 additions & 3 deletions src/ms1_id/msi/calculate_mz_cor_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,13 @@ def worker(start_idx, end_idx, mmap_filename, intensity_matrix_shape, min_overla


def calc_all_mz_correlations(intensity_matrix, min_overlap=5, min_cor=0.8,
save=True, save_dir=None, n_processes=None, chunk_size=500):
save_dir=None, n_processes=None, chunk_size=500):
"""
Calculate m/z correlation matrix for MS imaging data using multiprocessing and numpy memmap
:param intensity_matrix: 2D numpy array where rows are m/z values and columns are spectra
:param min_overlap: Minimum number of overlapping spectra between two ions
:param min_cor: Minimum correlation value to keep
:param save: Boolean indicating whether to save the result
:param save_dir: Directory to save the result; nothing is saved if it is None or empty
:param n_processes: Number of processes to use (default: number of CPU cores)
:param chunk_size: Number of rows to process in each chunk
Expand Down Expand Up @@ -133,7 +132,7 @@ def calc_all_mz_correlations(intensity_matrix, min_overlap=5, min_cor=0.8,
corr_matrix = corr_matrix + corr_matrix.T
corr_matrix.setdiag(1.0)

if save and save_dir:
if save_dir:
path = os.path.join(save_dir, 'mz_correlation_matrix.npz')
print(f"Saving correlation matrix to {path}...")
save_npz(path, corr_matrix)
Expand Down
8 changes: 4 additions & 4 deletions src/ms1_id/msi/group_mz_cor_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def generate_pseudo_ms2(mz_values, intensity_matrix, correlation_matrix,
n_processes=None, min_cluster_size=6,
min_cor=0.90, max_cor_depth=1,
save=False, save_dir=None, chunk_size=1000):
save_dir=None, chunk_size=1000):
"""
Generate pseudo MS2 spectra for imaging data using chunked parallel processing
"""
Expand All @@ -36,7 +36,7 @@ def generate_pseudo_ms2(mz_values, intensity_matrix, correlation_matrix,
# Assign intensity values
_assign_intensities(pseudo_ms2_spectra, intensity_matrix)

if save and save_dir:
if save_dir:
pkl_path = os.path.join(save_dir, 'pseudo_ms2_spectra.pkl')
with open(pkl_path, 'wb') as f:
pickle.dump(pseudo_ms2_spectra, f)
Expand Down Expand Up @@ -89,7 +89,7 @@ def find_correlated_mzs(target_idx, current_depth, visited):


def _perform_clustering(mz_values, correlation_matrix, n_processes=None, min_cor=0.90,
min_cluster_size=6, max_cor_depth=1, chunk_size=800):
min_cluster_size=3, max_cor_depth=1, chunk_size=800):
"""
Perform clustering on m/z values based on correlation scores using chunked multiprocessing.
"""
Expand Down Expand Up @@ -120,7 +120,7 @@ def _assign_intensities(pseudo_ms2_spectra, intensity_matrix):
Assign intensity values to pseudo MS2 spectra.
"""
for spectrum in tqdm(pseudo_ms2_spectra, desc="Assigning intensities"):
# Get the intensities for all m/z values in this PseudoMS1 object
# Get the intensities for all m/z values in this PseudoMS2 object
intensities = intensity_matrix[spectrum.indices, :]

# Get the intensities for the target m/z across all spectra
Expand Down
13 changes: 7 additions & 6 deletions src/ms1_id/msi/main_msi.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ def ms1id_single_file_batch(
mz_ppm_tol=5.0,
min_overlap=10, min_correlation=0.85, max_cor_depth=1,
library_search_mztol=0.01,
ms1id_score_cutoff=0.7,
ms1id_min_matched_peak=4,
ms1id_min_spec_usage=0.10,
score_cutoff=0.7,
min_matched_peak=4,
min_spec_usage=0.10,
max_prec_rel_int_in_other_ms2=0.05
):
files = [f for f in os.listdir(file_dir) if f.lower().endswith('.imzml') and not f.startswith('.')]
Expand All @@ -24,9 +24,10 @@ def ms1id_single_file_batch(
mz_ppm_tol=mz_ppm_tol,
min_overlap=min_overlap, min_correlation=min_correlation,
max_cor_depth=max_cor_depth,
library_search_mztol=library_search_mztol, ms1id_score_cutoff=ms1id_score_cutoff,
ms1id_min_spec_usage=ms1id_min_spec_usage,
ms1id_min_matched_peak=ms1id_min_matched_peak,
library_search_mztol=library_search_mztol,
score_cutoff=score_cutoff,
min_matched_peak=min_matched_peak,
min_spec_usage=min_spec_usage,
max_prec_rel_int_in_other_ms2=max_prec_rel_int_in_other_ms2
)

Expand Down
17 changes: 7 additions & 10 deletions src/ms1_id/msi/ms1id_msi_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ def ms1id_imaging_workflow(file_path, library_path, n_processes=None,
mz_ppm_tol=5.0,
min_overlap=10, min_correlation=0.85, max_cor_depth=1,
library_search_mztol=0.05,
ms1id_score_cutoff=0.7, ms1id_min_matched_peak=4,
ms1id_min_spec_usage=0.10, max_prec_rel_int_in_other_ms2=0.05):
score_cutoff=0.7, min_matched_peak=4,
min_spec_usage=0.10, max_prec_rel_int_in_other_ms2=0.05):
file_dir = os.path.dirname(file_path)
file_name = os.path.splitext(os.path.basename(file_path))[0]

Expand All @@ -31,35 +31,32 @@ def ms1id_imaging_workflow(file_path, library_path, n_processes=None,
mz_ppm_tol=mz_ppm_tol,
sn_factor=sn_factor,
n_processes=n_processes,
save=True, save_dir=result_folder
save_dir=result_folder
)

print(f"Calculating ion image correlations for {file_name}")
cor_matrix = calc_all_mz_correlations(intensity_matrix,
min_overlap=min_overlap,
min_cor=min_correlation,
n_processes=n_processes,
save=True,
save_dir=result_folder)

print(f"Generating pseudo MS2 spectra for {file_name}")
pseudo_ms2 = generate_pseudo_ms2(mz_values, intensity_matrix, cor_matrix,
n_processes=n_processes,
min_cluster_size=ms1id_min_matched_peak + 1,
min_cluster_size=min_matched_peak + 1,
min_cor=min_correlation,
max_cor_depth=max_cor_depth,
save=True,
save_dir=result_folder)

print(f"Annotating pseudo MS2 spectra for {file_name}")
pseudo_ms2 = ms1_id_annotation(pseudo_ms2, library_path, n_processes=None,
mz_tol=library_search_mztol,
ion_mode=ion_mode,
score_cutoff=ms1id_score_cutoff,
min_matched_peak=ms1id_min_matched_peak,
min_spec_usage=ms1id_min_spec_usage,
score_cutoff=score_cutoff,
min_matched_peak=min_matched_peak,
min_spec_usage=min_spec_usage,
max_prec_rel_int_in_other_ms2=max_prec_rel_int_in_other_ms2,
save=True,
save_dir=result_folder)

print(f"Writing results for {file_name}")
Expand Down
93 changes: 0 additions & 93 deletions src/ms1_id/msi/msi_feature_extraction.py

This file was deleted.

Loading

0 comments on commit 7a2a6b3

Please sign in to comment.