This repository has been archived by the owner on Nov 15, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First implementation of a base search algorithm class
* current search algorithm moved to msm_basic package * SearchAlgorithm is a base class for implementing search algorithms * SearchResults class api changed * tests updates
- Loading branch information
Showing
16 changed files
with
170 additions
and
99 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
from sm.engine.msm_basic.formula_imager_segm import compute_sf_images | ||
from sm.engine.msm_basic.formula_img_validator import sf_image_metrics, sf_image_metrics_est_fdr | ||
from sm.engine.search_algorithm import SearchAlgorithm | ||
from sm.engine.util import logger | ||
|
||
|
||
class MSMBasicSearch(SearchAlgorithm): | ||
|
||
def __init__(self, sc, ds, formulas, fdr, ds_config): | ||
super(MSMBasicSearch, self).__init__(sc, ds, formulas, fdr, ds_config) | ||
self.metrics = ['chaos', 'spatial', 'spectral'] | ||
|
||
def search(self): | ||
logger.info('Running molecule search') | ||
sf_images = compute_sf_images(self.sc, self.ds, self.formulas.get_sf_peak_df(), | ||
self.ds_config['image_generation']['ppm']) | ||
all_sf_metrics_df = self.calc_metrics(sf_images) | ||
sf_metrics_fdr_df = self.estimate_fdr(all_sf_metrics_df) | ||
sf_metrics_fdr_df = self.filter_sf_metrics(sf_metrics_fdr_df) | ||
return sf_metrics_fdr_df, self.filter_sf_images(sf_images, sf_metrics_fdr_df) | ||
|
||
def calc_metrics(self, sf_images): | ||
all_sf_metrics_df = sf_image_metrics(sf_images, self.sc, self.formulas, self.ds, self.ds_config) | ||
return all_sf_metrics_df | ||
|
||
def estimate_fdr(self, all_sf_metrics_df): | ||
sf_metrics_fdr_df = sf_image_metrics_est_fdr(all_sf_metrics_df, self.formulas, self.fdr) | ||
return sf_metrics_fdr_df | ||
|
||
def filter_sf_metrics(self, sf_metrics_df): | ||
return sf_metrics_df[(sf_metrics_df.chaos > 0) | (sf_metrics_df.spatial > 0) | (sf_metrics_df.spectral > 0)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
class SearchAlgorithm(object): | ||
|
||
def __init__(self, sc, ds, formulas, fdr, ds_config): | ||
self.sc = sc | ||
self.ds = ds | ||
self.formulas = formulas | ||
self.fdr = fdr | ||
self.ds_config = ds_config | ||
self.metrics = [] | ||
|
||
def search(self): | ||
pass | ||
|
||
def calc_metrics(self, sf_images): | ||
pass | ||
|
||
def estimate_fdr(self, all_sf_metrics_df): | ||
pass | ||
|
||
def filter_sf_metrics(self, sf_metrics_df): | ||
return sf_metrics_df[sf_metrics_df.msm > 0] | ||
|
||
def filter_sf_images(self, sf_images, sf_metrics_df): | ||
return sf_images.filter(lambda (sf_i, _): sf_i in sf_metrics_df.index) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,23 +5,22 @@ | |
.. moduleauthor:: Vitaly Kovalev <[email protected]> | ||
""" | ||
import json | ||
from datetime import datetime | ||
from os.path import join | ||
from pprint import pformat | ||
from datetime import datetime | ||
|
||
from pyspark import SparkContext, SparkConf | ||
from sm.engine.msm_basic.msm_basic_search import MSMBasicSearch | ||
from sm.engine.dataset import Dataset | ||
from sm.engine.db import DB | ||
from sm.engine.fdr import FDR | ||
from sm.engine.formula_imager_segm import compute_sf_images | ||
from sm.engine.formula_img_validator import sf_image_metrics, sf_image_metrics_est_fdr, filter_sf_images, filter_sf_metrics | ||
from sm.engine.formulas_segm import FormulasSegm | ||
from sm.engine.imzml_txt_converter import ImzmlTxtConverter | ||
from sm.engine.search_results import SearchResults | ||
from sm.engine.theor_peaks_gen import TheorPeaksGenerator | ||
from sm.engine.util import local_path, proj_root, SMConfig, logger | ||
from sm.engine.work_dir import WorkDirManager | ||
|
||
from pyspark import SparkContext, SparkConf | ||
from sm.engine.imzml_txt_converter import ImzmlTxtConverter | ||
|
||
DS_ID_SEL = "SELECT id FROM dataset WHERE name = %s" | ||
DB_ID_SEL = "SELECT id FROM formula_db WHERE name = %s" | ||
|
@@ -79,6 +78,19 @@ def _init_db(self): | |
self.db = DB(self.sm_config['db']) | ||
self.sf_db_id = self.db.select_one(DB_ID_SEL, self.ds_config['database']['name'])[0] | ||
|
||
# TODO: add tests | ||
def store_job_meta(self): | ||
""" Store search job metadata in the database """ | ||
logger.info('Storing job metadata') | ||
self.ds_id = int(self.db.select_one(DS_ID_SEL, self.ds_name)[0]) | ||
self.job_id = self.ds_id | ||
self.db.alter(DEL_JOB_SQL, self.job_id) | ||
rows = [(self.job_id, self.sf_db_id, self.ds_id, datetime.now().strftime('%Y-%m-%d %H:%M:%S'))] | ||
self.db.insert(JOB_INS, rows) | ||
|
||
rows = [(self.job_id, adduct) for adduct in self.ds_config['isotope_generation']['adducts']] | ||
self.db.insert(ADDUCT_INS, rows) | ||
|
||
def run(self, input_path, ds_config_path, clean=False): | ||
""" Entry point of the engine. Molecule search is completed in several steps: | ||
* Copying input data to the engine work dir | ||
|
@@ -133,8 +145,17 @@ def run(self, input_path, ds_config_path, clean=False): | |
self.fdr.decoy_adduct_selection() | ||
self.formulas = FormulasSegm(self.job_id, self.sf_db_id, self.ds_config, self.db) | ||
|
||
search_results = self._search() | ||
self._store_results(search_results) | ||
search_alg = MSMBasicSearch(self.sc, self.ds, self.formulas, self.fdr, self.ds_config) | ||
sf_metrics_df, sf_iso_images = search_alg.search() | ||
|
||
search_results = SearchResults(self.sf_db_id, self.ds_id, self.job_id, | ||
self.ds_name, self.formulas.get_sf_adduct_peaksn(), | ||
self.db, self.sm_config, self.ds_config) | ||
search_results.sf_metrics_df = sf_metrics_df | ||
search_results.sf_iso_images = sf_iso_images | ||
search_results.metrics = search_alg.metrics | ||
search_results.nrows, search_results.ncols = self.ds.get_dims() | ||
search_results.store() | ||
|
||
except Exception as e: | ||
raise | ||
|
@@ -144,37 +165,3 @@ def run(self, input_path, ds_config_path, clean=False): | |
self.sc.stop() | ||
if self.db: | ||
self.db.close() | ||
|
||
# TODO: add tests | ||
def store_job_meta(self): | ||
""" Store search job metadata in the database """ | ||
logger.info('Storing job metadata') | ||
self.ds_id = int(self.db.select_one(DS_ID_SEL, self.ds_name)[0]) | ||
self.job_id = self.ds_id | ||
self.db.alter(DEL_JOB_SQL, self.job_id) | ||
rows = [(self.job_id, self.sf_db_id, self.ds_id, datetime.now().strftime('%Y-%m-%d %H:%M:%S'))] | ||
self.db.insert(JOB_INS, rows) | ||
|
||
rows = [(self.job_id, adduct) for adduct in self.ds_config['isotope_generation']['adducts']] | ||
self.db.insert(ADDUCT_INS, rows) | ||
|
||
def _search(self): | ||
logger.info('Running molecule search') | ||
sf_images = compute_sf_images(self.sc, self.ds, self.formulas.get_sf_peak_df(), | ||
self.ds_config['image_generation']['ppm']) | ||
all_sf_metrics_df = sf_image_metrics(sf_images, self.sc, self.formulas, self.ds, self.ds_config) | ||
sf_metrics_fdr_df = sf_image_metrics_est_fdr(all_sf_metrics_df, self.formulas, self.fdr) | ||
sf_metrics_fdr_df = filter_sf_metrics(sf_metrics_fdr_df) | ||
sf_images = filter_sf_images(sf_images, sf_metrics_fdr_df) | ||
|
||
return SearchResults(self.sf_db_id, self.ds_id, self.job_id, | ||
sf_metrics_fdr_df, sf_images, | ||
self.formulas.get_sf_adduct_peaksn(), | ||
self.db, self.sm_config) | ||
|
||
def _store_results(self, search_results): | ||
logger.info('Storing search results to the DB') | ||
search_results.clear_old_results() | ||
search_results.store_sf_img_metrics() | ||
nrows, ncols = self.ds.get_dims() | ||
search_results.store_sf_iso_images(nrows, ncols) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
6 changes: 3 additions & 3 deletions
6
sm/engine/tests/test_formula_imager.py → ...ne/tests/msm_basic/test_formula_imager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from scipy.sparse import csr_matrix | ||
import pandas as pd | ||
|
||
from sm.engine.msm_basic.msm_basic_search import MSMBasicSearch | ||
from sm.engine.tests.util import spark_context | ||
|
||
|
||
def test_filter_sf_images(spark_context): | ||
sf_iso_images = spark_context.parallelize([(0, [csr_matrix([[0, 100, 100], [10, 0, 3]]), | ||
csr_matrix([[0, 50, 50], [0, 20, 0]])]), | ||
(1, [csr_matrix([[0, 0, 0], [0, 0, 0]]), | ||
csr_matrix([[0, 0, 0], [0, 0, 0]])])]) | ||
|
||
sf_metrics_df = (pd.DataFrame([[0, '+H', 0.9, 0.9, 0.9, 0.9**3]], | ||
columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm']) | ||
.set_index(['sf_id', 'adduct'])) | ||
|
||
search_alg = MSMBasicSearch(None, None, None, None, None) | ||
flt_iso_images = search_alg.filter_sf_images(sf_iso_images, sf_metrics_df) | ||
|
||
assert dict(flt_iso_images.take(1)).keys() == dict(sf_iso_images.take(1)).keys() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.