From c3d3f0be27f8910727392098f3e5320f6db978c7 Mon Sep 17 00:00:00 2001 From: Bora Uyar Date: Tue, 11 Jun 2024 10:05:24 +0200 Subject: [PATCH] add option to disable marker discovery --- flexynesis/__main__.py | 18 +++++++++++------- pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/flexynesis/__main__.py b/flexynesis/__main__.py index f61fd23..5b19e84 100644 --- a/flexynesis/__main__.py +++ b/flexynesis/__main__.py @@ -60,6 +60,8 @@ def main(): parser.add_argument("--threads", help="(Optional) How many threads to use when using CPU (default is 4)", type=int, default = 4) parser.add_argument("--use_gpu", action="store_true", help="(Optional) If set, the system will attempt to use CUDA/GPU if available.") + parser.add_argument("--disable_marker_finding", action="store_true", + help="(Optional) If set, marker discovery after model training is disabled.") # DirectPredGCNN args. parser.add_argument("--graph", help="Graph to use, name of the database or path to the edge list on the disk.", type=str, default="STRING") parser.add_argument("--string_organism", help="STRING DB organism id.", type=int, default=9606) @@ -255,13 +257,15 @@ class AvailableModels(NamedTuple): flexynesis.get_predicted_labels(model.predict(test_dataset), test_dataset, 'test')], ignore_index=True) predicted_labels.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'predicted_labels.csv'])), header=True, index=False) - # compute feature importance values - print("[INFO] Computing variable importance scores") - for var in model.target_variables: - model.compute_feature_importance(train_dataset, var, steps = 50) - df_imp = pd.concat([model.feature_importances[x] for x in model.target_variables], - ignore_index = True) - df_imp.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'feature_importance.csv'])), header=True, index=False) + + if not args.disable_marker_finding: # unless marker discovery is disabled + # compute feature importance values + print("[INFO] Computing variable importance scores") + for var in model.target_variables: + model.compute_feature_importance(train_dataset, var, steps = 50) + df_imp = pd.concat([model.feature_importances[x] for x in model.target_variables], + ignore_index = True) + df_imp.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'feature_importance.csv'])), header=True, index=False) # get sample embeddings and save print("[INFO] Extracting sample embeddings") diff --git a/pyproject.toml b/pyproject.toml index a06f12c..2710c39 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "flexynesis" -version = "0.1.4" +version = "0.1.6" authors = [ {name = "Bora Uyar", email = "bora.uyar@mdc-berlin.de"}, {name = "Taras Savchyn", email = "Taras.Savchyn@mdc-berlin.de"},