From c3d3f0be27f8910727392098f3e5320f6db978c7 Mon Sep 17 00:00:00 2001
From: Bora Uyar <bora.uyar@mdc-berlin.de>
Date: Tue, 11 Jun 2024 10:05:24 +0200
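Subject: [PATCH] Add --disable_marker_finding option to skip marker discovery

When --disable_marker_finding is set, the feature importance computation
that runs after the predicted labels are written is skipped, and
feature_importance.csv is not produced. Without the flag, behaviour is
unchanged.

Also bump the package version from 0.1.4 to 0.1.6.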
---
 flexynesis/__main__.py | 18 +++++++++++-------
 pyproject.toml         |  2 +-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/flexynesis/__main__.py b/flexynesis/__main__.py
index f61fd23..5b19e84 100644
--- a/flexynesis/__main__.py
+++ b/flexynesis/__main__.py
@@ -60,6 +60,8 @@ def main():
     parser.add_argument("--threads", help="(Optional) How many threads to use when using CPU (default is 4)", type=int, default = 4)
     parser.add_argument("--use_gpu", action="store_true", 
                         help="(Optional) If set, the system will attempt to use CUDA/GPU if available.")
+    parser.add_argument("--disable_marker_finding", action="store_true", 
+                        help="(Optional) If set, marker discovery after model training is disabled.")
     # DirectPredGCNN args.
     parser.add_argument("--graph", help="Graph to use, name of the database or path to the edge list on the disk.", type=str,  default="STRING")
     parser.add_argument("--string_organism", help="STRING DB organism id.", type=int, default=9606)
@@ -255,13 +257,15 @@ class AvailableModels(NamedTuple):
                                       flexynesis.get_predicted_labels(model.predict(test_dataset), test_dataset, 'test')], 
                                     ignore_index=True)
         predicted_labels.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'predicted_labels.csv'])), header=True, index=False)
-        # compute feature importance values
-        print("[INFO] Computing variable importance scores")
-        for var in model.target_variables:
-            model.compute_feature_importance(train_dataset, var, steps = 50)
-        df_imp = pd.concat([model.feature_importances[x] for x in model.target_variables], 
-                           ignore_index = True)
-        df_imp.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'feature_importance.csv'])), header=True, index=False)
+        
+        if not args.disable_marker_finding: # unless marker discovery is disabled
+            # compute feature importance values
+            print("[INFO] Computing variable importance scores")
+            for var in model.target_variables:
+                model.compute_feature_importance(train_dataset, var, steps = 50)
+            df_imp = pd.concat([model.feature_importances[x] for x in model.target_variables], 
+                               ignore_index = True)
+            df_imp.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'feature_importance.csv'])), header=True, index=False)
 
     # get sample embeddings and save 
     print("[INFO] Extracting sample embeddings")
diff --git a/pyproject.toml b/pyproject.toml
index a06f12c..2710c39 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "flexynesis"
-version = "0.1.4"
+version = "0.1.6"
 authors = [
     {name = "Bora Uyar", email = "bora.uyar@mdc-berlin.de"},
     {name = "Taras Savchyn", email = "Taras.Savchyn@mdc-berlin.de"},