From c91373c89b9913a9be38ffa211d4b1f43f2ba894 Mon Sep 17 00:00:00 2001
From: ntalluri <nehatalluri@live.com>
Date: Tue, 17 Sep 2024 17:56:34 -0500
Subject: [PATCH] updated commenting on oi2 wrapper code

---
 config/config.yaml        | 18 +++++++++---------
 spras/omicsintegrator2.py |  6 +++---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index b87bcd45..53a3317d 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -45,13 +45,13 @@ container_registry:
 algorithms:
       - name: "pathlinker"
         params:
-              include: true
+              include: false
               run1:
                   k: range(100,201,100)
 
       - name: "omicsintegrator1"
         params:
-              include: true
+              include: false
               run1:
                   b: [5, 6]
                   w: np.linspace(0,5,2)
@@ -69,7 +69,7 @@ algorithms:
 
       - name: "meo"
         params:
-              include: true
+              include: false
               run1:
                   max_path_length: [3]
                   local_search: ["Yes"]
@@ -77,18 +77,18 @@ algorithms:
 
       - name: "mincostflow"
         params:
-              include: true
+              include: false
               run1:
                   flow: [1] # The flow must be an int
                   capacity: [1]
 
       - name: "allpairs"
         params:
-              include: true
+              include: false
 
       - name: "domino"
         params:
-              include: true
+              include: false
               run1:
                   slice_threshold: [0.3]
                   module_threshold: [0.05]
@@ -152,14 +152,14 @@ analysis:
         include: true
       # Create output files for each pathway that can be visualized with GraphSpace
       graphspace:
-        include: true
+        include: false
       # Create Cytoscape session file with all pathway graphs for each dataset
       cytoscape:
-        include: true
+        include: false
       # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
       ml:
         # ml analysis per dataset
-        include: true
+        include: false
         # adds ml analysis per algorithm output
         # only runs for algorithms with multiple parameter combinations chosen
         aggregate_per_algorithm: true
diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py
index 450aa258..19a8bd14 100644
--- a/spras/omicsintegrator2.py
+++ b/spras/omicsintegrator2.py
@@ -149,15 +149,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file):
         # Omicsintegrator2 returns a single line file if no network is found
         num_lines = sum(1 for line in open(raw_pathway_file))
         # Omicsintegrator2 has corrupted output; list of correct column names
-        sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2']
+        sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2']  # Kept sorted and compared against sorted(df.columns) because the order of edge attributes in the NetworkX graph is not guaranteed
 
         if num_lines < 2:
             df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
         else:
             df = pd.read_csv(raw_pathway_file, sep='\t', header=0)
             if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct
-                df = df[df['in_solution'] == True]  # Check whether this column can be empty before revising this line
-                df = df.take([0, 1], axis=1)
+                df = df[df['in_solution'] == True]  # Keep only the edges in the solution; the 'in_solution' column exists whenever the forest is not empty
+                df = df.take([0, 1], axis=1)  # The first two columns of the df are 'protein1' and 'protein2'; the columns after them are the edge attributes
                 df = add_rank_column(df)
                 df = reinsert_direction_col_undirected(df)
                 df.columns = ['Node1', 'Node2', 'Rank', "Direction"]