From c91373c89b9913a9be38ffa211d4b1f43f2ba894 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Tue, 17 Sep 2024 17:56:34 -0500 Subject: [PATCH] updated commenting on oi2 wrapper code --- config/config.yaml | 18 +++++++++--------- spras/omicsintegrator2.py | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..53a3317d 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -45,13 +45,13 @@ container_registry: algorithms: - name: "pathlinker" params: - include: true + include: false run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: true + include: false run1: b: [5, 6] w: np.linspace(0,5,2) @@ -69,7 +69,7 @@ algorithms: - name: "meo" params: - include: true + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -77,18 +77,18 @@ algorithms: - name: "mincostflow" params: - include: true + include: false run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: true + include: false - name: "domino" params: - include: true + include: false run1: slice_threshold: [0.3] module_threshold: [0.05] @@ -152,14 +152,14 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: true + include: false # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: true + include: false # Machine learning analysis (e.g. 
clustering) of the pathway output files for each dataset ml: # ml analysis per dataset - include: true + include: false # adds ml analysis per algorithm output # only runs for algorithms with multiple parameter combinations chosen aggregate_per_algorithm: true diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index 450aa258..19a8bd14 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -149,15 +149,15 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # Omicsintegrator2 returns a single line file if no network is found num_lines = sum(1 for line in open(raw_pathway_file)) # Omicsintegrator2 has corrupted output; list of correct column names - sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] + sorted_correct_column_names = ['cost', 'in_solution', 'protein1', 'protein2'] # Kept sorted for comparison with sorted(df.columns): the order of edge attributes in the NetworkX graph is not guaranteed. if num_lines < 2: df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) else: df = pd.read_csv(raw_pathway_file, sep='\t', header=0) if sorted(df.columns) == sorted_correct_column_names: # if column header names are all correct - df = df[df['in_solution'] == True] # Check whether this column can be empty before revising this line - df = df.take([0, 1], axis=1) + df = df[df['in_solution'] == True] # The 'in_solution' column exists whenever the forest is not empty. + df = df.take([0, 1], axis=1) # The first two columns of df are 'protein1' and 'protein2'; the edge attributes follow them. df = add_rank_column(df) df = reinsert_direction_col_undirected(df) df.columns = ['Node1', 'Node2', 'Rank', "Direction"]