From d731ae2eef0dfcd00d439de51c046fd3d0b46cce Mon Sep 17 00:00:00 2001 From: ctrlaltaf Date: Tue, 20 Feb 2024 10:32:29 -0800 Subject: [PATCH 01/28] added placeholder files --- docker-wrappers/BowTieBuilder/Dockerfile | 3 +++ docker-wrappers/BowTieBuilder/bowtiebuilder.py | 1 + spras/btb.py | 4 ++++ 3 files changed, 8 insertions(+) create mode 100644 docker-wrappers/BowTieBuilder/Dockerfile create mode 100644 docker-wrappers/BowTieBuilder/bowtiebuilder.py create mode 100644 spras/btb.py diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile new file mode 100644 index 00000000..7f9f9fbe --- /dev/null +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -0,0 +1,3 @@ +#btb uses 1 file, btb.py, which contains the algorithm needed to run bowtiebuilder + +FROM python:3.10.7 diff --git a/docker-wrappers/BowTieBuilder/bowtiebuilder.py b/docker-wrappers/BowTieBuilder/bowtiebuilder.py new file mode 100644 index 00000000..40c2fcce --- /dev/null +++ b/docker-wrappers/BowTieBuilder/bowtiebuilder.py @@ -0,0 +1 @@ +# Potentially the place to put the raw bowtiebuilder code \ No newline at end of file diff --git a/spras/btb.py b/spras/btb.py new file mode 100644 index 00000000..af65822b --- /dev/null +++ b/spras/btb.py @@ -0,0 +1,4 @@ +# need to define a new btb class and contain the following functions +# - generate_inputs +# - run +# - parse_output From 99bf63274f4eb38128520bed46cd928b2ce174d3 Mon Sep 17 00:00:00 2001 From: ctrlaltaf Date: Wed, 28 Feb 2024 16:34:07 -0800 Subject: [PATCH 02/28] Draft: btb --- config/config.yaml | 17 ++- docker-wrappers/BowTieBuilder/Dockerfile | 6 +- .../BowTieBuilder/bowtiebuilder.py | 1 - spras/btb.py | 138 ++++++++++++++++++ spras/runner.py | 1 + test/BTB/expected/output1.txt | 7 + test/BTB/input/edges.txt | 6 + test/BTB/input/edges_bad.txt | 6 + test/BTB/input/source.txt | 3 + test/BTB/input/target.txt | 3 + test/BTB/test_btb.py | 54 +++++++ 11 files changed, 232 insertions(+), 10 deletions(-) delete mode 100644 docker-wrappers/BowTieBuilder/bowtiebuilder.py create mode 100644 test/BTB/expected/output1.txt create mode 100644 test/BTB/input/edges.txt create mode 100644 test/BTB/input/edges_bad.txt create mode 100644 test/BTB/input/source.txt create mode 100644 test/BTB/input/target.txt create mode 100644 test/BTB/test_btb.py diff --git a/config/config.yaml b/config/config.yaml index b85c599b..f56b0dbe 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -14,7 +14,7 @@ container_registry: base_url: docker.io # The owner or project of the registry # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio + owner: ctrlaltaf # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm @@ -43,7 +43,7 @@ algorithms: - name: "omicsintegrator1" params: - include: true + include: false run1: r: [5] b: [5, 6] @@ -53,7 +53,7 @@ algorithms: - name: "omicsintegrator2" params: - include: true + include: false run1: b: [4] g: [0] @@ -63,7 +63,7 @@ algorithms: - name: "meo" params: - include: true + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -71,21 +71,24 @@ algorithms: - name: "mincostflow" params: - include: true + include: false run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: true + include: false - name: "domino" params: - include: true + include: false run1: slice_threshold: [0.3] module_threshold: [0.05] + - name: "bowtiebuilder" + params: + include: true # Here we specify which pathways to run and other file location information. diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile index 7f9f9fbe..08f4c1f4 100644 --- a/docker-wrappers/BowTieBuilder/Dockerfile +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -1,3 +1,5 @@ -#btb uses 1 file, btb.py, which contains the algorithm needed to run bowtiebuilder +FROM python:3.8-bullseye -FROM python:3.10.7 +WORKDIR /btb +RUN wget https://raw.githubusercontent.com/ellango2612/BowTieBuilder-Algorithm/main/btb.py +RUN pip install networkx==2.8 \ No newline at end of file diff --git a/docker-wrappers/BowTieBuilder/bowtiebuilder.py b/docker-wrappers/BowTieBuilder/bowtiebuilder.py deleted file mode 100644 index 40c2fcce..00000000 --- a/docker-wrappers/BowTieBuilder/bowtiebuilder.py +++ /dev/null @@ -1 +0,0 @@ -# Potentially the place to put the raw bowtiebuilder code \ No newline at end of file diff --git a/spras/btb.py b/spras/btb.py index af65822b..c001f1b5 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -2,3 +2,141 @@ # - generate_inputs # - run # - parse_output + +import warnings +from pathlib import Path + +import pandas as pd + +from spras.containers import prepare_volume, run_container +from spras.interactome import ( + convert_undirected_to_directed, + reinsert_direction_col_directed, +) +# what type of directionality does btb support? + +from spras.prm import PRM + +__all__ = ['BowtieBuilder'] + +class BowtieBuilder(PRM): + required_inputs = ['source', 'target', 'edges'] + + #generate input taken from meo.py beacuse they have same input requirements + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in BowtieBuilder.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename is missing") + + # Get sources and write to file, repeat for targets + # Does not check whether a node is a source and a target + for node_type in ['sources', 'targets']: + nodes = data.request_node_columns([node_type]) + if nodes is None: + raise ValueError(f'No {node_type} found in the node files') + + # TODO test whether this selection is needed, what values could the column contain that we would want to + # include or exclude? + nodes = nodes.loc[nodes[node_type]] + nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) + + # Create network file + edges = data.get_interactome() + + # Format network file + #unsure if formating network file is needed + # edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') + + edges.to_csv(filename_map['edges'], sep='\t', index=False, + columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) + + + + # Skips parameter validation step + @staticmethod + def run(source=None, target=None, edges=None, output_file=None, k=None, container_framework="docker"): + """ + Run PathLinker with Docker + @param nodetypes: input node types with sources and targets (required) + @param network: input network file (required) + @param output_file: path to the output pathway file (required) + @param k: path length (optional) + @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) + """ + # Add additional parameter validation + # Do not require k + # Use the PathLinker default + # Could consider setting the default here instead + if not source or not target or not edges or not output_file: + raise ValueError('Required BowtieBuilder arguments are missing') + + work_dir = '/spras' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, source_file = prepare_volume(source, work_dir) + volumes.append(bind_path) + + bind_path, target_file = prepare_volume(target, work_dir) + volumes.append(bind_path) + + bind_path, edges_file = prepare_volume(edges, work_dir) + volumes.append(bind_path) + + # PathLinker does not provide an argument to set the output directory + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + # PathLinker requires that the output directory exist + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + + command = ['python', + 'btb.py', + '--edges', + edges_file, + '--sources', + source_file, + '--target', + target_file, + '--output', + mapped_out_prefix] + + + print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) + + container_suffix = "bowtiebuilder" + out = run_container(container_framework, + container_suffix, + command, + volumes, + work_dir) + print(out) + + # Rename the primary output file to match the desired output filename + # Currently PathLinker only writes one output file so we do not need to delete others + # We may not know the value of k that was used + output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + output_edges.rename(output_file) + + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + # What about multiple raw_pathway_files + df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1) + df = reinsert_direction_col_directed(df) + df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') diff --git a/spras/runner.py b/spras/runner.py index 6ef26496..2d3fb519 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -7,6 +7,7 @@ from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from spras.pathlinker import PathLinker as pathlinker +from spras.btb import BowtieBuilder as bowtiebuilder def run(algorithm, params): diff --git a/test/BTB/expected/output1.txt b/test/BTB/expected/output1.txt new file mode 100644 index 00000000..34740e2d --- /dev/null +++ b/test/BTB/expected/output1.txt @@ -0,0 +1,7 @@ +Node1 Node2 +A D +B D +C D +D F +D G +D E diff --git a/test/BTB/input/edges.txt b/test/BTB/input/edges.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BTB/input/edges.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BTB/input/edges_bad.txt b/test/BTB/input/edges_bad.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BTB/input/edges_bad.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BTB/input/source.txt b/test/BTB/input/source.txt new file mode 100644 index 00000000..b1e67221 --- /dev/null +++ b/test/BTB/input/source.txt @@ -0,0 +1,3 @@ +A +B +C diff --git a/test/BTB/input/target.txt b/test/BTB/input/target.txt new file mode 100644 index 00000000..0cae3d39 --- /dev/null +++ b/test/BTB/input/target.txt @@ -0,0 +1,3 @@ +E +F +G diff --git a/test/BTB/test_btb.py b/test/BTB/test_btb.py new file mode 100644 index 00000000..48da8cad --- /dev/null +++ b/test/BTB/test_btb.py @@ -0,0 +1,54 @@ +import sys +from filecmp import cmp +from pathlib import Path + +import pytest + +import spras.config as config + +config.init_from_file("config/config.yaml") + +# TODO consider refactoring to simplify the import +# Modify the path because of the - in the directory +SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() +sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowtieBuilder'))) +from spras.btb import BowtieBuilder + +TEST_DIR = Path('test', 'bowtiebuilder/') +OUT_FILE = Path(TEST_DIR, 'output', 'ln-output.txt') + + +class TestBowtieBuilder: + """ + Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output + """ + def test_ln(self): + OUT_FILE.unlink(missing_ok=True) + BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), + target_file=Path(TEST_DIR, 'input', 'target.txt'), + edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + assert OUT_FILE.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'output1.txt') + assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' + + """ + Run the bowtiebuilder algorithm with a missing input file + """ + def test_missing_file(self): + with pytest.raises(OSError): + BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'missing.txt'), + target_file=Path(TEST_DIR, 'input', 'target.txt'), + edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + + """ + Run the local neighborhood algorithm with an improperly formatted network file + """ + def test_format_error(self): + with pytest.raises(ValueError): + BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), + target_file=Path(TEST_DIR, 'input', 'target.txt'), + edges_file=Path(TEST_DIR, 'input', 'edges_bad.txt'), + output_file=OUT_FILE) + From 725eeb0f54067bc23e0abb7c623048da87f30aa6 Mon Sep 17 00:00:00 2001 From: ctrlaltaf Date: Fri, 8 Mar 2024 10:25:05 -0800 Subject: [PATCH 03/28] goes through snakemake process without any errors --- config/config.yaml | 6 +++--- spras/btb.py | 51 ++++++++++++++++++++++++++++++++------------ test/BTB/test_btb.py | 41 ++++++++++++++++++----------------- 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index f56b0dbe..2a03d595 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -133,13 +133,13 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: true + include: false # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: true + include: false # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: - include: true + include: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph diff --git a/spras/btb.py b/spras/btb.py index c001f1b5..68dbe57d 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -9,10 +9,10 @@ import pandas as pd from spras.containers import prepare_volume, run_container -from spras.interactome import ( - convert_undirected_to_directed, - reinsert_direction_col_directed, -) +# from spras.interactome import ( +# convert_undirected_to_directed, +# reinsert_direction_col_directed, +# ) # what type of directionality does btb support? from spras.prm import PRM @@ -34,6 +34,11 @@ def generate_inputs(data, filename_map): for input_type in BowtieBuilder.required_inputs: if input_type not in filename_map: raise ValueError(f"{input_type} filename is missing") + print("FILEMAP NAME: ", filename_map) + print("DATA HEAD: ") + print( data.node_table.head()) + print("DATA INTERACTOME: ") + print(data.interactome.head()) # Get sources and write to file, repeat for targets # Does not check whether a node is a source and a target @@ -45,7 +50,15 @@ def generate_inputs(data, filename_map): # TODO test whether this selection is needed, what values could the column contain that we would want to # include or exclude? nodes = nodes.loc[nodes[node_type]] - nodes.to_csv(filename_map[node_type], index=False, columns=['NODEID'], header=False) + if(node_type == "sources"): + nodes.to_csv(filename_map["source"], sep= '\t', index=False, columns=['NODEID'], header=False) + print("NODES: ") + print(nodes) + elif(node_type == "targets"): + nodes.to_csv(filename_map["target"], sep= '\t', index=False, columns=['NODEID'], header=False) + print("NODES: ") + print(nodes) + # Create network file edges = data.get_interactome() @@ -54,14 +67,13 @@ def generate_inputs(data, filename_map): #unsure if formating network file is needed # edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') - edges.to_csv(filename_map['edges'], sep='\t', index=False, - columns=['Interactor1', 'EdgeType', 'Interactor2', 'Weight'], header=False) + edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False) # Skips parameter validation step @staticmethod - def run(source=None, target=None, edges=None, output_file=None, k=None, container_framework="docker"): + def run(source=None, target=None, edges=None, output_file=None, container_framework="docker"): """ Run PathLinker with Docker @param nodetypes: input node types with sources and targets (required) @@ -77,7 +89,7 @@ def run(source=None, target=None, edges=None, output_file=None, k=None, containe if not source or not target or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') - work_dir = '/spras' + work_dir = '/btb' # Each volume is a tuple (src, dest) volumes = list() @@ -98,7 +110,7 @@ def run(source=None, target=None, edges=None, output_file=None, k=None, containe out_dir.mkdir(parents=True, exist_ok=True) bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) volumes.append(bind_path) - mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + mapped_out_prefix = mapped_out_dir + '/raw-pathway.txt' # Use posix path inside the container command = ['python', 'btb.py', @@ -110,6 +122,7 @@ def run(source=None, target=None, edges=None, output_file=None, k=None, containe target_file, '--output', mapped_out_prefix] + # command = ['ls', '-R'] print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) @@ -121,12 +134,21 @@ def run(source=None, target=None, edges=None, output_file=None, k=None, containe volumes, work_dir) print(out) + print("Source file: ", source_file) + print("target file: ", target_file) + print("edges file: ", edges_file) + print("mapped out dir: ", mapped_out_dir) + print("mapped out prefix: ", mapped_out_prefix) + + + + # Rename the primary output file to match the desired output filename # Currently PathLinker only writes one output file so we do not need to delete others # We may not know the value of k that was used - output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) - output_edges.rename(output_file) + # output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + # output_edges.rename(output_file) @staticmethod @@ -137,6 +159,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): @param standardized_pathway_file: the same pathway written in the universal format """ # What about multiple raw_pathway_files - df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1, 2], axis=1) - df = reinsert_direction_col_directed(df) + print("PARSING OUTPUT BTB") + df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1], axis=0) + # df = reinsert_direction_col_directed(df) df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') diff --git a/test/BTB/test_btb.py b/test/BTB/test_btb.py index 48da8cad..8f34203e 100644 --- a/test/BTB/test_btb.py +++ b/test/BTB/test_btb.py @@ -15,7 +15,7 @@ from spras.btb import BowtieBuilder TEST_DIR = Path('test', 'bowtiebuilder/') -OUT_FILE = Path(TEST_DIR, 'output', 'ln-output.txt') +OUT_FILE = Path(TEST_DIR, 'output', 'output1.txt') class TestBowtieBuilder: @@ -23,6 +23,7 @@ class TestBowtieBuilder: Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output """ def test_ln(self): + print("RUNNING TEST_LN FOR BOWTIEBUILDER") OUT_FILE.unlink(missing_ok=True) BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), target_file=Path(TEST_DIR, 'input', 'target.txt'), @@ -32,23 +33,23 @@ def test_ln(self): expected_file = Path(TEST_DIR, 'expected', 'output1.txt') assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' - """ - Run the bowtiebuilder algorithm with a missing input file - """ - def test_missing_file(self): - with pytest.raises(OSError): - BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'missing.txt'), - target_file=Path(TEST_DIR, 'input', 'target.txt'), - edges_file=Path(TEST_DIR, 'input', 'edges.txt'), - output_file=OUT_FILE) - - """ - Run the local neighborhood algorithm with an improperly formatted network file - """ - def test_format_error(self): - with pytest.raises(ValueError): - BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), - target_file=Path(TEST_DIR, 'input', 'target.txt'), - edges_file=Path(TEST_DIR, 'input', 'edges_bad.txt'), - output_file=OUT_FILE) + # """ + # Run the bowtiebuilder algorithm with a missing input file + # """ + # def test_missing_file(self): + # with pytest.raises(OSError): + # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'missing.txt'), + # target_file=Path(TEST_DIR, 'input', 'target.txt'), + # edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + # output_file=OUT_FILE) + + # """ + # Run the local neighborhood algorithm with an improperly formatted network file + # """ + # def test_format_error(self): + # with pytest.raises(ValueError): + # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), + # target_file=Path(TEST_DIR, 'input', 'target.txt'), + # edges_file=Path(TEST_DIR, 'input', 'edges_bad.txt'), + # output_file=OUT_FILE) From 5534087cc8f9c085c0b4f93d04c20f25b39c9eef Mon Sep 17 00:00:00 2001 From: ctrlaltaf Date: Wed, 3 Apr 2024 10:26:00 -0700 Subject: [PATCH 04/28] Added new test files --- test/BowtieBuilder/expected/output1.txt | 7 +++ test/BowtieBuilder/input/edges.txt | 6 +++ test/BowtieBuilder/input/edges_bad.txt | 6 +++ test/BowtieBuilder/input/source.txt | 3 ++ test/BowtieBuilder/input/target.txt | 3 ++ test/BowtieBuilder/test_btb.py | 62 +++++++++++++++++++++++++ 6 files changed, 87 insertions(+) create mode 100644 test/BowtieBuilder/expected/output1.txt create mode 100644 test/BowtieBuilder/input/edges.txt create mode 100644 test/BowtieBuilder/input/edges_bad.txt create mode 100644 test/BowtieBuilder/input/source.txt create mode 100644 test/BowtieBuilder/input/target.txt create mode 100644 test/BowtieBuilder/test_btb.py diff --git a/test/BowtieBuilder/expected/output1.txt b/test/BowtieBuilder/expected/output1.txt new file mode 100644 index 00000000..34740e2d --- /dev/null +++ b/test/BowtieBuilder/expected/output1.txt @@ -0,0 +1,7 @@ +Node1 Node2 +A D +B D +C D +D F +D G +D E diff --git a/test/BowtieBuilder/input/edges.txt b/test/BowtieBuilder/input/edges.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BowtieBuilder/input/edges.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BowtieBuilder/input/edges_bad.txt b/test/BowtieBuilder/input/edges_bad.txt new file mode 100644 index 00000000..c08a8503 --- /dev/null +++ b/test/BowtieBuilder/input/edges_bad.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BowtieBuilder/input/source.txt b/test/BowtieBuilder/input/source.txt new file mode 100644 index 00000000..b1e67221 --- /dev/null +++ b/test/BowtieBuilder/input/source.txt @@ -0,0 +1,3 @@ +A +B +C diff --git a/test/BowtieBuilder/input/target.txt b/test/BowtieBuilder/input/target.txt new file mode 100644 index 00000000..0cae3d39 --- /dev/null +++ b/test/BowtieBuilder/input/target.txt @@ -0,0 +1,3 @@ +E +F +G diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py new file mode 100644 index 00000000..1112c59c --- /dev/null +++ b/test/BowtieBuilder/test_btb.py @@ -0,0 +1,62 @@ +import sys +from filecmp import cmp +from pathlib import Path + +import pytest + +import spras.config as config + +config.init_from_file("config/config.yaml") + +# TODO consider refactoring to simplify the import +# Modify the path because of the - in the directory +SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() +sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowtieBuilder'))) +from spras.btb import BowtieBuilder as bowtiebuilder + +TEST_DIR = Path('test', 'BowtieBuilder/') +OUT_FILE = Path(TEST_DIR, 'output', 'raw-pathway.txt') + + +class TestBowtieBuilder: + """ + Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output + """ + # def test_ln(self): + # print("RUNNING TEST_LN FOR BOWTIEBUILDER") + # OUT_FILE.unlink(missing_ok=True) + # bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), + # target=Path(TEST_DIR, 'input', 'target.txt'), + # edges=Path(TEST_DIR, 'input', 'edges.txt'), + # output_file=OUT_FILE) + # assert OUT_FILE.exists(), 'Output file was not written' + # expected_file = Path(TEST_DIR, 'expected', 'output1.txt') + # assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' + + """ + Run the bowtiebuilder algorithm with a missing input file + """ + def test_missing_arguments(self): + with pytest.raises(ValueError): + bowtiebuilder.run( + target=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + + + # def test_missing_file(self): + # with pytest.raises(FileNotFoundError): + # bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), + # target=Path(TEST_DIR, 'input', 'target.txt'), + # edges=Path(TEST_DIR, 'input', 'edges.txt'), + # output_file=OUT_FILE) + + # """ + # """ + def test_format_error(self): + with pytest.raises(IndexError): + bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), + target=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), + output_file=OUT_FILE) + From d289f2fdd9bdb023d941da685323cb1ad1a3188d Mon Sep 17 00:00:00 2001 From: gabeah Date: Wed, 5 Jun 2024 10:00:55 -0700 Subject: [PATCH 05/28] LN testing complete finished testing the LocalNeighborhood. Moving to make the dockerfile --- .../LocalNeighborhood/testing-files/ln-network.txt | 5 +++++ docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt | 2 ++ .../LocalNeighborhood/testing-files/test-output.txt | 3 +++ 3 files changed, 10 insertions(+) create mode 100644 docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt create mode 100644 docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt create mode 100644 docker-wrappers/LocalNeighborhood/testing-files/test-output.txt diff --git a/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt b/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt new file mode 100644 index 00000000..5a9b0451 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt @@ -0,0 +1,5 @@ +A|B +C|B +C|D +D|E +A|E diff --git a/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt b/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt new file mode 100644 index 00000000..35d242ba --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt @@ -0,0 +1,2 @@ +A +B diff --git a/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt b/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt new file mode 100644 index 00000000..58dc92d9 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt @@ -0,0 +1,3 @@ +A|B +C|B +A|E From 0fe3c2a7242bdc63fd921f3e59d897234757abed Mon Sep 17 00:00:00 2001 From: gabeah Date: Wed, 5 Jun 2024 12:14:30 -0700 Subject: [PATCH 06/28] dockerfile is made! --- docker-wrappers/LocalNeighborhood/Dockerfile | 5 +++++ docker-wrappers/LocalNeighborhood/testing-files/output2.txt | 3 +++ docker-wrappers/LocalNeighborhood/testing-files/output3.txt | 3 +++ 3 files changed, 11 insertions(+) create mode 100644 docker-wrappers/LocalNeighborhood/testing-files/output2.txt create mode 100644 docker-wrappers/LocalNeighborhood/testing-files/output3.txt diff --git a/docker-wrappers/LocalNeighborhood/Dockerfile b/docker-wrappers/LocalNeighborhood/Dockerfile index 06dcce8a..1f934ad9 100644 --- a/docker-wrappers/LocalNeighborhood/Dockerfile +++ b/docker-wrappers/LocalNeighborhood/Dockerfile @@ -1 +1,6 @@ # Create a Docker image for the Local Neighborhood algorithm here +FROM python:3.12-alpine + +WORKDIR /local_neighborhood +COPY local_neighborhood.py . +COPY /testing-files . diff --git a/docker-wrappers/LocalNeighborhood/testing-files/output2.txt b/docker-wrappers/LocalNeighborhood/testing-files/output2.txt new file mode 100644 index 00000000..58dc92d9 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/testing-files/output2.txt @@ -0,0 +1,3 @@ +A|B +C|B +A|E diff --git a/docker-wrappers/LocalNeighborhood/testing-files/output3.txt b/docker-wrappers/LocalNeighborhood/testing-files/output3.txt new file mode 100644 index 00000000..58dc92d9 --- /dev/null +++ b/docker-wrappers/LocalNeighborhood/testing-files/output3.txt @@ -0,0 +1,3 @@ +A|B +C|B +A|E From fc3ba8096e691dd721b5e3bb05a1e83ab4df5a67 Mon Sep 17 00:00:00 2001 From: gabeah Date: Thu, 6 Jun 2024 11:21:56 -0700 Subject: [PATCH 07/28] working on LN implementation --- spras/local_neighborhood.py | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 spras/local_neighborhood.py diff --git a/spras/local_neighborhood.py b/spras/local_neighborhood.py new file mode 100644 index 00000000..a9ae41ef --- /dev/null +++ b/spras/local_neighborhood.py @@ -0,0 +1,56 @@ +import warnings +from pathlib import Path + +import pandas as pd + +from spras.containers import prepare_volume, run_container +from spras.interactome import ( + convert_undirected_to_directed, + reinsert_direction_col_directed, +) +from spras.prm import PRM + +__all__ = ['LocalNeighborhood'] + +class LocalNeighborhood: + required_inputs = ["network", "nodes"] + + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: + @return: + """ + + # Check if filename + for input_type in LocalNeighborhood.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename is missing") + + if data.contains_node_columns('prize'): + print("h") + # Omics example + if data.contains_node_columns('prize'): + + node_df = data.request_node_columns(['prize']) + elif data.contains_node_columns('sources'): + + node_df = data.request_node_columns(['sources','targets']) + node_df.loc[node_df['sources']==True, 'prize'] = 1.0 + node_df.loc[node_df['targets']==True, 'prize'] = 1.0 + + else: + raise ValueError("LocalNeighborhood requires nore prizes or sources and targets") + + # LocalNeighborhood already gives warnings + node_df.to_csv(filename_map['prizes'],sep='\t', index = False, columns=['NODEID','prize'],header=['name','prize']) + + # Get network file + edges_df = data.get_interactome() + + # Rename Direction column + edges_df.to_csv(filename_map['edges'],sep='\t',index=False, + columns=['Interactor1','Interactor2','Weight','Direction'], + header=['protein1','protein2','weight','directionality']) \ No newline at end of file From fe47da597036dcdf7af0c6920502476897a134ac Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 7 Jun 2024 15:11:23 -0700 Subject: [PATCH 08/28] dealing with some errors --- config/config.yaml | 17 +-- docker-wrappers/LocalNeighborhood/Dockerfile | 2 +- spras/local_neighborhood.py | 104 ++++++++++++++++--- spras/runner.py | 1 + 4 files changed, 100 insertions(+), 24 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index c31b2429..6c50df3c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -14,7 +14,7 @@ container_registry: base_url: docker.io # The owner or project of the registry # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio + owner: gabeah # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm @@ -35,6 +35,9 @@ container_registry: # careful: too many parameters might make your runs take a long time. algorithms: + - name: "local_neighborhood" + params: + include: true - name: "pathlinker" params: include: false @@ -43,7 +46,7 @@ algorithms: - name: "omicsintegrator1" params: - include: true + include: false run1: b: [5, 6] w: np.linspace(0,5,2) @@ -51,7 +54,7 @@ algorithms: - name: "omicsintegrator2" params: - include: true + include: false run1: b: [4] g: [0] @@ -61,7 +64,7 @@ algorithms: - name: "meo" params: - include: true + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -69,18 +72,18 @@ algorithms: - name: "mincostflow" params: - include: true + include: false run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: true + include: false - name: "domino" params: - include: true + include: false run1: slice_threshold: [0.3] module_threshold: [0.05] diff --git a/docker-wrappers/LocalNeighborhood/Dockerfile b/docker-wrappers/LocalNeighborhood/Dockerfile index 1f934ad9..d1001b87 100644 --- a/docker-wrappers/LocalNeighborhood/Dockerfile +++ b/docker-wrappers/LocalNeighborhood/Dockerfile @@ -1,6 +1,6 @@ # Create a Docker image for the Local Neighborhood algorithm here FROM python:3.12-alpine -WORKDIR /local_neighborhood +WORKDIR /LocalNeighborhood COPY local_neighborhood.py . COPY /testing-files . diff --git a/spras/local_neighborhood.py b/spras/local_neighborhood.py index a9ae41ef..709dbe8a 100644 --- a/spras/local_neighborhood.py +++ b/spras/local_neighborhood.py @@ -8,6 +8,7 @@ convert_undirected_to_directed, reinsert_direction_col_directed, ) +from spras.util import add_rank_column from spras.prm import PRM __all__ = ['LocalNeighborhood'] @@ -23,34 +24,105 @@ def generate_inputs(data, filename_map): @param filename_map: @return: """ - + print('generating inputs!!') # Check if filename for input_type in LocalNeighborhood.required_inputs: if input_type not in filename_map: raise ValueError(f"{input_type} filename is missing") - if data.contains_node_columns('prize'): - print("h") - # Omics example - if data.contains_node_columns('prize'): - - node_df = data.request_node_columns(['prize']) - elif data.contains_node_columns('sources'): - - node_df = data.request_node_columns(['sources','targets']) - node_df.loc[node_df['sources']==True, 'prize'] = 1.0 - node_df.loc[node_df['targets']==True, 'prize'] = 1.0 + # Select nodes that have sources, targets, prizes, or are active + if data.contains_node_columns(['sources','targets','prize']): + node_df = data.request_node_columns(['sources','targets','prize']) else: raise ValueError("LocalNeighborhood requires nore prizes or sources and targets") # LocalNeighborhood already gives warnings - node_df.to_csv(filename_map['prizes'],sep='\t', index = False, columns=['NODEID','prize'],header=['name','prize']) + node_df.to_csv(filename_map['nodes'], + #sep='\t', + index = False, + columns=['NODEID'], + header=False) # Get network file edges_df = data.get_interactome() # Rename Direction column - edges_df.to_csv(filename_map['edges'],sep='\t',index=False, - columns=['Interactor1','Interactor2','Weight','Direction'], - header=['protein1','protein2','weight','directionality']) \ No newline at end of file + edges_df.to_csv(filename_map['network'], + sep='|', + index=False, + columns=['Interactor1','Interactor2'], + header=False) + + @staticmethod + def run(nodes=None, network=None, output_file=None, container_framework="docker"): + ''' + Method to running LocalNeighborhood correctly + @param nodes: input node types with sources and targets (required) + @param network: input network file (required) + @param output_file: path to the output pathway file (required) + ''' + print('Running!!!') + if not nodes or not network or not output_file: + raise ValueError('Required LocalNeighborhood arguments are missing') + + work_dir = '/spras' + + volumes = list() + + bind_path, node_file = prepare_volume(nodes, work_dir) + volumes.append(bind_path) + + bind_path, network_file = prepare_volume(network, work_dir) + volumes.append(bind_path) + + # LocalNeighborhood does not provide an argument to set the output directory + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + # LocalNeighborhood requires that the output directory exist + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container + + command = ['python', + '/LocalNeighborhood/local_neighborhood.py', + '--network', network_file, + '--nodes', node_file, + '--output', mapped_out_prefix] + + print('Running LocalNeighborhood with arguments: {}'.format(' '.join(command)), flush=True) + + container_suffix = "local-neighborhood" + out = run_container(container_framework, + container_suffix, + command, + volumes, + work_dir) + print(out) + + # Rename the primary output file to match the desired output filename + # Currently LocalNeighborhood only writes one output file so we do not need to delete others + output_edges = Path(out_dir, 'out') + output_edges.rename(output_file) + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + ''' + Method for standardizing output data + @raw_pathway_file: raw output from LocalNeighborhood + @standardized_pathway_file: universal output, for use in Pandas analysis + ''' + print('Parsing outputs!!') + df = pd.read_csv(raw_pathway_file, + sep='|', + header=None + ) + print(df) + df = add_rank_column(df) + print(df) + print(df.to_csv(header=False,index=False,sep='\t')) + df.to_csv(standardized_pathway_file, + header=None, + index=False, + sep='\t') diff --git a/spras/runner.py b/spras/runner.py index 6ef26496..1e88a8d3 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -2,6 +2,7 @@ from spras.allpairs import AllPairs as allpairs from spras.dataset import Dataset from spras.domino import DOMINO as domino +from spras.local_neighborhood import LocalNeighborhood as local_neighborhood from spras.meo import MEO as meo from spras.mincostflow import MinCostFlow as mincostflow from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 From f887fcab96491b9ac5cbf8df718d1f42deb6ff0b Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 7 Jun 2024 15:26:55 -0700 Subject: [PATCH 09/28] Step 4 completed --- config/config.yaml | 6 +++--- spras/local_neighborhood.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 6c50df3c..75214ad8 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -131,13 +131,13 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: true + include: false # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: true + include: false # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: - include: true + include: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph diff --git a/spras/local_neighborhood.py b/spras/local_neighborhood.py index 709dbe8a..20f115e6 100644 --- a/spras/local_neighborhood.py +++ b/spras/local_neighborhood.py @@ -6,7 +6,7 @@ from spras.containers import prepare_volume, run_container from spras.interactome import ( convert_undirected_to_directed, - reinsert_direction_col_directed, + reinsert_direction_col_undirected, ) from spras.util import add_rank_column from spras.prm import PRM @@ -118,10 +118,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file): sep='|', header=None ) - print(df) + + # Add extra data to not annoy the SNAKEFILE df = add_rank_column(df) - print(df) - print(df.to_csv(header=False,index=False,sep='\t')) + df = reinsert_direction_col_undirected(df) + df.to_csv(standardized_pathway_file, header=None, index=False, From 877707c1873dde15e619477d7b4a0d023c9d421e Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 7 Jun 2024 16:02:13 -0700 Subject: [PATCH 10/28] stuck on modifying generate inputs and parse outputs --- .github/workflows/test-spras.yml | 10 ++++++++++ test/LocalNeighborhood/test_ln.py | 11 +++++++++++ test/parse-outputs/test_parse_outputs.py | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 3dc2ab85..1afe2c7b 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -83,6 +83,16 @@ jobs: docker pull reedcompbio/mincostflow:latest docker pull reedcompbio/allpairs:latest docker pull reedcompbio/domino:latest + docker pull gabeah/local-neighborhood:latest + - name: Build Local Neighborhood Docker Image + uses: docker/build-push-action@v1 + with: + path: docker-wrappers/LocalNeighborhood + dockerfile: docker-wrappers/LocalNeighborhood/Dockerfile + repository: gabeah/local-neighborhood + tags: latest + cache_from: gabeah/local-neighborhood:latest + push: false - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 with: diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py index 391c5fb1..0d3a28fb 100644 --- a/test/LocalNeighborhood/test_ln.py +++ b/test/LocalNeighborhood/test_ln.py @@ -5,6 +5,7 @@ import pytest import spras.config as config +from spras.local_neighborhood import LocalNeighborhood config.init_from_file("config/config.yaml") @@ -50,3 +51,13 @@ def test_format_error(self): output_file=OUT_FILE) # Write tests for the Local Neighborhood run function here + def test_localneighborhood_required(self): + out_path = Path(OUT_FILE) + out_path.unlink(missing_ok=True) + # Only include required arguments + LocalNeighborhood.run( + nodes=str(TEST_DIR)+'/input/ln-nodes.txt', + network=str(TEST_DIR)+'/input/ln-network.txt', + output_file=OUT_FILE + ) + assert out_path.exists() \ No newline at end of file diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 8d8d0933..31830ff0 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -11,7 +11,7 @@ # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples -algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino'] +algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'local_neighborhood'] class TestParseOutputs: From 8dfa3306ce3d1f920e035b5e5adaf30092b73748 Mon Sep 17 00:00:00 2001 From: gabeah Date: Mon, 10 Jun 2024 09:26:30 -0700 Subject: [PATCH 11/28] finished? --- .../expected/local_neighborhood-network-expected.txt.txt | 5 +++++ test/generate-inputs/test_generate_inputs.py | 3 ++- .../expected/local_neighborhood-pathway-expected.txt | 2 ++ test/parse-outputs/input/local_neighborhood-raw-pathway.txt | 2 ++ 4 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt create mode 100644 test/parse-outputs/expected/local_neighborhood-pathway-expected.txt create mode 100644 test/parse-outputs/input/local_neighborhood-raw-pathway.txt diff --git a/test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt b/test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt new file mode 100644 index 00000000..5a9b0451 --- /dev/null +++ b/test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt @@ -0,0 +1,5 @@ +A|B +C|B +C|D +D|E +A|E diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 86319e2c..a6f04a42 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -16,7 +16,8 @@ 'omicsintegrator2': 'edges', 'domino': 'network', 'pathlinker': 'network', - 'allpairs': 'network' + 'allpairs': 'network', + 'local_neighborhood': 'network' } diff --git a/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt b/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt new file mode 100644 index 00000000..e2fd8d57 --- /dev/null +++ b/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt @@ -0,0 +1,2 @@ +A B 1 U +B C 1 U diff --git a/test/parse-outputs/input/local_neighborhood-raw-pathway.txt b/test/parse-outputs/input/local_neighborhood-raw-pathway.txt new file mode 100644 index 00000000..dfdd8243 --- /dev/null +++ b/test/parse-outputs/input/local_neighborhood-raw-pathway.txt @@ -0,0 +1,2 @@ +A|B +B|C From dbaacf1b2ee8494521a2543240f16fb3fe297afc Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 11 Jun 2024 09:42:07 -0700 Subject: [PATCH 12/28] finished changes as defined in code review --- spras/local_neighborhood.py | 9 ++++++--- test/LocalNeighborhood/test_ln.py | 4 ++-- ...d.txt.txt => local_neighborhood-network-expected.txt} | 0 3 files changed, 8 insertions(+), 5 deletions(-) rename test/generate-inputs/expected/{local_neighborhood-network-expected.txt.txt => local_neighborhood-network-expected.txt} (100%) diff --git a/spras/local_neighborhood.py b/spras/local_neighborhood.py index 20f115e6..c50f3210 100644 --- a/spras/local_neighborhood.py +++ b/spras/local_neighborhood.py @@ -13,7 +13,7 @@ __all__ = ['LocalNeighborhood'] -class LocalNeighborhood: +class LocalNeighborhood(PRM): required_inputs = ["network", "nodes"] @staticmethod @@ -21,7 +21,7 @@ def generate_inputs(data, filename_map): """ Access fields from the dataset and write the required input files @param data: dataset - @param filename_map: + @param filename_map: dictionary where key is input type, and value is a path to a file @return: """ print('generating inputs!!') @@ -53,7 +53,8 @@ def generate_inputs(data, filename_map): index=False, columns=['Interactor1','Interactor2'], header=False) - + return None + @staticmethod def run(nodes=None, network=None, output_file=None, container_framework="docker"): ''' @@ -105,6 +106,7 @@ def run(nodes=None, network=None, output_file=None, container_framework="docker" # Currently LocalNeighborhood only writes one output file so we do not need to delete others output_edges = Path(out_dir, 'out') output_edges.rename(output_file) + return None @staticmethod def parse_output(raw_pathway_file, standardized_pathway_file): @@ -127,3 +129,4 @@ def parse_output(raw_pathway_file, standardized_pathway_file): header=None, index=False, sep='\t') + return None diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py index 0d3a28fb..649c6aeb 100644 --- a/test/LocalNeighborhood/test_ln.py +++ b/test/LocalNeighborhood/test_ln.py @@ -56,8 +56,8 @@ def test_localneighborhood_required(self): out_path.unlink(missing_ok=True) # Only include required arguments LocalNeighborhood.run( - nodes=str(TEST_DIR)+'/input/ln-nodes.txt', - network=str(TEST_DIR)+'/input/ln-network.txt', + nodes=Path(TEST_DIR,'input','ln-nodes.txt') + network=Path(TEST_DIR, 'input', 'ln-network.txt') output_file=OUT_FILE ) assert out_path.exists() \ No newline at end of file diff --git a/test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt b/test/generate-inputs/expected/local_neighborhood-network-expected.txt similarity index 100% rename from test/generate-inputs/expected/local_neighborhood-network-expected.txt.txt rename to test/generate-inputs/expected/local_neighborhood-network-expected.txt From 1caca3afdf35bb3199923fc43c1a90f60c29c4de Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 21 Jun 2024 15:16:55 -0700 Subject: [PATCH 13/28] added files for BTB PR --- docker-wrappers/BowTieBuilder/Dockerfile | 5 + docker-wrappers/BowTieBuilder/README.md | 3 + spras/btb.py | 165 +++++++++++++++++++++++ test/BTB/expected/output1.txt | 7 + test/BTB/input/edges.txt | 6 + test/BTB/input/edges_bad.txt | 6 + test/BTB/input/source.txt | 3 + test/BTB/input/target.txt | 3 + test/BTB/test-btb.py | 54 ++++++++ 9 files changed, 252 insertions(+) create mode 100644 docker-wrappers/BowTieBuilder/Dockerfile create mode 100644 docker-wrappers/BowTieBuilder/README.md create mode 100644 spras/btb.py create mode 100644 test/BTB/expected/output1.txt create mode 100644 test/BTB/input/edges.txt create mode 100644 test/BTB/input/edges_bad.txt create mode 100644 test/BTB/input/source.txt create mode 100644 test/BTB/input/target.txt create mode 100644 test/BTB/test-btb.py diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile new file mode 100644 index 00000000..08f4c1f4 --- /dev/null +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -0,0 +1,5 @@ +FROM python:3.8-bullseye + +WORKDIR /btb +RUN wget https://raw.githubusercontent.com/ellango2612/BowTieBuilder-Algorithm/main/btb.py +RUN pip install networkx==2.8 \ No newline at end of file diff --git a/docker-wrappers/BowTieBuilder/README.md b/docker-wrappers/BowTieBuilder/README.md new file mode 100644 index 00000000..e1131c13 --- /dev/null +++ b/docker-wrappers/BowTieBuilder/README.md @@ -0,0 +1,3 @@ +# BowTieBuilder Docker image + +This is the dockerimage for BTB, created by @ctrlaltaf and @ellango \ No newline at end of file diff --git a/spras/btb.py b/spras/btb.py new file mode 100644 index 00000000..ba94415f --- /dev/null +++ b/spras/btb.py @@ -0,0 +1,165 @@ +# need to define a new btb class and contain the following functions +# - generate_inputs +# - run +# - parse_output + +import warnings +from pathlib import Path + +import pandas as pd + +from spras.containers import prepare_volume, run_container +# from spras.interactome import ( +# convert_undirected_to_directed, +# reinsert_direction_col_directed, +# ) +# what type of directionality does btb support? + +from spras.prm import PRM + +__all__ = ['BowtieBuilder'] + +class BowtieBuilder(PRM): + required_inputs = ['source', 'target', 'edges'] + + #generate input taken from meo.py beacuse they have same input requirements + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in BowtieBuilder.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename is missing") + print("FILEMAP NAME: ", filename_map) + print("DATA HEAD: ") + print( data.node_table.head()) + print("DATA INTERACTOME: ") + print(data.interactome.head()) + + # Get sources and write to file, repeat for targets + # Does not check whether a node is a source and a target + for node_type in ['sources', 'targets']: + nodes = data.request_node_columns([node_type]) + if nodes is None: + raise ValueError(f'No {node_type} found in the node files') + + # TODO test whether this selection is needed, what values could the column contain that we would want to + # include or exclude? + nodes = nodes.loc[nodes[node_type]] + if(node_type == "sources"): + nodes.to_csv(filename_map["source"], sep= '\t', index=False, columns=['NODEID'], header=False) + print("NODES: ") + print(nodes) + elif(node_type == "targets"): + nodes.to_csv(filename_map["target"], sep= '\t', index=False, columns=['NODEID'], header=False) + print("NODES: ") + print(nodes) + + + # Create network file + edges = data.get_interactome() + + # Format network file + #unsure if formating network file is needed + # edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') + + edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False) + + + + # Skips parameter validation step + @staticmethod + def run(source=None, target=None, edges=None, output_file=None, container_framework="docker"): + """ + Run PathLinker with Docker + @param nodetypes: input node types with sources and targets (required) + @param network: input network file (required) + @param output_file: path to the output pathway file (required) + @param k: path length (optional) + @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) + """ + # Add additional parameter validation + # Do not require k + # Use the PathLinker default + # Could consider setting the default here instead + if not source or not target or not edges or not output_file: + raise ValueError('Required BowtieBuilder arguments are missing') + + work_dir = '/btb' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, source_file = prepare_volume(source, work_dir) + volumes.append(bind_path) + + bind_path, target_file = prepare_volume(target, work_dir) + volumes.append(bind_path) + + bind_path, edges_file = prepare_volume(edges, work_dir) + volumes.append(bind_path) + + # PathLinker does not provide an argument to set the output directory + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + # PathLinker requires that the output directory exist + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/raw-pathway.txt' # Use posix path inside the container + + command = ['python', + 'btb.py', + '--edges', + edges_file, + '--sources', + source_file, + '--target', + target_file, + '--output', + mapped_out_prefix] + # command = ['ls', '-R'] + + + print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) + + container_suffix = "bowtiebuilder" + out = run_container(container_framework, + container_suffix, + command, + volumes, + work_dir) + print(out) + print("Source file: ", source_file) + print("target file: ", target_file) + print("edges file: ", edges_file) + print("mapped out dir: ", mapped_out_dir) + print("mapped out prefix: ", mapped_out_prefix) + + + + + + # Rename the primary output file to match the desired output filename + # Currently PathLinker only writes one output file so we do not need to delete others + # We may not know the value of k that was used + # output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) + # output_edges.rename(output_file) + + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + # What about multiple raw_pathway_files + print("PARSING OUTPUT BTB") + df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1], axis=0) + # df = reinsert_direction_col_directed(df) + df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') \ No newline at end of file diff --git a/test/BTB/expected/output1.txt b/test/BTB/expected/output1.txt new file mode 100644 index 00000000..7c7c4818 --- /dev/null +++ b/test/BTB/expected/output1.txt @@ -0,0 +1,7 @@ +Node1 Node2 +A D +B D +C D +D F +D G +D E \ No newline at end of file diff --git a/test/BTB/input/edges.txt b/test/BTB/input/edges.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BTB/input/edges.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BTB/input/edges_bad.txt b/test/BTB/input/edges_bad.txt new file mode 100644 index 00000000..6f97ec4e --- /dev/null +++ b/test/BTB/input/edges_bad.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BTB/input/source.txt b/test/BTB/input/source.txt new file mode 100644 index 00000000..870951ad --- /dev/null +++ b/test/BTB/input/source.txt @@ -0,0 +1,3 @@ +A +B +C \ No newline at end of file diff --git a/test/BTB/input/target.txt b/test/BTB/input/target.txt new file mode 100644 index 00000000..f479e5f8 --- /dev/null +++ b/test/BTB/input/target.txt @@ -0,0 +1,3 @@ +E +F +G \ No newline at end of file diff --git a/test/BTB/test-btb.py b/test/BTB/test-btb.py new file mode 100644 index 00000000..77e42865 --- /dev/null +++ b/test/BTB/test-btb.py @@ -0,0 +1,54 @@ +import sys +from filecmp import cmp +from pathlib import Path + +import pytest + +import spras.config as config + +config.init_from_file("config/config.yaml") + +# TODO consider refactoring to simplify the import +# Modify the path because of the - in the directory +SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() +sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowtieBuilder'))) +from spras.btb import BowtieBuilder + +TEST_DIR = Path('test', 'bowtiebuilder/') +OUT_FILE = Path(TEST_DIR, 'output', 'output1.txt') + + +class TestBowtieBuilder: + """ + Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output + """ + def test_ln(self): + print("RUNNING TEST_LN FOR BOWTIEBUILDER") + OUT_FILE.unlink(missing_ok=True) + BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), + target_file=Path(TEST_DIR, 'input', 'target.txt'), + edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + assert OUT_FILE.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'output1.txt') + assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' + + # """ + # Run the bowtiebuilder algorithm with a missing input file + # """ + # def test_missing_file(self): + # with pytest.raises(OSError): + # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'missing.txt'), + # target_file=Path(TEST_DIR, 'input', 'target.txt'), + # edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + # output_file=OUT_FILE) + + # """ + # Run the local neighborhood algorithm with an improperly formatted network file + # """ + # def test_format_error(self): + # with pytest.raises(ValueError): + # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), + # target_file=Path(TEST_DIR, 'input', 'target.txt'), + # edges_file=Path(TEST_DIR, 'input', 'edges_bad.txt'), + # output_file=OUT_FILE) From a59e281107d440a001540fd1ef0bdbbcbd97dd5d Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 21 Jun 2024 15:29:59 -0700 Subject: [PATCH 14/28] working on btb pr --- config/config.yaml | 7 ++++++- spras/runner.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index 75214ad8..4d74530a 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -35,9 +35,14 @@ container_registry: # careful: too many parameters might make your runs take a long time. algorithms: + - name: "btb" + params: + include: true + - name: "local_neighborhood" params: - include: true + include: false + - name: "pathlinker" params: include: false diff --git a/spras/runner.py b/spras/runner.py index 1e88a8d3..d741e98a 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -8,6 +8,7 @@ from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2 from spras.pathlinker import PathLinker as pathlinker +from spras.btb import BowtieBuilder as btb def run(algorithm, params): From 7c4610461400da963eacb24f76d3009bc96b5317 Mon Sep 17 00:00:00 2001 From: gabeah Date: Mon, 24 Jun 2024 13:15:00 -0700 Subject: [PATCH 15/28] continual testing for btb --- spras/btb.py | 30 ++++++++++---------- test/BTB/input/edges_bad.txt | 2 +- test/BTB/test-btb.py | 46 ++++++++++++++++--------------- test/LocalNeighborhood/test_ln.py | 4 +-- 4 files changed, 42 insertions(+), 40 deletions(-) diff --git a/spras/btb.py b/spras/btb.py index ba94415f..e30255e3 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -9,10 +9,10 @@ import pandas as pd from spras.containers import prepare_volume, run_container -# from spras.interactome import ( -# convert_undirected_to_directed, -# reinsert_direction_col_directed, -# ) +from spras.interactome import ( + convert_undirected_to_directed, + reinsert_direction_col_directed, +) # what type of directionality does btb support? from spras.prm import PRM @@ -20,7 +20,7 @@ __all__ = ['BowtieBuilder'] class BowtieBuilder(PRM): - required_inputs = ['source', 'target', 'edges'] + required_inputs = ['sources', 'targets', 'edges'] #generate input taken from meo.py beacuse they have same input requirements @staticmethod @@ -64,8 +64,8 @@ def generate_inputs(data, filename_map): edges = data.get_interactome() # Format network file - #unsure if formating network file is needed - # edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') + # unsure if formating network file is needed + edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False) @@ -73,20 +73,20 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(source=None, target=None, edges=None, output_file=None, container_framework="docker"): + def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"): """ - Run PathLinker with Docker - @param nodetypes: input node types with sources and targets (required) + Run BowtieBuilder with Docker + @param sources: input sources (required) + @param targets: input targets (required) @param network: input network file (required) @param output_file: path to the output pathway file (required) - @param k: path length (optional) @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ # Add additional parameter validation # Do not require k # Use the PathLinker default # Could consider setting the default here instead - if not source or not target or not edges or not output_file: + if not sources or not targets or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') work_dir = '/btb' @@ -94,10 +94,10 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew # Each volume is a tuple (src, dest) volumes = list() - bind_path, source_file = prepare_volume(source, work_dir) + bind_path, source_file = prepare_volume(sources, work_dir) volumes.append(bind_path) - bind_path, target_file = prepare_volume(target, work_dir) + bind_path, target_file = prepare_volume(targets, work_dir) volumes.append(bind_path) bind_path, edges_file = prepare_volume(edges, work_dir) @@ -161,5 +161,5 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # What about multiple raw_pathway_files print("PARSING OUTPUT BTB") df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1], axis=0) - # df = reinsert_direction_col_directed(df) + df = reinsert_direction_col_directed(df) df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') \ No newline at end of file diff --git a/test/BTB/input/edges_bad.txt b/test/BTB/input/edges_bad.txt index 6f97ec4e..41557b71 100644 --- a/test/BTB/input/edges_bad.txt +++ b/test/BTB/input/edges_bad.txt @@ -1,4 +1,4 @@ -A D 5 +A D E 5 B D 1.3 C D 0.4 D E 4.5 diff --git a/test/BTB/test-btb.py b/test/BTB/test-btb.py index 77e42865..331afa9d 100644 --- a/test/BTB/test-btb.py +++ b/test/BTB/test-btb.py @@ -25,30 +25,32 @@ class TestBowtieBuilder: def test_ln(self): print("RUNNING TEST_LN FOR BOWTIEBUILDER") OUT_FILE.unlink(missing_ok=True) - BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), - target_file=Path(TEST_DIR, 'input', 'target.txt'), - edges_file=Path(TEST_DIR, 'input', 'edges.txt'), + BowtieBuilder(sources=Path(TEST_DIR, 'input', 'source.txt'), + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) assert OUT_FILE.exists(), 'Output file was not written' expected_file = Path(TEST_DIR, 'expected', 'output1.txt') assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' - # """ - # Run the bowtiebuilder algorithm with a missing input file - # """ - # def test_missing_file(self): - # with pytest.raises(OSError): - # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'missing.txt'), - # target_file=Path(TEST_DIR, 'input', 'target.txt'), - # edges_file=Path(TEST_DIR, 'input', 'edges.txt'), - # output_file=OUT_FILE) - - # """ - # Run the local neighborhood algorithm with an improperly formatted network file - # """ - # def test_format_error(self): - # with pytest.raises(ValueError): - # BowtieBuilder(source_file=Path(TEST_DIR, 'input', 'source.txt'), - # target_file=Path(TEST_DIR, 'input', 'target.txt'), - # edges_file=Path(TEST_DIR, 'input', 'edges_bad.txt'), - # output_file=OUT_FILE) + """ + Run the bowtiebuilder algorithm with a missing input file + """ + def test_missing_file(self): + print("RUNNING TEST_MISSING_FILE FOR BOWTIEBUILDER") + with pytest.raises(OSError): + BowtieBuilder(sources=Path(TEST_DIR, 'input', 'missing.txt'), + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + + """ + Run the local neighborhood algorithm with an improperly formatted network file + """ + def test_format_error(self): + print("RUNNING TEST_FORMAT_ERROR FOR BOWTIEBUILDER") + with pytest.raises(ValueError): + BowtieBuilder( sources=Path(TEST_DIR, 'input', 'source.txt'), + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), + output_file=OUT_FILE ) diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py index 649c6aeb..641fcc1d 100644 --- a/test/LocalNeighborhood/test_ln.py +++ b/test/LocalNeighborhood/test_ln.py @@ -56,8 +56,8 @@ def test_localneighborhood_required(self): out_path.unlink(missing_ok=True) # Only include required arguments LocalNeighborhood.run( - nodes=Path(TEST_DIR,'input','ln-nodes.txt') - network=Path(TEST_DIR, 'input', 'ln-network.txt') + nodes = Path(TEST_DIR,'input','ln-nodes.txt'), + network = Path(TEST_DIR, 'input', 'ln-network.txt'), output_file=OUT_FILE ) assert out_path.exists() \ No newline at end of file From e182c89abe31076066a3f56de5f5d00dab599f11 Mon Sep 17 00:00:00 2001 From: gabeah Date: Mon, 24 Jun 2024 13:18:22 -0700 Subject: [PATCH 16/28] continual testing for btb --- test/BTB/test-btb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/BTB/test-btb.py b/test/BTB/test-btb.py index 331afa9d..7cc7c2f4 100644 --- a/test/BTB/test-btb.py +++ b/test/BTB/test-btb.py @@ -14,7 +14,7 @@ sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowtieBuilder'))) from spras.btb import BowtieBuilder -TEST_DIR = Path('test', 'bowtiebuilder/') +TEST_DIR = Path('test', 'BTB/') OUT_FILE = Path(TEST_DIR, 'output', 'output1.txt') From 12369a47ee66800dde41d5cf22ae89e552ff828d Mon Sep 17 00:00:00 2001 From: gabeah Date: Mon, 24 Jun 2024 14:41:53 -0700 Subject: [PATCH 17/28] prepping for PR --- test/BTB/test-btb.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/test/BTB/test-btb.py b/test/BTB/test-btb.py index 7cc7c2f4..1f78c8a1 100644 --- a/test/BTB/test-btb.py +++ b/test/BTB/test-btb.py @@ -12,10 +12,10 @@ # Modify the path because of the - in the directory SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowtieBuilder'))) -from spras.btb import BowtieBuilder +from spras.btb import BowtieBuilder as bowtiebuilder -TEST_DIR = Path('test', 'BTB/') -OUT_FILE = Path(TEST_DIR, 'output', 'output1.txt') +TEST_DIR = Path('test', 'BowtieBuilder/') +OUT_FILE = Path(TEST_DIR, 'output', 'raw-pathway.txt') class TestBowtieBuilder: @@ -25,7 +25,7 @@ class TestBowtieBuilder: def test_ln(self): print("RUNNING TEST_LN FOR BOWTIEBUILDER") OUT_FILE.unlink(missing_ok=True) - BowtieBuilder(sources=Path(TEST_DIR, 'input', 'source.txt'), + bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'source.txt'), targets=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) @@ -36,21 +36,26 @@ def test_ln(self): """ Run the bowtiebuilder algorithm with a missing input file """ + def test_missing_arguments(self): + with pytest.raises(ValueError): + bowtiebuilder.run( + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + + def test_missing_file(self): - print("RUNNING TEST_MISSING_FILE FOR BOWTIEBUILDER") - with pytest.raises(OSError): - BowtieBuilder(sources=Path(TEST_DIR, 'input', 'missing.txt'), + with pytest.raises(FileNotFoundError): + bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'unknown.txt'), targets=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) - """ - Run the local neighborhood algorithm with an improperly formatted network file - """ + # """ + # """ def test_format_error(self): - print("RUNNING TEST_FORMAT_ERROR FOR BOWTIEBUILDER") - with pytest.raises(ValueError): - BowtieBuilder( sources=Path(TEST_DIR, 'input', 'source.txt'), + with pytest.raises(IndexError): + bowtiebuilder.run(sources=Path(TEST_DIR, 'input', 'source.txt'), targets=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), - output_file=OUT_FILE ) + output_file=OUT_FILE) \ No newline at end of file From 01aa15ff23d9d1d5df4f17344c3ec397a73166cb Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 25 Jun 2024 13:34:15 -0700 Subject: [PATCH 18/28] passing two tests, need to pass two more --- spras/btb.py | 2 +- test/BowtieBuilder/test_btb.py | 33 +++++++++++++++++---------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/spras/btb.py b/spras/btb.py index 68dbe57d..2c7330f1 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -127,7 +127,7 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) - container_suffix = "bowtiebuilder" + container_suffix = "btb" out = run_container(container_framework, container_suffix, command, diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py index 1112c59c..f2731c2e 100644 --- a/test/BowtieBuilder/test_btb.py +++ b/test/BowtieBuilder/test_btb.py @@ -22,16 +22,16 @@ class TestBowtieBuilder: """ Run the bowtiebuilder algorithm on the example input files and check the output matches the expected output """ - # def test_ln(self): - # print("RUNNING TEST_LN FOR BOWTIEBUILDER") - # OUT_FILE.unlink(missing_ok=True) - # bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), - # target=Path(TEST_DIR, 'input', 'target.txt'), - # edges=Path(TEST_DIR, 'input', 'edges.txt'), - # output_file=OUT_FILE) - # assert OUT_FILE.exists(), 'Output file was not written' - # expected_file = Path(TEST_DIR, 'expected', 'output1.txt') - # assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' + def test_ln(self): + print("RUNNING TEST_LN FOR BOWTIEBUILDER") + OUT_FILE.unlink(missing_ok=True) + bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), + target=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + assert OUT_FILE.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'output1.txt') + assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' """ Run the bowtiebuilder algorithm with a missing input file @@ -44,12 +44,13 @@ def test_missing_arguments(self): output_file=OUT_FILE) - # def test_missing_file(self): - # with pytest.raises(FileNotFoundError): - # bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), - # target=Path(TEST_DIR, 'input', 'target.txt'), - # edges=Path(TEST_DIR, 'input', 'edges.txt'), - # output_file=OUT_FILE) + def test_missing_file(self): + with pytest.raises(OSError): + bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), + target=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE) + # """ # """ From 740cfb78020b95b173cb59026b33999fe96898c6 Mon Sep 17 00:00:00 2001 From: gabeah Date: Wed, 26 Jun 2024 13:31:54 -0700 Subject: [PATCH 19/28] losing me mind over pytest :) --- test/BowtieBuilder/test_btb.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py index f2731c2e..34188fe7 100644 --- a/test/BowtieBuilder/test_btb.py +++ b/test/BowtieBuilder/test_btb.py @@ -37,25 +37,29 @@ def test_ln(self): Run the bowtiebuilder algorithm with a missing input file """ def test_missing_arguments(self): - with pytest.raises(ValueError): + with pytest.raises(ValueError) as exec_info: bowtiebuilder.run( target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) + assert exec_info.type is ValueError def test_missing_file(self): with pytest.raises(OSError): - bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), + try: + bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) + except OSError: + raise OSError # """ # """ def test_format_error(self): - with pytest.raises(IndexError): + with pytest.raises(IndexError) as exec_info: bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), From f82b611b9a09718e8fef4f614d2938bbad14b813 Mon Sep 17 00:00:00 2001 From: gabeah Date: Fri, 28 Jun 2024 11:32:00 -0700 Subject: [PATCH 20/28] three tests passinggit add . --- spras/btb.py | 5 ++++- test/BowtieBuilder/test_btb.py | 23 +++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/spras/btb.py b/spras/btb.py index 2c7330f1..6fca6e6e 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -89,6 +89,9 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew if not source or not target or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') + if not source.exists() or not target.exists() or not edges.exists(): + raise ValueError('Missing input file') + work_dir = '/btb' # Each volume is a tuple (src, dest) @@ -127,7 +130,7 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) - container_suffix = "btb" + container_suffix = "bowtiebuilder" out = run_container(container_framework, container_suffix, command, diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py index 34188fe7..cbed73f9 100644 --- a/test/BowtieBuilder/test_btb.py +++ b/test/BowtieBuilder/test_btb.py @@ -34,7 +34,7 @@ def test_ln(self): assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' """ - Run the bowtiebuilder algorithm with a missing input file + Run the bowtiebuilder algorithm with missing arguments """ def test_missing_arguments(self): with pytest.raises(ValueError) as exec_info: @@ -42,24 +42,27 @@ def test_missing_arguments(self): target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) - assert exec_info.type is ValueError + print(exec_info) + """ + Run the bowtiebuilder algorithm with missing files + """ def test_missing_file(self): - with pytest.raises(OSError): - try: - bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), + with pytest.raises(ValueError): + print("beginning test") + bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), output_file=OUT_FILE) - except OSError: - raise OSError - # """ - # """ + """ + Run the bowtiebuilder algorithm with bad input data + """ def test_format_error(self): - with pytest.raises(IndexError) as exec_info: + with pytest.raises(IndexError): + print("beginning test") bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), From dbf81a09e223dec7e9872850ad6ed3b139dd566f Mon Sep 17 00:00:00 2001 From: gabeah Date: Sun, 30 Jun 2024 19:54:51 -0700 Subject: [PATCH 21/28] continuing finishing test functions for PR --- spras/btb.py | 18 ++++++++++++++++++ test/BowtieBuilder/test_btb.py | 6 ++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/spras/btb.py b/spras/btb.py index 6fca6e6e..ea327d54 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -89,9 +89,27 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew if not source or not target or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') + # Test for pytest (docker container also runs this) if not source.exists() or not target.exists() or not edges.exists(): raise ValueError('Missing input file') + print("testing line by line") + + + # Testing for btb index + # It's a bit messy, but it works + with open(edges, 'r') as edge_file: + try: + for line in edge_file: + line = line.strip() + line = line.split('\t') + line = line[2] + + except Exception as err: + print("error!!") + print(err) + raise(err) + work_dir = '/btb' # Each volume is a tuple (src, dest) diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py index cbed73f9..a9d8f9fe 100644 --- a/test/BowtieBuilder/test_btb.py +++ b/test/BowtieBuilder/test_btb.py @@ -37,12 +37,11 @@ def test_ln(self): Run the bowtiebuilder algorithm with missing arguments """ def test_missing_arguments(self): - with pytest.raises(ValueError) as exec_info: + with pytest.raises(ValueError): bowtiebuilder.run( target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), - output_file=OUT_FILE) - print(exec_info) + output_file=OUT_FILE) """ @@ -50,7 +49,6 @@ def test_missing_arguments(self): """ def test_missing_file(self): with pytest.raises(ValueError): - print("beginning test") bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'unknown.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges.txt'), From d0822bb090494d57bd911310711f1442d7c14814 Mon Sep 17 00:00:00 2001 From: gabeah Date: Sun, 30 Jun 2024 19:56:17 -0700 Subject: [PATCH 22/28] oops, forgot some extra files --- .github/workflows/test-spras.yml | 10 ++++++++++ config/config.yaml | 10 ++++++---- spras/btb.py | 8 +++----- test/parse-outputs/test_parse_outputs.py | 2 +- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 1afe2c7b..ca253aaf 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -84,6 +84,16 @@ jobs: docker pull reedcompbio/allpairs:latest docker pull reedcompbio/domino:latest docker pull gabeah/local-neighborhood:latest + docker pull gabeah/bowtiebuilder:latest + - name: Build BowtieBuilder Docker Image + uses: docker/build-push-action@v1 + with: + path: docker-wrappers/BowTieBuilder + dockerfile: docker-wrappers/BowTieBuilder/Dockerfile + respository: gabeah/bowtiebuilder + tags: latest + cache_from: gabeah/bowtiebuilder:latest + push: false - name: Build Local Neighborhood Docker Image uses: docker/build-push-action@v1 with: diff --git a/config/config.yaml b/config/config.yaml index 33d0ee54..a3622825 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -35,10 +35,6 @@ container_registry: # careful: too many parameters might make your runs take a long time. algorithms: - - name: "btb" - params: - include: true - - name: "local_neighborhood" params: include: false @@ -119,6 +115,12 @@ datasets: # Relative path from the spras directory data_dir: "input" + # label: data2 + # node_files: ["tps-egfr-prizes.txt"] + # edge_files: ["phosphosite-irefindex13.0-uniprot.txt"] + # other_files: [] + # data_dir: "input" + # If we want to reconstruct then we should set run to true. # TODO: if include is true above but run is false here, algs are not run. # is this the behavior we want? diff --git a/spras/btb.py b/spras/btb.py index ea327d54..a8ea20b5 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -90,14 +90,12 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew raise ValueError('Required BowtieBuilder arguments are missing') # Test for pytest (docker container also runs this) - if not source.exists() or not target.exists() or not edges.exists(): + # Testing out here avoids the trouble that container errors provide + if not Path(source).exists() or not Path(target).exists() or not Path(edges).exists(): raise ValueError('Missing input file') - print("testing line by line") - - # Testing for btb index - # It's a bit messy, but it works + # It's a bit messy, but it works \_('_')_/ with open(edges, 'r') as edge_file: try: for line in edge_file: diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 31830ff0..a42775ed 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -11,7 +11,7 @@ # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples -algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'local_neighborhood'] +algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'local_neighborhood', 'bowtiebuilder'] class TestParseOutputs: From 7d10dbb38728c508dcc8574c90dee1688281a61d Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 2 Jul 2024 17:08:22 -0700 Subject: [PATCH 23/28] wrapping finished? --- config/config.yaml | 5 ----- spras/btb.py | 22 +++++++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index a3622825..952ecd97 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -115,11 +115,6 @@ datasets: # Relative path from the spras directory data_dir: "input" - # label: data2 - # node_files: ["tps-egfr-prizes.txt"] - # edge_files: ["phosphosite-irefindex13.0-uniprot.txt"] - # other_files: [] - # data_dir: "input" # If we want to reconstruct then we should set run to true. # TODO: if include is true above but run is false here, algs are not run. diff --git a/spras/btb.py b/spras/btb.py index a8ea20b5..121e9d0f 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -20,7 +20,7 @@ __all__ = ['BowtieBuilder'] class BowtieBuilder(PRM): - required_inputs = ['source', 'target', 'edges'] + required_inputs = ['sources', 'targets', 'edges'] #generate input taken from meo.py beacuse they have same input requirements @staticmethod @@ -51,11 +51,11 @@ def generate_inputs(data, filename_map): # include or exclude? nodes = nodes.loc[nodes[node_type]] if(node_type == "sources"): - nodes.to_csv(filename_map["source"], sep= '\t', index=False, columns=['NODEID'], header=False) + nodes.to_csv(filename_map["sources"], sep= '\t', index=False, columns=['NODEID'], header=False) print("NODES: ") print(nodes) elif(node_type == "targets"): - nodes.to_csv(filename_map["target"], sep= '\t', index=False, columns=['NODEID'], header=False) + nodes.to_csv(filename_map["targets"], sep= '\t', index=False, columns=['NODEID'], header=False) print("NODES: ") print(nodes) @@ -73,7 +73,7 @@ def generate_inputs(data, filename_map): # Skips parameter validation step @staticmethod - def run(source=None, target=None, edges=None, output_file=None, container_framework="docker"): + def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"): """ Run PathLinker with Docker @param nodetypes: input node types with sources and targets (required) @@ -82,16 +82,19 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew @param k: path length (optional) @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ + + print("running algorithm") + # Add additional parameter validation # Do not require k # Use the PathLinker default # Could consider setting the default here instead - if not source or not target or not edges or not output_file: + if not sources or not targets or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') # Test for pytest (docker container also runs this) # Testing out here avoids the trouble that container errors provide - if not Path(source).exists() or not Path(target).exists() or not Path(edges).exists(): + if not Path(sources).exists() or not Path(targets).exists() or not Path(edges).exists(): raise ValueError('Missing input file') # Testing for btb index @@ -113,10 +116,10 @@ def run(source=None, target=None, edges=None, output_file=None, container_framew # Each volume is a tuple (src, dest) volumes = list() - bind_path, source_file = prepare_volume(source, work_dir) + bind_path, source_file = prepare_volume(sources, work_dir) volumes.append(bind_path) - bind_path, target_file = prepare_volume(target, work_dir) + bind_path, target_file = prepare_volume(targets, work_dir) volumes.append(bind_path) bind_path, edges_file = prepare_volume(edges, work_dir) @@ -179,6 +182,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file): """ # What about multiple raw_pathway_files print("PARSING OUTPUT BTB") - df = pd.read_csv(raw_pathway_file, sep='\t').take([0, 1], axis=0) + df = pd.read_csv(raw_pathway_file, sep='\t') # df = reinsert_direction_col_directed(df) + print(df) df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') From c3310fe12128b6defbb212395e64520eaa2c6a55 Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 2 Jul 2024 17:21:40 -0700 Subject: [PATCH 24/28] wrapping finished --- .../expected/bowtiebuilder-edges-expected.txt | 9 +++++++++ .../expected/bowtiebuilder-pathway-expected.txt | 7 +++++++ test/parse-outputs/input/bowtiebuilder-raw-pathway.txt | 8 ++++++++ 3 files changed, 24 insertions(+) create mode 100644 test/generate-inputs/expected/bowtiebuilder-edges-expected.txt create mode 100644 test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt create mode 100644 test/parse-outputs/input/bowtiebuilder-raw-pathway.txt diff --git a/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt new file mode 100644 index 00000000..0fb97edd --- /dev/null +++ b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt @@ -0,0 +1,9 @@ +A B 0.98 U +B C 0.77 U +A D 0.12 U +C D 0.89 U +C E 0.59 U +C F 0.5 U +F G 0.76 U +G H 0.92 U +G I 0.66 U diff --git a/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt new file mode 100644 index 00000000..350d85f7 --- /dev/null +++ b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt @@ -0,0 +1,7 @@ +A D +A B +C F +B C +F G +G I +G H diff --git a/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt new file mode 100644 index 00000000..5699a112 --- /dev/null +++ b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt @@ -0,0 +1,8 @@ +Node1 Node2 +A D +A B +C F +B C +F G +G I +G H From f3d400678874761b6cb77e3baebe6c342dcfe6b5 Mon Sep 17 00:00:00 2001 From: gabeah Date: Mon, 8 Jul 2024 14:49:41 -0700 Subject: [PATCH 25/28] updated dockerfile/config to reflect new repo/dockerhub images --- config/config.yaml | 2 +- docker-wrappers/BowTieBuilder/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 952ecd97..19e29619 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -14,7 +14,7 @@ container_registry: base_url: docker.io # The owner or project of the registry # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: gabeah + owner: reedcompbio # This list of algorithms should be generated by a script which checks the filesystem for installs. # It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile index 08f4c1f4..06606ec9 100644 --- a/docker-wrappers/BowTieBuilder/Dockerfile +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -1,5 +1,5 @@ FROM python:3.8-bullseye WORKDIR /btb -RUN wget https://raw.githubusercontent.com/ellango2612/BowTieBuilder-Algorithm/main/btb.py +RUN wget https://raw.githubusercontent.com/Reed-CompBio/BowTieBuilder-Algorithm/main/btb.py RUN pip install networkx==2.8 \ No newline at end of file From 40ad34ff3d27e69a5f9c2a9729caadabad9c2763 Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 9 Jul 2024 12:02:58 -0700 Subject: [PATCH 26/28] beginning PR fixes --- .github/workflows/test-spras.yml | 16 +-- config/config.yaml | 23 ++- docker-wrappers/LocalNeighborhood/Dockerfile | 6 - docker-wrappers/LocalNeighborhood/README.md | 44 ------ .../LocalNeighborhood/local_neighborhood.py | 70 ---------- .../testing-files/ln-network.txt | 5 - .../testing-files/ln-nodes.txt | 2 - .../testing-files/output2.txt | 3 - .../testing-files/output3.txt | 3 - .../testing-files/test-output.txt | 3 - spras/btb.py | 2 +- spras/local_neighborhood.py | 132 ------------------ test/generate-inputs/test_generate_inputs.py | 2 +- test/parse-outputs/test_parse_outputs.py | 2 +- 14 files changed, 16 insertions(+), 297 deletions(-) delete mode 100644 docker-wrappers/LocalNeighborhood/Dockerfile delete mode 100644 docker-wrappers/LocalNeighborhood/README.md delete mode 100644 docker-wrappers/LocalNeighborhood/local_neighborhood.py delete mode 100644 docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt delete mode 100644 docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt delete mode 100644 docker-wrappers/LocalNeighborhood/testing-files/output2.txt delete mode 100644 docker-wrappers/LocalNeighborhood/testing-files/output3.txt delete mode 100644 docker-wrappers/LocalNeighborhood/testing-files/test-output.txt delete mode 100644 spras/local_neighborhood.py diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index ca253aaf..7d66f477 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -83,25 +83,15 @@ jobs: docker pull reedcompbio/mincostflow:latest docker pull reedcompbio/allpairs:latest docker pull reedcompbio/domino:latest - docker pull gabeah/local-neighborhood:latest - docker pull gabeah/bowtiebuilder:latest + docker pull reedcompbio/bowtiebuilder:v1 - name: Build BowtieBuilder Docker Image uses: docker/build-push-action@v1 with: path: docker-wrappers/BowTieBuilder dockerfile: docker-wrappers/BowTieBuilder/Dockerfile - respository: gabeah/bowtiebuilder + respository: reedcompbio/bowtiebuilder tags: latest - cache_from: gabeah/bowtiebuilder:latest - push: false - - name: Build Local Neighborhood Docker Image - uses: docker/build-push-action@v1 - with: - path: docker-wrappers/LocalNeighborhood - dockerfile: docker-wrappers/LocalNeighborhood/Dockerfile - repository: gabeah/local-neighborhood - tags: latest - cache_from: gabeah/local-neighborhood:latest + cache_from: reedcompbio/bowtiebuilder:v1 push: false - name: Build Omics Integrator 1 Docker image uses: docker/build-push-action@v1 diff --git a/config/config.yaml b/config/config.yaml index 19e29619..ca7a4175 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -35,19 +35,16 @@ container_registry: # careful: too many parameters might make your runs take a long time. algorithms: - - name: "local_neighborhood" - params: - include: false - name: "pathlinker" params: - include: false + include: true run1: k: range(100,201,100) - name: "omicsintegrator1" params: - include: false + include: true run1: b: [5, 6] w: np.linspace(0,5,2) @@ -55,7 +52,7 @@ algorithms: - name: "omicsintegrator2" params: - include: false + include: true run1: b: [4] g: [0] @@ -65,7 +62,7 @@ algorithms: - name: "meo" params: - include: false + include: true run1: max_path_length: [3] local_search: ["Yes"] @@ -73,18 +70,18 @@ algorithms: - name: "mincostflow" params: - include: false + include: true run1: flow: [1] # The flow must be an int capacity: [1] - name: "allpairs" params: - include: false + include: true - name: "domino" params: - include: false + include: true run1: slice_threshold: [0.3] module_threshold: [0.05] @@ -136,13 +133,13 @@ analysis: include: true # Create output files for each pathway that can be visualized with GraphSpace graphspace: - include: false + include: true # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: false + include: true # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset ml: - include: false + include: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph diff --git a/docker-wrappers/LocalNeighborhood/Dockerfile b/docker-wrappers/LocalNeighborhood/Dockerfile deleted file mode 100644 index d1001b87..00000000 --- a/docker-wrappers/LocalNeighborhood/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -# Create a Docker image for the Local Neighborhood algorithm here -FROM python:3.12-alpine - -WORKDIR /LocalNeighborhood -COPY local_neighborhood.py . -COPY /testing-files . diff --git a/docker-wrappers/LocalNeighborhood/README.md b/docker-wrappers/LocalNeighborhood/README.md deleted file mode 100644 index 94209fa4..00000000 --- a/docker-wrappers/LocalNeighborhood/README.md +++ /dev/null @@ -1,44 +0,0 @@ -# Local Neighborhood Docker image - -A simple pathway reconstruction algorithm used to welcome new contributors. -The algorithm takes a network and a list of nodes as input. -It outputs all edges in the network that have a node from the list as an endpoint. - -New contributors complete the `Dockerfile` to wrap the implementation in `local_neighborhood.py`. - -## Usage -``` -$ python local_neighborhood.py -h -usage: local_neighborhood.py [-h] --network NETWORK --nodes NODES --output OUTPUT - -Local neighborhood pathway reconstruction - -optional arguments: - -h, --help show this help message and exit - --network NETWORK Path to the network file with '|' delimited node pairs - --nodes NODES Path to the nodes file - --output OUTPUT Path to the output file that will be written -``` - -## Example behavior -Network file: -``` -A|B -C|B -C|D -D|E -A|E -``` - -Nodes file: -``` -A -B -``` - -Output file: -``` -A|B -C|B -A|E -``` \ No newline at end of file diff --git a/docker-wrappers/LocalNeighborhood/local_neighborhood.py b/docker-wrappers/LocalNeighborhood/local_neighborhood.py deleted file mode 100644 index 2a2b6096..00000000 --- a/docker-wrappers/LocalNeighborhood/local_neighborhood.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Local neighborhood pathway reconstruction algorithm. -The algorithm takes a network and a list of nodes as input. -It outputs all edges in the network that have a node from the list as an endpoint. -""" - -import argparse -from pathlib import Path - - -def parse_arguments(): - """ - Process command line arguments. - @return arguments - """ - parser = argparse.ArgumentParser( - description="Local neighborhood pathway reconstruction" - ) - parser.add_argument("--network", type=Path, required=True, help="Path to the network file with '|' delimited node pairs") - parser.add_argument("--nodes", type=Path, required=True, help="Path to the nodes file") - parser.add_argument("--output", type=Path, required=True, help="Path to the output file that will be written") - - return parser.parse_args() - - -def local_neighborhood(network_file: Path, nodes_file: Path, output_file: Path): - if not network_file.exists(): - raise OSError(f"Network file {str(network_file)} does not exist") - if not nodes_file.exists(): - raise OSError(f"Nodes file {str(nodes_file)} does not exist") - if output_file.exists(): - print(f"Output file {str(output_file)} will be overwritten") - - # Create the parent directories for the output file if needed - output_file.parent.mkdir(parents=True, exist_ok=True) - - # Read the list of nodes - nodes = set() - with nodes_file.open() as nodes_f: - for line in nodes_f: - nodes.add(line.strip()) - print(f"Read {len(nodes)} unique nodes") - - # Iterate through the network edges and write those that have an endpoint in the node set - in_edge_counter = 0 - out_edge_counter = 0 - with output_file.open('w') as output_f: - with network_file.open() as network_f: - for line in network_f: - line = line.strip() - in_edge_counter += 1 - endpoints = line.split("|") - if len(endpoints) != 2: - raise ValueError(f"Edge {line} does not contain 2 nodes separated by '|'") - if endpoints[0] in nodes or endpoints[1] in nodes: - out_edge_counter += 1 - output_f.write(f"{line}\n") - print(f"Kept {out_edge_counter} of {in_edge_counter} edges") - - -def main(): - """ - Parse arguments and run pathway reconstruction - """ - args = parse_arguments() - local_neighborhood(args.network, args.nodes, args.output) - - -if __name__ == "__main__": - main() diff --git a/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt b/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt deleted file mode 100644 index 5a9b0451..00000000 --- a/docker-wrappers/LocalNeighborhood/testing-files/ln-network.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B -C|B -C|D -D|E -A|E diff --git a/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt b/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt deleted file mode 100644 index 35d242ba..00000000 --- a/docker-wrappers/LocalNeighborhood/testing-files/ln-nodes.txt +++ /dev/null @@ -1,2 +0,0 @@ -A -B diff --git a/docker-wrappers/LocalNeighborhood/testing-files/output2.txt b/docker-wrappers/LocalNeighborhood/testing-files/output2.txt deleted file mode 100644 index 58dc92d9..00000000 --- a/docker-wrappers/LocalNeighborhood/testing-files/output2.txt +++ /dev/null @@ -1,3 +0,0 @@ -A|B -C|B -A|E diff --git a/docker-wrappers/LocalNeighborhood/testing-files/output3.txt b/docker-wrappers/LocalNeighborhood/testing-files/output3.txt deleted file mode 100644 index 58dc92d9..00000000 --- a/docker-wrappers/LocalNeighborhood/testing-files/output3.txt +++ /dev/null @@ -1,3 +0,0 @@ -A|B -C|B -A|E diff --git a/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt b/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt deleted file mode 100644 index 58dc92d9..00000000 --- a/docker-wrappers/LocalNeighborhood/testing-files/test-output.txt +++ /dev/null @@ -1,3 +0,0 @@ -A|B -C|B -A|E diff --git a/spras/btb.py b/spras/btb.py index 121e9d0f..38bc1863 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -149,7 +149,7 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram print('Running BowtieBuilder with arguments: {}'.format(' '.join(command)), flush=True) - container_suffix = "bowtiebuilder" + container_suffix = "bowtiebuilder:v1" out = run_container(container_framework, container_suffix, command, diff --git a/spras/local_neighborhood.py b/spras/local_neighborhood.py deleted file mode 100644 index c50f3210..00000000 --- a/spras/local_neighborhood.py +++ /dev/null @@ -1,132 +0,0 @@ -import warnings -from pathlib import Path - -import pandas as pd - -from spras.containers import prepare_volume, run_container -from spras.interactome import ( - convert_undirected_to_directed, - reinsert_direction_col_undirected, -) -from spras.util import add_rank_column -from spras.prm import PRM - -__all__ = ['LocalNeighborhood'] - -class LocalNeighborhood(PRM): - required_inputs = ["network", "nodes"] - - @staticmethod - def generate_inputs(data, filename_map): - """ - Access fields from the dataset and write the required input files - @param data: dataset - @param filename_map: dictionary where key is input type, and value is a path to a file - @return: - """ - print('generating inputs!!') - # Check if filename - for input_type in LocalNeighborhood.required_inputs: - if input_type not in filename_map: - raise ValueError(f"{input_type} filename is missing") - - # Select nodes that have sources, targets, prizes, or are active - if data.contains_node_columns(['sources','targets','prize']): - node_df = data.request_node_columns(['sources','targets','prize']) - - else: - raise ValueError("LocalNeighborhood requires nore prizes or sources and targets") - - # LocalNeighborhood already gives warnings - node_df.to_csv(filename_map['nodes'], - #sep='\t', - index = False, - columns=['NODEID'], - header=False) - - # Get network file - edges_df = data.get_interactome() - - # Rename Direction column - edges_df.to_csv(filename_map['network'], - sep='|', - index=False, - columns=['Interactor1','Interactor2'], - header=False) - return None - - @staticmethod - def run(nodes=None, network=None, output_file=None, container_framework="docker"): - ''' - Method to running LocalNeighborhood correctly - @param nodes: input node types with sources and targets (required) - @param network: input network file (required) - @param output_file: path to the output pathway file (required) - ''' - print('Running!!!') - if not nodes or not network or not output_file: - raise ValueError('Required LocalNeighborhood arguments are missing') - - work_dir = '/spras' - - volumes = list() - - bind_path, node_file = prepare_volume(nodes, work_dir) - volumes.append(bind_path) - - bind_path, network_file = prepare_volume(network, work_dir) - volumes.append(bind_path) - - # LocalNeighborhood does not provide an argument to set the output directory - # Use its --output argument to set the output file prefix to specify an absolute path and prefix - out_dir = Path(output_file).parent - # LocalNeighborhood requires that the output directory exist - out_dir.mkdir(parents=True, exist_ok=True) - bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) - volumes.append(bind_path) - mapped_out_prefix = mapped_out_dir + '/out' # Use posix path inside the container - - command = ['python', - '/LocalNeighborhood/local_neighborhood.py', - '--network', network_file, - '--nodes', node_file, - '--output', mapped_out_prefix] - - print('Running LocalNeighborhood with arguments: {}'.format(' '.join(command)), flush=True) - - container_suffix = "local-neighborhood" - out = run_container(container_framework, - container_suffix, - command, - volumes, - work_dir) - print(out) - - # Rename the primary output file to match the desired output filename - # Currently LocalNeighborhood only writes one output file so we do not need to delete others - output_edges = Path(out_dir, 'out') - output_edges.rename(output_file) - return None - - @staticmethod - def parse_output(raw_pathway_file, standardized_pathway_file): - ''' - Method for standardizing output data - @raw_pathway_file: raw output from LocalNeighborhood - @standardized_pathway_file: universal output, for use in Pandas analysis - ''' - print('Parsing outputs!!') - df = pd.read_csv(raw_pathway_file, - sep='|', - header=None - ) - - # Add extra data to not annoy the SNAKEFILE - df = add_rank_column(df) - df = reinsert_direction_col_undirected(df) - - df.to_csv(standardized_pathway_file, - header=None, - index=False, - sep='\t') - return None diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index a6f04a42..b9d14a6f 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -17,7 +17,7 @@ 'domino': 'network', 'pathlinker': 'network', 'allpairs': 'network', - 'local_neighborhood': 'network' + 'bowtiebuilder': 'edges' } diff --git a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index a42775ed..4974402e 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -11,7 +11,7 @@ # the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt # from https://github.com/Shamir-Lab/DOMINO/tree/master/examples -algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'local_neighborhood', 'bowtiebuilder'] +algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino', 'bowtiebuilder'] class TestParseOutputs: From 3b9d70e0ce2f02d3d06e67452ab8a4ebba0818a4 Mon Sep 17 00:00:00 2001 From: gabeah Date: Tue, 9 Jul 2024 12:32:15 -0700 Subject: [PATCH 27/28] PR changes finished/comments have been made --- docker-wrappers/BowTieBuilder/README.md | 14 ++++- spras/btb.py | 47 +++++--------- spras/runner.py | 1 - test/BowtieBuilder/test_btb.py | 1 - .../expected_output/ln-output.txt | 3 - .../input/ln-bad-network.txt | 5 -- test/LocalNeighborhood/input/ln-network.txt | 5 -- test/LocalNeighborhood/input/ln-nodes.txt | 2 - test/LocalNeighborhood/test_ln.py | 63 ------------------- .../local_neighborhood-network-expected.txt | 5 -- .../local_neighborhood-pathway-expected.txt | 2 - .../input/local_neighborhood-raw-pathway.txt | 2 - 12 files changed, 30 insertions(+), 120 deletions(-) delete mode 100644 test/LocalNeighborhood/expected_output/ln-output.txt delete mode 100644 test/LocalNeighborhood/input/ln-bad-network.txt delete mode 100644 test/LocalNeighborhood/input/ln-network.txt delete mode 100644 test/LocalNeighborhood/input/ln-nodes.txt delete mode 100644 test/LocalNeighborhood/test_ln.py delete mode 100644 test/generate-inputs/expected/local_neighborhood-network-expected.txt delete mode 100644 test/parse-outputs/expected/local_neighborhood-pathway-expected.txt delete mode 100644 test/parse-outputs/input/local_neighborhood-raw-pathway.txt diff --git a/docker-wrappers/BowTieBuilder/README.md b/docker-wrappers/BowTieBuilder/README.md index e1131c13..555904be 100644 --- a/docker-wrappers/BowTieBuilder/README.md +++ b/docker-wrappers/BowTieBuilder/README.md @@ -1,3 +1,15 @@ # BowTieBuilder Docker image -This is the dockerimage for BTB, created by @ctrlaltaf and @ellango \ No newline at end of file +A Docker image for [BowTieBuilder](https://github.com/Reed-CompBio/BowTieBuilder-Algorithm) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/bowtiebuilder). + +To create the Docker image run: +``` +docker build -t reedcompbio/bowtiebuilder:v1 -f Dockerfile . +``` +from this directory. + +## Original Paper + +The original paper for [BowTieBuilder] can be accessed here: + +Supper, J., Spangenberg, L., Planatscher, H. et al. BowTieBuilder: modeling signal transduction pathways. BMC Syst Biol 3, 67 (2009). https://doi.org/10.1186/1752-0509-3-67 \ No newline at end of file diff --git a/spras/btb.py b/spras/btb.py index 38bc1863..77152050 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -9,11 +9,10 @@ import pandas as pd from spras.containers import prepare_volume, run_container -# from spras.interactome import ( -# convert_undirected_to_directed, -# reinsert_direction_col_directed, -# ) -# what type of directionality does btb support? +from spras.interactome import ( + convert_undirected_to_directed, + reinsert_direction_col_directed, +) from spras.prm import PRM @@ -63,41 +62,35 @@ def generate_inputs(data, filename_map): # Create network file edges = data.get_interactome() - # Format network file - #unsure if formating network file is needed - # edges = add_directionality_constant(edges, 'EdgeType', '(pd)', '(pp)') + # Format into directed graph + edges.convert_undirected_to_directed() edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False) - # Skips parameter validation step + # Skips parameter validation step @staticmethod def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"): """ Run PathLinker with Docker - @param nodetypes: input node types with sources and targets (required) - @param network: input network file (required) + @param sources: input source file (required) + @param targets: input target file (required) + @param edges: input edge file (required) @param output_file: path to the output pathway file (required) - @param k: path length (optional) @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) """ - print("running algorithm") - - # Add additional parameter validation - # Do not require k - # Use the PathLinker default - # Could consider setting the default here instead + # Tests for pytest (docker container also runs this) + # Testing out here avoids the trouble that container errors provide + if not sources or not targets or not edges or not output_file: raise ValueError('Required BowtieBuilder arguments are missing') - # Test for pytest (docker container also runs this) - # Testing out here avoids the trouble that container errors provide if not Path(sources).exists() or not Path(targets).exists() or not Path(edges).exists(): raise ValueError('Missing input file') - # Testing for btb index + # Testing for btb index errors # It's a bit messy, but it works \_('_')_/ with open(edges, 'r') as edge_file: try: @@ -107,8 +100,7 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram line = line[2] except Exception as err: - print("error!!") - print(err) + raise(err) work_dir = '/btb' @@ -163,12 +155,7 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram print("mapped out prefix: ", mapped_out_prefix) - - - - # Rename the primary output file to match the desired output filename - # Currently PathLinker only writes one output file so we do not need to delete others - # We may not know the value of k that was used + # Output is already written to raw-pathway.txt file # output_edges = Path(next(out_dir.glob('out*-ranked-edges.txt'))) # output_edges.rename(output_file) @@ -183,6 +170,6 @@ def parse_output(raw_pathway_file, standardized_pathway_file): # What about multiple raw_pathway_files print("PARSING OUTPUT BTB") df = pd.read_csv(raw_pathway_file, sep='\t') - # df = reinsert_direction_col_directed(df) + df = reinsert_direction_col_directed(df) print(df) df.to_csv(standardized_pathway_file, header=False, index=False, sep='\t') diff --git a/spras/runner.py b/spras/runner.py index 66937fe6..2d3fb519 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -2,7 +2,6 @@ from spras.allpairs import AllPairs as allpairs from spras.dataset import Dataset from spras.domino import DOMINO as domino -from spras.local_neighborhood import LocalNeighborhood as local_neighborhood from spras.meo import MEO as meo from spras.mincostflow import MinCostFlow as mincostflow from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1 diff --git a/test/BowtieBuilder/test_btb.py b/test/BowtieBuilder/test_btb.py index a9d8f9fe..38858c5a 100644 --- a/test/BowtieBuilder/test_btb.py +++ b/test/BowtieBuilder/test_btb.py @@ -60,7 +60,6 @@ def test_missing_file(self): """ def test_format_error(self): with pytest.raises(IndexError): - print("beginning test") bowtiebuilder.run(source=Path(TEST_DIR, 'input', 'source.txt'), target=Path(TEST_DIR, 'input', 'target.txt'), edges=Path(TEST_DIR, 'input', 'edges_bad.txt'), diff --git a/test/LocalNeighborhood/expected_output/ln-output.txt b/test/LocalNeighborhood/expected_output/ln-output.txt deleted file mode 100644 index 58dc92d9..00000000 --- a/test/LocalNeighborhood/expected_output/ln-output.txt +++ /dev/null @@ -1,3 +0,0 @@ -A|B -C|B -A|E diff --git a/test/LocalNeighborhood/input/ln-bad-network.txt b/test/LocalNeighborhood/input/ln-bad-network.txt deleted file mode 100644 index 970b0e11..00000000 --- a/test/LocalNeighborhood/input/ln-bad-network.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B|E -C|B -C|D -D|E -A|E diff --git a/test/LocalNeighborhood/input/ln-network.txt b/test/LocalNeighborhood/input/ln-network.txt deleted file mode 100644 index 5a9b0451..00000000 --- a/test/LocalNeighborhood/input/ln-network.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B -C|B -C|D -D|E -A|E diff --git a/test/LocalNeighborhood/input/ln-nodes.txt b/test/LocalNeighborhood/input/ln-nodes.txt deleted file mode 100644 index 35d242ba..00000000 --- a/test/LocalNeighborhood/input/ln-nodes.txt +++ /dev/null @@ -1,2 +0,0 @@ -A -B diff --git a/test/LocalNeighborhood/test_ln.py b/test/LocalNeighborhood/test_ln.py deleted file mode 100644 index 641fcc1d..00000000 --- a/test/LocalNeighborhood/test_ln.py +++ /dev/null @@ -1,63 +0,0 @@ -import sys -from filecmp import cmp -from pathlib import Path - -import pytest - -import spras.config as config -from spras.local_neighborhood import LocalNeighborhood - -config.init_from_file("config/config.yaml") - -# TODO consider refactoring to simplify the import -# Modify the path because of the - in the directory -SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() -sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'LocalNeighborhood'))) -from local_neighborhood import local_neighborhood - -TEST_DIR = Path('test', 'LocalNeighborhood/') -OUT_FILE = Path(TEST_DIR, 'output', 'ln-output.txt') - - -class TestLocalNeighborhood: - """ - Run the local neighborhood algorithm on the example input files and check the output matches the expected output - """ - def test_ln(self): - OUT_FILE.unlink(missing_ok=True) - local_neighborhood(network_file=Path(TEST_DIR, 'input', 'ln-network.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - assert OUT_FILE.exists(), 'Output file was not written' - expected_file = Path(TEST_DIR, 'expected_output', 'ln-output.txt') - assert cmp(OUT_FILE, expected_file, shallow=False), 'Output file does not match expected output file' - - """ - Run the local neighborhood algorithm with a missing input file - """ - def test_missing_file(self): - with pytest.raises(OSError): - local_neighborhood(network_file=Path(TEST_DIR, 'input', 'missing.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - - """ - Run the local neighborhood algorithm with an improperly formatted network file - """ - def test_format_error(self): - with pytest.raises(ValueError): - local_neighborhood(network_file=Path(TEST_DIR, 'input', 'ln-bad-network.txt'), - nodes_file=Path(TEST_DIR, 'input', 'ln-nodes.txt'), - output_file=OUT_FILE) - - # Write tests for the Local Neighborhood run function here - def test_localneighborhood_required(self): - out_path = Path(OUT_FILE) - out_path.unlink(missing_ok=True) - # Only include required arguments - LocalNeighborhood.run( - nodes = Path(TEST_DIR,'input','ln-nodes.txt'), - network = Path(TEST_DIR, 'input', 'ln-network.txt'), - output_file=OUT_FILE - ) - assert out_path.exists() \ No newline at end of file diff --git a/test/generate-inputs/expected/local_neighborhood-network-expected.txt b/test/generate-inputs/expected/local_neighborhood-network-expected.txt deleted file mode 100644 index 5a9b0451..00000000 --- a/test/generate-inputs/expected/local_neighborhood-network-expected.txt +++ /dev/null @@ -1,5 +0,0 @@ -A|B -C|B -C|D -D|E -A|E diff --git a/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt b/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt deleted file mode 100644 index e2fd8d57..00000000 --- a/test/parse-outputs/expected/local_neighborhood-pathway-expected.txt +++ /dev/null @@ -1,2 +0,0 @@ -A B 1 U -B C 1 U diff --git a/test/parse-outputs/input/local_neighborhood-raw-pathway.txt b/test/parse-outputs/input/local_neighborhood-raw-pathway.txt deleted file mode 100644 index dfdd8243..00000000 --- a/test/parse-outputs/input/local_neighborhood-raw-pathway.txt +++ /dev/null @@ -1,2 +0,0 @@ -A|B -B|C From 027ecbd8a6038c344d2dc399adcf0ca38f845ef7 Mon Sep 17 00:00:00 2001 From: gabeah Date: Thu, 11 Jul 2024 13:42:49 -0700 Subject: [PATCH 28/28] small fixes, preparing to merge main and resolve conflicts --- spras/btb.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spras/btb.py b/spras/btb.py index 77152050..73ebe0e1 100644 --- a/spras/btb.py +++ b/spras/btb.py @@ -63,7 +63,7 @@ def generate_inputs(data, filename_map): edges = data.get_interactome() # Format into directed graph - edges.convert_undirected_to_directed() + edges = convert_undirected_to_directed(edges) edges.to_csv(filename_map['edges'], sep='\t', index=False, header=False) @@ -100,7 +100,6 @@ def run(sources=None, targets=None, edges=None, output_file=None, container_fram line = line[2] except Exception as err: - raise(err) work_dir = '/btb'