diff --git a/config/config.yaml b/config/config.yaml index b87bcd45..3889cc84 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -56,6 +56,7 @@ algorithms: b: [5, 6] w: np.linspace(0,5,2) d: [10] + dummy_mode: ["file"] # Or "terminals", "all", "others" - name: "omicsintegrator2" params: @@ -101,6 +102,8 @@ datasets: - # Labels can only contain letters, numbers, or underscores label: data0 + # To run OmicsIntegrator1 with dummy nodes, add dummy.txt file to node_files + # or a dummy column to the node table node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] # DataLoader.py can currently only load a single edge file, which is the primary network edge_files: ["network.txt"] diff --git a/config/egfr.yaml b/config/egfr.yaml index 0b41f0a5..bede941e 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -33,6 +33,7 @@ algorithms: - 0.1 mu: - 0.008 + dummy_mode: ["file"] - name: omicsintegrator2 params: diff --git a/doc/README.md b/doc/README.md index 1e217c4e..bfebcfa4 100644 --- a/doc/README.md +++ b/doc/README.md @@ -23,7 +23,7 @@ The graph algorithms below have been used (or have the potential to be used) for - Basha et al., ResponseNet2.0: revealing signaling and regulatory pathways connecting your proteins and genes–now with human data. _Nucleic Acids Research._ 2013. [doi:10.1093/nar/gkt532](https://dx.doi.org/10.1093%2Fnar%2Fgkt532) - Basha et al. ResponseNet v.3: revealing signaling and regulatory pathways connecting your proteins and genes across human tissues. _Nucleic Acids Research._ 2019. [doi:10.1093/nar/gkz421](https://dx.doi.org/10.1093%2Fnar%2Fgkz421) -## Prize Collecting Steiner Forest (PCSF) +## Prize Collecting Steiner Forest (PCSF): OmicsIntegrator1 and OmicsIntegrator2 **References:** - Huang and Fraenkel. Integrating proteomic, transcriptional, and interactome data reveals hidden components of signaling and regulatory networks. _Science Signaling._ 2009. 
[doi:10.1126/scisignal.2000350](https://doi.org/10.1126/scisignal.2000350) @@ -31,6 +31,14 @@ The graph algorithms below have been used (or have the potential to be used) for - Gitter et al. Sharing information to reconstruct patient-specific pathways in heterogeneous diseases. _Pacific Symposium on Biocomputing._ 2014. [doi:10.1142/9789814583220_0005](https://doi.org/10.1142/9789814583220_0005) - Tuncbag et al., Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package. _PLoS Computational Biology._ 2016. [doi:10.1371/journal.pcbi.1004879](https://doi.org/10.1371/journal.pcbi.1004879) +One of the parameter options for OmicsIntegrator1 is `dummy_mode`. +There are 4 dummy mode possibilities: + 1. `terminals`: connect the dummy node to all nodes that have been assigned prizes + 2. `all`: connect the dummy node to all nodes in the interactome, i.e. the full set of nodes in the graph + 3. `others`: connect the dummy node to all nodes that are not terminal nodes, i.e. nodes without prizes + 4. `file`: connect the dummy node to a specific list of nodes provided in a file +To support the `file` dummy node logic as part of OmicsIntegrator1, you can either add a separate `dummy.txt` file (and add this to the `node_files` argument in `config.yaml`) or add a `dummy` column node attribute to a file that contains `NODEID`, `prize`, `sources`, etc. + ## PathLinker PathLinker takes as input (1) a weighted, directed PPI network, (2) two sets of nodes: a source set (representing receptors of a pathway of interest) and a target set (representing transcriptional regulators of a pathway of interest), and (3) an integer _k_. PathLinker efficiently computes the _k_-shortest paths from any source to any target and returns the subnetwork of the top _k_ paths as the pathway reconstruction. Later work expanded PathLinker by incorporating protein localization information to re-score tied paths, dubbed Localized PathLinker (LocPL). 
diff --git a/input/README.md b/input/README.md index 5c7a59fc..50be2a7e 100644 --- a/input/README.md +++ b/input/README.md @@ -11,8 +11,8 @@ All other columns specify additional node attributes such as prizes. Any nodes that are listed in a node file but are not present in one or more edges in the edge file will be removed. For example: ``` -NODEID prize sources targets active -A 1.0 True True +NODEID prize sources targets active dummy +A 1.0 True True True B 3.3 True True C 2.5 True True D 1.9 True True True diff --git a/input/node-prizes.txt b/input/node-prizes.txt index 0e1f682d..d03c3049 100644 --- a/input/node-prizes.txt +++ b/input/node-prizes.txt @@ -1,3 +1,3 @@ -NODEID prize active -A 2 true +NODEID prize active dummy +A 2 true true C 5.7 true diff --git a/input/tps-egfr-prizes.txt b/input/tps-egfr-prizes.txt index 71afb4b9..bb7a6aae 100644 --- a/input/tps-egfr-prizes.txt +++ b/input/tps-egfr-prizes.txt @@ -1,4 +1,4 @@ -NODEID prize sources targets active +NODEID prize sources targets active dummy 1433Z_HUMAN 1.041379133 True True 41_HUMAN 3.389112802 True True 4ET_HUMAN 2.569973509 True True @@ -181,7 +181,7 @@ EF1A1_HUMAN 3.774750081 True True EF1B_HUMAN 0.768939794 True True EF1D_HUMAN 1.240472409 True True EFNB2_HUMAN 2.222686177 True True -EGF_HUMAN 10 True True +EGF_HUMAN 10 True True True EGFR_HUMAN 6.787874699 True True EGLN1_HUMAN 1.876580206 True True EIF3B_HUMAN 2.048949271 True True diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 16469924..5438751b 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -35,19 +35,21 @@ def write_conf(filename=Path('config.txt'), w=None, b=None, d=None, mu=None, noi f.write('processes = 1\n') f.write('threads = 1\n') -""" -Omics Integrator 1 works with partially directed graphs -- it takes in the universal input directly -Expected raw input format: -Interactor1 Interactor2 Weight Direction -- the expected raw input file should have node pairs in the 1st and 2nd columns, 
with a weight in the 3rd column and directionality in the 4th column -- it can include repeated and bidirectional edges -- it uses 'U' for undirected edges and 'D' for directed edges - -""" class OmicsIntegrator1(PRM): - required_inputs = ['prizes', 'edges'] + """ + Omics Integrator 1 works with partially directed graphs + - it takes in the universal input directly + + Expected raw input format: + Interactor1 Interactor2 Weight Direction + - the expected raw input file should have node pairs in the 1st and 2nd columns, with a weight in the 3rd column and + directionality in the 4th column + - it can include repeated and bidirectional edges + - it uses 'U' for undirected edges and 'D' for directed edges + + """ + required_inputs = ['prizes', 'edges', 'dummy_nodes'] @staticmethod def generate_inputs(data, filename_map): @@ -83,13 +85,22 @@ def generate_inputs(data, filename_map): columns=['Interactor1','Interactor2','Weight','Direction'], header=['protein1','protein2','weight','directionality']) + # creates the dummy_nodes file + if 'dummy' in data.node_table.columns: + dummy_df = data.node_table[data.node_table['dummy'] == True] + # save as list of dummy nodes + dummy_df.to_csv(filename_map['dummy_nodes'], index=False, columns=['NODEID'], header=None) + else: + # create empty dummy file + with open(filename_map['dummy_nodes'], mode='w'): + pass # TODO add parameter validation # TODO add support for knockout argument # TODO add reasonable default values # TODO document required arguments @staticmethod - def run(edges=None, prizes=None, dummy_mode=None, mu_squared=None, exclude_terms=None, + def run(edges=None, prizes=None, dummy_nodes=None, dummy_mode=None, mu_squared=None, exclude_terms=None, output_file=None, noisy_edges=None, shuffled_prizes=None, random_terminals=None, seed=None, w=None, b=None, d=None, mu=None, noise=None, g=None, r=None, container_framework="docker"): """ @@ -118,6 +129,19 @@ def run(edges=None, prizes=None, dummy_mode=None, mu_squared=None, 
exclude_terms bind_path, prize_file = prepare_volume(prizes, work_dir) volumes.append(bind_path) + # 4 dummy mode possibilities: + # 1. terminals -> connect the dummy node to all nodes that have been assigned prizes + # 2. all -> connect the dummy node to all nodes in the interactome i.e. full set of nodes in graph + # 3. others -> connect the dummy node to all nodes that are not terminal nodes i.e. nodes w/o prizes + # 4. file -> connect the dummy node to a specific list of nodes provided in a file + + # add dummy node file to the volume if dummy_mode is not None and it is 'file' + if dummy_mode == 'file': + if dummy_nodes is None: + raise ValueError("dummy_nodes file is required when dummy_mode is set to 'file'") + bind_path, dummy_file = prepare_volume(dummy_nodes, work_dir) + volumes.append(bind_path) + out_dir = Path(output_file).parent # Omics Integrator 1 requires that the output directory exist out_dir.mkdir(parents=True, exist_ok=True) @@ -139,9 +163,16 @@ def run(edges=None, prizes=None, dummy_mode=None, mu_squared=None, exclude_terms '--outpath', mapped_out_dir, '--outlabel', 'oi1'] + # add the dummy mode argument + if dummy_mode is not None and dummy_mode: + # for custom dummy modes, add the file + if dummy_mode == 'file': + command.extend(['--dummy', dummy_file]) + # else pass in the dummy_mode and let oi1 handle it + else: + command.extend(['--dummy', dummy_mode]) + # Add optional arguments - if dummy_mode is not None: - command.extend(['--dummyMode', str(dummy_mode)]) if mu_squared is not None and mu_squared: command.extend(['--musquared']) if exclude_terms is not None and exclude_terms: diff --git a/test/OmicsIntegrator1/input/oi1-dummy.txt b/test/OmicsIntegrator1/input/oi1-dummy.txt new file mode 100644 index 00000000..223b7836 --- /dev/null +++ b/test/OmicsIntegrator1/input/oi1-dummy.txt @@ -0,0 +1 @@ +B diff --git a/test/OmicsIntegrator1/test_oi1.py b/test/OmicsIntegrator1/test_oi1.py index 6664f744..35b41d42 100644 --- 
a/test/OmicsIntegrator1/test_oi1.py +++ b/test/OmicsIntegrator1/test_oi1.py @@ -49,6 +49,7 @@ def test_oi1_all_optional(self): # Include all optional arguments OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', prizes=TEST_DIR+'input/oi1-prizes.txt', + dummy_nodes=None, dummy_mode='terminals', mu_squared=True, exclude_terms=True, @@ -66,6 +67,23 @@ def test_oi1_all_optional(self): r=0) assert out_path.exists() + def test_oi1_dummy_file(self): + out_path = Path(OUT_FILE) + out_path.unlink(missing_ok=True) + # Include optional argument + OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', + prizes=TEST_DIR+'input/oi1-prizes.txt', + dummy_nodes=TEST_DIR + 'input/oi1-dummy.txt', + dummy_mode='file', + output_file=OUT_FILE, + w=5, + b=1, + d=10, + noise=0.333, + g=0.001, + r=0) + assert out_path.exists() + def test_oi1_missing(self): # Test the expected error is raised when required arguments are missing with pytest.raises(ValueError): @@ -81,6 +99,18 @@ def test_oi1_missing(self): b=1, d=10) + def test_oi1_missing_dummy(self): + # Test the expected error is raised when the dummy_nodes file is missing and the dummy_mode is 'file' + with pytest.raises(ValueError): + # No dummy_nodes file + OmicsIntegrator1.run(edges=TEST_DIR+'input/oi1-edges.txt', + prizes=TEST_DIR + 'input/oi1-prizes.txt', + output_file=TEST_DIR+'output/test_optimalForest.sif', + w=5, + b=1, + d=10, + dummy_mode='file') + # Only run Singularity test if the binary is available on the system # spython is only available on Unix, but do not explicitly skip non-Unix platforms @pytest.mark.skipif(not shutil.which('singularity'), reason='Singularity not found on system')