diff --git a/config/config.yaml b/config/config.yaml index 027ff615..a2428077 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -42,9 +42,6 @@ container_registry: # then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0,75). Pretty neat, but be # careful: too many parameters might make your runs take a long time. -# TODO: where does the dummy node argument go? -# i.e. where do i set dummy_mode = all, etc. - algorithms: - name: "pathlinker" params: @@ -73,7 +70,7 @@ algorithms: - name: "meo" params: - include: false + include: false run1: max_path_length: [3] local_search: ["Yes"] @@ -104,24 +101,38 @@ algorithms: datasets: - # Labels can only contain letters, numbers, or underscores - label: hivtest2 - node_files: ["modified_prize_05.txt"] + label: data0 + # To run OmicsIntegrator1 with dummy nodes, add the dummy.txt file to node_files + node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] # DataLoader.py can currently only load a single edge file, which is the primary network - edge_files: ["phosphosite-irefindex13.0-uniprot.txt"] + edge_files: ["network.txt"] # Placeholder other_files: [] # Relative path from the spras directory data_dir: "input" + - + label: data1 + # Reuse some of the same sources file as 'data0' but different network and targets + node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] + edge_files: ["alternative-network.txt"] + other_files: [] + # Relative path from the spras directory + data_dir: "input" -# gold_standards: -# - -# # Labels can only contain letters, numbers, or underscores -# label: gs0 -# node_files: ["gs_nodes0.txt"] -# # edge_files: [] TODO: later iteration -# data_dir: "input" -# # List of dataset labels to compare with the specific gold standard dataset -# dataset_labels: ["data0"] +gold_standards: + - + # Labels can only contain letters, numbers, or underscores + label: gs0 + node_files: ["gs_nodes0.txt"] + # edge_files: [] TODO: later iteration + data_dir: 
"input" + # List of dataset labels to compare with the specific gold standard dataset + dataset_labels: ["data0"] + - + label: gs1 + node_files: ["gs_nodes1.txt"] + data_dir: "input" + dataset_labels: ["data1", "data0"] # If we want to reconstruct then we should set run to true. # TODO: if include is true above but run is false here, algs are not run. @@ -164,4 +175,4 @@ analysis: # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' evaluation: - include: false + include: true \ No newline at end of file diff --git a/input/README.md b/input/README.md index 5c7a59fc..8ebfede4 100644 --- a/input/README.md +++ b/input/README.md @@ -18,6 +18,31 @@ C 2.5 True True D 1.9 True True True ``` +##### OmicsIntegrator1: Dummy Nodes +There are 4 dummy mode possibilities: + 1. terminals -> connect the dummy node to all nodes that have been assigned prizes + 2. all -> connect the dummy node to all nodes in the interactome i.e. full set of nodes in graph + 3. others -> connect the dummy node to all nodes that are not terminal nodes i.e. nodes w/o prizes + 4. file -> custom nodes - connect the dummy node to a specific list of nodes provided in a file +To support the `file` dummy node logic as part of OmicsIntegrator1, you can either add a separate `dummy.txt` file (and add this to the `node_files` argument in `config.yaml`) or add a `dummy` column node attribute to a file that contains `NODEID`, `prize`, `sources`, etc. + +If adding a separate `dummy.txt` file: +Make a file with the name `dummy.txt` and list the dummy nodes, each separated by a new line. Example: +``` +A +B +C +``` + +If adding the `dummy` column node attribute, then add the dummy column and specify boolean values for the `dummy` attribute: +``` +NODEID prize sources targets dummy +A 1.0 True True True +B 3.3 True True +C 2.5 True True +D 1.9 True True True +``` + A secondary format provides only a list of node identifiers and uses the filename as the node attribute, as in the example `sources.txt`. 
This format may be deprecated. diff --git a/input/dummy-1.txt b/input/dummy-1.txt deleted file mode 100644 index 535f50a7..00000000 --- a/input/dummy-1.txt +++ /dev/null @@ -1,55 +0,0 @@ -UBAC2_HUMAN -H37_HUMAN -PHF3_HUMAN -DDX52_HUMAN -HNRPK_HUMAN -Q5T5H1_HUMAN -ZC11A_HUMAN -TCPG_HUMAN -2A5E_HUMAN -CPNE2_HUMAN -HEYL_HUMAN -CAMP1_HUMAN -SVEP1_HUMAN -SPRL1_HUMAN -C9JFD3_HUMAN -THAP5_HUMAN -NCOR1_HUMAN -SKIL_HUMAN -A8YXX4_HUMAN -CO4A2_HUMAN -ZBTB3_HUMAN -MORC4_HUMAN -HIPK3_HUMAN -ZN200_HUMAN -TGFR1_HUMAN -Q2TU89_HUMAN -SUMO2_HUMAN -UBC9_HUMAN -AXIN1_HUMAN -RS27_HUMAN -UBP25_HUMAN -RN111_HUMAN -CBX4_HUMAN -B4E127_HUMAN -G4XH65_HUMAN -CPNE1_HUMAN -HDAC1_HUMAN -RNF4_HUMAN -SETB1_HUMAN -SMAD2_HUMAN -TRI62_HUMAN -FAF1_HUMAN -EF1G_HUMAN -AHNK_HUMAN -LRP1_HUMAN -UIMC1_HUMAN -B3KWV4_HUMAN -CDC27_HUMAN -RB_HUMAN -ZMYM2_HUMAN -WWP1_HUMAN -RU17_HUMAN -HIPK1_HUMAN -ZZEF1_HUMAN -SMAD4_HUMAN \ No newline at end of file diff --git a/input/dummy.txt b/input/dummy.txt index 92fb21f7..8c7e5a66 100644 --- a/input/dummy.txt +++ b/input/dummy.txt @@ -1,86 +1 @@ -NODEID prize sources dummy active -1433Z_HUMAN 1.041379133 True True -41_HUMAN 3.389112802 True True -4ET_HUMAN 2.569973509 True True -A8K1N6_HUMAN 1.948221966 True True -A9CQZ4_HUMAN 0.421460919 True True -AAGAB_HUMAN 0.906857382 True True -ABCF1_HUMAN 1.662535462 True True -ABI1_HUMAN 2.262002188 True True -ABI2_HUMAN 6.039545959 True True -ABLM1_HUMAN 1.851877252 True True -ACACA_HUMAN 1.413801552 True True -ACAP2_HUMAN 2.26361378 True True -ACINU_HUMAN 5.059742801 True True -ACK1_HUMAN 4.634804389 True True -ACLY_HUMAN 0.924296287 True True -ACTB_HUMAN 6.332977709 True True -ADAT1_HUMAN 0.15086641 True True -ADCY6_HUMAN 0.213467876 True True -ADDA_HUMAN 2.023396633 True True -ADNP_HUMAN 1.863304115 True True -AFAD_HUMAN 5.746711895 True True -AFTIN_HUMAN 1.428578311 True True -AHNK_HUMAN 1.03846887 True True -AKA10_HUMAN 1.256166574 True True -AKA11_HUMAN 0.927725859 True True -AKA12_HUMAN 0.839912266 True True -AKAP1_HUMAN 
1.744860335 True True -AKAP2_HUMAN 1.596611866 True True -AMOT_HUMAN 1.79256998 True True -ANS1A_HUMAN 2.76115098 True True -ANXA2_HUMAN 1.709856841 True True -AP3D1_HUMAN 4.077699923 True True -APC1_HUMAN 0.888837295 True True -AR6P4_HUMAN 0.701112743 True True -AR6P6_HUMAN 2.695059469 True True -ARHG5_HUMAN 7.044363255 True True -ARHG7_HUMAN 3.809839832 True True -ARHGB_HUMAN 2.260010614 True True -ARIP4_HUMAN 0.270475986 True True -ARMX3_HUMAN 0.11573305 True True -ARP8_HUMAN 1.094787599 True True -ASPM_HUMAN 0.369667496 True True -AT133_HUMAN 1.627668371 True True -AT1A1_HUMAN 2.904315518 True True -AT2B1_HUMAN 2.165602139 True True -ATRX_HUMAN 0.701149125 True True -ATX2L_HUMAN 4.425369048 True True -AZI1_HUMAN 1.861521522 True True -B2L13_HUMAN 1.614443902 True True -B4DGC6_HUMAN 0.752406932 True True -B4DM10_HUMAN 0.474391755 True True -B4DQA8_HUMAN 0.12336285 True True -B4DQQ2_HUMAN 0.509838368 True True -B4DSL6_HUMAN 1.401622791 True True -B4DZC2_HUMAN 0.736249376 True True -BACH_HUMAN 0.409715682 True True -BACH2_HUMAN 0.942291628 True True -BAD_HUMAN 0.390261342 True True -BAG6_HUMAN 0.406028443 True True -BAP18_HUMAN 2.420530962 True True -BARD1_HUMAN 0.113513875 True True -BAZ1B_HUMAN 0.714778368 True True -BAZ2A_HUMAN 1.358383434 True True -BBX_HUMAN 1.196178614 True True -BCAR1_HUMAN 5.368797237 True True -BCLF1_HUMAN 3.268307286 True True -BCS1_HUMAN 1.435079955 True True -BIG3_HUMAN 0.978095454 True True -BMS1_HUMAN 1.398231144 True True -BORG1_HUMAN 0.8309547 True True -BORG4_HUMAN 0.908661259 True True -BRAP_HUMAN 0.507219767 True True -BRD2_HUMAN 1.88956051 True True -BRD3_HUMAN 0.351886484 True True -BUD13_HUMAN 1.407446196 True True -BZW2_HUMAN 0.610443432 True True -C170B_HUMAN 0.887656818 True True -C2CD5_HUMAN 0.319586924 True True -CA052_HUMAN 1.950626165 True True -CA172_HUMAN 0.676879108 True True -CAAP1_HUMAN 0.46595922 True True -CAF1B_HUMAN 0.356408629 True True -CALM_HUMAN 0.977490284 True True -CALX_HUMAN 1.062601393 True True 
-CAV1_HUMAN 0.258171175 True True \ No newline at end of file +A \ No newline at end of file diff --git a/input/node-prizes.txt b/input/node-prizes.txt index 0e1f682d..2ce28bb7 100644 --- a/input/node-prizes.txt +++ b/input/node-prizes.txt @@ -1,3 +1,3 @@ -NODEID prize active +NODEID prize active dummy A 2 true C 5.7 true diff --git a/input/tps-egfr-prizes.txt b/input/tps-egfr-prizes.txt index 71afb4b9..9f25672e 100644 --- a/input/tps-egfr-prizes.txt +++ b/input/tps-egfr-prizes.txt @@ -1,4 +1,4 @@ -NODEID prize sources targets active +NODEID prize sources targets active dummy 1433Z_HUMAN 1.041379133 True True 41_HUMAN 3.389112802 True True 4ET_HUMAN 2.569973509 True True diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index d50b0a11..035b747c 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -87,7 +87,6 @@ def generate_inputs(data, filename_map): if 'dummy' in data.node_table.columns: dummy_df = data.node_table[data.node_table['dummy'] == True] # save as list of dummy nodes - # dummy_df.to_csv(filename_map['dummy_nodes'], sep='\t', index=False, columns=['NODEID', 'dummy'], header=['NODEID', 'dummy']) dummy_df.to_csv(filename_map['dummy_nodes'], index=False, columns=['NODEID'], header=None) else: # create empty dummy file