pr changes
sumedhars committed Oct 18, 2024
1 parent 0d34b21 commit 541812b
Showing 7 changed files with 56 additions and 161 deletions.
45 changes: 28 additions & 17 deletions config/config.yaml
@@ -42,9 +42,6 @@ container_registry:
# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0.75). Pretty neat, but be
# careful: too many parameters might make your runs take a long time.

# TODO: where does the dummy node argument go?
# i.e. where do i set dummy_mode = all, etc.

algorithms:
- name: "pathlinker"
params:
@@ -73,7 +70,7 @@ algorithms:

- name: "meo"
params:
include: false
include: false
run1:
max_path_length: [3]
local_search: ["Yes"]
@@ -104,24 +101,38 @@ algorithms:
datasets:
-
# Labels can only contain letters, numbers, or underscores
label: hivtest2
node_files: ["modified_prize_05.txt"]
label: data0
# To run OmicsIntegrator1 with dummy nodes, add the dummy.txt file to node_files
node_files: ["node-prizes.txt", "sources.txt", "targets.txt"]
# DataLoader.py can currently only load a single edge file, which is the primary network
edge_files: ["phosphosite-irefindex13.0-uniprot.txt"]
edge_files: ["network.txt"]
# Placeholder
other_files: []
# Relative path from the spras directory
data_dir: "input"
-
label: data1
# Reuse some of the same sources file as 'data0' but different network and targets
node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
edge_files: ["alternative-network.txt"]
other_files: []
# Relative path from the spras directory
data_dir: "input"

# gold_standards:
# -
# # Labels can only contain letters, numbers, or underscores
# label: gs0
# node_files: ["gs_nodes0.txt"]
# # edge_files: [] TODO: later iteration
# data_dir: "input"
# # List of dataset labels to compare with the specific gold standard dataset
# dataset_labels: ["data0"]
gold_standards:
-
# Labels can only contain letters, numbers, or underscores
label: gs0
node_files: ["gs_nodes0.txt"]
# edge_files: [] TODO: later iteration
data_dir: "input"
# List of dataset labels to compare with the specific gold standard dataset
dataset_labels: ["data0"]
-
label: gs1
node_files: ["gs_nodes1.txt"]
data_dir: "input"
dataset_labels: ["data1", "data0"]

# If we want to reconstruct then we should set run to true.
# TODO: if include is true above but run is false here, algs are not run.
@@ -164,4 +175,4 @@ analysis:
# 'euclidean', 'manhattan', 'cosine'
metric: 'euclidean'
evaluation:
include: false
include: true
25 changes: 25 additions & 0 deletions input/README.md
@@ -18,6 +18,31 @@ C 2.5 True True
D 1.9 True True True
```

##### OmicsIntegrator1: Dummy Nodes
There are four dummy mode possibilities (see the sketch after this list):
1. `terminals` -> connect the dummy node to all nodes that have been assigned prizes
2. `all` -> connect the dummy node to every node in the interactome, i.e. the full set of nodes in the graph
3. `others` -> connect the dummy node to all nodes that are not terminal nodes, i.e. nodes without prizes
4. `file` -> connect the dummy node to a custom list of nodes provided in a file
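
The way each mode maps to a set of dummy-node neighbors can be summarized with a small sketch. This is illustrative only: the function name, arguments, and the use of a `prize` column to identify terminals are assumptions, not the OmicsIntegrator1 or SPRAS API.

```python
import pandas as pd

def select_dummy_neighbors(dummy_mode: str, node_table: pd.DataFrame,
                           interactome_nodes: set, dummy_file=None) -> set:
    """Hypothetical helper: return the nodes the dummy node is connected to."""
    # Terminals are the nodes that have been assigned prizes (assumed 'prize' column)
    terminals = set(node_table.loc[node_table['prize'].notna(), 'NODEID'])
    if dummy_mode == 'terminals':
        return terminals
    if dummy_mode == 'all':
        # Every node in the interactome, i.e. the full node set of the graph
        return set(interactome_nodes)
    if dummy_mode == 'others':
        # All non-terminal nodes, i.e. nodes without prizes
        return set(interactome_nodes) - terminals
    if dummy_mode == 'file':
        # A custom list of nodes, one identifier per line (see dummy.txt below)
        with open(dummy_file) as f:
            return {line.strip() for line in f if line.strip()}
    raise ValueError(f"Unknown dummy mode: {dummy_mode}")
```
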
To support the `file` dummy mode in OmicsIntegrator1, you can either add a separate `dummy.txt` file (and add it to the `node_files` list in `config.yaml`) or add a `dummy` column node attribute to a file that already contains `NODEID`, `prize`, `sources`, etc.

If adding a separate `dummy.txt` file:
Make a file named `dummy.txt` that lists the dummy nodes, one per line. Example:
```
A
B
C
```

If using the `dummy` column node attribute, add a `dummy` column to the node file and specify boolean values for it:
```
NODEID prize sources targets dummy
A 1.0 True True True
B 3.3 True True
C 2.5 True True
D 1.9 True True True
```
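
For reference, a minimal sketch of how a `dummy` column can be turned into the node-list format above. It mirrors the logic in `spras/omicsintegrator1.py` from this commit, but the standalone function, output file name, and example table here are illustrative assumptions.

```python
import pandas as pd

def write_dummy_nodes(node_table: pd.DataFrame, output_path: str) -> None:
    """Write the NODEIDs flagged as dummy nodes, one per line, with no header."""
    dummy_df = node_table[node_table['dummy'] == True]
    dummy_df.to_csv(output_path, index=False, columns=['NODEID'], header=False)

# A hypothetical node table with a boolean dummy column
node_table = pd.DataFrame({
    'NODEID': ['A', 'B', 'C', 'D'],
    'prize': [1.0, 3.3, 2.5, 1.9],
    'dummy': [True, False, False, True],
})
write_dummy_nodes(node_table, 'dummy_nodes.txt')  # file contains the lines "A" and "D"
```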

A secondary format provides only a list of node identifiers and uses the filename as the node attribute, as in the example `sources.txt`.
This format may be deprecated.

55 changes: 0 additions & 55 deletions input/dummy-1.txt

This file was deleted.

87 changes: 1 addition & 86 deletions input/dummy.txt
@@ -1,86 +1 @@
NODEID prize sources dummy active
1433Z_HUMAN 1.041379133 True True
41_HUMAN 3.389112802 True True
4ET_HUMAN 2.569973509 True True
A8K1N6_HUMAN 1.948221966 True True
A9CQZ4_HUMAN 0.421460919 True True
AAGAB_HUMAN 0.906857382 True True
ABCF1_HUMAN 1.662535462 True True
ABI1_HUMAN 2.262002188 True True
ABI2_HUMAN 6.039545959 True True
ABLM1_HUMAN 1.851877252 True True
ACACA_HUMAN 1.413801552 True True
ACAP2_HUMAN 2.26361378 True True
ACINU_HUMAN 5.059742801 True True
ACK1_HUMAN 4.634804389 True True
ACLY_HUMAN 0.924296287 True True
ACTB_HUMAN 6.332977709 True True
ADAT1_HUMAN 0.15086641 True True
ADCY6_HUMAN 0.213467876 True True
ADDA_HUMAN 2.023396633 True True
ADNP_HUMAN 1.863304115 True True
AFAD_HUMAN 5.746711895 True True
AFTIN_HUMAN 1.428578311 True True
AHNK_HUMAN 1.03846887 True True
AKA10_HUMAN 1.256166574 True True
AKA11_HUMAN 0.927725859 True True
AKA12_HUMAN 0.839912266 True True
AKAP1_HUMAN 1.744860335 True True
AKAP2_HUMAN 1.596611866 True True
AMOT_HUMAN 1.79256998 True True
ANS1A_HUMAN 2.76115098 True True
ANXA2_HUMAN 1.709856841 True True
AP3D1_HUMAN 4.077699923 True True
APC1_HUMAN 0.888837295 True True
AR6P4_HUMAN 0.701112743 True True
AR6P6_HUMAN 2.695059469 True True
ARHG5_HUMAN 7.044363255 True True
ARHG7_HUMAN 3.809839832 True True
ARHGB_HUMAN 2.260010614 True True
ARIP4_HUMAN 0.270475986 True True
ARMX3_HUMAN 0.11573305 True True
ARP8_HUMAN 1.094787599 True True
ASPM_HUMAN 0.369667496 True True
AT133_HUMAN 1.627668371 True True
AT1A1_HUMAN 2.904315518 True True
AT2B1_HUMAN 2.165602139 True True
ATRX_HUMAN 0.701149125 True True
ATX2L_HUMAN 4.425369048 True True
AZI1_HUMAN 1.861521522 True True
B2L13_HUMAN 1.614443902 True True
B4DGC6_HUMAN 0.752406932 True True
B4DM10_HUMAN 0.474391755 True True
B4DQA8_HUMAN 0.12336285 True True
B4DQQ2_HUMAN 0.509838368 True True
B4DSL6_HUMAN 1.401622791 True True
B4DZC2_HUMAN 0.736249376 True True
BACH_HUMAN 0.409715682 True True
BACH2_HUMAN 0.942291628 True True
BAD_HUMAN 0.390261342 True True
BAG6_HUMAN 0.406028443 True True
BAP18_HUMAN 2.420530962 True True
BARD1_HUMAN 0.113513875 True True
BAZ1B_HUMAN 0.714778368 True True
BAZ2A_HUMAN 1.358383434 True True
BBX_HUMAN 1.196178614 True True
BCAR1_HUMAN 5.368797237 True True
BCLF1_HUMAN 3.268307286 True True
BCS1_HUMAN 1.435079955 True True
BIG3_HUMAN 0.978095454 True True
BMS1_HUMAN 1.398231144 True True
BORG1_HUMAN 0.8309547 True True
BORG4_HUMAN 0.908661259 True True
BRAP_HUMAN 0.507219767 True True
BRD2_HUMAN 1.88956051 True True
BRD3_HUMAN 0.351886484 True True
BUD13_HUMAN 1.407446196 True True
BZW2_HUMAN 0.610443432 True True
C170B_HUMAN 0.887656818 True True
C2CD5_HUMAN 0.319586924 True True
CA052_HUMAN 1.950626165 True True
CA172_HUMAN 0.676879108 True True
CAAP1_HUMAN 0.46595922 True True
CAF1B_HUMAN 0.356408629 True True
CALM_HUMAN 0.977490284 True True
CALX_HUMAN 1.062601393 True True
CAV1_HUMAN 0.258171175 True True
A
2 changes: 1 addition & 1 deletion input/node-prizes.txt
@@ -1,3 +1,3 @@
NODEID prize active
NODEID prize active dummy
A 2 true
C 5.7 true
2 changes: 1 addition & 1 deletion input/tps-egfr-prizes.txt
@@ -1,4 +1,4 @@
NODEID prize sources targets active
NODEID prize sources targets active dummy
1433Z_HUMAN 1.041379133 True True
41_HUMAN 3.389112802 True True
4ET_HUMAN 2.569973509 True True
1 change: 0 additions & 1 deletion spras/omicsintegrator1.py
@@ -87,7 +87,6 @@ def generate_inputs(data, filename_map):
if 'dummy' in data.node_table.columns:
dummy_df = data.node_table[data.node_table['dummy'] == True]
# save as list of dummy nodes
# dummy_df.to_csv(filename_map['dummy_nodes'], sep='\t', index=False, columns=['NODEID', 'dummy'], header=['NODEID', 'dummy'])
dummy_df.to_csv(filename_map['dummy_nodes'], index=False, columns=['NODEID'], header=None)
else:
# create empty dummy file
