Skip to content

Commit

Permalink
testing new species addition to methods
Browse files Browse the repository at this point in the history
  • Loading branch information
ctrlaltaf committed Jun 11, 2024
1 parent 04f9ba6 commit 7d739df
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 30 deletions.
2 changes: 1 addition & 1 deletion classes/hypergeometric_distribution_class_V3.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def predict(
pos_n = len(positive_pro_pro_neighbor) #Number of protein neighbors the protein of interest has
K = len(positive_go_neighbor) - 1 #Number of protein neighbors the GO term of interest has, same for pos & neg, does not include the protein of interest
pos_k = positive_go_annotated_pro_pro_neighbor_count #The overlap between the GO term and the protein of interest's neighbor proteins

print("pos_N: ", pos_N, "pos_n: ", pos_n, "K: ", K, "pos_k: ", pos_k)
#The hypergeometric function using variables above, math.comb(n,k) is an n choose k function
positive_score = 1 - ((math.comb(K,pos_k)*math.comb(pos_N-K,pos_n-pos_k))/math.comb(pos_N,pos_n))

Expand Down
49 changes: 27 additions & 22 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,47 +38,52 @@ def main():
if not os.path.exists("output/images"):
os.makedirs("output/images")

interactome_path = Path("./network/interactome-flybase-collapsed-weighted.txt")
go_association_path = Path("./network/fly_proGo.csv")
fly_interactome_path = Path("./network/fly_propro.csv")
fly_go_association_path = Path("./network/fly_proGo.csv")
zfish_interactome_path = Path("./network/zfish_propro.csv")
zfish_go_association_path = Path("./network/zfish_proGo.csv")
bsub_interactome_path = Path("./network/bsub_propro.csv")
bsub_go_association_path = Path("./network/bsub_proGo.csv")

output_data_path = Path("./output/data/")
output_image_path = Path("./output/images/")
dataset_directory_path = Path("./output/dataset")
graph_file_path = Path(dataset_directory_path, "graph.pickle")
sample_size = 1000
sample_size = 10000

testing_output_data_path = Path("./output/data/")
testing_output_image_path = Path("./output/images/")
testing_input_directory_path = Path("./tests/testing-dataset/")
testing_graph_file_path = Path(testing_input_directory_path, "graph.pickle")

interactome_columns = [0, 1, 4, 5]
interactome = read_specific_columns(interactome_path, interactome_columns, "\t")
interactome_columns = [0, 1]
interactome = read_specific_columns(zfish_interactome_path, interactome_columns, ",")

go_inferred_columns = [0, 2]
go_protein_pairs = read_specific_columns(
go_association_path, go_inferred_columns, ","
zfish_go_association_path, go_inferred_columns, ","
)

protein_list = []

# if there is no graph.pickle file in the output/dataset directory, uncomment the following lines
# G, protein_list = create_ppi_network(interactome, go_protein_pairs)
# export_graph_to_pickle(G, graph_file_path)
G, protein_list = create_ppi_network(interactome, go_protein_pairs)
export_graph_to_pickle(G, graph_file_path)

# if there is no sample dataset, uncomment the following lines. otherwise, the dataset in outputs will be used
# positive_dataset, negative_dataset = sample_data(
# go_protein_pairs, sample_size, protein_list, G, dataset_directory_path
# )
positive_dataset, negative_dataset = sample_data(
go_protein_pairs, sample_size, protein_list, G, dataset_directory_path
)

# Define algorithm classes and their names
algorithm_classes = {
"OverlappingNeighbors": OverlappingNeighbors,
"OverlappingNeighborsV2": OverlappingNeighborsV2,
"OverlappingNeighborsV3": OverlappingNeighborsV3,
"ProteinDegree": ProteinDegree,
"ProteinDegreeV2": ProteinDegreeV2,
"ProteinDegreeV3": ProteinDegreeV3,
"SampleAlgorithm": SampleAlgorithm,
# "OverlappingNeighbors": OverlappingNeighbors,
# "OverlappingNeighborsV2": OverlappingNeighborsV2,
# "OverlappingNeighborsV3": OverlappingNeighborsV3,
# "ProteinDegree": ProteinDegree,
# "ProteinDegreeV2": ProteinDegreeV2,
# "ProteinDegreeV3": ProteinDegreeV3,
# "SampleAlgorithm": SampleAlgorithm,
"HypergeometricDistribution": HypergeometricDistribution,
"HypergeometricDistributionV2": HypergeometricDistributionV2,
"HypergeometricDistributionV3": HypergeometricDistributionV3,
Expand All @@ -87,10 +92,10 @@ def main():

results = run_workflow(
algorithm_classes,
testing_input_directory_path,
testing_graph_file_path,
testing_output_data_path,
testing_output_image_path,
dataset_directory_path,
graph_file_path,
output_data_path,
output_image_path,
True,
True,
)
Expand Down
14 changes: 7 additions & 7 deletions tools/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,17 @@ def create_ppi_network(fly_interactome, fly_GO_term):

# go through fly interactome, add a new node if it doesnt exists already, then add their physical interactions as edges
for line in fly_interactome:
if not G.has_node(line[2]):
G.add_node(line[2], name=line[0], type="protein")
protein_list.append({"id": line[2], "name": line[0]})
if not G.has_node(line[0]):
G.add_node(line[0], name=line[0], type="protein")
protein_list.append({"id": line[0], "name": line[0]})
protein_node += 1

if not G.has_node(line[3]):
G.add_node(line[3], name=line[1], type="protein")
protein_list.append({"id": line[3], "name": line[1]})
if not G.has_node(line[1]):
G.add_node(line[1], name=line[1], type="protein")
protein_list.append({"id": line[1], "name": line[1]})
protein_node += 1

G.add_edge(line[2], line[3], type="protein_protein")
G.add_edge(line[0], line[1], type="protein_protein")
protein_protein_edge += 1
print_progress(i, total_progress)
i += 1
Expand Down

0 comments on commit 7d739df

Please sign in to comment.