-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtest.py
30 lines (21 loc) · 1.18 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from pyfaidx import Fasta
from pyvg.sequences import SequenceRetriever
from offsetbasedgraph.graphtraverser import GraphTraverserUsingSequence
from offsetbasedgraph import GraphWithReversals, IntervalCollection
import logging
def find_linear_path_through_chromosome(chromosome, chromend, fasta_file_name, ob_graph_file_name, vg_graph_file_name):
genome = Fasta(fasta_file_name)
seq = str(genome[chromosome][0:50818468]).lower()
logging.info("Creating sequence retriever")
sequence_retriever = SequenceRetriever.from_vg_json_graph(vg_graph_file_name)
graph = GraphWithReversals.from_numpy_file(ob_graph_file_name)
start_nodes = graph.get_first_blocks()
assert len(start_nodes) == 1, "Found %d start nodes" % start_nodes
start_node = start_nodes[0]
traverser = GraphTraverserUsingSequence(graph, seq, sequence_retriever)
traverser.search_from_node(start_node)
path = traverser.get_interval_found()
path = IntervalCollection(path)
path.to_file("22_path.intervalcollection", text_file=True)
logging.info("Done")
find_linear_path_through_chromosome("chr22", 50818468, "../hg19.fasta", "tests/whole_genome/22.nobg", "tests/whole_genome/22.json")