Skip to content

Commit

Permalink
Extract a function to generate the split TSV file path.
Browse files Browse the repository at this point in the history
  • Loading branch information
tmooney committed Dec 1, 2016
1 parent 935788e commit fd3380b
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions pvacseq/lib/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def tsv_file_path(self):
tsv_file = self.sample_name + '.tsv'
return os.path.join(self.output_dir, tsv_file)

def split_tsv_file_path(self, split_start, split_end):
    """Return the path for the TSV chunk covering entries split_start through split_end.

    The chunk path is the full TSV path produced by tsv_file_path() with a
    "_<start>-<end>" suffix appended, matching the naming used by the
    split/parse pipeline steps.
    """
    base_path = self.tsv_file_path()
    return "%s_%d-%d" % (base_path, split_start, split_end)

def convert_vcf(self):
status_message("Converting VCF to TSV")
if os.path.exists(self.tsv_file_path()):
Expand Down Expand Up @@ -108,7 +111,7 @@ def split_tsv_file(self, total_row_count):
if split_end > total_row_count:
split_end = total_row_count
status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end))
split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
chunks.append([split_start, split_end])
if os.path.exists(split_tsv_file_path):
status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end))
Expand All @@ -131,7 +134,7 @@ def split_tsv_file(self, total_row_count):
if split_end > total_row_count:
split_end = total_row_count
status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end))
split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
chunks.append([split_start, split_end])
if os.path.exists(split_tsv_file_path):
status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end))
Expand Down Expand Up @@ -307,9 +310,8 @@ def __init__(self, **kwargs):
def generate_fasta(self, chunks):
status_message("Generating Variant Peptide FASTA and Key Files")
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk)
if os.path.exists(split_fasta_file_path):
status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk))
Expand All @@ -331,7 +333,6 @@ def generate_fasta(self, chunks):
def call_iedb_and_parse_outputs(self, chunks):
split_parsed_output_files = []
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
for a in self.alleles:
for epl in self.epitope_lengths:
Expand Down Expand Up @@ -378,7 +379,7 @@ def call_iedb_and_parse_outputs(self, chunks):

if len(split_iedb_output_files) > 0:
status_message("Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s" % (a, epl, fasta_chunk))
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
params = {
'input_iedb_files' : split_iedb_output_files,
'input_tsv_file' : split_tsv_file_path,
Expand All @@ -401,9 +402,8 @@ def __init__(self, **kwargs):
def generate_fasta(self, chunks):
status_message("Generating Variant Peptide FASTA and Key Files")
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk)
if os.path.exists(split_fasta_file_path):
status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk))
Expand All @@ -425,7 +425,6 @@ def generate_fasta(self, chunks):
def call_iedb_and_parse_outputs(self, chunks):
split_parsed_output_files = []
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
for a in self.alleles:
split_fasta_file_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
Expand Down Expand Up @@ -466,7 +465,7 @@ def call_iedb_and_parse_outputs(self, chunks):

if len(split_iedb_output_files) > 0:
status_message("Parsing IEDB Output for Allele %s - Entries %s" % (a, fasta_chunk))
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
params = {
'input_iedb_files' : split_iedb_output_files,
'input_tsv_file' : split_tsv_file_path,
Expand Down

0 comments on commit fd3380b

Please sign in to comment.