diff --git a/pvacseq/lib/pipeline.py b/pvacseq/lib/pipeline.py index d02aef7..4c562fe 100644 --- a/pvacseq/lib/pipeline.py +++ b/pvacseq/lib/pipeline.py @@ -59,6 +59,9 @@ def tsv_file_path(self): tsv_file = self.sample_name + '.tsv' return os.path.join(self.output_dir, tsv_file) + def split_tsv_file_path(self, split_start, split_end): + return "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end) + def convert_vcf(self): status_message("Converting VCF to TSV") if os.path.exists(self.tsv_file_path()): @@ -108,7 +111,7 @@ def split_tsv_file(self, total_row_count): if split_end > total_row_count: split_end = total_row_count status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end)) - split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) chunks.append([split_start, split_end]) if os.path.exists(split_tsv_file_path): status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end)) @@ -131,7 +134,7 @@ def split_tsv_file(self, total_row_count): if split_end > total_row_count: split_end = total_row_count status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end)) - split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) chunks.append([split_start, split_end]) if os.path.exists(split_tsv_file_path): status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end)) @@ -307,9 +310,8 @@ def __init__(self, **kwargs): def generate_fasta(self, chunks): status_message("Generating Variant Peptide FASTA and Key Files") for (split_start, split_end) in chunks: - tsv_chunk = "%d-%d" % (split_start, split_end) fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) - split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk) if os.path.exists(split_fasta_file_path): status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk)) @@ -331,7 +333,6 @@ def generate_fasta(self, chunks): def call_iedb_and_parse_outputs(self, chunks): split_parsed_output_files = [] for (split_start, split_end) in chunks: - tsv_chunk = "%d-%d" % (split_start, split_end) fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) for a in self.alleles: for epl in self.epitope_lengths: @@ -378,7 +379,7 @@ def call_iedb_and_parse_outputs(self, chunks): if len(split_iedb_output_files) > 0: status_message("Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s" % (a, epl, fasta_chunk)) - split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) params = { 'input_iedb_files' : split_iedb_output_files, 'input_tsv_file' : split_tsv_file_path, @@ -401,9 +402,8 @@ def __init__(self, **kwargs): def generate_fasta(self, chunks): status_message("Generating Variant Peptide FASTA and Key Files") for (split_start, split_end) in chunks: - tsv_chunk = "%d-%d" % (split_start, split_end) fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) - split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk) if os.path.exists(split_fasta_file_path): status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk)) @@ -425,7 +425,6 @@ def generate_fasta(self, chunks): def call_iedb_and_parse_outputs(self, chunks): split_parsed_output_files = [] for (split_start, split_end) in chunks: - tsv_chunk = "%d-%d" % (split_start, split_end) fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) for a in self.alleles: split_fasta_file_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk) @@ -466,7 +465,7 @@ def call_iedb_and_parse_outputs(self, chunks): if len(split_iedb_output_files) > 0: status_message("Parsing IEDB Output for Allele %s - Entries %s" % (a, fasta_chunk)) - split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk) + split_tsv_file_path = self.split_tsv_file_path(split_start, split_end) params = { 'input_iedb_files' : split_iedb_output_files, 'input_tsv_file' : split_tsv_file_path,