Skip to content

Commit

Permalink
Extract a function to generate the split TSV file path.
Browse files Browse the repository at this point in the history
  • Loading branch information
tmooney committed Dec 1, 2016
1 parent 935788e commit fd3380b
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions pvacseq/lib/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def tsv_file_path(self):
tsv_file = self.sample_name + '.tsv'
return os.path.join(self.output_dir, tsv_file)

def split_tsv_file_path(self, split_start, split_end):
    """Return the path for the TSV chunk covering entries split_start through split_end.

    The chunk path is the full TSV path produced by tsv_file_path() with a
    "_<start>-<end>" suffix appended, matching the naming used by the
    split/parse pipeline steps.
    """
    base_path = self.tsv_file_path()
    return "%s_%d-%d" % (base_path, split_start, split_end)

def convert_vcf(self):
status_message("Converting VCF to TSV")
if os.path.exists(self.tsv_file_path()):
Expand Down Expand Up @@ -108,7 +111,7 @@ def split_tsv_file(self, total_row_count):
if split_end > total_row_count:
split_end = total_row_count
status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end))
split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
chunks.append([split_start, split_end])
if os.path.exists(split_tsv_file_path):
status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end))
Expand All @@ -131,7 +134,7 @@ def split_tsv_file(self, total_row_count):
if split_end > total_row_count:
split_end = total_row_count
status_message("Splitting TSV into smaller chunks - Entries %d-%d" % (split_start, split_end))
split_tsv_file_path = "%s_%d-%d" % (self.tsv_file_path(), split_start, split_end)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
chunks.append([split_start, split_end])
if os.path.exists(split_tsv_file_path):
status_message("Split TSV file for Entries %d-%d already exists. Skipping." % (split_start, split_end))
Expand Down Expand Up @@ -307,9 +310,8 @@ def __init__(self, **kwargs):
def generate_fasta(self, chunks):
status_message("Generating Variant Peptide FASTA and Key Files")
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk)
if os.path.exists(split_fasta_file_path):
status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk))
Expand All @@ -331,7 +333,6 @@ def generate_fasta(self, chunks):
def call_iedb_and_parse_outputs(self, chunks):
split_parsed_output_files = []
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
for a in self.alleles:
for epl in self.epitope_lengths:
Expand Down Expand Up @@ -378,7 +379,7 @@ def call_iedb_and_parse_outputs(self, chunks):

if len(split_iedb_output_files) > 0:
status_message("Parsing IEDB Output for Allele %s and Epitope Length %s - Entries %s" % (a, epl, fasta_chunk))
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
params = {
'input_iedb_files' : split_iedb_output_files,
'input_tsv_file' : split_tsv_file_path,
Expand All @@ -401,9 +402,8 @@ def __init__(self, **kwargs):
def generate_fasta(self, chunks):
status_message("Generating Variant Peptide FASTA and Key Files")
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
split_fasta_file_path = "%s_%s" % (self.split_fasta_basename(), fasta_chunk)
if os.path.exists(split_fasta_file_path):
status_message("Split FASTA file for Entries %s already exists. Skipping." % (fasta_chunk))
Expand All @@ -425,7 +425,6 @@ def generate_fasta(self, chunks):
def call_iedb_and_parse_outputs(self, chunks):
split_parsed_output_files = []
for (split_start, split_end) in chunks:
tsv_chunk = "%d-%d" % (split_start, split_end)
fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2)
for a in self.alleles:
split_fasta_file_path = "%s_%s"%(self.split_fasta_basename(), fasta_chunk)
Expand Down Expand Up @@ -466,7 +465,7 @@ def call_iedb_and_parse_outputs(self, chunks):

if len(split_iedb_output_files) > 0:
status_message("Parsing IEDB Output for Allele %s - Entries %s" % (a, fasta_chunk))
split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk)
split_tsv_file_path = self.split_tsv_file_path(split_start, split_end)
params = {
'input_iedb_files' : split_iedb_output_files,
'input_tsv_file' : split_tsv_file_path,
Expand Down

0 comments on commit fd3380b

Please sign in to comment.