diff --git a/docs/pvacseq/input_file_prep/proximal_vcf.rst b/docs/pvacseq/input_file_prep/proximal_vcf.rst index 2e72c42d9..6ad3c4453 100644 --- a/docs/pvacseq/input_file_prep/proximal_vcf.rst +++ b/docs/pvacseq/input_file_prep/proximal_vcf.rst @@ -110,6 +110,8 @@ ______________________________ Phase variants using GATK’s ReadBackedPhasing _____________________________________________ +Unfortunately, the tool used for this step is no longer available in current versions of GATK. We recommend using GATK 3.6.0 to run this step. + .. code-block:: none /usr/bin/java -Xmx16g -jar /opt/GenomeAnalysisTK.jar \ diff --git a/pvactools/lib/fasta_generator.py b/pvactools/lib/fasta_generator.py index 746c91c6d..4114f4500 100644 --- a/pvactools/lib/fasta_generator.py +++ b/pvactools/lib/fasta_generator.py @@ -33,6 +33,13 @@ def __init__(self, **kwargs): self.proximal_variants_file = kwargs.pop('proximal_variants_file', None) self.proximal_variants = self.parse_proximal_variants_file() + + def contains_invalid_characters(self, sequence): + for character in ['*', 'X', '?']: + if character in sequence: + return True + return False + def position_out_of_bounds(self, position, sequence): return position > len(sequence)-1 @@ -305,10 +312,7 @@ def execute(self): if subsequence.endswith('X'): subsequence = subsequence[:-1] - if '*' in subsequence: - continue - - if 'X' in subsequence: + if self.contains_invalid_characters(subsequence): continue if len(subsequence) < self.epitope_length: diff --git a/pvactools/tools/pvacfuse/run.py b/pvactools/tools/pvacfuse/run.py index 4ce7c323d..d7d179ee2 100644 --- a/pvactools/tools/pvacfuse/run.py +++ b/pvactools/tools/pvacfuse/run.py @@ -211,7 +211,7 @@ def main(args_input = sys.argv[1:]): for epitope_length in epitope_lengths: (input_file, per_epitope_output_dir) = generate_fasta(args, output_dir, epitope_length) if os.path.getsize(input_file) == 0: - print("The intermediate FASTA file for epitope length {} is empty. 
Please check that the input AGfusion directory contains fusion entries with `*_protein.fa` files. Fusion entries without this file cannot be processed by pVACfuse.".format(epitope_length)) + print("The intermediate FASTA file for epitope length {} is empty. No processable fusions found.".format(epitope_length)) continue run_arguments['input_file'] = input_file diff --git a/tests/test_data/pvacfuse_generate_protein_fasta/input_with_invalid_character.tsv b/tests/test_data/pvacfuse_generate_protein_fasta/input_with_invalid_character.tsv new file mode 100644 index 000000000..2ebb48f60 --- /dev/null +++ b/tests/test_data/pvacfuse_generate_protein_fasta/input_with_invalid_character.tsv @@ -0,0 +1,2 @@ +#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers +PTEN ENSG00000200891(21548),MED6P1(31892) +/+ ./+ chr10:87952259 chr10:88016243 CDS/splice-site intergenic deletion/read-through 163 146 6 3691 3002 high out-of-frame . C2_domain_of_PTEN_tumour-suppressor_protein(100%),Dual_specificity_phosphatase__catalytic_domain(100%)| . . ENSG00000171862 . ENST00000371953 . 
downstream upstream duplicates(384),low_entropy(4),mismappers(15),mismatches(6),multimappers(2) CCTCACCTCCATGCAGATGCAGCTGTACCTGCAGCAGCTGCAGAAGGTGCAGCCCCCTACGCCGCTACTCCCTTCCGTGAAGGTGCAGTCCCAGCCCCCcCCCCCCCccCCcCCCCCcCCCCcCCCC|CCC??CCCCC?CCCCCCCTGCCGCCCCCACCCCACCCCTCTGTGCAGCAGCAGCTGCAGCAGCAGCCGCCACCACCCCCACCACCCCAGCCCCAGCCTCCACCCCAGCAGCAGCATCAGCCCCCTCCACGGCCCGTGCACTTGCAGCCCATGCAGTTTTCCACCCA LTSMQMQLYLQQLQKVQPPTPLLPSVKVQSQPPPPpPPPPpP|p?p?ppcrphptplcssscsssrhhphhpspslhpsssisplhgpctcspcsfpp K00193:38:H3MYFBBXX:4:1101:18904:31572,K00193:38:H3MYFBBXX:4:1101:20386:3197 diff --git a/tests/test_pvacfuse_generate_protein_fasta.py b/tests/test_pvacfuse_generate_protein_fasta.py index 9213b9bca..d2b58dd9e 100644 --- a/tests/test_pvacfuse_generate_protein_fasta.py +++ b/tests/test_pvacfuse_generate_protein_fasta.py @@ -54,3 +54,18 @@ def test_input_tsv(self): expected_output_file = os.path.join(self.test_data_dir, 'output_with_tsv.fasta') os.unlink("{}.manufacturability.tsv".format(generate_protein_fasta_output_file.name)) self.assertTrue(cmp(generate_protein_fasta_output_file.name, expected_output_file)) + + def test_arriba_tsv_with_invalid_character(self): + generate_protein_fasta_input_file = os.path.join(self.test_data_dir, 'input_with_invalid_character.tsv') + generate_protein_fasta_output_file = tempfile.NamedTemporaryFile() + + self.assertFalse(call([ + self.python, + self.executable, + generate_protein_fasta_input_file, + self.flanking_sequence_length, + generate_protein_fasta_output_file.name, + '-d', 'full' + ], shell=False)) + os.unlink("{}.manufacturability.tsv".format(generate_protein_fasta_output_file.name)) + self.assertEqual(os.path.getsize(generate_protein_fasta_output_file.name), 0)