From a12437b057e961b0f2bd07c4cfe3368d71a9ed2f Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Mon, 14 Nov 2016 09:10:52 -0600 Subject: [PATCH 1/7] Handle repetitive inframe indel at beginning of the sequence --- pvacseq/lib/parse_output.py | 4 ++-- .../output_pat27_4_18.iedb.parsed.tsv | 1 + .../pat27_4.ann.HLA-A*02:01.9.tsv | 20 +++++++++++++++++++ tests/test_data/parse_output/pat27_4.tsv | 2 ++ .../test_data/parse_output/pat27_4_18.fa.key | 4 ++++ tests/test_parse_output.py | 18 +++++++++++++++++ 6 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/test_data/parse_output/output_pat27_4_18.iedb.parsed.tsv create mode 100644 tests/test_data/parse_output/pat27_4.ann.HLA-A*02:01.9.tsv create mode 100644 tests/test_data/parse_output/pat27_4.tsv create mode 100644 tests/test_data/parse_output/pat27_4_18.fa.key diff --git a/pvacseq/lib/parse_output.py b/pvacseq/lib/parse_output.py index d432c88..ec6987a 100644 --- a/pvacseq/lib/parse_output.py +++ b/pvacseq/lib/parse_output.py @@ -125,8 +125,8 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(result, mt_position, wt_re result['match_direction'] = 'left' return - #If the previous WT epitope was matched "from the left" we start by comparing to the baseline match - if previous_result['match_direction'] == 'left': + #If there is no previous result or the previous WT epitope was matched "from the left" we start by comparing to the baseline match + if previous_result is None or previous_result['match_direction'] == 'left': mutation_position = find_mutation_position_from_left(baseline_best_match_wt_epitope_seq, mt_epitope_seq) best_match_count = determine_consecutive_matches_from_left(mt_epitope_seq, baseline_best_match_wt_epitope_seq) #The alternate best match candidate "from the right" is inferred from the baseline best match position and the indel length diff --git a/tests/test_data/parse_output/output_pat27_4_18.iedb.parsed.tsv b/tests/test_data/parse_output/output_pat27_4_18.iedb.parsed.tsv new file mode 100644 index 0000000..6fb1648 --- /dev/null +++ b/tests/test_data/parse_output/output_pat27_4_18.iedb.parsed.tsv @@ -0,0 +1 @@ +Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT Score Method Best MT Score Corresponding WT Score Corresponding Fold Change Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median MT Score Median WT Score Median Fold Change NetMHC WT Score NetMHC MT Score diff --git a/tests/test_data/parse_output/pat27_4.ann.HLA-A*02:01.9.tsv b/tests/test_data/parse_output/pat27_4.ann.HLA-A*02:01.9.tsv new file mode 100644 index 0000000..88baed3 --- /dev/null +++ b/tests/test_data/parse_output/pat27_4.ann.HLA-A*02:01.9.tsv @@ -0,0 +1,20 @@ +allele seq_num start end length peptide ic50 percentile rank +HLA-A*02:01 21 9 17 9 DEDNSLFPT 32923.45 56 +HLA-A*02:01 22 8 16 9 DEDNSLFPT 32923.45 56 +HLA-A*02:01 21 10 18 9 EDNSLFPTR 37798.39 74 +HLA-A*02:01 22 9 17 9 EDNSLFPTR 37798.39 74 +HLA-A*02:01 21 7 15 9 DDDEDNSLF 39523.61 81 +HLA-A*02:01 22 6 14 9 DDDEDNSLF 39523.61 81 +HLA-A*02:01 21 6 14 9 DDDDEDNSL 39790.07 82 +HLA-A*02:01 22 5 13 9 DDDDEDNSL 39790.07 82 +HLA-A*02:01 21 8 16 9 DDEDNSLFP 41439.95 89 +HLA-A*02:01 22 7 15 9 DDEDNSLFP 41439.95 89 +HLA-A*02:01 21 2 10 9 DDDDDDDDE 45072.48 100 +HLA-A*02:01 22 1 9 9 DDDDDDDDE 45072.48 100 +HLA-A*02:01 21 5 13 9 DDDDDEDNS 45266.52 100 +HLA-A*02:01 22 4 12 9 DDDDDEDNS 45266.52 100 +HLA-A*02:01 21 4 12 9 DDDDDDEDN 45685.23 100 +HLA-A*02:01 22 3 11 9 DDDDDDEDN 45685.23 100 +HLA-A*02:01 21 3 11 9 DDDDDDDED 45944.50 100 +HLA-A*02:01 22 2 10 9 DDDDDDDED 45944.50 100 +HLA-A*02:01 21 1 9 9 DDDDDDDDD 46155.74 100 diff --git a/tests/test_data/parse_output/pat27_4.tsv b/tests/test_data/parse_output/pat27_4.tsv new file mode 100644 index 0000000..3da6bf6 --- /dev/null +++ b/tests/test_data/parse_output/pat27_4.tsv @@ -0,0 +1,2 @@ +chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index +9 95237024 95237027 CTCA C ASPN ENST00000375544 DE/E ENSG00000106819 MKEYVLLLFLALCSAKPFFSPSHIALKNMMLKDMEDTDDDDDDDDDDDDDDEDNSLFPTREPRSHFFPFDLFPMCPFGCQCYSRVVHCSDLGLTSVPTNIPFDTRMLDLQNNKIKEIKENDFKGLTSLYGLILNNNKLTKIHPKAFLTTKKLRRLYLSHNQLSEIPLNLPKSLAELRIHENKVKKIQKDTFKGMNALHVLEMSANPLDNNGIEPGAFEGVTVFHIRIAEAKLTSVPKGLPPTLLELHLDYNKISTVELEDFKRYKELQRLGLGNNKITDIENGSLANIPRVREIHLENNKLKKIPSGLPELKYLQIIFLHSNSIARVGVNDFCPTVPKMKKSLYSAISLFNNPVKYWEMQPATFRCVLSRMSVQLGNFGM inframe_del 51-52 NA NA NA NA NA NA NA NA ASPN_ENST00000375544_1.inframe_del.51-52DE/E diff --git a/tests/test_data/parse_output/pat27_4_18.fa.key b/tests/test_data/parse_output/pat27_4_18.fa.key new file mode 100644 index 0000000..b2a3347 --- /dev/null +++ b/tests/test_data/parse_output/pat27_4_18.fa.key @@ -0,0 +1,4 @@ +21: +- WT.ASPN_ENST00000375544_1.inframe_del.51-52DE/E +22: +- MT.ASPN_ENST00000375544_1.inframe_del.51-52DE/E diff --git a/tests/test_parse_output.py b/tests/test_parse_output.py index 7249dd0..4c16406 100644 --- a/tests/test_parse_output.py +++ b/tests/test_parse_output.py @@ -82,6 +82,24 @@ def test_parse_output_runs_and_produces_expected_output_with_multiple_iedb_files expected_output_file = os.path.join(self.test_data_dir, "output_Test_21.iedb.parsed.top.tsv") self.assertTrue(cmp(parse_output_output_file.name, expected_output_file)) + def test_parse_output_runs_and_produces_expected_output_for_indel_at_beginning_of_sequence(self): + parse_output_input_iedb_file = os.path.join(self.test_data_dir, "pat27_4.ann.HLA-A*02:01.9.tsv") + parse_output_input_tsv_file = os.path.join(self.test_data_dir, "pat27_4.tsv") + parse_output_key_file = os.path.join(self.test_data_dir, "pat27_4_18.fa.key") + parse_output_output_file = tempfile.NamedTemporaryFile() + + self.assertFalse(call([ + self.python, + self.executable, + parse_output_input_iedb_file, + parse_output_input_tsv_file, + parse_output_key_file, + parse_output_output_file.name, + ], shell=False)) + + expected_output_file = os.path.join(self.test_data_dir, "output_pat27_4_18.iedb.parsed.tsv") + self.assertTrue(cmp(parse_output_output_file.name, expected_output_file)) + def test_input_frameshift_variant_feature_elongation_gets_parsed_correctly(self): parse_output_input_iedb_file = os.path.join(self.test_data_dir, "input_frameshift_variant_feature_elongation.ann.HLA-A*29:02.9.tsv") parse_output_input_tsv_file = os.path.join(self.test_data_dir, "input_frameshift_variant_feature_elongation.tsv") From f21adb0c13ca0e0b785ae3fec438a8320f6076db Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Tue, 15 Nov 2016 08:32:20 -0600 Subject: [PATCH 2/7] Fix bug for inframe indels where positions toward the end weren't matched correctly and add test --- pvacseq/lib/parse_output.py | 109 ++++++++++-------- .../output_Test_21.iedb.parsed.tsv | 3 - ...ame_insertion_aa_insertion.iedb.parsed.tsv | 1 - ...e_insertion_aa_replacement.iedb.parsed.tsv | 2 - .../output_nn_align.iedb.parsed.tsv | 4 +- .../output_pat126_17.iedb.parsed.tsv | 4 + .../parse_output/pat126.ann.HLA-A*01:01.9.tsv | 19 +++ tests/test_data/parse_output/pat126.tsv | 2 + tests/test_data/parse_output/pat126_17.fa.key | 4 + .../MHC_Class_I/Test.combined.parsed.tsv | 14 +-- .../tmp/Test.HLA-E*01:01.10.parsed.tsv_1-48 | 4 +- .../tmp/Test.HLA-E*01:01.9.parsed.tsv_1-48 | 3 - .../MHC_Class_II/Test.combined.parsed.tsv | 4 +- .../tmp/Test.H2-IAb.parsed.tsv_1-48 | 4 +- tests/test_parse_output.py | 20 +++- 15 files changed, 118 insertions(+), 79 deletions(-) create mode 100644 tests/test_data/parse_output/output_pat126_17.iedb.parsed.tsv create mode 100644 tests/test_data/parse_output/pat126.ann.HLA-A*01:01.9.tsv create mode 100644 tests/test_data/parse_output/pat126.tsv create mode 100644 tests/test_data/parse_output/pat126_17.fa.key diff --git a/pvacseq/lib/parse_output.py b/pvacseq/lib/parse_output.py index ec6987a..f8ba17b 100644 --- a/pvacseq/lib/parse_output.py +++ b/pvacseq/lib/parse_output.py @@ -43,7 +43,7 @@ def determine_consecutive_matches_from_right(mt_epitope_seq, wt_epitope_seq): break return consecutive_matches -def find_mutation_position_from_left(wt_epitope_seq, mt_epitope_seq): +def find_mutation_position(wt_epitope_seq, mt_epitope_seq): for i,(wt_aa,mt_aa) in enumerate(zip(wt_epitope_seq,mt_epitope_seq)): if wt_aa != mt_aa: return i+1 @@ -54,7 +54,7 @@ def match_wildtype_and_mutant_entry_for_missense(result, mt_position, wt_results match_position = mt_position result['wt_epitope_seq'] = wt_results[match_position]['wt_epitope_seq'] result['wt_scores'] = wt_results[match_position]['wt_scores'] - result['mutation_position'] = find_mutation_position_from_left(result['wt_epitope_seq'], result['mt_epitope_seq']) + result['mutation_position'] = find_mutation_position(result['wt_epitope_seq'], result['mt_epitope_seq']) def match_wildtype_and_mutant_entry_for_frameshift(result, mt_position, wt_results, previous_result): #The WT epitope at the same position is the match @@ -91,18 +91,44 @@ def match_wildtype_and_mutant_entry_for_frameshift(result, mt_position, wt_resul #We don't include the matching WT epitope in the output result['wt_epitope_seq'] = 'NA' result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA') - mutation_position = find_mutation_position_from_left(wt_epitope_seq, mt_epitope_seq) + mutation_position = find_mutation_position(wt_epitope_seq, mt_epitope_seq) if mutation_position == 1 and int(previous_result['mutation_position']) <= 1: #The true mutation position is to the left of the current MT eptiope mutation_position = 0 result['mutation_position'] = mutation_position def match_wildtype_and_mutant_entry_for_inframe_indel(result, mt_position, wt_results, previous_result, iedb_results_for_wt_iedb_result_key): - #The WT epitope at the same position is used as the baseline match + #If the previous WT epitope was matched "from the right" we can just use that position to infer the mutation position and match direction + if previous_result is not None and previous_result['match_direction'] == 'right': + best_match_position = previous_result['wt_epitope_position'] + 1 + result['wt_epitope_position'] = best_match_position + result['match_direction'] = 'right' + if previous_result['mutation_position'] > 0: + result['mutation_position'] = previous_result['mutation_position'] - 1 + else: + result['mutation_position'] = 0 + + #We need to ensure that the matched WT eptiope has enough overlapping amino acids with the MT epitope + best_match_wt_result = wt_results[str(best_match_position)] + best_match_count = determine_consecutive_matches_from_right(result['mt_epitope_seq'], best_match_wt_result['wt_epitope_seq']) + if best_match_count and best_match_count >= min_match_count(int(result['peptide_length'])): + #The minimum amino acid match count is met + result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq'] + result['wt_scores'] = best_match_wt_result['wt_scores'] + else: + #The minimum amino acid match count is not met + #Even though there is a matching WT epitope there are not enough overlapping amino acids + #We don't include the matching WT epitope in the output + result['wt_epitope_seq'] = 'NA' + result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA') + + return + + #In all other cases the WT epitope at the same position is used as the baseline match baseline_best_match_position = mt_position #For an inframe insertion the MT sequence is longer than the WT sequence - #In this case not all MT epitopes have a baseline match + #In this case not all MT epitopes might have a baseline match if baseline_best_match_position not in wt_results: result['wt_epitope_seq'] = 'NA' result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA') @@ -119,15 +145,14 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(result, mt_position, wt_re baseline_best_match_wt_epitope_seq = baseline_best_match_wt_result['wt_epitope_seq'] #The MT epitope does not overlap the indel mutation if baseline_best_match_wt_epitope_seq == mt_epitope_seq: - result['wt_epitope_seq'] = baseline_best_match_wt_result['wt_epitope_seq'] - result['wt_scores'] = baseline_best_match_wt_result['wt_scores'] - result['mutation_position'] = 'NA' - result['match_direction'] = 'left' - return + result['wt_epitope_seq'] = baseline_best_match_wt_result['wt_epitope_seq'] + result['wt_scores'] = baseline_best_match_wt_result['wt_scores'] + result['wt_epitope_position'] = int(baseline_best_match_position) + result['mutation_position'] = 'NA' + result['match_direction'] = 'left' #If there is no previous result or the previous WT epitope was matched "from the left" we start by comparing to the baseline match if previous_result is None or previous_result['match_direction'] == 'left': - mutation_position = find_mutation_position_from_left(baseline_best_match_wt_epitope_seq, mt_epitope_seq) best_match_count = determine_consecutive_matches_from_left(mt_epitope_seq, baseline_best_match_wt_epitope_seq) #The alternate best match candidate "from the right" is inferred from the baseline best match position and the indel length if result['variant_type'] == 'inframe_ins': @@ -136,47 +161,41 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(result, mt_position, wt_re elif result['variant_type'] == 'inframe_del': deletion_length = len(wt_results.keys()) - len(iedb_results_for_wt_iedb_result_key.keys()) alternate_best_match_position = int(baseline_best_match_position) + deletion_length - alternate_best_match_wt_result = wt_results[str(alternate_best_match_position)] - alternate_best_match_wt_epitope_seq = alternate_best_match_wt_result['wt_epitope_seq'] - consecutive_matches_from_right = determine_consecutive_matches_from_right(mt_epitope_seq, alternate_best_match_wt_epitope_seq) - #We then check if the alternate best match epitope has more matching amino acids than the baseline best match epitope - #If it does, we pick it as the best match - if consecutive_matches_from_right > best_match_count: - best_match_count = consecutive_matches_from_right - match_direction = 'right' - best_match_position = alternate_best_match_position - best_match_wt_result = alternate_best_match_wt_result + if alternate_best_match_position > 0: + alternate_best_match_wt_result = wt_results[str(alternate_best_match_position)] + alternate_best_match_wt_epitope_seq = alternate_best_match_wt_result['wt_epitope_seq'] + consecutive_matches_from_right = determine_consecutive_matches_from_right(mt_epitope_seq, alternate_best_match_wt_epitope_seq) + #We then check if the alternate best match epitope has more matching amino acids than the baseline best match epitope + #If it does, we pick it as the best match + if consecutive_matches_from_right > best_match_count: + best_match_count = consecutive_matches_from_right + match_direction = 'right' + best_match_position = alternate_best_match_position + best_match_wt_result = alternate_best_match_wt_result + else: + match_direction = 'left' + best_match_position = baseline_best_match_position + best_match_wt_result = baseline_best_match_wt_result else: match_direction = 'left' best_match_position = baseline_best_match_position best_match_wt_result = baseline_best_match_wt_result - #If the previous WT epitope was matched "from the right" we can just use that position to infer the mutation position and match direction - elif previous_result['match_direction'] == 'right': - best_match_position = previous_result['wt_epitope_position'] + 1 - best_match_wt_result = wt_results[str(best_match_position)] - best_match_count = determine_consecutive_matches_from_right(mt_epitope_seq, best_match_wt_result['wt_epitope_seq']) - match_direction = 'right' - if previous_result['mutation_position'] > 0: - mutation_position = previous_result['mutation_position'] - 1 + #Now that we have found the matching WT epitope we still need to ensure that it has enough overlapping amino acids + if best_match_count and best_match_count >= min_match_count(int(result['peptide_length'])): + #The minimum amino acid match count is met + result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq'] + result['wt_scores'] = best_match_wt_result['wt_scores'] else: - mutation_position = 0 - - #Now that we have found the matching WT epitope we still need to ensure that it has enough overlapping amino acids - if best_match_count and best_match_count >= min_match_count(int(result['peptide_length'])): - #The minimum amino acid match count is met - result['wt_epitope_seq'] = best_match_wt_result['wt_epitope_seq'] - result['wt_scores'] = best_match_wt_result['wt_scores'] - else: - #The minimum amino acid match count is not met - #Even though there is a matching WT epitope there are not enough overlapping amino acids - #We don't include the matching WT epitope in the output - result['wt_epitope_seq'] = 'NA' - result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA') + #The minimum amino acid match count is not met + #Even though there is a matching WT epitope there are not enough overlapping amino acids + #We don't include the matching WT epitope in the output + result['wt_epitope_seq'] = 'NA' + result['wt_scores'] = dict.fromkeys(result['mt_scores'].keys(), 'NA') - result['match_direction'] = match_direction - result['mutation_position'] = mutation_position - result['wt_epitope_position'] = best_match_position + result['mutation_position'] = find_mutation_position(baseline_best_match_wt_epitope_seq, mt_epitope_seq) + result['match_direction'] = match_direction + result['wt_epitope_position'] = best_match_position def match_wildtype_and_mutant_entries(iedb_results, wt_iedb_results): for key in sorted(iedb_results.keys(), key = lambda x: int(x.split('|')[-1])): diff --git a/tests/test_data/parse_output/output_Test_21.iedb.parsed.tsv b/tests/test_data/parse_output/output_Test_21.iedb.parsed.tsv index db00206..3b8edb4 100644 --- a/tests/test_data/parse_output/output_Test_21.iedb.parsed.tsv +++ b/tests/test_data/parse_output/output_Test_21.iedb.parsed.tsv @@ -75,8 +75,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 8 6 LLPLLPLLL LLPLLLLLL SMM 141.94 76.05 0.536 NA NA NA NA NA NA 10.2102 9.71946 148.74 78.78 0.530 199.0 231.0 76.05 141.94 78.78 148.74 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 10 4 PLLPLLLLL LLPLLLLLL SMM 179.11 76.05 0.425 NA NA NA NA NA NA 10.2102 9.71946 187.69 78.78 0.420 199.0 225.0 76.05 179.11 78.78 187.69 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 11 3 LLPLLLLLG LPLLLLLLG SMM 2014.23 83006.1 41.210 NA NA NA NA NA NA 10.2102 9.71946 2081.76 83006.1 39.873 19860.0 10788.0 83006.1 2014.23 85394.52 2081.76 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 14 0 LLLLLGASG NA SMM 3319.79 NA NA NA NA NA NA NA NA 10.2102 9.71946 3576.27 NA NA NA 11181.0 NA 3319.79 NA 3576.27 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 15 0 LLLLGASGG NA SMM 3549.03 NA NA NA NA NA NA NA NA 10.2102 9.71946 3779.46 NA NA NA 16348.0 NA 3549.03 NA 3779.46 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 12 2 LPLLLLLGA PLLLLLLGA SMM 7728.76 726.29 0.094 NA NA NA NA NA NA 10.2102 9.71946 7932.86 791.46 0.100 3914.0 12574.0 726.29 7728.76 791.46 7932.86 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 9 5 LPLLPLLLL NA SMM 8873.81 NA NA NA NA NA NA NA NA 10.2102 9.71946 9515.46 NA NA NA 14262.0 NA 8873.81 NA 9515.46 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*02:01 9 13 1 PLLLLLGAS LLLLLLGAS NetMHC 20934.0 6922.0 0.331 NA NA NA NA NA NA 10.2102 9.71946 23340.49 1942.81 0.083 6922.0 20934.0 1871.16 23340.49 1942.81 25729.42 @@ -153,7 +151,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*02:01 9 6 7 LSRTLLLAA LSRTLLAAA SMMPMBEC 18811.78 19834.9 1.054 NA NA NA NA NA NA 0.802702 5.06467e-12 19235.79 20049.8 1.042 20920.0 19699.0 20049.8 19235.79 19834.9 18811.78 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*02:01 9 8 5 RTLLLAAAG SRTLLAAAG NetMHC 20696.0 29322.0 1.417 NA NA NA NA NA NA 0.802702 5.06467e-12 42472.71 2051681.77 48.306 29322.0 20696.0 2051681.77 42472.71 2115582.67 45022.54 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*02:01 9 7 6 SRTLLLAAA SRTLLAAAG NetMHC 22858.0 29322.0 1.283 NA NA NA NA NA NA 0.802702 5.06467e-12 62533.11 2051681.77 32.810 29322.0 22858.0 2051681.77 62533.11 2115582.67 63449.76 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*02:01 9 13 0 AAAGGSSLQ NA NetMHC 26360.0 NA NA NA NA NA NA NA NA 0.802702 5.06467e-12 175966.5 NA NA NA 26360.0 NA 197292.24 NA 175966.5 4 40434704 40434725 AGCGGCTGCGGCGGCTGCGGCC A ENST00000381793 ENSG00000163694 inframe_del AAAAAAAA/A 495-502 RBM47 HLA-A*02:01 9 4 9 SAAAAAAAV SAAAAAAAA SMM 1106.9 5509.47 4.977 NA NA NA NA NA NA 0.871384 0.128745 1170.66 5800.03 4.954 9700.0 1235.0 5509.47 1106.9 5800.03 1170.66 22 26936775 26936776 G T ENST00000338754 ENSG00000128294 missense P/H 274 TPST2 HLA-A*02:01 9 6 6 DLIGKHGGV DLIGKPGGV SMM 2037.56 1079.22 0.530 NA NA NA NA NA NA 16.0251 1.98891 2230.64 1102.63 0.494 11952.0 16231.0 1079.22 2037.56 1102.63 2230.64 22 26936775 26936776 G T ENST00000338754 ENSG00000128294 missense P/H 274 TPST2 HLA-A*02:01 9 8 4 IGKHGGVSL IGKPGGVSL NetMHC 26101.0 26272.0 1.007 NA NA NA NA NA NA 16.0251 1.98891 141719.63 104095.03 0.735 26272.0 26101.0 114580.31 148973.84 104095.03 141719.63 diff --git a/tests/test_data/parse_output/output_inframe_insertion_aa_insertion.iedb.parsed.tsv b/tests/test_data/parse_output/output_inframe_insertion_aa_insertion.iedb.parsed.tsv index fda20ba..a4d92ab 100644 --- a/tests/test_data/parse_output/output_inframe_insertion_aa_insertion.iedb.parsed.tsv +++ b/tests/test_data/parse_output/output_inframe_insertion_aa_insertion.iedb.parsed.tsv @@ -1,6 +1,5 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT Score Method Best MT Score Corresponding WT Score Corresponding Fold Change Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median MT Score Median WT Score Median Fold Change NetMHC WT Score NetMHC MT Score 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*29:02 9 4 9 ATLSRTLLL ATLSRTLLA NetMHC 4435.0 6771.0 1.527 NA NA NA NA NA NA NA NA 4435.0 6771.0 1.527 6771.0 4435.0 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*29:02 9 13 0 AAAGGSSLQ NA NetMHC 9211.0 NA NA NA NA NA NA NA NA NA NA 9211.0 NA NA NA 9211.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*29:02 9 5 8 TLSRTLLLA TLSRTLLAA NetMHC 11053.0 18985.0 1.718 NA NA NA NA NA NA NA NA 11053.0 18985.0 1.718 18985.0 11053.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*29:02 9 10 3 LLLAAAGGS TLLAAAGGS NetMHC 20532.0 19633.0 0.956 NA NA NA NA NA NA NA NA 20532.0 19633.0 0.956 19633.0 20532.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-A*29:02 9 6 7 LSRTLLLAA LSRTLLAAA NetMHC 23512.0 27191.0 1.156 NA NA NA NA NA NA NA NA 23512.0 27191.0 1.156 27191.0 23512.0 diff --git a/tests/test_data/parse_output/output_inframe_insertion_aa_replacement.iedb.parsed.tsv b/tests/test_data/parse_output/output_inframe_insertion_aa_replacement.iedb.parsed.tsv index c89587e..fba7866 100644 --- a/tests/test_data/parse_output/output_inframe_insertion_aa_replacement.iedb.parsed.tsv +++ b/tests/test_data/parse_output/output_inframe_insertion_aa_replacement.iedb.parsed.tsv @@ -6,7 +6,5 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 6 8 PPLLPLLPL PPLLPLLLL NetMHC 19790.0 18161.0 0.918 NA NA NA NA NA NA NA NA 19790.0 18161.0 0.918 18161.0 19790.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 12 2 LPLLLLLGA PLLLLLLGA NetMHC 20339.0 17269.0 0.849 NA NA NA NA NA NA NA NA 20339.0 17269.0 0.849 17269.0 20339.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 11 3 LLPLLLLLG LPLLLLLLG NetMHC 21775.0 21790.0 1.001 NA NA NA NA NA NA NA NA 21775.0 21790.0 1.001 21790.0 21775.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 14 0 LLLLLGASG NA NetMHC 24130.0 NA NA NA NA NA NA NA NA NA NA 24130.0 NA NA NA 24130.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 15 0 LLLLGASGG NA NetMHC 24260.0 NA NA NA NA NA NA NA NA NA NA 24260.0 NA NA NA 24260.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 13 1 PLLLLLGAS LLLLLLGAS NetMHC 25504.0 21621.0 0.848 NA NA NA NA NA NA NA NA 25504.0 21621.0 0.848 21621.0 25504.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-A*29:02 9 5 9 PPPLLPLLP PPPLLPLLL NetMHC 29135.0 20644.0 0.709 NA NA NA NA NA NA NA NA 29135.0 20644.0 0.709 20644.0 29135.0 diff --git a/tests/test_data/parse_output/output_nn_align.iedb.parsed.tsv b/tests/test_data/parse_output/output_nn_align.iedb.parsed.tsv index a98d506..0a98bd3 100644 --- a/tests/test_data/parse_output/output_nn_align.iedb.parsed.tsv +++ b/tests/test_data/parse_output/output_nn_align.iedb.parsed.tsv @@ -129,8 +129,7 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 11 8 PPLLPLLPLLLLLGA NA NNalign 3519.2 NA NA NA NA NA NA NA NA 10.2102 9.71946 3519.2 NA NA NA 3519.2 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 12 7 PLLPLLPLLLLLGAS PPPLLPLLLLLLGAS NNalign 4769.1 18984.2 3.981 NA NA NA NA NA NA 10.2102 9.71946 4769.1 18984.2 3.981 18984.2 4769.1 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 13 6 LLPLLPLLLLLGASG PPLLPLLLLLLGASG NNalign 8015.7 30110.3 3.756 NA NA NA NA NA NA 10.2102 9.71946 8015.7 30110.3 3.756 30110.3 8015.7 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 19 0 LLLLLGASGGGGGAR NA NNalign 10076.1 NA NA NA NA NA NA NA NA 10.2102 9.71946 10076.1 NA NA NA 10076.1 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA NA NNalign 10810.0 NA NA NA NA NA NA NA NA 10.2102 9.71946 10810.0 NA NA NA 10810.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA LLLLLLGASGGGGGA NNalign 10810.0 10735.4 0.993 NA NA NA NA NA NA 10.2102 9.71946 10810.0 10735.4 0.993 10735.4 10810.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 17 2 LPLLLLLGASGGGGG PLLLLLLGASGGGGG NNalign 11744.6 11744.1 1.000 NA NA NA NA NA NA 10.2102 9.71946 11744.6 11744.1 1.000 11744.1 11744.6 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 16 3 LLPLLLLLGASGGGG LPLLLLLLGASGGGG NNalign 12726.0 12743.9 1.001 NA NA NA NA NA NA 10.2102 9.71946 12726.0 12743.9 1.001 12743.9 12726.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 15 4 PLLPLLLLLGASGGG LLPLLLLLLGASGGG NNalign 17866.0 18241.2 1.021 NA NA NA NA NA NA 10.2102 9.71946 17866.0 18241.2 1.021 18241.2 17866.0 @@ -249,7 +248,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 3 15 QTSVNSATLSRTLLL QTSVNSATLSRTLLA NNalign 2031.7 1883.7 0.927 NA NA NA NA NA NA 0.802702 5.06467e-12 2031.7 1883.7 0.927 1883.7 2031.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 4 14 TSVNSATLSRTLLLA TSVNSATLSRTLLAA NNalign 2220.7 1899.8 0.855 NA NA NA NA NA NA 0.802702 5.06467e-12 2220.7 1899.8 0.855 1899.8 2220.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 5 13 SVNSATLSRTLLLAA SVNSATLSRTLLAAA NNalign 3501.4 2557.4 0.730 NA NA NA NA NA NA 0.802702 5.06467e-12 3501.4 2557.4 0.730 2557.4 3501.4 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 17 1 LAAAGGSSLQTQRGL NA NNalign 4014.6 NA NA NA NA NA NA NA NA 0.802702 5.06467e-12 4014.6 NA NA NA 4014.6 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 10 8 TLSRTLLLAAAGGSS ATLSRTLLAAAGGSS NNalign 4161.8 3364.4 0.808 NA NA NA NA NA NA 0.802702 5.06467e-12 4161.8 3364.4 0.808 3364.4 4161.8 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 6 12 VNSATLSRTLLLAAA VNSATLSRTLLAAAG NNalign 4502.9 2927.7 0.650 NA NA NA NA NA NA 0.802702 5.06467e-12 4502.9 2927.7 0.650 2927.7 4502.9 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 9 9 ATLSRTLLLAAAGGS ATLSRTLLAAAGGSS NNalign 6472.4 3364.4 0.520 NA NA NA NA NA NA 0.802702 5.06467e-12 6472.4 3364.4 0.520 3364.4 6472.4 diff --git a/tests/test_data/parse_output/output_pat126_17.iedb.parsed.tsv b/tests/test_data/parse_output/output_pat126_17.iedb.parsed.tsv new file mode 100644 index 0000000..3126119 --- /dev/null +++ b/tests/test_data/parse_output/output_pat126_17.iedb.parsed.tsv @@ -0,0 +1,4 @@ +Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type Mutation Protein Position Gene Name HLA Allele Peptide Length Sub-peptide Position Mutation Position MT Epitope Seq WT Epitope Seq Best MT Score Method Best MT Score Corresponding WT Score Corresponding Fold Change Tumor DNA Depth Tumor DNA VAF Tumor RNA Depth Tumor RNA VAF Normal Depth Normal VAF Gene Expression Transcript Expression Median MT Score Median WT Score Median Fold Change NetMHC WT Score NetMHC MT Score +8 8234868 8234868 C CGCCGCT ENST00000330777 ENSG00000182319 inframe_ins -/SG 350-351 SGK223 HLA-A*01:01 9 3 7 SGSGSGSGA CGSGSGSGA NetMHC 29038.15 26854.89 0.925 NA NA NA NA NA NA NA NA 29038.15 26854.89 0.925 26854.89 29038.15 +8 8234868 8234868 C CGCCGCT ENST00000330777 ENSG00000182319 inframe_ins -/SG 350-351 SGK223 HLA-A*01:01 9 2 8 GSGSGSGSG GSGSGSGAS NetMHC 31943.39 28223.18 0.884 NA NA NA NA NA NA NA NA 31943.39 28223.18 0.884 28223.18 31943.39 +8 8234868 8234868 C CGCCGCT ENST00000330777 ENSG00000182319 inframe_ins -/SG 350-351 SGK223 HLA-A*01:01 9 1 9 CGSGSGSGS CGSGSGSGA NetMHC 32285.64 26854.89 0.832 NA NA NA NA NA NA NA NA 32285.64 26854.89 0.832 26854.89 32285.64 diff --git a/tests/test_data/parse_output/pat126.ann.HLA-A*01:01.9.tsv b/tests/test_data/parse_output/pat126.ann.HLA-A*01:01.9.tsv new file mode 100644 index 0000000..42476a4 --- /dev/null +++ b/tests/test_data/parse_output/pat126.ann.HLA-A*01:01.9.tsv @@ -0,0 +1,19 @@ +allele seq_num start end length peptide ic50 percentile rank +HLA-A*01:01 39 6 14 9 GSGASSPFV 17504.37 7.9 +HLA-A*01:01 40 8 16 9 GSGASSPFV 17504.37 7.9 +HLA-A*01:01 39 5 13 9 SGSGASSPF 22960.55 28 +HLA-A*01:01 40 7 15 9 SGSGASSPF 22960.55 28 +HLA-A*01:01 39 1 9 9 CGSGSGSGA 26854.89 48 +HLA-A*01:01 39 2 10 9 GSGSGSGAS 28223.18 55 +HLA-A*01:01 40 4 12 9 GSGSGSGAS 28223.18 55 +HLA-A*01:01 39 8 16 9 GASSPFVPH 28635.04 58 +HLA-A*01:01 40 10 18 9 GASSPFVPH 28635.04 58 +HLA-A*01:01 40 3 11 9 SGSGSGSGA 29038.15 60 +HLA-A*01:01 39 7 15 9 SGASSPFVP 31084.53 73 +HLA-A*01:01 40 9 17 9 SGASSPFVP 31084.53 73 +HLA-A*01:01 39 4 12 9 GSGSGASSP 31538.48 75 +HLA-A*01:01 40 6 14 9 GSGSGASSP 31538.48 75 +HLA-A*01:01 40 2 10 9 GSGSGSGSG 31943.39 77 +HLA-A*01:01 40 1 9 9 CGSGSGSGS 32285.64 79 +HLA-A*01:01 39 3 11 9 SGSGSGASS 34894.91 92 +HLA-A*01:01 40 5 13 9 SGSGSGASS 34894.91 92 diff --git a/tests/test_data/parse_output/pat126.tsv b/tests/test_data/parse_output/pat126.tsv new file mode 100644 index 0000000..b0f36b1 --- /dev/null +++ b/tests/test_data/parse_output/pat126.tsv @@ -0,0 +1,2 @@ +chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index +8 8234868 8234868 C CGCCGCT SGK223 ENST00000330777 -/SG ENSG00000182319 MHQTLCLNPESLKMSACSDFVEHIWKPGSCKNCFCLRSDHQLVAGPPQPRAGSLPPPPRLPPRPENCRLEDEGVNSSPYSKPTIAVKPTMMSSEASDVWTEANLSAEVSQVIWRRAPGKLPLPKQEDAPVVYLGSFRGVQKPAGPSTSPDGNSRCPPAYTMVGLHNLEPRGERNIAFHPVSFPEEKAVHKEKPSFPYQDRPSTQESFRQKLAAFAGTTSGCHQGPGPLRESLPSEDDSDQRCSPSGDSEGGEYCSILDCCPGSPVAKAASQTAGSRGRHGGRDCSPTCWEQGKCSGPAEQEKRGPSFPKECCSQGPTAHPSCLGPKKLSLTSEAAISSDGLSCGSGSGSGASSPFVPHLESDYCSLMKEPAPEKQQDPGCPGVTPSRCLGLTGEPQPPAHPREATQPEPIYAESTKRKKAAPVPSKSQAKIEHAAAAQGQGQVCTGNAWAQKAASGWGRDSPDPTPQVSATITVMAAHPEEDHRTIYLSSPDSAVGVQWPRGPVSQNSEVGEEETSAGQGLSSRESHAHSASESKPKERPAIPPKLSKSSPVGSPVSPSAGGPPVSPLADLSDGSSGGSSIGPQPPSQGPADPAPSCRTNGVAISDPSRCPQPAASSASEQRRPRFQAGTWSRQCRIEEEEEVEQELLSHSWGRETKNGPTDHSNSTTWHRLHPTDGSSGQNSKVGTGMSKSASFAFEFPKDRSGIETFSPPPPPPKSRHLLKMNKSSSDLEKVSQGSAESLSPSFRGVHVSFTTGSTDSLASDSRTCSDGGPSSELAHSPTNSGKKLFAPVPFPSGSTEDVSPSGPQQPPPLPQKKIVSRAASSPDGFFWTQGSPKPGTASPKLNLSHSETNVHDESHFSYSLSPGNRHHPVFSSSDPLEKAFKGSGHWLPAAGLAGNRGGCGSPGLQCKGAPSASSSQLSVSSQASTGSTQLQLHGLLSNISSKEGTYAKLGGLYTQSLARLVAKCEDLFMGGQKKELHFNENNWSLFKLTCNKPCCDSGDAIYYCATCSEDPGSTYAVKICKAPEPKTVSYCSPSVPVHFNIQQDCGHFVASVPSSMLSSPDAPKDPVPALPTHPPAQEQDCVVVITREVPHQTASDFVRDSAASHQAEPEAYERRVCFLLLQLCNGLEHLKEHGIIHRDLCLENLLLVHCTLQAGPGPAPAPAPAPAAAAPPCSSAAPPAGGTLSPAAGPASPEGPREKQLPRLIISNFLKAKQKPGGTPNLQQKKSQARLAPEIVSASQYRKFDEFQTGILIYELLHQPNPFEVRAQLRERDYRQEDLPPLPALSLYSPGLQQLAHLLLEADPIKRIRIGEAKRVLQCLLWGPRRELVQQPGTSEEALCGTLHNWIDMKRALMMMKFAEKAVDRRRGVELEDWLCCQYLASAEPGALLQSLKLLQLL inframe_ins 350-351 NA NA NA NA NA NA NA NA SGK223_ENST00000330777_1.inframe_ins.350-351-/SG diff --git a/tests/test_data/parse_output/pat126_17.fa.key b/tests/test_data/parse_output/pat126_17.fa.key new file mode 100644 index 0000000..b7f5fd7 --- /dev/null +++ b/tests/test_data/parse_output/pat126_17.fa.key @@ -0,0 +1,4 @@ +39: +- WT.SGK223_ENST00000330777_1.inframe_ins.350-351-/SG +40: +- MT.SGK223_ENST00000330777_1.inframe_ins.350-351-/SG diff --git a/tests/test_data/pvacseq/MHC_Class_I/Test.combined.parsed.tsv b/tests/test_data/pvacseq/MHC_Class_I/Test.combined.parsed.tsv index e0e7b75..1911b3c 100644 --- a/tests/test_data/pvacseq/MHC_Class_I/Test.combined.parsed.tsv +++ b/tests/test_data/pvacseq/MHC_Class_I/Test.combined.parsed.tsv @@ -78,8 +78,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 9 5 LPLLPLLLL NA PickPocket 16946.23 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 23057.684999999998 NA NA NA 29169.14 NA 16946.23 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 5 9 PPPLLPLLP PPPLLPLLL PickPocket 29108.61 18478.42 0.635 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 32658.89 19267.885 0.590 20057.35 36209.17 18478.42 29108.61 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 6 8 PPLLPLLPL PPLLPLLLL NetMHC 35483.9 31563.06 0.890 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 42741.95 37737.485 0.883 31563.06 35483.9 43911.91 50000.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 15 0 LLLLGASGG NA NetMHC 41248.96 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 45355.445 NA NA NA 41248.96 NA 49461.93 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 14 0 LLLLLGASG NA NetMHC 43117.66 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 43514.785 NA NA NA 43117.66 NA 43911.91 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 12 2 LPLLLLLGA PLLLLLLGA PickPocket 43911.91 40708.92 0.927 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 44188.5 42010.55 0.951 43312.18 44465.09 40708.92 43911.91 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 13 1 PLLLLLGAS LLLLLLGAS NetMHC 45329.74 44996.48 0.993 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 47664.869999999995 47498.240000000005 0.997 44996.48 45329.74 50000.0 50000.0 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 9 10 2 TGSSGSIAS TRSSGSIAS PickPocket 10641.85 8571.13 0.805 5 99.9998000004 NA NA NA NA 0.0 0 26618.01 25216.454999999998 0.947 41861.78 42594.17 8571.13 10641.85 @@ -152,7 +150,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 10 3 LLLAAAGGS TLLAAAGGS PickPocket 19088.05 20368.33 1.067 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 27654.284999999996 27433.97 0.992 34499.61 36220.52 20368.33 19088.05 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 9 4 TLLLAAAGG RTLLAAAGG PickPocket 22695.78 19505.61 0.859 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 31150.364999999998 30217.010000000002 0.970 40928.41 39604.95 19505.61 22695.78 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 8 5 RTLLLAAAG SRTLLAAAG PickPocket 37333.42 30726.73 0.823 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 39884.665 37467.34 0.939 44207.95 42435.91 30726.73 37333.42 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 13 0 AAAGGSSLQ NA NetMHC 39811.17 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 44370.405 NA NA NA 39811.17 NA 48929.64 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 7 6 SRTLLLAAA SRTLLAAAG PickPocket 41151.77 30726.73 0.747 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 43565.535 37467.34 0.860 44207.95 45979.3 30726.73 41151.77 4 40434704 40434725 AGCGGCTGCGGCGGCTGCGGCC A ENST00000381793 ENSG00000163694 inframe_del AAAAAAAA/A 495-502 RBM47 HLA-E*01:01 9 4 9 SAAAAAAAV SAAAAAAAA PickPocket 21040.32 21040.32 1.000 127 3.149606051212122 NA NA NA NA 0.871384 0.128745 28125.2 29506.67 1.049 37973.02 35210.08 21040.32 21040.32 22 26936775 26936776 G T ENST00000338754 ENSG00000128294 missense P/H 274 TPST2 HLA-E*01:01 9 10 2 KHGGVSLSK KPGGVSLSK PickPocket 6539.79 20149.14 3.081 133 18.79699106789541 NA NA NA NA 16.0251 1.98891 24389.595 31577.025 1.295 43004.91 42239.4 20149.14 6539.79 @@ -2437,8 +2434,7 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 9 5 LPLLPLLLLL PLLPLLLLLL NetMHC 25501.86 26314.15 1.032 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 34470.61 33511.535 0.972 26314.15 25501.86 40708.92 43439.36 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 10 4 PLLPLLLLLG LLPLLLLLLG NetMHC 29821.75 26616.83 0.893 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 39910.875 18925.125 0.474 26616.83 29821.75 11233.42 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 5 9 PPPLLPLLPL PPPLLPLLLL NetMHC 38950.93 37931.97 0.974 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 43940.285 40220.71 0.915 37931.97 38950.93 42509.45 48929.64 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 14 0 LLLLLGASGG NA NetMHC 46426.72 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48213.36 NA NA NA 46426.72 NA 50000.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 13 1 PLLLLLGASG NA NetMHC 46938.88 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48469.44 NA NA NA 46938.88 NA 50000.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 13 1 PLLLLLGASG LLLLLLGASG NetMHC 46938.88 46940.91 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48469.44 48470.455 1.000 46940.91 46938.88 50000.0 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 12 2 LPLLLLLGAS PLLLLLLGAS NetMHC 47327.99 47032.43 0.994 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48663.994999999995 48516.215 0.997 47032.43 47327.99 50000.0 50000.0 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 10 9 3 CTGSSGSIAS CTRSSGSIAS PickPocket 37333.42 26123.53 0.700 5 99.9998000004 NA NA NA NA 0.0 0 41647.155 35751.175 0.858 45378.82 45960.89 26123.53 37333.42 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 10 10 2 TGSSGSIASN TRSSGSIASN PickPocket 39408.75 31740.46 0.805 5 99.9998000004 NA NA NA NA 0.0 0 42935.065 38870.08 0.905 45999.7 46461.38 31740.46 39408.75 @@ -2514,7 +2510,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 4 9 ATLSRTLLLA ATLSRTLLAA PickPocket 11479.16 13356.59 1.164 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 16856.684999999998 26746.714999999997 1.587 40136.84 22234.21 13356.59 11479.16 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 3 10 SATLSRTLLL SATLSRTLLA PickPocket 21500.59 33144.32 1.542 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 22233.184999999998 36775.884999999995 1.654 40407.45 22965.78 33144.32 21500.59 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 5 8 TLSRTLLLAA TLSRTLLAAA PickPocket 23192.26 50000.0 2.156 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 32641.010000000002 47181.845 1.445 44363.69 42089.76 50000.0 23192.26 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 12 1 LAAAGGSSLQ NA NetMHC 39577.1 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 44788.55 NA NA NA 39577.1 NA 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 10 3 LLLAAAGGSS TLLAAAGGSS NetMHC 44668.58 44213.23 0.990 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47065.255000000005 47106.615000000005 1.001 44213.23 44668.58 50000.0 49461.93 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 9 4 TLLLAAAGGS RTLLAAAGGS NetMHC 44709.19 44101.91 0.986 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47354.595 47050.955 0.994 44101.91 44709.19 50000.0 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 8 5 RTLLLAAAGG SRTLLAAAGG NetMHC 45722.81 46525.27 1.018 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47861.405 48262.634999999995 1.008 46525.27 45722.81 50000.0 50000.0 @@ -4795,8 +4790,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 7 7 PLLPLLPLL PLLPLLLLL PickPocket 10527.33 12653.21 1.202 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10527.33 12653.21 1.202 NA NA 12653.21 10527.33 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 10 4 PLLPLLLLL LLPLLLLLL PickPocket 12653.21 4830.5 0.382 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 12653.21 4830.5 0.382 NA NA 4830.5 12653.21 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 11 3 LLPLLLLLG LPLLLLLLG PickPocket 12653.21 50000.0 3.952 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 12653.21 50000.0 3.952 NA NA 50000.0 12653.21 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 15 0 LLLLGASGG NA PickPocket 15044.73 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 15044.73 NA NA NA NA NA 15044.73 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 14 0 LLLLLGASG NA PickPocket 16228.45 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 16228.45 NA NA NA NA NA 16228.45 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 9 5 LPLLPLLLL NA PickPocket 27278.95 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 27278.95 NA NA NA NA NA 27278.95 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 12 2 LPLLLLLGA PLLLLLLGA PickPocket 37333.42 30396.07 0.814 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 37333.42 30396.07 0.814 NA NA 30396.07 37333.42 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 9 13 1 PLLLLLGAS LLLLLLGAS PickPocket 50000.0 21970.92 0.439 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 21970.92 0.439 NA NA 21970.92 50000.0 @@ -4872,7 +4865,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 9 9 4 TLLLAAAGG RTLLAAAGG PickPocket 16053.81 6128.72 0.382 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 16053.81 6128.72 0.382 NA NA 6128.72 16053.81 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 9 6 7 LSRTLLLAA LSRTLLAAA PickPocket 28178.94 27875.69 0.989 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 28178.94 27875.69 0.989 NA NA 27875.69 28178.94 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 9 5 8 TLSRTLLLA TLSRTLLAA PickPocket 37739.56 48929.64 1.297 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 37739.56 48929.64 1.297 NA NA 48929.64 37739.56 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 9 13 0 AAAGGSSLQ NA PickPocket 50000.0 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 NA NA NA NA NA 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 9 7 6 SRTLLLAAA SRTLLAAAG PickPocket 50000.0 50000.0 1.000 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 4 40434704 40434725 AGCGGCTGCGGCGGCTGCGGCC A ENST00000381793 ENSG00000163694 inframe_del AAAAAAAA/A 495-502 RBM47 HLA-G*01:09 9 4 9 SAAAAAAAV SAAAAAAAA PickPocket 50000.0 50000.0 1.000 127 3.149606051212122 NA NA NA NA 0.871384 0.128745 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 22 26936775 26936776 G T ENST00000338754 ENSG00000128294 missense P/H 274 TPST2 HLA-G*01:09 9 8 4 IGKHGGVSL IGKPGGVSL PickPocket 22695.78 19295.7 0.850 133 18.79699106789541 NA NA NA NA 16.0251 1.98891 22695.78 19295.7 0.850 NA NA 19295.7 22695.78 @@ -7152,10 +7144,9 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 8 6 LLPLLPLLLL LLPLLLLLLG PickPocket 12249.09 29745.37 2.428 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 12249.09 29745.37 2.428 NA NA 29745.37 12249.09 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 11 3 LLPLLLLLGA LPLLLLLLGA PickPocket 20589.91 50000.0 2.428 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 20589.91 50000.0 2.428 NA NA 50000.0 20589.91 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 7 7 PLLPLLPLLL PLLPLLLLLL PickPocket 29425.27 31398.89 1.067 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 29425.27 31398.89 1.067 NA NA 31398.89 29425.27 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 14 0 LLLLLGASGG NA PickPocket 35367.39 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 35367.39 NA NA NA NA NA 35367.39 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 10 4 PLLPLLLLLG LLPLLLLLLG PickPocket 50000.0 29745.37 0.595 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 29745.37 0.595 NA NA 29745.37 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 12 2 LPLLLLLGAS PLLLLLLGAS PickPocket 50000.0 50000.0 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 13 1 PLLLLLGASG NA PickPocket 50000.0 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 NA NA NA NA NA 50000.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 13 1 PLLLLLGASG LLLLLLGASG PickPocket 50000.0 39837.46 0.797 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 39837.46 0.797 NA NA 39837.46 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 4 10 PPPPLLPLLP PPPPLLPLLL PickPocket 50000.0 50000.0 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 5 9 PPPLLPLLPL PPPLLPLLLL PickPocket 50000.0 50000.0 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-G*01:09 10 6 8 PPLLPLLPLL PPLLPLLLLL PickPocket 50000.0 50000.0 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 @@ -7235,7 +7226,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 4 9 ATLSRTLLLA ATLSRTLLAA PickPocket 22942.68 28485.48 1.242 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 22942.68 28485.48 1.242 NA NA 28485.48 22942.68 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 10 3 LLLAAAGGSS TLLAAAGGSS PickPocket 32434.8 32787.64 1.011 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 32434.8 32787.64 1.011 NA NA 32787.64 32434.8 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 9 4 TLLLAAAGGS RTLLAAAGGS PickPocket 36931.66 14099.07 0.382 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 36931.66 14099.07 0.382 NA NA 14099.07 36931.66 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 12 1 LAAAGGSSLQ NA PickPocket 50000.0 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 NA NA NA NA NA 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 3 10 SATLSRTLLL SATLSRTLLA PickPocket 50000.0 50000.0 1.000 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 5 8 TLSRTLLLAA TLSRTLLAAA PickPocket 50000.0 50000.0 1.000 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-G*01:09 10 6 7 LSRTLLLAAA LSRTLLAAAG PickPocket 50000.0 50000.0 1.000 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 50000.0 50000.0 1.000 NA NA 50000.0 50000.0 diff --git a/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.10.parsed.tsv_1-48 b/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.10.parsed.tsv_1-48 index 84afbe0..2c52fa9 100644 --- a/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.10.parsed.tsv_1-48 +++ b/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.10.parsed.tsv_1-48 @@ -87,8 +87,7 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 9 5 LPLLPLLLLL PLLPLLLLLL NetMHC 25501.86 26314.15 1.032 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 34470.61 33511.535 0.972 26314.15 25501.86 40708.92 43439.36 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 10 4 PLLPLLLLLG LLPLLLLLLG NetMHC 29821.75 26616.83 0.893 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 39910.875 18925.125 0.474 26616.83 29821.75 11233.42 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 5 9 PPPLLPLLPL PPPLLPLLLL NetMHC 38950.93 37931.97 0.974 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 43940.285 40220.71 0.915 37931.97 38950.93 42509.45 48929.64 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 14 0 LLLLLGASGG NA NetMHC 46426.72 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48213.36 NA NA NA 46426.72 NA 50000.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 13 1 PLLLLLGASG NA NetMHC 46938.88 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48469.44 NA NA NA 46938.88 NA 50000.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 13 1 PLLLLLGASG LLLLLLGASG NetMHC 46938.88 46940.91 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48469.44 48470.455 1.000 46940.91 46938.88 50000.0 50000.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 10 12 2 LPLLLLLGAS PLLLLLLGAS NetMHC 47327.99 47032.43 0.994 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 48663.994999999995 48516.215 0.997 47032.43 47327.99 50000.0 50000.0 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 10 9 3 CTGSSGSIAS CTRSSGSIAS PickPocket 37333.42 26123.53 0.700 5 99.9998000004 NA NA NA NA 0.0 0 41647.155 35751.175 0.858 45378.82 45960.89 26123.53 37333.42 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 10 10 2 TGSSGSIASN TRSSGSIASN PickPocket 39408.75 31740.46 0.805 5 99.9998000004 NA NA NA NA 0.0 0 42935.065 38870.08 0.905 45999.7 46461.38 31740.46 39408.75 @@ -164,7 +163,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 4 9 ATLSRTLLLA ATLSRTLLAA PickPocket 11479.16 13356.59 1.164 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 16856.684999999998 26746.714999999997 1.587 40136.84 22234.21 13356.59 11479.16 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 3 10 SATLSRTLLL SATLSRTLLA PickPocket 21500.59 33144.32 1.542 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 22233.184999999998 36775.884999999995 1.654 40407.45 22965.78 33144.32 21500.59 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 5 8 TLSRTLLLAA TLSRTLLAAA PickPocket 23192.26 50000.0 2.156 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 32641.010000000002 47181.845 1.445 44363.69 42089.76 50000.0 23192.26 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 12 1 LAAAGGSSLQ NA NetMHC 39577.1 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 44788.55 NA NA NA 39577.1 NA 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 10 3 LLLAAAGGSS TLLAAAGGSS NetMHC 44668.58 44213.23 0.990 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47065.255000000005 47106.615000000005 1.001 44213.23 44668.58 50000.0 49461.93 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 9 4 TLLLAAAGGS RTLLAAAGGS NetMHC 44709.19 44101.91 0.986 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47354.595 47050.955 0.994 44101.91 44709.19 50000.0 50000.0 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 10 8 5 RTLLLAAAGG SRTLLAAAGG NetMHC 45722.81 46525.27 1.018 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 47861.405 48262.634999999995 1.008 46525.27 45722.81 50000.0 50000.0 diff --git a/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.9.parsed.tsv_1-48 b/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.9.parsed.tsv_1-48 index 5228240..f2c4864 100644 --- a/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.9.parsed.tsv_1-48 +++ b/tests/test_data/pvacseq/MHC_Class_I/tmp/Test.HLA-E*01:01.9.parsed.tsv_1-48 @@ -78,8 +78,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 9 5 LPLLPLLLL NA PickPocket 16946.23 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 23057.684999999998 NA NA NA 29169.14 NA 16946.23 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 5 9 PPPLLPLLP PPPLLPLLL PickPocket 29108.61 18478.42 0.635 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 32658.89 19267.885 0.590 20057.35 36209.17 18478.42 29108.61 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 6 8 PPLLPLLPL PPLLPLLLL NetMHC 35483.9 31563.06 0.890 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 42741.95 37737.485 0.883 31563.06 35483.9 43911.91 50000.0 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 15 0 LLLLGASGG NA NetMHC 41248.96 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 45355.445 NA NA NA 41248.96 NA 49461.93 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 14 0 LLLLLGASG NA NetMHC 43117.66 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 43514.785 NA NA NA 43117.66 NA 43911.91 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 12 2 LPLLLLLGA PLLLLLLGA PickPocket 43911.91 40708.92 0.927 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 44188.5 42010.55 0.951 43312.18 44465.09 40708.92 43911.91 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 HLA-E*01:01 9 13 1 PLLLLLGAS LLLLLLGAS NetMHC 45329.74 44996.48 0.993 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 47664.869999999995 47498.240000000005 0.997 44996.48 45329.74 50000.0 50000.0 22 22550449 22550450 C G ENST00000390285 ENSG00000211640 missense R/G 43 IGLV6-57 HLA-E*01:01 9 10 2 TGSSGSIAS TRSSGSIAS PickPocket 10641.85 8571.13 0.805 5 99.9998000004 NA NA NA NA 0.0 0 26618.01 25216.454999999998 0.947 41861.78 42594.17 8571.13 10641.85 @@ -152,7 +150,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 10 3 LLLAAAGGS TLLAAAGGS PickPocket 19088.05 20368.33 1.067 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 27654.284999999996 27433.97 0.992 34499.61 36220.52 20368.33 19088.05 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 9 4 TLLLAAAGG RTLLAAAGG PickPocket 22695.78 19505.61 0.859 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 31150.364999999998 30217.010000000002 0.970 40928.41 39604.95 19505.61 22695.78 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 8 5 RTLLLAAAG SRTLLAAAG PickPocket 37333.42 30726.73 0.823 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 39884.665 37467.34 0.939 44207.95 42435.91 30726.73 37333.42 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 13 0 AAAGGSSLQ NA NetMHC 39811.17 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 44370.405 NA NA NA 39811.17 NA 48929.64 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 HLA-E*01:01 9 7 6 SRTLLLAAA SRTLLAAAG PickPocket 41151.77 30726.73 0.747 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 43565.535 37467.34 0.860 44207.95 45979.3 30726.73 41151.77 4 40434704 40434725 AGCGGCTGCGGCGGCTGCGGCC A ENST00000381793 ENSG00000163694 inframe_del AAAAAAAA/A 495-502 RBM47 HLA-E*01:01 9 4 9 SAAAAAAAV SAAAAAAAA PickPocket 21040.32 21040.32 1.000 127 3.149606051212122 NA NA NA NA 0.871384 0.128745 28125.2 29506.67 1.049 37973.02 35210.08 21040.32 21040.32 22 26936775 26936776 G T ENST00000338754 ENSG00000128294 missense P/H 274 TPST2 HLA-E*01:01 9 10 2 KHGGVSLSK KPGGVSLSK PickPocket 6539.79 20149.14 3.081 133 18.79699106789541 NA NA NA NA 16.0251 1.98891 24389.595 31577.025 1.295 43004.91 42239.4 20149.14 6539.79 diff --git a/tests/test_data/pvacseq/MHC_Class_II/Test.combined.parsed.tsv b/tests/test_data/pvacseq/MHC_Class_II/Test.combined.parsed.tsv index 9644ba3..eda1a21 100644 --- a/tests/test_data/pvacseq/MHC_Class_II/Test.combined.parsed.tsv +++ b/tests/test_data/pvacseq/MHC_Class_II/Test.combined.parsed.tsv @@ -129,8 +129,7 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 11 8 PPLLPLLPLLLLLGA NA NNalign 3519.2 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 3519.2 NA NA NA 3519.2 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 12 7 PLLPLLPLLLLLGAS PPPLLPLLLLLLGAS NNalign 4769.1 18984.2 3.981 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 4769.1 18984.2 3.981 18984.2 4769.1 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 13 6 LLPLLPLLLLLGASG PPLLPLLLLLLGASG NNalign 8015.7 30110.3 3.756 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 8015.7 30110.3 3.756 30110.3 8015.7 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 19 0 LLLLLGASGGGGGAR NA NNalign 10076.1 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10076.1 NA NA NA 10076.1 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA NA NNalign 10810.0 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10810.0 NA NA NA 10810.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA LLLLLLGASGGGGGA NNalign 10810.0 10735.4 0.993 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10810.0 10735.4 0.993 10735.4 10810.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 17 2 LPLLLLLGASGGGGG PLLLLLLGASGGGGG NNalign 11744.6 11744.1 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 11744.6 11744.1 1.000 11744.1 11744.6 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 16 3 LLPLLLLLGASGGGG LPLLLLLLGASGGGG NNalign 12726.0 12743.9 1.001 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 12726.0 12743.9 1.001 12743.9 12726.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 15 4 PLLPLLLLLGASGGG LLPLLLLLLGASGGG NNalign 17866.0 18241.2 1.021 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 17866.0 18241.2 1.021 18241.2 17866.0 @@ -249,7 +248,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 3 15 QTSVNSATLSRTLLL QTSVNSATLSRTLLA NNalign 2031.7 1883.7 0.927 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 2031.7 1883.7 0.927 1883.7 2031.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 4 14 TSVNSATLSRTLLLA TSVNSATLSRTLLAA NNalign 2220.7 1899.8 0.855 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 2220.7 1899.8 0.855 1899.8 2220.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 5 13 SVNSATLSRTLLLAA SVNSATLSRTLLAAA NNalign 3501.4 2557.4 0.730 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 3501.4 2557.4 0.730 2557.4 3501.4 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 17 1 LAAAGGSSLQTQRGL NA NNalign 4014.6 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4014.6 NA NA NA 4014.6 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 10 8 TLSRTLLLAAAGGSS ATLSRTLLAAAGGSS NNalign 4161.8 3364.4 0.808 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4161.8 3364.4 0.808 3364.4 4161.8 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 6 12 VNSATLSRTLLLAAA VNSATLSRTLLAAAG NNalign 4502.9 2927.7 0.650 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4502.9 2927.7 0.650 2927.7 4502.9 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 9 9 ATLSRTLLLAAAGGS ATLSRTLLAAAGGSS NNalign 6472.4 3364.4 0.520 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 6472.4 3364.4 0.520 3364.4 6472.4 diff --git a/tests/test_data/pvacseq/MHC_Class_II/tmp/Test.H2-IAb.parsed.tsv_1-48 b/tests/test_data/pvacseq/MHC_Class_II/tmp/Test.H2-IAb.parsed.tsv_1-48 index 6aa873c..bd0f48c 100644 --- a/tests/test_data/pvacseq/MHC_Class_II/tmp/Test.H2-IAb.parsed.tsv_1-48 +++ b/tests/test_data/pvacseq/MHC_Class_II/tmp/Test.H2-IAb.parsed.tsv_1-48 @@ -129,8 +129,7 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 11 8 PPLLPLLPLLLLLGA NA NNalign 3519.2 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 3519.2 NA NA NA 3519.2 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 12 7 PLLPLLPLLLLLGAS PPPLLPLLLLLLGAS NNalign 4769.1 18984.2 3.981 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 4769.1 18984.2 3.981 18984.2 4769.1 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 13 6 LLPLLPLLLLLGASG PPLLPLLLLLLGASG NNalign 8015.7 30110.3 3.756 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 8015.7 30110.3 3.756 30110.3 8015.7 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 19 0 LLLLLGASGGGGGAR NA NNalign 10076.1 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10076.1 NA NA NA 10076.1 -2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA NA NNalign 10810.0 NA NA 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10810.0 NA NA NA 10810.0 +2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 18 1 PLLLLLGASGGGGGA LLLLLLGASGGGGGA NNalign 10810.0 10735.4 0.993 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 10810.0 10735.4 0.993 10735.4 10810.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 17 2 LPLLLLLGASGGGGG PLLLLLLGASGGGGG NNalign 11744.6 11744.1 1.000 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 11744.6 11744.1 1.000 11744.1 11744.6 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 16 3 LLPLLLLLGASGGGG LPLLLLLLGASGGGG NNalign 12726.0 12743.9 1.001 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 12726.0 12743.9 1.001 12743.9 12726.0 2 217498305 217498305 T TGCTGCC ENST00000233809 ENSG00000115457 inframe_ins L/LLP 20 IGFBP2 H2-IAb 15 15 4 PLLPLLLLLGASGGG LLPLLLLLLGASGGG NNalign 17866.0 18241.2 1.021 123 2.439024191949253 NA NA NA NA 10.2102 9.71946 17866.0 18241.2 1.021 18241.2 17866.0 @@ -249,7 +248,6 @@ Chromosome Start Stop Reference Variant Transcript Ensembl Gene ID Variant Type 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 3 15 QTSVNSATLSRTLLL QTSVNSATLSRTLLA NNalign 2031.7 1883.7 0.927 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 2031.7 1883.7 0.927 1883.7 2031.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 4 14 TSVNSATLSRTLLLA TSVNSATLSRTLLAA NNalign 2220.7 1899.8 0.855 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 2220.7 1899.8 0.855 1899.8 2220.7 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 5 13 SVNSATLSRTLLLAA SVNSATLSRTLLAAA NNalign 3501.4 2557.4 0.730 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 3501.4 2557.4 0.730 2557.4 3501.4 -6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 17 1 LAAAGGSSLQTQRGL NA NNalign 4014.6 NA NA 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4014.6 NA NA NA 4014.6 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 10 8 TLSRTLLLAAAGGSS ATLSRTLLAAAGGSS NNalign 4161.8 3364.4 0.808 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4161.8 3364.4 0.808 3364.4 4161.8 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 6 12 VNSATLSRTLLLAAA VNSATLSRTLLAAAG NNalign 4502.9 2927.7 0.650 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 4502.9 2927.7 0.650 2927.7 4502.9 6 41754573 41754573 C CCTT ENST00000458694 ENSG00000124593 inframe_ins -/L 287-288 PRICKLE4 H2-IAb 15 9 9 ATLSRTLLLAAAGGS ATLSRTLLAAAGGSS NNalign 6472.4 3364.4 0.520 74 70.27026077428908 NA NA NA NA 0.802702 5.06467e-12 6472.4 3364.4 0.520 3364.4 6472.4 diff --git a/tests/test_parse_output.py b/tests/test_parse_output.py index 4c16406..e4729ea 100644 --- a/tests/test_parse_output.py +++ b/tests/test_parse_output.py @@ -82,7 +82,7 @@ def test_parse_output_runs_and_produces_expected_output_with_multiple_iedb_files expected_output_file = os.path.join(self.test_data_dir, "output_Test_21.iedb.parsed.top.tsv") self.assertTrue(cmp(parse_output_output_file.name, expected_output_file)) - def test_parse_output_runs_and_produces_expected_output_for_indel_at_beginning_of_sequence(self): + def test_parse_output_runs_and_produces_expected_output_for_repetitive_deletion_at_beginning_of_sequence(self): parse_output_input_iedb_file = os.path.join(self.test_data_dir, "pat27_4.ann.HLA-A*02:01.9.tsv") parse_output_input_tsv_file = os.path.join(self.test_data_dir, "pat27_4.tsv") parse_output_key_file = os.path.join(self.test_data_dir, "pat27_4_18.fa.key") @@ -100,6 +100,24 @@ def test_parse_output_runs_and_produces_expected_output_for_indel_at_beginning_o expected_output_file = os.path.join(self.test_data_dir, "output_pat27_4_18.iedb.parsed.tsv") self.assertTrue(cmp(parse_output_output_file.name, expected_output_file)) + def test_parse_output_runs_and_produces_expected_output_for_repetitive_insertion_at_beginning_of_sequence(self): + parse_output_input_iedb_file = os.path.join(self.test_data_dir, "pat126.ann.HLA-A*01:01.9.tsv") + parse_output_input_tsv_file = os.path.join(self.test_data_dir, "pat126.tsv") + parse_output_key_file = os.path.join(self.test_data_dir, "pat126_17.fa.key") + parse_output_output_file = tempfile.NamedTemporaryFile() + + self.assertFalse(call([ + self.python, + self.executable, + parse_output_input_iedb_file, + parse_output_input_tsv_file, + parse_output_key_file, + parse_output_output_file.name, + ], shell=False)) + + expected_output_file = os.path.join(self.test_data_dir, "output_pat126_17.iedb.parsed.tsv") + self.assertTrue(cmp(parse_output_output_file.name, expected_output_file)) + def test_input_frameshift_variant_feature_elongation_gets_parsed_correctly(self): parse_output_input_iedb_file = os.path.join(self.test_data_dir, "input_frameshift_variant_feature_elongation.ann.HLA-A*29:02.9.tsv") parse_output_input_tsv_file = os.path.join(self.test_data_dir, "input_frameshift_variant_feature_elongation.tsv") From 85cce57d4c0702639d0499fe9a8cbe5da31aa22c Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Tue, 15 Nov 2016 11:39:32 -0600 Subject: [PATCH 3/7] If the current row is the last row in the input tsv, don't do anymore processing --- pvacseq/lib/pipeline.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pvacseq/lib/pipeline.py b/pvacseq/lib/pipeline.py index 59706d1..777efd7 100644 --- a/pvacseq/lib/pipeline.py +++ b/pvacseq/lib/pipeline.py @@ -121,6 +121,8 @@ def split_tsv_file(self, total_row_count): for row in reader: if skip == 0: split_tsv_writer.writerow(row) + if row_count == total_row_count: + break if row_count % tsv_size == 0: if skip == 0: split_tsv_file.close() From 5639a41cea031bda39970f09bb7724d08cf43726 Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Tue, 15 Nov 2016 11:39:51 -0600 Subject: [PATCH 4/7] Fix some output formatting --- pvacseq/lib/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvacseq/lib/pipeline.py b/pvacseq/lib/pipeline.py index 777efd7..81eb61a 100644 --- a/pvacseq/lib/pipeline.py +++ b/pvacseq/lib/pipeline.py @@ -292,7 +292,7 @@ def execute(self): status_message( "\n" - + "Done: pvacseq has completed. File %s contains list of filtered putative neoantigens" % self.final_path() + + "Done: pvacseq has completed. File %s contains list of filtered putative neoantigens. " % self.final_path() + "We recommend appending coverage information and running `pvacseq coverage_filter` to filter based on sequencing coverage information" ) if self.keep_tmp_files is False: From fed101f9a3ad2c2c54b4c1a84e9252af75d6a206 Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Tue, 29 Nov 2016 15:35:31 -0600 Subject: [PATCH 5/7] Only check the sample genotype if the VCF contains a sample --- pvacseq/lib/convert_vcf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pvacseq/lib/convert_vcf.py b/pvacseq/lib/convert_vcf.py index 2891ee5..d8aad25 100644 --- a/pvacseq/lib/convert_vcf.py +++ b/pvacseq/lib/convert_vcf.py @@ -186,10 +186,11 @@ def main(args_input = sys.argv[1:]): reference = entry.REF alts = entry.ALT - genotype = entry.genotype(vcf_reader.samples[0]) - if genotype.gt_type is None or genotype.gt_type == 0: - #The genotype is uncalled or hom_ref - continue + if len(vcf_reader.samples) == 1: + genotype = entry.genotype(vcf_reader.samples[0]) + if genotype.gt_type is None or genotype.gt_type == 0: + #The genotype is uncalled or hom_ref + continue alleles_dict = resolve_alleles(entry) for alt in alts: From 3200877f4e4c8a9c3aa73a9f3f595b2214ac208d Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Wed, 30 Nov 2016 14:33:59 -0600 Subject: [PATCH 6/7] Handle variants that are start lost --- pvacseq/lib/convert_vcf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pvacseq/lib/convert_vcf.py b/pvacseq/lib/convert_vcf.py index 2891ee5..48cc4f1 100644 --- a/pvacseq/lib/convert_vcf.py +++ b/pvacseq/lib/convert_vcf.py @@ -87,7 +87,9 @@ def parse_csq_entries_for_allele(csq_entries, csq_format, csq_allele): def resolve_consequence(consequence_string): consequences = {consequence.lower() for consequence in consequence_string.split('&')} - if 'frameshift_variant' in consequences: + if 'start_lost' in consequences: + consequence = None + elif 'frameshift_variant' in consequences: consequence = 'FS' elif 'missense_variant' in consequences: consequence = 'missense' From 3b6ceafe919a4e9b497b6bfb42077ca18821b9dd Mon Sep 17 00:00:00 2001 From: Susanna Kiwala Date: Thu, 1 Dec 2016 13:39:42 -0600 Subject: [PATCH 7/7] Update to version 4.0.4 --- docs/conf.py | 4 ++-- docs/index.rst | 7 +------ setup.py | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 100d974..52b0e04 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -62,9 +62,9 @@ # built documents. # # The short X.Y version. -version = '4.0.3' +version = '4.0.4' # The full version, including alpha/beta/rc tags. -release = '4.0.3' +release = '4.0.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index e9f1a0e..ea0cbaf 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,12 +23,7 @@ pVAC-Seq is a cancer immunotherapy pipeline for the identification of **p**\ ers New in version |version| ------------------------ -We added an :ref:`optional downstream analysis tool` to generate an annotated fasta file from a VCF with protein sequences of mutations and matching wildtypes. This tool can be run with the ``pvacseq generate_protein_fasta`` command. - -This release fixes a couple of errors that were introduced in the previous version which would occur during the processing of certain inframe indels. - -This version also fixes an error that would occur if the number of variants to -process was a multiple of the chosen ``--fasta-size``. +This release fixes a couple of minor bugs. Firstly, the pipeline will now skip variants that result in the loss of a start codon. Secondly, this release fixes a bug that would result in an error when the input VCF doesn't contain any sample genotype information. VCFs with no samples will now be fully processed through the pipeline. Citation -------- diff --git a/setup.py b/setup.py index 239a029..c246fe4 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ setup( name="pvacseq", - version="4.0.3", + version="4.0.4", packages=["pvacseq", "pvacseq.lib", "pvacseq.server"], entry_points={ "console_scripts":[