Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/hotfix'
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Jul 14, 2023
2 parents 05c9c5c + 86ecee5 commit e2f0ea5
Show file tree
Hide file tree
Showing 23 changed files with 166,424 additions and 69,560 deletions.
Binary file added HCC1395_inputs.zip
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
# The short X.Y version.
version = '4.0'
# The full version, including alpha/beta/rc tags.
release = '4.0.0'
release = '4.0.1'


# The language for content autogenerated by Sphinx. Refer to documentation
Expand Down
11 changes: 11 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ Contents
contact
mailing_list

New in Release |release|
------------------------

This is a bugfix release. It fixes the following problem(s):

- Fix errors for a few edge cases when determining the mutation
position(s).
- Update the HCC1395 demo data for pVACview to include elution data.
- Correctly set NA columns in pVACview export dataframe.
- Handle Arriba files with empty peptide_sequence fields.

New in Version |version|
------------------------

Expand Down
11 changes: 11 additions & 0 deletions docs/releases/4_0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,14 @@ _____________
the filtered report.
- A new parameter ``--aggregate-inclusion-binding-threshold`` controls which
epitope candidates are included in the aggregate report.

New in Version 4.0.1
--------------------

This is a bugfix release. It fixes the following problem(s):

- Fix errors for a few edge cases when determining the mutation
position(s).
- Update the HCC1395 demo data for pVACview to include elution data.
- Correctly set NA columns in pVACview export dataframe.
- Handle Arriba files with empty peptide_sequence fields.
28 changes: 26 additions & 2 deletions pvactools/lib/calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,16 @@ def _get_peptide(self, line, mt_records_dict, wt_records_dict):
epitope = line['Best Peptide']
(full_peptide, wt_peptide, variant_type, mt_amino_acids, wt_amino_acids) = self._get_full_peptide(line, mt_records_dict, wt_records_dict)
if variant_type != 'FS':
mt_pos = int(line['Pos'].split('-')[0])
if line['Pos'] == 'NA':
mt_pos = None
for i,(wt_aa,mt_aa) in enumerate(zip(wt_peptide,full_peptide)):
if wt_aa != mt_aa:
mt_pos = i
break
if mt_pos is None:
return None, full_peptide
else:
mt_pos = int(line['Pos'].split('-')[0])
else:
epitope = line['MT Epitope Seq']
full_peptide = mt_records_dict[line['Index']]
Expand Down Expand Up @@ -453,6 +462,20 @@ def _write_outputs(self, processed_peptides, mt_records_dict, wt_records_dict):
peptide, full_peptide = self._get_peptide(line, mt_records_dict, wt_records_dict)

if self.peptide_fasta:
if peptide is None:
if self._input_tsv_type(line) == 'aggregated':
line['Ref Match'] = 'Not Run'
if self.aggregate_metrics_file:
self.aggregate_metrics[line['ID']]['reference_matches'] = {
'count': 0,
'query_peptide': peptide,
'matches': []
}
else:
line['Reference Match'] = 'Not Run'
writer.writerow(line)
continue

results = processed_peptides[peptide]
else:
results = processed_peptides[full_peptide]
Expand Down Expand Up @@ -553,7 +576,8 @@ def _get_unique_peptides(self, mt_records_dict, wt_records_dict):
for line in reader:
peptide, full_peptide = self._get_peptide(line, mt_records_dict, wt_records_dict)
if self.peptide_fasta:
unique_peptides.add(peptide)
if peptide is not None:
unique_peptides.add(peptide)
else:
unique_peptides.add(full_peptide)

Expand Down
2 changes: 1 addition & 1 deletion pvactools/lib/input_file_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def parse_arriba_file(self, starfusion_entries):
for record in reader:
(five_prime_chr, five_prime_start) = record['breakpoint1'].split(':')
(three_prime_chr, three_prime_start) = record['breakpoint2'].split(':')
if record['peptide_sequence'] == '.':
if record['peptide_sequence'] == '.' or record['peptide_sequence'] is None or record['peptide_sequence'] == "":
continue
(fusion_position, fusion_amino_acid_sequence) = self.determine_fusion_sequence(record['peptide_sequence'], '|')
gene_name = "{}-{}".format(record['#gene1'], record['gene2'])
Expand Down
13 changes: 8 additions & 5 deletions pvactools/lib/output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def match_wildtype_and_mutant_entry_for_frameshift(self, result, mt_position, wt
result['wt_percentiles'] = self.format_match_na(result, 'percentile')
mutation_position = self.find_mutation_position(wt_epitope_seq, mt_epitope_seq)
if mutation_position == peptide_length:
result['mutation_position'] = mutation_position
result['mutation_position'] = '{}'.format(mutation_position)
else:
result['mutation_position'] = '{}-{}'.format(mutation_position, peptide_length)
result['wt_epitope_position'] = match_position
Expand All @@ -277,7 +277,7 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(self, result, mt_position,
best_match_position = previous_result['wt_epitope_position'] + 1
result['wt_epitope_position'] = best_match_position
result['match_direction'] = 'right'
result['mutation_position'] = self.determine_ins_mut_position_from_previous_result(previous_result, mt_epitope_seq, result)
result['mutation_position'] = self.determine_ins_mut_position_from_previous_result(previous_result, mt_epitope_seq, result)

#We need to ensure that the matched WT epitope has enough overlapping amino acids with the MT epitope
best_match_wt_result = wt_results[str(best_match_position)]
Expand Down Expand Up @@ -307,7 +307,10 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(self, result, mt_position,
result['wt_percentiles'] = self.format_match_na(result, 'percentile')
#We then infer the mutation position and match direction from the previous MT epitope
result['match_direction'] = previous_result['match_direction']
result['mutation_position'] = self.determine_ins_mut_position_from_previous_result(previous_result, mt_epitope_seq, result)
if previous_result['mutation_position'] == 'NA' or previous_result['mutation_position'] == '1':
result['mutation_position'] = 'NA'
else:
result['mutation_position'] = self.determine_ins_mut_position_from_previous_result(previous_result, mt_epitope_seq, result)
return

baseline_best_match_wt_result = wt_results[baseline_best_match_position]
Expand Down Expand Up @@ -368,12 +371,12 @@ def match_wildtype_and_mutant_entry_for_inframe_indel(self, result, mt_position,
if result['variant_type'] == 'inframe_ins':
mutation_position = self.find_ins_mut_position(baseline_best_match_wt_epitope_seq, mt_epitope_seq, result['amino_acid_change'], match_direction)
if mutation_position is None:
result['mutation_position'] = None
result['mutation_position'] = 'NA'
else:
if previous_result is None:
result['mutation_position'] = '{}-{}'.format(mutation_position[0], mutation_position[1]) if len(mutation_position)==2 else '{}'.format(mutation_position[0])
else:
if previous_result['mutation_position'] is None:
if previous_result['mutation_position'] == 'NA':
result['mutation_position'] = '{}-{}'.format(mutation_position[0], mutation_position[1]) if len(mutation_position)==2 else '{}'.format(mutation_position[0])
else:
result['mutation_position'] = self.determine_ins_mut_position_from_previous_result(previous_result, mt_epitope_seq, result)
Expand Down
46 changes: 25 additions & 21 deletions pvactools/lib/prediction_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
from Bio import SeqIO
import random
import uuid
from mhcflurry.downloads import get_default_class1_presentation_models_dir
from mhcflurry.class1_presentation_predictor import Class1PresentationPredictor
import numpy

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class IEDB(metaclass=ABCMeta):
@classmethod
Expand Down Expand Up @@ -318,29 +323,28 @@ def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb

all_epitopes = list(set(all_epitopes))
if len(all_epitopes) > 0:
tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
arguments = ["mhcflurry-predict", "--alleles", allele, "--out", tmp_output_file.name, "--peptides"]
arguments.extend(all_epitopes)
stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
try:
response = run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
except:
stderr_fh.close()
with open(stderr_fh.name, 'r') as fh:
err = fh.read()
os.unlink(stderr_fh.name)
raise Exception("An error occurred while calling MHCflurry:\n{}".format(err))
stderr_fh.close()
os.unlink(stderr_fh.name)
tmp_output_file.close()
df = pd.read_csv(tmp_output_file.name)
os.unlink(tmp_output_file.name)
models_dir = get_default_class1_presentation_models_dir(test_exists=True)
predictor = Class1PresentationPredictor.load(models_dir)
df = predictor.predict(
peptides=numpy.array(all_epitopes, dtype='object'),
n_flanks=None,
c_flanks=None,
alleles={allele: [allele]},
throw=True,
include_affinity_percentile=True,
verbose=0
)
df.rename(columns={
'mhcflurry_prediction': 'ic50',
'mhcflurry_affinity': 'ic50',
'mhcflurry_prediction_percentile': 'percentile',
'mhcflurry_affinity_percentile': 'percentile'
'prediction': 'ic50',
'affinity': 'ic50',
'prediction_percentile': 'percentile',
'affinity_percentile': 'percentile',
'processing_score': 'mhcflurry_processing_score',
'presentation_score': 'mhcflurry_presentation_score',
'presentation_percentile': 'mhcflurry_presentation_percentile',
'best_allele': 'allele',
}, inplace=True)
df.drop(labels='peptide_num', axis=1, inplace=True)
for record in SeqIO.parse(input_file, "fasta"):
seq_num = record.id
peptide = str(record.seq)
Expand Down
2 changes: 1 addition & 1 deletion pvactools/lib/run_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def __init__(self):
tool_name = "pvacseq"
input_file_help = (
"A VEP-annotated single- or multi-sample VCF containing genotype, transcript, "
"Wildtype protein sequence, and Downstream protein sequence information."
"Wildtype protein sequence, and Frameshift protein sequence information."
"The VCF may be gzipped (requires tabix index)."
)
PredictionRunWithFastaGenerationArgumentParser.__init__(self, tool_name, input_file_help)
Expand Down
2 changes: 1 addition & 1 deletion pvactools/tools/pvacseq/generate_protein_fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def define_parser():
parser.add_argument(
"input_vcf",
help="A VEP-annotated single- or multi-sample VCF containing genotype, transcript, "
+"Wildtype protein sequence, and Downstream protein sequence information."
+"Wildtype protein sequence, and Frameshift protein sequence information."
+"The VCF may be gzipped (requires tabix index)."
)
parser.add_argument(
Expand Down
Loading

0 comments on commit e2f0ea5

Please sign in to comment.