Skip to content

Commit

Permalink
WIP for transcripts in annotation
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Dec 7, 2023
1 parent 2d14a3e commit db26e08
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 7 deletions.
3 changes: 2 additions & 1 deletion bin/evaluation/map_genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def load_clinvar_data(clinvar_xml):
def main(clinvar_xml, output_file):
    """Load ClinVar XML, map to Ensembl gene IDs, and dump results to TSV.

    :param clinvar_xml: ClinVar XML input, passed unchanged to ``load_clinvar_data``.
    :param output_file: Destination handed to ``to_csv``; rows are written
        tab-separated, without a header line or index column.
    """
    variants = load_clinvar_data(clinvar_xml)
    # Don't include transcripts for evaluation: only the RCV accession and the
    # Ensembl gene ID columns are written out below.
    annotated_variants = annotate_ensembl_gene_info(variants, include_transcripts=False)
    annotated_variants[['RCVaccession', 'EnsemblGeneID']].to_csv(output_file, sep='\t', index=False, header=False)


Expand Down
1 change: 1 addition & 0 deletions cmat/output_generation/annotated_clinvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def annotate(self, record):
self.overall_counts['both_measure_and_trait'] += 1

def annotate_and_count_measure(self, record):
# TODO include transcript if present in variant_to_gene_mappings
consequence_types, variant_category = get_consequence_types(record.measure, self.variant_to_gene_mappings)
record.measure.add_ensembl_annotations(consequence_types)

Expand Down
1 change: 1 addition & 0 deletions cmat/output_generation/consequence_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def process_gene(consequence_type_dict, variant_id, ensembl_gene_id, so_term):


def process_consequence_type_file(snp_2_gene_file, consequence_type_dict=None):
# TODO adapt for transcripts if present
"""
Return a dictionary of consequence information extracted from the given file.
If consequence_type_dict is provided then the information will be merged into this dictionary.
Expand Down
17 changes: 11 additions & 6 deletions pipelines/annotation_pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ def helpMessage() {
Generate ClinVar evidence strings for Open Targets, or annotated ClinVar XML.
Params:
--output_dir Directory for output
--schema Open Targets JSON schema version (optional, will output XML if omitted)
--clinvar ClinVar XML file (optional, will download latest if omitted)
--mappings Trait mappings file (optional, will use a default path if omitted)
--evaluate Whether to run evaluation or not (default false)
--output_dir Directory for output
--schema Open Targets JSON schema version (optional, will output XML if omitted)
--clinvar ClinVar XML file (optional, will download latest if omitted)
--mappings Trait mappings file (optional, will use a default path if omitted)
--include_transcripts Whether to include transcripts in consequences (default false)
--evaluate Whether to run evaluation or not (default false)
"""
}

Expand All @@ -21,6 +22,7 @@ params.output_dir = null
params.schema = null
params.clinvar = null
params.mappings = '${BATCH_ROOT_BASE}/manual_curation/latest_mappings.tsv'
params.include_transcripts = false
params.evaluate = false

if (params.help) {
Expand All @@ -31,7 +33,7 @@ if (!params.output_dir) {
}
batchRoot = params.output_dir
codeRoot = "${projectDir}/.."

includeTranscriptsFlag = params.include_transcripts ? "--include-transcripts" : ""

/*
* Main workflow.
Expand Down Expand Up @@ -135,6 +137,7 @@ process runSnpIndel {
-N 200 `# Number of records (lines) per worker` \
--tmpdir . `# Store temporary files in the current directory to avoid /tmp overflow` \
\${PYTHON_BIN} "${codeRoot}/cmat/consequence_prediction/snp_indel_variants/pipeline.py" \
${includeTranscriptsFlag} \
| sort -u > consequences_snp.tsv
"""
}
Expand All @@ -161,6 +164,7 @@ process runRepeat {
"""
\${PYTHON_BIN} ${codeRoot}/bin/consequence_prediction/run_repeat_expansion_variants.py \
--clinvar-xml ${clinvarXml} \
${includeTranscriptsFlag} \
--output-consequences consequences_repeat.tsv
# create an empty file if nothing generated
Expand Down Expand Up @@ -191,6 +195,7 @@ process runStructural {
"""
\${PYTHON_BIN} ${codeRoot}/bin/consequence_prediction/run_structural_variants.py \
--clinvar-xml ${clinvarXml} \
${includeTranscriptsFlag} \
--output-consequences consequences_structural.tsv
# create an empty file if nothing generated
Expand Down

0 comments on commit db26e08

Please sign in to comment.