From 9d4597ecd41d1717064130e2be120911627a9d20 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 10 Sep 2024 18:50:49 -0700 Subject: [PATCH] Add emerging subclades to nextflu-private builds Adds rules and configs to annotate emerging subclades in nextflu-private builds, paving the way for any builds that define the `emerging_subclades` path in their config to use the same clade definitions. Most of these emerging subclades map directly to the main branch in the corresponding influenza nomenclature repos. H3N2 HA is an exception where the definitions come from a branch called `emerging`. --- Snakefile | 15 +++++ profiles/nextflu-private.yaml | 3 + .../h1n1pdm/ha/auspice_config.json | 63 +++++++++++++++++++ .../h3n2/ha/auspice_config.json | 41 +++++++++++- .../vic/ha/auspice_config.json | 35 +++++++++++ workflow/snakemake_rules/core.smk | 37 +++++++++++ workflow/snakemake_rules/export.smk | 3 + 7 files changed, 196 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index ab19f5e8..a7bfa7eb 100644 --- a/Snakefile +++ b/Snakefile @@ -48,6 +48,21 @@ subclade_url_by_lineage_and_segment = { } } +emerging_subclade_url_by_lineage_and_segment = { + "h1n1pdm": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H1N1pdm_HA/main/.auto-generated/subclades.tsv", + "na": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H1N1pdm_NA/main/.auto-generated/subclades.tsv", + }, + "h3n2": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H3N2_HA/emerging/.auto-generated/subclades.tsv", + "na": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_A-H3N2_NA/main/.auto-generated/subclades.tsv", + }, + "vic": { + "ha": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_B-Vic_HA/main/.auto-generated/subclades.tsv", + "na": "https://raw.githubusercontent.com/influenza-clade-nomenclature/seasonal_B-Vic_NA/main/.auto-generated/subclades.tsv", + 
} +} + if "data_source" in config and config["data_source"]=='fauna': include: "workflow/snakemake_rules/download_from_fauna.smk" diff --git a/profiles/nextflu-private.yaml b/profiles/nextflu-private.yaml index 7fb770b0..4f4b10d0 100644 --- a/profiles/nextflu-private.yaml +++ b/profiles/nextflu-private.yaml @@ -56,6 +56,7 @@ builds: tree_exclude_sites: "config/h1n1pdm/{segment}/exclude-sites.txt" clades: "config/h1n1pdm/ha/clades.tsv" subclades: "config/h1n1pdm/{segment}/subclades.tsv" + emerging_subclades: "config/h1n1pdm/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/h1n1pdm/{segment}/auspice_config.json" min_date: "2Y" reference_min_date: "6Y" @@ -104,6 +105,7 @@ builds: tree_exclude_sites: "config/h3n2/{segment}/exclude-sites.txt" clades: "config/h3n2/ha/clades.tsv" subclades: "config/h3n2/{segment}/subclades.tsv" + emerging_subclades: "config/h3n2/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/h3n2/{segment}/auspice_config.json" vaccines: "config/h3n2/vaccine.json" enable_glycosylation: true @@ -149,6 +151,7 @@ builds: tree_exclude_sites: "config/vic/{segment}/exclude-sites.txt" clades: "profiles/nextflu-private/vic/ha/clades.tsv" subclades: "config/vic/{segment}/subclades.tsv" + emerging_subclades: "config/vic/{segment}/emerging_subclades.tsv" auspice_config: "profiles/nextflu-private/vic/{segment}/auspice_config.json" min_date: "2Y" reference_min_date: "6Y" diff --git a/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json b/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json index 4a2b7e13..005106c9 100644 --- a/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json +++ b/profiles/nextflu-private/h1n1pdm/ha/auspice_config.json @@ -129,6 +129,69 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "C.1", + "#492AB5" + ], + [ + "C.1.1", + "#3F4CCB" + ], + [ + "C.1.2", + "#4271CE" + ], + [ + "C.1.5", + "#4C8FC0" + ], + [ + 
"C.1.7", + "#5AA5A8" + ], + [ + "C.1.7.1", + "#6DB38A" + ], + [ + "C.1.7.2", + "#85BA6F" + ], + [ + "C.1.8", + "#A0BE59" + ], + [ + "C.1.9", + "#BBBC49" + ], + [ + "D", + "#D2B340" + ], + [ + "D.1", + "#E19F3A" + ], + [ + "D.2", + "#E68033" + ], + [ + "D.3", + "#E2562B" + ], + [ + "D.4", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", diff --git a/profiles/nextflu-private/h3n2/ha/auspice_config.json b/profiles/nextflu-private/h3n2/ha/auspice_config.json index 46e1707c..84abe5c1 100644 --- a/profiles/nextflu-private/h3n2/ha/auspice_config.json +++ b/profiles/nextflu-private/h3n2/ha/auspice_config.json @@ -141,6 +141,45 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "J", + "#4068CF" + ], + [ + "J.1", + "#5098B9" + ], + [ + "J.1.1", + "#6CB28C" + ], + [ + "J.2", + "#94BD62" + ], + [ + "J.2.1", + "#BFBB47" + ], + [ + "J.2.2", + "#DFA53B" + ], + [ + "J.3", + "#E67131" + ], + [ + "J.4", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", @@ -555,4 +594,4 @@ "entropy", "frequencies" ] -} +} \ No newline at end of file diff --git a/profiles/nextflu-private/vic/ha/auspice_config.json b/profiles/nextflu-private/vic/ha/auspice_config.json index b9884b3c..ea047f20 100644 --- a/profiles/nextflu-private/vic/ha/auspice_config.json +++ b/profiles/nextflu-private/vic/ha/auspice_config.json @@ -79,6 +79,41 @@ ] ] }, + { + "key": "emerging_subclade", + "title": "Emerging subclade", + "type": "categorical", + "scale": [ + [ + "C.2", + "#4272CE" + ], + [ + "C.3", + "#58A2AC" + ], + [ + "C.5", + "#7DB877" + ], + [ + "C.5.1", + "#AEBD50" + ], + [ + "C.5.4", + "#D8AE3E" + ], + [ + "C.5.6", + "#E67A32" + ], + [ + "C.5.7", + "#DB2823" + ] + ] + }, { "key": "haplotype", "title": "Derived haplotype", diff --git a/workflow/snakemake_rules/core.smk b/workflow/snakemake_rules/core.smk index 1efb4e4d..37bec68d 100644 --- a/workflow/snakemake_rules/core.smk +++ 
b/workflow/snakemake_rules/core.smk @@ -420,6 +420,43 @@ rule import_clades: --output {output.node_data} 2>&1 | tee {log} """ +rule download_emerging_subclades: + output: + subclades="config/{lineage}/{segment}/emerging_subclades.tsv", + conda: "../envs/nextstrain.yaml" + params: + url=lambda wildcards: emerging_subclade_url_by_lineage_and_segment.get(wildcards.lineage, {}).get(wildcards.segment), + shell: + """ + curl --fail -o {output.subclades} "{params.url}" + """ + +rule emerging_subclades: + input: + tree = build_dir + "/{build_name}/{segment}/tree.nwk", + muts = build_dir + "/{build_name}/{segment}/muts.json", + clades = lambda wildcards: config["builds"][wildcards.build_name].get("emerging_subclades"), + output: + node_data = build_dir + "/{build_name}/{segment}/emerging_subclades.json", + params: + membership_name = "emerging_subclade", + label_name = "Emerging subclade", + conda: "../envs/nextstrain.yaml" + benchmark: + "benchmarks/emerging_subclades_{build_name}_{segment}.txt" + log: + "logs/emerging_subclades_{build_name}_{segment}.txt" + shell: + """ + augur clades \ + --tree {input.tree} \ + --mutations {input.muts} \ + --clades {input.clades} \ + --membership-name {params.membership_name} \ + --label-name {params.label_name:q} \ + --output {output.node_data} 2>&1 | tee {log} + """ + rule annotate_haplotypes: input: tree=build_dir + "/{build_name}/ha/tree.nwk", diff --git a/workflow/snakemake_rules/export.smk b/workflow/snakemake_rules/export.smk index e5d5391f..b9088382 100644 --- a/workflow/snakemake_rules/export.smk +++ b/workflow/snakemake_rules/export.smk @@ -23,6 +23,9 @@ def _get_node_data_by_wildcards(wildcards): if config["builds"][wildcards.build_name].get('subclades', False): inputs.append(rules.subclades.output.node_data) + if config["builds"][wildcards.build_name].get('emerging_subclades', False): + inputs.append(rules.emerging_subclades.output.node_data) + if config["builds"][wildcards.build_name].get('enable_titer_models', False) and 
wildcards.segment == 'ha': for collection in config["builds"][wildcards.build_name]["titer_collections"]: inputs.append(rules.titers_sub.output.titers_model.format(titer_collection=collection["name"], **wildcards_dict))