Add measurements panel for Welsh et al. escape scores #150

Draft: wants to merge 5 commits into master
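The notebook changes below add a step that writes one distance map JSON per serum sample. Based on the loop in the new cells, each per-serum file should look roughly like the sketch below; the serum, cohort, site, and amino acid values here are placeholders for illustration, not real data:

{
  "name": "Welsh et al. escape scores per site and amino acid for sample 'serum_A' in cohort 'cohort_1'",
  "serum": "serum_A",
  "cohort": "cohort_1",
  "cohort_serum": "cohort_1/serum_A",
  "default": 0,
  "map": {
    "HA1": {
      "145": [
        {"from": "K", "to": "N", "weight": 0.123456}
      ]
    }
  }
}

These per-serum maps are the files passed to augur distance via --map in the new Snakemake rules further down.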
@@ -28,7 +28,8 @@
"outputs": [],
"source": [
"import json\n",
"import pandas as pd"
"import pandas as pd\n",
"from pathlib import Path"
]
},
{
@@ -552,10 +553,93 @@
" )"
]
},
{
"cell_type": "markdown",
"id": "935dd658-3b21-4399-a104-b564d43b116a",
"metadata": {},
"source": [
"## Create distance map per serum sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38ad1e18-0459-45db-806f-534213290614",
"metadata": {},
"outputs": [],
"source": [
"output_dir = Path(\"welsh_escape_by_site_and_amino_acid_by_serum\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4aff470-c3e1-4ebb-8768-ec42f4bbe006",
"metadata": {},
"outputs": [],
"source": [
"output_dir.mkdir(exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "486de66c-1d23-44a9-83e2-0e65b8917ca7",
"metadata": {},
"outputs": [],
"source": [
"for (serum, cohort), serum_scores in nonnegative_ha1_escape_scores.groupby([\"serum\", \"cohort\"]):\n",
" serum_escape_records = serum_scores[[\n",
" \"site\",\n",
" \"wildtype\",\n",
" \"mutant\",\n",
" \"escape_mean\",\n",
" ]].to_dict(orient=\"records\")\n",
" \n",
" distance_map = {\n",
" \"name\": f\"Welsh et al. escape scores per site and amino acid for sample '{serum}' in cohort '{cohort}'\",\n",
" \"serum\": serum,\n",
" \"cohort\": cohort,\n",
" \"cohort_serum\": f\"{cohort}/{serum}\",\n",
" \"default\": 0,\n",
" \"map\": {\n",
" \"HA1\": {}\n",
" }\n",
" }\n",
" \n",
" for record in serum_escape_records:\n",
" site = str(record[\"site\"])\n",
" if str(site) not in distance_map[\"map\"][\"HA1\"]:\n",
" distance_map[\"map\"][\"HA1\"][str(site)] = []\n",
"\n",
" distance_map[\"map\"][\"HA1\"][str(site)].append({\n",
" \"from\": record[\"wildtype\"],\n",
" \"to\": record[\"mutant\"],\n",
" \"weight\": round(record[\"escape_mean\"], 6),\n",
" })\n",
" \n",
" with open(output_dir / f\"{serum}.json\", \"w\") as oh:\n",
" json.dump(\n",
" distance_map,\n",
" oh,\n",
" indent=2,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31a4b0cb-0a24-408d-a020-9e3941dfcd42",
"metadata": {},
"outputs": [],
"source": [
"!ls -l welsh_escape_by_site_and_amino_acid_by_serum/"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10bb367b-ad31-43ca-afff-7226f59349a6",
"id": "9eb7029c-ea8c-4816-b9d4-f35a5b5560b8",
"metadata": {},
"outputs": [],
"source": []
@@ -577,7 +661,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.8.13"
}
},
"nbformat": 4,
123 changes: 123 additions & 0 deletions profiles/nextflu-private/antigenic_distances.smk
@@ -193,3 +193,126 @@ rule export_private:
            --minify-json \
            --output {output.auspice_json} 2>&1 | tee {log}
        """

rule welsh_epitope_distances_by_serum:
    input:
        tree=rules.refine.output.tree,
        translations_done=build_dir + "/{build_name}/{segment}/translations.done",
        distance_map="config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/{serum}.json",
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.json",
    params:
        alignments=lambda w: [f"{build_dir}/{w.build_name}/{w.segment}/translations/{gene}_withInternalNodes.fasta" for gene in GENES[w.segment]],
        genes=lambda w: GENES[w.segment],
        comparisons="root",
        attribute_names="welsh_escape_for_serum",
    conda: "../../workflow/envs/nextstrain.yaml"
    benchmark:
        "benchmarks/welsh_epitope_distances_by_serum_{build_name}_{segment}_{serum}.txt"
    log:
        "logs/welsh_epitope_distances_by_serum_{build_name}_{segment}_{serum}.txt"
    shell:
        """
        augur distance \
            --tree {input.tree} \
            --alignment {params.alignments} \
            --gene-names {params.genes} \
            --compare-to {params.comparisons} \
            --attribute-name {params.attribute_names} \
            --map {input.distance_map} \
            --output {output.distances} 2>&1 | tee {log}
        """

rule convert_welsh_epitope_distances_to_table:
    input:
        tree="builds/{build_name}/{segment}/tree.nwk",
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.json",
        distance_map="config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/{serum}.json",
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.tsv",
    params:
        distance_map_attributes=["cohort_serum", "serum", "cohort"]
    conda: "../../workflow/envs/nextstrain.yaml"
    shell:
        """
        python3 scripts/convert_welsh_epitope_distances_to_table.py \
            --tree {input.tree} \
            --distances {input.distances} \
            --distance-map {input.distance_map} \
            --distance-map-attributes {params.distance_map_attributes:q} \
            --output {output.distances}
        """

def get_welsh_epitope_distances_by_serum(wildcards):
    import glob
    from pathlib import Path

    distance_maps = glob.glob("config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/*.json")
    serum_samples = [
        Path(distance_map).stem
        for distance_map in distance_maps
    ]

    return [
        f"builds/{wildcards.build_name}/{wildcards.segment}/welsh_epitope_distances_by_serum/{serum}.tsv"
        for serum in serum_samples
    ]

rule aggregate_welsh_epitope_distances_by_serum:
    input:
        distances=get_welsh_epitope_distances_by_serum,
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum.tsv",
    conda: "../../workflow/envs/nextstrain.yaml"
    shell:
        """
        csvtk concat -t {input.distances} > {output.distances}
        """

rule export_welsh_measurements:
    input:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum.tsv",
    output:
        measurements="builds/{build_name}/{segment}/welsh_measurements.json",
    conda: "../../workflow/envs/nextstrain.yaml"
    benchmark:
        "benchmarks/export_welsh_measurements_{build_name}_{segment}.txt"
    log:
        "logs/export_welsh_measurements_{build_name}_{segment}.txt"
    params:
        default_group_by="cohort_serum",
        strain_column="strain",
        value_column="welsh_escape_for_serum",
        key="welsh_escape",
        title="Welsh et al. escape scores",
        x_axis_label="escape score",
        thresholds=[0.0],
        filters=[
            "cohort_serum",
            "cohort",
        ],
        include_columns=[
            "strain",
            "cohort_serum",
            "cohort",
            "welsh_escape_for_serum",
        ],
    shell:
        """
        augur measurements export \
            --collection {input.distances} \
            --grouping-column {params.filters} \
            --group-by {params.default_group_by} \
            --include-columns {params.include_columns:q} \
            --strain-column {params.strain_column} \
            --value-column {params.value_column} \
            --key {params.key} \
            --title {params.title:q} \
            --x-axis-label {params.x_axis_label:q} \
            --thresholds {params.thresholds} \
            --filters {params.filters} \
            --show-threshold \
            --hide-overall-mean \
            --minify-json \
            --output-json {output.measurements} 2>&1 | tee {log}
        """
44 changes: 44 additions & 0 deletions scripts/convert_welsh_epitope_distances_to_table.py
@@ -0,0 +1,44 @@
import argparse
from augur.utils import read_node_data, read_tree
import csv
import json


if __name__ == '__main__':
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--tree", required=True, help="Newick tree used to identify leaf nodes in distances JSON")
    parser.add_argument("--distances", required=True, help="distances in node data JSON format")
    parser.add_argument("--distance-map", help="distance map with metadata stored in specific top-level attributes")
    parser.add_argument("--distance-map-attributes", nargs="*", default=[], help="optional list of top-level attributes to extract as metadata from the distance map")
    parser.add_argument("--output", required=True, help="distances in TSV format")

    args = parser.parse_args()

    # Load tree.
    tree = read_tree(args.tree)

    # Load distances per node from the node data JSON produced by augur distance.
    distances_per_node = read_node_data(args.distances)["nodes"]

    # Load distance map metadata, if any top-level attributes were requested.
    # Default to an empty list so the concatenations below also work when no
    # attributes are given.
    attributes = []
    if args.distance_map and args.distance_map_attributes:
        with open(args.distance_map, encoding="utf-8") as fh:
            distance_map = json.load(fh)

        attributes = [
            distance_map[attribute]
            for attribute in args.distance_map_attributes
        ]

    # Write out distances in TSV format with one row per leaf node (strain).
    with open(args.output, "w", encoding="utf-8") as oh:
        writer = csv.writer(oh, delimiter="\t", lineterminator="\n")
        writer.writerow(["strain", "welsh_escape_for_serum"] + args.distance_map_attributes)

        for node in tree.find_clades(terminal=True):
            record = [
                node.name,
                distances_per_node[node.name]["welsh_escape_for_serum"],
            ] + attributes
            writer.writerow(record)
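As a quick sanity check of the aggregated output, the concatenated TSV from the aggregate_welsh_epitope_distances_by_serum rule can be summarized per serum with a few lines of pandas. This is a minimal sketch, not part of the PR; it assumes the h3n2_2y HA build paths used elsewhere in these changes and the column names written by the script above.

import pandas as pd

# Load the aggregated per-serum distances table (path assumes the h3n2_2y HA build).
distances = pd.read_csv(
    "builds/h3n2_2y/ha/welsh_epitope_distances_by_serum.tsv",
    sep="\t",
)

# Columns: strain, welsh_escape_for_serum, cohort_serum, serum, cohort.
# Summarize escape scores for each cohort/serum pair.
summary = distances.groupby("cohort_serum")["welsh_escape_for_serum"].describe()
print(summary)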
3 changes: 3 additions & 0 deletions workflow/snakemake_rules/titer_models.smk
@@ -226,6 +226,9 @@ def get_titer_collections(wildcards):
    for collection in config["builds"][wildcards.build_name]["titer_collections"]:
        files.append(f"builds/{wildcards.build_name}/{wildcards.segment}/measurements/{collection['name']}.json")

    if wildcards.build_name == "h3n2_2y" and wildcards.segment == "ha":
        files.append(f"builds/{wildcards.build_name}/{wildcards.segment}/welsh_measurements.json")

    return files

rule concat_measurements: