Add measurements panel for Welsh et al. escape scores #150

Draft: wants to merge 5 commits into master
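The notebook changes below add a step that writes one distance map JSON per serum sample. Based on the loop in the new cells, each per-serum file should look roughly like the sketch below; the serum, cohort, site, and amino acid values here are placeholders for illustration, not real data:

{
  "name": "Welsh et al. escape scores per site and amino acid for sample 'serum_A' in cohort 'cohort_1'",
  "serum": "serum_A",
  "cohort": "cohort_1",
  "cohort_serum": "cohort_1/serum_A",
  "default": 0,
  "map": {
    "HA1": {
      "145": [
        {"from": "K", "to": "N", "weight": 0.123456}
      ]
    }
  }
}

These per-serum maps are the files passed to augur distance via --map in the new Snakemake rules further down.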
@@ -28,7 +28,8 @@
"outputs": [],
"source": [
"import json\n",
"import pandas as pd"
"import pandas as pd\n",
"from pathlib import Path"
]
},
{
@@ -552,10 +553,93 @@
" )"
]
},
{
"cell_type": "markdown",
"id": "935dd658-3b21-4399-a104-b564d43b116a",
"metadata": {},
"source": [
"## Create distance map per serum sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38ad1e18-0459-45db-806f-534213290614",
"metadata": {},
"outputs": [],
"source": [
"output_dir = Path(\"welsh_escape_by_site_and_amino_acid_by_serum\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4aff470-c3e1-4ebb-8768-ec42f4bbe006",
"metadata": {},
"outputs": [],
"source": [
"output_dir.mkdir(exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "486de66c-1d23-44a9-83e2-0e65b8917ca7",
"metadata": {},
"outputs": [],
"source": [
"for (serum, cohort), serum_scores in nonnegative_ha1_escape_scores.groupby([\"serum\", \"cohort\"]):\n",
" serum_escape_records = serum_scores[[\n",
" \"site\",\n",
" \"wildtype\",\n",
" \"mutant\",\n",
" \"escape_mean\",\n",
" ]].to_dict(orient=\"records\")\n",
" \n",
" distance_map = {\n",
" \"name\": f\"Welsh et al. escape scores per site and amino acid for sample '{serum}' in cohort '{cohort}'\",\n",
" \"serum\": serum,\n",
" \"cohort\": cohort,\n",
" \"cohort_serum\": f\"{cohort}/{serum}\",\n",
" \"default\": 0,\n",
" \"map\": {\n",
" \"HA1\": {}\n",
" }\n",
" }\n",
" \n",
" for record in serum_escape_records:\n",
" site = str(record[\"site\"])\n",
" if str(site) not in distance_map[\"map\"][\"HA1\"]:\n",
" distance_map[\"map\"][\"HA1\"][str(site)] = []\n",
"\n",
" distance_map[\"map\"][\"HA1\"][str(site)].append({\n",
" \"from\": record[\"wildtype\"],\n",
" \"to\": record[\"mutant\"],\n",
" \"weight\": round(record[\"escape_mean\"], 6),\n",
" })\n",
" \n",
" with open(output_dir / f\"{serum}.json\", \"w\") as oh:\n",
" json.dump(\n",
" distance_map,\n",
" oh,\n",
" indent=2,\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31a4b0cb-0a24-408d-a020-9e3941dfcd42",
"metadata": {},
"outputs": [],
"source": [
"!ls -l welsh_escape_by_site_and_amino_acid_by_serum/"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10bb367b-ad31-43ca-afff-7226f59349a6",
"id": "9eb7029c-ea8c-4816-b9d4-f35a5b5560b8",
"metadata": {},
"outputs": [],
"source": []
@@ -577,7 +661,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
"version": "3.8.13"
}
},
"nbformat": 4,
123 changes: 123 additions & 0 deletions profiles/nextflu-private/antigenic_distances.smk
@@ -193,3 +193,126 @@ rule export_private:
            --minify-json \
            --output {output.auspice_json} 2>&1 | tee {log}
        """

rule welsh_epitope_distances_by_serum:
    input:
        tree=rules.refine.output.tree,
        translations_done=build_dir + "/{build_name}/{segment}/translations.done",
        distance_map="config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/{serum}.json",
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.json",
    params:
        alignments=lambda w: [f"{build_dir}/{w.build_name}/{w.segment}/translations/{gene}_withInternalNodes.fasta" for gene in GENES[w.segment]],
        genes=lambda w: GENES[w.segment],
        comparisons="root",
        attribute_names="welsh_escape_for_serum",
    conda: "../../workflow/envs/nextstrain.yaml"
    benchmark:
        "benchmarks/welsh_epitope_distances_by_serum_{build_name}_{segment}_{serum}.txt"
    log:
        "logs/welsh_epitope_distances_by_serum_{build_name}_{segment}_{serum}.txt"
    shell:
        """
        augur distance \
            --tree {input.tree} \
            --alignment {params.alignments} \
            --gene-names {params.genes} \
            --compare-to {params.comparisons} \
            --attribute-name {params.attribute_names} \
            --map {input.distance_map} \
            --output {output.distances} 2>&1 | tee {log}
        """

rule convert_welsh_epitope_distances_to_table:
    input:
        tree="builds/{build_name}/{segment}/tree.nwk",
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.json",
        distance_map="config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/{serum}.json",
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum/{serum}.tsv",
    params:
        distance_map_attributes=["cohort_serum", "serum", "cohort"]
    conda: "../../workflow/envs/nextstrain.yaml"
    shell:
        """
        python3 scripts/convert_welsh_epitope_distances_to_table.py \
            --tree {input.tree} \
            --distances {input.distances} \
            --distance-map {input.distance_map} \
            --distance-map-attributes {params.distance_map_attributes:q} \
            --output {output.distances}
        """

def get_welsh_epitope_distances_by_serum(wildcards):
    import glob
    from pathlib import Path

    distance_maps = glob.glob("config/distance_maps/h3n2/ha/welsh_escape_by_site_and_amino_acid_by_serum/*.json")
    serum_samples = [
        Path(distance_map).stem
        for distance_map in distance_maps
    ]

    return [
        f"builds/{wildcards.build_name}/{wildcards.segment}/welsh_epitope_distances_by_serum/{serum}.tsv"
        for serum in serum_samples
    ]

rule aggregate_welsh_epitope_distances_by_serum:
    input:
        distances=get_welsh_epitope_distances_by_serum,
    output:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum.tsv",
    conda: "../../workflow/envs/nextstrain.yaml"
    shell:
        """
        csvtk concat -t {input.distances} > {output.distances}
        """

rule export_welsh_measurements:
    input:
        distances="builds/{build_name}/{segment}/welsh_epitope_distances_by_serum.tsv",
    output:
        measurements="builds/{build_name}/{segment}/welsh_measurements.json",
    conda: "../../workflow/envs/nextstrain.yaml"
    benchmark:
        "benchmarks/export_welsh_measurements_{build_name}_{segment}.txt"
    log:
        "logs/export_welsh_measurements_{build_name}_{segment}.txt"
    params:
        default_group_by="cohort_serum",
        strain_column="strain",
        value_column="welsh_escape_for_serum",
        key="welsh_escape",
        title="Welsh et al. escape scores",
        x_axis_label="escape score",
        thresholds=[0.0],
        filters=[
            "cohort_serum",
            "cohort",
        ],
        include_columns=[
            "strain",
            "cohort_serum",
            "cohort",
            "welsh_escape_for_serum",
        ],
    shell:
        """
        augur measurements export \
            --collection {input.distances} \
            --grouping-column {params.filters} \
            --group-by {params.default_group_by} \
            --include-columns {params.include_columns:q} \
            --strain-column {params.strain_column} \
            --value-column {params.value_column} \
            --key {params.key} \
            --title {params.title:q} \
            --x-axis-label {params.x_axis_label:q} \
            --thresholds {params.thresholds} \
            --filters {params.filters} \
            --show-threshold \
            --hide-overall-mean \
            --minify-json \
            --output-json {output.measurements} 2>&1 | tee {log}
        """
44 changes: 44 additions & 0 deletions scripts/convert_welsh_epitope_distances_to_table.py
@@ -0,0 +1,44 @@
import argparse
from augur.utils import read_node_data, read_tree
import csv
import json


if __name__ == '__main__':
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--tree", required=True, help="Newick tree used to identify leaf nodes in distances JSON")
    parser.add_argument("--distances", required=True, help="distances in node data JSON format")
    parser.add_argument("--distance-map", help="distance map with metadata stored in specific top-level attributes")
    parser.add_argument("--distance-map-attributes", nargs="*", default=[], help="optional list of top-level attributes to extract as metadata from the distance map")
    parser.add_argument("--output", required=True, help="distances in TSV format")

    args = parser.parse_args()

    # Load tree.
    tree = read_tree(args.tree)

    # Load distances per node from the node data JSON produced by augur distance.
    distances_per_node = read_node_data(args.distances)["nodes"]

    # Load distance map metadata, if any top-level attributes were requested.
    # Default to an empty list so the concatenations below also work when no
    # attributes are given.
    attributes = []
    if args.distance_map and args.distance_map_attributes:
        with open(args.distance_map, encoding="utf-8") as fh:
            distance_map = json.load(fh)

        attributes = [
            distance_map[attribute]
            for attribute in args.distance_map_attributes
        ]

    # Write out distances in TSV format with one row per leaf node (strain).
    with open(args.output, "w", encoding="utf-8") as oh:
        writer = csv.writer(oh, delimiter="\t", lineterminator="\n")
        writer.writerow(["strain", "welsh_escape_for_serum"] + args.distance_map_attributes)

        for node in tree.find_clades(terminal=True):
            record = [
                node.name,
                distances_per_node[node.name]["welsh_escape_for_serum"],
            ] + attributes
            writer.writerow(record)
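As a quick sanity check of the aggregated output, the concatenated TSV from the aggregate_welsh_epitope_distances_by_serum rule can be summarized per serum with a few lines of pandas. This is a minimal sketch, not part of the PR; it assumes the h3n2_2y HA build paths used elsewhere in these changes and the column names written by the script above.

import pandas as pd

# Load the aggregated per-serum distances table (path assumes the h3n2_2y HA build).
distances = pd.read_csv(
    "builds/h3n2_2y/ha/welsh_epitope_distances_by_serum.tsv",
    sep="\t",
)

# Columns: strain, welsh_escape_for_serum, cohort_serum, serum, cohort.
# Summarize escape scores for each cohort/serum pair.
summary = distances.groupby("cohort_serum")["welsh_escape_for_serum"].describe()
print(summary)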
3 changes: 3 additions & 0 deletions workflow/snakemake_rules/titer_models.smk
@@ -226,6 +226,9 @@ def get_titer_collections(wildcards):
    for collection in config["builds"][wildcards.build_name]["titer_collections"]:
        files.append(f"builds/{wildcards.build_name}/{wildcards.segment}/measurements/{collection['name']}.json")

    if wildcards.build_name == "h3n2_2y" and wildcards.segment == "ha":
        files.append(f"builds/{wildcards.build_name}/{wildcards.segment}/welsh_measurements.json")

    return files

rule concat_measurements: