Skip to content

Commit

Permalink
Combine color-related rules
Browse files Browse the repository at this point in the history
This both simplifies the DAG and allows GISAID builds which have GenoFLU
colorings to get a nicely ordered set of colors.

Note that H9N2 & H7N9 builds, which don't export genoflu metadata, will
still get colors assigned by the colors rule (because there is a value of
"Not assigned (too divergent)" in the metadata TSV), but these are then
dropped by export.
  • Loading branch information
jameshadfield committed Feb 26, 2025
1 parent 374ed87 commit 06c7974
Show file tree
Hide file tree
Showing 6 changed files with 161 additions and 41 deletions.
17 changes: 15 additions & 2 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -668,12 +668,25 @@ rule auspice_config:

rule colors:
input:
colors = config['colors']['hardcoded'],
metadata = rules.filter.output.metadata,
# TODO XXX - the d1.1 & cattle-flu builds previously used the genome metadata instead:
# metadata = "results/{subtype}/genome/{time}/metadata.tsv", # Always use the genome metadata, even for segment builds
colors = lambda w: get_config('colors', 'hardcoded', w),
ordering = lambda w: get_config('colors', 'ordering', w),
schemes = lambda w: get_config('colors', 'schemes', w),
output:
colors = "results/{subtype}/{segment}/{time}/colors.tsv",
params:
duplications = lambda w: ["=".join(pair) for pair in get_config('colors', 'duplications', w)],
shell:
"""
cp {input.colors} {output.colors}
cp {input.colors} {output.colors} && \
python3 scripts/assign-colors.py \
--metadata {input.metadata} \
--ordering {input.ordering} \
--color-schemes {input.schemes} \
--duplications {params.duplications} \
>> {output.colors}
"""

rule export:
Expand Down
9 changes: 8 additions & 1 deletion config/gisaid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,14 @@ traits:
FALLBACK: true

colors:
hardcoded: config/{subtype}/colors_{subtype}.tsv
hardcoded:
FALLBACK: config/{subtype}/colors_{subtype}.tsv
ordering:
FALLBACK: "config/shared/color_ordering_genoflu.tsv" # for dynamic assignment
schemes:
FALLBACK: "config/shared/color_schemes.tsv"
duplications:
FALLBACK: []

export:
title:
Expand Down
12 changes: 8 additions & 4 deletions config/h5n1-cattle-outbreak.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,15 @@ traits:
FALLBACK: true

colors:
hardcoded: config/h5n1/colors_h5n1.tsv # use H5N1 colors
ordering: "config/h5n1-cattle-outbreak/color_ordering.tsv" # for dynamic assignment
schemes: "config/shared/color_schemes.tsv"
hardcoded:
FALLBACK: config/h5n1/colors_h5n1.tsv # use H5N1 colors
ordering:
FALLBACK: "config/h5n1-cattle-outbreak/color_ordering.tsv" # for dynamic assignment
schemes:
FALLBACK: "config/shared/color_schemes.tsv"
duplications:
- ["division", "division_metadata"]
FALLBACK:
- ["division", "division_metadata"]

export:
genome_title:
Expand Down
14 changes: 9 additions & 5 deletions config/h5n1-d1.1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,16 @@ traits:
FALLBACK: true

colors:
hardcoded: config/h5n1/colors_h5n1.tsv # use H5N1 colors
ordering: "config/h5n1-cattle-outbreak/color_ordering.tsv" # for dynamic assignment
schemes: "config/shared/color_schemes.tsv"
hardcoded:
FALLBACK: config/h5n1/colors_h5n1.tsv # use H5N1 colors
ordering:
FALLBACK: "config/h5n1-cattle-outbreak/color_ordering.tsv" # for dynamic assignment
# NOTE: we could add config/shared/color_ordering_genoflu.tsv here but these builds only have 1 genoflu value
schemes:
FALLBACK: "config/shared/color_schemes.tsv"
duplications:
- ["division", "division_metadata"]

FALLBACK:
- ["division", "division_metadata"]

export:
genome_title:
Expand Down
121 changes: 121 additions & 0 deletions config/shared/color_ordering_genoflu.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
genoflu A1
genoflu A2
genoflu A3
genoflu A4
genoflu A5
genoflu A6
genoflu B1.1
genoflu B1.2
genoflu B1.3
genoflu B2.1
genoflu B2.2
genoflu B3.1
genoflu B3.10
genoflu B3.11
genoflu B3.12
genoflu B3.13
genoflu B3.2
genoflu B3.3
genoflu B3.4
genoflu B3.5
genoflu B3.6
genoflu B3.7
genoflu B3.8
genoflu B3.9
genoflu B4.1
genoflu B5.1
genoflu C1.1
genoflu C2.1
genoflu C3.1
genoflu D1.1
genoflu D1.2
genoflu D1.3
genoflu D2.1
genoflu Minor01
genoflu Minor04
genoflu Minor07
genoflu Minor08
genoflu Minor09
genoflu Minor10
genoflu Minor11
genoflu Minor12
genoflu Minor13
genoflu Minor14
genoflu Minor15
genoflu Minor16
genoflu Minor17
genoflu Minor18
genoflu Minor19
genoflu Minor24
genoflu Minor25
genoflu Minor26
genoflu Minor27
genoflu Minor28
genoflu Minor29
genoflu Minor30
genoflu Minor31
genoflu Minor32
genoflu Minor33
genoflu Minor34
genoflu Minor35
genoflu Minor36
genoflu Minor37
genoflu Minor38
genoflu Minor39
genoflu Minor40
genoflu Minor41
genoflu Minor42
genoflu Minor43
genoflu Minor44
genoflu Minor45
genoflu Minor46
genoflu Minor47
genoflu Minor48
genoflu Minor50
genoflu Minor51
genoflu Minor52
genoflu Minor53
genoflu Minor55
genoflu Minor56
genoflu Minor57
genoflu Minor58
genoflu Minor60
genoflu Minor61
genoflu Minor62
genoflu Minor63
genoflu Minor65
genoflu Minor66
genoflu Minor67
genoflu Minor70
genoflu Minor71
genoflu Minor73
genoflu Minor74
genoflu Minor75
genoflu Minor76
genoflu Minor77
genoflu Minor78
genoflu Minor79
genoflu Minor80
genoflu Minor81
genoflu Minor82
genoflu Minor83
genoflu Minor84
genoflu Minor86
genoflu Minor87
genoflu Minor89
genoflu Minor90
genoflu Minor91
genoflu Minor92
genoflu Minor93
genoflu Minor94
genoflu Minor95
genoflu Minor97
genoflu Minor98
genoflu Minor99
genoflu Minor100
genoflu Minor101
genoflu Minor102
genoflu Minor103
genoflu Minor104
genoflu Minor105
genoflu Minor106
29 changes: 0 additions & 29 deletions rules/cattle-flu.smk
Original file line number Diff line number Diff line change
Expand Up @@ -162,32 +162,3 @@ rule prune_tree:
--output-tree {output.tree} \
--output-metadata {output.node_data}
"""

# Builds the per-segment colors.tsv for the h5n1-cattle-outbreak and h5n1-d1.1
# builds (time="default" only, per the wildcard constraints below): the hardcoded
# colors file is copied to the output, then the stdout of scripts/assign-colors.py
# is appended to it.
rule colors_genome:
# TODO: add these input files / params to the config YAML. The config YAML must also
# define the concept of whether this rule should run so this isn't trivial and is
# thus left as a to-do
input:
metadata = "results/{subtype}/genome/{time}/metadata.tsv", # Always use the genome metadata, even for segment builds
ordering = config['colors']['ordering'],
schemes = config['colors']['schemes'],
colors = config['colors']['hardcoded']
output:
colors = "results/{subtype}/{segment}/{time}/colors.tsv",
params:
# Each configured pair is joined into a single "a=b" string; if the config has no
# 'duplications' key the list is empty.
duplications = ["=".join(pair) for pair in config['colors'].get('duplications', [])]
wildcard_constraints:
subtype='h5n1-cattle-outbreak|h5n1-d1.1',
time="default",
shell:
"""
cp {input.colors} {output.colors} && \
python3 scripts/assign-colors.py \
--metadata {input.metadata} \
--ordering {input.ordering} \
--color-schemes {input.schemes} \
--duplications {params.duplications} \
>> {output.colors}
"""

ruleorder: colors_genome > colors

0 comments on commit 06c7974

Please sign in to comment.