Skip to content

Commit

Permalink
🚧 subsample: Remove priorities feature
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin committed Mar 27, 2024
1 parent 2bb77f6 commit f7c4149
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 51 deletions.
25 changes: 1 addition & 24 deletions defaults/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,7 @@ subsampling:
group_by: "country year month"
seq_per_group: 4
exclude: "--exclude-where 'region={region}'"
priorities:
type: "proximity"
focus: "region"
# FIXME: remove priorities from other files

# Custom subsampling logic for global region.
region_global:
Expand All @@ -242,9 +240,6 @@ subsampling:
group_by: "country year month"
seq_per_group: 6
exclude: "--exclude-where 'region={region}'"
priorities:
type: "proximity"
focus: "region"

# Default subsampling logic for countries
country:
Expand All @@ -258,18 +253,12 @@ subsampling:
group_by: "country year month"
seq_per_group: 20
exclude: "--exclude-where 'country={country}' 'region!={region}'"
priorities:
type: "proximity"
focus: "country"
# Contextual samples from the rest of the world,
# excluding the current region to avoid resampling.
global:
group_by: "country year month"
seq_per_group: 10
exclude: "--exclude-where 'region={region}'"
priorities:
type: "proximity"
focus: "country"

# Default subsampling logic for divisions
division:
Expand All @@ -283,26 +272,17 @@ subsampling:
group_by: "division year month"
seq_per_group: 20
exclude: "--exclude-where 'region!={region}' 'country!={country}' 'division={division}'"
priorities:
type: "proximity"
focus: "division"
# Contextual samples from division's region
region:
group_by: "country year month"
seq_per_group: 10
exclude: "--exclude-where 'region!={region}' 'country={country}'"
priorities:
type: "proximity"
focus: "division"
# Contextual samples from the rest of the world, excluding the current
# division's region to avoid resampling.
global:
group_by: "country year month"
seq_per_group: 5
exclude: "--exclude-where 'region={region}'"
priorities:
type: "proximity"
focus: "division"

# Default subsampling logic for locations
location:
Expand All @@ -316,9 +296,6 @@ subsampling:
group_by: "country year month"
seq_per_group: 20
exclude: "--exclude-where 'location={location}'"
priorities:
type: "proximity"
focus: "focal"
# Contextual samples from the rest of the world
contextual:
group_by: "country year month"
Expand Down
27 changes: 0 additions & 27 deletions workflow/snakemake_rules/main_workflow.smk
Original file line number Diff line number Diff line change
Expand Up @@ -140,29 +140,6 @@ def _get_subsampling_settings(wildcards):
return subsampling_settings


def get_priorities(wildcards):
    """Return the input file to use as priorities for a subsample.

    Looks up the subsampling settings for ``wildcards``. When they contain a
    ``priorities`` entry whose ``type`` is ``"proximity"``, the result is the
    path of the per-build priorities TSV named after the entry's ``focus``.
    In every other case the configured include file is returned instead.
    """
    settings = _get_subsampling_settings(wildcards)

    # No priorities configured for this subsample.
    # TODO: find a way to make the list of input files depend on config
    if "priorities" not in settings:
        return config["files"]["include"]

    priorities = settings["priorities"]
    if priorities["type"] != "proximity":
        # Same fallback as above for non-proximity priority types.
        return config["files"]["include"]

    return f"results/{wildcards.build_name}/priorities_{priorities['focus']}.tsv"


def get_priority_argument(wildcards):
    """Build the ``--priority`` command-line fragment for a subsample.

    Returns an empty string when the subsampling settings for ``wildcards``
    have no ``priorities`` entry, or when that entry is neither a
    ``"proximity"`` type nor a ``"file"`` type that carries a ``file`` key.
    Otherwise returns ``"--priority "`` followed by the shell-quoted path.
    """
    settings = _get_subsampling_settings(wildcards)
    if "priorities" not in settings:
        return ""

    priorities = settings["priorities"]
    priority_type = priorities["type"]

    if priority_type == "proximity":
        # Proximity priorities come from the path chosen by get_priorities.
        priority_path = get_priorities(wildcards)
    elif priority_type == "file" and "file" in priorities:
        # A user-supplied priorities file is passed through as-is.
        priority_path = priorities["file"]
    else:
        return ""

    return "--priority " + shquote(priority_path)


def _get_specific_subsampling_setting(setting, optional=False):
# Note -- this function contains a lot of conditional logic because
# we have the situation where some config options must define the
Expand Down Expand Up @@ -279,12 +256,10 @@ rule subsample:
- exclude: {params.exclude_argument}
- include: {params.include_argument}
- query: {params.query_argument}
- priority: {params.priority_argument}
"""
input:
metadata = _get_unified_metadata,
include = config["files"]["include"],
priorities = get_priorities,
exclude = config["files"]["exclude"]
output:
strains="results/{build_name}/sample-{subsample}.txt",
Expand All @@ -303,7 +278,6 @@ rule subsample:
exclude_ambiguous_dates_argument = _get_specific_subsampling_setting("exclude_ambiguous_dates_by", optional=True),
min_date = _get_specific_subsampling_setting("min_date", optional=True),
max_date = _get_specific_subsampling_setting("max_date", optional=True),
priority_argument = get_priority_argument
resources:
# Memory use scales with the number of sequences per group * number of groups.
# We pin this to a reasonably high value based on Nextstrain production builds.
Expand All @@ -321,7 +295,6 @@ rule subsample:
{params.include_argument} \
{params.query_argument} \
{params.exclude_ambiguous_dates_argument} \
{params.priority_argument} \
{params.group_by} \
{params.sequences_per_group} \
{params.subsample_max_sequences} \
Expand Down

0 comments on commit f7c4149

Please sign in to comment.