Skip to content

Commit

Permalink
add option to run multimodels template with array of balancing strate…
Browse files Browse the repository at this point in the history
…gies (#56)

* add option balancing strategy to multimodel template

---------

Co-authored-by: Jelle Teijema <[email protected]>
  • Loading branch information
BjarneJesse and jteijema authored Apr 18, 2024
1 parent 44bd389 commit 726a734
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 10 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,18 +175,18 @@ optional arguments:
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
--n_runs N_RUNS Number of runs. Default: 1.
--no_wordclouds Disables the generation of wordclouds.
--balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double.
--instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1.
--stop_if STOP_IF The number of label actions to simulate. Default 'min' will stop simulating when all relevant records are found.
--classifiers CLASSIFIERS Classifiers to use. Default: ['logistic', 'nb', 'rf', 'svm']
--feature_extractors FEATURE_EXTRACTOR Feature extractors to use. Default: ['doc2vec', 'sbert', 'tfidf']
--query_strategies QUERY_STRATEGY Query strategies to use. Default: ['max']
--balancing_strategies BALANCE_STRATEGY Balancing strategies to use. Default: ['double']
--impossible_models IMPOSSIBLE_MODELS Model combinations to exclude. Default: ['nb,doc2vec', 'nb,sbert']
```

If you want to specify certain combinations of classifiers and feature
extractors that should and should not be used, you can use the `--classifiers`,
`--feature_extractors`, `--query_strategies` and `--impossible_models` option. For instance, if you
`--feature_extractors`, `--query_strategies`, `--balancing_strategies` and `--impossible_models` options. For instance, if you
want to exclude the combinations of `nb` with `doc2vec` and `logistic` with
`tfidf`, use the following command:

Expand Down
9 changes: 8 additions & 1 deletion asreviewcontrib/makita/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,13 @@ def execute(self, argv): # noqa: C901
help="Query strategies to use. Only for template 'multimodel'. "
"Default: ['max']",
)
parser_template.add_argument(
"--balancing_strategies",
nargs="+",
default=["double"],
help="Balancing strategies to use. Only for template 'multimodel'. "
"Default: ['double']",
)
parser_template.add_argument(
"--impossible_models",
nargs="+",
Expand Down Expand Up @@ -275,8 +282,8 @@ def _template(self, args):
all_classifiers=args.classifiers,
all_feature_extractors=args.feature_extractors,
all_query_strategies=args.query_strategies,
all_balancing_strategies=args.balancing_strategies,
impossible_models=args.impossible_models,
balance_strategy=args.balance_strategy,
instances_per_query=args.instances_per_query,
stop_if=args.stop_if,
fp_template=fp_template,
Expand Down
7 changes: 5 additions & 2 deletions asreviewcontrib/makita/template_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def render_jobs_multimodel(
all_classifiers=None,
all_feature_extractors=None,
all_query_strategies=None,
all_balancing_strategies=None,
impossible_models=None,
balance_strategy="double",
instances_per_query=1,
stop_if='min',
fp_template=None,
Expand All @@ -39,6 +39,9 @@ def render_jobs_multimodel(
if all_query_strategies is None:
all_query_strategies = ["max"]

if all_balancing_strategies is None:
all_balancing_strategies = ["double"]

if impossible_models is None:
impossible_models = ["nb,doc2vec", "nb,sbert"]

Expand Down Expand Up @@ -108,7 +111,6 @@ def render_jobs_multimodel(
{
"datasets": params,
"create_wordclouds": create_wordclouds,
"balance_strategy": balance_strategy,
"instances_per_query": instances_per_query,
"stop_if": stop_if,
"output_folder": output_folder,
Expand All @@ -119,6 +121,7 @@ def render_jobs_multimodel(
"all_query_strategies": all_query_strategies,
"all_classifiers": all_classifiers,
"all_feature_extractors": all_feature_extractors,
"all_balancing_strategies": all_balancing_strategies,
"impossible_models": [i.split(",") for i in impossible_models],
}
)
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,20 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files
{% for classifier in all_classifiers %}
{% for feature_extraction in all_feature_extractors %}
{% for query_strategy in all_query_strategies %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }}
{% for balance_strategy in all_balancing_strategies %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}
{% if temp in impossible_models %}

# Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}, Balance strategy = {{balance_strategy}}
{% for run in range(n_runs) %}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.json
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --balance_strategy {{balance_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.json
{% endfor %}{% endif %}
{% endfor %}
{% endfor %}
{% endfor %}

{% endfor %}

# Generate plot and tables for dataset
python {{ scripts_folder }}/get_plot.py -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/ -o {{ output_folder }}/figures/plot_recall_sim_{{ dataset.input_file_stem }}.png --show_legend model
Expand Down

0 comments on commit 726a734

Please sign in to comment.