Skip to content

Commit

Permalink
Add option to run multimodels template with array of query strategies (
Browse files Browse the repository at this point in the history
…#53)

* Implemented multimodel query strategy

---------

Co-authored-by: JT <[email protected]>
  • Loading branch information
BjarneJesse and jteijema authored Apr 4, 2024
1 parent 35f5814 commit 44bd389
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 12 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,23 +175,23 @@ optional arguments:
--platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates.
--n_runs N_RUNS Number of runs. Default: 1.
--no_wordclouds Disables the generation of wordclouds.
--query_strategy QUERY_STRATEGY Query strategy to use. Default: max.
--balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double.
--instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1.
--stop_if STOP_IF The number of label actions to simulate. Default 'min' will stop simulating when all relevant records are found.
--classifiers CLASSIFIERS Classifiers to use. Default: ['logistic', 'nb', 'rf', 'svm']
--feature_extractors FEATURE_EXTRACTOR Feature extractors to use. Default: ['doc2vec', 'sbert', 'tfidf']
--query_strategies QUERY_STRATEGY Query strategies to use. Default: ['max']
--impossible_models IMPOSSIBLE_MODELS Model combinations to exclude Default: ['nb,doc2vec', 'nb,sbert']
```

If you want to specify certain combinations of classifiers and feature
extractors that should and should not be used, you can use the `--classifiers`,
`--feature_extractors`, and `--impossible_models` option. For instance, if you
`--feature_extractors`, `--query_strategies` and `--impossible_models` options. For instance, if you
want to exclude the combinations of `nb` with `doc2vec` and `logistic` with
`tfidf`, use the following command:

```console
asreview makita template multimodel --classifiers logistic nb --feature_extractors tfidf doc2vec --impossible_models nb,doc2vec logistic,tfidf
asreview makita template multimodel --classifiers logistic nb --feature_extractors tfidf doc2vec --query_strategies max max_random max_uncertainty cluster --impossible_models nb,doc2vec logistic,tfidf
```

## Advanced usage
Expand Down
9 changes: 8 additions & 1 deletion asreviewcontrib/makita/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ def execute(self, argv): # noqa: C901
help="Feature extractors to use. Only for template 'multimodel'. "
"Default: ['doc2vec', 'sbert', 'tfidf']",
)
parser_template.add_argument(
"--query_strategies",
nargs="+",
default=["max"],
help="Query strategies to use. Only for template 'multimodel'. "
"Default: ['max']",
)
parser_template.add_argument(
"--impossible_models",
nargs="+",
Expand Down Expand Up @@ -267,8 +274,8 @@ def _template(self, args):
model_seed=args.model_seed,
all_classifiers=args.classifiers,
all_feature_extractors=args.feature_extractors,
all_query_strategies=args.query_strategies,
impossible_models=args.impossible_models,
query_strategy=args.query_strategy,
balance_strategy=args.balance_strategy,
instances_per_query=args.instances_per_query,
stop_if=args.stop_if,
Expand Down
9 changes: 6 additions & 3 deletions asreviewcontrib/makita/template_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ def render_jobs_multimodel(
model_seed=165,
all_classifiers=None,
all_feature_extractors=None,
all_query_strategies=None,
impossible_models=None,
query_strategy="max",
balance_strategy="double",
instances_per_query=1,
stop_if='min',
Expand All @@ -36,11 +36,14 @@ def render_jobs_multimodel(
if all_feature_extractors is None:
all_feature_extractors = ["doc2vec", "sbert", "tfidf"]

if all_query_strategies is None:
all_query_strategies = ["max"]

if impossible_models is None:
impossible_models = ["nb,doc2vec", "nb,sbert"]


"""Render jobs."""

if not platform_sys:
platform_sys = platform.system()
if not job_file:
Expand Down Expand Up @@ -105,7 +108,6 @@ def render_jobs_multimodel(
{
"datasets": params,
"create_wordclouds": create_wordclouds,
"query_strategy": query_strategy,
"balance_strategy": balance_strategy,
"instances_per_query": instances_per_query,
"stop_if": stop_if,
Expand All @@ -114,6 +116,7 @@ def render_jobs_multimodel(
"scripts_folder": scripts_folder,
"platform": platform_sys,
"version": __version__,
"all_query_strategies": all_query_strategies,
"all_classifiers": all_classifiers,
"all_feature_extractors": all_feature_extractors,
"impossible_models": [i.split(",") for i in impossible_models],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,20 @@ python -m asreview wordcloud {{ dataset.input_file }} -o {{ output_folder }}/fig
mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files
{% for classifier in all_classifiers %}
{% for feature_extraction in all_feature_extractors %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}
{% for query_strategy in all_query_strategies %}
{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }}
{% if temp in impossible_models %}

# Skipped {{ classifier }} + {{ feature_extraction }} model
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }} , Query strategy = max
# Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model
{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}
{% for run in range(n_runs) %}
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ run }}.asreview --model {{ classifier }} --query_strategy max --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ run }}.json
python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }}
python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.json
{% endfor %}{% endif %}
{% endfor %}
{% endfor %}
{% endfor %}


# Generate plot and tables for dataset
python {{ scripts_folder }}/get_plot.py -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/ -o {{ output_folder }}/figures/plot_recall_sim_{{ dataset.input_file_stem }}.png --show_legend model
Expand Down

0 comments on commit 44bd389

Please sign in to comment.