diff --git a/README.md b/README.md index 3dd7bbc..5ba7da9 100644 --- a/README.md +++ b/README.md @@ -175,18 +175,18 @@ optional arguments: --platform PLATFORM Platform to run jobs: Windows, Darwin, Linux. Default: the system of rendering templates. --n_runs N_RUNS Number of runs. Default: 1. --no_wordclouds Disables the generation of wordclouds. - --balance_strategy BALANCE_STRATEGY Balance strategy to use. Default: double. --instances_per_query INSTANCES_PER_QUERY Number of instances per query. Default: 1. --stop_if STOP_IF The number of label actions to simulate. Default 'min' will stop simulating when all relevant records are found. --classifiers CLASSIFIERS Classifiers to use Default: ['logistic', 'nb', 'rf', 'svm'] --feature_extractors FEATURE_EXTRACTOR Feature extractors to use Default: ['doc2vec', 'sbert', 'tfidf'] --query_strategies QUERY_STRATEGY Query strategies to use Default: ['max'] + --balancing_strategies BALANCE_STRATEGY Balance strategies to use Default: ['double'] --impossible_models IMPOSSIBLE_MODELS Model combinations to exclude Default: ['nb,doc2vec', 'nb,sbert'] ``` If you want to specify certain combinations of classifiers and feature extractors that should and should not be used, you can use the `--classifiers`, -`--feature_extractors`, `--query_strategies` and `--impossible_models` option. For instance, if you +`--feature_extractors`, `--query_strategies`, `--balancing_strategies` and `--impossible_models` options. For instance, if you want to exclude the combinations of `nb` with `doc2vec` and `logistic` with `tfidf`, use the following command: diff --git a/asreviewcontrib/makita/entrypoint.py b/asreviewcontrib/makita/entrypoint.py index 989584d..84444d4 100644 --- a/asreviewcontrib/makita/entrypoint.py +++ b/asreviewcontrib/makita/entrypoint.py @@ -166,6 +166,13 @@ def execute(self, argv): # noqa: C901 help="Query strategies to use. Only for template 'multimodel'. 
" "Default: ['max']", ) + parser_template.add_argument( + "--balancing_strategies", + nargs="+", + default=["double"], + help="Balancing strategies to use. Only for template 'multimodel'. " + "Default: ['double']", + ) parser_template.add_argument( "--impossible_models", nargs="+", @@ -275,8 +282,8 @@ def _template(self, args): all_classifiers=args.classifiers, all_feature_extractors=args.feature_extractors, all_query_strategies=args.query_strategies, + all_balancing_strategies=args.balancing_strategies, impossible_models=args.impossible_models, - balance_strategy=args.balance_strategy, instances_per_query=args.instances_per_query, stop_if=args.stop_if, fp_template=fp_template, diff --git a/asreviewcontrib/makita/template_multimodel.py b/asreviewcontrib/makita/template_multimodel.py index 3f23657..e830e45 100644 --- a/asreviewcontrib/makita/template_multimodel.py +++ b/asreviewcontrib/makita/template_multimodel.py @@ -22,8 +22,8 @@ def render_jobs_multimodel( all_classifiers=None, all_feature_extractors=None, all_query_strategies=None, + all_balancing_strategies=None, impossible_models=None, - balance_strategy="double", instances_per_query=1, stop_if='min', fp_template=None, @@ -39,6 +39,9 @@ def render_jobs_multimodel( if all_query_strategies is None: all_query_strategies = ["max"] + if all_balancing_strategies is None: + all_balancing_strategies = ["double"] + if impossible_models is None: impossible_models = ["nb,doc2vec", "nb,sbert"] @@ -108,7 +111,6 @@ def render_jobs_multimodel( { "datasets": params, "create_wordclouds": create_wordclouds, - "balance_strategy": balance_strategy, "instances_per_query": instances_per_query, "stop_if": stop_if, "output_folder": output_folder, @@ -119,6 +121,7 @@ def render_jobs_multimodel( "all_query_strategies": all_query_strategies, "all_classifiers": all_classifiers, "all_feature_extractors": all_feature_extractors, + "all_balancing_strategies": all_balancing_strategies, "impossible_models": [i.split(",") for i in 
impossible_models], } ) diff --git a/asreviewcontrib/makita/templates/template_multimodel.txt.template b/asreviewcontrib/makita/templates/template_multimodel.txt.template index 2e59e5f..c1a318d 100644 --- a/asreviewcontrib/makita/templates/template_multimodel.txt.template +++ b/asreviewcontrib/makita/templates/template_multimodel.txt.template @@ -48,19 +48,20 @@ mkdir {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files {% for classifier in all_classifiers %} {% for feature_extraction in all_feature_extractors %} {% for query_strategy in all_query_strategies %} -{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }}{{ temp.append(query_strategy)|default("", True) }} +{% for balance_strategy in all_balancing_strategies %} +{% set temp = [] %}{{ temp.append(classifier)|default("", True) }}{{ temp.append(feature_extraction)|default("", True) }} {% if temp in impossible_models %} # Skipped {{ classifier }} + {{ feature_extraction }} + {{ query_strategy}} model -{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }} +{% else %}# Classifier = {{ classifier }}, Feature extractor = {{ feature_extraction }}, Query strategy = {{ query_strategy }}, Balance strategy = {{balance_strategy}} {% for run in range(n_runs) %} -python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} -python -m asreview metrics {{ output_folder }}/simulation/{{ 
dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{ run }}.json +python -m asreview simulate {{ dataset.input_file }} -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview --model {{ classifier }} --query_strategy {{query_strategy}} --balance_strategy {{balance_strategy}} --feature_extraction {{ feature_extraction }} --init_seed {{ dataset.init_seed + run }} --seed {{ dataset.model_seed }} -q {{ query_strategy }} -b {{ balance_strategy }} --n_instances {{ instances_per_query }} --stop_if {{ stop_if }} +python -m asreview metrics {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.asreview -o {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/metrics/metrics_sim_{{ dataset.input_file_stem }}_{{ classifier }}_{{ feature_extraction }}_{{ query_strategy }}_{{balance_strategy}}_{{ run }}.json {% endfor %}{% endif %} {% endfor %} {% endfor %} {% endfor %} - +{% endfor %} # Generate plot and tables for dataset python {{ scripts_folder }}/get_plot.py -s {{ output_folder }}/simulation/{{ dataset.input_file_stem }}/state_files/ -o {{ output_folder }}/figures/plot_recall_sim_{{ dataset.input_file_stem }}.png --show_legend model