-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_experiments.py
143 lines (131 loc) · 3.76 KB
/
run_experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
"""Run experiments.
I recommend suppressing logging from Prefect.
```console
$ export PREFECT__LOGGING__LEVEL=ERROR
```
"""
import click
from experiments.flow import gen_flow
@click.group()
def cli():
    """CLI group."""
    # Root command group: subcommands register themselves on it through
    # ``@cli.command()``. The docstring is the only statement needed in the
    # body; it is kept verbatim because click displays it as the group's
    # ``--help`` text.
@cli.command()
@click.option(
    "--dataset",
    type=click.Choice(
        [
            "ames-housing",
            "avocado-sales",
            "employee_salaries",
            "flight-delay-usa-dec-2017",
            "particulate-matter-ukair-2017",
            "churn",
            "click_prediction_small",
        ]
    ),
    help="The dataset",
)
@click.option(
    "--algorithm",
    type=click.Choice(["xgboost", "lightgbm", "gbm"]),
    help="The algorithm",
)
@click.option(
    "--n-estimators",
    type=click.INT,
    multiple=True,
    default=[0, 25, 50, 75, 100, 125, 150, 175, 200],
    show_default=True,
    help="Number of estimators; repeat the option to run several values",
)
@click.option(
    "--seeds",
    type=click.INT,
    multiple=True,
    default=[5, 10, 16, 42, 44],
    show_default=True,
    help="Seed for the experiment; repeat the option to run several seeds",
)
def sample(dataset, algorithm, n_estimators, seeds):
    """Run the sampling experiment."""
    # NOTE: click renders the docstring above as this command's --help text,
    # so implementation notes are kept in comments instead.
    #
    # dataset / algorithm: values chosen from the click.Choice lists above
    #     (None when the option is omitted, since neither is required).
    # n_estimators / seeds: tuples of ints collected from the repeatable
    #     options; one flow is generated and run per (n_estimators, seed) pair.
    for n_est in n_estimators:
        for seed in seeds:
            click.echo(
                click.style(
                    (
                        f"Running experiment for {dataset} with algorithm {algorithm}, "
                        f"{n_est} estimators, and seed {seed}"
                    ),
                    fg="green",
                )
            )
            # The sampling experiment always uses the "bayes" encoder and the
            # "sample.json" project.
            flow = gen_flow(
                project="sample.json",
                dataset=dataset,
                encoder="bayes",
                algorithm=algorithm,
                seed=seed,
                n_estimators=n_est,
            )
            # Keep the final state under a real name; ``_`` is reserved by
            # convention for values that are never read.
            state = flow.run()
            # A failed run is only reported; the remaining combinations are
            # still executed.
            if state.is_successful():
                click.echo(click.style("Experiment finished.", fg="green"))
            else:
                click.echo(click.style("Experiment failed.", fg="red"))
@cli.command()
@click.option(
    "--dataset",
    type=click.Choice(
        [
            "ames-housing",
            "avocado-sales",
            "employee_salaries",
            "flight-delay-usa-dec-2017",
            "particulate-matter-ukair-2017",
            "churn",
            "click_prediction_small",
        ]
    ),
    help="The dataset",
)
@click.option(
    "--algorithm",
    type=click.Choice(["xgboost", "lightgbm", "gbm"]),
    help="The algorithm",
)
@click.option(
    "--encoder",
    type=click.Choice(
        ["frequency", "glmm", "james-stein", "integer", "target", "bayes"]
    ),
    help="Categorical encoder",
)
@click.option(
    "--n-estimators",
    type=click.INT,
    default=0,
    show_default=True,
    help="Number of estimators",
)
@click.option(
    "--seeds",
    type=click.INT,
    multiple=True,
    default=[5, 10, 16, 42, 44],
    show_default=True,
    help="Seed for the experiment; repeat the option to run several seeds",
)
@click.option(
    "--marginal", is_flag=True, help="Whether or not to use marginal encoding"
)
@click.option(
    "--residual", is_flag=True, help="Whether or not to use residual encoding"
)
def compare(dataset, algorithm, encoder, n_estimators, seeds, marginal, residual):
    """Run the comparison experiment."""
    # NOTE: click renders the docstring above as this command's --help text,
    # so implementation notes are kept in comments instead.
    #
    # dataset / algorithm / encoder: values chosen from the click.Choice lists
    #     above (None when the option is omitted, since none is required).
    # n_estimators: a single int, unlike the repeatable option of ``sample``.
    # seeds: tuple of ints; one flow is generated and run per seed.
    # marginal / residual: boolean flags forwarded unchanged to ``gen_flow``.
    for seed in seeds:
        click.echo(
            click.style(
                (
                    f"Running experiment for {dataset} with algorithm {algorithm}, "
                    f"encoder {encoder}, {n_estimators} estimators, and seed {seed}."
                ),
                fg="green",
            )
        )
        flow = gen_flow(
            project="compare.json",
            dataset=dataset,
            encoder=encoder,
            algorithm=algorithm,
            marginal=marginal,
            residual=residual,
            seed=seed,
            n_estimators=n_estimators,
        )
        # Keep the final state under a real name; ``_`` is reserved by
        # convention for values that are never read.
        state = flow.run()
        # A failed run is only reported; the remaining seeds are still run.
        if state.is_successful():
            click.echo(click.style("Experiment finished.", fg="green"))
        else:
            click.echo(click.style("Experiment failed.", fg="red"))
# Dispatch to the click group only when this file is executed as a script;
# importing the module must not start the CLI.
if __name__ == "__main__":
    cli()