-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp.py
241 lines (200 loc) · 10.5 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
from mloptimizer.genoptimizer import SklearnOptimizer
from mloptimizer.plots import plotly_search_space, plotly_logbook
from mloptimizer.alg_wrapper import CustomXGBClassifier
from mloptimizer.hyperparams import Hyperparam, HyperparameterSpace
from sklearn.datasets import load_iris
import streamlit as st
import pandas as pd
from threading import Thread
from streamlit.runtime.scriptrunner import add_script_run_ctx
from watcher import *
from utils import *
import json
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
###########################################################################################################################
############################################## MAIN FRONT-END ELEMENTS ####################################################
###########################################################################################################################

# Page-level settings: browser tab title, tab icon and a wide layout.
st.set_page_config(page_title="MLOptimizer UI", page_icon="💻", layout="wide")

# Page heading and tagline, closed off by a horizontal rule.
st.header('MLOptimizer UI')
st.subheader('Find the best hyperparameters for training your data!')
st.divider()

# Shared helper instance; Utils is a class with methods to manage the optimizer and the editable variables.
utils = Utils()
###########################################################################################################################
# Input file section: the user either loads the bundled iris example or uploads a CSV file.
st.write("You can try MLOptimizer UI with a dummy example or start using it with your own input dataset")
# NOTE: despite its name, this flag is True when the bundled example is requested.
use_custom_input = st.toggle('Try with our example')

if use_custom_input:
    # Example mode: restart the session-state variables, then load the bundled dataset.
    utils.restart_session_state_vars()
    with open('data/iris.csv', "r") as iris_file:
        df = pd.read_csv(iris_file)
    utils.set_input_data_frame(input_data_frame=df)

    # Example file section - read-only preview of the data
    with st.expander("Take a look at input data"):
        st.dataframe(df, use_container_width=True)
else:
    col1, col2 = st.columns([0.3, 0.7])
    with col1:
        input_csv_file = st.file_uploader("Upload your input file", type='csv',
                                          help='⚠️ Pay attention to the quality of your input data '
                                               '(column names, types of values, consistency, etc).')

    if input_csv_file is None:
        # Nothing uploaded (yet): restart the session-state variables.
        utils.restart_session_state_vars()
    else:
        # Input file section - data editor
        df = pd.read_csv(input_csv_file)
        with col2:
            with st.expander("Review and/or edit your data"):
                # st.data_editor returns the edited data; keep it so that the user's
                # edits are what gets stored and used further down the script.
                df = st.data_editor(df, use_container_width=True)
        utils.set_input_data_frame(input_data_frame=df)

st.divider()
# #####################################################################################################################
# Everything below is rendered only when an input data frame is stored in the session state.
# NOTE(review): nesting reconstructed from a whitespace-stripped listing - confirm that the run button and the
# results section are meant to live under this guard.
if st.session_state.input_data_frame is not None:
    # Editable variables section
    target_tab, algorithm_tab, genetic_params_tab = st.tabs(["Target", "Algorithm", "Genetic params"])

    # Editable variables section - target
    with target_tab:
        col1, col2 = st.columns(2)
        with col1:
            # `df` is the frame loaded in the input file section above.
            target = st.selectbox(
                'Which column do you want to use as target?',
                df.columns)
            utils.set_target(target=target)
            # The selected column is passed as y, every remaining column as x.
            utils.set_y(y=df[target])
            utils.set_x(x=df.drop(target, axis=1))

    # Editable variables section - algorithm
    with algorithm_tab:
        col1, col2 = st.columns([0.3, 0.7])

        # Get available algorithms from mloptimizer library
        with open(HyperparameterSpace.default_hyperparameter_spaces_json, 'r') as file:
            default_hyperparams = json.load(file)
        # Each key is looked up as a name in this module's globals, so the matching
        # classifier must be imported at the top of the file (KeyError otherwise).
        optimizer_class_list = [globals()[x] for x in default_hyperparams]

        with open('data/classifiers_external_doc.json', 'r') as file:
            classifier_links = json.load(file)

        # Build the radio options (class names) and one caption per option.
        optimizer_class_name_list = []
        optimizer_docu_list = []
        for optimizer_item in optimizer_class_list:
            class_name = optimizer_item.__name__
            display_name = utils.format_class_name(class_name_option=class_name)
            optimizer_class_name_list.append(class_name)
            # A missing entry and an empty link both fall back to the generic caption.
            link = classifier_links.get(class_name)
            if link:
                optimizer_docu_list.append(f"learn more about [{display_name}]({link})")
            else:
                optimizer_docu_list.append(
                    f"explore other sources of information to learn more about {display_name}")

        # Select algorithm
        with col1:
            algorithm = st.radio(
                label="Which algorithm would you like to use?",
                options=optimizer_class_name_list,
                captions=optimizer_docu_list,
                format_func=lambda option: utils.format_class_name(class_name_option=option)
            )
            utils.set_algorithm(algorithm=algorithm)

        # Algorithm hyperparameters data editor
        with col2:
            use_custom_hyperparams = st.toggle('Use custom hyperparams')
            if use_custom_hyperparams:
                st.write("Customize the table below with the values of hyperparameters you want to experiment with")
                st.info(
                    "By default, parameters use ranges (they are not fixed). You can mark 'use fixed value' column of the "
                    "hyperparameter you want to set with a fixed value and set it in corresponding column.",
                    icon="🤓")
                edited_df = st.data_editor(
                    utils.get_dataframe(),
                    hide_index=True,
                    use_container_width=True,
                    column_config={
                        "fixed value": st.column_config.NumberColumn(),
                        "range min": st.column_config.NumberColumn(),
                        "range max": st.column_config.NumberColumn(),
                        "scale": st.column_config.NumberColumn(
                            label="scale ⚠️",
                            help="Scale value to divide the hyperparameter value by. It applies only when the "
                                 "'type' column is 'float'. If 'type' is 'int', this value should be 'None' as it "
                                 "does not apply."
                        )
                    },
                    # Trailing comma required: ("hyperparam") is a plain str, not a tuple of column names.
                    disabled=("hyperparam",)
                )
                # Split the rows by the 'use fixed value' flag; the comparisons are element-wise pandas masks.
                fixed_rows = edited_df.loc[edited_df["use fixed value"] == True]
                range_rows = edited_df.loc[edited_df["use fixed value"] == False]
                utils.set_custom_hyperparams(fixed_rows=fixed_rows, range_rows=range_rows)
            else:
                utils.delete_hyperparams_dictionaries()

    # Editable variables section - genetic params
    with genetic_params_tab:
        col1, col2, col3 = st.columns([0.4, 0.4, 0.2])

        # Select amount of individuals and generation
        with col1:
            individuals = st.select_slider(
                'Select the amount of individuals',
                range(2, 101),
                value=utils.get_individuals())
            utils.set_individuals(individuals=individuals)
            generations = st.select_slider(
                'Select the amount of generations',
                range(2, 101),
                value=utils.get_generations())
            utils.set_generations(generations=generations)

        # Customizable value of random seed
        with col2:
            use_custom_seed = st.toggle('Set custom Python Random seed')
            if use_custom_seed:
                st.write('Insert the value you want to initialize the random number generator in Python (seed):')
                subcol1, subcol2 = st.columns([0.2, 0.8])
                with subcol1:
                    custom_seed = st.number_input(
                        label='Integer values only',
                        min_value=0,
                        value=1,
                        step=1,
                        format="%d")
                utils.set_custom_seed(seed=custom_seed)
            else:
                # Toggle off: a seed of 0 is handed to utils.
                utils.set_custom_seed(seed=0)

    st.divider()

    ###########################################################################################################################
    # Restart state variables and execute
    if st.button('Start new execution'):
        utils.restart_session_state_vars()
        utils.execute()

    ###########################################################################################################################
    # Results section
    if st.session_state.show_results is not False:
        st.write("Take a look at the optimization results below")
        population_tab, evolution_tab, search_space_tab = st.tabs(["Population", "Evolution", "Search Space"])

        # Set variables needed by mloptimizer library to generate graphics
        optimizer_param_names = list(utils.get_optimizer_hyperparams_keys())
        optimizer_param_names.append("fitness")
        population_df = utils.population_2_df()

        # Results section - population: show and provide for downloading population resulting file
        with population_tab:
            with open(st.session_state.last_population_path) as file:
                utils.download_files(population_path=st.session_state.last_population_path)
                df_output = pd.read_csv(file)
            st.dataframe(data=df_output, height=350, use_container_width=True)

        # Results section - evolution: show evolution graphic and provide logbook resulting file for downloading
        with evolution_tab:
            utils.download_files(logbook_path=st.session_state.last_logbook_path)
            logbook_graphic = plotly_logbook(utils.get_optimizer_logbook(), population_df)
            st.plotly_chart(logbook_graphic, use_container_width=True)

        # Results section - search space: show search space graphic
        with search_space_tab:
            # Only the hyperparameter columns plus "fitness" are plotted.
            dfp = population_df[optimizer_param_names]
            search_space_graphic = plotly_search_space(dfp)
            st.plotly_chart(search_space_graphic, use_container_width=True)
#######################################################################################################################