-
Notifications
You must be signed in to change notification settings - Fork 2
/
time_scores.py
331 lines (297 loc) · 11.7 KB
/
time_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# pylint: skip-file
# Standard library
import re
# Third-party
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
# First-party
import moveroplot.config.plot_settings as plot_settings
from moveroplot.load_files import load_relevant_files
from moveroplot.plotting import get_total_dates_from_headers
# Local
from .utils.parse_plot_synop_ch import total_score_range
def _time_score_transformation(df, header):
    """Mask missing values and merge the date/time columns into one timestamp.

    Args:
        df (pd.DataFrame): raw table. It must contain the columns ``YYYY``,
            ``MM``, ``DD``, ``hh``, ``mm``, ``lt_hh`` and ``lt_mm``.
        header (dict): file header. ``header["Missing value code"][0]`` must
            be convertible to ``float``.

    Returns:
        pd.DataFrame: a new frame (the input frame is left untouched) in which
        every cell equal to the missing value code is NaN, the seven date and
        lead-time columns are removed and a ``timestamp`` column is appended.
    """
    missing_value_code = float(header["Missing value code"][0])
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    df = df.replace(missing_value_code, np.nan)

    # pd.to_datetime assembles timestamps from columns named year/month/...
    names = {
        "YYYY": "year",
        "MM": "month",
        "DD": "day",
        "hh": "hour",
        "mm": "minute",
    }
    df["timestamp"] = pd.to_datetime(
        df[["YYYY", "MM", "DD", "hh", "mm"]].rename(columns=names)
    )
    return df.drop(columns=["YYYY", "MM", "DD", "hh", "mm", "lt_hh", "lt_mm"])
# Read the time score files from the input directory and call the plotting pipeline
def _time_scores_pipeline(
    plot_setup,
    lt_ranges,
    file_prefix,
    file_postfix,
    input_dir,
    output_dir,
    debug,
) -> None:
    """Load the time score files and generate the time series plots.

    For every entry of ``plot_setup["model_versions"]`` and every
    ``parameter -> scores`` pair of ``plot_setup["parameter"]`` the matching
    files are loaded with ``load_relevant_files`` and handed over to
    ``_generate_timeseries_plots``.

    Args:
        plot_setup (dict): must provide the keys ``"model_versions"`` (iterable
            of model selections) and ``"parameter"`` (mapping of parameter
            name to the scores to plot for it).
        lt_ranges: lead time ranges to plot (i.e. 01-06, 07-12,...), part of
            the file name. Falls back to ``"19-24"`` when empty or ``None``.
        file_prefix (str): prefix of files (i.e. time_scores)
        file_postfix (str): postfix of files (i.e. '.dat')
        input_dir (str): directory to seasons (i.e. /scratch/osm/movero/wd)
        output_dir (str): output directory (i.e. plots/)
        debug (bool): print further comments & debug statements
    """
    print("---initialising time score pipeline")
    if not lt_ranges:
        lt_ranges = "19-24"

    for model_plots in plot_setup["model_versions"]:
        for parameter, scores in plot_setup["parameter"].items():
            model_data = load_relevant_files(
                input_dir,
                file_prefix,
                file_postfix,
                debug,
                model_plots,
                parameter,
                lt_ranges,
                ltr_first=True,
                transform_func=_time_score_transformation,
            )
            if not model_data:
                print(f"No matching files found with given ltr {lt_ranges}")
                # Skip only this model/parameter combination. Returning here
                # would silently drop every remaining parameter and model set.
                continue

            _generate_timeseries_plots(
                plot_scores=scores,
                models_data=model_data,
                parameter=parameter,
                output_dir=output_dir,
                debug=debug,
            )
def _clear_empty_axes_if_necessary(subplot_axes, idx):
    """Switch off the axes after the first one when ``idx`` is even.

    The callers in this module pass the index of the subplot that was drawn
    last; an even index means the figure's second axes was never used.

    Args:
        subplot_axes (list): the axes of one figure.
        idx (int): subplot index; nothing happens when it is odd.
    """
    if idx % 2 == 1:
        return
    # ``idx`` is even here, so the original slice start ``(idx + 1) % 2`` is
    # always 1: everything after the first axes is hidden.
    for ax in subplot_axes[1:]:
        ax.axis("off")
def _save_figure(output_dir, filename, title, fig, axes, idx):
    """Add the super title, hide unused axes, write the figure and close it.

    Args:
        output_dir (str): directory the PNG file is written to.
        filename (str): file name stem; its last character is dropped before
            ``.png`` is appended.
        title (str): text of the figure's super title.
        fig (Figure): figure to save.
        axes (list): axes of ``fig``.
        idx (int): subplot index forwarded to
            ``_clear_empty_axes_if_necessary``.
    """
    fig.suptitle(
        title,
        horizontalalignment="center",
        verticalalignment="top",
        fontdict={
            "size": 6,
            "color": "k",
        },
        bbox={"facecolor": "none", "edgecolor": "grey"},
    )
    _clear_empty_axes_if_necessary(axes, idx)
    # NOTE(review): ``filename[:-1]`` presumably strips a trailing separator
    # such as "_" - confirm against the callers.
    fig.savefig(f"{output_dir}/{filename[:-1]}.png")
    # Close exactly this figure; a bare ``plt.close()`` closes whatever figure
    # is current, which need not be ``fig``.
    plt.close(fig)
def _initialize_plots(labels: list):
    """Create a figure with two stacked axes and a figure-level legend.

    One legend entry is created per label; the n-th entry is drawn in
    ``plot_settings.modelcolors[n]``.

    Args:
        labels: legend labels; must be sized and iterable.

    Returns:
        tuple: the figure and a list holding its two axes (top, bottom).
    """
    fig, axes = plt.subplots(
        nrows=2, ncols=1, tight_layout=True, figsize=(10, 10), dpi=200
    )
    upper_ax, lower_ax = axes

    # proxy artists: only their colour matters for the legend
    legend_handles = [
        Line2D([0], [0], color=plot_settings.modelcolors[position], lw=2)
        for position, _ in enumerate(labels)
    ]
    fig.legend(
        legend_handles,
        labels,
        loc="upper right",
        ncol=1,
        frameon=False,
    )
    plt.tight_layout(w_pad=8, h_pad=5, rect=(0.05, 0.05, 0.90, 0.90))
    return fig, [upper_ax, lower_ax]
# PLOTTING PIPELINE FOR TOTAL SCORES PLOTS
def _set_ylim(param, score, ax, debug):  # pylint: disable=unused-argument
    """Apply the y-limits configured in ``total_score_range``, if there are any.

    The limits are only set when ``total_score_range`` has a ``(param, "min")``
    column and a row for ``score``, and the two bounds differ.

    Args:
        param (str): parameter name, first level of the range table's columns.
        score (str): score name, row label of the range table.
        ax (Axes): axes whose y-limits are set.
        debug (bool): unused.
    """
    has_param = (param, "min") in total_score_range.columns
    has_score = score in total_score_range.index
    if not has_param or not has_score:
        return

    param_range = total_score_range[param]
    y_min = param_range["min"].loc[score]
    y_max = param_range["max"].loc[score]
    if y_min != y_max:
        ax.set_ylim(y_min, y_max)
def _customise_ax(parameter, scores, x_ticks, grid, ax):
    """Apply cosmetics to current ax.

    Args:
        parameter (str): current parameter, first part of the title
        scores (list): score names, comma-joined into the title
        x_ticks (list): list of x-ticks labels (lead time ranges, as strings)
        grid (bool): add major/minor grid and minor ticks to ax
        ax (Axes): current ax
    """
    if grid:
        ax.grid(which="major", color="#DDDDDD", linewidth=0.8)
        ax.grid(which="minor", color="#EEEEEE", linestyle=":", linewidth=0.5)
        ax.minorticks_on()

    for tick_kind, label_size in (("major", 8), ("minor", 6)):
        ax.tick_params(axis="both", which=tick_kind, labelsize=label_size)

    joined_scores = ",".join(scores)
    ax.set_title(f"{parameter}: {joined_scores}")
    ax.set_xlabel("Lead-Time Range (h)")

    # thin out the tick labels: keep every ``stride``-th one, all if stride is 0
    stride = len(x_ticks) // 7
    if stride > 0:
        selection = slice(None, None, stride)
    else:
        selection = slice(None)
    tick_positions = range(len(x_ticks))[selection]
    ax.set_xticks(tick_positions, x_ticks[selection])
    ax.autoscale(axis="y")
def _score_threshold(score_setup):
    """Return the first "(...)" part of the first score name, or ``None``."""
    match = re.search(r"\(.*?\)", next(iter(score_setup)))
    return match.group() if match is not None else None


def _flush_figure(fig, subplot_axes, last_plot_idx, sup_title, output_path):
    """Hide unused axes, add the super title, write ``fig`` and close it."""
    _clear_empty_axes_if_necessary(subplot_axes, last_plot_idx)
    fig.suptitle(
        sup_title,
        horizontalalignment="center",
        verticalalignment="top",
        fontdict={
            "size": 6,
            "color": "k",
        },
        bbox={"facecolor": "none", "edgecolor": "grey"},
    )
    fig.savefig(output_path)
    # close this very figure instead of whatever figure happens to be current
    plt.close(fig)


def _plot_and_save_scores(
    output_dir,
    base_filename,
    parameter,
    plot_scores_setup,
    sup_title,
    ltr_models_data,
    debug=False,
):
    """Plot the score time series, two subplots per figure, and save them.

    For every lead time range each entry of ``plot_scores_setup`` gets its own
    subplot that shows the listed scores for all models. A figure is written
    as soon as both of its axes are used, when the last entry was drawn, or
    when the next entry carries a different "(...)" threshold than the one
    before it. Files are written to
    ``{output_dir}/{base_filename}_{ltr}_{scores of the figure}.png``.

    Args:
        output_dir (str): directory the PNG files are written to.
        base_filename (str): first part of every file name.
        parameter (str): parameter name, used upper-cased in subplot titles.
        plot_scores_setup (list): one list of score names per subplot.
        sup_title (str): super title put on every figure.
        ltr_models_data (dict): lead time range -> model -> data, where data
            provides ``"header"`` (with a ``"Unit"`` entry) and ``"df"`` (with
            a ``"timestamp"`` column and one column per score).
        debug (bool): unused.
    """
    date_format = "%Y-%m-%d %H:%M"
    last_idx = len(plot_scores_setup) - 1

    for ltr, models_data in ltr_models_data.items():
        headers = [data["header"] for data in models_data.values()]
        total_start_date, total_end_date = get_total_dates_from_headers(headers)
        title_base = f"{parameter.upper()}: "
        model_names = list(models_data.keys())
        model_info = f" {model_names[0]}" if len(model_names) == 1 else ""
        x_label_base = (
            f"{total_start_date.strftime(date_format)}"
            f" - {total_end_date.strftime(date_format)}"
        )
        ltr_filename = f"{base_filename}_{ltr}"

        # Figures are created on demand and closed as soon as they are saved,
        # so no figure is left open once this lead time range is done.
        fig = None
        subplot_axes = None
        filename = ltr_filename
        previous_threshold = None
        current_plot_idx = 0

        for idx, score_setup in enumerate(plot_scores_setup):
            current_threshold = _score_threshold(score_setup)
            threshold_changed = idx > 0 and current_threshold != previous_threshold
            previous_threshold = current_threshold

            # A new threshold starts a new figure. Only a half-filled figure
            # (odd index) is pending at this point; a full one has already
            # been saved, and flushing again would write a blank PNG.
            if threshold_changed and current_plot_idx % 2 == 1:
                _flush_figure(
                    fig,
                    subplot_axes,
                    current_plot_idx - 1,
                    sup_title,
                    f"{output_dir}/{filename}.png",
                )
                fig = None
                filename = ltr_filename
                # skip the unused second slot, continue on a first axes
                current_plot_idx += 1

            if fig is None:
                fig, subplot_axes = _initialize_plots(models_data.keys())

            joined_scores = ",".join(score_setup)
            ax = subplot_axes[current_plot_idx % 2]
            ax.set_xlabel(x_label_base)
            ax.set_title(title_base + joined_scores + model_info + f" LT: {ltr}")

            for model_idx, data in enumerate(models_data.values()):
                model_plot_color = plot_settings.modelcolors[model_idx]
                unit = data["header"]["Unit"][0]
                # the unit of the last model ends up in the label
                ax.set_ylabel(f"{joined_scores.upper()} ({unit})")
                timestamps = np.asarray(
                    data["df"][["timestamp"]], dtype="datetime64[s]"
                )
                for score_idx, score in enumerate(score_setup):
                    ax.plot(
                        timestamps,
                        data["df"][[score]],
                        color=model_plot_color,
                        linestyle=plot_settings.line_styles[score_idx],
                        fillstyle="none",
                        label=score.upper(),
                    )

            ax.tick_params(axis="both", which="major", labelsize=8)
            ax.tick_params(axis="both", which="minor", labelsize=6)
            ax.autoscale(axis="y")
            ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %d\n%H:%M"))

            if len(score_setup) > 1:
                # line styles tell the scores apart; colours belong to models,
                # hence the legend lines are drawn in neutral black
                sub_plot_legend = ax.legend(
                    score_setup,
                    loc="upper right",
                    markerscale=0.9,
                    bbox_to_anchor=(1.1, 1.05),
                )
                for line in sub_plot_legend.get_lines():
                    line.set_color("black")

            filename += "_" + "_".join(score_setup)

            if current_plot_idx % 2 == 1 or idx == last_idx:
                _flush_figure(
                    fig,
                    subplot_axes,
                    current_plot_idx,
                    sup_title,
                    f"{output_dir}/{filename}.png",
                )
                fig = None
                filename = ltr_filename

            current_plot_idx += 1
# PLOTTING PIPELINE FOR TIME SCORES PLOTS
def _generate_timeseries_plots(
    plot_scores,
    models_data,
    parameter,
    output_dir,
    debug,
):
    """Build file name and super title, then plot regular and categorical scores.

    Args:
        plot_scores (dict): must provide the keys ``"regular_scores"`` and
            ``"cat_scores"``; each value is forwarded to
            ``_plot_and_save_scores`` as its score setup.
        models_data (dict): lead time range -> model version -> data, where
            data provides a ``"header"`` entry.
        parameter (str): parameter name, part of file name and super title.
        output_dir (str): directory the plots are written to.
        debug (bool): forwarded to ``_plot_and_save_scores``.
    """
    # distinct model version keys over all lead time ranges
    unique_versions = set()
    for per_ltr in models_data.values():
        unique_versions.update(per_ltr.keys())
    model_versions = list(unique_versions)

    # the model version only goes into the file name when there is exactly one
    if len(model_versions) == 1:
        base_filename = f"time_scores_{model_versions[0]}_{parameter}"
    else:
        base_filename = f"time_scores_{parameter}"

    # the period shown in the super title is taken from the first lead time range
    first_ltr_data = next(iter(models_data.values()))
    headers = [entry["header"] for entry in first_ltr_data.values()]
    total_start_date, total_end_date = get_total_dates_from_headers(headers)
    start_text = total_start_date.strftime("%Y-%m-%d %H:%M")
    end_text = total_end_date.strftime("%Y-%m-%d %H:%M")
    period_info = f"Period: {start_text} - {end_text} | © MeteoSwiss"
    sup_title = f"{parameter}: " + period_info

    # regular scores first, categorical scores second
    for score_group in ("regular_scores", "cat_scores"):
        _plot_and_save_scores(
            output_dir,
            base_filename,
            parameter,
            plot_scores[score_group],
            sup_title,
            models_data,
            debug=debug,
        )