# %%
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import rcParams
rcParams["figure.figsize"] = (10, 6)
plt.style.use("ggplot")
# %%
scenario_names = [
    "large files 1 large patterns",
    "large files 100 small patterns",
    "small files 10 large patterns",
    "small files 1000 small patterns",
]
# Mean execution time vs the share of work assigned to the GPU, per scenario.
results_gpu_percentage = pd.read_csv("results/results_gpu_percentage_v4.csv")
for name in scenario_names:
    y = results_gpu_percentage[results_gpu_percentage["scenario"] == name]["mean_time"]
    yerr = results_gpu_percentage[results_gpu_percentage["scenario"] == name]["std_time"]
    x = results_gpu_percentage[results_gpu_percentage["scenario"] == name]["percentage_gpu"]
    plt.errorbar(x, y, yerr=yerr, label=name)
plt.xlabel("percentage_gpu")
plt.ylabel("mean execution time [s]")
plt.legend()
plt.show()
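# %%
# Optional sketch (not part of the original benchmarks): report, for each
# scenario, the GPU share with the lowest mean time. Column names ("scenario",
# "percentage_gpu", "mean_time") are assumed to match the CSV read above.
best_gpu_share = results_gpu_percentage.loc[
    results_gpu_percentage.groupby("scenario")["mean_time"].idxmin(),
    ["scenario", "percentage_gpu", "mean_time"],
]
print(best_gpu_share)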
# %%
# with open("results/results_thread_block_v4.csv", "w") as f:
# f.write("scenario,thread_per_block,block_per_grid,mean_time,std_time\n")
results_thread_block = pd.read_csv("results/results_thread_block_v4.csv")
thread_per_block_values = results_thread_block["thread_per_block"].unique()
block_per_grid_values = results_thread_block["block_per_grid"].unique()
# One heatmap per scenario: mean execution time for each
# (thread_per_block, block_per_grid) combination.
for name in scenario_names:
    sub_df = results_thread_block[results_thread_block["scenario"] == name]
    heatmap = np.zeros((len(thread_per_block_values), len(block_per_grid_values)))
    for i, row in sub_df.iterrows():
        heatmap[
            thread_per_block_values == row["thread_per_block"],
            block_per_grid_values == row["block_per_grid"],
        ] = row["mean_time"]
    plt.imshow(heatmap, vmin=sub_df["mean_time"].min(), vmax=0.5)
    plt.colorbar()
    plt.xticks(range(len(block_per_grid_values)), block_per_grid_values, rotation=90)
    plt.xlabel("block_per_grid")
    plt.yticks(range(len(thread_per_block_values)), thread_per_block_values)
    plt.ylabel("thread_per_block")
    plt.title(name)
    plt.show()
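# %%
# Optional sketch: the fastest (thread_per_block, block_per_grid) pair per
# scenario, as a numeric complement to the heatmaps above; column names are
# assumed from the CSV header documented in the commented-out write above.
best_config = results_thread_block.loc[
    results_thread_block.groupby("scenario")["mean_time"].idxmin(),
    ["scenario", "thread_per_block", "block_per_grid", "mean_time"],
]
print(best_config)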
# %%
### Perf vs ndata & npattern when distributed vs not, at constant workload
results_dist_pattern = pd.read_csv("results/results_distributed_and_pattern_v4.csv")
distribute_labels = ["patterns not distributed", "patterns distributed"]
for distributed in [0, 1]:
    x = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["ndata"]
    y = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["mean_time"]
    yerr = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["std_time"]
    plt.errorbar(x, y, yerr=yerr, label=distribute_labels[distributed])
plt.xlabel("size of the dataset")
plt.ylabel("mean execution time [s]")
plt.xscale("log")
plt.legend()
plt.show()
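# %%
# Optional sketch: speedup from distributing the patterns, assuming the CSV
# holds one distributed (1) and one non-distributed (0) measurement per "ndata"
# value (extra rows, e.g. different npattern, are averaged by pivot_table).
pivot = results_dist_pattern.pivot_table(
    index="ndata", columns="distributed", values="mean_time"
)
speedup = pivot[0] / pivot[1]  # > 1 means distributing the patterns is faster
print(speedup)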
# %%
### Perf vs ndata & npattern when distributed vs not, at constant workload, NO GPU
results_dist_pattern = pd.read_csv("results/results_distributed_and_pattern_v4_no_gpu.csv")
distribute_labels = ["patterns not distributed", "patterns distributed"]
for distributed in [0, 1]:
    x = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["ndata"]
    y = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["mean_time"]
    yerr = results_dist_pattern[results_dist_pattern["distributed"] == distributed]["std_time"]
    plt.errorbar(x, y, yerr=yerr, label=distribute_labels[distributed])
plt.xlabel("size of the dataset")
plt.ylabel("mean execution time [s]")
plt.xscale("log")
plt.legend()
plt.show()
# %%
### Compare the different settings
# with open("results/results_v4.csv", "w") as f:
# f.write("scenario,setting,mean_time,std_time\n")
result_settings = pd.read_csv("results/results_v4.csv")
# hbar plots of the different settings
height = 0.1
settings = result_settings["setting"].unique()
scenarios = result_settings["scenario"].unique()
for i, setting in enumerate(settings):  # type: ignore
    ys = np.arange(len(scenarios)) + i * height
    plt.barh(
        ys,
        result_settings[result_settings["setting"] == setting]["mean_time"],
        height,
        xerr=result_settings[result_settings["setting"] == setting]["std_time"],
        label=setting,
    )
plt.ylabel("scenario")
plt.yticks(ys, scenarios)  # type: ignore
plt.xlabel("mean execution time [s]")
plt.legend()
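# %%
# Optional sketch: the same comparison as a scenario x setting table of mean
# times, which can be easier to scan than the bar chart above.
settings_table = result_settings.pivot_table(
    index="scenario", columns="setting", values="mean_time"
)
print(settings_table.round(3))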
# %%
### Weak scaling
# with open("results/results_strong_scaling_v4.csv", "w") as f:
# f.write("scenario,nodes,mean_time,std_time\n")
results_weak = pd.read_csv("results/results_weak_scaling_v4.csv")
for name in scenario_names:
    y = results_weak[results_weak["scenario"] == name]["mean_time"]
    yerr = results_weak[results_weak["scenario"] == name]["std_time"]
    x = results_weak[results_weak["scenario"] == name]["nodes"]
    plt.errorbar(x, y, yerr=yerr, label=name)
# plt.xscale("log")
plt.ylim(0, results_weak["mean_time"].max() * 1.1)
plt.ylabel("mean execution time [s]")
# Reference line: runtime growing linearly with the number of nodes,
# starting from the average single-node time.
first_point = results_weak[results_weak["nodes"] == 1]["mean_time"].mean()
plt.plot(
    [1, results_weak["nodes"].max()],
    [first_point, first_point * results_weak["nodes"].max()],
    "--",
    label="linear scaling",
)
plt.legend()
plt.xlabel("number of nodes")
# %%
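# Optional sketch: weak-scaling efficiency T(1) / T(N) per scenario, assuming a
# single-node measurement exists for every scenario in results_weak.
for name in scenario_names:
    sub = results_weak[results_weak["scenario"] == name].sort_values("nodes")
    t1 = sub[sub["nodes"] == 1]["mean_time"].iloc[0]
    plt.plot(sub["nodes"], t1 / sub["mean_time"], marker="o", label=name)
plt.xlabel("number of nodes")
plt.ylabel("weak-scaling efficiency")
plt.legend()
plt.show()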