-
Notifications
You must be signed in to change notification settings - Fork 200
/
Copy pathdgx_a100_14x8x640.py
118 lines (118 loc) · 7.8 KB
/
dgx_a100_14x8x640.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import hugectr
from mpi4py import MPI
# 1. Create Solver, DataReaderParams and Optimizer
# Solver: run-wide batch sizes, device layout, learning-rate schedule and
# execution switches.
solver = hugectr.CreateSolver(
    # Evaluation and training batch sizes.
    max_eval_batches=125,
    batchsize_eval=716800,
    # 71680 == 14 * 8 * 640, which matches the "14x8x640" in the file name.
    batchsize=71680,
    # 14 identical inner lists, each naming device ids 0..7.
    vvgpu=[list(range(8)) for _ in range(14)],
    repeat_dataset=True,
    # Learning-rate schedule.
    lr=26.0,
    warmup_steps=2500,
    decay_start=46821,
    decay_steps=15406,
    decay_power=2.0,
    end_lr=0.0,
    # Mixed-precision settings.
    use_mixed_precision=True,
    scaler=1024,
    # CUDA graph, pipelining and all-reduce switches.
    use_cuda_graph=False,
    use_holistic_cuda_graph=True,
    use_overlapped_pipeline=True,
    all_reduce_algo=hugectr.AllReduceAlgo.OneShot,
    grouped_all_reduce=True,
    num_iterations_statistics=20,
    # AUC metric with the value 0.8025 attached to it.
    metrics_spec={hugectr.MetricsType.AUC: 0.8025},
    is_dlrm=True,
)
# Data reader: asynchronous raw-format reader over the train/test binaries.
reader = hugectr.DataReaderParams(
    data_reader_type=hugectr.DataReaderType_t.RawAsync,
    source=["./train_data.bin"],
    eval_source="./test_data.bin",
    check_type=hugectr.Check_t.Non,
    num_samples=4195196928,
    eval_num_samples=89137319,
    cache_eval_data=125,
    # 26 entries; the identical list is also passed to the SparseEmbedding
    # layer later in this script, so keep the two in sync.
    slot_size_array=[
        39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63,
        38532951, 2953546, 403346, 10, 2208, 11938, 155, 4, 976, 14,
        39979771, 25641295, 39664984, 585935, 12972, 108, 36,
    ],
    # Positional arguments are kept exactly as given; see the AsyncParam
    # documentation for what each position means.
    async_param=hugectr.AsyncParam(
        32,
        4,
        716800,
        2,
        512,
        True,
        hugectr.Alignment_t.Non,
    ),
)
# Optimizer: SGD with the Local update type and atomic updates enabled.
optimizer = hugectr.CreateOptimizer(
    optimizer_type=hugectr.Optimizer_t.SGD,
    update_type=hugectr.Update_t.Local,
    atomic_update=True,
)
# 2. Initialize the Model instance
# The model is built from the three configuration objects created in step 1.
model = hugectr.Model(
    solver,
    reader,
    optimizer,
)
# 3. Construct the Model graph
# Input layer: a 1-wide "label", a 13-wide "dense" input and one sparse
# input named "data1".
model.add(
    hugectr.Input(
        label_dim=1,
        label_name="label",
        dense_dim=13,
        dense_name="dense",
        data_reader_sparse_param_array=[
            hugectr.DataReaderSparseParam("data1", 1, True, 26),
        ],
    )
)
# Hybrid sparse embedding fed by "data1"; produces "sparse_embedding1".
model.add(
    hugectr.SparseEmbedding(
        embedding_type=hugectr.Embedding_t.HybridSparseEmbedding,
        workspace_size_per_gpu_in_mb=1500,
        # Same 26-entry list that is passed to DataReaderParams above.
        slot_size_array=[
            39884406, 39043, 17289, 7420, 20263, 3, 7120, 1543, 63,
            38532951, 2953546, 403346, 10, 2208, 11938, 155, 4, 976, 14,
            39979771, 25641295, 39664984, 585935, 12972, 108, 36,
        ],
        embedding_vec_size=128,
        combiner="sum",
        sparse_embedding_name="sparse_embedding1",
        bottom_name="data1",
        optimizer=optimizer,
        # Positional arguments are kept exactly as given; see the
        # HybridEmbeddingParam documentation for what each position means.
        hybrid_embedding_param=hugectr.HybridEmbeddingParam(
            2,
            -1,
            0.01,
            1.3e11,
            1.9e11,
            1.0,
            hugectr.CommunicationType.IB_NVLink_Hier,
            hugectr.HybridEmbeddingType.Distributed,
        ),
    )
)
# Bottom MLP on "dense": three FusedInnerProduct layers (Head, Body, Tail)
# with output widths 512, 256 and 128, ending in the tensor "fc3".
# Each row is (position, bottom tensor names, top tensor names, num_output).
for _pos, _bottoms, _tops, _width in (
    (hugectr.FcPosition_t.Head,
     ["dense"],
     ["fc11", "fc12", "fc13", "fc14"],
     512),
    (hugectr.FcPosition_t.Body,
     ["fc11", "fc12", "fc13", "fc14"],
     ["fc21", "fc22", "fc23", "fc24"],
     256),
    (hugectr.FcPosition_t.Tail,
     ["fc21", "fc22", "fc23", "fc24"],
     ["fc3"],
     128),
):
    model.add(
        hugectr.DenseLayer(
            layer_type=hugectr.Layer_t.FusedInnerProduct,
            pos_type=_pos,
            bottom_names=_bottoms,
            top_names=_tops,
            num_output=_width,
        )
    )
# Interaction layer combining the bottom-MLP output "fc3" with the embedding
# output "sparse_embedding1" into "interaction1".
model.add(
    hugectr.DenseLayer(
        layer_type=hugectr.Layer_t.Interaction,
        bottom_names=["fc3", "sparse_embedding1"],
        top_names=["interaction1"],
    )
)
# Top MLP on "interaction1": a Head layer followed by three Body layers with
# output widths 1024, 1024, 512 and 256.
# Each row is (position, bottom tensor names, top tensor names, num_output).
for _pos, _bottoms, _tops, _width in (
    (hugectr.FcPosition_t.Head,
     ["interaction1"],
     ["fc41", "fc42", "fc43", "fc44"],
     1024),
    (hugectr.FcPosition_t.Body,
     ["fc41", "fc42", "fc43", "fc44"],
     ["fc51", "fc52", "fc53", "fc54"],
     1024),
    (hugectr.FcPosition_t.Body,
     ["fc51", "fc52", "fc53", "fc54"],
     ["fc61", "fc62", "fc63", "fc64"],
     512),
    (hugectr.FcPosition_t.Body,
     ["fc61", "fc62", "fc63", "fc64"],
     ["fc71", "fc72", "fc73", "fc74"],
     256),
):
    model.add(
        hugectr.DenseLayer(
            layer_type=hugectr.Layer_t.FusedInnerProduct,
            pos_type=_pos,
            bottom_names=_bottoms,
            top_names=_tops,
            num_output=_width,
        )
    )

# Final Tail layer: the only one that sets act_type (to Non); it emits the
# 1-wide tensor "fc8" that feeds the loss.
model.add(
    hugectr.DenseLayer(
        layer_type=hugectr.Layer_t.FusedInnerProduct,
        pos_type=hugectr.FcPosition_t.Tail,
        act_type=hugectr.Activation_t.Non,
        bottom_names=["fc71", "fc72", "fc73", "fc74"],
        top_names=["fc8"],
        num_output=1,
    )
)
# Binary cross-entropy loss between the "fc8" output and "label".
model.add(
    hugectr.DenseLayer(
        layer_type=hugectr.Layer_t.BinaryCrossEntropyLoss,
        bottom_names=["fc8", "label"],
        top_names=["loss"],
    )
)
# 4. Compile & Fit
model.compile()
model.summary()
# NOTE(review): snapshot (10000000) is larger than max_iter (58527), so
# presumably no snapshot is written during this run -- confirm this is intended.
model.fit(
    max_iter=58527,
    display=1000,
    eval_interval=2926,
    snapshot=10000000,
    snapshot_prefix="dlrm",
)