# cape_r50_1408x512_24ep_wocbgs_imagenet_pretrain.yml (from PaddlePaddle/Paddle3D)
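# CAPE multi-view 3D object detection on nuScenes: ResNet-50 backbone,
# 1408x512 input resolution, 24 training epochs, ImageNet-pretrained weights.
# "wocbgs" in the name presumably means CBGS class-balanced resampling is not used.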
batch_size: 1
epochs: 24
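# batch_size is per card, so the effective batch size scales with the number of
# GPUs used at launch (assumption: the usual multi-GPU training setup).
# The training split loads nuScenes 6-camera samples with the 10 standard
# detection classes; the annotation .pkl is assumed to be the PETR-style info
# file produced by the repo's nuScenes data preparation step.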
train_dataset:
type: NuscenesMVDataset
dataset_root: data/nuscenes/
ann_file: data/nuscenes/petr_nuscenes_annotation_train.pkl
mode: train
use_valid_flag: True
class_names: [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
transforms:
- type: LoadMultiViewImageFromFiles
to_float32: True
- type: LoadAnnotations3D
with_bbox_3d: True
with_label_3d: True
- type: SampleRangeFilter
point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
- type: SampleNameFilter
classes: [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
- type: ResizeCropFlipImage
sample_aug_cfg:
resize_lim: [0.8, 1.0]
final_dim: [512, 1408]
bot_pct_lim: [0.0, 0.0]
rot_lim: [0.0, 0.0]
H: 900
W: 1600
rand_flip: True
training: True
- type: GlobalRotScaleTransImage
rot_range: [-0.3925, 0.3925]
translation_std: [0, 0, 0]
scale_ratio_range: [0.95, 1.05]
reverse_angle: True
training: True
- type: NormalizeMultiviewImage
mean: [103.530, 116.280, 123.675]
std: [1.0, 1.0, 1.0]
- type: PadMultiViewImage
size_divisor: 32
- type: SampleFilerByKey
keys: ['gt_bboxes_3d', 'gt_labels_3d', 'img']
meta_keys: ['filename', 'ori_shape', 'img_shape', 'lidar2img',
'intrinsics', 'extrinsics', 'pad_shape',
'scale_factor', 'flip', 'box_type_3d', 'img_norm_cfg', 'sample_idx',
'timestamp']
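# Validation reuses the image pipeline but loads no 3D annotations, skips the
# global rotation/scaling augmentation, and runs ResizeCropFlipImage with
# training: False, which presumably applies a deterministic resize/crop.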
val_dataset:
type: NuscenesMVDataset
dataset_root: data/nuscenes/
ann_file: data/nuscenes/petr_nuscenes_annotation_val.pkl
mode: val
class_names: ['car', 'truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian',
'traffic_cone']
transforms:
- type: LoadMultiViewImageFromFiles
to_float32: True
- type: ResizeCropFlipImage
sample_aug_cfg:
resize_lim: [0.8, 1.0]
final_dim: [512, 1408]
bot_pct_lim: [0.0, 0.0]
rot_lim: [0.0, 0.0]
H: 900
W: 1600
rand_flip: True
training: False
- type: NormalizeMultiviewImage
mean: [103.530, 116.280, 123.675]
std: [1.0, 1.0, 1.0]
- type: PadMultiViewImage
size_divisor: 32
- type: SampleFilerByKey
keys: ['img']
meta_keys: ['filename', 'ori_shape', 'img_shape', 'lidar2img',
'intrinsics', 'extrinsics', 'pad_shape',
'scale_factor', 'flip', 'box_type_3d', 'img_norm_cfg', 'sample_idx',
'timestamp']
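# Optimization: AdamW with global-norm gradient clipping and a 500-step linear
# warmup into cosine decay. T_max = 3517 * 24 implies 3517 iterations per
# epoch, consistent with ~28k nuScenes train samples split over 8 cards at
# batch_size 1 (an assumption about the launch configuration).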
optimizer:
type: AdamW
weight_decay: 0.01
grad_clip:
type: ClipGradByGlobalNorm
clip_norm: 35
# auto_skip_clip: True
lr_scheduler:
type: LinearWarmup
learning_rate:
type: CosineAnnealingDecay
learning_rate: 0.0002
T_max: 84408 # 3517 * 24
eta_min: 0.0000002
warmup_steps: 500
start_lr: 0.00006666666
end_lr: 0.0002
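# Model: CAPE applies a shared image backbone and neck to each camera view,
# then decodes 3D boxes with a DETR-style head built on camera-view position
# embeddings. Recompute (activation checkpointing) trades compute for memory,
# and GridMask image augmentation is enabled.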
model:
type: CAPE
use_recompute: True
use_grid_mask: True
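# Backbone: caffe-style ResNet-50 returning stage-3/stage-4 features
# (out_indices [2, 3], i.e. C4/C5), with deformable convolutions in the last
# two stages and BatchNorm frozen (norm_eval, requires_grad: False).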
img_backbone:
type: 'MMResNet'
depth: 50
num_stages: 4
out_indices: [2, 3]
frozen_stages: -1
norm_cfg:
type_name: 'BatchNorm2D'
requires_grad: False
norm_eval: True
style: 'caffe'
dcn:
type_name: 'DeformConv2D'
deformable_groups: 1
# fallback_on_stride: False
stage_with_dcn: [False, False, True, True]
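# Neck: CPFPN maps the 1024- and 2048-channel backbone outputs to two
# 256-channel feature levels.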
img_neck:
type: CPFPN  # FPN variant with unused parameters removed
in_channels: [1024, 2048]
out_channels: 256
num_outs: 2
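# Head: CAPETemporalDNHead with 900 object queries and query denoising during
# training (dn_weight / split plus the noise settings below); with_time: False
# keeps the model single-frame, and code_weights weights the 10 box-code
# dimensions uniformly.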
pts_bbox_head:
type: CAPETemporalDNHead
num_classes: 10
in_channels: 256
num_query: 900
position_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
normedlinear: False
dn_weight: 1.0
split: 0.75
with_time: False
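# Decoder: CAPETransformer runs 6 layers of cross-view attention over the 6
# camera feature maps (stride 32, i.e. 44x16 for a 1408x512 input), with a
# feature-guided position encoder (with_fpe) and 64 depth bins starting at 1 m.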
transformer:
type: CAPETransformer
num_cameras: 6
num_layers: 6
feat_dim: 256
feat_stride: 32
image_height: 512
image_width: 1408
bound: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
with_fpe: True
depth_start: 1
depth_num: 64
att_layer:
type: 'CrossViewAttention'
hidden_dim: 512
num_queries: 900
qkv_bias: True
heads: 8
dim_head: 64
conditional: True
scalar: 10  # number of denoising (noise) groups
noise_scale: 1.0
with_time: False
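# Positional encoding: normalized 3D sine embedding, 128 features per axis.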
positional_encoding:
type: SinePositionalEncoding3D
num_feats: 128
normalize: True
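# Decoding is NMS-free (one prediction per query): the top 300 boxes by score
# are kept and any whose center falls outside post_center_range is dropped.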
bbox_coder:
type: NMSFreeCoder
post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
max_num: 300
voxel_size: [0.2, 0.2, 8]
num_classes: 10
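# Losses: focal loss (gamma 2.0, alpha 0.25, weight 2.0) for classification and
# L1 (weight 0.25) on the 10-dim box regression, both sum-reduced; one-to-one
# Hungarian matching is assumed, as in other DETR-style NMS-free detectors.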
loss_cls:
type: WeightedFocalLoss
gamma: 2.0
alpha: 0.25
loss_weight: 2.0
reduction: sum
loss_bbox:
type: WeightedL1Loss
loss_weight: 0.25
reduction: sum
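# Typical launch for a config like this (the exact entry point and flags are an
# assumption; check the repo's docs):
#   python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py \
#     --config configs/cape/cape_r50_1408x512_24ep_wocbgs_imagenet_pretrain.yml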