hist_guided_qmcan.yml
# Dataset reader arguments
dataset:
  image_features_train_h5: '/features_faster_rcnn_x101_train.h5'
  image_features_val_h5: '/features_faster_rcnn_x101_val.h5'
  image_features_test_h5: '/features_faster_rcnn_x101_test.h5'
  word_counts_json: '/visdial_1.0_word_counts_train.json'
  glove_npy: '/glove.npy'
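  # img_norm: 1 L2-normalizes image features; concat_history concatenates
  # all previous dialog rounds into one history sequence (assumed semantics,
  # following the VisDial starter-code data readers)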
  img_norm: 1
  concat_history: true
  max_sequence_length: 20
  vocab_min_count: 5
# Model-related arguments
model:
  encoder: 'hist_guided_qmcan'
  decoder: 'disc'
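  # 'disc' = discriminative decoder that ranks the answer candidates;
  # the usual alternative in VisDial-style code is 'gen' (generative)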
  use_hist: true
  img_feature_size: 2048
  word_embedding_size: 300
  lstm_hidden_size: 512
  lstm_num_layers: 1
  dropout: 0.2
  qh_attn_layer: 6   # number of self-attention layers over question and history
  qh_multi_head: 8   # number of attention heads in each layer
  qi_attn_layer: 6   # number of self-attention layers over question and image
  qi_multi_head: 8   # number of attention heads in each layer
# Optimization-related arguments
solver:
  batch_size: 16   # 32 x num_gpus is a good rule of thumb
  num_epochs: 15
  num_epochs_curriculum: 2
  initial_lr: 0.0005
  initial_lr_curriculum: 0.0001
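  # the *_curriculum settings presumably control a short curriculum
  # fine-tuning phase with its own epoch budget and learning rate
  # (assumed semantics; not documented in this file)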
  training_splits: "train"   # or "trainval"
  lr_gamma: 0.2
  lr_milestones:   # epochs at which lr -> lr * lr_gamma
    - 7    # 1e-5
    - 10   # 1e-6
  warmup_factor: 0.2
  warmup_epochs: 1
  # lr_milestones:   # epochs at which lr -> lr * lr_gamma
  #   - 3    # 1e-4
  #   - 6    # 1e-5
  #   - 8    # 1e-6
  #   - 15
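
For orientation, below is a minimal sketch of how a config like this might be consumed, assuming PyYAML and a MultiStepLR-style decay with a short linear warmup, as the solver keys suggest. The load_config and lr_at_epoch helpers are illustrative assumptions, not part of this repository, and the curriculum settings are ignored for simplicity.

    import yaml
    from bisect import bisect_right

    def load_config(path="hist_guided_qmcan.yml"):
        """Parse the YAML into nested dicts: dataset, model, solver."""
        with open(path) as f:
            return yaml.safe_load(f)

    def lr_at_epoch(epoch, solver):
        """Epoch-level learning rate: linear warmup from
        warmup_factor * initial_lr, then multiply by lr_gamma
        at each milestone (MultiStepLR-style)."""
        base = solver["initial_lr"]
        if epoch < solver["warmup_epochs"]:
            # ramp from warmup_factor * base up toward base
            alpha = epoch / solver["warmup_epochs"]
            return base * (solver["warmup_factor"] * (1 - alpha) + alpha)
        # apply one factor of lr_gamma per milestone already passed
        decays = bisect_right(solver["lr_milestones"], epoch)
        return base * solver["lr_gamma"] ** decays

    if __name__ == "__main__":
        solver = load_config()["solver"]
        for epoch in range(solver["num_epochs"]):
            print(f"epoch {epoch:2d}: lr = {lr_at_epoch(epoch, solver):.2e}")

In a PyTorch training loop the same decay would typically be expressed with torch.optim.lr_scheduler.MultiStepLR (fed lr_milestones and lr_gamma), with the warmup handled by a separate wrapper.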