-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy patht2t_envi_est.sh
66 lines (62 loc) · 3.36 KB
/
t2t_envi_est.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env bash
# Prepare directories and training data for the en-vi IWSLT'15 (32k subword)
# translation experiments with tensor2tensor, and pin all work to one GPU.
# Fail fast: without strict mode a failed datagen step would silently let the
# later training runs start on missing/partial data.
set -euo pipefail

# make dir
mkdir -p t2t_data t2t_datagen t2t_train t2t_output
# make data
python3 t2t-datagen --data_dir=t2t_data --tmp_dir=t2t_datagen \
    --problem=translate_envi_iwslt32k
# set gpu — restrict TensorFlow to the first GPU only
export CUDA_VISIBLE_DEVICES=0
# train
# div=-k means keeping the top k activations in each row of the attention
# matrix; if k is larger than the sequence length, all activations are kept.
# take k=-6 as an example
for div in -6; do
  for random_seed in 1; do
    name="envi_apd${div}_s${random_seed}"
    # train one model for this (div, seed) configuration
    python3 t2t-trainer --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --hparams_set=transformer_base --output_dir="t2t_output/${name}" \
        --train_steps=35000 --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # average all checkpoints before decoding
    python3 t2t-avg-all --model_dir "t2t_output/${name}" --output_dir "t2t_avg/${name}"
    # decode the test set (tst2013)
    python3 t2t-decoder --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --decode_hparams="beam_size=4,alpha=0.6" \
        --decode_from_file=t2t_datagen/tst2013.en --decode_to_file="${name}_test" \
        --hparams_set=transformer_base --output_dir="t2t_avg/${name}" --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # decode the validation set (tst2012)
    python3 t2t-decoder --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --decode_hparams="beam_size=4,alpha=0.6" \
        --decode_from_file=t2t_datagen/tst2012.en --decode_to_file="${name}_valid" \
        --hparams_set=transformer_base --output_dir="t2t_avg/${name}" --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # evaluate — inside the loop so every (div, seed) run is scored; the
    # original evaluated after the loop and would only score the last ${name}
    # if more values were added to either loop
    python3 t2t-bleu --translation="${name}_valid" --reference=t2t_datagen/tst2012.vi
    python3 t2t-bleu --translation="${name}_test" --reference=t2t_datagen/tst2013.vi
  done
done
# div=k means keeping the top (sequence_length / k) activations in each row
# of the attention matrix.
# take k=4 as an example
for div in 4; do
  for random_seed in 1; do
    name="envi_apd${div}_s${random_seed}"
    # train one model for this (div, seed) configuration
    python3 t2t-trainer --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --hparams_set=transformer_base --output_dir="t2t_output/${name}" \
        --train_steps=35000 --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # average all checkpoints before decoding
    python3 t2t-avg-all --model_dir "t2t_output/${name}" --output_dir "t2t_avg/${name}"
    # decode the test set (tst2013)
    python3 t2t-decoder --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --decode_hparams="beam_size=4,alpha=0.6" \
        --decode_from_file=t2t_datagen/tst2013.en --decode_to_file="${name}_test" \
        --hparams_set=transformer_base --output_dir="t2t_avg/${name}" --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # decode the validation set (tst2012)
    python3 t2t-decoder --data_dir=t2t_data --problem=translate_envi_iwslt32k \
        --model=transformer --decode_hparams="beam_size=4,alpha=0.6" \
        --decode_from_file=t2t_datagen/tst2012.en --decode_to_file="${name}_valid" \
        --hparams_set=transformer_base --output_dir="t2t_avg/${name}" --random_seed "${random_seed}" \
        --hparams "self_attention_type=sparse_dot_product,before_softmax=True,before_padding=False,d=${div}"
    # evaluate — inside the loop so every (div, seed) run is scored; the
    # original evaluated after the loop and would only score the last ${name}
    # if more values were added to either loop
    python3 t2t-bleu --translation="${name}_valid" --reference=t2t_datagen/tst2012.vi
    python3 t2t-bleu --translation="${name}_test" --reference=t2t_datagen/tst2013.vi
  done
done