diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
index e95de232304..87cd42a0a74 100644
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_100k_semisupervised_1a.sh
@@ -43,12 +43,12 @@ set -u -e -o pipefail
-stage=0 # Start from -1 for supervised seed system training
+stage=0
 train_stage=-100
 nj=80
 test_nj=50
 
-# The following 3 options decide the output directory for semi-supervised 
+# The following 3 options decide the output directory for semi-supervised
 # chain system
 # dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix}
 
@@ -89,7 +89,7 @@ echo "$0 $@" # Print the command line for logging
 if [ -f ./path.sh ]; then . ./path.sh; fi
 . ./utils/parse_options.sh
 
-# The following can be replaced with the versions that model
+# The following can be replaced with the versions that do not model
 # UNK using phone LM. $sup_lat_dir should also ideally be changed.
 unsup_decode_lang=data/lang_test_poco_sup100k_unk
 unsup_decode_graph_affix=_poco_sup100k_unk
@@ -141,6 +141,8 @@ if [ $stage -le 2 ]; then
   steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
     --mfcc-config conf/mfcc_hires.conf data/${unsupervised_set}_sp_hires || exit 1
+  steps/compute_cmvn_stats.sh data/${unsupervised_set}_sp_hires
+  utils/fix_data_dir.sh data/${unsupervised_set}_sp_hires
 fi
 
 unsupervised_set_perturbed=${unsupervised_set}_sp
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
index 2d5b2f8480e..c5b7a822991 100755
--- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_50k_semisupervised_1a.sh
@@ -109,7 +109,7 @@ test_graph_affix=_poco_unk
 unsup_rescore_lang=${unsup_decode_lang}_big
 
-dir=$exp_root/chain${chain_affix}/tdnn${tdnn_affix}
+dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}
 
 if ! cuda-compiled; then
   cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor
-fi
-
-cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1
-
-sup_ali_dir=$exp/tri4a
-
-treedir=$exp/chain${nnet3_affix}/tree_${tree_affix}
-if [ ! -f $treedir/final.mdl ]; then
-  echo "$0: $treedir/final.mdl does not exist."
-  exit 1
-fi
-
-diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; }
-
-dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix}
-
-#if [ $stage -le 9 ]; then
-#  steps/subset_ali_dir.sh --cmd "$train_cmd" \
-#    data/${unsupervised_set} data/${unsupervised_set}_sp_hires \
-#    $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \
-#    $chaindir/best_path_${unsupervised_set}${decode_affix}
-#  echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor
-#fi
-
-if [ $stage -le 10 ]; then
-  steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \
-    ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \
-    $dir
-fi
-
-if [ $stage -le 11 ]; then
-  echo "$0: creating neural net configs using the xconfig parser";
-
-  num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}')
-  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
-
-  lstm_opts="decay-time=40"
-
-  mkdir -p $dir/configs
-  cat < $dir/configs/network.xconfig
-  input dim=100 name=ivector
-  input dim=40 name=input
-
-  # please note that it is important to have input layer with the name=input
-  # as the layer immediately preceding the fixed-affine-layer to enable
-  # the use of short notation for the descriptor
-  fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
-
-  # the first splicing is moved before the lda layer, so no splicing here
-  relu-batchnorm-layer name=tdnn1 dim=$hidden_dim
-  relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim
-  relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim
-
-  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
-  relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim
-  relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim
-  fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
-  relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim
-  relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim
-  fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
-  relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim
-  relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim
-  fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
-
-  output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5
-
-  # adding the layers for xent branch
-  # This block prints the configs for a separate output that will be
-  # trained with a cross-entropy objective in the 'chain' models... this
-  # has the effect of regularizing the hidden parts of the model.
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - $train_extra_opts --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_ex250k_semisupervised_conf_smbr_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_ex250k_semisupervised_conf_smbr_b.sh deleted file mode 100644 index 37362657651..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_ex250k_semisupervised_conf_smbr_b.sh +++ /dev/null @@ -1,529 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -smbr_leaky_hmm_coefficient=0.00001 -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.2,0.2" -smbr_factor_schedule="output-0=0.0,0.0 output-1=0.4,0.4" - -# Semi-supervised options -comb_affix=comb_250k_ex250k_1b_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 
'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.smbr-factor-schedule="$smbr_factor_schedule" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_a.sh deleted file mode 100644 index ebf52fa8b40..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_a.sh +++ /dev/null @@ -1,513 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k 
-semisup_train_set= # semisup100k_250k - -tdnn_affix=7b # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_a - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. 
- output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_c.sh deleted file mode 100644 index f41374e4593..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_c.sh +++ /dev/null @@ -1,517 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7b # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_a - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1c # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=768 -cell_dim=768 -projection_dim=192 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 
'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
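  # Illustrative aside (not from the original recipe): the combine step above mixes
  # the supervised and unsupervised egs into a single archive set. --lang2weight takes
  # a comma-separated list parallel to the two egs directories given on the command
  # line, so supervision_weights=1.0,1.0 lets both sources contribute equally, while
  # something like 1.0,0.5 would down-weight the unsupervised egs. A minimal sketch of
  # pulling the two weights apart (the variable names here are hypothetical):
  sup_weight=$(echo "$supervision_weights" | cut -d, -f1)
  unsup_weight=$(echo "$supervision_weights" | cut -d, -f2)
  echo "$0: combining egs with weights: supervised=$sup_weight unsupervised=$unsup_weight"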
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_d.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_d.sh deleted file mode 100644 index f5f41fd67c1..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_d.sh +++ /dev/null @@ -1,517 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k 
-semisup_train_set= # semisup100k_250k - -tdnn_affix=7c # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1d # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=768 -cell_dim=768 -projection_dim=192 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
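    # Worked example (not part of the original script): the context options passed to
    # the egs dumping below come from the perl one-liners evaluated just after
    # ". $dir/configs/vars" above. Assuming, purely for illustration, that the network
    # reports model_left_context=46 and the usual chain frame_subsampling_factor=3
    # (both are read from files, so the real values may differ):
    #   left_context     = model_left_context + chunk_left_context = 46 + 40 = 86
    #   egs_left_context = int(86 + 3/2) = int(87.5) = 87
    # i.e. half a subsampling factor of extra frames is kept as a safety margin.
    # The same one-liner the script uses can be checked directly:
    #   perl -e "print int(86 + 3 / 2)"    # prints 87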
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_e.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_e.sh deleted file mode 100644 index 61c55686efe..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_e.sh +++ /dev/null @@ -1,516 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1e # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 
'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
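  # Quick sanity check (illustration only, not from the original recipe): the xconfig
  # stage above (stage 11) sets the xent branch's learning-rate factor to
  # 0.5 / xent_regularize. With xent_regularize=0.025 as set at the top of this
  # script, that evaluates to 20, which is exactly what the python one-liner prints:
  #   echo "print 0.5/0.025" | python    # -> 20.0
  # so the cross-entropy output layer learns at a rate independent of the small
  # regularization constant, as the comment inside the xconfig block explains.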
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_f.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_f.sh deleted file mode 100644 index dfdc36d6428..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_f.sh +++ /dev/null @@ -1,516 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k 
-semisup_train_set= # semisup100k_250k - -tdnn_affix=7g # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_e - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1f # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true --constrained false \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
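    # Hedged aside (not from the original recipe): the --deriv-weights-scp passed to
    # the egs script below points at per-frame weights obtained from the best-path
    # decoding of the unsupervised data (the "Deriv weights: Lattice posterior of best
    # path pdf" noted at the top of this file); with apply_deriv_weights=true they
    # scale the parameter derivatives computed from the unsupervised egs. To eyeball a
    # few of the weights, something like the following should work, assuming the
    # standard Kaldi binaries are on the PATH (unverified sketch):
    #   copy-vector scp:$chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp ark,t:- 2>/dev/null | head -n 2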
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true --constrained false $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 64 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_g.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_g.sh deleted file mode 100644 index 9dcfb693eda..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_g.sh +++ /dev/null @@ -1,516 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7f # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_f - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1g # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - $treedir ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 
'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true --constrained false \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true --constrained false $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 64 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_a.sh deleted file mode 100644 index 23aa3531377..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_a.sh +++ /dev/null @@ -1,512 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup 
-unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb_250k_1b_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient 0.00001 \ - --chain.mmi-factor-schedule="output-0=1.0,1.0 output-1=0.5,0.5" \ - --chain.smbr-factor-schedule="output-0=0.0,0.0 output-1=0.2,0.2" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - 
# the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - 
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_b.sh deleted file mode 100644 index ce8bf87c87d..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_250k_semisupervised_conf_smbr_b.sh +++ /dev/null @@ -1,437 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k -semisup_train_set= # semisup100k_250k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -smbr_leaky_hmm_coefficient=0.00001 -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.2,0.2" -smbr_factor_schedule="output-0=0.0,0.0 output-1=0.4,0.4" - -# Semi-supervised options -comb_affix=comb_250k_1b_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,2 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 64 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.smbr-factor-schedule="$smbr_factor_schedule" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_ex500k_semisupervised_smbr_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_ex500k_semisupervised_smbr_a.sh deleted file mode 100644 index ec5d2138730..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_ex500k_semisupervised_smbr_a.sh +++ /dev/null @@ -1,531 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_500k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_500k -semisup_train_set= # semisup100k_500k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -smbr_leaky_hmm_coefficient=0.00001 -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.2,0.2" -smbr_factor_schedule="output-0=0.0,0.0 output-1=0.4,0.4" - -# Semi-supervised options -comb_affix=comb_500k_ex500k_1a_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,1 -num_copies=2,1 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true 
-use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim 
recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 128 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.smbr-factor-schedule="$smbr_factor_schedule" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_kl_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_kl_a.sh deleted file mode 100644 index e2a94495332..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_kl_a.sh +++ /dev/null @@ -1,442 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_500k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_500k 
-semisup_train_set= # semisup100k_500k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.5,0.5" -kl_factor_schedule="output-0=0.0,0.0 output-1=0.5,0.5" - -# Semi-supervised options -comb_affix=comb_500k_1a_kl0.5 # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=3,1 -num_copies=2,1 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
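# Note on the unsupervised egs generated below (descriptive only): the
# lattices in $unsup_lat_dir come from decoding with the seed chain model,
# so they are already at the chain model's reduced frame rate; that is why
# --alignment-subsampling-factor is 1 here, whereas the supervised egs above
# use 3 to subsample the GMM-derived lattices. The lattice supervision keeps
# LM/graph scores scaled by --lattice-lm-scale (0.5 by default here), and
# --deriv-weights-scp supplies per-frame weights taken from the best-path
# pdf posteriors (see the header comment of this script).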
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 128 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.kl-factor-schedule="$kl_factor_schedule" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_b.sh deleted file mode 100644 index 281a6e4d88d..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_b.sh +++ /dev/null @@ -1,518 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_500k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup -unsupervised_set=train_unsup100k_500k -semisup_train_set= # semisup100k_500k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -smbr_leaky_hmm_coefficient=0.00001 -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.2,0.2" -smbr_factor_schedule="output-0=0.0,0.0 output-1=0.4,0.4" - -# Semi-supervised options -comb_affix=comb_500k_1b_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,1 -num_copies= -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 
-dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." - exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 
input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. 
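# Worked example of the context arithmetic above (illustrative numbers only:
# model_left_context/model_right_context come from $dir/configs/vars and
# depend on the network; frame_subsampling_factor is read from the seed
# chain dir, typically 3 for chain models). Assuming model_left_context=40,
# model_right_context=0 and frame_subsampling_factor=3:
#   left_context      = 40 + 40 = 80      (model + chunk_left_context)
#   egs_left_context  = int(80 + 3/2) = 81
#   right_context     = 0 + 0 = 0         (model + chunk_right_context)
#   egs_right_context = int(0 + 3/2) = 1
# These are the values handed to get_egs.sh via --left-context/--right-context.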
- - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 128 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. 
-fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.smbr-factor-schedule="$smbr_factor_schedule" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! 
$do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_c.sh deleted file mode 100644 index f29b65c6e7b..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_500k_semisupervised_conf_smbr_c.sh +++ /dev/null @@ -1,446 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_500k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=80 -exp=exp/semisup_100k - -supervised_set=train_sup 
-unsupervised_set=train_unsup100k_500k -semisup_train_set= # semisup100k_500k - -tdnn_affix=7d_h1024 # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_c - -nnet3_affix= # affix for nnet3 and chain dir -- relates to i-vector used - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -smbr_leaky_hmm_coefficient=0.00001 -mmi_factor_schedule="output-0=1.0,1.0 output-1=0.2,0.2" -smbr_factor_schedule="output-0=0.0,0.0 output-1=0.4,0.4" - -# Semi-supervised options -comb_affix=comb_500k_1c_smbr # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -chain_smbr_extra_opts="--one-silence-class" -lm_weights=3,1 -num_copies=2,1 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $chaindir/best_path_${unsupervised_set}_sp${decode_affix}/frame_subsampling_factor -fi - -cmvn_opts=`cat $chaindir/cmvn_opts` || exit 1 - -sup_ali_dir=$exp/tri4a - -treedir=$exp/chain${nnet3_affix}/tree_${tree_affix} -if [ ! -f $treedir/final.mdl ]; then - echo "$0: $treedir/final.mdl does not exist." 
- exit 1 -fi - -diff $treedir/tree $chaindir/tree || { echo "$0: $treedir/tree and $chaindir/tree differ"; exit 1; } - -dir=$exp/chain${nnet3_affix}/tdnn_lstm${tdnn_affix}${decode_affix}${egs_affix}${comb_affix:+_$comb_affix} - -#if [ $stage -le 9 ]; then -# steps/subset_ali_dir.sh --cmd "$train_cmd" \ -# data/${unsupervised_set} data/${unsupervised_set}_sp_hires \ -# $chaindir/best_path_${unsupervised_set}_sp${decode_affix} \ -# $chaindir/best_path_${unsupervised_set}${decode_affix} -# echo $frame_subsampling_factor > $chaindir/best_path_${unsupervised_set}${decode_affix}/frame_subsampling_factor -#fi - -if [ $stage -le 10 ]; then - steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ - ${treedir} ${chaindir}/best_path_${unsupervised_set}_sp${decode_affix} \ - $dir -fi - -if [ $stage -le 11 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm4 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. $dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${nnet3_affix}/tri4a_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${unsupervised_set}_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 128 \ - --lang2weight $supervision_weights --lang2num-copies "$num_copies" \ - 2 $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.mmi-factor-schedule="$mmi_factor_schedule" \ - --chain.smbr-factor-schedule="$smbr_factor_schedule" \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir --lang data/lang_chain_unk || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. 
- utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_b.sh deleted file mode 100755 index e686d977ded..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_b.sh +++ /dev/null @@ -1,225 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. - -# configs for 'chain' -stage=0 -tdnn_affix=7b -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_a -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. 
- steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
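# [Editorial sketch, not part of the original recipe.] The dropout_schedule
# '0,0@0.20,0.3@0.50,0' passed to train.py below is, as I understand Kaldi's
# schedule format, a piecewise-linear curve over the fraction of training data
# processed: dropout stays at 0 until 20% of training, rises linearly to 0.3 at
# 50%, then falls back to 0 by the end. It only affects components that declare
# a dropout-proportion, i.e. the fast-lstmp layers above. For example, at 35%
# of training the proportion would be about (0.35-0.20)/(0.50-0.20) * 0.3 = 0.15.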
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_c.sh deleted file mode 100755 index 1854a4a86e1..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_c.sh +++ /dev/null @@ -1,225 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. - -# configs for 'chain' -stage=0 -tdnn_affix=7c -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_c -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. 
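# [Editorial sketch, not part of the original recipe.] A small point about the
# decode stages in these scripts: num_jobs is simply the number of distinct
# speakers in the test set. For a toy utt2spk such as
#   utt1 spkA
#   utt2 spkA
#   utt3 spkB
# the pipeline `cut -d' ' -f2 | sort -u | wc -l` prints 2, so decoding would
# run with --nj 2 (one job per speaker).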
-echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_d.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_d.sh deleted file mode 100755 index 265a8c05a11..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_d.sh +++ /dev/null @@ -1,228 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. 
- -# configs for 'chain' -stage=0 -tdnn_affix=7d -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_c -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm4 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... 
this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_f.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_f.sh deleted file mode 100755 index 9bc98d90934..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_f.sh +++ /dev/null @@ -1,228 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. - -# configs for 'chain' -stage=0 -tdnn_affix=7f -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_e -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. 
- steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm4 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true --constrained false" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_b.sh deleted file mode 100755 index 988299a4621..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_b.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash -set -e -set -u - -# This is oracle experiment for semi-supervised training with 100 hours -# of supervised data and 250 hours of unsupervised data - -# configs for 'chain' -stage=0 -tdnn_affix=7b_oracle -train_stage=-10 -get_egs_stage=-10 -decode_iter= -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k_n10k -base_train_set=train_oracle100k_250k_n10k -tree_affix=bi_a -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a - -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ ! 
-f $treedir/final.mdl ]; then - echo "$0: Could not find $treedir/final.mdl" - exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_c.sh deleted file mode 100755 index b21dd72a37a..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_c.sh +++ /dev/null @@ -1,247 +0,0 @@ -#!/bin/bash -set -e -set -u - -# This is oracle experiment for semi-supervised training with 100 hours -# of supervised data and 250 hours of unsupervised data - -# configs for 'chain' -stage=0 -tdnn_affix=7c_oracle -train_stage=-10 -get_egs_stage=-10 -decode_iter= -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k_n10k -base_train_set=train_oracle100k_250k_n10k -tree_affix=bi_a -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a - -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ ! 
-f $treedir/final.mdl ]; then - echo "$0: Could not find $treedir/final.mdl" - exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=dnn2 dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=dnn4 dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=dnn6 dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_d.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_d.sh deleted file mode 100755 index 876633fedd6..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_oracle_d.sh +++ /dev/null @@ -1,247 +0,0 @@ -#!/bin/bash -set -e -set -u - -# This is oracle experiment for semi-supervised training with 100 hours -# of supervised data and 250 hours of unsupervised data - -# configs for 'chain' -stage=0 -tdnn_affix=7d_oracle -train_stage=-10 -get_egs_stage=-10 -decode_iter= -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k_n10k -base_train_set=train_oracle100k_250k_n10k -tree_affix=bi_a -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ ! 
-f $treedir/final.mdl ]; then - echo "$0: Could not find $treedir/final.mdl" - exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn8 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn9 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm4 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm4 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm4 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1b.sh new file mode 100644 index 00000000000..edf9f2eaac2 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1b.sh @@ -0,0 +1,456 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# This script is semi-supervised recipe with 100 hours of supervised data +# and 250 hours unsupervised data with naive splitting. 
+# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI", +# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018 +# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf +# local/semisup/run_100k.sh shows how to call this. + +# This version of script uses only supervised data for i-vector extractor +# training as against using the combined data as in run_tdnn_50k_semisupervised.sh. +# We use 3-gram LM trained on 100 hours of supervised data. We do not have +# enough data to do 4-gram LM rescoring as in run_tdnn_50k_semisupervised.sh. + +# This script uses phone LM to model UNK. +# This script uses the same tree as that for the seed model. +# See the comments in the script about how to change these. + +# Unsupervised set: train_unsup100k_250k (250 hour subset of Fisher excluding 100 hours for supervised) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices + +set -u -e -o pipefail + +stage=0 +train_stage=-100 +nj=80 +test_nj=50 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup_1b # affix for semi-supervised chain system + +# Datasets -- Expects data/$supervised_set and data/$unsupervised_set to be +# present +supervised_set=train_sup +unsupervised_set=train_unsup100k_250k + +# Input seed system +sup_chain_dir=exp/semisup_100k/chain/tdnn_lstm_1b_sp # supervised chain system +sup_lat_dir=exp/semisup_100k/chain/tri4a_train_sup_unk_lats # Seed model options +sup_tree_dir=exp/semisup_100k/chain/tree_bi_a # tree directory for supervised chain system +ivector_root_dir=exp/semisup_100k/nnet3 # i-vector extractor root directory + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation + +# Neural network opts +hidden_dim=1024 +cell_dim=1024 +projection_dim=256 + +# training options +num_epochs=2 +minibatch_size=64,32 +chunk_left_context=40 +chunk_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk_decoding=160 + +decode_iter= # Iteration to decode with + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +# The following can be replaced with the versions that do not model +# UNK using phone LM. $sup_lat_dir should also ideally be changed. +unsup_decode_lang=data/lang_test_poco_sup100k_unk +unsup_decode_graph_affix=_poco_sup100k_unk +test_lang=data/lang_test_poco_unk +test_graph_affix=_poco_unk + +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +if ! 
cuda-compiled; then + cat < \ +# $sup_chain_dir/best_path_${unsupervised_set_perturbed}/frame_subsampling_factor +# +# # This should be 1 if using a different source for supervised data alignments. +# # However alignments in seed tree directory have already been sub-sampled. +# echo $frame_subsampling_factor > \ +# $sup_tree_dir/frame_subsampling_factor +# +# # Build a new tree using stats from both supervised and unsupervised data +# steps/nnet3/chain/build_tree_multiple_sources.sh \ +# --use-fmllr false --context-opts "--context-width=2 --central-position=1" \ +# --frame-subsampling-factor $frame_subsampling_factor \ +# 7000 $lang \ +# data/${supervised_set_perturbed} \ +# ${sup_tree_dir} \ +# data/${unsupervised_set_perturbed} \ +# $chaindir/best_path_${unsupervised_set_perturbed} \ +# $treedir || exit 1 +# fi +# +# sup_tree_dir=$treedir # Use the new tree dir for further steps + +# Train denominator FST using phone alignments from +# supervised and unsupervised data +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ + ${sup_tree_dir} ${sup_chain_dir}/best_path_${unsupervised_set_perturbed} \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=40" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim + + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. 
we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. + output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + + output name=output-0 input=output.affine + output name=output-1 input=output.affine + + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +. $dir/configs/vars + +left_context=$[model_left_context + chunk_left_context] +right_context=$[model_right_context + chunk_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") +egs_left_context_initial=$(perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)") +egs_right_context_final=$(perl -e "print int($right_context_final + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set_perturbed} + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor $frame_subsampling_factor \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $sup_ivector_dir \ + --generate-egs-scp true \ + data/${supervised_set_perturbed}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=150 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=${sup_chain_dir}/decode_${unsupervised_set_perturbed} +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set_perturbed} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! 
-d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_${unsupervised_set_perturbed}/weights.scp \ + --online-ivector-dir $ivector_root_dir/ivectors_${unsupervised_set_perturbed}_hires \ + --generate-egs-scp true $unsup_egs_opts \ + data/${unsupervised_set_perturbed}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --block-size 128 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $sup_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.chunk-width $frames_per_eg \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch "$minibatch_size" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set_perturbed}_hires \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +test_graph_dir=$dir/graph${test_graph_affix} +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+  utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir
+fi
+
+if [ $stage -le 18 ]; then
+  rm -f $dir/.error
+  for decode_set in dev test; do
+    (
+      num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
+      if [ $num_jobs -gt $test_nj ]; then num_jobs=$test_nj; fi
+      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
+        --nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \
+        --online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \
+        --frames-per-chunk $frames_per_chunk_decoding \
+        --extra-left-context $extra_left_context \
+        --extra-right-context $extra_right_context \
+        --extra-left-context-initial 0 --extra-right-context-final 0 \
+        $test_graph_dir data/${decode_set}_hires \
+        $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error
+    ) &
+  done
+  wait;
+  if [ -f $dir/.error ]; then
+    echo "$0: Decoding failed. See $dir/decode${test_graph_affix}_*/log/*"
+    exit 1
+  fi
+fi
+
+exit 0;
diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1d.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1d.sh
new file mode 100644
index 00000000000..466f4fb7c91
--- /dev/null
+++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1d.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+
+# Copyright 2017 Vimal Manohar
+# Apache 2.0
+
+# This script is a semi-supervised recipe with 100 hours of supervised data
+# and 250 hours of unsupervised data, with naive splitting.
+# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI",
+# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018
+# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf
+# local/semisup/run_100k.sh shows how to call this.
+
+# This version of the script uses only supervised data for i-vector extractor
+# training, as opposed to using the combined data as in run_tdnn_50k_semisupervised.sh.
+# We use a 3-gram LM trained on 100 hours of supervised data. We do not have
+# enough data to do 4-gram LM rescoring as in run_tdnn_50k_semisupervised.sh.
+
+# This script uses a phone LM to model UNK.
+# This script uses the same tree as that for the seed model.
+# See the comments in the script about how to change these.
+ +# Unsupervised set: train_unsup100k_250k (250 hour subset of Fisher excluding 100 hours for supervised) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices + +set -u -e -o pipefail + +stage=0 +train_stage=-100 +nj=80 +test_nj=50 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +exp_root=exp/semisup_100k +chain_affix= # affix for chain dir +tdnn_affix=_semisup_1d # affix for semi-supervised chain system + +# Datasets -- Expects data/$supervised_set and data/$unsupervised_set to be +# present +supervised_set=train_sup +unsupervised_set=train_unsup100k_250k + +# Input seed system +sup_chain_dir=exp/semisup_100k/chain/tdnn_lstm_1d_sp # supervised chain system +sup_lat_dir=exp/semisup_100k/chain/tri4a_train_sup_lats # Seed model options +sup_tree_dir=exp/semisup_100k/chain/tree_bi_c # tree directory for supervised chain system +ivector_root_dir=exp/semisup_100k/nnet3 # i-vector extractor root directory + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation + +# Neural network opts +hidden_dim=1024 +cell_dim=1024 +projection_dim=256 + +# training options +num_epochs=2 +minibatch_size=64,32 +chunk_left_context=40 +chunk_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk_decoding=160 + +decode_iter= # Iteration to decode with + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +# The following can be replaced with the versions that do not model +# UNK using phone LM. $sup_lat_dir should also ideally be changed. +unsup_decode_lang=data/lang_test_poco_sup100k +unsup_decode_graph_affix=_poco_sup100k +test_lang=data/lang_test_poco +test_graph_affix=_poco + +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +if ! cuda-compiled; then + cat < \ +# $sup_chain_dir/best_path_${unsupervised_set_perturbed}/frame_subsampling_factor +# +# # This should be 1 if using a different source for supervised data alignments. +# # However alignments in seed tree directory have already been sub-sampled. 
+# echo $frame_subsampling_factor > \
+#   $sup_tree_dir/frame_subsampling_factor
+#
+# # Build a new tree using stats from both supervised and unsupervised data
+# steps/nnet3/chain/build_tree_multiple_sources.sh \
+#   --use-fmllr false --context-opts "--context-width=2 --central-position=1" \
+#   --frame-subsampling-factor $frame_subsampling_factor \
+#   7000 $lang \
+#   data/${supervised_set_perturbed} \
+#   ${sup_tree_dir} \
+#   data/${unsupervised_set_perturbed} \
+#   $chaindir/best_path_${unsupervised_set_perturbed} \
+#   $treedir || exit 1
+# fi
+#
+# sup_tree_dir=$treedir  # Use the new tree dir for further steps
+
+# Train denominator FST using phone alignments from
+# supervised and unsupervised data
+if [ $stage -le 10 ]; then
+  steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \
+    ${sup_tree_dir} ${sup_chain_dir}/best_path_${unsupervised_set_perturbed} \
+    $dir
+fi
+
+if [ $stage -le 11 ]; then
+  echo "$0: creating neural net configs using the xconfig parser";
+
+  num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}')
+  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+  lstm_opts="decay-time=40"
+
+  mkdir -p $dir/configs
+  cat <<EOF > $dir/configs/network.xconfig
+  input dim=100 name=ivector
+  input dim=40 name=input
+
+  # please note that it is important to have input layer with the name=input
+  # as the layer immediately preceding the fixed-affine-layer to enable
+  # the use of short notation for the descriptor
+  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+  # the first splicing is moved before the lda layer, so no splicing here
+  relu-batchnorm-layer name=tdnn1 dim=$hidden_dim
+  relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim
+  relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim
+
+  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
+  relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim
+  relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim
+  fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
+  relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim
+  relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim
+  fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts
+
+  output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5
+
+  # adding the layers for xent branch
+  # This block prints the configs for a separate output that will be
+  # trained with a cross-entropy objective in the 'chain' models... this
+  # has the effect of regularizing the hidden parts of the model. we use
+  # 0.5 / args.xent_regularize as the learning rate factor- the factor of
+  # 0.5 / args.xent_regularize is suitable as it means the xent
+  # final-layer learns at a rate independent of the regularization
+  # constant; and the 0.5 was tuned so as to make the relative progress
+  # similar in the xent and regular final layers.
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + + output name=output-0 input=output.affine + output name=output-1 input=output.affine + + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +. $dir/configs/vars + +left_context=$[model_left_context + chunk_left_context] +right_context=$[model_right_context + chunk_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") +egs_left_context_initial=$(perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)") +egs_right_context_final=$(perl -e "print int($right_context_final + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set_perturbed} + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor $frame_subsampling_factor \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $sup_ivector_dir \ + --generate-egs-scp true \ + data/${supervised_set_perturbed}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=150 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=${sup_chain_dir}/decode_${unsupervised_set_perturbed} +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set_perturbed} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_${unsupervised_set_perturbed}/weights.scp \ + --online-ivector-dir $ivector_root_dir/ivectors_${unsupervised_set_perturbed}_hires \ + --generate-egs-scp true $unsup_egs_opts \ + data/${unsupervised_set_perturbed}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --block-size 128 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $sup_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.chunk-width $frames_per_eg \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch "$minibatch_size" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set_perturbed}_hires \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +test_graph_dir=$dir/graph${test_graph_affix} +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir +fi + +if [ $stage -le 18 ]; then + rm -f $dir/.error + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + if [ $num_jobs -gt $test_nj ]; then num_jobs=$test_nj; fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \ + --online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \ + --frames-per-chunk $frames_per_chunk_decoding \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + $test_graph_dir data/${decode_set}_hires \ + $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error + ) & + done + wait; + if [ -f $dir/.error ]; then + echo "$0: Decoding failed. See $dir/decode${test_graph_affix}_*/log/*" + exit 1 + fi +fi + +exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh deleted file mode 100755 index 1806303f319..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh +++ /dev/null @@ -1,233 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. - -# configs for 'chain' -stage=0 -tdnn_affix=7smbr_a -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_c -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 -extra_opts="--chain.mmi-factor-schedule=1.0,1.0@0.1,0.5@0.2,0.5 --chain.smbr-factor-schedule=0.0,0.0@0.1,0.5@0.2,0.5" -chain_smbr_extra_opts= -smbr_leaky_hmm_coefficient=0.00001 -leaky_hmm_coefficient=0.1 -l2_regularize=0.0 # 00005 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. 
- steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient $leaky_hmm_coefficient \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ - --chain.l2-regularize $l2_regularize \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --cleanup.preserve-model-interval 10 \ - --dir $dir --lang $lang $extra_opts || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh.orig b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh.orig deleted file mode 100755 index a7505376a19..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_smbr_a.sh.orig +++ /dev/null @@ -1,240 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. 
- -# configs for 'chain' -stage=0 -tdnn_affix=7smbr_a -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_c -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 -extra_opts="--chain.mmi-factor-schedule=1.0,1.0@0.1,0.5@0.2,0.5 --chain.smbr-factor-schedule=0.0,0.0@0.1,0.5@0.2,0.5" -chain_smbr_extra_opts= -smbr_leaky_hmm_coefficient=0.00001 -leaky_hmm_coefficient=0.1 -<<<<<<< Updated upstream -l2_regularize=0.0 # 00005 -======= ->>>>>>> Stashed changes - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... 
this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient $leaky_hmm_coefficient \ - --chain.smbr-leaky-hmm-coefficient $smbr_leaky_hmm_coefficient \ -<<<<<<< Updated upstream - --chain.l2-regularize $l2_regularize \ -======= - --chain.l2-regularize 0.0 \ ->>>>>>> Stashed changes - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --cleanup.preserve-model-interval 10 \ - --dir $dir --lang $lang $extra_opts || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_a.sh deleted file mode 100755 index bf1e4878c8e..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_a.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. -# This is similar to _d, but uses a phone LM UNK model - -# configs for 'chain' -stage=0 -tdnn_affix=7a -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup15k -unsupervised_set=train_unsup100_250k -semisup_train_set=semisup15k_250k -tree_affix=bi_i -nnet3_affix=_semi15k_250k -chain_affix=_semi15k_250k -exp=exp/semisup_15k -gmm=tri3 -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=10 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. 
- steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_b.sh deleted file mode 100755 index 3c9ab27a353..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_b.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. 
-# This is similar to _d, but uses a phone LM UNK model - -# configs for 'chain' -stage=0 -tdnn_affix=7b -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup15k -unsupervised_set=train_unsup100_250k -semisup_train_set=semisup15k_250k -tree_affix=bi_j -nnet3_affix=_semi15k_250k -chain_affix=_semi15k_250k -exp=exp/semisup_15k -gmm=tri3 -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=10 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. 
we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. - - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_oracle_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_oracle_a.sh deleted file mode 100755 index 997a17a5329..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_oracle_a.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/bin/bash -set -e -set -u - -# This is oracle experiment for semi-supervised training with 100 hours -# of supervised data and 250 hours of unsupervised data - -# configs for 'chain' -stage=0 -tdnn_affix=7a_oracle -train_stage=-10 -get_egs_stage=-10 -decode_iter= -supervised_set=train_sup -unsupervised_set=train_unsup100k_250k_n10k -base_train_set=train_oracle100k_250k_n10k -tree_affix=bi_i -nnet3_affix=_semi15k_250k -chain_affix=_semi15k_250k -exp=exp/semisup_15k -gmm=tri3 - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -# training options -num_epochs=4 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ ! 
-f $treedir/final.mdl ]; then - echo "$0: Could not find $treedir/final.mdl" - exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - -EOF - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
- - steps/nnet3/chain/train.py --stage $train_stage \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_a.sh deleted file mode 100644 index 145b4c0e178..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_a.sh +++ /dev/null @@ -1,507 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_15k - -unsupervised_set=train_unsup100k_250k # set this to your choice of unsupervised data -supervised_set=train_sup15k -semisup_train_set=semisup15k_250k - -tdnn_affix=7a # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_i - -nnet3_affix=_semi15k_250k # affix for nnet3 and chain dir -- relates to i-vector used -chain_affix=_semi15k_250k - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb1a # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=5,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. 
./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. 
$dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${chain_affix}/tri3_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
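# The egs context values computed above add the training-chunk context to the
# model's own context and pad by half a (subsampled) frame.  A worked example
# with a hypothetical model_left_context of 40 (the real value comes from
# $dir/configs/vars), chunk_left_context=40 and frame_subsampling_factor=3:
model_left_context=40; chunk_left_context=40; frame_subsampling_factor=3
left_context=$[model_left_context + chunk_left_context]              # 80
perl -e "print int($left_context + $frame_subsampling_factor / 2)"   # int(81.5) -> 81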
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_b.sh deleted file mode 100644 index 681a46212c9..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_b.sh +++ /dev/null @@ -1,509 +0,0 @@ -#!/bin/bash - -# This script uses phone LM to model UNK. - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_15k - -unsupervised_set=train_unsup100k_250k # set this to your choice of unsupervised data -supervised_set=train_sup15k -semisup_train_set=semisup15k_250k - -tdnn_affix=7a # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_i - -nnet3_affix=_semi15k_250k # affix for nnet3 and chain dir -- relates to i-vector used -chain_affix=_semi15k_250k - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb1b # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=5,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay - output name=output-1 input=output.affine@$label_delay - - output name=output-0-xent input=output-xent.log-softmax@$label_delay - output name=output-1-xent input=output-xent.log-softmax@$label_delay -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. 
$dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${chain_affix}/tri3_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
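# As I read this recipe, the combined egs are handled like a two-"language"
# multilingual job: supervised egs are routed to output-0 / output-0-xent and
# unsupervised egs to output-1 / output-1-xent (hence the extra output nodes
# in the xconfig above), and supervision_weights scales each source's
# contribution to the objective.  Illustrative mapping only:
for n in 0 1; do
  [ $n -eq 0 ] && src=supervised || src=unsupervised
  echo "egs source $n ($src) -> output-$n, output-$n-xent"
done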
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --block-size 64 \ - --lang2weight $supervision_weights 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_c.sh deleted file mode 100644 index 01c0191be83..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_semisupervised_conf_c.sh +++ /dev/null @@ -1,507 +0,0 @@ -#!/bin/bash - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_15k - -unsupervised_set=train_unsup100k_250k # set this to your choice of unsupervised data -supervised_set=train_sup15k -semisup_train_set=semisup15k_250k - -tdnn_affix=7b # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_j - -nnet3_affix=_semi15k_250k # affix for nnet3 and chain dir -- relates to i-vector used -chain_affix=_semi15k_250k - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb1c # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=5,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. 
$dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${chain_affix}/tri3_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
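# Note on the subsampling flags: the supervised egs above are built from GMM
# lattices at the 10 ms frame rate, hence --alignment-subsampling-factor 3,
# while the unsupervised lattices below come from decoding with the chain
# seed model and are already at the subsampled 30 ms rate, hence
# --alignment-subsampling-factor 1.  For a hypothetical 6-second utterance:
num_frames_10ms=600
echo "chain supervision frames: $[num_frames_10ms / 3]"   # 200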
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_smbr_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_smbr_a.sh deleted file mode 100755 index aff735560e0..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_15k_smbr_a.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash -set -e - -# This is fisher chain recipe for training a model on a subset of around 15 hours. -# This is similar to _d, but uses a phone LM UNK model - -# configs for 'chain' -stage=0 -tdnn_affix=7smbr_a -train_stage=-10 -get_egs_stage=-10 -decode_iter= -train_set=train_sup15k -unsupervised_set=train_unsup100_250k -semisup_train_set=semisup15k_250k -tree_affix=bi_i -nnet3_affix=_semi15k_250k -chain_affix=_semi15k_250k -exp=exp/semisup_15k -gmm=tri3 -hidden_dim=512 -cell_dim=512 -projection_dim=128 - -# training options -num_epochs=10 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -label_delay=5 -extra_opts="--chain.mmi-factor-schedule=1.0,1.0@0.1,0.5@0.2,0.5 --chain.smbr-factor-schedule=0.0,0.0@0.1,0.5@0.2,0.5" -chain_smbr_extra_opts= - -# decode options -extra_left_context=50 -extra_right_context=0 - -remove_egs=false -common_egs_dir= - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. ./cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -if ! cuda-compiled; then - cat <$lang/topo -fi - -if [ $stage -le 11 ]; then - # Build a tree using our new topology. 
- steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 -fi - -if [ $stage -le 12 ]; then - echo "$0: creating neural net configs using the xconfig parser"; - - num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') - learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) - - lstm_opts="decay-time=40" - - mkdir -p $dir/configs - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage - fi - - mkdir -p $dir/egs - touch $dir/egs/.nodelete # keep egs around when that run dies. 
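# The mmi/smbr factor schedules in extra_opts above appear to use the same
# "value,value@fraction,...,value" syntax as dropout_schedule: anchor values
# at fractions of training, linearly interpolated in between (my reading of
# the schedule string, not verified against the parser).  For example, the
# MMI factor 1.0,1.0@0.1,0.5@0.2,0.5 evaluated at 15% of training:
perl -e '($x0,$y0,$x1,$y1,$t) = (0.1, 1.0, 0.2, 0.5, 0.15);
         printf("mmi factor ~= %.2f\n", $y0 + ($t-$x0)/($x1-$x0)*($y1-$y0));'  # ~0.75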
- - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$common_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $train_ivector_dir \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.0 \ - --chain.apply-deriv-weights false \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ - --egs.chunk-width 160,140,110,80 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch 64,32 \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs $num_epochs \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs $remove_egs \ - --feat-dir $train_data_dir \ - --tree-dir $treedir \ - --lat-dir $lat_dir \ - --chain.smbr-extra-opts="$chain_smbr_extra_opts" \ - --dir $dir --lang $lang $extra_opts || exit 1; -fi - -graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir -fi - -decode_suff= -if [ $stage -le 15 ]; then - iter_opts= - if [ ! -z $decode_iter ]; then - iter_opts=" --iter $decode_iter " - fi - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 \ - --extra-right-context-final 0 \ - --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; - ) & - done -fi -wait; -exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1a.sh similarity index 73% rename from egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_a.sh rename to egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1a.sh index 6bafc30f3aa..74d7fdb76e9 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_a.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1a.sh @@ -1,35 +1,44 @@ #!/bin/bash + +# Copyright 2017-18 Vimal Manohar +# Apache 2.0 + set -e +set -o pipefail -# This is fisher chain recipe for training a model on a subset of around 15 hours. -# This is similar to _d, but uses a phone LM UNK model +# This is fisher chain recipe for training a model on a subset of around +# 15-50 hours of supervised data. +# This system uses phone LM to model UNK. 
+# local/semisup/run_50k.sh and local/semisup/run_100k.sh show how to call this. # configs for 'chain' stage=0 -tdnn_affix=7a train_stage=-10 get_egs_stage=-10 -decode_iter= +exp_root=exp/semisup_50k + +nj=30 +tdnn_affix=_1a train_set=train_sup50k -unsupervised_set=train_unsup100_250k -semisup_train_set=semisup50k_250k -tree_affix=bi_i -nnet3_affix=_semi50k_250k -chain_affix=_semi50k_250k -exp=exp/semisup_50k -gmm=tri4a +ivector_train_set= # dataset for training i-vector extractor + +nnet3_affix= # affix for nnet3 dir -- relates to i-vector used +chain_affix= # affix for chain dir +tree_affix=bi_a +gmm=tri4a # Expect GMM model in $exp/$gmm for alignment + +# Neural network opts hidden_dim=1024 cell_dim=1024 projection_dim=256 # training options -num_epochs=10 +num_epochs=4 minibatch_size=64,32 chunk_left_context=40 chunk_right_context=0 dropout_schedule='0,0@0.20,0.3@0.50,0' xent_regularize=0.025 -self_repair_scale=0.00001 label_delay=5 # decode options @@ -37,15 +46,16 @@ extra_left_context=50 extra_right_context=0 remove_egs=false -common_egs_dir= +common_egs_dir= # if provided, will skip egs generation +common_treedir= # if provided, will skip the tree building stage -# End configuration section. +decode_iter= # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh -. ./path.sh +if [ -f ./path.sh ]; then . ./path.sh; fi . ./utils/parse_options.sh if ! cuda-compiled; then @@ -56,35 +66,48 @@ where "nvcc" is installed. EOF fi -gmm_dir=$exp/$gmm # used to get training lattices (for chain supervision) -treedir=$exp/chain${chain_affix}/tree_${tree_affix} -lat_dir=$exp/chain${chain_affix}/$(basename $gmm_dir)_${train_set}_sp_unk_lats # training lattices directory -dir=$exp/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp +gmm_dir=$exp_root/$gmm # used to get training lattices (for chain supervision) +treedir=$exp_root/chain${chain_affix}/tree_${tree_affix} +lat_dir=$exp_root/chain${chain_affix}/${gmm}_${train_set}_sp_unk_lats # training lattices directory +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp train_data_dir=data/${train_set}_sp_hires -train_ivector_dir=$exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires +train_ivector_dir=$exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires lang=data/lang_chain_unk # The iVector-extraction and feature-dumping parts are the same as the standard # nnet3 setup, and you can skip them by setting "--stage 8" if you have already # run those things. - -local/semisup/nnet3/run_ivector_common.sh --stage $stage --exp $exp \ +local/nnet3/run_ivector_common.sh --stage $stage --exp-root $exp_root \ --speed-perturb true \ --train-set $train_set \ - --unsup-train-set $unsupervised_set \ - --semisup-train-set $semisup_train_set \ + --ivector-train-set "$ivector_train_set" \ --nnet3-affix "$nnet3_affix" || exit 1 -if [ $stage -le 9 ]; then +if [ "$train_set" != "$ivector_train_set" ]; then + if [ $stage -le 9 ]; then + # We extract iVectors on all the ${train_set} data, which will be what we + # train the system on. + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). 
+ utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${train_set}_sp_hires data/${train_set}_sp_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + data/${train_set}_sp_max2_hires $exp_root/nnet3${nnet3_affix}/extractor \ + $exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires || exit 1; + fi +fi + +if [ $stage -le 10 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 30 --cmd "$train_cmd" \ + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" \ --generate-ali-from-lats true data/${train_set}_sp \ data/lang_unk $gmm_dir $lat_dir || exit 1; rm $lat_dir/fsts.*.gz # save space fi -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then # Create a version of the lang/ directory that has one state per phone in the # topo file. [note, it really has two states.. the first one is only repeated # once, the second one has zero or more repeats.] @@ -97,15 +120,18 @@ if [ $stage -le 10 ]; then steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo fi -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 +if [ -z "$common_treedir" ]; then + if [ $stage -le 12 ]; then + # Build a tree using our new topology. + steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 + fi +else + treedir=$common_treedir fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') @@ -137,7 +163,7 @@ if [ $stage -le 12 ]; then fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 # adding the layers for xent branch # This block prints the configs for a separate output that will be @@ -149,12 +175,12 @@ if [ $stage -le 12 ]; then # constant; and the 0.5 was tuned so as to make the relative progress # similar in the xent and regular final layers. output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF +EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage @@ -164,6 +190,7 @@ if [ $stage -le 13 ]; then touch $dir/egs/.nodelete # keep egs around when that run dies. 
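+  # A brief note on the train.py options below (a summary only, not a change to
+  # the recipe): "--egs.dir" re-uses previously dumped examples when
+  # --common-egs-dir is supplied to this script (otherwise egs are generated
+  # fresh under $dir/egs), and with remove_egs=false they are kept on disk for
+  # later experiments. Once training finishes, a compact summary of the model
+  # (number of parameters, per-iteration objectives, etc.) can be printed with,
+  # for example:
+  #   steps/info/chain_dir_info.pl $dir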
steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ @@ -191,7 +218,6 @@ if [ $stage -le 13 ]; then --egs.chunk-right-context $chunk_right_context \ --egs.chunk-left-context-initial 0 \ --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ --tree-dir $treedir \ @@ -200,15 +226,15 @@ if [ $stage -le 13 ]; then fi graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then +if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_poco_unk $dir $graph_dir fi decode_suff= -if [ $stage -le 15 ]; then +if [ $stage -le 16 ]; then iter_opts= if [ ! -z $decode_iter ]; then iter_opts=" --iter $decode_iter " @@ -223,8 +249,8 @@ if [ $stage -le 15 ]; then --extra-left-context-initial 0 \ --extra-right-context-final 0 \ --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + --online-ivector-dir $exp_root/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_poco_unk_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; ) & done fi diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_g.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1b.sh similarity index 74% rename from egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_g.sh rename to egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1b.sh index ff4e8d55efc..2632cb45fee 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_g.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1b.sh @@ -1,21 +1,33 @@ #!/bin/bash + +# Copyright 2017-18 Vimal Manohar +# Apache 2.0 + set -e +set -o pipefail -# This is fisher chain recipe for training a model on a subset of around 15 hours. +# This is fisher chain recipe for training a model on a subset of around +# 100-300 hours of supervised data. +# This system uses phone LM to model UNK. +# local/semisup/run_50k.sh and local/semisup/run_100k.sh show how to call this. # configs for 'chain' stage=0 -tdnn_affix=7g train_stage=-10 get_egs_stage=-10 -decode_iter= +exp_root=exp/semisup_100k + +nj=30 +tdnn_affix=_1b train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_e -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a +ivector_train_set= # dataset for training i-vector extractor + +nnet3_affix= # affix for nnet3 dir -- relates to i-vector used +chain_affix= # affix for chain dir +tree_affix=bi_a +gmm=tri4a # Expect GMM model in $exp/$gmm for alignment + +# Neural network opts hidden_dim=1024 cell_dim=1024 projection_dim=256 @@ -34,13 +46,16 @@ extra_left_context=50 extra_right_context=0 remove_egs=false -common_egs_dir= +common_egs_dir= # if provided, will skip egs generation +common_treedir= # if provided, will skip the tree building stage + +decode_iter= # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh -. 
./path.sh +if [ -f ./path.sh ]; then . ./path.sh; fi . ./utils/parse_options.sh if ! cuda-compiled; then @@ -51,33 +66,48 @@ where "nvcc" is installed. EOF fi -gmm_dir=$exp/$gmm # used to get training lattices (for chain supervision) -treedir=$exp/chain${chain_affix}/tree_${tree_affix} -lat_dir=$exp/chain${chain_affix}/$(basename $gmm_dir)_${train_set}_sp_unk_lats # training lattices directory -dir=$exp/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp +gmm_dir=$exp_root/$gmm # used to get training lattices (for chain supervision) +treedir=$exp_root/chain${chain_affix}/tree_${tree_affix} +lat_dir=$exp_root/chain${chain_affix}/${gmm}_${train_set}_sp_unk_lats # training lattices directory +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp train_data_dir=data/${train_set}_sp_hires -train_ivector_dir=$exp/nnet3${nnet3_affix}/ivectors_${ivector_train_set}_sp_hires +train_ivector_dir=$exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires lang=data/lang_chain_unk # The iVector-extraction and feature-dumping parts are the same as the standard # nnet3 setup, and you can skip them by setting "--stage 8" if you have already # run those things. - -local/semisup/nnet3/run_ivector_common.sh --stage $stage --exp $exp \ +local/nnet3/run_ivector_common.sh --stage $stage --exp-root $exp_root \ --speed-perturb true \ --train-set $train_set \ + --ivector-train-set "$ivector_train_set" \ --nnet3-affix "$nnet3_affix" || exit 1 -if [ $stage -le 9 ]; then +if [ "$train_set" != "$ivector_train_set" ]; then + if [ $stage -le 9 ]; then + # We extract iVectors on all the ${train_set} data, which will be what we + # train the system on. + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${train_set}_sp_hires data/${train_set}_sp_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + data/${train_set}_sp_max2_hires $exp_root/nnet3${nnet3_affix}/extractor \ + $exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires || exit 1; + fi +fi + +if [ $stage -le 10 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 30 --cmd "$train_cmd" \ + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" \ --generate-ali-from-lats true data/${train_set}_sp \ data/lang_unk $gmm_dir $lat_dir || exit 1; rm $lat_dir/fsts.*.gz # save space fi -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then # Create a version of the lang/ directory that has one state per phone in the # topo file. [note, it really has two states.. the first one is only repeated # once, the second one has zero or more repeats.] @@ -90,15 +120,18 @@ if [ $stage -le 10 ]; then steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo fi -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 +if [ -z "$common_treedir" ]; then + if [ $stage -le 12 ]; then + # Build a tree using our new topology. 
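+    # (For reference: the "7000" below is the target number of tree leaves, and
+    # --frame-subsampling-factor 3 builds the tree and its alignments at the
+    # reduced output frame rate used by 'chain' models, i.e. one output frame
+    # for every 3 input frames.)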
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 + fi +else + treedir=$common_treedir fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') @@ -150,7 +183,7 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage @@ -160,6 +193,7 @@ if [ $stage -le 13 ]; then touch $dir/egs/.nodelete # keep egs around when that run dies. steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ @@ -181,13 +215,12 @@ if [ $stage -le 13 ]; then --trainer.optimization.momentum 0.0 \ --trainer.deriv-truncate-margin 8 \ --egs.stage $get_egs_stage \ - --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true --constrained false" \ + --egs.opts "--frames-overlap-per-eg 0 --generate-egs-scp true" \ --egs.chunk-width 160,140,110,80 \ --egs.chunk-left-context $chunk_left_context \ --egs.chunk-right-context $chunk_right_context \ --egs.chunk-left-context-initial 0 \ --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ --tree-dir $treedir \ @@ -196,15 +229,15 @@ if [ $stage -le 13 ]; then fi graph_dir=$dir/graph_poco_unk -if [ $stage -le 14 ]; then +if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test_unk $dir $graph_dir + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_poco_unk $dir $graph_dir fi decode_suff= -if [ $stage -le 15 ]; then +if [ $stage -le 16 ]; then iter_opts= if [ ! 
-z $decode_iter ]; then iter_opts=" --iter $decode_iter " @@ -219,8 +252,8 @@ if [ $stage -le 15 ]; then --extra-left-context-initial 0 \ --extra-right-context-final 0 \ --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + --online-ivector-dir $exp_root/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_poco_unk_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; ) & done fi diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1c.sh similarity index 73% rename from egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_b.sh rename to egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1c.sh index aa0387cc1d4..34a0ace9878 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_b.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1c.sh @@ -1,35 +1,44 @@ #!/bin/bash + +# Copyright 2017-18 Vimal Manohar +# Apache 2.0 + set -e +set -o pipefail -# This is fisher chain recipe for training a model on a subset of around 15 hours. -# This is similar to _d, but uses a phone LM UNK model +# This is fisher chain recipe for training a model on a subset of around +# 15-50 hours of supervised data. +# This system uses phone LM to model UNK. +# local/semisup/run_50k.sh and local/semisup/run_100k.sh show how to call this. # configs for 'chain' stage=0 -tdnn_affix=7b train_stage=-10 get_egs_stage=-10 -decode_iter= +exp_root=exp/semisup_50k + +nj=30 +tdnn_affix=_1c train_set=train_sup50k -unsupervised_set=train_unsup100_250k -semisup_train_set=semisup50k_250k -tree_affix=bi_j -nnet3_affix=_semi50k_250k -chain_affix=_semi50k_250k -exp=exp/semisup_50k -gmm=tri4a +ivector_train_set= # dataset for training i-vector extractor + +nnet3_affix= # affix for nnet3 dir -- relates to i-vector used +chain_affix= # affix for chain dir +tree_affix=bi_c +gmm=tri4a # Expect GMM model in $exp/$gmm for alignment + +# Neural network opts hidden_dim=1024 cell_dim=1024 projection_dim=256 # training options -num_epochs=10 +num_epochs=4 minibatch_size=64,32 chunk_left_context=40 chunk_right_context=0 dropout_schedule='0,0@0.20,0.3@0.50,0' xent_regularize=0.025 -self_repair_scale=0.00001 label_delay=5 # decode options @@ -37,15 +46,16 @@ extra_left_context=50 extra_right_context=0 remove_egs=false -common_egs_dir= +common_egs_dir= # if provided, will skip egs generation +common_treedir= # if provided, will skip the tree building stage -# End configuration section. +decode_iter= # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh -. ./path.sh +if [ -f ./path.sh ]; then . ./path.sh; fi . ./utils/parse_options.sh if ! cuda-compiled; then @@ -56,35 +66,48 @@ where "nvcc" is installed. 
EOF fi -gmm_dir=$exp/$gmm # used to get training lattices (for chain supervision) -treedir=$exp/chain${chain_affix}/tree_${tree_affix} -lat_dir=$exp/chain${chain_affix}/$(basename $gmm_dir)_${train_set}_sp_lats # training lattices directory -dir=$exp/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp +gmm_dir=$exp_root/$gmm # used to get training lattices (for chain supervision) +treedir=$exp_root/chain${chain_affix}/tree_${tree_affix} +lat_dir=$exp_root/chain${chain_affix}/${gmm}_${train_set}_sp_lats # training lattices directory +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp train_data_dir=data/${train_set}_sp_hires -train_ivector_dir=$exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires +train_ivector_dir=$exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires lang=data/lang_chain # The iVector-extraction and feature-dumping parts are the same as the standard # nnet3 setup, and you can skip them by setting "--stage 8" if you have already # run those things. - -local/semisup/nnet3/run_ivector_common.sh --stage $stage --exp $exp \ +local/nnet3/run_ivector_common.sh --stage $stage --exp-root $exp_root \ --speed-perturb true \ --train-set $train_set \ - --unsup-train-set $unsupervised_set \ - --semisup-train-set $semisup_train_set \ + --ivector-train-set "$ivector_train_set" \ --nnet3-affix "$nnet3_affix" || exit 1 -if [ $stage -le 9 ]; then +if [ "$train_set" != "$ivector_train_set" ]; then + if [ $stage -le 9 ]; then + # We extract iVectors on all the ${train_set} data, which will be what we + # train the system on. + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${train_set}_sp_hires data/${train_set}_sp_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + data/${train_set}_sp_max2_hires $exp_root/nnet3${nnet3_affix}/extractor \ + $exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires || exit 1; + fi +fi + +if [ $stage -le 10 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 30 --cmd "$train_cmd" \ + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" \ --generate-ali-from-lats true data/${train_set}_sp \ data/lang $gmm_dir $lat_dir || exit 1; rm $lat_dir/fsts.*.gz # save space fi -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then # Create a version of the lang/ directory that has one state per phone in the # topo file. [note, it really has two states.. the first one is only repeated # once, the second one has zero or more repeats.] @@ -97,15 +120,18 @@ if [ $stage -le 10 ]; then steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo fi -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 +if [ -z "$common_treedir" ]; then + if [ $stage -le 12 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 + fi +else + treedir=$common_treedir fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') @@ -137,7 +163,7 @@ if [ $stage -le 12 ]; then fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts ## adding the layers for chain branch - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 + output-layer name=output input=lstm3 output-delay=$label_delay include-log-softmax=false dim=$num_targets max-change=1.5 # adding the layers for xent branch # This block prints the configs for a separate output that will be @@ -149,12 +175,12 @@ if [ $stage -le 12 ]; then # constant; and the 0.5 was tuned so as to make the relative progress # similar in the xent and regular final layers. output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 -EOF +EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage @@ -164,6 +190,7 @@ if [ $stage -le 13 ]; then touch $dir/egs/.nodelete # keep egs around when that run dies. steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ @@ -191,7 +218,6 @@ if [ $stage -le 13 ]; then --egs.chunk-right-context $chunk_right_context \ --egs.chunk-left-context-initial 0 \ --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ --tree-dir $treedir \ @@ -200,15 +226,15 @@ if [ $stage -le 13 ]; then fi graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then +if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_poco $dir $graph_dir fi decode_suff= -if [ $stage -le 15 ]; then +if [ $stage -le 16 ]; then iter_opts= if [ ! 
-z $decode_iter ]; then iter_opts=" --iter $decode_iter " @@ -223,8 +249,8 @@ if [ $stage -le 15 ]; then --extra-left-context-initial 0 \ --extra-right-context-final 0 \ --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + --online-ivector-dir $exp_root/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_poco_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; ) & done fi diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_e.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1d.sh similarity index 74% rename from egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_e.sh rename to egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1d.sh index 05fe3a017e3..67a0026b076 100755 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_100k_e.sh +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_1d.sh @@ -1,21 +1,33 @@ #!/bin/bash + +# Copyright 2017-18 Vimal Manohar +# Apache 2.0 + set -e +set -o pipefail -# This is fisher chain recipe for training a model on a subset of around 15 hours. +# This is fisher chain recipe for training a model on a subset of around +# 100-300 hours of supervised data. +# This system uses phone LM to model UNK. +# local/semisup/run_50k.sh and local/semisup/run_100k.sh show how to call this. # configs for 'chain' stage=0 -tdnn_affix=7e train_stage=-10 get_egs_stage=-10 -decode_iter= +exp_root=exp/semisup_100k + +nj=30 +tdnn_affix=_1d train_set=train_sup -ivector_train_set=train_sup -tree_affix=bi_e -nnet3_affix= -chain_affix= -exp=exp/semisup_100k -gmm=tri4a +ivector_train_set= # dataset for training i-vector extractor + +nnet3_affix= # affix for nnet3 dir -- relates to i-vector used +chain_affix= # affix for chain dir +tree_affix=bi_c +gmm=tri4a # Expect GMM model in $exp/$gmm for alignment + +# Neural network opts hidden_dim=1024 cell_dim=1024 projection_dim=256 @@ -34,13 +46,16 @@ extra_left_context=50 extra_right_context=0 remove_egs=false -common_egs_dir= +common_egs_dir= # if provided, will skip egs generation +common_treedir= # if provided, will skip the tree building stage + +decode_iter= # End configuration section. echo "$0 $@" # Print the command line for logging . ./cmd.sh -. ./path.sh +if [ -f ./path.sh ]; then . ./path.sh; fi . ./utils/parse_options.sh if ! cuda-compiled; then @@ -51,33 +66,48 @@ where "nvcc" is installed. 
EOF fi -gmm_dir=$exp/$gmm # used to get training lattices (for chain supervision) -treedir=$exp/chain${chain_affix}/tree_${tree_affix} -lat_dir=$exp/chain${chain_affix}/$(basename $gmm_dir)_${train_set}_sp_lats # training lattices directory -dir=$exp/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp +gmm_dir=$exp_root/$gmm # used to get training lattices (for chain supervision) +treedir=$exp_root/chain${chain_affix}/tree_${tree_affix} +lat_dir=$exp_root/chain${chain_affix}/${gmm}_${train_set}_sp_lats # training lattices directory +dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}_sp train_data_dir=data/${train_set}_sp_hires -train_ivector_dir=$exp/nnet3${nnet3_affix}/ivectors_${ivector_train_set}_sp_hires +train_ivector_dir=$exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires lang=data/lang_chain # The iVector-extraction and feature-dumping parts are the same as the standard # nnet3 setup, and you can skip them by setting "--stage 8" if you have already # run those things. - -local/semisup/nnet3/run_ivector_common.sh --stage $stage --exp $exp \ +local/nnet3/run_ivector_common.sh --stage $stage --exp-root $exp_root \ --speed-perturb true \ --train-set $train_set \ + --ivector-train-set "$ivector_train_set" \ --nnet3-affix "$nnet3_affix" || exit 1 -if [ $stage -le 9 ]; then +if [ "$train_set" != "$ivector_train_set" ]; then + if [ $stage -le 9 ]; then + # We extract iVectors on all the ${train_set} data, which will be what we + # train the system on. + # having a larger number of speakers is helpful for generalization, and to + # handle per-utterance decoding well (iVector starts at zero). + utils/data/modify_speaker_info.sh --utts-per-spk-max 2 \ + data/${train_set}_sp_hires data/${train_set}_sp_max2_hires + + steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ + data/${train_set}_sp_max2_hires $exp_root/nnet3${nnet3_affix}/extractor \ + $exp_root/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires || exit 1; + fi +fi + +if [ $stage -le 10 ]; then # Get the alignments as lattices (gives the chain training more freedom). # use the same num-jobs as the alignments - steps/align_fmllr_lats.sh --nj 30 --cmd "$train_cmd" \ + steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" \ --generate-ali-from-lats true data/${train_set}_sp \ data/lang $gmm_dir $lat_dir || exit 1; rm $lat_dir/fsts.*.gz # save space fi -if [ $stage -le 10 ]; then +if [ $stage -le 11 ]; then # Create a version of the lang/ directory that has one state per phone in the # topo file. [note, it really has two states.. the first one is only repeated # once, the second one has zero or more repeats.] @@ -90,15 +120,18 @@ if [ $stage -le 10 ]; then steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo fi -if [ $stage -le 11 ]; then - # Build a tree using our new topology. - steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ - --leftmost-questions-truncate -1 \ - --context-opts "--context-width=2 --central-position=1" \ - --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 +if [ -z "$common_treedir" ]; then + if [ $stage -le 12 ]; then + # Build a tree using our new topology. 
+ steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 7000 data/${train_set}_sp $lang $lat_dir $treedir || exit 1 + fi +else + treedir=$common_treedir fi -if [ $stage -le 12 ]; then +if [ $stage -le 13 ]; then echo "$0: creating neural net configs using the xconfig parser"; num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}') @@ -150,7 +183,7 @@ EOF steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ fi -if [ $stage -le 13 ]; then +if [ $stage -le 14 ]; then if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then utils/create_split_dir.pl \ /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage @@ -160,6 +193,7 @@ if [ $stage -le 13 ]; then touch $dir/egs/.nodelete # keep egs around when that run dies. steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$common_egs_dir" \ --cmd "$decode_cmd" \ --feat.online-ivector-dir $train_ivector_dir \ --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ @@ -187,7 +221,6 @@ if [ $stage -le 13 ]; then --egs.chunk-right-context $chunk_right_context \ --egs.chunk-left-context-initial 0 \ --egs.chunk-right-context-final 0 \ - --egs.dir "$common_egs_dir" \ --cleanup.remove-egs $remove_egs \ --feat-dir $train_data_dir \ --tree-dir $treedir \ @@ -196,15 +229,15 @@ if [ $stage -le 13 ]; then fi graph_dir=$dir/graph_poco -if [ $stage -le 14 ]; then +if [ $stage -le 15 ]; then # Note: it might appear that this $lang directory is mismatched, and it is as # far as the 'topo' is concerned, but this script doesn't read the 'topo' from # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 data/lang_poco_test $dir $graph_dir + utils/mkgraph.sh --self-loop-scale 1.0 data/lang_test_poco $dir $graph_dir fi decode_suff= -if [ $stage -le 15 ]; then +if [ $stage -le 16 ]; then iter_opts= if [ ! -z $decode_iter ]; then iter_opts=" --iter $decode_iter " @@ -219,8 +252,8 @@ if [ $stage -le 15 ]; then --extra-left-context-initial 0 \ --extra-right-context-final 0 \ --frames-per-chunk 160 \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; + --online-ivector-dir $exp_root/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ + $graph_dir data/${decode_set}_hires $dir/decode_poco_${decode_set}${decode_iter:+_$decode_iter}${decode_suff} || exit 1; ) & done fi diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1a.sh new file mode 100644 index 00000000000..c149a856515 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1a.sh @@ -0,0 +1,468 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# This script is semi-supervised recipe with around 50 hours of supervised data +# and 250 hours unsupervised data with naive splitting. +# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI", +# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018 +# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf +# local/semisup/run_50k.sh shows how to call this. + +# We use the combined data for i-vector extractor training. 
+# We use 4-gram LM trained on 1250 hours of data excluding the 250 hours +# unsupervised data to create LM for decoding. Rescoring is done with +# a larger 4-gram LM. +# This differs from the case in run_tdnn_100k_semisupervised.sh. + +# This script uses phone LM to model UNK. +# This script uses the same tree as that for the seed model. +# See the comments in the script about how to change these. + +# Unsupervised set: train_unsup100k_250k (250 hour subset of Fisher excluding 100 hours for supervised) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices + +set -u -e -o pipefail + +stage=0 +train_stage=-100 +nj=80 +test_nj=50 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +exp_root=exp/semisup_50k +chain_affix=_semi50k_100k_250k # affix for chain dir + # 50 hour subset out of 100 hours of supervised data + # 250 hour subset out of (1500-100=1400) hours of unsupervised data +tdnn_affix=_semisup_1a + +# Datasets -- Expects data/$supervised_set and data/$unsupervised_set to be +# present +supervised_set=train_sup50k +unsupervised_set=train_unsup100k_250k + +# Input seed system +sup_chain_dir=exp/semisup_50k/chain_semi50k_100k_250k/tdnn_lstm_1a_sp # supervised chain system +sup_lat_dir=exp/semisup_50k/chain_semi50k_100k_250k/tri4a_train_sup50k_unk_lats # lattices for supervised set +sup_tree_dir=exp/semisup_50k/chain_semi50k_100k_250k/tree_bi_a # tree directory for supervised chain system +ivector_root_dir=exp/semisup_50k/nnet3_semi50k_100k_250k # i-vector extractor root directory + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation + +# Neural network opts +hidden_dim=1024 +cell_dim=1024 +projection_dim=256 + +# training options +num_epochs=2 +minibatch_size=64,32 +chunk_left_context=40 +chunk_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk_decoding=160 + +decode_iter= # Iteration to decode with + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +# The following can be replaced with the versions that do not model +# UNK using phone LM. $sup_lat_dir should also ideally be changed. +unsup_decode_lang=data/lang_test_poco_ex250k_unk +unsup_decode_graph_affix=_poco_ex250k_unk +test_lang=data/lang_test_poco_unk +test_graph_affix=_poco_unk + +unsup_rescore_lang=${unsup_decode_lang}_big + +dir=$exp_root/chain${chain_affix}/tdnn_lst${tdnn_affix} + +if ! cuda-compiled; then + cat < \ +# $sup_chain_dir/best_path_${unsupervised_set_perturbed}_big/frame_subsampling_factor +# +# # This should be 1 if using a different source for supervised data alignments. 
+# # However alignments in seed tree directory have already been sub-sampled. +# echo $frame_subsampling_factor > \ +# $sup_tree_dir/frame_subsampling_factor +# +# # Build a new tree using stats from both supervised and unsupervised data +# steps/nnet3/chain/build_tree_multiple_sources.sh \ +# --use-fmllr false --context-opts "--context-width=2 --central-position=1" \ +# --frame-subsampling-factor $frame_subsampling_factor \ +# 7000 $lang \ +# data/${supervised_set_perturbed} \ +# ${sup_tree_dir} \ +# data/${unsupervised_set_perturbed} \ +# $chaindir/best_path_${unsupervised_set_perturbed} \ +# $treedir || exit 1 +# fi +# +# sup_tree_dir=$treedir # Use the new tree dir for further steps + +# Train denominator FST using phone alignments from +# supervised and unsupervised data +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ + ${sup_tree_dir} ${sup_chain_dir}/best_path_${unsupervised_set_perturbed}_big \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=40" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim + + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
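+  # (Worked example: with the default xent_regularize=0.025 set at the top of
+  # this script, the learning-rate factor comes out to 0.5 / 0.025 = 20.)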
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + + output name=output-0 input=output.affine + output name=output-1 input=output.affine + + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +. $dir/configs/vars + +left_context=$[model_left_context + chunk_left_context] +right_context=$[model_right_context + chunk_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") +egs_left_context_initial=$(perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)") +egs_right_context_final=$(perl -e "print int($right_context_final + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set_perturbed} + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor $frame_subsampling_factor \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $sup_ivector_dir \ + --generate-egs-scp true \ + data/${supervised_set_perturbed}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=150 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=${sup_chain_dir}/decode_${unsupervised_set_perturbed}_big +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set_perturbed} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
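+    # (Illustration of the context values computed above from $dir/configs/vars;
+    # the numbers here are only an example, the real ones depend on the network:
+    # if model_left_context were 40, then with chunk_left_context=40 and
+    # frame_subsampling_factor=3 we would get left_context=80 and
+    # egs_left_context=int(80 + 3/2)=81.)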
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_${unsupervised_set_perturbed}_big/weights.scp \ + --online-ivector-dir $ivector_root_dir/ivectors_${unsupervised_set_perturbed}_hires \ + --generate-egs-scp true $unsup_egs_opts \ + data/${unsupervised_set_perturbed}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --block-size 128 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $sup_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.chunk-width $frames_per_eg \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch "$minibatch_size" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set_perturbed}_hires \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +test_graph_dir=$dir/graph${test_graph_affix} +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
+ utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir +fi + +if [ $stage -le 18 ]; then + rm -f $dir/.error + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + if [ $num_jobs -gt $test_nj ]; then num_jobs=$test_nj; fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \ + --online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \ + --frames-per-chunk $frames_per_chunk_decoding \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + $test_graph_dir data/${decode_set}_hires \ + $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error + ) & + done + wait; + if [ -f $dir/.error ]; then + echo "$0: Decoding failed. See $dir/decode${test_graph_affix}_*/log/*" + exit 1 + fi +fi + +exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1c.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1c.sh new file mode 100644 index 00000000000..7de9103668f --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1c.sh @@ -0,0 +1,468 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# This script is semi-supervised recipe with around 50 hours of supervised data +# and 250 hours unsupervised data with naive splitting. +# Based on "Semi-Supervised Training of Acoustic Models using Lattice-Free MMI", +# Vimal Manohar, Hossein Hadian, Daniel Povey, Sanjeev Khudanpur, ICASSP 2018 +# http://www.danielpovey.com/files/2018_icassp_semisupervised_mmi.pdf +# local/semisup/run_50k.sh shows how to call this. + +# We use the combined data for i-vector extractor training. +# We use 4-gram LM trained on 1250 hours of data excluding the 250 hours +# unsupervised data to create LM for decoding. Rescoring is done with +# a larger 4-gram LM. +# This differs from the case in run_tdnn_100k_semisupervised.sh. + +# This script uses phone LM to model UNK. +# This script uses the same tree as that for the seed model. +# See the comments in the script about how to change these. 
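+
+# A hypothetical invocation (the actual call is in local/semisup/run_50k.sh;
+# the option names map onto the variables defined below via
+# utils/parse_options.sh):
+#   local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_1c.sh \
+#     --stage 0 --train-stage -100 \
+#     --sup-chain-dir exp/semisup_50k/chain_semi50k_100k_250k/tdnn_lstm_1c_sp \
+#     --supervision-weights 1.0,0.3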
+ +# Unsupervised set: train_unsup100k_250k (250 hour subset of Fisher excluding 100 hours for supervised) +# unsup_frames_per_eg=150 +# Deriv weights: Lattice posterior of best path pdf +# Unsupervised weight: 1.0 +# Weights for phone LM (supervised, unsupervised): 3,2 +# LM for decoding unsupervised data: 4gram +# Supervision: Naive split lattices + +set -u -e -o pipefail + +stage=0 +train_stage=-100 +nj=80 +test_nj=50 + +# The following 3 options decide the output directory for semi-supervised +# chain system +# dir=${exp_root}/chain${chain_affix}/tdnn_lstm${tdnn_affix} + +exp_root=exp/semisup_50k +chain_affix=_semi50k_100k_250k # affix for chain dir + # 50 hour subset out of 100 hours of supervised data + # 250 hour subset out of (1500-100=1400) hours of unsupervised data +tdnn_affix=_semisup_1c + +# Datasets -- Expects data/$supervised_set and data/$unsupervised_set to be +# present +supervised_set=train_sup50k +unsupervised_set=train_unsup100k_250k + +# Input seed system +sup_chain_dir=exp/semisup_50k/chain_semi50k_100k_250k/tdnn_lstm_1c_sp # supervised chain system +sup_lat_dir=exp/semisup_50k/chain_semi50k_100k_250k/tri4a_train_sup50k_lats # lattices for supervised set +sup_tree_dir=exp/semisup_50k/chain_semi50k_100k_250k/tree_bi_c # tree directory for supervised chain system +ivector_root_dir=exp/semisup_50k/nnet3_semi50k_100k_250k # i-vector extractor root directory + +# Semi-supervised options +supervision_weights=1.0,1.0 # Weights for supervised, unsupervised data egs. + # Can be used to scale down the effect of unsupervised data + # by using a smaller scale for it e.g. 1.0,0.3 +lm_weights=3,2 # Weights on phone counts from supervised, unsupervised data for denominator FST creation + +sup_egs_dir= # Supply this to skip supervised egs creation +unsup_egs_dir= # Supply this to skip unsupervised egs creation +unsup_egs_opts= # Extra options to pass to unsupervised egs creation + +# Neural network opts +hidden_dim=1024 +cell_dim=1024 +projection_dim=256 + +# training options +num_epochs=2 +minibatch_size=64,32 +chunk_left_context=40 +chunk_right_context=0 +dropout_schedule='0,0@0.20,0.3@0.50,0' +xent_regularize=0.025 +self_repair_scale=0.00001 +label_delay=5 + +# decode options +extra_left_context=50 +extra_right_context=0 +frames_per_chunk_decoding=160 + +decode_iter= # Iteration to decode with + +# End configuration section. +echo "$0 $@" # Print the command line for logging + +. ./cmd.sh +if [ -f ./path.sh ]; then . ./path.sh; fi +. ./utils/parse_options.sh + +# The following can be replaced with the versions that do not model +# UNK using phone LM. $sup_lat_dir should also ideally be changed. +unsup_decode_lang=data/lang_test_poco_ex250k +unsup_decode_graph_affix=_poco_ex250k +test_lang=data/lang_test_poco +test_graph_affix=_poco + +unsup_rescore_lang=${unsup_decode_lang}_big + +dir=$exp_root/chain${chain_affix}/tdnn_lst${tdnn_affix} + +if ! cuda-compiled; then + cat < \ +# $sup_chain_dir/best_path_${unsupervised_set_perturbed}_big/frame_subsampling_factor +# +# # This should be 1 if using a different source for supervised data alignments. +# # However alignments in seed tree directory have already been sub-sampled. 
+# echo $frame_subsampling_factor > \ +# $sup_tree_dir/frame_subsampling_factor +# +# # Build a new tree using stats from both supervised and unsupervised data +# steps/nnet3/chain/build_tree_multiple_sources.sh \ +# --use-fmllr false --context-opts "--context-width=2 --central-position=1" \ +# --frame-subsampling-factor $frame_subsampling_factor \ +# 7000 $lang \ +# data/${supervised_set_perturbed} \ +# ${sup_tree_dir} \ +# data/${unsupervised_set_perturbed} \ +# $chaindir/best_path_${unsupervised_set_perturbed} \ +# $treedir || exit 1 +# fi +# +# sup_tree_dir=$treedir # Use the new tree dir for further steps + +# Train denominator FST using phone alignments from +# supervised and unsupervised data +if [ $stage -le 10 ]; then + steps/nnet3/chain/make_weighted_den_fst.sh --num-repeats $lm_weights --cmd "$train_cmd" \ + ${sup_tree_dir} ${sup_chain_dir}/best_path_${unsupervised_set_perturbed}_big \ + $dir +fi + +if [ $stage -le 11 ]; then + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $sup_tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + + lstm_opts="decay-time=40" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-layer name=tdnn1 dim=$hidden_dim + relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim + relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim + + fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim + relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim + fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts + + output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 + + # adding the layers for xent branch + # This block prints the configs for a separate output that will be + # trained with a cross-entropy objective in the 'chain' models... this + # has the effect of regularizing the hidden parts of the model. we use + # 0.5 / args.xent_regularize as the learning rate factor- the factor of + # 0.5 / args.xent_regularize is suitable as it means the xent + # final-layer learns at a rate independent of the regularization + # constant; and the 0.5 was tuned so as to make the relative progress + # similar in the xent and regular final layers. 
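+  # (Worked example: with the default xent_regularize=0.025 set at the top of
+  # this script, the learning-rate factor comes out to 0.5 / 0.025 = 20.)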
+ output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 + # We use separate outputs for supervised and unsupervised data + # so we can properly track the train and valid objectives. + + output name=output-0 input=output.affine + output name=output-1 input=output.affine + + output name=output-0-xent input=output-xent.log-softmax + output name=output-1-xent input=output-xent.log-softmax +EOF + + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + +. $dir/configs/vars + +left_context=$[model_left_context + chunk_left_context] +right_context=$[model_right_context + chunk_right_context] +left_context_initial=$model_left_context +right_context_final=$model_right_context + +egs_left_context=$(perl -e "print int($left_context + $frame_subsampling_factor / 2)") +egs_right_context=$(perl -e "print int($right_context + $frame_subsampling_factor / 2)") +egs_left_context_initial=$(perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)") +egs_right_context_final=$(perl -e "print int($right_context_final + $frame_subsampling_factor / 2)") + +if [ -z "$sup_egs_dir" ]; then + sup_egs_dir=$dir/egs_${supervised_set_perturbed} + frames_per_eg=$(cat $sup_chain_dir/egs/info/frames_per_eg) + + if [ $stage -le 12 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage + fi + mkdir -p $sup_egs_dir/ + touch $sup_egs_dir/.nodelete # keep egs around when that run dies. + + echo "$0: generating egs from the supervised data" + steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frame-subsampling-factor $frame_subsampling_factor \ + --alignment-subsampling-factor $frame_subsampling_factor \ + --frames-per-eg $frames_per_eg \ + --frames-per-iter 1500000 \ + --cmvn-opts "$cmvn_opts" \ + --online-ivector-dir $sup_ivector_dir \ + --generate-egs-scp true \ + data/${supervised_set_perturbed}_hires $dir \ + $sup_lat_dir $sup_egs_dir + fi +else + frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) +fi + +unsup_frames_per_eg=150 # Using a frames-per-eg of 150 for unsupervised data + # was found to be better than allowing smaller chunks + # (160,140,110,80) like for supervised system +lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices when + # creating numerator supervision +lattice_prune_beam=4.0 # beam for pruning the lattices prior to getting egs + # for unsupervised data +tolerance=1 # frame-tolerance for chain training + +unsup_lat_dir=${sup_chain_dir}/decode_${unsupervised_set_perturbed}_big +if [ -z "$unsup_egs_dir" ]; then + unsup_egs_dir=$dir/egs_${unsupervised_set_perturbed} + + if [ $stage -le 13 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then + utils/create_split_dir.pl \ + /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage + fi + mkdir -p $unsup_egs_dir + touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
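+
+    # Notes on the options below (descriptive only): the lattices for the
+    # unsupervised data come from decoding with the chain seed system, so they
+    # are already at the subsampled frame rate and --alignment-subsampling-factor
+    # is 1 (the supervised egs above instead subsample the GMM lattices by
+    # $frame_subsampling_factor).  --lattice-lm-scale includes the lattice
+    # graph/LM scores, scaled by 0.5, in the numerator supervision, and
+    # weights.scp from the best-path directory provides per-frame best-path
+    # posteriors that are used as derivative weights.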
+ + echo "$0: generating egs from the unsupervised data" + steps/nnet3/chain/get_egs.sh \ + --cmd "$decode_cmd" --alignment-subsampling-factor 1 \ + --left-tolerance $tolerance --right-tolerance $tolerance \ + --left-context $egs_left_context --right-context $egs_right_context \ + --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ + --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ + --frame-subsampling-factor $frame_subsampling_factor \ + --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ + --lattice-prune-beam "$lattice_prune_beam" \ + --deriv-weights-scp $sup_chain_dir/best_path_${unsupervised_set_perturbed}_big/weights.scp \ + --online-ivector-dir $ivector_root_dir/ivectors_${unsupervised_set_perturbed}_hires \ + --generate-egs-scp true $unsup_egs_opts \ + data/${unsupervised_set_perturbed}_hires $dir \ + $unsup_lat_dir $unsup_egs_dir + fi +fi + +comb_egs_dir=$dir/comb_egs +if [ $stage -le 14 ]; then + steps/nnet3/chain/multilingual/combine_egs.sh --cmd "$train_cmd" \ + --block-size 128 \ + --lang2weight $supervision_weights 2 \ + $sup_egs_dir $unsup_egs_dir $comb_egs_dir + touch $comb_egs_dir/.nodelete # keep egs around when that run dies. +fi + +if [ $train_stage -le -4 ]; then + # This is to skip stages of den-fst creation, which was already done. + train_stage=-4 +fi + +if [ $stage -le 15 ]; then + steps/nnet3/chain/train.py --stage $train_stage \ + --egs.dir "$comb_egs_dir" \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $sup_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.00005 \ + --chain.apply-deriv-weights true \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --egs.chunk-width $frames_per_eg \ + --egs.chunk-left-context $chunk_left_context \ + --egs.chunk-right-context $chunk_right_context \ + --egs.chunk-left-context-initial 0 \ + --egs.chunk-right-context-final 0 \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.num-chunk-per-minibatch "$minibatch_size" \ + --trainer.frames-per-iter 1500000 \ + --trainer.num-epochs 4 \ + --trainer.optimization.shrink-value 0.99 \ + --trainer.optimization.num-jobs-initial 3 \ + --trainer.optimization.num-jobs-final 16 \ + --trainer.optimization.initial-effective-lrate 0.001 \ + --trainer.optimization.final-effective-lrate 0.0001 \ + --trainer.max-param-change 2.0 \ + --trainer.optimization.momentum 0.0 \ + --trainer.deriv-truncate-margin 8 \ + --cleanup.remove-egs false \ + --feat-dir data/${supervised_set_perturbed}_hires \ + --tree-dir $sup_tree_dir \ + --lat-dir $sup_lat_dir \ + --dir $dir || exit 1; +fi + +test_graph_dir=$dir/graph${test_graph_affix} +if [ $stage -le 17 ]; then + # Note: it might appear that this $lang directory is mismatched, and it is as + # far as the 'topo' is concerned, but this script doesn't read the 'topo' from + # the lang directory. 
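+  # The --self-loop-scale 1.0 below is the standard setting for building
+  # decoding graphs for chain models (conventional nnet3 systems use 0.1).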
+ utils/mkgraph.sh --self-loop-scale 1.0 ${test_lang} $dir $test_graph_dir +fi + +if [ $stage -le 18 ]; then + rm -f $dir/.error + for decode_set in dev test; do + ( + num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` + if [ $num_jobs -gt $test_nj ]; then num_jobs=$test_nj; fi + steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ + --nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \ + --online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \ + --frames-per-chunk $frames_per_chunk_decoding \ + --extra-left-context $extra_left_context \ + --extra-right-context $extra_right_context \ + --extra-left-context-initial 0 --extra-right-context-final 0 \ + $test_graph_dir data/${decode_set}_hires \ + $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error + ) & + done + wait; + if [ -f $dir/.error ]; then + echo "$0: Decoding failed. See $dir/decode${test_graph_affix}_*/log/*" + exit 1 + fi +fi + +exit 0; diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_a.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_a.sh deleted file mode 100644 index bab9e69bbf3..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_a.sh +++ /dev/null @@ -1,511 +0,0 @@ -#!/bin/bash - -# This script uses phone LM to model UNK. - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_50k - -unsupervised_set=train_unsup100k_250k # set this to your choice of unsupervised data -supervised_set=train_sup50k -semisup_train_set=semisup50k_250k - -tdnn_affix=7a # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_i - -gmm=tri4a - -nnet3_affix=_semi50k_250k # affix for nnet3 and chain dir -- relates to i-vector used -chain_affix=_semi50k_250k - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb1b # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=5,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 
-finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. 
$dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${chain_affix}/${gmm}_${supervised_set}_unk_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_b.sh b/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_b.sh deleted file mode 100644 index ebd6c090267..00000000000 --- a/egs/fisher_english/s5/local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_conf_b.sh +++ /dev/null @@ -1,511 +0,0 @@ -#!/bin/bash - -# This script uses phone LM to model UNK. - -# Unsupervised set: train_unsup100k_250k -# unsup_frames_per_eg=150 -# Deriv weights: Lattice posterior of best path pdf -# Unsupervised weight: 1.0 -# Weights for phone LM (supervised, unsupervises): 3,2 -# LM for decoding unsupervised data: 4gram - -set -u -e -o pipefail - -stage=-2 -train_stage=-100 -nj=40 -decode_nj=40 -exp=exp/semisup_50k - -unsupervised_set=train_unsup100k_250k # set this to your choice of unsupervised data -supervised_set=train_sup50k -semisup_train_set=semisup50k_250k - -tdnn_affix=7b # affix for the supervised chain-model directory -train_supervised_opts="--stage -10 --train-stage -10" -tree_affix=bi_j - -gmm=tri4a - -nnet3_affix=_semi50k_250k # affix for nnet3 and chain dir -- relates to i-vector used -chain_affix=_semi50k_250k - -# Unsupervised options -decode_affix= -egs_affix= # affix for the egs that are generated from unsupervised data and for the comined egs dir -unsup_frames_per_eg=150 # if empty will be equal to the supervised model's config -- you will need to change minibatch_size for comb training accordingly -lattice_lm_scale=0.5 # lm-scale for using the weights from unsupervised lattices -lattice_prune_beam=4.0 # If supplied will prune the lattices prior to getting egs for unsupervised data -tolerance=1 -phone_insertion_penalty= - -# Semi-supervised options -comb_affix=comb1b # affix for new chain-model directory trained on the combined supervised+unsupervised subsets -supervision_weights=1.0,1.0 -lm_weights=5,2 -sup_egs_dir= -unsup_egs_dir= -unsup_egs_opts= - -remove_egs=false -common_egs_dir= - -hidden_dim=1024 -cell_dim=1024 -projection_dim=256 - -apply_deriv_weights=true -use_smart_splitting=true - -# training options -num_epochs=2 -minibatch_size=64,32 -chunk_left_context=40 -chunk_right_context=0 -dropout_schedule='0,0@0.20,0.3@0.50,0' -xent_regularize=0.025 -self_repair_scale=0.00001 -label_delay=5 - -# decode options -extra_left_context=50 -extra_right_context=0 - -decode_iter= - -do_finetuning=false - -finetune_stage=-2 -finetune_suffix=_finetune -finetune_iter=final -num_epochs_finetune=1 -finetune_xent_regularize=0.1 -finetune_opts="--chain.mmi-factor-schedule=0.05,0.05 --chain.smbr-factor-schedule=0.05,0.05" - -# End configuration section. -echo "$0 $@" # Print the command line for logging - -. cmd.sh -. ./path.sh -. ./utils/parse_options.sh - -egs_affix=${egs_affix}_prun${lattice_prune_beam}_lmwt${lattice_lm_scale}_tol${tolerance} -if $use_smart_splitting; then - comb_affix=${comb_affix:+${comb_affix}_smart} -else - comb_affix=${comb_affix:+${comb_affix}_naive} -fi - -RANDOM=0 - -if ! 
cuda-compiled; then - cat < $dir/configs/network.xconfig - input dim=100 name=ivector - input dim=40 name=input - - # please note that it is important to have input layer with the name=input - # as the layer immediately preceding the fixed-affine-layer to enable - # the use of short notation for the descriptor - fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat - - # the first splicing is moved before the lda layer, so no splicing here - relu-batchnorm-layer name=tdnn1 dim=$hidden_dim - relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1) dim=$hidden_dim - relu-batchnorm-layer name=tdnn3 input=Append(-1,0,1) dim=$hidden_dim - - fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - relu-batchnorm-layer name=tdnn6 input=Append(-3,0,3) dim=$hidden_dim - relu-batchnorm-layer name=tdnn7 input=Append(-3,0,3) dim=$hidden_dim - fast-lstmp-layer name=lstm3 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim delay=-3 dropout-proportion=0.0 $lstm_opts - - output-layer name=output input=lstm3 output-delay=$label_delay dim=$num_targets include-log-softmax=false max-change=1.5 - - # adding the layers for xent branch - # This block prints the configs for a separate output that will be - # trained with a cross-entropy objective in the 'chain' models... this - # has the effect of regularizing the hidden parts of the model. we use - # 0.5 / args.xent_regularize as the learning rate factor- the factor of - # 0.5 / args.xent_regularize is suitable as it means the xent - # final-layer learns at a rate independent of the regularization - # constant; and the 0.5 was tuned so as to make the relative progress - # similar in the xent and regular final layers. - output-layer name=output-xent input=lstm3 output-delay=$label_delay dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 - - output name=output-0 input=output.affine@$label_delay skip-in-init=true - output name=output-1 input=output.affine@$label_delay skip-in-init=true - - output name=output-0-xent input=output-xent.log-softmax@$label_delay skip-in-init=true - output name=output-1-xent input=output-xent.log-softmax@$label_delay skip-in-init=true -EOF - - steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ -fi - -. 
$dir/configs/vars - -left_context=$[model_left_context + chunk_left_context] -right_context=$[model_right_context + chunk_right_context] -left_context_initial=$model_left_context -right_context_final=$model_right_context - -egs_left_context=`perl -e "print int($left_context + $frame_subsampling_factor / 2)"` -egs_right_context=`perl -e "print int($right_context + $frame_subsampling_factor / 2)"` -egs_left_context_initial=`perl -e "print int($left_context_initial + $frame_subsampling_factor / 2)"` -egs_right_context_final=`perl -e "print int($right_context_final + $frame_subsampling_factor / 2)"` - -supervised_set=${supervised_set}_sp -sup_lat_dir=$exp/chain${chain_affix}/${gmm}_${supervised_set}_lats -if [ -z "$sup_egs_dir" ]; then - sup_egs_dir=$dir/egs_${supervised_set} - frames_per_eg=$(cat $chaindir/egs/info/frames_per_eg) - - if [ $stage -le 12 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $sup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$sup_egs_dir/storage $sup_egs_dir/storage - fi - mkdir -p $sup_egs_dir/ - touch $sup_egs_dir/.nodelete # keep egs around when that run dies. - - echo "$0: generating egs from the supervised data" - steps/nnet3/chain/get_egs.sh --cmd "$decode_cmd" \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frame-subsampling-factor $frame_subsampling_factor \ - --alignment-subsampling-factor 3 \ - --frames-per-eg $frames_per_eg \ - --frames-per-iter 1500000 \ - --cmvn-opts "$cmvn_opts" \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true \ - data/${supervised_set}_hires $dir \ - $sup_lat_dir $sup_egs_dir - fi -else - frames_per_eg=$(cat $sup_egs_dir/info/frames_per_eg) -fi - -unsupervised_set=${unsupervised_set}_sp -unsup_lat_dir=${chaindir}/decode_${unsupervised_set}${decode_affix} - -if [ -z "$unsup_egs_dir" ]; then - [ -z $unsup_frames_per_eg ] && [ ! -z "$frames_per_eg" ] && unsup_frames_per_eg=$frames_per_eg - unsup_egs_dir=$dir/egs_${unsupervised_set}${decode_affix}${egs_affix} - - if [ $stage -le 13 ]; then - if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $unsup_egs_dir/storage ]; then - utils/create_split_dir.pl \ - /export/b0{5,6,7,8}/$USER/kaldi-data/egs/fisher_english-$(date +'%m_%d_%H_%M')/s5c/$unsup_egs_dir/storage $unsup_egs_dir/storage - fi - mkdir -p $unsup_egs_dir - touch $unsup_egs_dir/.nodelete # keep egs around when that run dies. 
- - echo "$0: generating egs from the unsupervised data" - if $use_smart_splitting; then - get_egs_script=steps/nnet3/chain/get_egs_split.sh - else - get_egs_script=steps/nnet3/chain/get_egs.sh - fi - - $get_egs_script --cmd "$decode_cmd --h-rt 100:00:00" --alignment-subsampling-factor 1 \ - --left-tolerance $tolerance --right-tolerance $tolerance \ - --left-context $egs_left_context --right-context $egs_right_context \ - --left-context-initial $egs_left_context_initial --right-context-final $egs_right_context_final \ - --frames-per-eg $unsup_frames_per_eg --frames-per-iter 1500000 \ - --frame-subsampling-factor $frame_subsampling_factor \ - --cmvn-opts "$cmvn_opts" --lattice-lm-scale $lattice_lm_scale \ - --lattice-prune-beam "$lattice_prune_beam" \ - --phone-insertion-penalty "$phone_insertion_penalty" \ - --deriv-weights-scp $chaindir/best_path_${unsupervised_set}${decode_affix}/weights.scp \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --generate-egs-scp true $unsup_egs_opts \ - data/${unsupervised_set}_hires $dir \ - $unsup_lat_dir $unsup_egs_dir - fi -fi - -comb_egs_dir=$dir/${comb_affix}_egs${decode_affix}${egs_affix}_multi - -if [ $stage -le 14 ]; then - steps/nnet3/multilingual/combine_egs.sh --cmd "$train_cmd" \ - --minibatch-size 64 --frames-per-iter 1500000 \ - --lang2weight $supervision_weights --egs-prefix cegs. 2 \ - $sup_egs_dir $unsup_egs_dir $comb_egs_dir - touch $comb_egs_dir/.nodelete # keep egs around when that run dies. -fi - -if [ $train_stage -le -4 ]; then - train_stage=-4 -fi - -if [ $stage -le 15 ]; then - steps/nnet3/chain/train.py --stage $train_stage \ - --egs.dir "$comb_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${semisup_train_set}_sp_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ - --chain.xent-regularize $xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights $apply_deriv_weights \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width $frames_per_eg \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.dropout-schedule $dropout_schedule \ - --trainer.num-chunk-per-minibatch "$minibatch_size" \ - --trainer.frames-per-iter 1500000 \ - --trainer.max-param-change 2.0 \ - --trainer.num-epochs 4 \ - --trainer.optimization.shrink-value 0.99 \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.001 \ - --trainer.optimization.final-effective-lrate 0.0001 \ - --trainer.optimization.momentum 0.0 \ - --trainer.deriv-truncate-margin 8 \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir $dir || exit 1; -fi - -graph_dir=$dir/graph${test_graph_affix} -if [ $stage -le 17 ]; then - # Note: it might appear that this $lang directory is mismatched, and it is as - # far as the 'topo' is concerned, but this script doesn't read the 'topo' from - # the lang directory. - utils/mkgraph.sh --self-loop-scale 1.0 $test_lang $dir $graph_dir -fi - -if [ $stage -le 18 ]; then - iter_opts= - if [ ! 
-z $decode_iter ]; then - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/${decode_iter}.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/${decode_iter}.mdl $dir/${decode_iter}-output.mdl || exit 1 - iter_opts=" --iter ${decode_iter}-output " - else - nnet3-copy --edits="remove-output-nodes name=output;rename-node old-name=output-0 new-name=output" $dir/final.mdl - | \ - nnet3-am-copy --set-raw-nnet=- $dir/final.mdl $dir/final-output.mdl || exit 1 - iter_opts=" --iter final-output " - fi - - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" $iter_opts \ - --online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 160 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -if ! $do_finetuning; then - wait - exit 0 -fi - -if [ $stage -le 19 ]; then - mkdir -p ${dir}${finetune_suffix} - - for f in phone_lm.fst normalization.fst den.fst tree 0.trans_mdl cmvn_opts; do - cp ${dir}/$f ${dir}${finetune_suffix} || exit 1 - done - cp -r ${dir}/configs ${dir}${finetune_suffix} || exit 1 - - nnet3-copy --edits="remove-output-nodes name=output;remove-output-nodes name=output-xent;rename-node old-name=output-0 new-name=output;rename-node old-name=output-0-xent new-name=output-xent" \ - $dir/${finetune_iter}.mdl ${dir}${finetune_suffix}/init.raw - - if [ $finetune_stage -le -1 ]; then - finetune_stage=-1 - fi - - steps/nnet3/chain/train.py --stage $finetune_stage \ - --trainer.input-model ${dir}${finetune_suffix}/init.raw \ - --egs.dir "$sup_egs_dir" \ - --cmd "$decode_cmd" \ - --feat.online-ivector-dir $exp/nnet3${nnet3_affix}/ivectors_${supervised_set}_hires \ - --feat.cmvn-opts "--norm-means=false --norm-vars=false" $finetune_opts \ - --chain.xent-regularize $finetune_xent_regularize \ - --chain.leaky-hmm-coefficient 0.1 \ - --chain.l2-regularize 0.00005 \ - --chain.apply-deriv-weights true \ - --chain.lm-opts="--num-extra-lm-states=2000" \ - --egs.opts "--frames-overlap-per-eg 0" \ - --egs.chunk-width 150 \ - --egs.chunk-left-context $chunk_left_context \ - --egs.chunk-right-context $chunk_right_context \ - --egs.chunk-left-context-initial 0 \ - --egs.chunk-right-context-final 0 \ - --trainer.num-chunk-per-minibatch "150=64/300=32" \ - --trainer.frames-per-iter 1500000 \ - --trainer.num-epochs $num_epochs_finetune \ - --trainer.optimization.num-jobs-initial 3 \ - --trainer.optimization.num-jobs-final 16 \ - --trainer.optimization.initial-effective-lrate 0.0001 \ - --trainer.optimization.final-effective-lrate 0.00001 \ - --trainer.max-param-change 2.0 \ - --trainer.optimization.do-final-combination false \ - --cleanup.remove-egs false \ - --feat-dir data/${supervised_set}_hires \ - --tree-dir $treedir \ - --lat-dir $sup_lat_dir \ - --dir ${dir}${finetune_suffix} || exit 1; -fi - -dir=${dir}${finetune_suffix} - -if [ $stage -le 20 ]; then - for decode_set in dev test; do - ( - num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l` - steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \ - --nj $num_jobs --cmd "$decode_cmd" \ - --online-ivector-dir 
$exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \ - --frames-per-chunk 150 \ - --extra-left-context $extra_left_context \ - --extra-right-context $extra_right_context \ - --extra-left-context-initial 0 --extra-right-context-final 0 \ - $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}${decode_iter:+_iter$decode_iter} || exit 1; - ) & - done -fi - -wait; -exit 0; - diff --git a/egs/fisher_english/s5/local/semisup/run_lstm_100k.sh b/egs/fisher_english/s5/local/semisup/run_lstm_100k.sh new file mode 100644 index 00000000000..318cc55542a --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/run_lstm_100k.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# This script demonstrates semi-supervised training using 100 hours of +# supervised data and 250 hours of unsupervised data. +# We assume the supervised data is in data/train_sup and unsupervised data +# is in data/train_unsup100k_250k. +# For LM training, we only use the supervised set corresponding to 100 hours as +# opposed to the case in run_50k.sh, where we included part of the +# transcripts in data/train/text. +# This uses only 100 hours supervised set for i-vector extractor training, +# which is different from run_50k.sh, which uses combined supervised + +# unsupervised set. +# This script is expected to be run after stage 8 of run_100k.sh. + +. ./cmd.sh +. ./path.sh + +set -o pipefail +exp_root=exp/semisup_100k + +stage=0 + +. utils/parse_options.sh + +for f in data/train_sup/utt2spk data/train_unsup100k_250k/utt2spk \ + data/train_sup/text \ + data/lang_test_poco_sup100k/G.fst \ + data/lang_test_poco_sup100k_unk/G.fst + ; do + if [ ! -f $f ]; then + echo "$0: Could not find $f" + exit 1 + fi +done + + +############################################################################### +# Train seed chain system using 100 hours supervised data. +# Here we train i-vector extractor on only the supervised set. +############################################################################### + +if [ $stage -le 1 ]; then + local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \ + --train-set train_sup \ + --ivector-train-set "" \ + --nnet3-affix "" --chain-affix "" \ + --tdnn-affix _1b --tree-affix bi_a \ + --gmm tri4a --exp-root $exp_root || exit 1 +fi + +############################################################################### +# Semi-supervised training using 100 hours supervised data and +# 250 hours unsupervised data. We use i-vector extractor, tree, lattices +# and seed chain system from the previous stage. +############################################################################### + +if [ $stage -le 2 ]; then + local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1b.sh \ + --supervised-set train_sup \ + --unsupervised-set train_unsup100k_250k \ + --sup-chain-dir $exp_root/chain/tdnn_lstm_1b_sp \ + --sup-lat-dir $exp_root/chain/tri4a_train_sup_unk_lats \ + --sup-tree-dir $exp_root/chain/tree_bi_a \ + --ivector-root-dir $exp_root/nnet3 \ + --chain-affix "" \ + --tdnn-affix _semisup100k_250k_1b \ + --exp-root $exp_root || exit 1 +fi + +############################################################################### +# Oracle system trained on combined 350 hours including both supervised and +# unsupervised sets. We use i-vector extractor, tree, and GMM trained +# on only the supervised for fair comparison to semi-supervised experiments. 
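+# (The oracle gives an upper bound on what the semi-supervised system could
+# achieve: it is trained on the combined data using the actual transcripts of
+# the unsupervised subset.)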
+###############################################################################
+
+if [ $stage -le 3 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \
+    --hidden-dim 1536 --cell-dim 1536 --projection-dim 384 \
+    --train-set semisup100k_250k \
+    --nnet3-affix "" --chain-affix "" \
+    --common-treedir $exp_root/chain/tree_bi_a \
+    --tdnn-affix _oracle100k_250k_1b --nj 100 \
+    --gmm tri4a --exp $exp_root \
+    --stage 9 || exit 1
+fi
+
diff --git a/egs/fisher_english/s5/local/semisup/run_lstm_100k_500k.sh b/egs/fisher_english/s5/local/semisup/run_lstm_100k_500k.sh
new file mode 100644
index 00000000000..9584a0296be
--- /dev/null
+++ b/egs/fisher_english/s5/local/semisup/run_lstm_100k_500k.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+# Copyright 2017 Vimal Manohar
+# Apache 2.0
+
+# This script demonstrates semi-supervised training using 100 hours of
+# supervised data and a larger unsupervised subset of 500k utterances
+# (data/train_unsup100k_500k), which is created in stage 0 below.
+# We assume the supervised data is in data/train_sup and the full training
+# data is in data/train.
+# For LM training, we only use the supervised set corresponding to 100 hours as
+# opposed to the case in run_50k.sh, where we included part of the
+# transcripts in data/train/text.
+# This uses only 100 hours supervised set for i-vector extractor training,
+# which is different from run_50k.sh, which uses combined supervised +
+# unsupervised set.
+# This script is expected to be run after stage 8 of run_100k.sh.
+
+. ./cmd.sh
+. ./path.sh
+
+set -o pipefail
+exp_root=exp/semisup_100k
+
+stage=0
+
+. utils/parse_options.sh
+
+for f in data/train_sup/utt2spk data/train/utt2spk \
+  data/train_sup/text \
+  data/lang_test_poco_sup100k/G.fst \
+  data/lang_test_poco_sup100k_unk/G.fst
+  ; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find $f"
+    exit 1
+  fi
+done
+
+if [ $stage -le 0 ]; then
+  utils/subset_data_dir.sh --utt-list <(utils/filter_scp.pl --exclude data/train_sup/utt2spk data/train/utt2spk) data/train data/train_unsup100k
+  utils/subset_data_dir.sh --speakers data/train_unsup100k 500000 data/train_unsup100k_500k
+  utils/combine_data.sh data/semisup100k_500k data/train_sup data/train_unsup100k_500k
+fi
+
+###############################################################################
+# Train seed chain system using 100 hours supervised data.
+# Here we train i-vector extractor on only the supervised set.
+###############################################################################
+
+if [ $stage -le 1 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \
+    --train-set train_sup \
+    --ivector-train-set "" \
+    --nnet3-affix "" --chain-affix "" \
+    --tdnn-affix _1b --tree-affix bi_a \
+    --gmm tri4a --exp-root $exp_root || exit 1
+fi
+
+###############################################################################
+# Semi-supervised training using 100 hours supervised data and the
+# 500k-utterance unsupervised set. We use i-vector extractor, tree, lattices
+# and seed chain system from the previous stage.
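+# Note: unlike the corresponding stage in run_lstm_100k.sh, larger network
+# dimensions (1536-dim layers, 384-dim projections) are passed here,
+# presumably to provide more capacity for the larger unsupervised set.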
+###############################################################################
+
+if [ $stage -le 2 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_100k_semisupervised_1b.sh \
+    --hidden-dim 1536 --cell-dim 1536 --projection-dim 384 \
+    --supervised-set train_sup \
+    --unsupervised-set train_unsup100k_500k \
+    --lm-weights 3,1 \
+    --sup-chain-dir $exp_root/chain/tdnn_lstm_1b_sp \
+    --sup-lat-dir $exp_root/chain/tri4a_train_sup_unk_lats \
+    --sup-tree-dir $exp_root/chain/tree_bi_a \
+    --ivector-root-dir $exp_root/nnet3 \
+    --chain-affix "" \
+    --tdnn-affix _semisup100k_500k_1b \
+    --exp-root $exp_root || exit 1
+fi
+
+###############################################################################
+# Oracle system trained on the combined supervised and unsupervised sets
+# (semisup100k_500k). We use i-vector extractor, tree, and GMM trained
+# on only the supervised for fair comparison to semi-supervised experiments.
+###############################################################################
+
+if [ $stage -le 3 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \
+    --hidden-dim 1536 --cell-dim 1536 --projection-dim 384 \
+    --train-set semisup100k_500k \
+    --nnet3-affix "" --chain-affix "" \
+    --common-treedir $exp_root/chain/tree_bi_a \
+    --tdnn-affix _oracle100k_500k_1b --nj 100 \
+    --gmm tri4a --exp $exp_root \
+    --stage 9 || exit 1
+fi
+
diff --git a/egs/fisher_english/s5/local/semisup/run_lstm_100k_ex250k.sh b/egs/fisher_english/s5/local/semisup/run_lstm_100k_ex250k.sh
new file mode 100644
index 00000000000..1f9ee00dc8b
--- /dev/null
+++ b/egs/fisher_english/s5/local/semisup/run_lstm_100k_ex250k.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+# Copyright 2017 Vimal Manohar
+# Apache 2.0
+
+# This script demonstrates semi-supervised training using 100 hours of
+# supervised data and 250 hours of unsupervised data.
+# We assume the supervised data is in data/train_sup and unsupervised data
+# is in data/train_unsup100k_250k.
+# For LM training, we use the transcripts in data/train/text excluding the
+# utterances of the unsupervised set (hence the _ex250k LM directories),
+# unlike run_lstm_100k.sh, which uses only the 100 hour supervised transcripts.
+# This uses only 100 hours supervised set for i-vector extractor training,
+# which is different from run_50k.sh, which uses combined supervised +
+# unsupervised set.
+# This script is expected to be run after stage 8 of run_100k.sh.
+
+. ./cmd.sh
+. ./path.sh
+
+set -o pipefail
+exp_root=exp/semisup_100k
+
+stage=0
+
+. utils/parse_options.sh
+
+for f in data/train_sup/utt2spk data/train_unsup100k_250k/utt2spk \
+  data/train_sup/text \
+  data/lang_test_poco_ex250k/G.fst \
+  data/lang_test_poco_ex250k_unk/G.fst \
+  data/lang_test_poco_ex250k_big/G.carpa \
+  data/lang_test_poco_ex250k_unk_big/G.carpa
+  ; do
+  if [ ! -f $f ]; then
+    echo "$0: Could not find $f"
+    exit 1
+  fi
+done
+
+
+###############################################################################
+# Train seed chain system using 100 hours supervised data.
+# Here we train i-vector extractor on only the supervised set.
+############################################################################### + +if [ $stage -le 1 ]; then + local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \ + --train-set train_sup \ + --ivector-train-set "" \ + --nnet3-affix "" --chain-affix "" \ + --tdnn-affix _1b --tree-affix bi_a \ + --gmm tri4a --exp-root $exp_root || exit 1 +fi + +############################################################################### +# Semi-supervised training using 100 hours supervised data and +# 250 hours unsupervised data. We use i-vector extractor, tree, lattices +# and seed chain system from the previous stage. +############################################################################### + +if [ $stage -le 2 ]; then + local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_1a.sh \ + --supervised-set train_sup \ + --unsupervised-set train_unsup100k_250k \ + --sup-chain-dir $exp_root/chain/tdnn_lstm_1b_sp \ + --sup-lat-dir $exp_root/chain/tri4a_train_sup_unk_lats \ + --sup-tree-dir $exp_root/chain/tree_bi_a \ + --ivector-root-dir $exp_root/nnet3 \ + --chain-affix "" \ + --tdnn-affix _semisup100k_250k_ex250k_1a \ + --exp-root $exp_root || exit 1 +fi + +############################################################################### +# Oracle system trained on combined 350 hours including both supervised and +# unsupervised sets. We use i-vector extractor, tree, and GMM trained +# on only the supervised for fair comparison to semi-supervised experiments. +############################################################################### + +if [ $stage -le 3 ]; then + local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \ + --train-set semisup100k_250k \ + --nnet3-affix "" --chain-affix "" \ + --common-treedir $exp_root/chain/tree_bi_a \ + --tdnn-affix 1b_oracle --nj 100 \ + --gmm tri4a --exp $exp_root \ + --stage 9 || exit 1 +fi + diff --git a/egs/fisher_english/s5/local/semisup/run_lstm_50k.sh b/egs/fisher_english/s5/local/semisup/run_lstm_50k.sh new file mode 100644 index 00000000000..379090ec029 --- /dev/null +++ b/egs/fisher_english/s5/local/semisup/run_lstm_50k.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +# Copyright 2017 Vimal Manohar +# Apache 2.0 + +# This script demonstrates semi-supervised training using 50 hours of +# supervised data and 250 hours of unsupervised data. +# This script is expected to be run after stage 8 of run_50k.sh. + +# We assume the supervised data is in data/train_sup and unsupervised data +# is in data/train_unsup100k_250k. +# For LM training, we assume there is data/train/text, from which +# we will exclude the utterances contained in the unsupervised set. +# We use all 300 hours of semi-supervised data for i-vector extractor training. + +# This differs from run_lstm_100k.sh, which uses only 100 hours supervised data for +# both i-vector extractor training and LM training. + +. ./cmd.sh +. ./path.sh + +set -o pipefail +exp_root=exp/semisup_50k + +stage=0 + +. utils/parse_options.sh + +for f in data/train_sup/utt2spk data/train_unsup100k_250k/utt2spk \ + data/train/text \ + data/lang_test_poco_ex250k_big/G.carpa \ + data/lang_test_poco_ex250k/G.fst \ + data/lang_test_poco_ex250k_unk_big/G.carpa \ + data/lang_test_poco_ex250k_unk/G.fst; do + if [ ! -f $f ]; then + echo "$0: Could not find $f" + exit 1 + fi +done + +############################################################################### +# Train seed chain system using 50 hours supervised data. 
+# Here we train i-vector extractor on combined supervised and unsupervised data
+###############################################################################
+
+if [ $stage -le 1 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_1a.sh \
+    --train-set train_sup50k \
+    --ivector-train-set semisup50k_100k_250k \
+    --nnet3-affix _semi50k_100k_250k \
+    --chain-affix _semi50k_100k_250k \
+    --tdnn-affix _1a --tree-affix bi_a \
+    --gmm tri4a --exp-root $exp_root || exit 1
+fi
+
+###############################################################################
+# Semi-supervised training using 50 hours supervised data and
+# 250 hours unsupervised data. We use i-vector extractor, tree, lattices
+# and seed chain system from the previous stage.
+###############################################################################
+
+if [ $stage -le 2 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_50k_semisupervised_1a.sh \
+    --supervised-set train_sup50k \
+    --unsupervised-set train_unsup100k_250k \
+    --sup-chain-dir $exp_root/chain_semi50k_100k_250k/tdnn_lstm_1a_sp \
+    --sup-lat-dir $exp_root/chain_semi50k_100k_250k/tri4a_train_sup50k_sp_unk_lats \
+    --sup-tree-dir $exp_root/chain_semi50k_100k_250k/tree_bi_a \
+    --ivector-root-dir $exp_root/nnet3_semi50k_100k_250k \
+    --chain-affix _semi50k_100k_250k \
+    --tdnn-affix _semisup_1a \
+    --exp-root $exp_root || exit 1
+fi
+
+###############################################################################
+# Oracle system trained on combined 300 hours including both supervised and
+# unsupervised sets. We use the same i-vector extractor, tree, and GMM as the
+# semi-supervised experiments above for a fair comparison.
+###############################################################################
+
+if [ $stage -le 3 ]; then
+  local/semisup/chain/tuning/run_tdnn_lstm_1b.sh \
+    --train-set semisup50k_100k_250k \
+    --nnet3-affix _semi50k_100k_250k \
+    --chain-affix _semi50k_100k_250k \
+    --common-treedir $exp_root/chain_semi50k_100k_250k/tree_bi_a \
+    --tdnn-affix _1b_oracle --nj 100 \
+    --gmm tri4a --exp-root $exp_root \
+    --stage 9 || exit 1
+fi
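+
+# Example usage (illustrative): if run_50k.sh has been run through stage 8 and
+# the stage 1 seed system above already exists, the semi-supervised and oracle
+# systems can be run directly with
+#   local/semisup/run_lstm_50k.sh --stage 2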