Skip to content

Commit

Permalink
TDNN+LSTM semisup recipes
Browse files Browse the repository at this point in the history
  • Loading branch information
vimalmanohar committed Oct 20, 2018
1 parent 520b4a7 commit c0108e0
Show file tree
Hide file tree
Showing 46 changed files with 2,500 additions and 12,799 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@

set -u -e -o pipefail

stage=0 # Start from -1 for supervised seed system training
stage=0
train_stage=-100
nj=80
test_nj=50

# The following 3 options decide the output directory for semi-supervised
# The following 3 options decide the output directory for semi-supervised
# chain system
# dir=${exp_root}/chain${chain_affix}/tdnn${tdnn_affix}

Expand Down Expand Up @@ -89,7 +89,7 @@ echo "$0 $@" # Print the command line for logging
if [ -f ./path.sh ]; then . ./path.sh; fi
. ./utils/parse_options.sh

# The following can be replaced with the versions that model
# The following can be replaced with the versions that do not model
# UNK using phone LM. $sup_lat_dir should also ideally be changed.
unsup_decode_lang=data/lang_test_poco_sup100k_unk
unsup_decode_graph_affix=_poco_sup100k_unk
Expand Down Expand Up @@ -141,6 +141,8 @@ if [ $stage -le 2 ]; then

steps/make_mfcc.sh --nj $nj --cmd "$train_cmd" \
--mfcc-config conf/mfcc_hires.conf data/${unsupervised_set}_sp_hires || exit 1
steps/compute_cmvn_stats.sh data/${unsupervised_set}_sp_hires
utils/fix_data_dir.sh data/${unsupervised_set}_sp_hires
fi
unsupervised_set_perturbed=${unsupervised_set}_sp

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ test_graph_affix=_poco_unk

unsup_rescore_lang=${unsup_decode_lang}_big

dir=$exp_root/chain${chain_affix}/tdnn${tdnn_affix}
dir=$exp_root/chain${chain_affix}/tdnn_lstm${tdnn_affix}

if ! cuda-compiled; then
cat <<EOF && exit 1
Expand Down Expand Up @@ -144,6 +144,8 @@ fi
if [ $stage -le 2 ]; then
utils/data/perturb_data_dir_speed_3way.sh data/${unsupervised_set} \
data/${unsupervised_set_perturbed}_hires
utils/data/perturb_data_dir_volume.sh \
data/${unsupervised_set_perturbed}_hires

steps/make_mfcc.sh --cmd "$train_cmd" --nj $nj \
--mfcc-config conf/mfcc_hires.conf \
Expand Down Expand Up @@ -177,7 +179,7 @@ fi
# Rescore undeterminized lattices with a larger LM
if [ $stage -le 5 ]; then
steps/lmrescore_const_arpa_undeterminized.sh --cmd "$decode_cmd" \
--acwt 0.1 --beam 8.0 --skip-scoring true \
--acwt 0.1 --beam 8.0 --skip-scoring true \
$unsup_decode_lang $unsup_rescore_lang \
data/${unsupervised_set_perturbed}_hires \
$sup_chain_dir/decode_${unsupervised_set_perturbed} \
Expand Down Expand Up @@ -433,6 +435,7 @@ if [ $stage -le 18 ]; then
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--nj $num_jobs --cmd "$decode_cmd" ${decode_iter:+--iter $decode_iter} \
--online-ivector-dir $ivector_root_dir/ivectors_${decode_set}_hires \
--frames-per-chunk 160 \
$test_graph_dir data/${decode_set}_hires \
$dir/decode${test_graph_affix}_${decode_set}${decode_iter:+_iter$decode_iter} || touch $dir/.error
) &
Expand Down
Loading

0 comments on commit c0108e0

Please sign in to comment.