Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - processor-sdk/kaldi.git/commitdiff
Modify .gitignore; script changes
author: Daniel Povey <dpovey@gmail.com>
Mon, 27 Jul 2015 21:41:12 +0000 (17:41 -0400)
committer: Daniel Povey <dpovey@gmail.com>
Mon, 27 Jul 2015 21:41:12 +0000 (17:41 -0400)
.gitignore
egs/wsj/s5/steps/nnet3/get_egs.sh
egs/wsj/s5/steps/nnet3/train_tdnn.sh
src/matrix/core [new file with mode: 0644]
src/nnet3/core [new file with mode: 0644]

index 9a43ef3758c14d78c4814f96f054fc8bc3e4780d..d3a483d8a2f4382bcfdbc841cdf03c84365ca90c 100644 (file)
 /src/nnet3/nnet-example-test
 /src/nnet3/nnet-optimize-test
 
+/src/nnet3bin/nnet3-copy-egs
+/src/nnet3bin/nnet3-get-egs
+/src/nnet3bin/nnet3-info
+/src/nnet3bin/nnet3-init
+/src/nnet3bin/nnet3-shuffle-egs
+/src/nnet3bin/nnet3-subset-egs
index 8f13cdaea58b5a06df77abb9df1b880e4e6e8ae1..bd378771bf2e05d359aa77fc1bd423ccea12bdb3 100755 (executable)
@@ -235,7 +235,7 @@ if [ $stage -le 2 ]; then
 
   $cmd $dir/log/create_valid_subset.log \
     nnet3-get-egs $valid_ivector_opt $egs_opts "$valid_feats" \
-    "ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+     "ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
      "ark:$dir/valid_all.egs" || touch $dir/.error &
   $cmd $dir/log/create_train_subset.log \
     nnet3-get-egs $train_subset_ivector_opt $egs_opts "$train_subset_feats" \
index 15f3e29e764ce739a0f0ab2300f3cf25ccbb71d4..7407fb030b2ad399b4c180612ab82dceb8b8b4ef 100755 (executable)
@@ -187,7 +187,9 @@ if [ $stage -le -5 ]; then
    $dir/configs || exit 1;
 
   # Initialize as "raw" nnet, prior to training the LDA-like preconditioning
-  # matrix.
+  # matrix.  This first config just does any initial splicing that we do;
+  # we do this as it's a convenient way to get the stats for the 'lda-like'
+  # transform.
   $cmd $dir/log/nnet_init.log \
     nnet3-init $dir/configs/init.config $dir/init.raw || exit 1;
 fi
@@ -195,6 +197,8 @@ fi
 # set left_context, right_context, num_hidden_layers
 . $dir/configs/vars || exit 1;
 
+context_opts="--left-context=$left_context --right-context=$right_context"
+
 ! [ "$num_hidden_layers" -gt 0 ] && echo \
  "$0: Expected num_hidden_layers to be defined" && exit 1;
 
@@ -253,8 +257,8 @@ if [ $stage -le -3 ]; then
   [ $num_lda_jobs -gt $max_lda_jobs ] && num_lda_jobs=$max_lda_jobs
 
   # Write stats with the same format as stats for LDA.
-  $cmd JOB=1:$num_lda_jobs $dir/log/get_transform_stats.JOB.log \
-      nnet3-get-transform-stats --num-leaves=$num_leaves \
+  $cmd JOB=1:$num_lda_jobs $dir/log/get_lda_stats.JOB.log \
+      nnet3-get-lda-stats --rand-prune=$rand_prune \
         $dir/init.raw $egs_dir/egs.JOB.ark $dir/JOB.lda_stats || exit 1;
 
   all_lda_accs=$(for n in $(seq $num_lda_jobs); do echo $dir/$n.lda_stats; done)
@@ -402,7 +406,7 @@ while [ $x -lt $num_iters ]; do
       # we're using different random subsets of it.
       rm $dir/post.$x.*.vec 2>/dev/null
       $cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
-        nnet3-copy-egs --srand=JOB --frame=random ark:$prev_egs_dir/egs.1.ark ark:- \| \
+        nnet3-copy-egs --srand=JOB --frame=random $context_opts ark:$prev_egs_dir/egs.1.ark ark:- \| \
         nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
         nnet3-compute-from-egs "nnet3-to-raw $dir/$x.mdl -|" ark:- ark:- \| \
         matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
@@ -498,7 +502,7 @@ while [ $x -lt $num_iters ]; do
 
         $cmd $train_gpu_opt $dir/log/train.$x.$n.log \
           nnet3-train$parallel_suffix $parallel_train_opts --minibatch-size=$this_minibatch_size --srand=$x "$raw" \
-          "ark:nnet3-copy-egs --frame=$frame ark:$cur_egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \
+          "ark:nnet3-copy-egs --frame=$frame $context_opts ark:$cur_egs_dir/egs.$archive.ark ark:- | nnet3-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-| nnet3-merge-egs --minibatch-size=$this_minibatch_size ark:- ark:- |" \
           $dir/$[$x+1].$n.raw || touch $dir/.error &
       done
       wait
@@ -595,7 +599,7 @@ if [ $stage -le $[$num_iters+1] ]; then
   # Note: this just uses CPUs, using a smallish subset of data.
   rm $dir/post.$x.*.vec 2>/dev/null
   $cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
-    nnet3-copy-egs --frame=random --srand=JOB ark:$cur_egs_dir/egs.1.ark ark:- \| \
+    nnet3-copy-egs --frame=random $context_opts --srand=JOB ark:$cur_egs_dir/egs.1.ark ark:- \| \
     nnet3-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
     nnet3-compute-from-egs "nnet3-am-copy --raw=true $dir/final.mdl -|" ark:- ark:- \| \
     matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
diff --git a/src/matrix/core b/src/matrix/core
new file mode 100644 (file)
index 0000000..d53352b
Binary files /dev/null and b/src/matrix/core differ
diff --git a/src/nnet3/core b/src/nnet3/core
new file mode 100644 (file)
index 0000000..6e426d0
Binary files /dev/null and b/src/nnet3/core differ