diff options
Diffstat (limited to 'export_kaldi.sh')
-rw-r--r-- | export_kaldi.sh | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/export_kaldi.sh b/export_kaldi.sh new file mode 100644 index 000000000..255613fa1 --- /dev/null +++ b/export_kaldi.sh | |||
@@ -0,0 +1,155 @@ | |||
1 | # Texas Instruments 2017 | ||
2 | # | ||
3 | # This script can be used to extract the components of Kaldi ASR that are only | ||
4 | # necessary for decoding. Specify the type of decoding you will be doing | ||
5 | # along with the corpus directories you wish to export along with kaldi. | ||
6 | # Note that the copied binaries will be those necessary to run the standard | ||
7 | # decoding scripts for that type of decoding, i.e. for nnet3-online the | ||
8 | # binaries required for steps/online/nnet3/decode.sh. It is also important | ||
9 | # that the corpus directories specified each have their own copies of utils | ||
10 | # and steps, such that they are not symbolically linked. | ||
11 | |||
#######################################
# Print the command-line help for this script.
# Decoding options are listed exactly as they must be typed (with the
# leading "--"), matching what the option parser accepts.
# Arguments: none
# Outputs:   writes the help text to stdout
#######################################
usage() {
  cat <<'EOF'
Usage: export_kaldi.sh [decoding_opts] [corpus_directory_names] <kaldi_root> <export_dir>
Ex: ./export_kaldi.sh --nnet3-online --rm --apiai_decode /home/user/kaldi/kaldi-trunk /home/user/export_kaldi

Currently supported decoding options:
  --nnet2
  --nnet3
  --nnet2-online
  --nnet3-online

For corpus directory names, just specify --[name in egs folder]

EOF
}
27 | |||
# Decoding modes requested on the command line; each flag is the literal
# string "true" or "false". corpuses collects the names of the egs/
# sub-directories that should be exported alongside the binaries.
nnet2=false
nnet3=false
nnet2_online=false
nnet3_online=false
corpuses=()

# The last two arguments are always <kaldi_root> and <export_dir>. Fail here
# with a clear message instead of carrying empty paths into the copy steps.
if [[ $# -lt 2 ]]; then
  echo "Error: Expected <kaldi_root> and <export_dir> as the last two arguments" >&2
  echo "Exiting ..." >&2
  usage >&2
  exit 1
fi

# Parse options: everything before the final two positional arguments.
while [[ $# -gt 2 ]]; do
  case "$1" in
    --nnet2)
      nnet2=true
      ;;
    --nnet3)
      nnet3=true
      ;;
    --nnet2-online)
      nnet2_online=true
      ;;
    --nnet3-online)
      nnet3_online=true
      ;;
    --*)
      # Any other --name is taken as a corpus directory under egs/. Use the
      # whole text after "--" rather than a truncated prefix, and reject
      # names that are empty or would escape the egs/ directory.
      hold=${1#--}
      if [[ -z "$hold" || "$hold" == */* || "$hold" == "." || "$hold" == ".." ]]; then
        echo "Error: Invalid corpus directory name in argument $1" >&2
        echo "Exiting ..." >&2
        usage >&2
        exit 1
      fi
      corpuses+=("$hold")
      ;;
    *)
      echo "Error: Unrecognized argument $1" >&2
      echo "Exiting ..." >&2
      usage >&2
      exit 1
      ;;
  esac
  shift
done

kaldi_root=$1
export_dir=$2
64 | |||
# Check that Kaldi root exists
if [[ ! -d "$kaldi_root" ]]; then
  echo "Error: Could not find Kaldi root: $kaldi_root" >&2
  exit 1
fi

# The export directory is deleted below, so refuse to continue if it is the
# very tree we are about to copy from.
if [[ "$export_dir" -ef "$kaldi_root" ]]; then
  echo "Error: Export directory must not be the Kaldi root: $export_dir" >&2
  exit 1
fi

# Remove and recreate any pre-existing export directory.
# ${export_dir:?} aborts the script if the path is empty or unset, so the
# rm -rf can never run without an explicit target.
echo "Removing any previous export directory and making a new one"
rm -rf -- "${export_dir:?}" || exit 1
mkdir -p -- "$export_dir" || exit 1
mkdir -p -- "$export_dir/src" || exit 1
mkdir -p -- "$export_dir/egs" || exit 1
mkdir -p -- "$export_dir/tools/config" || exit 1
echo "Successfully removed and created new export directory"

# Copy common path script
echo "Copying common path script..."
cp -- "$kaldi_root/tools/config/common_path.sh" "$export_dir/tools/config" || exit 1

# Check that the specified corpus directories actually exist.
# If they do, copy them; stop on the first missing directory or failed copy.
echo "Copying corpus directories..."
for dir in "${corpuses[@]}"; do
  if [[ ! -d "$kaldi_root/egs/$dir" ]]; then
    echo "Error: Could not find corpus directory $kaldi_root/egs/$dir" >&2
    echo "Exiting..." >&2
    exit 1
  fi
  cp -r -- "$kaldi_root/egs/$dir" "$export_dir/egs" || exit 1
done
94 | |||
#######################################
# Copy a list of binaries from one src tree to another, keeping each
# binary's sub-directory (e.g. featbin/, latbin/).
# Arguments:
#   $1    - source root (the Kaldi src directory)
#   $2    - destination root (the export src directory)
#   $3... - binary paths relative to the roots, e.g. featbin/wav-copy
# Outputs:   error message on stderr when a binary is missing
# Returns:   0 if every binary was copied; 1 on the first missing binary
#            or failed mkdir/cp
#######################################
copy_required_bins() {
  local src_root=$1
  local dst_root=$2
  shift 2

  local rel
  for rel in "$@"; do
    if [[ ! -e "$src_root/$rel" ]]; then
      echo "Error: Could not find binary $src_root/$rel" >&2
      return 1
    fi
    # Create the binary's sub-directory on demand so the directory list can
    # never drift out of sync with the binary list.
    mkdir -p -- "$dst_root/$(dirname -- "$rel")" || return 1
    cp -r -- "$src_root/$rel" "$dst_root/$rel" || return 1
  done
}

# Copy any required binaries to the src folder.
# Each flag is compared against the literal "true" so that an unset or empty
# flag is treated as "not requested" instead of running the branch.
echo "Copying binaries..."
if [[ "$nnet2" == true ]]; then
  required_bins=(
    "nnet2bin/nnet-latgen-faster"
    "featbin/apply-cmvn"
    "featbin/extract-segments"
    "featbin/compute-mfcc-feats"
    "featbin/copy-feats"
    "featbin/compute-cmvn-stats-two-channel"
    "featbin/compute-cmvn-stats"
    "bin/copy-matrix"
    "featbin/splice-feats"
    "featbin/transform-feats"
    "featbin/paste-feats"
    "featbin/subsample-feats"
  )
  copy_required_bins "$kaldi_root/src" "$export_dir/src" "${required_bins[@]}" || exit 1
fi

if [[ "$nnet3" == true ]]; then
  required_bins=(
    "nnet3bin/nnet3-latgen-faster"
    "latbin/lattice-scale"
    "featbin/apply-cmvn"
    "featbin/extract-segments"
    "featbin/compute-mfcc-feats"
    "featbin/copy-feats"
    "featbin/compute-cmvn-stats-two-channel"
    "featbin/compute-cmvn-stats"
    "bin/copy-matrix"
    "featbin/splice-feats"
    "featbin/transform-feats"
  )
  copy_required_bins "$kaldi_root/src" "$export_dir/src" "${required_bins[@]}" || exit 1
fi

if [[ "$nnet2_online" == true ]]; then
  required_bins=(
    "online2bin/online2-wav-nnet2-latgen-faster"
    "featbin/wav-copy"
  )
  copy_required_bins "$kaldi_root/src" "$export_dir/src" "${required_bins[@]}" || exit 1
fi

if [[ "$nnet3_online" == true ]]; then
  required_bins=(
    "online2bin/online2-wav-nnet3-latgen-faster"
    "featbin/wav-copy"
    "latbin/lattice-scale"
  )
  copy_required_bins "$kaldi_root/src" "$export_dir/src" "${required_bins[@]}" || exit 1
fi
152 | |||
# Everything requested has been copied: report the destination and finish
# with a zero exit status.
echo "Success: Copied all corpuses and binaries to $export_dir"
exit 0