diff --git a/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh b/egs/wsj/s5/steps/cleanup/clean_and_segment_data.sh
index a523de30e6f19f016b37770a885bf36839d822cf..670e6c2b7140b42d5272c387b4f6362fde3a2429 100755 (executable)
echo "$0: based on the segments and text file in $dir/segments and $dir/text, creating new data-dir in $data_out"
padding=$(cat $dir/segment_end_padding) # e.g. 0.02
utils/data/subsegment_data_dir.sh --segment-end-padding $padding ${data} $dir/segments $dir/text $data_out
+ # utils/data/subsegment_data_dir.sh can output directories that have e.g. too many entries left in wav.scp
+ # Clean this up with the fix_data_dir.sh script
+ utils/fix_data_dir.sh $data_out
fi
if [ $stage -le 9 ]; then