diff options
author | Vimal Manohar | 2017-09-08 13:54:11 -0500 |
---|---|---|
committer | Daniel Povey | 2017-09-08 13:54:11 -0500 |
commit | f4a6a664fe7d344544965e530661e540c28bca6d (patch) | |
tree | 4d0bbef403e33167c4bc2d49813d97ec43da59d5 | |
parent | 70748308810ffa12580761545b71d0946104d6d9 (diff) | |
download | kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.gz kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.xz kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.zip |
[scripts] Bug-fix in long-utterance-segmentation script (thanks: Armin Oliya) (#1877)
-rwxr-xr-x | egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py | 9 |
1 file changed, 7 insertions, 2 deletions
diff --git a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py index 1bcec602d..e94018349 100755 --- a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py +++ b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py | |||
@@ -122,11 +122,16 @@ def read_text(text_file): | |||
122 | """ | 122 | """ |
123 | for line in text_file: | 123 | for line in text_file: |
124 | parts = line.strip().split() | 124 | parts = line.strip().split() |
125 | if len(parts) <= 2: | 125 | if len(parts) < 1: |
126 | raise RuntimeError( | 126 | raise RuntimeError( |
127 | "Did not get enough columns; line {0} in {1}" | 127 | "Did not get enough columns; line {0} in {1}" |
128 | "".format(line, text_file.name)) | 128 | "".format(line, text_file.name)) |
129 | yield parts[0], parts[1:] | 129 | elif len(parts) == 1: |
130 | logger.warn("Empty transcript for utterance %s in %s", | ||
131 | parts[0], text_file.name) | ||
132 | yield parts[0], [] | ||
133 | else: | ||
134 | yield parts[0], parts[1:] | ||
130 | text_file.close() | 135 | text_file.close() |
131 | 136 | ||
132 | 137 | ||