[scripts] Bug-fix in long-utterance-segmentation script (thanks: Armin Oliya) (#1877)

author: Vimal Manohar 2017-09-08 13:54:11 -0500
committer: Daniel Povey 2017-09-08 13:54:11 -0500
commit: f4a6a664fe7d344544965e530661e540c28bca6d (patch)
tree: 4d0bbef403e33167c4bc2d49813d97ec43da59d5
parent: 70748308810ffa12580761545b71d0946104d6d9 (diff)
download: kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.gz
kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.xz
kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.zip
1 file changed, 7 insertions, 2 deletions
diff --git a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py
index 1bcec602d..e94018349 100755
--- a/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py
+++ b/egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py
@@ -122,11 +122,16 @@ def read_text(text_file):
    """
    for line in text_file:
        parts = line.strip().split()
-        if len(parts) <= 2:
+        if len(parts) < 1:
            raise RuntimeError(
                "Did not get enough columns; line {0} in {1}"
                "".format(line, text_file.name))
-        yield parts[0], parts[1:]
+        elif len(parts) == 1:
+            logger.warn("Empty transcript for utterance %s in %s", 
+                        parts[0], text_file.name)
+            yield parts[0], []
+        else:
+            yield parts[0], parts[1:]
    text_file.close()
author	Vimal Manohar	2017-09-08 13:54:11 -0500
committer	Daniel Povey	2017-09-08 13:54:11 -0500
commit	f4a6a664fe7d344544965e530661e540c28bca6d (patch)
tree	4d0bbef403e33167c4bc2d49813d97ec43da59d5
parent	70748308810ffa12580761545b71d0946104d6d9 (diff)
download	kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.gz kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.tar.xz kaldi-f4a6a664fe7d344544965e530661e540c28bca6d.zip