[scripts] Bug-fix in long-utterance-segmentation script (thanks: Armin Oliya) (#1877)
author Vimal Manohar <vimal.manohar91@gmail.com>
Fri, 8 Sep 2017 18:54:11 +0000 (14:54 -0400)
committer Daniel Povey <dpovey@gmail.com>
Fri, 8 Sep 2017 18:54:11 +0000 (11:54 -0700)
egs/wsj/s5/steps/cleanup/internal/align_ctm_ref.py

index 1bcec602dc8a4ff21055862a391e5d6becbc6ac1..e940183497bf73e84ef2574ba4b1e89c75ee9458 100755 (executable)
@@ -122,11 +122,16 @@ def read_text(text_file):
     """
     for line in text_file:
         parts = line.strip().split()
-        if len(parts) <= 2:
+        if len(parts) < 1:
             raise RuntimeError(
                 "Did not get enough columns; line {0} in {1}"
                 "".format(line, text_file.name))
-        yield parts[0], parts[1:]
+        elif len(parts) == 1:
+            logger.warn("Empty transcript for utterance %s in %s", 
+                        parts[0], text_file.name)
+            yield parts[0], []
+        else:
+            yield parts[0], parts[1:]
     text_file.close()