summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (from parent 1: 66bcdce)
raw | patch | inline | side by side (from parent 1: 66bcdce)
author | Xiaohui Zhang <samuelzhang1104@gmail.com> | |
Tue, 15 Aug 2017 05:34:23 +0000 (01:34 -0400) | ||
committer | Daniel Povey <dpovey@gmail.com> | |
Tue, 15 Aug 2017 05:34:23 +0000 (22:34 -0700) |
egs/wsj/s5/utils/validate_dict_dir.pl | patch | blob | history | |
src/lm/arpa-lm-compiler.cc | patch | blob | history |
index 5c089fcd077529cd97c847e63a5fb46b8ed16f68..a5c9ff8da21b519bc3aac69a875f6f88d6095304 100755 (executable)
if (!defined $word) {
print "--> ERROR: empty lexicon line in $lex\n"; set_to_fail();
}
- if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>") {
+ if ($word eq "<s>" || $word eq "</s>" || $word eq "<eps>" || $word eq "#0") {
print "--> ERROR: lexicon.txt contains forbidden word $word\n";
set_to_fail();
}
index 634a6267c4e8a9f9493e02e57dd07be4ba926a18..c854b077d006e2b8c85a3465a10a80c18883409d 100644 (file)
// Copyright 2009-2011 Gilles Boulianne
// Copyright 2016 Smart Action LLC (kkm)
+// Copyright 2017 Xiaohui Zhang
// See ../../COPYING for clarification regarding multiple authors
//
StateId dest;
Symbol sym = ngram.words.back();
float weight = -ngram.logprob;
+ if (sym == sub_eps_ || sym == 0) {
+ KALDI_ERR << " <eps> or disambiguation symbol " << sym << "found in the ARPA file. ";
+ }
if (sym == eos_symbol_) {
if (sub_eps_ == 0) {
// Keep </s> as a real symbol when not substituting.