index 14378aa374c5b5ba142fffe8e659e1317f955aee..c854b077d006e2b8c85a3465a10a80c18883409d 100644 (file)
// Copyright 2009-2011 Gilles Boulianne
// Copyright 2016 Smart Action LLC (kkm)
+// Copyright 2017 Xiaohui Zhang
// See ../../COPYING for clarification regarding multiple authors
//
StateId dest;
Symbol sym = ngram.words.back();
float weight = -ngram.logprob;
+ if (sym == sub_eps_ || sym == 0) {
+ KALDI_ERR << " <eps> or disambiguation symbol " << sym << "found in the ARPA file. ";
+ }
if (sym == eos_symbol_) {
if (sub_eps_ == 0) {
// Keep </s> as a real symbol when not substituting.
void ArpaLmCompiler::RemoveRedundantStates() {
fst::StdArc::Label backoff_symbol = sub_eps_;
+ if (backoff_symbol == 0) {
+ // The redundant-state removal implemented in this function leads to slow
+ // determinization of L o G when arpa2fst is used in the older style, i.e.
+ // without the --disambig-symbol option. The issue seems to be that it
+ // creates a non-deterministic FST, while G is supposed to be
+ // deterministic. By returning here, we simply disable this method for
+ // people using an older script. This method isn't really that
+ // consequential anyway, and people will move to the newer-style scripts
+ // (see current utils/format_lm.sh), so this isn't much of a
+ // problem.
+ return;
+ }
+
fst::StdArc::StateId num_states = fst_.NumStates();
+
+
// replace the #0 symbols on the input of arcs out of redundant states (states
// that are not final and have only a backoff arc leaving them), with <eps>.
- if (backoff_symbol != 0) {
- for (fst::StdArc::StateId state = 0; state < num_states; state++) {
- if (fst_.NumArcs(state) == 1 && fst_.Final(state) == fst::TropicalWeight::Zero()) {
- fst::MutableArcIterator<fst::StdVectorFst> iter(&fst_, state);
- fst::StdArc arc = iter.Value();
- if (arc.ilabel == backoff_symbol) {
- arc.ilabel = 0;
- iter.SetValue(arc);
- }
+ for (fst::StdArc::StateId state = 0; state < num_states; state++) {
+ if (fst_.NumArcs(state) == 1 && fst_.Final(state) == fst::TropicalWeight::Zero()) {
+ fst::MutableArcIterator<fst::StdVectorFst> iter(&fst_, state);
+ fst::StdArc arc = iter.Value();
+ if (arc.ilabel == backoff_symbol) {
+ arc.ilabel = 0;
+ iter.SetValue(arc);
}
}
}
+
// we could call fst::RemoveEps, and it would have the same effect in normal
// cases, where backoff_symbol != 0 and there are no epsilons in unexpected
// places, but RemoveEpsLocal is a bit safer in case something weird is going