summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 5eaa0c2)
raw | patch | inline | side by side (parent: 5eaa0c2)
author | lnetsch <lnetsch@localhost> | |
Thu, 16 Dec 2010 17:17:36 +0000 (17:17 +0000) | ||
committer | lnetsch <lnetsch@localhost> | |
Thu, 16 Dec 2010 17:17:36 +0000 (17:17 +0000) |
25 files changed:
index 81de8f40dfabef4ec54f1b3ce5b53998172da371..233b644c3463bc66860322478ccb5a843dbc609f 100755 (executable)
const short CM_NBEST_LOW_CONF = -50;
/* the confidence score for utterances that have too many N-bests */
-#define CM_SIZE 900
+#define CM_SIZE 1000
typedef struct{
short base_mem[CM_SIZE];
ushort mem_count ;
index 875d101436cc235456d9ae2841766e96ac3936b0..079ec33561c1ea2f2fca7a93f70cfce8d776d135 100755 (executable)
/* initialize confidence measure for one utterance */
void cm_init(gmhmm_type * gv);
-/* load parameters of confidence measure computation */
+/*-----------------------------------------------------------------------------
+ cm_load
+
+ Load statistical and update parameters for the confidence measure if the
+ JAC state file pointer exists. Otherwise initialize the parameters to default
+ values.
+
+ Arguments:
+ fp: File pointer to open JAC state file or NULL
+ gv: Pointer to ASR instance structure
+
+--------------------------------- */
TIESRENGINECOREAPI_API void cm_load(FILE*fp, gmhmm_type *gv);
/* save updated parameter for confidence measure computation */
index bbf2aac617e9e38e1757df45ab722a7737d51d2d..10a6601a5a99c1f2828d67aa76a024120d559c50 100755 (executable)
#include "jac_one_user.h"
-#ifdef USE_CONFIDENCE
+//#ifdef USE_CONFIDENCE
#include "confidence_user.h"
-#endif
+//#endif
#if defined (WIN32) || defined (WINCE)
//#endif
//#endif
-#ifdef REALTIMEGUARD
+//#ifdef REALTIMEGUARD
tiesr->getRealTimeGuardTh=GetRealTimeGuardTh;
tiesr->getGetRealTGuard=GetRealTGuard;
tiesr->setRealTimeGuardTh=SetRealTimeGuardTh;
tiesr->setGetRealTGuard=SetRealTGuard;
-#endif
+//#endif
// Defines should not change the compatibility of
// TIesr processing that is optional
tiesr->SetTIesrJAC = SetTIesrJAC;
tiesr->SetTIesrSBC = SetTIesrSBC;
-#ifdef USE_AUDIO
+//#ifdef USE_AUDIO
tiesr->SetTIesrVolRange = SetTIesrVolRange;
-#endif
+//#endif
tiesr->GetTIesrPrune = GetTIesrPrune;
tiesr->GetTIesrTransiWeight = GetTIesrTransiWeight;
tiesr->GetTIesrJacRate = GetTIesrJacRate;
tiesr->GetTIesrJAC = GetTIesrJAC;
tiesr->GetTIesrSBC = GetTIesrSBC;
-#ifdef USE_AUDIO
+//#ifdef USE_AUDIO
tiesr->GetTIesrVolRange = GetTIesrVolRange;
-#endif
+//#endif
#ifdef _MONOPHONE_NET
tiesr->GetBestWordStartAt = GetBestWordStartAt;
index e85e053c285b86777c96e236055ee7fa62062e2d..094336a92ac1bdd4628471fca12cc0b7e29ffe57 100644 (file)
======================================================================*/
-
-/*
-** functions to build a frame-based (live-mode) recognizer.
-*/
-
-// #include "use_si_dll.h"
-
#include "tiesr_config.h"
#include "status.h"
#include "winlen.h"
#include "gmhmm_type.h"
-/* initialization of ASR:
-** The function loads speech HMM and sentence network. Both of them are
-** stored in the directory (folder) specified by the argument.
-*/
+/*---------------------------------------------------------------------
+ OpenASR
+
+ Initialize the ASR. Load speech HMM and sentence network,
+ initializes parameters, and allocates memory structures.
+
+ Arguments:
+ path: Directory containing HMM models and grammar network for recognition
+ total_words: Number of shorts in memoryPool
+ memoryPool: Pointer to memory, the beginning of which is the gmhmm_type struct.
+-----------------------------*/
+TIesrEngineStatusType OpenASR(char *path, int total_words, gmhmm_type *memoryPool);
+/*-------------------------------------------------------------------
+ CloseASR
-/* GMHMM_SI_API */
-/*TIesrEngineStatusType OpenASR(char [], int, void *);*/
-TIesrEngineStatusType OpenASR(char[] , int total_words, gmhmm_type *memoryPool);
+ Deallocate memory assigned to models and processing blocks and close the ASR.
-/* close ASR: */
-/* GMHMM_SI_API */
-/*void CloseASR(void *);*/
-void CloseASR(gmhmm_type *);
+ Arguments:
+ memoryPool: Pointer to memory pool starting with the gmhmm_type struct.
+ ------------------------------*/
+void CloseASR(gmhmm_type * memoryPool );
-/* initialization:
-** Initialize functions and variables (e.g. utterance detection, frame counter)
-** for recognizing the next utterance.
-** RETURN: location for audio buffer
-*/
-/* GMHMM_SI_API */
-/* Now part of CORE API */
-/*TIesrEngineStatusType
-OpenSearchEngine(void *);
-*/
-/* search one frame:
-** Perform search (recognition) function for one frame, given in the argument.
-*/
-/* GMHMM_SI_API */
-/*TIesrEngineStatusType CallSearchEngine(short [], void *);*/
-TIesrEngineStatusType CallSearchEngine(short [], gmhmm_type *, unsigned int);
-
-/* back tracking search space:
-** Find the best sequence of word index.
-*/
-/* GMHMM_SI_API */
-/*TIesrEngineStatusType CloseSearchEngine(TIesrEngineStatusType, void *);*/
-TIesrEngineStatusType CloseSearchEngine(TIesrEngineStatusType, gmhmm_type *);
-
-/* speech activity detected:
-*/
-/* GMHMM_SI_API */
-/* Now part of CORE API
-short SpeechDetected(void *);
-*/
-
-/* utterance end detected:
-** Indicate (using utterance detection information) if the utterance has ended.
-*/
-/* GMHMM_SI_API */
-/* Now part of CORE API
-short SpeechEnded(void *);
-*/
-
-/*
-** the following functions access search related quantities
-*/
-
-/* frames fed into search engine, used in search.c */
-/* GMHMM_SI_API */
-/* Now part of CORE API
-short GetFrameCount(void *);
-*/
-
-/* GMHMM_SI_API */
-/* Now part of CORE API
-long GetTotalScore(void *);
-*/
-
-/* nbr of times the whole model set has been compensated */
-/* GMHMM_SI_API */
-/* Now part of CORE API
-unsigned short GetCycleCount(void *);
-*/
-
-/* index of words in the recognized string, in reversed time order */
-/* GMHMM_SI_API */
-/* Now part of CORE API
- unsigned short *GetAnswerIndex(void *);
-*/
-
-/* i-th word in the recognized string */
-/* GMHMM_SI_API */
-/* Now part of CORE API
-const char *GetAnswerWord(unsigned short, void *);
-*/
-
-/* number of words in the recognized string */
-/* GMHMM_SI_API */
-/* Now part of CORE API
-unsigned short GetAnswerCount(void const *) ;
-*/
-
-/* actual search space
- */
-/*unsigned short GetSearchMemorySize(void *);*/
-unsigned short GetSearchMemorySize(gmhmm_type *);
-
-
-/* Maximum number of MFCC frames */
-unsigned short GetMaxFrames(gmhmm_type *gv );
+/*---------------------------------------------------------------------------
+ CallSearchEngine
+
+ Perform recognition search for one frame.
+
+ Arguments:
+ sig_frm: A frame of speech data
+ reco: Pointer to the ASR structure holding recognition state information
+ aFramesQueued: Number of frames available for searching, used to adjust
+ search pruning.
+
+-------------------------------------*/
+TIesrEngineStatusType CallSearchEngine(short sig_frm[], gmhmm_type * reco,
+ unsigned int aFramesQueued );
+
+
+/*-----------------------------------------------------------------------
+ CloseSearchEngine
+
+ Back track the search space to determine the recognition result and the
+ associated best sequence word indices, if a recognition result exists. The
+ search may not include a grammar network stop symbol with a non-zero
+ probability, in which case there is no recognition result.
-//void GetSearchInfo(void *);
-
-/* indication of the volume setting */
-/* GMHMM_SI_API */
-/* Now part of CORE API
-short GetVolumeStatus(void *);
-*/
-
-/* GMHMM_SI_API */
-/* Now all of this is part of COREAPI */
-/*void SetTIesrPrune(void *data, short);*/
-/* GMHMM_SI_API */
-/*void GetTIesrPrune(void *data, short *);*/
-/* GMHMM_SI_API */
-/* void SetTIesrTransiWeight(void *data, short); */
-/* GMHMM_SI_API */
-/*void GetTIesrTransiWeight(void *data, short*);*/
-
-
-/*
-** reset SAD control parameters
-*/
-/*
-Th_SPEECH_DELTA = 10 // larger -> more robust
-Th_MIN_SPEECH_DB = 50 // min dB level to be considered as speech
-Th_MIN_BEG_FRM = 5 // larger -> more robust to noise pulses
-Th_MIN_END_FRM = 30 // smaller (.e.g. 30) --> quicker latency time
-Th_NOISE_FLR = 0 // larger -> more robust
-*/
-/* GMHMM_SI_API */
-/* Now part of CORE API */
-/*void SetTIesrSAD(void *, short, short, short, short, short);*/
-/* GMHMM_SI_API */
-/*void GetTIesrSAD(void *, short*, short*, short*, short*, short*);*/
-
-/*
-** set the number of mean vectors to adapt during each frame
-** PC: 300, DSP 75, Compal 45
-*/
-
-/* GMHMM_SI_API */
-/* Now in jac_one_user.h */
-/*void SetTIesrJacRate(void *, short);*/
-/*void SetTIesrJacRate(gmhmm_type *, short);*/
-/* GMHMM_SI_API */
-/*void GetTIesrJacRate(void *, short*);*/
-/*void GetTIesrJacRate(gmhmm_type *, short*);*/
-
-/* check audio volume range (typical: [1200, 25000]) */
-
-/* GMHMM_SI_API */
-/* Now part of CORE API */
-/* void SetTIesrVolRange(void *, unsigned short, unsigned short);*/
-/* GMHMM_SI_API */
-/*void GetTIesrVolRange(void *, unsigned short*, unsigned short*);*/
+ Arguments:
+ status: Status of recognizer search
+ reco: Pointer to the ASR structure holding recognition state information
+------------------------------------*/
+TIesrEngineStatusType CloseSearchEngine(TIesrEngineStatusType status,
+ gmhmm_type * reco );
+
+
+
+/*---------------------------------------------------------------------------
+ GetSearchMemorySize
+
+ Get the search space memory utilized for this utterance.
+
+ Arguments:
+ reco: Pointer to the ASR structure holding recognition state information
+
+-----------------------------------*/
+unsigned short GetSearchMemorySize(gmhmm_type *reco);
+
+
+/*----------------------------------------------------------------------------
+ GetMaxFrames
+
+ Get the maximum number of frames of speech to process with the search
+ engine. This does not include initial non-speech frames.
+
+ Arguments:
+ gv: Pointer to the ASR structure holding recognition state information
+ ----------------------------------*/
+unsigned short GetMaxFrames(gmhmm_type *gv );
index 035641e5e9ef060f5e3ae81cc3399c626c8e305e..a4537dd28bfe291554d7742d80d570258bb399af 100755 (executable)
/*for realtime guard*/
unsigned int FramesQueued;
unsigned int prev_FramesQueued;
-
+
+#ifdef REALTIMEGUARD
short guard;/*will be reset when queue reduce, but will increase when queue increase at a high level*/
short realtime_th; // frames that trigger more pruning as realtime guard, 50 is one second.
short realt_guard;
-
+#endif
} gmhmm_type;
index 148b5ae52d0ec441e4f6200e5327cafd643110cb..13d9e966df154e45135f8c3f779d2898b78a3c6d 100755 (executable)
/* GMHMM_SI_API */
TIESRENGINECOREAPI_API short GetConfidenceScore(const gmhmm_type *gvv)
{
- return (gvv)->cm_score ;
+#ifdef USE_CONFIDENCE
+ return (gvv )->cm_score;
+#else
+ return 0;
+#endif
}
/*--------------------------------*/
gvv->sbc_fgt = sbc_fgt;
}
-#ifdef REALTIMEGUARD
-// int realtime_th;
-// int realt_guard;
- TIESRENGINECOREAPI_API void GetRealTimeGuardTh(short* th, gmhmm_type * gv)
- { *th = gv->realtime_th ;
- }
-
- TIESRENGINECOREAPI_API void GetRealTGuard(short* guard, gmhmm_type * gv)
- { *guard=gv->realt_guard;
- }
+TIESRENGINECOREAPI_API void GetRealTimeGuardTh( short* th, gmhmm_type * gv )
+{
+#ifdef REALTIMEGUARD
+ *th = gv->realtime_th;
+#endif
+}
- TIESRENGINECOREAPI_API void SetRealTimeGuardTh(short th, gmhmm_type * gv)
- { gv->realtime_th= th;
- }
+TIESRENGINECOREAPI_API void GetRealTGuard( short* guard, gmhmm_type * gv )
+{
+#ifdef REALTIMEGUARD
+ *guard = gv->realt_guard;
+#endif
+}
- TIESRENGINECOREAPI_API void SetRealTGuard(short guard, gmhmm_type * gv)
- { gv->realt_guard=guard;
- }
+TIESRENGINECOREAPI_API void SetRealTimeGuardTh( short th, gmhmm_type * gv )
+{
+#ifdef REALTIMEGUARD
+ gv->realtime_th = th;
+#endif
+}
+TIESRENGINECOREAPI_API void SetRealTGuard( short guard, gmhmm_type * gv )
+{
+#ifdef REALTIMEGUARD
+ gv->realt_guard = guard;
#endif
+}
-/*--------------------------------
+
+
+/*---------------------------------------------------------------------------
OpenSearchEngine
This function initializes the gmhmm_type object parameters and variables
- in preparation for speech recognition.
+ in preparation for speech recognition of an utterance.
+
+ Arguments:
+ gvv: Pointer to the ASR instance structure.
+
--------------------------------*/
-/* GMHMM_SI_API */
TIesrEngineStatusType OpenSearchEngine(gmhmm_type *gvv)
{
gvv->signal_cnt = 0;
index d54d9dd9cfdc6f3ab604277f18225e554cc1f9ce..0ddce04be77ae15fac560ddd0691a8cb0d23450e 100755 (executable)
#endif
-#ifdef REALTIMEGUARD
+//#ifdef REALTIMEGUARD
TIESRENGINECOREAPI_API void GetRealTimeGuardTh(short* th, gmhmm_type * gvv);
TIESRENGINECOREAPI_API void GetRealTGuard(short* time, gmhmm_type * gvv);
TIESRENGINECOREAPI_API void SetRealTimeGuardTh(short th, gmhmm_type * gvv);
TIESRENGINECOREAPI_API void SetRealTGuard(short th, gmhmm_type * gvv);
-#endif
+//#endif
#ifdef USE_NBEST
index 765c5bf0691fd38b564eceeb12f503b170c48f38..c5642534ecf07924e1ac1adc161cbb8bfd2cb5b2 100755 (executable)
return bSucc;
}
-/*--------------------------------*/
-/* GMHMM_SI_API */
+/*-----------------------------------------------------------------------------
+ JAC_clear
+
+ Reinitialize JAC state parameter estimates and update accumulators. This will
+ be done if no JAC state file exists, or there was an error in attempting to
+ load the parameters.
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+ aModelDir: Directory holding recognition models and grammar network
+ aTreeFile: Full name of file containing phonetic regression tree, used for SBC
+
+ ------------------------------------*/
TIesrEngineJACStatusType JAC_clear( gmhmm_type *gv,
const char* aModelDir,
const char* aTreeFile )
#endif
#ifdef USE_SBC
- /* Try to initialize SBC structure */
+ /* Try to initialize SBC phonetic regression tree structure, and statistics
+ accumulators */
if (gv->comp_type & SBC)
hStatus = sbc_init_tree( gv->sbcState, aModelDir,
aTreeFile, FALSE,
gv->sbcState->pHMM2phone = NULL;
}
+ /* Load set-independent HMM to phone mapping */
hStatus = sbc_load_hmm2phone(aModelDir, gv->trans->n_hmm_set,
gv->sbcState, TRUE);
/*----------------------------------------------------------------
- MP_JAC_load
+ MP_JAC_load
+
+ Read channel bias estimate and update parameters from JAC state file,
+ unless it does not exist, in which case bias is set to zero. Depending on
+ configuration, the file may also contain other adaptation estimates and
+ update parameters, including those for SVA, SBC, ORM, and CM.
+
+ Arguments:
+ bias_file: JAC state file
+ gv: Pointer to ASR instance structure
+ pNetDir: Directory holding grammar hmm and grammar network info
+ p_tree_file: Full name of file containing phonetic regression tree,
+ used for SBC
+ bTriphone: Boolean indicating to use hmm-to-phone map file
- Read bias from a file, unless !exist, in which case bias is set to
- zero. The file contains the channel and SVA information derived
- from past utterances.
----------------------------------------------------------------*/
static
TIesrEngineJACStatusType MP_JAC_load( const char *bias_file, gmhmm_type *gv,
#ifdef USE_WSVA
NssType* pNss = (NssType*) gv->pNss;
#endif
-
+
+ /* Open JAC state file */
FILE *pf = fopen(bias_file,"rb");
if( pf )
/* Initialization of SBC method */
#ifdef USE_SBC
- if (gv->comp_type & SBC){
- hStatus = sbc_init_tree( gv->sbcState, pNetDir, p_tree_file, FALSE,
- gv->nbr_dim,
- gv->trans->n_hmm_set , bTriphone);
-
- if( hStatus != eTIesrEngineHLRSuccess )
- {
- eStatus = eTIesrEngineJACSBCInitFail;
- goto Failure;
- }
-
- hStatus = sbc_load( gv->sbcState, pf);
+ if( gv->comp_type & SBC )
+ {
+ /* Initialize the hierarchical regression tree structure and
+ hmm-to-phone map from data files. */
+ hStatus = sbc_init_tree( gv->sbcState, pNetDir, p_tree_file, FALSE,
+ gv->nbr_dim,
+ gv->trans->n_hmm_set, bTriphone );
+
+ if( hStatus != eTIesrEngineHLRSuccess )
+ {
+ eStatus = eTIesrEngineJACSBCInitFail;
+ goto Failure;
+ }
+
+ /* Load tree regression statistics accumulators and biases */
+ hStatus = sbc_load( gv->sbcState, pf );
}
else
gv->sbcState->pHMM2phone = NULL;
}
+ /* Load 'gender' independent hmm to monophone mapping from data file.*/
hStatus = sbc_load_hmm2phone(pNetDir, gv->trans->n_hmm_set,
gv->sbcState, bTriphone);
}
#endif
#ifdef USE_ONLINE_REF
+ /* Load current ORM model adaptation data */
rj_load(gv, pf);
#endif
}
#ifdef USE_CONFIDENCE
+ /* Load confidence measure adaptation data */
cm_load(pf, gv);
#endif
- /* Some failure in the channel estimate file or SBC. Clear out
- all accumulators and channel estimates, and start afresh */
+ /* Check for failure in the JAC state file or SBC. If JAC state file
+ does not exist, or there was a load failure, then reinitialize the
+ JAC state accumulators and channel estimates, and start afresh */
Failure:
if (pf) fclose( pf );
- /* Clear JAC channel estimates, SVA estimates,
- and accumulators */
+
eStatusClear = eTIesrEngineJACSuccess;
- /*
- return the error status will results in Recognizer failure.
- Instead, reset the recognizer, so comment the error status */
+
+ /* Set NULL file pointer if loading failed */
if (eStatus == eTIesrEngineJACLoadFail) pf = NULL;
-
+
+ /* Reinitialize JAC if no JAC state file or load fail */
if (eStatus != eTIesrEngineJACSuccess || pf == NULL)
eStatusClear = JAC_clear( gv, pNetDir, p_tree_file );
+ /* Return status of attempt to load. If JAC was reset then recognition
+ can continue, but restarting JAC parameter estimation. */
if( eStatusClear != eTIesrEngineJACSuccess )
return eStatusClear;
else{
}
}
-/* GMHMM_SI_API */
+
+/*-----------------------------------------------------------------------------
+ JAC_load
+
+ Load the prior jac channel estimate, global accumulator, and
+ sva accumulator values so that jac estimation can continue from
+ prior utterances.
+
+ Arguments:
+ bias_file: JAC state file
+ gv: Pointer to ASR instance structure
+ pNetDir: Directory holding grammar hmm and grammar network info
+ p_tree_file: Full name of file containing hierarchical regression tree,
+ used for SBC
+
+ --------------------------------*/
+
TIesrEngineJACStatusType JAC_load( const char *bias_file, gmhmm_type *gv,
const char *pNetDir, const char* p_tree_file)
{
eStatus = MP_JAC_load( bias_file, gvv, pNetDir, p_tree_file, FALSE);
if (eStatus != eTIesrEngineJACSuccess) return eStatus;
#endif
+ /* Load JAC state file, with hmm-to-phone map from data file. */
eStatus = MP_JAC_load( bias_file, gv, pNetDir, p_tree_file, TRUE);
return eStatus;
}
gvv->word_backtrace=HMMFA;
/* freeze background scores obtained in the first pass */
- cm_freeze((ConfType*)gv->pConf);
+#ifdef USE_CONFIDENCE
+ cm_freeze( (ConfType*) gv->pConf );
+#endif
if (jac_force_alignment(gv) != eTIesrEngineSuccess){
gvv->word_backtrace=WORDBT;
index b39b6e5e8a802fd53802df7a4419dcb305047ce2..5f83e2dccdd013ee72df92b3356fd1931ef8f2a3 100644 (file)
/* for debug of JAC */
//#define _JAC_DEBUG 1
-/*--------------------------------
+/*-----------------------------------------------------------------------------
JAC_set_default_params
Initialize the default JAC and SVA parameters used for adaptation of
models to present noise and channel conditions. These may
subsequently be changed by the API.
+
+ Arguments:
+
+ gv: Pointer to the ASR structure holding recognition state information
--------------------------------*/
-/* GMHMM_SI_API */
TIesrEngineJACStatusType JAC_set_default_params( gmhmm_type *gv );
-/*--------------------------------
- JAC_load
+/*-----------------------------------------------------------------------------
+ JAC_load
- Load the prior jac channel estimate, global accumulator, and
- sva accumulator values so that jac estimation can continue from
- prior utterances.
+ Load the prior jac channel estimate, global accumulator, and
+ sva accumulator values so that jac estimation can continue from
+ prior utterances.
+
+ Arguments:
+ bias_file: JAC state information file
+ gv: Pointer to the ASR structure holding recognition state information
+ pNetDir: Directory holding grammar hmm and grammar network info
+ pPhoneticTreeFile: Full name of file containing hierarchical regression tree
--------------------------------*/
-/* GMHMM_SI_API */
-/*TIesrEngineJACStatusType JAC_load (const char *bias_file, void *g);*/
TIesrEngineJACStatusType JAC_load (const char *bias_file, gmhmm_type *gv,
const char* pNetDir, const char* pPhoneticTreeFile);
-/*--------------------------------
+/*-----------------------------------------------------------------------------
JAC_save
Save the jac channel estimate, global accumulators and sva accumulators
to a file, so that jac estimation can continue in the future from
the present accumulated values.
+
+ Arguments:
+ bias_file: JAC state information file
+ gv: Pointer to the ASR structure holding recognition state information
--------------------------------*/
-/* GMHMM_SI_API */
TIesrEngineJACStatusType JAC_save(const char *bias_file, gmhmm_type *gv );
-/*--------------------------------
+/*-----------------------------------------------------------------------------
JAC_clear
Clear the jac channel estimate, global accumulators, and sva accumulators
so that jac estimation starts afresh with the next utterance.
+
+ Arguments:
+ gv: Pointer to the ASR structure holding recognition state information
+ aModelDir: Directory containing current HMM model and grammar network files
+ aTreeFile: File containing regression tree for SBC.
--------------------------------*/
/* GMHMM_SI_API */
TIesrEngineJACStatusType JAC_clear(gmhmm_type *gv,
const char* aTreeFile );
-/*--------------------------------
+/*-----------------------------------------------------------------------------
JAC_update
Update the jac channel, global accumulators, and sva accumulators
with the results of the presently recognized utterance. The updated
jac channel and sva corrections can be used for recognition of
subsequent utterances.
+
+ Arguments:
+ gv: Pointer to the ASR structure holding recognition state information
--------------------------------*/
/* GMHMM_SI_API */
TIesrEngineJACStatusType JAC_update(gmhmm_type *gv);
index 10b192f98f47cebb6114f505eaa4d6d8a226639e..fba7295e89fc5f60223da33ef0229d13b9e22651 100755 (executable)
@@ -241,11 +241,17 @@ TIesrEngineJACStatusType per_frame_jac_compensate(short mfcc_buf[], gmhmm_type *
for (i = 0; i < gv->n_mu ; i++) {
#else
int j;
- j=gv->jac_vec_per_frame ;
-
- if (gv->speech_detected ==1 && gv->nbr_cpy ==0 )
+
+ /* Number of Gaussian mean vectors to update this frame. It does not need
+ to exceed the number of vectors available. */
+ j = (gv->jac_vec_per_frame > gv->n_mu ) ? gv->n_mu : gv->jac_vec_per_frame;
+
+ /* If speech detected and not all Gaussian mean vectors have been
+ compensated, then compensate all remaining uncompensated mean vectors. */
+ if ( gv->speech_detected == 1 && gv->nbr_cpy == 0 )
{
- j=gv->n_mu - gv->index_mean; j=j>0? j:0;
+ j = gv->n_mu - gv->index_mean;
+ j = j>0 ? j : 0;
}
/* one has to wait for 10 frames before any JAC is done, unless speech has been detected */
index 96c7ad6fb48236ca1bdb4fd6587ef5bae4430f2e..0435ba31c0e04bd0ec318aca7b91d85d404505ce 100755 (executable)
#endif
-/* for multi-pass decoding */
+/*---------------------------------------------------------------------------
+ MP_CallEachPass
+
+ Process one frame of data through the ASR search engine.
+
+ Arguments:
+ sig_frm: The frame of audio data
+ gvv: The ASR instance structure
+ bDoSearch: Boolean specifying to do search
+
+ ----------------------------------*/
static TIesrEngineStatusType
MP_CallEachPass(short sig_frm[], gmhmm_type * gvv, Boolean bDoSearch)
{
gmhmm_type *gv = (gmhmm_type *) gvv;
TIesrEngineStatusType a_status = eTIesrEngineSuccess;
- short power_spectrum[ WINDOW_LEN ]; /* 1/2 is NOT enough! (in autocorreletion) */
+
+ /* Need more than half a window for autocorrelation */
+ short power_spectrum[ WINDOW_LEN ];
+
short idx, i, j, log_mel_energy[ N_FILTER26 ], noise_start_time;
long tmp;
ushort idx_noise;
#else
gvv->word_backtrace=HMMBT;
#endif
- /* do nothing but shifting input buffer */
+ /* do nothing but shift the input buffer for the first frame, since usually
+ input frame size = FRAME_LEN < WINDOW_LEN */
if (gv->pred_first_frame) {
/* overlap */
for (i = 0; i < OVERLAP; i++) gv->sample_signal[ i ] = sig_frm[ i + (FRAME_LEN - (OVERLAP)) ];
return eTIesrEngineSuccess;
}
- /* pack the input data into proper location */
+
+ /* pack the input frame data into the sample buffer */
for (i = 0; i < FRAME_LEN; i++) gv->sample_signal[ i + OVERLAP] = sig_frm[ i ];
/* put into circular buffer, for regression computation */
return a_status;
}
-/*
-** recognize one utterance, live
-** return search status:
-** compute regression and search
-*/
+/*---------------------------------------------------------------------------
+ CallSearchEngine
+
+ Perform recognition search for one frame.
+
+ Arguments:
+ sig_frm: A frame of speech data
+ gvv: Pointer to the ASR structure holding recognition state information
+ aFramesQueued: Number of frames available for searching, used to adjust
+ search pruning.
-/* GMHMM_SI_API */
+-------------------------------------*/
TIesrEngineStatusType CallSearchEngine(short sig_frm[], gmhmm_type *gvv,
unsigned int aFramesQueued )
{
index 2a79cee54f568d7030b4e80f95029a2390bd34d0..271e03ad0568c8e906afaca9faab043fc5b791f1 100755 (executable)
@@ -1078,7 +1078,7 @@ TIESRENGINECOREAPI_API void dim_p2_init(const short nbr_mfcc, gmhmm_type *gvv)
gmhmm_type *gv = (gmhmm_type *)gvv;
int i;
- if (nbr_mfcc == MAX_DIM_MFCC16) {
+ if (nbr_mfcc == MAX_DIM_MFCC16 || nbr_mfcc == MAX_DIM_MFCC13 ) {
// printf("Using MAX_DIM_MFCC16 x N_FILTER26\n");
gv->n_filter = N_FILTER26 ;
for (i=0; i<MAX_DIM_MFCC16; i++) gv->cosxfm[ i ] = cosxfm26[ i ];
index 5bed7b8d717eba7e75f276fd33c4d596be8d5064..62c4ec632f1cef37681952c844efd7ec9c117a0d 100644 (file)
--- a/TIesrEngine/src/mfcc_f.h
+++ b/TIesrEngine/src/mfcc_f.h
#include "tiesrcommonmacros.h"
#include "mfcc_f_user.h"
-/*--------------------------------*/
-/* These are now in mfcc_f_def_struct_user.h */
-// size of buffer holding 2-byte audio samples that user must allocate
-
-/* window len = FFT len */
-/*
-#define WINDOW_LEN 256
-*/
-
-/* overlap between two frames */
-/*
-#define OVERLAP (WINDOW_LEN - FRAME_LEN)
-*/
-
-/*
-#define N_FILTER20 20
-*/
-
-/*
-#define N_FILTER26 26
-*/
-
-/* maximum number of mfcc */
-/*
-#define MAX_DIM_MFCC10 10
-*/
-
-/* maximum number of mfcc */
-/*
-#define MAX_DIM_MFCC16 16
-*/
-/* regression span */
-/*
-#define REG_SPAN 2
-*/
-
-/* #define MFCC_BUF_SZ (2 * REG_SPAN + 1) */
-/*
-#define MFCC_BUF_SZ (2 * REG_SPAN + 10)
-*/
/*--------------------------------*/
/* Local constants for mfcc processing */
#endif
-#define SAM_FREQ 8000
#define CUT_FREQ 1000
#define LOGPOLYORD 8
{ 20, 12667, 238}, { 20, 14466, 59}
};
*/
+
+
+#if (WINDOW_LEN == 512 && SAM_FREQ == 24000 )
+/* This filter bank is based on 512 sample FFT and linear filters
+ in the first 1kHz, and a 12kHz bandwidth. */
+static const mel_filter_type mel_filter26[ WINDOW_LEN / 2 ] =
+{
+ { 0, 0, 16384 }, { 0, 10752, 5632 }, { 1, 5120, 11264 },
+ { 1, 15872, 512 }, { 2, 10240, 6144 }, { 3, 4608, 11776 },
+ { 3, 15360, 1024 }, { 4, 9728, 6656 }, { 5, 4096, 12288 },
+ { 5, 14848, 1536 }, { 6, 9216, 7168 }, { 7, 3584, 12800 },
+ { 7, 14336, 2048 }, { 8, 8704, 7680 }, { 9, 3072, 13312 },
+ { 9, 13824, 2560 }, { 10, 8192, 8192 }, { 11, 2560, 13824 },
+ { 11, 13312, 3072 }, { 12, 7680, 8704 }, { 13, 2048, 14336 },
+ { 13, 12800, 3584 }, { 14, 2431, 13953 }, { 14, 6077, 10307 },
+ { 14, 9723, 6661 }, { 14, 13369, 3015 }, { 15, 521, 15863 },
+ { 15, 3533, 12851 }, { 15, 6545, 9839 }, { 15, 9556, 6828 },
+ { 15, 12568, 3816 }, { 15, 15580, 804 }, { 16, 1823, 14561 },
+ { 16, 4311, 12073 }, { 16, 6799, 9585 }, { 16, 9287, 7097 },
+ { 16, 11774, 4610 }, { 16, 14262, 2122 }, { 17, 302, 16082 },
+ { 17, 2357, 14027 }, { 17, 4412, 11972 }, { 17, 6467, 9917 },
+ { 17, 8522, 7862 }, { 17, 10576, 5808 }, { 17, 12631, 3753 },
+ { 17, 14686, 1698 }, { 18, 295, 16089 }, { 18, 1992, 14392 },
+ { 18, 3690, 12694 }, { 18, 5387, 10997 }, { 18, 7084, 9300 },
+ { 18, 8782, 7602 }, { 18, 10479, 5905 }, { 18, 12176, 4208 },
+ { 18, 13874, 2510 }, { 18, 15571, 813 }, { 19, 730, 15654 },
+ { 19, 2133, 14251 }, { 19, 3535, 12849 }, { 19, 4937, 11447 },
+ { 19, 6339, 10045 }, { 19, 7741, 8643 }, { 19, 9143, 7241 },
+ { 19, 10545, 5839 }, { 19, 11947, 4437 }, { 19, 13349, 3035 },
+ { 19, 14751, 1633 }, { 19, 16153, 231 }, { 20, 967, 15417 },
+ { 20, 2125, 14259 }, { 20, 3283, 13101 }, { 20, 4441, 11943 },
+ { 20, 5599, 10785 }, { 20, 6758, 9626 }, { 20, 7916, 8468 },
+ { 20, 9074, 7310 }, { 20, 10232, 6152 }, { 20, 11390, 4994 },
+ { 20, 12548, 3836 }, { 20, 13706, 2678 }, { 20, 14864, 1520 },
+ { 20, 16022, 362 }, { 21, 658, 15726 }, { 21, 1614, 14770 },
+ { 21, 2571, 13813 }, { 21, 3528, 12856 }, { 21, 4484, 11900 },
+ { 21, 5441, 10943 }, { 21, 6397, 9987 }, { 21, 7354, 9030 },
+ { 21, 8311, 8073 }, { 21, 9267, 7117 }, { 21, 10224, 6160 },
+ { 21, 11180, 5204 }, { 21, 12137, 4247 }, { 21, 13094, 3290 },
+ { 21, 14050, 2334 }, { 21, 15007, 1377 }, { 21, 15963, 421 },
+ { 22, 443, 15941 }, { 22, 1233, 15151 }, { 22, 2023, 14361 },
+ { 22, 2813, 13571 }, { 22, 3603, 12781 }, { 22, 4394, 11990 },
+ { 22, 5184, 11200 }, { 22, 5974, 10410 }, { 22, 6764, 9620 },
+ { 22, 7554, 8830 }, { 22, 8344, 8040 }, { 22, 9134, 7250 },
+ { 22, 9925, 6459 }, { 22, 10715, 5669 }, { 22, 11505, 4879 },
+ { 22, 12295, 4089 }, { 22, 13085, 3299 }, { 22, 13875, 2509 },
+ { 22, 14666, 1718 }, { 22, 15456, 928 }, { 22, 16246, 138 },
+ { 23, 539, 15845 }, { 23, 1191, 15193 }, { 23, 1844, 14540 },
+ { 23, 2497, 13887 }, { 23, 3149, 13235 }, { 23, 3802, 12582 },
+ { 23, 4455, 11929 }, { 23, 5107, 11277 }, { 23, 5760, 10624 },
+ { 23, 6413, 9971 }, { 23, 7065, 9319 }, { 23, 7718, 8666 },
+ { 23, 8371, 8013 }, { 23, 9023, 7361 }, { 23, 9676, 6708 },
+ { 23, 10329, 6055 }, { 23, 10982, 5402 }, { 23, 11634, 4750 },
+ { 23, 12287, 4097 }, { 23, 12940, 3444 }, { 23, 13592, 2792 },
+ { 23, 14245, 2139 }, { 23, 14898, 1486 }, { 23, 15550, 834 },
+ { 23, 16203, 181 }, { 24, 390, 15994 }, { 24, 929, 15455 },
+ { 24, 1468, 14916 }, { 24, 2007, 14377 }, { 24, 2546, 13838 },
+ { 24, 3085, 13299 }, { 24, 3624, 12760 }, { 24, 4163, 12221 },
+ { 24, 4703, 11681 }, { 24, 5242, 11142 }, { 24, 5781, 10603 },
+ { 24, 6320, 10064 }, { 24, 6859, 9525 }, { 24, 7398, 8986 },
+ { 24, 7937, 8447 }, { 24, 8476, 7908 }, { 24, 9016, 7368 },
+ { 24, 9555, 6829 }, { 24, 10094, 6290 }, { 24, 10633, 5751 },
+ { 24, 11172, 5212 }, { 24, 11711, 4673 }, { 24, 12250, 4134 },
+ { 24, 12789, 3595 }, { 24, 13329, 3055 }, { 24, 13868, 2516 },
+ { 24, 14407, 1977 }, { 24, 14946, 1438 }, { 24, 15485, 899 },
+ { 24, 16024, 360 }, { 25, 148, 16236 }, { 25, 593, 15791 },
+ { 25, 1039, 15345 }, { 25, 1484, 14900 }, { 25, 1929, 14455 },
+ { 25, 2375, 14009 }, { 25, 2820, 13564 }, { 25, 3265, 13119 },
+ { 25, 3711, 12673 }, { 25, 4156, 12228 }, { 25, 4601, 11783 },
+ { 25, 5047, 11337 }, { 25, 5492, 10892 }, { 25, 5937, 10447 },
+ { 25, 6383, 10001 }, { 25, 6828, 9556 }, { 25, 7273, 9111 },
+ { 25, 7719, 8665 }, { 25, 8164, 8220 }, { 25, 8609, 7775 },
+ { 25, 9054, 7330 }, { 25, 9500, 6884 }, { 25, 9945, 6439 },
+ { 25, 10390, 5994 }, { 25, 10836, 5548 }, { 25, 11281, 5103 },
+ { 25, 11726, 4658 }, { 25, 12172, 4212 }, { 25, 12617, 3767 },
+ { 25, 13062, 3322 }, { 25, 13508, 2876 }, { 25, 13953, 2431 },
+ { 25, 14398, 1986 }, { 25, 14844, 1540 }, { 25, 15289, 1095 },
+ { 25, 15734, 650 }, { 25, 16180, 204 }, { 26, 199, 16185 },
+ { 26, 567, 15817 }, { 26, 935, 15449 }, { 26, 1303, 15081 },
+ { 26, 1670, 14714 }, { 26, 2038, 14346 }, { 26, 2406, 13978 },
+ { 26, 2774, 13610 }, { 26, 3142, 13242 }, { 26, 3510, 12874 },
+ { 26, 3877, 12507 }, { 26, 4245, 12139 }, { 26, 4613, 11771 },
+ { 26, 4981, 11403 }, { 26, 5349, 11035 }, { 26, 5717, 10667 },
+ { 26, 6084, 10300 }, { 26, 6452, 9932 }, { 26, 6820, 9564 },
+ { 26, 7188, 9196 }, { 26, 7556, 8828 }, { 26, 7924, 8460 },
+ { 26, 8292, 8092 }, { 26, 8659, 7725 }, { 26, 9027, 7357 },
+ { 26, 9395, 6989 }, { 26, 9763, 6621 }, { 26, 10131, 6253 },
+ { 26, 10499, 5885 }, { 26, 10866, 5518 }, { 26, 11234, 5150 },
+ { 26, 11602, 4782 }, { 26, 11970, 4414 }, { 26, 12338, 4046 },
+ { 26, 12706, 3678 }, { 26, 13073, 3311 }, { 26, 13441, 2943 },
+ { 26, 13809, 2575 }, { 26, 14177, 2207 }, { 26, 14545, 1839 },
+ { 26, 14913, 1471 }, { 26, 15280, 1104 }, { 26, 15648, 736 },
+ { 26, 16016, 368 }
+};
+
+#elif (WINDOW_LEN == 256 && SAM_FREQ == 8000 )
+
/* filter with 26 frequency bins (to be used with 16 dimension mfcc) : */
static const mel_filter_type mel_filter26[ WINDOW_LEN / 2 ] = {
{ 26, 13853, 2530 },{ 26, 15118, 1265 }
};
+#endif
+
/*
{ 0, 0, 16384 },{ 0, 3136, 5184 },{ 0, 12544, 256 },
};
+#if (WINDOW_LEN == 256 )
const short w_real[ WINDOW_LEN / 2 ] = /* for FFT */
{
2845, 2785, 2735, 2694, 2662, 2639, 2625, 2621
};
+#elif ( WINDOW_LEN == 512 )
+const short w_real[ WINDOW_LEN / 2 ] = /* for 512-point FFT; values match cos(2*pi*k/512) in Q15 (scale 32767), k = 0..255 */
+{ 32767, 32765, 32757, 32745, 32728, 32705, 32678, 32646, 32609, 32567,
+ 32521, 32469, 32412, 32351, 32285, 32213, 32137, 32057, 31971, 31880,
+ 31785, 31685, 31580, 31470, 31356, 31237, 31113, 30985, 30852, 30714,
+ 30571, 30424, 30273, 30117, 29956, 29791, 29621, 29447, 29268, 29085,
+ 28898, 28706, 28510, 28310, 28105, 27896, 27683, 27466, 27245, 27019,
+ 26790, 26556, 26319, 26077, 25832, 25582, 25329, 25072, 24811, 24547,
+ 24279, 24007, 23731, 23452, 23170, 22884, 22594, 22301, 22005, 21705,
+ 21403, 21096, 20787, 20475, 20159, 19841, 19519, 19195, 18868, 18537,
+ 18204, 17869, 17530, 17189, 16846, 16499, 16151, 15800, 15446, 15090,
+ 14732, 14372, 14010, 13645, 13279, 12910, 12539, 12167, 11793, 11417,
+ 11039, 10659, 10278, 9896, 9512, 9126, 8739, 8351, 7962, 7571,
+ 7179, 6786, 6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612,
+ 3212, 2811, 2410, 2009, 1608, 1206, 804, 402, 0, -402,
+ -804, -1206, -1608, -2009, -2410, -2811, -3212, -3612, -4011, -4410,
+ -4808, -5205, -5602, -5998, -6393, -6786, -7179, -7571, -7962, -8351,
+ -8739, -9126, -9512, -9896, -10278, -10659, -11039, -11417, -11793, -12167,
+ -12539, -12910, -13279, -13645, -14010, -14372, -14732, -15090, -15446, -15800,
+ -16151, -16499, -16846, -17189, -17530, -17869, -18204, -18537, -18868, -19195,
+ -19519, -19841, -20159, -20475, -20787, -21096, -21403, -21705, -22005, -22301,
+ -22594, -22884, -23170, -23452, -23731, -24007, -24279, -24547, -24811, -25072,
+ -25329, -25582, -25832, -26077, -26319, -26556, -26790, -27019, -27245, -27466,
+ -27683, -27896, -28105, -28310, -28510, -28706, -28898, -29085, -29268, -29447,
+ -29621, -29791, -29956, -30117, -30273, -30424, -30571, -30714, -30852, -30985,
+ -31113, -31237, -31356, -31470, -31580, -31685, -31785, -31880, -31971, -32057,
+ -32137, -32213, -32285, -32351, -32412, -32469, -32521, -32567, -32609, -32646,
+ -32678, -32705, -32728, -32745, -32757, -32765
+};
+const short w_imag[ WINDOW_LEN / 2 ] = /* companion to w_real; values match -sin(2*pi*k/512) in Q15 (scale 32767), k = 0..255 */
+{ 0, -402, -804, -1206, -1608, -2009, -2410, -2811, -3212, -3612,
+ -4011, -4410, -4808, -5205, -5602, -5998, -6393, -6786, -7179, -7571,
+ -7962, -8351, -8739, -9126, -9512, -9896, -10278, -10659, -11039, -11417,
+ -11793, -12167, -12539, -12910, -13279, -13645, -14010, -14372, -14732, -15090,
+ -15446, -15800, -16151, -16499, -16846, -17189, -17530, -17869, -18204, -18537,
+ -18868, -19195, -19519, -19841, -20159, -20475, -20787, -21096, -21403, -21705,
+ -22005, -22301, -22594, -22884, -23170, -23452, -23731, -24007, -24279, -24547,
+ -24811, -25072, -25329, -25582, -25832, -26077, -26319, -26556, -26790, -27019,
+ -27245, -27466, -27683, -27896, -28105, -28310, -28510, -28706, -28898, -29085,
+ -29268, -29447, -29621, -29791, -29956, -30117, -30273, -30424, -30571, -30714,
+ -30852, -30985, -31113, -31237, -31356, -31470, -31580, -31685, -31785, -31880,
+ -31971, -32057, -32137, -32213, -32285, -32351, -32412, -32469, -32521, -32567,
+ -32609, -32646, -32678, -32705, -32728, -32745, -32757, -32765, -32767, -32765,
+ -32757, -32745, -32728, -32705, -32678, -32646, -32609, -32567, -32521, -32469,
+ -32412, -32351, -32285, -32213, -32137, -32057, -31971, -31880, -31785, -31685,
+ -31580, -31470, -31356, -31237, -31113, -30985, -30852, -30714, -30571, -30424,
+ -30273, -30117, -29956, -29791, -29621, -29447, -29268, -29085, -28898, -28706,
+ -28510, -28310, -28105, -27896, -27683, -27466, -27245, -27019, -26790, -26556,
+ -26319, -26077, -25832, -25582, -25329, -25072, -24811, -24547, -24279, -24007,
+ -23731, -23452, -23170, -22884, -22594, -22301, -22005, -21705, -21403, -21096,
+ -20787, -20475, -20159, -19841, -19519, -19195, -18868, -18537, -18204, -17869,
+ -17530, -17189, -16846, -16499, -16151, -15800, -15446, -15090, -14732, -14372,
+ -14010, -13645, -13279, -12910, -12539, -12167, -11793, -11417, -11039, -10659,
+ -10278, -9896, -9512, -9126, -8739, -8351, -7962, -7571, -7179, -6786,
+ -6393, -5998, -5602, -5205, -4808, -4410, -4011, -3612, -3212, -2811,
+ -2410, -2009, -1608, -1206, -804, -402
+};
+
+const short hw[ WINDOW_LEN ] = /* symmetric 512-entry window in Q15, min 2621 (~0.08), peak 32767; NOTE(review): looks like a Hamming window - confirm */
+{ 2621, 2622, 2626, 2632, 2640, 2650, 2662, 2677, 2694, 2714,
+ 2735, 2759, 2785, 2814, 2844, 2877, 2912, 2949, 2989, 3031,
+ 3075, 3121, 3169, 3220, 3273, 3328, 3385, 3444, 3506, 3569,
+ 3635, 3703, 3773, 3845, 3919, 3996, 4074, 4155, 4237, 4321,
+ 4408, 4496, 4587, 4680, 4774, 4870, 4969, 5069, 5171, 5275,
+ 5381, 5489, 5599, 5710, 5824, 5939, 6056, 6174, 6295, 6417,
+ 6541, 6666, 6793, 6922, 7052, 7185, 7318, 7453, 7590, 7728,
+ 7868, 8010, 8152, 8296, 8442, 8589, 8737, 8887, 9038, 9191,
+ 9344, 9499, 9655, 9813, 9971, 10131, 10292, 10454, 10617, 10781,
+ 10946, 11113, 11280, 11448, 11617, 11787, 11958, 12130, 12303, 12476,
+ 12650, 12825, 13001, 13178, 13355, 13533, 13711, 13890, 14070, 14250,
+ 14431, 14612, 14793, 14975, 15158, 15341, 15524, 15708, 15892, 16076,
+ 16260, 16445, 16629, 16814, 16999, 17185, 17370, 17555, 17741, 17926,
+ 18111, 18296, 18481, 18667, 18851, 19036, 19221, 19405, 19589, 19773,
+ 19956, 20139, 20322, 20504, 20686, 20867, 21048, 21229, 21409, 21588,
+ 21767, 21945, 22122, 22299, 22475, 22651, 22825, 22999, 23172, 23344,
+ 23516, 23686, 23856, 24025, 24192, 24359, 24525, 24689, 24853, 25016,
+ 25177, 25337, 25496, 25654, 25811, 25967, 26121, 26274, 26426, 26576,
+ 26725, 26873, 27019, 27164, 27308, 27450, 27590, 27729, 27867, 28003,
+ 28137, 28270, 28401, 28531, 28659, 28785, 28910, 29033, 29154, 29274,
+ 29391, 29507, 29622, 29734, 29845, 29953, 30060, 30165, 30268, 30370,
+ 30469, 30566, 30662, 30755, 30847, 30936, 31024, 31109, 31193, 31274,
+ 31354, 31431, 31506, 31579, 31650, 31719, 31786, 31851, 31913, 31974,
+ 32032, 32088, 32142, 32194, 32243, 32291, 32336, 32379, 32419, 32458,
+ 32494, 32528, 32560, 32589, 32617, 32642, 32664, 32685, 32703, 32719,
+ 32733, 32744, 32753, 32760, 32764, 32767, 32767, 32764, 32760, 32753,
+ 32744, 32733, 32719, 32703, 32685, 32664, 32642, 32617, 32589, 32560,
+ 32528, 32494, 32458, 32419, 32379, 32336, 32291, 32243, 32194, 32142,
+ 32088, 32032, 31974, 31913, 31851, 31786, 31719, 31650, 31579, 31506,
+ 31431, 31354, 31274, 31193, 31109, 31024, 30936, 30847, 30755, 30662,
+ 30566, 30469, 30370, 30268, 30165, 30060, 29953, 29845, 29734, 29622,
+ 29507, 29391, 29274, 29154, 29033, 28910, 28785, 28659, 28531, 28401,
+ 28270, 28137, 28003, 27867, 27729, 27590, 27450, 27308, 27164, 27019,
+ 26873, 26725, 26576, 26426, 26274, 26121, 25967, 25811, 25654, 25496,
+ 25337, 25177, 25016, 24853, 24689, 24525, 24359, 24192, 24025, 23856,
+ 23686, 23516, 23344, 23172, 22999, 22825, 22651, 22475, 22299, 22122,
+ 21945, 21767, 21588, 21409, 21229, 21048, 20867, 20686, 20504, 20322,
+ 20139, 19956, 19773, 19589, 19405, 19221, 19036, 18851, 18667, 18481,
+ 18296, 18111, 17926, 17741, 17555, 17370, 17185, 16999, 16814, 16629,
+ 16445, 16260, 16076, 15892, 15708, 15524, 15341, 15158, 14975, 14793,
+ 14612, 14431, 14250, 14070, 13890, 13711, 13533, 13355, 13178, 13001,
+ 12825, 12650, 12476, 12303, 12130, 11958, 11787, 11617, 11448, 11280,
+ 11113, 10946, 10781, 10617, 10454, 10292, 10131, 9971, 9813, 9655,
+ 9499, 9344, 9191, 9038, 8887, 8737, 8589, 8442, 8296, 8152,
+ 8010, 7868, 7728, 7590, 7453, 7318, 7185, 7052, 6922, 6793,
+ 6666, 6541, 6417, 6295, 6174, 6056, 5939, 5824, 5710, 5599,
+ 5489, 5381, 5275, 5171, 5069, 4969, 4870, 4774, 4680, 4587,
+ 4496, 4408, 4321, 4237, 4155, 4074, 3996, 3919, 3845, 3773,
+ 3703, 3635, 3569, 3506, 3444, 3385, 3328, 3273, 3220, 3169,
+ 3121, 3075, 3031, 2989, 2949, 2912, 2877, 2844, 2814, 2785,
+ 2759, 2735, 2714, 2694, 2677, 2662, 2650, 2640, 2632, 2626,
+ 2622, 2621
+};
+#endif
index 76e1a42605466bf38e4740be0b4974506c34488f..aa314af75c5ef81f60c51faeaa5372fb6ff8ce65 100644 (file)
/*--------------------------------*/
/* Constants that need to be exposed so they can be used elsewhere */
-/* window len = FFT len */
-#define WINDOW_LEN 256
-
/* overlap between two frames */
#define OVERLAP (WINDOW_LEN - FRAME_LEN)
#define MAX_DIM_MFCC10 10
/* maximum number of mfcc */
-#define MAX_DIM_MFCC16 16
+#define MAX_DIM_MFCC16 16
+
+/* maximum number of mfcc */
+#define MAX_DIM_MFCC13 13
/* static + dynamic feature vector size*/
#define MAX_DIM (2 * MAX_DIM_MFCC16)
index d65cc1763287876918e7772a72cbfa568fb60e9e..c763fdbf14e0be01424e2d6a407431b3d053f47a 100755 (executable)
/* exp(-1) = 0.3679, Q 15 */
#define SSNLDNTC 12055
-/* ----------------------- Spectral Subtraction ----------------------------------------------------------
- The following functions are added to spectral subtraction: 1) VAD for noise estimation, which is
- copied from ETSI advanced front end, 2) Noise estimation with the VAD information, which basically
- updates noise estimates only in non-speech intervals. 3) Bernotti-style noise removal.
+/* ----------------------- Spectral Subtraction -------------------------------
+ The following functions are added to spectral subtraction: 1) VAD for
+ noise estimation, 2) Noise estimation with the VAD information, which
+ basically updates noise estimates only in non-speech intervals.
+ 3) Berouti-style noise removal.
- The parameters of the noise subtraction were tuned with 10dB 9 types noise plus WAVES noise in three
- driving conditions, namely highway, stopngo and parked. In all noise conditions, except for TIMIT noise,
- word error rates for 90 English name recognition with phone-book trained models was below 2 percent.
-
- ------------------------------------------------------------------------------------------------------*/
+ -------------------------------------------------------------------------*/
//#define SS_OUT_PSD /* dump spectral subtraction results */
4681 /* 1/7 in Q15 */, 4096 /* 1/8 in Q15 */,
3641 /* 1/9 in Q15 */, 3277 /* 1/10 in Q15 */};
-/* ---------------- parameters of VAD for noise supression ---------------------------*/
+/* ---------------- parameters of VAD for noise suppression -----------------*/
#define BUFFER_SIZE 7 // Number of frames in analysis buffer
#define SNR_THRESHOLD_UPD_LTE 640 /* 20/16 in Q9 */
#define MIN_FRAME 10
#define PROB_ALPHA 6554 // 0.2 in Q15 used for speech presence probability
-#define NSS_MIN_FRAME_ENR -3401 /* 0.01 in log2 in Q9 used for frame energy estimation*/
+/* 0.01 in log2 in Q9 used for frame energy estimation*/
+#define NSS_MIN_FRAME_ENR -3401
-/* ---------------- Beroutti-style noise supression parameters -------------------------*/
+/* ---------------- Berouti-style noise suppression parameters --------------*/
/* must be <= 32 */
#define ALPHA 30
#define NSS_ALPHA 29491 /* 0.9 in Q15 */
index fa29b490b4aa5db232cfe33dba6b5a8810c61494..c3a46761309d9d05ea6611db94a54c49d803c62d 100755 (executable)
//#define DBG_CONF
-
+#ifdef RAPID_JAC
void rj_noise_compensation( gmhmm_type* gv)
{
short j;
return eTIesrEngineJACSuccess;
}
+#endif /* #ifdef RAPID_JAC */
+
+
+
+#ifdef OFFLINE_CLS
+/*----------------------------------------------------------------------------
+ rj_offline_inv_of_average_var
+
+ Calculate the average variance vector over all variance vectors used in the
+ grammar, and calculate the Gaussian pdf normalization constant based on the
+ average variance. This defines the global variance over the set of Gaussians,
+ and a global distribution.
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+ vqhmm: Pointer to Gaussian VQ structure
+
+ -----------------------------------*/
static void rj_offline_inv_of_average_var(gmhmm_type * gv, OFFLINE_VQ_HMM_type *vqhmm)
{
register short i, j;
@@ -181,9 +199,25 @@ static void rj_offline_inv_of_average_var(gmhmm_type * gv, OFFLINE_VQ_HMM_type *
gauss_det_const( vqhmm->inv_ave_var, 2, gv->muScaleP2, gv->nbr_dim);
}
-/* @param p_ava_mem : pointer to the next avaliable memory space
- return TRUE if success, else return FALSE
-*/
+
+/*---------------------------------------------------------------------------
+ rj_open_ROM
+
+ Read the vq clustering information from files prepared offline. The clustering
+ information includes the number of Gaussian vectors in the active grammar,
+ the class associated with each Gaussian mean vector in the grammar, the total
+ number of vq classes, and the Gaussian mean vector centroid associated with
+ each vq class.
+
+ Allocate space in the vq HMM structure to hold the data for rapid jac
+ calculation, defining an online reference model and possibly updating it, and
+ using vq distributions as part of voice activity detection.
+
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+
+ ---------------------------------*/
static TIesrEngineStatusType rj_open_ROM( gmhmm_type *gv )
{
Boolean failed = FALSE;
else return eTIesrEngineSuccess;
}
+
+/*----------------------------------------------------------------------------
+ rj_vq
+
+ Initialize rapid JAC Gaussian vector quantization class information, allocate
+ space for JAC, ORM and VAD calculations, and determine global variance of
+ Gaussian components used in active grammar.
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+
+ -------------------------------------*/
static TIesrEngineStatusType rj_vq(gmhmm_type * gv)
{
TIesrEngineStatusType bSucc = eTIesrEngineVQHMMMemorySize;
-
+
+ /* Read Gaussian vq class data and allocate space for calculations */
if ((bSucc = rj_open_ROM(gv))!= eTIesrEngineVQHMMMemorySize)
- /* prepare VQ HMM */
+
+ /* prepare VQ HMM. Determine global average variance and Gaussian const. */
rj_offline_inv_of_average_var(gv, (OFFLINE_VQ_HMM_type*) gv->offline_vq);
return bSucc;
}
+#endif /* #ifdef OFFLINE_CLS */
+
+
+
+#if defined( USE_GAUSS_SELECT )
+
/* @param perc_core_clusters_q15 precentage of core clusters, value in Q15,
@param perc_inter_clusters_q15 percentage of intermediate clusters, value in Q15
@param num_frms_gbg: number of begining frames to construct gabage model
gmhmm_type * gv )
{
-#ifdef RAPID_JAC
+//#ifdef RAPID_JAC
OFFLINE_VQ_HMM_type * vqhmm = (OFFLINE_VQ_HMM_type*) gv->offline_vq;
vqhmm->sVQ_CLS_TO_EVAL = q15_x(perc_core_clusters_q15, vqhmm->n_cs);
vqhmm->sVQ_NUM_INTER = q15_x(perc_inter_clusters_q15, vqhmm->n_cs) ;
-#endif
+//#endif
return TRUE;
}
+#endif /* #if defined( USE_GAUSS_SELECT ) */
+
+
+#ifdef OFFLINE_CLS
/* @param l_cnter pointer to a counter, each time, the function is evaluated, the counter will be added with one. If the pointer is NULL, no operation on the counter.
@param iargmix the mixture index corresponding to the largest likelihood score
the function evaluates CI/GI-HMM score */
@@ -411,7 +469,12 @@ short rj_gauss_obs_score_f(short *feature, int pdf_idx, gmhmm_type*gv, long * l_
return total_scr;
}
+#endif /* OFFLINE_CLS */
+
+
+
#if defined(RAPID_JAC) || defined(USE_CONFIDENCE)
+
/* got the approximated Gaussian score */
static short rj_dist(short *mfcc_feat, short* mean, short * inv, short gconst, short dm)
{
@@ -432,7 +495,8 @@ static short rj_dist(short *mfcc_feat, short* mean, short * inv, short gconst, s
return LONG2SHORT(dist);
}
-#endif
+
+#endif /* defined(RAPID_JAC) || defined(USE_CONFIDENCE) */
#ifdef DEV_CLS2PHONE
}
#endif
+#ifdef USE_ONLINE_REF
+/*----------------------------------------------------------------------------
+ rj_load
+
+ Load vq class indices and class probabilities for the vq classes currently
+ comprising the ORM model.
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+ pf: File pointer to open JAC state file
+ -------------------------------------*/
TIesrEngineStatusType rj_load(gmhmm_type * gv, FILE * pf)
{
#ifdef USE_ORM_PU
return eTIesrEngineSuccess;
}
+#endif /* USE_ONLINE_REF */
+
+
+#ifdef USE_ONLINE_REF
TIesrEngineStatusType rj_save(gmhmm_type * gv, FILE * pf)
{
#ifdef USE_ORM_PU
#endif
return eTIesrEngineSuccess;
}
-
+
+#endif /* USE_ONLINE_REF */
+
+
+
#ifdef USE_ORM_PU
/* update prior probability of the reference model */
static void rj_update_prior(OFFLINE_VQ_HMM_type* vqhmm)
}
-#endif
+#endif /* USE_ORM_PU */
+
+
#ifdef USE_ORM_VAD
return NULL;
}
}
-#endif
+
/* GMM-based VAD
if SAD is not verified, buffer the input speech and return NULL,
#endif
}
+#endif /* USE_ORM_VAD */
+
+
+#if defined(USE_GAUSS_SELECT) || defined(USE_ORM_VAD)
/* The function conducts two operations:
1) if the current frame number is smaller than a threshold,
it records the best matched Gaussian cluster and put it to construct a reference model
@@ -896,6 +985,10 @@ TIESRENGINECOREAPI_API void rj_update_noise(gmhmm_type * gv, short * mfcc_feat,
#endif
}
+#endif
+
+
+
#ifdef DEV_ONLINE_GARBAGE
void SFB_rj_update_noise(gmhmm_type * gv, short * mfcc_feat)
{
}
#endif
+
+#if defined(USE_GAUSS_SELECT) || defined(USE_ONLINE_REF)
/* This function conducts scoring of Gaussian clusters,
It also assigns Gaussian clusters into categories such as core, intermedium, and outmost. */
short rj_clear_obs_scr(gmhmm_type * gv, short *mfcc_feat)
{
short imax = 0;
-#ifdef RAPID_JAC
/* find the closest clusters */
OFFLINE_VQ_HMM_type *vqhmm = (OFFLINE_VQ_HMM_type*) gv->offline_vq;
vqhmm->max_dist = maxdist;
-#endif
return imax;
}
+#endif /* #if defined(USE_GAUSS_SELECT) || defined(USE_ONLINE_REF) */
+
+
+#ifdef OFFLINE_CLS
+/*----------------------------------------------------------------------------
+ rj_init
+
+ Initialize all vq-cluster distribution distances to observed feature vector
+ to zero, and initialize parameters for ORM and ORM VAD in preparation for
+ processing an utterance.
+
+ Arguments:
+ gv: Pointer to ASR instance structure.
+
+ -----------------------------------*/
void rj_init(gmhmm_type *gv)
{
short i;
fclose(fp);
#endif
}
-
-/* @param pFlexModelPath The path saving models for TIesrFlex
- @param perc_core_clusters_q15 precentage of core clusters, value in Q15,
- @param perc_inter_clusters_q15 percentage of intermediate clusters, value in Q15
- @param gv pointer to decoder
- @param pMem pointer to avaliable memory space
- return TRUE if success, else return FALSE
-*/
+
+
+/*---------------------------------------------------------------------------
+ rj_open
+
+ Initialize the vq structure and set parameters in preparation for rapid
+ JAC calculations for an utterance, using vector quantized classes of Gaussians
+ to determine JAC adaptation parameters.
+
+ Arguments:
+
+ u_perc_cor: percentage of core clusters, in Q15
+ u_perc_inte:percentage of intermediate clusters, in Q15
+ gv: Pointer to ASR instance structure
+
+ -----------------------------------*/
TIesrEngineStatusType rj_open(ushort u_perc_cor,
ushort u_perc_inte,
gmhmm_type *gv )
pOffline->mem_count = 0;
bSucc = rj_vq(gv);
- if (bSucc == eTIesrEngineVQHMMMemorySize) return bSucc;
+ if (bSucc == eTIesrEngineVQHMMMemorySize) return bSucc;
- if(rj_set_param(u_perc_cor, u_perc_inte, gv) == FALSE)
- return eTIesrEngineVQHMMGarbage;
+#ifdef USE_GAUSS_SELECT
+ if( rj_set_param( u_perc_cor, u_perc_inte, gv ) == FALSE )
+ return eTIesrEngineVQHMMGarbage;
+#endif
#ifdef USE_ORM_PU
pOffline->inum_prior = 0;
return bSucc;
}
+#endif /* ifdef OFFLINE_CLS */
+
+
+#if defined(USE_ONLINE_REF)
/* setup parameters for online reference modeling, VAD and EOS detection
@param num_frms_gbg: number of begining frames to construct gabage model
@param noise_level : the noise level threshold to choose an LLR threshold
return TRUE;
}
+#endif /* defined(USE_ONLINE_REF) */
+
+
+#ifdef OFFLINE_CLS
void rj_close(gmhmm_type* gv)
{
}
+#endif /* ifdef OFFLINE_CLS */
+
+
+
#ifdef MODEL_LEVEL_PRUNE
void mlp_set_param(ushort uPercentage, gmhmm_type * gv)
{
}
#endif
+/* Confidence measure functions */
+#ifdef USE_CONFIDENCE
+
TIESRENGINECOREAPI_API void cm_freeze(ConfType* pconf)
{
if( pconf )
#endif
}
+
+/*-----------------------------------------------------------------------------
+ cm_load
+
+ Load statistical and update parameters for the confidence measure if the
+ JAC state file pointer exists. Otherwise initialize the parameters to default
+ values.
+
+ Arguments:
+ fp: File pointer to open JAC state file or NULL
+ gv: Pointer to ASR instance structure
+
+--------------------------------- */
TIESRENGINECOREAPI_API void cm_load(FILE* fp, gmhmm_type *gv)
{
#ifdef USE_CONFIDENCE
#endif
}
-#ifdef USE_CONFIDENCE
+
/* return Q6 */
static short cm_logistic(short *x, gmhmm_type * gv)
{
return LONG2SHORT(ltmp);
}
-#endif
-#ifdef USE_CONFIDENCE
+
+
/* online updating of parameters for hypothesis testing */
static void cm_update_hypo(short *feat, CM_hypo *hp)
{
#endif
}
-#endif
-#ifdef USE_CONFIDENCE
+
+
/* @param feat Q6 score
return log-likelihood ratio between H0 and H1 hypothesis, Q6 */
static short cm_hypo_ratio(short *feat, CM_hypo *h0, CM_hypo *h1)
return LONG2SHORT(diff);
}
-#endif
-#ifdef USE_CONFIDENCE
+
/* calculate distance of
p(H0|X)*(log(p(X|H0) - log(p(X|H1))
*/
return LONG2SHORT(ltmp >> 6);
}
-#endif
-#ifdef USE_CONFIDENCE
+
+
/* return the confidence score due to N-best numbers.
works as a bias to the confidence score reported from else */
static short cm_score_nbest(gmhmm_type * gv)
}
return 0;
}
-#endif
-#ifdef USE_CONFIDENCE
+
+
+
static void cm_calc_confidence( gmhmm_type * gv)
{
ConfType * pConf = (ConfType*) gv->pConf;
gv->cm_score += pConf->cm_scr[4];
}
-#endif
+
+#endif /* USE_CONFIDENCE */
/* @param iTrue indicator of the acceptance or rejection, Q6
}
#endif
+
+
+#ifdef USE_CONFIDENCE
short cm_score(gmhmm_type *gv)
{
ConfType * pConf = (ConfType*) (gv->pConf);
#endif
}
+#endif /* USE_CONFIDENCE */
+
+
#ifdef USE_AVL_TREE
index 1661a8d7ee8d871c5a94397060c31c20813ac887..df78fb45bd19b377530a365b859bbd03fbf0e9b1 100755 (executable)
TIESRENGINECOREAPI_API void rj_noise_compensation(gmhmm_type *cd_hmms);
TIESRENGINECOREAPI_API TIesrEngineJACStatusType rj_compensate(gmhmm_type * gv);
-/* @param pFlexModelPath The path saving models for TIesrFlex
- @param perc_core_clusters_q15 precentage of core clusters, value in Q15,
- @param perc_inter_clusters_q15 percentage of intermediate clusters, value in Q15
- @param gv pointer to decoder
- @param pMem pointer to avaliable memory space
- return TRUE if success, else return FALSE
-*/
+/*---------------------------------------------------------------------------
+ rj_open
+
+ Initialize the vq structure and set parameters in preparation for rapid
+ JAC calculations for an utterance, using vector quantized classes of Gaussians
+ to determine JAC adaptation parameters.
+
+ Arguments:
+
+ u_perc_cor: percentage of core clusters, in Q15
+ u_perc_inte:percentage of intermediate clusters, in Q15
+ gv: Pointer to ASR instance structure
+
+ -----------------------------------*/
TIESRENGINECOREAPI_API TIesrEngineStatusType rj_open(ushort u_perc_cor,
ushort u_perc_inte,
gmhmm_type *gv );
TIESRENGINECOREAPI_API short rj_clear_obs_scr(gmhmm_type * gv, short *mfcc_feat);
+/*----------------------------------------------------------------------------
+ rj_load
+ Load vq class indices and class probabilities for the vq classes currently
+ comprising the ORM model.
+
+ Arguments:
+ gv: Pointer to ASR instance structure
+ pf: File pointer to open JAC state file
+ -------------------------------------*/
TIESRENGINECOREAPI_API TIesrEngineStatusType rj_load(gmhmm_type * gv, FILE* pf);
+
TIESRENGINECOREAPI_API TIesrEngineStatusType rj_save(gmhmm_type * gv, FILE* pf);
short rj_gauss_obs_score_f(short *feature, int pdf_idx, gmhmm_type*gv, long * l_cnter, short * iargmix);
+
+/*----------------------------------------------------------------------------
+ rj_init
+
+ Initialize all vq-cluster distribution distances to observed feature vector
+ to zero, and initialize parameters for ORM and ORM VAD in preparation for
+ processing an utterance.
+
+ Arguments:
+ gv: Pointer to ASR instance structure.
+
+ -----------------------------------*/
void rj_init(gmhmm_type *gv);
+
/* @param perc_core_clusters_q15 precentage of core clusters, value in Q15,
@param perc_inter_clusters_q15 percentage of intermediate clusters, value in Q15
return TRUE if sucess, FALSE if can not meet the requirement
index 8b4dc364e30852fc233936eb397e3e17c099faf6..8c38592bfeb27214d5feba10818e280b447c0f42 100755 (executable)
--- a/TIesrEngine/src/sbc.cpp
+++ b/TIesrEngine/src/sbc.cpp
#include "load_user.h"
+/* SBC code is not used if USE_SBC not in configuration */
+#ifdef USE_SBC
+
//#define _SBC_DEBUG
#ifdef _SBC_DEBUG
static void sbc_assert(char expr, short dim){
else return NULL;
}
-/*--------------------------------
- @param bTriphone: TRUE if the HMM2phone maps triphone to monophone
- FALSE if the HMM2phone maps monophone to itself
-*/
+/*----------------------------------------------------------------------------
+ sbc_load_hmm2phone
+
+ Load the mapping from each set-independent HMM to phone index. This is used
+ by SBC to determine the regression parameters to update for each recognized
+ HMM. The memory to hold the mapping may be allocated dynamically or from
+ the SBC structure memory pool based on TIesr configuration.
+
+ Arguments:
+ gdir: Directory containing current HMM models and grammar network
+ n_hmms: Number of set-independent HMMs
+ sa: Pointer to SBC adaptation structure
+ bTriphone: Boolean indicating to use mapping data from file.
+
+----------------------------------*/
TIESRENGINECOREAPI_API
TIesrEngineHLRStatusType sbc_load_hmm2phone(const char gdir[],
ushort n_hmms,
@@ -399,11 +413,16 @@ static FeaHLRAccType* sbc_set_transform_idx(ushort inodeidx, ushort transform_id
}
-/*--------------------------------
+/*---------------------------------------------------------------------------
sbc_open
- This function clears the contents of the FeaHLRAccType structure
- that holds the SBC state information.
+ This function clears the pointers and parameters of the SBC adaptation
+ structure that holds the SBC state information in preparation for loading
+ the data in the structure.
+
+ Arguments:
+ sa: Pointer to SBC adaptation state structure
+
--------------------------------*/
TIESRENGINECOREAPI_API void sbc_open( FeaHLRAccType *sa )
{
#endif
-/*================================================================
- sbc_init_tree (from hlr_init_tree)
+/*-----------------------------------------------------------------------------
+ sbc_init_tree
- This function loads the structure of the regression tree, and the
- monophone to tree terminal node array. Note that it must be
- compatible with the monophone index assignment in flexphone.
+ This function loads the structure of the hierarchical regression tree, and the
+ monophone to tree terminal node array in the SBC adaptation structure. Note
+ that it must be compatible with the monophone index assignment from TIesrFlex.
+ It also initializes the SBC bias transforms and the statistical accumulators.
+ If there is a failure during initialization, all parameters in the SBC
+ adaptation structure are cleared.
Notes:
- The hlr information is as follows: The first unsigned short is the
+ The tree information is as follows: The first unsigned short is the
number of terminal nodes in the binary tree. ( In concept, each
terminal node corresponds to one of the monophones in the flexphone
list, but this is not really necessary. The only thing required is
allocated is long data. It is assumed that long data and short data
are powers of two bytes in size.
- if (load_monophone_only) then memory for tree is not allocated.
+ if boolean load_monophone_only set, then memory for regression tree is
+ not allocated.
- @param bTriphone: TRUE if maps triphone to monophone
- FALSE if maps monophone to itself
- ----------------------------------------------------------------*/
+ Arguments:
+ sa: Pointer to SBC adaptation structure
+ fnDir: Directory containing current HMM and grammar network data
+ fname: Full name of file containing phonetic regression tree,
+ load_monophone_only: Do not load regression tree structure
+ cep_dim: Size of feature vector
+ n_hmms: Number of set-independent HMMs
+ bTriphone: Use mapping file from HMM to phone
+
+ ----------------------------------------------------------------*/
TIESRENGINECOREAPI_API TIesrEngineHLRStatusType sbc_init_tree( FeaHLRAccType *sa,
const char* fnDir,
const char *fname,
#endif
}
+
+
+#endif /* USE_SBC in configuration */
\ No newline at end of file
index 600e4e44d0276f9617c54e3f55e185c16a436611..f5d949f0e7c77edcb7ee7ea722f0a33176baada6 100755 (executable)
TIESRENGINECOREAPI_API void sbc_rst_seg_stat(FeaHLRAccType* pSBC );
-/*----------------------------------------------------------------
+/*---------------------------------------------------------------------------
sbc_open
-
- Set all elements of the FeaHLRAccType to opening values. This
- ensures that the structure is constructed correctly.
- ----------------------------------------------------------------*/
+
+ This function clears the pointers and parameters of the SBC adaptation
+ structure that holds the SBC state information in preparation for loading
+ the data in the structure.
+
+ Arguments:
+ pSBC: Pointer to SBC adaptation state structure
+
+ --------------------------------*/
TIESRENGINECOREAPI_API void sbc_open( FeaHLRAccType *pSBC );
@@ -127,7 +132,21 @@ TIESRENGINECOREAPI_API TIesrEngineHLRStatusType sbc_init_tree( FeaHLRAccType* pS
ushort n_hmms,
Boolean bTriphone );
-/* load triphone to monophone mapping list */
+/*----------------------------------------------------------------------------
+ sbc_load_hmm2phone
+
+ Load the mapping from each set-independent HMM to phone index. This is used
+ by SBC to determine the regression parameters to update for each recognized
+ HMM. The memory to hold the mapping may be allocated dynamically or from
+ the SBC structure memory pool based on TIesr configuration.
+
+ Arguments:
+ gdir: Directory containing current HMM models and grammar network
+ n_hmms: Number of set-independent HMMs
+ sa: Pointer to SBC adaptation structure
+ bTriphone: Boolean indicating to use mapping data from file.
+
+----------------------------------*/
TIESRENGINECOREAPI_API TIesrEngineHLRStatusType sbc_load_hmm2phone(const char gdir[],
ushort n_hmms,
FeaHLRAccType *sa,
index b9d2432ff91443824b25b460dcdf7e151f464dc7..bba352884e16aa246930f5d571dbb4948a7a4af5 100755 (executable)
static void Init_Search(gmhmm_type * gv, const short * p_K_mean)
{
ushort i;
- Boolean bUseNBest = TRUE;
- short compType = JAC | SBC | SVA ;
+ Boolean bUseNBest = TRUE;
+
+ /* Use JAC by default */
+ short compType = JAC;
+
+ #ifdef USE_SVA
+ compType |= SVA;
+ #endif
+
+ #ifdef USE_SBC
+ compType |= SBC;
+ #endif
gv->comp_type = compType;
+
gv->scale_feat = gv->scale_mu;
/* Note: feature vector assumed static + delta mfcc */
index 55d1b9ee81bb3d8af170950cc62e395f34d23d34..e29b3cde69b8b6e3cffcc9338c65566eceba34ac 100644 (file)
#ifndef TIESR_CONFIG_H
#define TIESR_CONFIG_H
-/* Live mode feature and JAC processing is desired. To imitate
- file mode processing, comment out the line below*/
+/*--------------------------------------------------------------------
+ Process data as if it is live data. Live mode feature and JAC processing
+ is desired. This should always be used. Not defining this is deprecated.
+ ---------------------------------------------------------------------*/
#define LIVEMODE
-/* In file mode, FILEMODEWAIT determines how many frames are used
- to compute the JAC noise estimate. */
+/*---------------------------------------------------------------------
+ In file mode, FILEMODEWAIT determines how many frames are used
+ to compute the JAC noise estimate. This is deprecated.
+ ---------------------------------------------------------------------*/
+#ifndef LIVEMODE
#define FILEMODEWAIT 10
+#endif
-
-/*--------------------------------*/
-/* Use of live audio devices or file-mode processing */
-/* Also allows printing errors if not using audio */
+/*---------------------------------------------------------------------
+ Use of live audio devices or file-mode processing. Also allows
+ printing errors if not using audio. This should always be set.
+ Not setting this is deprecated.
+ ---------------------------------------------------------------------*/
#define USE_AUDIO
#ifndef USE_AUDIO
#endif
-/*--------------------------------*/
-/* If PRT allows "command" to occur, then printing of HLR
- intermediate result information is allowed */
+/*------------------------------------------------------------
+ If PRT allows "command" to occur, then printing of HLR
+ intermediate result information is allowed
+ -------------------------------------------------------------*/
#define PRT(command) /* command */
-/*--------------------------------*/
-/* Size of model vectors */
+/*-----------------------------------------------------------------
+ Size of model vectors.
+
+ Model vectors may either be 16-bit or byte. If vectors are byte,
+ then static and delta parameters are interleaved.
+ ------------------------------------------------------------------*/
-/* TIesr Engine will store and use byte mean vectors */
+/* TIesr Engine will store and use byte mean vectors if defined. */
#define BIT8MEAN
-/* TIesr Engine will store and use byte inverse variance vectors */
+/* TIesr Engine will store and use byte inverse variance vectors if defined */
// #define BIT8VAR
-/* TIesr Engine will utilize 8 bit feature vectors for doing
+/* TIesr Engine will utilize byte feature vectors for doing
adaptation. At this time this is the only method supported. Work
needs to be done on TIesr in order to use short feature vectors,
since at this time it means that static buffers need to be
//#define COMPACT
-/*--------------------------------*/
-/* Utilize SVA for variance adaptation of models */
+/*-------------------------------------------------------------------
+Utilize SVA for variance adaptation of models. Comment out to disable;
+USE_WSVA below must also be disabled, since defining it re-defines USE_SVA.
+ -------------------------------------------------------------------*/
+
+#define USE_SVA
+
+/*--------------------------------------------------------------------
+ Defining the following macro enables weighted SVA. Implies USE_SVA.
+ Comment out to disable.
+ --------------------------------------------------------------------*/
+#define USE_WSVA
+
+#if defined( USE_WSVA )
#define USE_SVA
+#endif
-/* Qifeng's improved ISVA */
-/* Presently the SVA START_DIMENSION is 1, but further improvements in ISVA
- may result in changing it back to 0. This research is not conclusive yet. */
+
+/*------------------------------------------------------------------------
+ Improved SVA. Presently the SVA START_DIMENSION is 1, but further
+ improvements in ISVA may result in changing it back to 0. This research
+ is not conclusive yet. Comment out to disable.
+ ------------------------------------------------------------------------*/
// #define ISVA
-/* Qifeng's Gaussian cache */
-#define USE_GAUSSIAN_CACHE
-/*--------------------------------*/
-/* Defining the following macro enables IJAC for noisy speech
- recognition */
+/*------------------------------------------------------------------------
+ Use Gaussian cache to reduce calculation of likelihoods.
+ ------------------------------------------------------------------------*/
+#define USE_GAUSSIAN_CACHE
+/*------------------------------------------------------------------------
+ Improved JAC calculations for channel estimate. Comment out to disable.
+ ------------------------------------------------------------------------ */
#define USE_IJAC
-/*--------------------------------*/
-/* Defining the following macro enables weighted SVA */
-#define USE_WSVA
-
-/*--------------------------------*/
-/* Defining the following macro to enables SBC for noisy speech
- recognition */
+/*-------------------------------------------------------------------------
+ Stochastic Bias Compensation for noisy speech recognition. SBC calculates
+ additional model mean biases to be applied based on hierarchical phone tree.
+ Comment out to disable.
+ -------------------------------------------------------------------------*/
#define USE_SBC
-/*--------------------------------*/
-/* Smooth the SBC estimate (also eliminates overflow problem) */
-
+/* Smooth the SBC estimates. Required for SBC. */
+#ifdef USE_SBC
#define SBC_PRIOR
+#endif
-/*--------------------------------*/
-/* Not sure of the use of REC. Not used for live TIESR. */
-/* #define REC */
-/*--------------------------------*/
-/* fast DCT */
+/*-------------------------------------------------------------------
+ Kept for historical reference in code. Now defunct.
+ -------------------------------------------------------------------*/
+/*#define REC */
+
+
+/*---------------------------------------------------------------------
+ Use fast DCT. A fast DCT is only available for a few specific feature
+ sizes. Comment out to disable.
+ ----------------------------------------------------------------------*/
//#define USE_FAST_DCT
-/*--------------------------------*/
-/* Dynamic Prunning
-enable the dynamic pruning (the hmm end pruning is always enabled)
-*/
+/*---------------------------------------------------------------------
+ Dynamic Pruning. Several dynamic pruning heuristics are applied.
+ The hmm end pruning is always enabled. Comment out to disable.
+
+--------------------------------------------------------------------- */
#define DYNAMIC_PRUNE
-// Turn on each part of dynamic pruning individually by its flag
+/* Turn on each part of dynamic pruning individually by its flag */
#define DYNAMIC_PRUNE_RAMP
+#ifdef DYNAMIC_PRUNE_RAMP
+#define DYNAMIC_PRUNE
+#endif
+
#define DYNAMIC_PRUNE_BEAM
+#ifdef DYNAMIC_PRUNE_BEAM
+#define DYNAMIC_PRUNE
+#endif
+
#define DYNAMIC_PRUNE_SUBLINEAR
+#ifdef DYNAMIC_PRUNE_SUBLINEAR
+#define DYNAMIC_PRUNE
+#endif
+
#define DYNAMIC_PRUNE_DEPTH
+#ifdef DYNAMIC_PRUNE_DEPTH
+#define DYNAMIC_PRUNE
+#endif
-/* ----- Spectral Subtraction Support ------------------
- Berouti-style noise supression (ETSI VAD is used inside)
- ----------------------------------------------------- */
+/* -----------------------------------------------------------------
+ Spectral Subtraction Support. Berouti-style noise suppression applied to
+ incoming frames of data prior to converting to MFCC. Comment out to disable.
+ ------------------------------------------------------------------ */
#define USE_SNR_SS
-/*---------- Cluster-dependent JAC ---------------------
- Rapid JAC via offline vector-quantiziation of HMM mean vectors
- ------------------------------------------------------*/
-#define RAPID_JAC
/*-------------------------------------------------------
- Offline vector-quantiziation of HMM mean vectors
-
- -------------------- NOTICE -----------------------------
- | Need to load ROM2cls.bin and cls_centr.bin |
- | ROM2cls.bin : ROM mean index to cluster mapping |
- | cls_center.bin : 20-dimenional centroid |
- --------------------------------------------------------
-*/
+ Offline vector-quantization of HMM mean vectors available
+ in data files. Comment-out to disable.
+
+ Requires VQ cluster information data in the files ROM2cls.bin
+ and cls_centr.bin. ROM2cls.bin holds mean index to cluster mapping,
+ and cls_centr.bin holds centroid vectors.
+ ----------------------------------------------------------*/
+#define OFFLINE_CLS
+
+
+/*--------------------------------------------------------------
+ Rapid JAC via offline vector-quantization of HMM mean vectors.
+ Reduces computation of bias data. Requires VQ cluster information.
+ Comment-out to disable.
+ ------------------------------------------------------*/
+#define RAPID_JAC
+#ifdef RAPID_JAC
#define OFFLINE_CLS
+#endif
-/* ----------------------------------------------
- Decoder uses 16-bit operation
- Packing/unpacking is not used
- ---------------------------------------------*/
+/* Rapid JAC decoder uses 16-bit operations. Packing/unpacking is not used */
+ #ifdef RAPID_JAC
#define USE_16BITMEAN_DECOD
-#if defined(USE_16BITMEAN_DECOD)
#undef BIT8MEAN
#endif
-/* ----------------------------------------------
- Enable Gaussian selection.
- ----------------------------------------------*/
-#define USE_GAUSS_SELECT
-/* -------------------------------------------------------
- Enables prunning with phone-level score. The phone-level score is the maximum
- triphone score for a given center phone. Prunning threshold is selected
- relative to a good phone-level score, which in the current setup,
- is the 50% quantile of all phone-level scores.
- -------------------------------------------------------*/
+/*---------------------------------------------------------------------
+ VQ-based Gaussian selection. Allows rapid evaluation of likelihoods
+ by using only VQ cluster or global likelihoods where possible. Requires
+ VQ cluster information. Comment-out to disable.
+ --------------------------------------------------------------------*/
+#define USE_GAUSS_SELECT
+#ifdef USE_GAUSS_SELECT /* && defined( RAPID_JAC ) */
+#define OFFLINE_CLS
+#endif
+
+/*---------------------------------------------------------------------
+ Enables pruning with phone-level score. The phone-level score is the maximum
+ triphone score for a given center phone. Pruning threshold is selected
+ relative to a good phone-level score, which in the current setup,
+ is the 50% quantile of all phone-level scores.
+ -------------------------------------------------------------------------*/
//#define MODEL_LEVEL_PRUNE
#if defined(MODEL_LEVEL_PRUNE)
#undef DYNAMIC_PRUNE
#endif
-/* --------------------------------------------------------
- Enable confidence measure output
- -------------------------------------------------------*/
-#define USE_CONFIDENCE
-/* --------------------------------------------------------------------------
- The On-line Reference Modeling Method
- used for two purposes:
- 1) GMM-based VAD
- 2) Rescoring of silence model to deal with very non-stationary noise
- such as competing speech.
+/*--------------------------------------------------------------------------
+ VQ-based On-line Reference Modeling Methods. Requires VQ information.
+ Used for two purposes:
+ 1) GMM-based VAD
+ 2) Rescoring of silence model to deal with very non-stationary noise
+ such as competing speech.
---------------------------------------------------------------------------*/
-#if defined(OFFLINE_CLS)&&defined(USE_SNR_SS)
-#define USE_ONLINE_REF
-// on-line reference modeling (ORM)
+
+/* On-line reference modeling (ORM). Dynamically defines a subset of
+the clustered model distributions to detect non-speech frames
+Comment out to disable */
+
+#define USE_ONLINE_REF
+#ifdef USE_ONLINE_REF
+#define OFFLINE_CLS
+#define USE_SNR_SS
+#endif
+
+/* Method for voice activity detection and robust speech recognition.
+ End-of-speech detection using the ORM method. Comment-out to disable. */
+
#define USE_ORM_VAD
-// method for voice activity detection and robust speech recognition
-// End-of-speech detection using the ORM method
+#ifdef USE_ORM_VAD
+#define USE_ONLINE_REF
+#define OFFLINE_CLS
+#define USE_SNR_SS
+#endif
-#define USE_ORM_PU // update cluster lists of ORM using background statistics
-// of the current utterance and the previous utterances.
+/* Update cluster lists of ORM using background statistics of the current
+ utterance and the previous utterances. Comment-out to disable. */
+#define USE_ORM_PU
+#ifdef USE_ORM_PU
+#define USE_ONLINE_REF
+#define USE_SNR_SS
+#define USE_ORM_VAD
#endif
+
//#define DEV_CLS2PHONE
-/* --------------- Confidence-driven unsupervised adaptation -----------------
- Update of adaptation parameters is done only if confidence score (in gmhmm_type->cm_score)
- is above a certain threshold. The threshold is by default set at CM_ADP_THRE (-200)
- --------------------------------------------------------------------------- */
-#if defined(USE_CONFIDENCE)
-#define USE_CONF_DRV_ADP
-#endif
-#define REALTIMEGUARD
+/*--------------------------------------------------------
+ Enable confidence measure output
+ -------------------------------------------------------*/
+#define USE_CONFIDENCE
-/* Word-level Backtrace
- the word-level backtrace is used in the first pass. the normal HMM-level back
- trace is used in the second pass to align utterances to phone levels.
-*/
-#define WORDBACKTRACE // if commented out, HMM based BT will be used.
+/* Confidence-driven unsupervised adaptation. Update of adaptation
+ parameters is done only if confidence score (in gmhmm_type->cm_score)
+ is above a certain threshold. The threshold is by default set at
+ CM_ADP_THRE (-200). Comment out to disable. */
+
+#define USE_CONF_DRV_ADP
+#ifdef USE_CONF_DRV_ADP
+#define USE_CONFIDENCE
+#endif
-// The setup below will remove all Gaussian selection and clustering processing
+/*-----------------------------------------------------------------------
+ Real time guard monitors how many frames are queued for recognition. If the
+ number of frames back-logged gets large, then the pruning parameter will be
+ adjusted to exclude more hypotheses in an attempt to maintain real-time.
+ Comment out to disable.
+ --------------------------------------------------------------------------*/
+#define REALTIMEGUARD
-//#undef OFFLINE_CLS
-//#undef RAPID_JAC
-//#undef USE_GAUSS_SELECT
-//#undef USE_ORIG_MEAN
-//#undef USE_ONLINE_REF
-//#undef USE_ORM_VAD
-//#undef USE_ORM_PU
+/*----------------------------------------------------------------------
+ Word-level Backtrace. The word-level backtrace is used in the first pass.
+ The normal HMM-level back trace is used in the second pass to align
+ utterances to phone levels. Research on multi-pass recognition is ongoing,
+ so currently this should be defined. If commented out, HMM based BT
+ will be used.
+ ------------------------------------------------------------------------*/
+#define WORDBACKTRACE
#endif
index eaa6fa8329f4fdff520811bf4e5438e106c17571..ef5c2b1ff0cecf094e0c3fbecb0df1c088ca11cd 100644 (file)
#endif
-/*
-** Data structure specifying TIESR SI algorithms.
-*/
-
-/*
-#include "status.h"
-#include "winlen.h"
-#include "use_audio.h"
-*/
#include "tiesr_config.h"
#include "status.h"
#include "winlen.h"
/*--------------------------------*/
-/*
- The TIesrEngineSIRECOType structure defines an instance of the
- TIesr SI recognizer. The user must instantiate and delete an
- instance by opening or closing the instance.
-*/
-/*
-** scope limitation: API function visible only within variable
-*/
+/* The TIesrEngineSIRECOType structure defines an instance of the
+ TIesr SI recognizer. The user must instantiate and delete an
+ instance by opening or closing the instance. The structure contains
+ only pointers to the functions of the interface. */
+
+
typedef struct gmhmm_type* TIesr_t;
-//typedef const struct gmhmm_struct* cTIesr_t;
+
typedef struct gmhmm_type const* cTIesr_t;
void (*GetTIesrNBest)(TIesr_t , short*);
unsigned short (*GetNumNbests)(cTIesr_t);
-//#ifdef USE_CONFIDENCE
+
short (*GetConfidenceScore)(cTIesr_t);
-//#ifdef USE_CONF_DRV_ADP
+
/*
1) setup parameters for confidence-driven adaptation
confidence score of an utterance has to be larger than a threshold to have adaptation
@@ -140,20 +129,16 @@ confidence score of an utterance has to be larger than a threshold to have adapt
*/
void (*SetTIesrConfidenceAdaptation)(short, short, TIesr_t);
void (*GetTIesrConfidenceAdaptation)(short*, short*, TIesr_t);
-//#endif
-//#endif
-// Defines should not change the compatibility
-// of TIesr processing that is optional
-//#ifdef USE_GAUSS_SELECT
+
+
/* setup parameters for Gaussian selection
default percentage_of_core = 16384,
percentage_of_inter = 0 */
void (*SetTIesrGaussSelection)(TIesr_t, unsigned short , unsigned short );
void (*GetTIesrGaussSelection)(cTIesr_t , unsigned short * , unsigned short *);
-//#endif
-//#if defined(USE_ORM_VAD)||defined(USE_ONLINE_REF)
+
/* setup parameters for on-line reference modeling method
default setup
number_of_frame_to_generate_ORM = 9
@@ -170,29 +155,21 @@ confidence score of an utterance has to be larger than a threshold to have adapt
short, unsigned short , short, short, TIesr_t);
void (*GetTIesrOnlineReferenceModel)(short*, short*, short*, short*,
short*, unsigned short*, short*, short *, TIesr_t);
-//#endif
-// Options should not redfine the user interface
-//#ifdef USE_SNR_SS
/* setup parameters for noise subtraction
default setup
alpha = 29491
beta = 3 */
void (*SetTIesrNoiseSubtraction)(unsigned short, short, TIesr_t);
void (*GetTIesrNoiseSubtraction)(unsigned short*, short*, TIesr_t);
-//#endif
-#ifdef REALTIMEGUARD
-
-
void (*getRealTimeGuardTh)(short*, TIesr_t);
void (*getGetRealTGuard)(short*, TIesr_t);
void (*setRealTimeGuardTh) (short, TIesr_t);
void (*setGetRealTGuard) (short, TIesr_t);
-#endif
short (*GetVolumeStatus)(cTIesr_t);
@@ -212,9 +189,7 @@ confidence score of an utterance has to be larger than a threshold to have adapt
void (*SetTIesrSBC)( TIesr_t, short );
-#ifdef USE_AUDIO
void (*SetTIesrVolRange)(TIesr_t, unsigned short, unsigned short);
-#endif
//
@@ -229,9 +204,7 @@ confidence score of an utterance has to be larger than a threshold to have adapt
short*, short* );
void (*GetTIesrSBC)( cTIesr_t, short* );
-#ifdef USE_AUDIO
void (*GetTIesrVolRange)(TIesr_t, unsigned short *, unsigned short *);
-#endif
} TIesrEngineSIRECOType;
index 0bd0beabe3e9d8d7a95cd1b7b61b239ff090bab3..7c6c2dbc95cc9e959fecb00c348da86f71640146 100644 (file)
--- a/TIesrEngine/src/uttdet.h
+++ b/TIesrEngine/src/uttdet.h
#ifndef _UTTDET_H
#define _UTTDET_H
+#include "winlen.h"
+#include "tiesrcommonmacros.h"
+
/*--------------------------------
Utterance detector default parameters
--------------------------------*/
Constants by define
--------------------------------*/
-// #define L_CUT 19 /* 256 * 600 / 8000 */
-//#define H_CUT 58 /* 256 * 1800 / 8000 */
+/* 400 Hz lower cut-off index for frequency shaping.
+ For a 256 sample window and 8kHz sampling, 256 * 400/8000 = 12.8, which
+ integer division truncates to 12 (the former hard-coded value was 13). */
+#define L_CUT ( WINDOW_LEN * 400 / SAM_FREQ )
+
+/* 2700 Hz upper cut-off index for frequency shaping.
+ Typically 256* 2700/8000 = 86 for 256 sample window and 8kHz sampling */
+#define H_CUT ( WINDOW_LEN * 2700 / SAM_FREQ )
+
+
+/* For normal 256 sample FFT set to 22938 => 0.70, Q 15 */
+# if ( WINDOW_LEN == 512 )
+#define L_RATE 27416
+#else
+#define L_RATE 22938
+#endif
-//#define L_CUT 6 /* 256 * 200/8000 */
+/* For normal 256 sample FFT set to 27853 => 0.85, Q 15 */
+#if ( WINDOW_LEN == 512 )
+#define H_RATE 30211
+#else
+#define H_RATE 27853
+#endif
-#define L_CUT 13 /* 256 * 400/8000 */
-#define H_CUT 86 /* 256*2700/8000 */
+/* Lower correlation index for search for correlation peak.
+ Corresponds to 400 Hz */
+#define LEND ( SAM_FREQ / 400 )
-#define L_RATE 22938 /* 0.70, Q 15 */
-#define H_RATE 27853 /* 0.85, Q 15 */
-#define LEND 20 /* 400 Hz */
-#define HEND 120 /* 75 Hz (66Hz?) */
+/* Higher correlation index for searching for correlation peak.
+ Corresponds to 67 Hz or highest index available corresponding to lowest
+ frequency available. */
+#define HEND ( MIN( ( SAM_FREQ / 67 ), ( (WINDOW_LEN>>1) - 1) ) )
-#define SMOOTH 16384 /* 13107 (0.4) */ /* 0.5, Q 15 */
+/* 0.5, Q 15 */
+#define SMOOTH 16384
#define AC_SCALE 64 /* auto correlation scale */
//#define SPEECH_DELTA 10
extern void compute_uttdet(short *power_spectrum, short cnt,short frm_cnt, short *, gmhmm_type *, NormType *);
extern void noise_subs(short *ps_signal, short norm, short *ps_noisy_smoothed, short *noise_floor,
- ushort, short *);
+ ushort, short *);
extern short update_level(short new_val, short old, short uptc, short dntc);
extern void init_spect_sub(short *);
-*/
+ */
/*
-** un scaled: W_B in Q 15
-*/
+ ** un scaled: W_B in Q 15
+ */
/*
#define SMOOTHING(A, W_B, B) ((long) ( 32767 - W_B ) * A + (long) W_B * B)
-*/
+ */
#endif
index 4ea6fb2f3315734e55e838987f43697a3e308099..31ed29be23a53475800c99a588b42f22600d5702 100755 (executable)
#include "tiesr_config.h"
-#ifdef USE_AUDIO
+
#include "status.h"
#include "volume_user.h"
/*
-** check volume range
-*/
+ ** check volume range
+ */
/* GMHMM_SI_API */
-TIESRENGINECOREAPI_API void SetTIesrVolRange(gmhmm_type *gvv, unsigned short low_v, unsigned short high_v)
+TIESRENGINECOREAPI_API void SetTIesrVolRange( gmhmm_type *gvv, unsigned short low_v, unsigned short high_v )
{
- gmhmm_type *gv = (gmhmm_type *) gvv;
- gv->low_vol_limit = low_v;
- gv->high_vol_limit = high_v;
+#ifdef USE_AUDIO
+ gmhmm_type *gv = (gmhmm_type *) gvv;
+ gv->low_vol_limit = low_v;
+ gv->high_vol_limit = high_v;
+#endif
}
-
/* GMHMM_SI_API */
-TIESRENGINECOREAPI_API void GetTIesrVolRange(gmhmm_type* gvv, unsigned short *low_v, unsigned short *high_v)
+TIESRENGINECOREAPI_API void GetTIesrVolRange( gmhmm_type* gvv, unsigned short *low_v, unsigned short *high_v )
{
- gmhmm_type *gv = (gmhmm_type *) gvv;
- *low_v = gv->low_vol_limit;
- *high_v = gv->high_vol_limit;
+#ifdef USE_AUDIO
+ gmhmm_type *gv = (gmhmm_type *) gvv;
+ *low_v = gv->low_vol_limit;
+ *high_v = gv->high_vol_limit;
+#else
+ *low_v = 0;
+ *high_v = 0;
+#endif
}
+#ifdef USE_AUDIO
TIESRENGINECOREAPI_API short set_volume_flag(gmhmm_type *gv)
{
return volume_flag;
}
-
#endif
index e5c2d72c0273d90d0ee26c7979db06b10ed8e348..7e2a4f23ccec48a636a65638d3dc0d43bf3dbe3a 100644 (file)
--- a/TIesrEngine/src/winlen.h
+++ b/TIesrEngine/src/winlen.h
*
* winlen.h
*
- * Header defining frame size.
+ * Header defining audio collection parameters, including frame size,
+ * window size and sample rate.
*
* Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/
*
* Lesser General Public License for more details.
*
- This header defines how many audio samples make up a frame that TIesr
- is expecting. This file is somewhat misnamed, since it is defining
- frame length instead of a window length.
-
+ This header defines parameters for collecting audio samples and how to
+ put them in frames.
======================================================================*/
#ifndef _WINLEN_H
#define _WINLEN_H
-/*
-** number of samples in a frame of data that user must copy to buffer
-*/
+/* Number of samples making up a frame window. This is the number of samples
+ that will be windowed by the Hamming window, and the number of samples input
+ to the FFT during MFCC computation. It must be 256 or 512. */
+
+#define WINDOW_LEN 256
+// #define WINDOW_LEN 512
+
+
+/* Number of samples in a frame of data that user must copy to buffer */
+
#define FRAME_LEN 160
+// #define FRAME_LEN 480
+
+
+/* A/D Sampling rate */
+
+#define SAM_FREQ 8000
+// #define SAM_FREQ 24000
#endif /* _WINLEN_H */