• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  SR_Recognizer.h  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #ifndef __SR_RECOGNIZER_H
21 #define __SR_RECOGNIZER_H
22 
23 
24 
25 #include "ESR_ReturnCode.h"
26 #include "SR_RecognizerPrefix.h"
27 #include "SR_AcousticModels.h"
28 #include "SR_Grammar.h"
29 #include "SR_RecognizerResult.h"
30 #include "SR_Nametags.h"
31 #include "pstdio.h"
32 #include "ptypes.h"
33 
34 /* forward decl needed because of SR_Recognizer.h <-> SR_Grammar.h include loop */
35 struct SR_Grammar_t;
36 
37 /**
38  * Recognizer status.
39  */
40 typedef enum SR_RecognizerStatus_t
41 {
42   /**
43    * Reserved value.
44    */
45   SR_RECOGNIZER_EVENT_INVALID,
46   /**
47    * Recognizer could not find a match for the utterance.
48    */
49   SR_RECOGNIZER_EVENT_NO_MATCH,
50   /**
51    * Recognizer processed one frame of audio.
52    */
53   SR_RECOGNIZER_EVENT_INCOMPLETE,
54   /**
55    * Recognizer has just been started.
56    */
57   SR_RECOGNIZER_EVENT_STARTED,
58   /**
59    * Recognizer is stopped.
60    */
61   SR_RECOGNIZER_EVENT_STOPPED,
62   /**
63    * Beginning of speech detected.
64    */
65   SR_RECOGNIZER_EVENT_START_OF_VOICING,
66   /**
67    * End of speech detected.
68    */
69   SR_RECOGNIZER_EVENT_END_OF_VOICING,
70   /**
71    * Beginning of utterance occured too soon.
72    */
73   SR_RECOGNIZER_EVENT_SPOKE_TOO_SOON,
74   /**
75    * Recognition match detected.
76    */
77   SR_RECOGNIZER_EVENT_RECOGNITION_RESULT,
78   /**
79    * Timeout occured before beginning of utterance.
80    */
81   SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT,
82   /**
83    * Timeout occured before speech recognition could complete.
84    */
85   SR_RECOGNIZER_EVENT_RECOGNITION_TIMEOUT,
86   /**
87    * Not enough samples to process one frame.
88    */
89   SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO,
90   /**
91    * More audio encountered than is allowed by 'swirec_max_speech_duration'.
92    */
93   SR_RECOGNIZER_EVENT_MAX_SPEECH,
94 } SR_RecognizerStatus;
95 
96 /**
97  * Type of RecognizerResult returned by SR_RecognizerAdvance().
98  */
99 typedef enum SR_RecognizerResultType_t
100 {
101   /**
102    * Reserved value.
103    */
104   SR_RECOGNIZER_RESULT_TYPE_INVALID,
105   /**
106    * The result is complete from a full recognition of audio.
107    */
108   SR_RECOGNIZER_RESULT_TYPE_COMPLETE,
109   /**
110    * No results at this time.
111    */
112   SR_RECOGNIZER_RESULT_TYPE_NONE,
113 } SR_RecognizerResultType;
114 
115 /**
116  * SR_Utterance stubbed out.
117  */
118 typedef void* SR_Utterance;
119 
120 typedef enum
121 {
122   ESR_LOCK,
123   ESR_UNLOCK
124 } ESR_LOCKMODE;
125 
126 /**
127  * Function which will be invoked before accessing internal variables.
128  */
129 typedef ESR_ReturnCode(*SR_RecognizerLockFunction)(ESR_LOCKMODE mode, void* data);
130 
131 /**
132  * @addtogroup SR_RecognizerModule SR_Recognizer API functions
133  * Synchronous speech recognizer.
134  *
135  * @{
136  */
137 
138 /**
139  * Synchronous speech recognizer.
140  */
141 typedef struct SR_Recognizer_t
142 {
143   /**
144    * Starts recognition.
145    *
146    * @param self SR_Recognizer handle
147   * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
148   * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
149    */
150   ESR_ReturnCode(*start)(struct SR_Recognizer_t* self);
151   /**
152    * Stops the recognizer and invalidates the recognition result object.
153    * Calling this function before the recognizer receives the last frame causes the recognition
154    * to abort.
155    *
156    * @param self SR_Recognizer handle
157    * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
158    */
159   ESR_ReturnCode(*stop)(struct SR_Recognizer_t* self);
160   /**
161    * Destroy a recognizer.
162    *
163    * @param self SR_Recognizer handle
164   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
165    */
166   ESR_ReturnCode(*destroy)(struct SR_Recognizer_t* self);
167   /**
168    * Associates a set of models with the recognizer.
169    *
170    * @param self SR_Recognizer handle
171   * @return ESR_INVALID_ARGUMENT if self is null
172    */
173   ESR_ReturnCode(*setup)(struct SR_Recognizer_t* self);
174   /**
175    * Unconfigures recognizer.
176    *
177    * @param self SR_Recognizer handle
178   * @return ESR_INVALID_ARGUMENT if self is null
179    */
180   ESR_ReturnCode(*unsetup)(struct SR_Recognizer_t* self);
181   /**
182    * Indicates whether recognizer is configured for use.
183    *
184    * @param self SR_Recognizer handle
185    * @param isSetup True if recognizer is configured
186   * @return ESR_INVALID_ARGUMENT if self is null
187    */
188   ESR_ReturnCode(*isSetup)(struct SR_Recognizer_t* self, ESR_BOOL* isSetup);
189 
190   /**
191    * Returns copy of LCHAR recognition parameter.
192    *
193    * @param self SR_Recognizer handle
194    * @param key Parameter name
195    * @param value [out] Used to hold the parameter value
196    * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
197    *            the required length is returned in this variable.
198   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
199   * type LCHAR*
200    */
201   ESR_ReturnCode(*getParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value, size_t* len);
202   /**
203    * Return copy of size_t recognition parameter.
204    *
205    * @param self SR_Recognizer handle
206    * @param key Parameter name
207    * @param value [out] Used to hold the parameter value
208   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
209   * type size_t
210    */
211   ESR_ReturnCode(*getSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t* value);
212   /**
213    * Return copy of BOOL recognition parameter.
214    *
215    * @param self SR_Recognizer handle
216    * @param key Parameter name
217    * @param value [out] Used to hold the parameter value
218   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
219   * type bool
220    */
221   ESR_ReturnCode(*getBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL* value);
222   /**
223    * Sets recognition parameters.
224    *
225    * Key:             Description of associated value
226    *
227    * VoiceEnrollment       If "true", the next recognition will produce data required
228    *                              for Nametag support (i.e. Aurora bitstream).
229    *
230    * @param self SR_Recognizer handle
231    * @param key Parameter name
232    * @param value Parameter value
233   * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
234    */
235   ESR_ReturnCode(*setParameter)(struct SR_Recognizer_t* self, const LCHAR* key, LCHAR* value);
236   /**
237    * Sets recognition parameters.
238    *
239    * @param self SR_Recognizer handle
240    * @param key Parameter name
241    * @param value Parameter value
242   * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
243    */
244   ESR_ReturnCode(*setSize_tParameter)(struct SR_Recognizer_t* self, const LCHAR* key, size_t value);
245   /**
246    * Sets recognition parameters.
247    *
248    * @param self SR_Recognizer handle
249    * @param key Parameter name
250    * @param value Parameter value
251   * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
252    */
253   ESR_ReturnCode(*setBoolParameter)(struct SR_Recognizer_t* self, const LCHAR* key, ESR_BOOL value);
254 
255   /**
256    * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
257    * must be unsetup before the recognizer can be destroy.
258    * A pre-compiled Grammar should have undergone a model consistency check with the
259    * recognizer prior to this call.
260    *
261    * @param self SR_Recognizer handle
262    * @param grammar Grammar containing rule
263    * @param ruleName Name of rule to associate with recognizer
264    * @see SR_GrammarCheckModelConsistency
265    * @return ESR_INVALID_ARGUMENT if self is null
266    */
267   ESR_ReturnCode (*setupRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar, const LCHAR* ruleName);
268   /**
269    * Indicates if Recognizer is configured with any rules within the specified Grammar.
270    *
271    * @param self SR_Recognizer handle
272    * @param hasSetupRules True if the Recognizer is configured for the Grammar
273   * @return ESR_INVALID_ARGUMENT if self is null
274    */
275   ESR_ReturnCode(*hasSetupRules)(struct SR_Recognizer_t* self, ESR_BOOL* hasSetupRules);
276   /**
277    * Activates rule in recognizer.
278    *
279    * @param self SR_Recognizer handle
280    * @param grammar Grammar containing rule
281    * @param ruleName Name of rule
282    * @param weight Relative weight to assign to self grammar vs. other activated grammars.
283    *               Values: Integers 0-2^31.
284   * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
285   * or if the rule could not be setup, or if the acoustic models could not be setup;
286   * ESR_BUFFER_OVERFLOW if ruleName is too long
287    */
288   ESR_ReturnCode (*activateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
289                                 const LCHAR* ruleName, unsigned int weight);
290   /**
291    * Deactivates rule in recognizer.
292    *
293    * @param self SR_Recognizer handle
294    * @param grammar Grammar containing rule
295    * @param ruleName Name of root rule
296    * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
297    */
298   ESR_ReturnCode (*deactivateRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
299                                   const LCHAR* ruleName);
300 
301   /**
302    * Deactivates all grammar rules in recognizer.
303    *
304    * @param self SR_Recognizer handle
305   * @return ESR_INVALID_ARGUMENT if self is null
306    */
307   ESR_ReturnCode(*deactivateAllRules)(struct SR_Recognizer_t* self);
308 
309   /**
310    * Indicates if rule is active in recognizer.
311    *
312    * @param self SR_Recognizer handle
313    * @param grammar Grammar containing rule
314    * @param ruleName Name of rule
315    * @param isActiveRule True if rule is active
316   * @return ESR_INVALID_ARGUMENT if self is null
317    */
318   ESR_ReturnCode (*isActiveRule)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
319                                 const LCHAR* ruleName, ESR_BOOL* isActiveRule);
320    /**
321    * Configures the grammar for maximum amount of word addition
322    *
323    * @param self SR_Recognizer handle
324    * @param grammar Grammar whose ceiling to be set
325    * @return ESR_INVALID_ARGUMENT if self or grammar are null
326    */
327   ESR_ReturnCode (*setWordAdditionCeiling)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar );
328   /**
329    * Ensure the model usage in a pre-compiled grammar is consistent with the models
330    * that are associated with the Recognizer. You must first have called Recognizer_Setup().
331    *
332    * @param self SR_Recognizer handle
333    * @param grammar Grammar to check against
334    * @param isConsistent True if rule is consistent
335   * @return ESR_INVALID_ARGUMENT if self is null
336    */
337   ESR_ReturnCode (*checkGrammarConsistency)(struct SR_Recognizer_t* self, struct SR_Grammar_t* grammar,
338       ESR_BOOL* isConsistent);
339 
340  /**
341    * Ensure the model usage in a pre-compiled grammar is consistent with the models
342    * that are associated with the Recognizer. You must first have called Recognizer_Setup().
343    *
344    * @param self SR_Recognizer handle
345    * @param grammar Grammar to check against
346    * @param isConsistent True if rule is consistent
347   * @return ESR_INVALID_ARGUMENT if self is null
348    */
349   ESR_ReturnCode (*getModels)(struct SR_Recognizer_t* self, SR_AcousticModels** pmodels);
350 
351   /**
352    * Get audio into the recognizer.
353    *
354    * We decouple the Audio and frontend processing from the Recognizer processing via an
355    * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
356    * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
357    * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
358    * behaviour. This function may be called independently of Recognizer_Advance.
359    *
360    * @param self SR_Recognizer handle
361    * @param buffer Buffer containing audio data
362    * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
363    *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
364    *                            amount of samples that were pushed.
365    * @param isLast Indicates if the audio frame is the last one in this recognition
366   * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
367   * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
368   * full
369    */
370   ESR_ReturnCode (*putAudio)(struct SR_Recognizer_t* self, asr_int16_t* buffer, size_t* bufferSize,
371                             ESR_BOOL isLast);
372   /**
373    * Advance the recognizer by at least one utterance frame. The number of frames advanced
374    * depends on the underlying definition. We anticipate that the recognizer will keep up with
375    * the supplied audio buffers when waiting for voicing. After this point, the number of frames
376    * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
377    * of this operation is not considered a problem. The recognizer may be advanced independently
378    * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data.
379    * A stop condition could be an appropriate consequence.
380    *
381    * @param self Recognizer handle
382    * @param status Resulting recognizer status
383    * @param type Resulting recognition result type
384    * @param result Resulting recognizer result
385   * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
386    */
387   ESR_ReturnCode(*advance)(struct SR_Recognizer_t* self, SR_RecognizerStatus* status,
388                            SR_RecognizerResultType* type, SR_RecognizerResult** result);
389 
390 
391   /**
392    * Loads utterance from file.
393    *
394    * @param self SR_Recognizer handle
395    * @param filename File to read from
396   * @return ESR_INVALID_ARGUMENT if self is null
397    */
398   ESR_ReturnCode(*loadUtterance)(struct SR_Recognizer_t* self, const LCHAR* filename);
399   /**
400    * Loads utterance from WAVE file.
401    *
402    * @param self SR_Recognizer handle
403    * @param filename WAVE file to read from
404   * @return ESR_INVALID_ARGUMENT if self is null
405    */
406   ESR_ReturnCode(*loadWaveFile)(struct SR_Recognizer_t* self, const LCHAR* filename);
407 
408   /**
409    * Log recognizer-related event token.
410    *
411    * @param self SR_Recognizer handle
412    * @param event Token name
413    * @param value Value to be logged
414    * @return ESR_INVALID_ARGUMENT if self is null
415    */
416   ESR_ReturnCode(*logToken)(struct SR_Recognizer_t* self, const LCHAR* token, const LCHAR* value);
417 
418   /**
419    * Log recognizer-related event token integer.
420    *
421    * @param self SR_Recognizer handle
422    * @param event Token name
423    * @param value Value to be logged
424    * @return ESR_INVALID_ARGUMENT if self is null
425    */
426   ESR_ReturnCode(*logTokenInt)(struct SR_Recognizer_t* self, const LCHAR* token, int value);
427 
428   /**
429    * Log recognizer-related event and dump all previously accumulated tokens since last event to
430    * log.
431    *
432    * @param self SR_Recognizer handle
433    * @param event Event name
434    * @return ESR_INVALID_ARGUMENT if self is null
435    */
436   ESR_ReturnCode(*logEvent)(struct SR_Recognizer_t* self, const LCHAR* event);
437 
438   /**
439    * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
440    * and it is up to the application to decided when the session ends and a new one begins (e.g.
441    * timeout, number of recognitions, etc.)
442    *
443    * @param self SR_Recognizer handle
444    * @param sessionName Session name
445    * @return ESR_INVALID_ARGUMENT if self is null
446    */
447   ESR_ReturnCode(*logSessionStart)(struct SR_Recognizer_t* self, const LCHAR* sessionName);
448 
449   /**
450    * Log the end of a log session.
451    *
452    * @param self SR_Recognizer handle
453    * @return ESR_INVALID_ARGUMENT if self is null
454    */
455   ESR_ReturnCode(*logSessionEnd)(struct SR_Recognizer_t* self);
456 
457   /**
458    * Log data about a waveform obtained from a TCP file. This function is not called
459    * when doing live recognition.
460    *
461    * @param self SR_Recognizer handle
462    * @param waveformFilename Session name
463    * @param transcription Transcription for the utterance
464    * @param bos Beginning of speech (seconds)
465    * @param eos End of speech (seconds)
466    * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
467    * @return ESR_INVALID_ARGUMENT if self is null
468    */
469   ESR_ReturnCode(*logWaveformData)(struct SR_Recognizer_t* self,
470                                    const LCHAR* waveformFilename,
471                                    const LCHAR* transcription,
472                                    const double bos,
473                                    const double eos,
474                                    ESR_BOOL isInvocab);
475 
476   /**
477    * Associates a locking function with the recognizer. This function is used to
478    * protect internal data from multithreaded access.
479    *
480    * @param self SR_Recognizer handle
481    * @param function Locking function
482    * @param data Function data
483    * @return ESR_INVALID_ARGUMENT if self is null
484    */
485   ESR_ReturnCode(*setLockFunction)(struct SR_Recognizer_t *self, SR_RecognizerLockFunction function, void* data);
486   /**
487    * Indicates if signal is getting clipped.
488    *
489    * @param self SR_Recognizer handle
490    * @param isClipping [out] Result value
491    * @return ESR_INVALID_ARGUMENT if self is null
492    */
493   ESR_ReturnCode(*isSignalClipping)(struct SR_Recognizer_t* self, ESR_BOOL* isClipping);
494   /**
495    * Indicates if signal has a DC-offset component.
496    *
497    * @param self SR_Recognizer handle
498    * @param isDCOffset [out] Result value
499    * @return ESR_INVALID_ARGUMENT if self is null
500    */
501   ESR_ReturnCode(*isSignalDCOffset)(struct SR_Recognizer_t* self, ESR_BOOL* isDCOffset);
502   /**
503    * Indicates if signal is noisy.
504    *
505    * @param self SR_Recognizer handle
506    * @param isNoisy [out] Result value
507    * @return ESR_INVALID_ARGUMENT if self is null
508    */
509   ESR_ReturnCode(*isSignalNoisy)(struct SR_Recognizer_t* self, ESR_BOOL* isNoisy);
510   /**
511    * Indicates if speech contained within the signal is too quiet.
512    *
513    * @param self SR_Recognizer handle
514    * @param isTooQuiet [out] Result value
515    * @return ESR_INVALID_ARGUMENT if self is null
516    */
517   ESR_ReturnCode(*isSignalTooQuiet)(struct SR_Recognizer_t* self, ESR_BOOL* isTooQuiet);
518   /**
519    * Indicates if there are too few samples in the signal for a proper recognition.
520    *
521    * @param self SR_Recognizer handle
522    * @param isTooFewSamples [out] Result value
523    * @return ESR_INVALID_ARGUMENT if self is null
524    */
525   ESR_ReturnCode(*isSignalTooFewSamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooFewSamples);
526   /**
527    * Indicates if there are too many samples in the signal for a proper recognition.
528    *
529    * @param self SR_Recognizer handle
530    * @param isTooManySamples [out] Result value
531    * @return ESR_INVALID_ARGUMENT if self is null
532    */
533   ESR_ReturnCode(*isSignalTooManySamples)(struct SR_Recognizer_t* self, ESR_BOOL* isTooManySamples);
534 }
535 SR_Recognizer;
536 
537 /**
538  * Starts recognition.
539  *
540  * @param self SR_Recognizer handle
541  * @return ESR_INVALID_ARGUMENT if self is null, if no acoustic models have been associated with the recognizer,
542  * if no grammars have been activated, or if the recognizer cannot be started for an unknown reason
543  */
544 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStart(SR_Recognizer* self);
545 /**
546  * Stops the recognizer and invalidates the recognition result object.
547  * Calling this function before the recognizer receives the last frame causes the recognition
548  * to abort.
549  *
550  * @param self SR_Recognizer handle
551  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
552  */
553 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerStop(SR_Recognizer* self);
554 
555 /**
556  * @name Recognizer Setup operations
557  *
558  * @{
559  */
560 
561 /**
562  * Create a new recognizer.
563  *
564  * @param self SR_Recognizer handle
565  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY if system is out of memory;
566  * ESR_INVALID_STATE if an internal error occurs
567  */
568 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self);
569 /**
570  * Destroy a recognizer.
571  *
572  * @param self SR_Recognizer handle
573  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if an internal error has occured
574  */
575 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDestroy(SR_Recognizer* self);
576 /**
577  * Associates a set of models with the recognizer. All grammars must use models consistently.
578  *
579  * @param self SR_Recognizer handle
580  * @see SR_RecognizerCheckGrammarConsistency
581  * @return ESR_INVALID_ARGUMENT if self is null
582  */
583 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetup(SR_Recognizer* self);
584 /**
585  * Unconfigures recognizer.
586  *
587  * @param self SR_Recognizer handle
588  * @return ESR_INVALID_ARGUMENT if self is null
589  */
590 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerUnsetup(SR_Recognizer* self);
591 /**
592  * Indicates whether recognizer is configured for use.
593  *
594  * @param self SR_Recognizer handle
595  * @param isSetup True if recognizer is configured
596  * @return ESR_INVALID_ARGUMENT if self is null
597  */
598 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSetup(SR_Recognizer* self, ESR_BOOL* isSetup);
599 
600 /**
601  * @}
602  *
603  * @name Recognizer parameter operations
604  *
605  * @{
606  */
607 
608 /**
609  * Returns copy of LCHAR recognition parameter.
610  *
611  * @param self SR_Recognizer handle
612  * @param key Parameter name
613  * @param value [out] Used to hold the parameter value
614  * @param len [in/out] Length of value argument. If the return code is ESR_BUFFER_OVERFLOW,
615  *            the required length is returned in this variable.
616  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
617  * type LCHAR*
618  */
619 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value, size_t* len);
620 /**
621  * Return copy of size_t recognition parameter.
622  *
623  * @param self SR_Recognizer handle
624  * @param key Parameter name
625  * @param value Used to hold the parameter value
626  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
627  * type size_t
628  */
629 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t* value);
630 /**
631  * Return copy of BOOL recognition parameter.
632  *
633  * @param self SR_Recognizer handle
634  * @param key Parameter name
635  * @param value Used to hold the parameter value
636  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_RESULT_TYPE if the specified property is not of
637  * type bool
638  */
639 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerGetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value);
640 /**
641  * Sets LCHAR* recognition parameters.
642  *
643  * Key:             Description of associated value
644  *
645  * VoiceEnrollment       If "true", the next recognition will produce data required
646  *                              for Nametag support (i.e. Aurora bitstream).
647  *
648  * @param self SR_Recognizer handle
649  * @param key Parameter name
650  * @param value Parameter value
651  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
652  */
653 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetParameter(SR_Recognizer* self, const LCHAR* key, LCHAR* value);
654 /**
655  * Sets size_t recognition parameter.
656  *
657  * @param self SR_Recognizer handle
658  * @param key Parameter name
659  * @param value Parameter value
660  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
661  */
662 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetSize_tParameter(SR_Recognizer* self, const LCHAR* key, size_t value);
663 /**
664  * Sets BOOL recognition parameter.
665  *
666  * @param self SR_Recognizer handle
667  * @param key Parameter name
668  * @param value Parameter value
669  * @return ESR_INVALID_ARGUMENT if self is null; ESR_OUT_OF_MEMORY is system is out of memory
670  */
671 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetBoolParameter(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value);
672 
673 /**
674  * @}
675  *
676  * @name Recognizer rule Setup/Activation operations
677  *
678  * @{
679  */
680 
681 /**
682  * Recognizer may be set up with multiple Grammars and multiple rules. All grammars
683  * must be unsetup before the recognizer can be destroyed.
684  * A pre-compiled Grammar should have undergone a model consistency check with the
685  * recognizer prior to this call.
686  *
687  * @param self SR_Recognizer handle
688  * @param grammar Grammar containing rule
689  * @param ruleName Name of rule to associate with recognizer
690  * @see SR_GrammarCheckModelConsistency
691  * @return ESR_INVALID_ARGUMENT if self is null
692  */
693 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetupRule(SR_Recognizer* self,
694                                                           struct SR_Grammar_t* grammar,
695     const LCHAR* ruleName);
696 /**
697  * Indicates if Recognizer is configured with any rules within the specified Grammar.
698  *
699  * @param self SR_Recognizer handle
700  * @param hasSetupRules True if the Recognizer is configured for the Grammar
701  * @return ESR_INVALID_ARGUMENT if self is null
702  */
703 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerHasSetupRules(SR_Recognizer* self,
704     ESR_BOOL* hasSetupRules);
705 /**
706  * Activates rule in recognizer.
707  *
708  * @param self SR_Recognizer handle
709  * @param grammar Grammar containing rule
710  * @param ruleName Name of rule
711  * @param weight Relative weight to assign to self grammar vs. other activated grammars.
712  *               Values: Integers 0-2^31.
713  * @return ESR_INVALID_ARGUMENT if self is null; ESR_INVALID_STATE if no models are associated with the recognizer,
714  * or if the rule could not be setup, or if the acoustic models could not be setup;
715  * ESR_BUFFER_OVERFLOW if ruleName is too long
716  */
717 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerActivateRule(SR_Recognizer* self,
718                                                              struct SR_Grammar_t* grammar,
719     const LCHAR* ruleName,
720     unsigned int weight);
721 /**
722  * Deactivates rule in recognizer.
723  *
724  * @param self SR_Recognizer handle
725  * @param grammar Grammar containing rule
726  * @param ruleName Name of rule
727  * @return ESR_INVALID_ARGUMENT if self is null; ESR_NO_MATCH_ERROR if grammar is not activated
728  */
729 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateRule(SR_Recognizer* self,
730                                                                struct SR_Grammar_t* grammar,
731     const LCHAR* ruleName);
732 
733 /**
734  * Deactivates all grammar rule in recognizer.
735  *
736  * @param self SR_Recognizer handle
737  * @return ESR_INVALID_ARGUMENT if self is null
738  */
739 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerDeactivateAllRules(SR_Recognizer* self);
740 
741 /**
742  * Indicates if rule is active in recognizer.
743  *
744  * @param self SR_Recognizer handle
745  * @param grammar Grammar containing rule
746  * @param ruleName Name of rule
747  * @param isActiveRule True if rule is active
748  * @return ESR_INVALID_ARGUMENT if self is null
749  */
750 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsActiveRule(SR_Recognizer* self,
751                                                              struct SR_Grammar_t* grammar,
752     const LCHAR* ruleName,
753     ESR_BOOL* isActiveRule);
754 /**
755  * Ensure the model usage in a pre-compiled grammar is consistent with the models
756  * that are associated with the Recognizer. You must first have called Recognizer_Setup().
757  *
758  * @param self SR_Recognizer handle
759  * @param grammar Grammar to check against
760  * @param isConsistent True if rule is consistent
761  * @return ESR_INVALID_ARGUMENT if self is null
762  */
763 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerCheckGrammarConsistency(SR_Recognizer* self,
764                                                                         struct SR_Grammar_t* grammar,
765     ESR_BOOL* isConsistent);
766 /**
767  * @}
768  *
769  * @name Recognizer Advance operations
770  *
771  * @{
772  */
773 
774 /**
775  * Get audio into the recognizer.
776  *
777  * We decouple the Audio and frontend processing from the Recognizer processing via an
778  * internal FIFO frame buffer (aka utterance buffer). This ensures that this call is at least
779  * as fast as real time so that voicing events are not unduly delayed. The audio buffer size
780  * must be at least one frame buffer's worth and some reasonable maximum size for synchronous
781  * behaviour. This function may be called independently of Recognizer_Advance.
782  *
783  * @param self SR_Recognizer handle
784  * @param buffer Buffer containing audio data
785  * @param bufferSize [in/out] Size of buffer in samples. In case of a buffer overflow,
786  *                            ESR_BUFFER_OVERFLOW is returned and this value holds the actual
787  *                            amount of samples that were pushed.
788  * @param isLast Indicates if the audio frame is the last one in this recognition
789  * @return ESR_INVALID_ARGUMENT if self, buffer, or bufferSize are null; ESR_INVALID_STATE if the recognizer isn't
790  * started, or the recognizer has already received the last frame; ESR_BUFFER_OVERFLOW if the recognizer buffer is
791  * full
792  */
793 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerPutAudio(SR_Recognizer* self, asr_int16_t* buffer,
794     size_t* bufferSize, ESR_BOOL isLast);
795 /**
796  * Advance the recognizer by at least one utterance frame. The number of frames advanced
797  * depends on the underlying definition. We anticipate that the recognizer will keep up with
798  * the supplied audio buffers when waiting for voicing. After this point, the number of frames
799  * may be one (for our default frame-advance mode) or it may be more if the synchronous nature
800  * of this operation is not considered a problem. The recognizer may be advanced independently
801  * of the Recognizer_PutAudio call. It is permissible to advance when there is no further data.
802  * A stop condition could be an appropriate consequence.
803  *
804  * @param self Recognizer handle
805  * @param status Resulting recognizer status
806  * @param type Resulting recognition result type
807  * @param result Resulting recognizer result
808  * @return ESR_INVALID_ARGUMENT if self, status, or type are null; ESR_INVALID_STATE if an internal error occurs
809  */
810 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerAdvance(SR_Recognizer* self,
811     SR_RecognizerStatus* status,
812     SR_RecognizerResultType* type,
813     SR_RecognizerResult** result);
814 /**
815  * @}
816  */
817 
818 /**
819  * Log recognizer-related event token.
820  *
821  * @param self SR_Recognizer handle
822  * @param token Token name
823  * @param value Value to be logged
824  * @return ESR_INVALID_ARGUMENT if self is null
825  */
826 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogToken(SR_Recognizer* self, const LCHAR* token, const LCHAR* value);
827 
828 /**
829  * Log recognizer-related event token integer.
830  *
831  * @param self SR_Recognizer handle
832  * @param token Token name
833  * @param value Value to be logged
834  * @return ESR_INVALID_ARGUMENT if self is null
835  */
836 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogTokenInt(SR_Recognizer* self, const LCHAR* token, int value);
837 
838 /**
839  * Log recognizer-related event and dump all previously accumulated tokens since last event to
840  * log.
841  *
842  * @param self SR_Recognizer handle
843  * @param event Event name
844  * @return ESR_INVALID_ARGUMENT if self is null
845  */
846 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogEvent(SR_Recognizer* self, const LCHAR* event);
847 
848 /**
849  * Log the beginning of a new log session. A log session contains zero or more recognitions (transactions)
850  * and it is up to the application to decided when the session ends and a new one begins (e.g.
851  * timeout, number of recognitions, etc.)
852  *
853  * @param self SR_Recognizer handle
854  * @param sessionName Session name
855  * @return ESR_INVALID_ARGUMENT if self is null
856  */
857 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionStart(SR_Recognizer* self, const LCHAR* sessionName);
858 
859 /**
860  * Log the end of a log session.
861  *
862  * @param self SR_Recognizer handle
863  * @return ESR_INVALID_ARGUMENT if self is null
864  */
865 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogSessionEnd(SR_Recognizer* self);
866 
867 /**
868  * Log data about a waveform obtained from a TCP file. This function is not called
869  * when doing live recognition.
870  *
871  * @param self SR_Recognizer handle
872  * @param waveformFilename Session name
873  * @param transcription Transcription for the utterance
874  * @param bos Beginning of speech (seconds)
875  * @param eos End of speech (seconds)
876  * @param isInvocab True if the transcription is accepted by the grammar, False otherwise
877  * @return ESR_INVALID_ARGUMENT if self is null
878  */
879 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLogWaveformData(SR_Recognizer* self,
880     const LCHAR* waveformFilename,
881     const LCHAR* transcription,
882     const double bos,
883     const double eos,
884     ESR_BOOL isInvocab);
885 
886 
887 /**
888  * Loads utterance from file.
889  *
890  * @param self SR_Recognizer handle
891  * @param filename File to read from
892  * @return ESR_INVALID_ARGUMENT if self is null
893  */
894 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadUtterance(SR_Recognizer* self, const LCHAR* filename);
895 /**
896  * Loads utterance from WAVE file.
897  *
898  * @param self SR_Recognizer handle
899  * @param filename WAVE file to read from
900  * @return ESR_INVALID_ARGUMENT if self is null
901  */
902 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerLoadWaveFile(SR_Recognizer* self, const LCHAR* filename);
903 
904 /**
905  * Associates a locking function with the recognizer. This function is used to
906  * protect internal data from multithreaded access.
907  *
908  * @param self SR_Recognizer handle
909  * @param function Locking function
910  * @param data Function data
911  * @return ESR_INVALID_ARGUMENT if self is null
912  */
913 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerSetLockFunction(SR_Recognizer* self,
914     SR_RecognizerLockFunction function,
915     void* data);
916 
917 /**
918  *
919  * @name Signal quality metrics
920  *
921  * @{
922  */
923 
924 /**
925  * Indicates if signal is getting clipped.
926  *
927  * @param self SR_Recognizer handle
928  * @param isClipping [out] Result value
929  * @return ESR_INVALID_ARGUMENT if self is null
930  */
931 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalClipping(SR_Recognizer* self, ESR_BOOL* isClipping);
932 /**
933  * Indicates if signal has a DC-offset component.
934  *
935  * @param self SR_Recognizer handle
936  * @param isDCOffset [out] Result value
937  * @return ESR_INVALID_ARGUMENT if self is null
938  */
939 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalDCOffset(SR_Recognizer* self, ESR_BOOL* isDCOffset);
940 /**
941  * Indicates if signal is noisy.
942  *
943  * @param self SR_Recognizer handle
944  * @param isNoisy [out] Result value
945  * @return ESR_INVALID_ARGUMENT if self is null
946  */
947 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalNoisy(SR_Recognizer* self, ESR_BOOL* isNoisy);
948 /**
949  * Indicates if speech contained within the signal is too quiet.
950  *
951  * @param self SR_Recognizer handle
952  * @param isTooQuiet [out] Result value
953  * @return ESR_INVALID_ARGUMENT if self is null
954  */
955 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooQuiet(SR_Recognizer* self, ESR_BOOL* isTooQuiet);
956 /**
957  * Indicates if there are too few samples in the signal for a proper recognition.
958  *
959  * @param self SR_Recognizer handle
960  * @param isTooFewSamples [out] Result value
961  * @return ESR_INVALID_ARGUMENT if self is null
962  */
963 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooFewSamples(SR_Recognizer* self, ESR_BOOL* isTooFewSamples);
964 /**
965  * Indicates if there are too many samples in the signal for a proper recognition.
966  *
967  * @param self SR_Recognizer handle
968  * @param isTooManySamples [out] Result value
969  * @return ESR_INVALID_ARGUMENT if self is null
970  */
971 SREC_RECOGNIZER_API ESR_ReturnCode SR_RecognizerIsSignalTooManySamples(SR_Recognizer* self, ESR_BOOL* isTooManySamples);
972 
973 /**
974  * Changes the sample rate of audio.
975  *
976  * @param self SR_Recognizer handle
977  * @param new_sample_rate [in] New Sample Rate
978  * @return ESR_ReturnCode if self is null
979  */
980 SREC_RECOGNIZER_API ESR_ReturnCode SR_Recognizer_Change_Sample_Rate ( SR_Recognizer *self, size_t new_sample_rate );
981 
982 /**
983  * @}
984  */
985 
986 /**
987  * @}
988  */
989 
990 
991 #endif /* __SR_RECOGNIZER_H */
992