• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  RecognizerImpl.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                        *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 #include "ESR_Session.h"
22 #include "ESR_SessionTypeImpl.h"
23 #include "IntArrayList.h"
24 #include "LCHAR.h"
25 #include "passert.h"
26 #include "plog.h"
27 #include "pstdio.h"
28 #include "pmemory.h"
29 #include "ptimestamp.h"
30 #include "SR_AcousticModelsImpl.h"
31 #include "SR_AcousticStateImpl.h"
32 #include "SR_GrammarImpl.h"
33 #include "SR_SemprocDefinitions.h"
34 #include "SR_SemanticResult.h"
35 #include "SR_SemanticResultImpl.h"
36 #include "SR_Recognizer.h"
37 #include "SR_RecognizerImpl.h"
38 #include "SR_RecognizerResultImpl.h"
39 #include "SR_SemanticResultImpl.h"
40 #include "SR_EventLog.h"
41 #include "srec.h"
42 
43 #define MTAG NULL
44 #define FILTER_NBEST_BY_SEM_RESULT 1
45 #define AUDIO_CIRC_BUFFER_SIZE 20000
46 #define SEMPROC_ACTIVE 1
47 #define SAMPLE_SIZE (16 / CHAR_BIT) /* 16-bits / sample */
48 
49 /* milliseconds per FRAME = 1/FRAMERATE * 1000 */
50 /* We multiply by 2 because we skip even frames */
51 #define MSEC_PER_FRAME (2000/FRAMERATE)
52 #define MAX_ENTRY_LENGTH 512
53 #define PREFIX_WORD     "-pau-"
54 #define PREFIX_WORD_LEN 5
55 #define SUFFIX_WORD     "-pau2-"
56 #define SUFFIX_WORD_LEN 6
57 
58 
59 static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl );
60 
61 /**
62  * Initializes recognizer properties to default values.
63  *
64  * Replaces setup_recognition_parameters()
65  */
SR_RecognizerToSessionImpl()66 ESR_ReturnCode SR_RecognizerToSessionImpl()
67 {
68   ESR_ReturnCode rc;
69 
70   /* Old comment: remember to keep "ca_rip.h" up to date with these parameters... */
71 
72   /* CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_acoustic_models", 2)); */
73   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Recognizer.partial_results", ESR_FALSE));
74   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.NBest", 1));
75   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.eou_threshold", 100));
76   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_altword_tokens", 400));
77   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_frames", 1000));
78   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_arcs", 3000));
79   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_nodes", 3000));
80   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsmnode_tokens", 1000));
81   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_hmm_tokens", 1000));
82   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_model_states", 1000));
83   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_searches", 2));
84   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_word_tokens", 1000));
85   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.non_terminal_timeout", 50));
86   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.num_wordends_per_frame", 10));
87   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.often", 10));
88   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.optional_terminal_timeout", 30));
89   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.reject", 500));
90   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.terminal_timeout", 10));
91   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.viterbi_prune_thresh", 5000));
92   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.wordpen", 0));
93 
94   CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("SREC.Recognizer.utterance_timeout", 400));
95 
96   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("enableGetWaveform", ESR_FALSE));
97 
98   return ESR_SUCCESS;
99 CLEANUP:
100   return rc;
101 }
102 
103 /**
104  * Initializes frontend properties to default values.
105  *
106  * Replaces load_up_parameter_list()
107  */
SR_RecognizerFrontendToSessionImpl()108 ESR_ReturnCode SR_RecognizerFrontendToSessionImpl()
109 {
110   IntArrayList* intList = NULL;
111   ESR_ReturnCode rc;
112   ESR_BOOL exists;
113   size_t i;
114 
115   /* Old comment: Remember to keep "ca_pip.h" up to date with these parameters... */
116 
117   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.mel_dim", 12));
118   CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("CREC.Frontend.samplerate", 8000));
119   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.premel", 0.98f));
120   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lowcut", 260));  /* Hz */
121   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.highcut", 4000)); /* Hz */
122   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.window_factor", 2.0)); /* times the frame size */
123   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_skip_even_frames", ESR_FALSE)); /* 10/20 ms rate */
124   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.offset", 0)); /* additional */
125   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.ddmel", ESR_FALSE)); /* delta-delta mel pars */
126   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.forgetfactor", 40));
127   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.sv6_margin", 10));
128   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rasta", ESR_FALSE));
129   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rastac0", ESR_FALSE));
130   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.spectral_subtraction", ESR_FALSE));
131   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.spec_sub_dur", 0));
132   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.spec_sub_scale", 1.0));
133   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_dump", ESR_FALSE)); /* Output is filterbank (30 floats) */
134   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_input", ESR_FALSE)); /* Input is filterbank (30 floats) in place of audio samples */
135   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_smooth_c0", ESR_TRUE));
136   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.plp", ESR_FALSE)); /* Do PLP instead of MEL */
137   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lpcorder", 12)); /* order of lpc analysis in plp processing */
138   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.warp_scale", 1.0));
139   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.piecewise_start", 1.0));
140   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecayup", -1.0)); /* If +ve, decay factor on peakpicker (low to high) */
141   CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecaydown", -1.0)); /* If +ve, decay factor on peakpicker (high to low) */
142   CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.cuberoot", ESR_FALSE)); /* Use cube root instead of log */
143 
144   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_offset", &exists));
145   if (!exists)
146   {
147     CHKLOG(rc, IntArrayListCreate(&intList));
148     for (i = 0; i < 32; ++i)
149       CHKLOG(rc, IntArrayListAdd(intList, 0));
150     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_offset", intList, TYPES_INTARRAYLIST));
151     intList = NULL;
152   }
153 
154   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_loop", &exists));
155   if (!exists)
156   {
157     CHKLOG(rc, IntArrayListCreate(&intList));
158     for (i = 0; i < 32; ++i)
159       CHKLOG(rc, IntArrayListAdd(intList, 1));
160     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_loop", intList, TYPES_INTARRAYLIST));
161     intList = NULL;
162   }
163 
164   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melA", &exists));
165   if (!exists)
166   {
167     CHKLOG(rc, IntArrayListCreate(&intList));
168     CHKLOG(rc, IntArrayListAdd(intList, (int) 13.2911));
169     CHKLOG(rc, IntArrayListAdd(intList, (int) 47.2229));
170     CHKLOG(rc, IntArrayListAdd(intList, (int) 79.2485));
171     CHKLOG(rc, IntArrayListAdd(intList, (int) 92.1967));
172     CHKLOG(rc, IntArrayListAdd(intList, (int) 136.3855));
173     CHKLOG(rc, IntArrayListAdd(intList, (int) 152.2896));
174     CHKLOG(rc, IntArrayListAdd(intList, (int) 183.3601));
175     CHKLOG(rc, IntArrayListAdd(intList, (int) 197.4200));
176     CHKLOG(rc, IntArrayListAdd(intList, (int) 217.8278));
177     CHKLOG(rc, IntArrayListAdd(intList, (int) 225.6556));
178     CHKLOG(rc, IntArrayListAdd(intList, (int) 263.3073));
179     CHKLOG(rc, IntArrayListAdd(intList, (int) 277.193));
180     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melA", intList, TYPES_INTARRAYLIST));
181     intList = NULL;
182   }
183 
184   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melB", &exists));
185   if (!exists)
186   {
187     CHKLOG(rc, IntArrayListCreate(&intList));
188     CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0847));
189     CHKLOG(rc, IntArrayListAdd(intList, (int) 91.3289));
190     CHKLOG(rc, IntArrayListAdd(intList, (int) 113.9995));
191     CHKLOG(rc, IntArrayListAdd(intList, (int) 123.0336));
192     CHKLOG(rc, IntArrayListAdd(intList, (int) 131.2704));
193     CHKLOG(rc, IntArrayListAdd(intList, (int) 128.9942));
194     CHKLOG(rc, IntArrayListAdd(intList, (int) 120.5267));
195     CHKLOG(rc, IntArrayListAdd(intList, (int) 132.0079));
196     CHKLOG(rc, IntArrayListAdd(intList, (int) 129.8076));
197     CHKLOG(rc, IntArrayListAdd(intList, (int) 126.5029));
198     CHKLOG(rc, IntArrayListAdd(intList, (int) 121.8519));
199     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melB", intList, TYPES_INTARRAYLIST));
200     intList = NULL;
201   }
202 
203   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelA", &exists));
204   if (!exists)
205   {
206     CHKLOG(rc, IntArrayListCreate(&intList));
207     CHKLOG(rc, IntArrayListAdd(intList, (int) 91.6305));
208     CHKLOG(rc, IntArrayListAdd(intList, (int) 358.3790));
209     CHKLOG(rc, IntArrayListAdd(intList, (int) 527.5946));
210     CHKLOG(rc, IntArrayListAdd(intList, (int) 536.3163));
211     CHKLOG(rc, IntArrayListAdd(intList, (int) 731.2385));
212     CHKLOG(rc, IntArrayListAdd(intList, (int) 757.8382));
213     CHKLOG(rc, IntArrayListAdd(intList, (int) 939.4460));
214     CHKLOG(rc, IntArrayListAdd(intList, (int) 1028.4136));
215     CHKLOG(rc, IntArrayListAdd(intList, (int) 1071.3193));
216     CHKLOG(rc, IntArrayListAdd(intList, (int) 1183.7922));
217     CHKLOG(rc, IntArrayListAdd(intList, (int) 1303.1014));
218     CHKLOG(rc, IntArrayListAdd(intList, (int) 1447.7766));
219     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelA", intList, TYPES_INTARRAYLIST));
220     intList = NULL;
221   }
222 
223   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelB", &exists));
224   if (!exists)
225   {
226     CHKLOG(rc, IntArrayListCreate(&intList));
227     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4785));
228     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3878));
229     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4029));
230     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3182));
231     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3706));
232     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5394));
233     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5150));
234     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4270));
235     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4871));
236     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4088));
237     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4361));
238     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5449));
239     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelB", intList, TYPES_INTARRAYLIST));
240     intList = NULL;
241   }
242 
243   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelA", &exists));
244   if (!exists)
245   {
246     CHKLOG(rc, IntArrayListCreate(&intList));
247     CHKLOG(rc, IntArrayListAdd(intList, (int) 10.7381));
248     CHKLOG(rc, IntArrayListAdd(intList, (int) 32.6775));
249     CHKLOG(rc, IntArrayListAdd(intList, (int) 46.2301));
250     CHKLOG(rc, IntArrayListAdd(intList, (int) 51.5438));
251     CHKLOG(rc, IntArrayListAdd(intList, (int) 57.6636));
252     CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0581));
253     CHKLOG(rc, IntArrayListAdd(intList, (int) 65.3696));
254     CHKLOG(rc, IntArrayListAdd(intList, (int) 70.1910));
255     CHKLOG(rc, IntArrayListAdd(intList, (int) 71.6751));
256     CHKLOG(rc, IntArrayListAdd(intList, (int) 78.2364));
257     CHKLOG(rc, IntArrayListAdd(intList, (int) 83.2440));
258     CHKLOG(rc, IntArrayListAdd(intList, (int) 89.6261));
259     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelA", intList, TYPES_INTARRAYLIST));
260     intList = NULL;
261   }
262 
263   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelB", &exists));
264   if (!exists)
265   {
266     CHKLOG(rc, IntArrayListCreate(&intList));
267     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5274));
268     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5098));
269     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5333));
270     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5963));
271     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5132));
272     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5282));
273     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5530));
274     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5682));
275     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4662));
276     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4342));
277     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5235));
278     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4061));
279     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelB", intList, TYPES_INTARRAYLIST));
280     intList = NULL;
281   }
282 
283   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaA", &exists));
284   if (!exists)
285   {
286     CHKLOG(rc, IntArrayListCreate(&intList));
287     CHKLOG(rc, IntArrayListAdd(intList, (int) 7.80));
288     CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0));
289     CHKLOG(rc, IntArrayListAdd(intList, (int) 54.0));
290     CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0));
291     CHKLOG(rc, IntArrayListAdd(intList, (int) 84.0));
292     CHKLOG(rc, IntArrayListAdd(intList, (int) 86.5));
293     CHKLOG(rc, IntArrayListAdd(intList, (int) 98.1));
294     CHKLOG(rc, IntArrayListAdd(intList, (int) 127.0));
295     CHKLOG(rc, IntArrayListAdd(intList, (int) 153.0));
296     CHKLOG(rc, IntArrayListAdd(intList, (int) 160.0));
297     CHKLOG(rc, IntArrayListAdd(intList, (int) 188.0));
298     CHKLOG(rc, IntArrayListAdd(intList, (int) 199.0));
299     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaA", intList, TYPES_INTARRAYLIST));
300     intList = NULL;
301   }
302 
303   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaB", &exists));
304   if (!exists)
305   {
306     CHKLOG(rc, IntArrayListCreate(&intList));
307     CHKLOG(rc, IntArrayListAdd(intList, 117));
308     CHKLOG(rc, IntArrayListAdd(intList, 121));
309     CHKLOG(rc, IntArrayListAdd(intList, 114));
310     CHKLOG(rc, IntArrayListAdd(intList, 111));
311     CHKLOG(rc, IntArrayListAdd(intList, 113));
312     CHKLOG(rc, IntArrayListAdd(intList, 126));
313     CHKLOG(rc, IntArrayListAdd(intList, 134));
314     CHKLOG(rc, IntArrayListAdd(intList, 130));
315     CHKLOG(rc, IntArrayListAdd(intList, 135));
316     CHKLOG(rc, IntArrayListAdd(intList, 129));
317     CHKLOG(rc, IntArrayListAdd(intList, 139));
318     CHKLOG(rc, IntArrayListAdd(intList, 138));
319     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaB", intList, TYPES_INTARRAYLIST));
320     intList = NULL;
321   }
322 
323   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_detect", 18));
324   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_above", 18));
325   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.ambient_within", 12));
326   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.start_windback", 50));
327   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.utterance_allowance", 40));
328   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_duration", 6));
329   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.quiet_duration", 20));
330 
331   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_clip", 32767));
332   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_clip", -32768));
333   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_per10000_clip", 10));
334   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_dc_offset", 1000));
335   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_noise_level_bit", 11));
336   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_speech_level_bit", 11));
337   CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.min_samples", 10000));
338 
339   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_freq", &exists));
340   if (!exists)
341   {
342     CHKLOG(rc, IntArrayListCreate(&intList));
343     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_freq", intList, TYPES_INTARRAYLIST));
344     intList = NULL;
345   }
346   CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_spread", &exists));
347   if (!exists)
348   {
349     CHKLOG(rc, IntArrayListCreate(&intList));
350     CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_spread", intList, TYPES_INTARRAYLIST));
351     intList = NULL;
352   }
353   return ESR_SUCCESS;
354 CLEANUP:
355   if (intList != NULL)
356     intList->destroy(intList);
357   return rc;
358 }
359 
360 /**
361  * Generate legacy frontend parameter structure from ESR_Session.
362  *
363  * @param impl SR_RecognizerImpl handle
364  * @param params Resulting structure
365  */
SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams * params)366 ESR_ReturnCode SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams* params)
367 {
368   ESR_ReturnCode rc;
369   IntArrayList* intList;
370   size_t size, i, size_tValue;
371   int iValue;
372 
373   passert(params != NULL);
374   params->is_loaded = ESR_FALSE;
375   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.mel_dim", &params->mel_dim));
376   CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &size_tValue));
377   params->samplerate = (int) size_tValue;
378   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.premel", &params->pre_mel));
379   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lowcut", &params->low_cut));
380   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.highcut", &params->high_cut));
381   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.window_factor", &params->window_factor));
382   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_skip_even_frames", &params->do_skip_even_frames));
383   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.offset", &params->offset));
384   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.ddmel", &params->do_dd_mel));
385   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.forgetfactor", &params->forget_factor));
386   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.sv6_margin", &params->sv6_margin));
387   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.rastac0", &params->do_rastac0));
388   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.spectral_subtraction", &params->do_spectral_sub));
389   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.spec_sub_dur", &params->spectral_sub_frame_dur));
390   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.spec_sub_scale", &params->spec_sub_scale));
391   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_dump", &params->do_filterbank_input));
392   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_input", &params->do_filterbank_input));
393   CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_smooth_c0", &params->do_smooth_c0));
394   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lpcorder", &params->lpc_order));
395   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.warp_scale", &params->warp_scale));
396   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.piecewise_start", &params->piecewise_start));
397   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecayup", &params->peakpickup));
398   CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecaydown", &params->peakpickdown));
399 
400   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_offset", (void **)&intList, TYPES_INTARRAYLIST));
401   if (intList == NULL)
402   {
403     PLogError(L("ESR_INVALID_STATE"));
404     return ESR_INVALID_STATE;
405   }
406   CHKLOG(rc, IntArrayListGetSize(intList, &size));
407   for (i = 0; i < size; ++i)
408     CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_offset[i]));
409 
410   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_loop", (void **)&intList, TYPES_INTARRAYLIST));
411   if (intList == NULL)
412   {
413     PLogError(L("ESR_INVALID_STATE"));
414     return ESR_INVALID_STATE;
415   }
416   CHKLOG(rc, IntArrayListGetSize(intList, &size));
417   for (i = 0; i < size; ++i)
418     CHKLOG(rc, IntArrayListGet(intList, i, &params->mel_loop[i]));
419 
420   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melA", (void **)&intList, TYPES_INTARRAYLIST));
421   CHKLOG(rc, IntArrayListGetSize(intList, &size));
422   for (i = 0; i < size; ++i)
423     CHKLOG(rc, IntArrayListGet(intList, i, &params->melA_scale[i]));
424 
425   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melB", (void **)&intList, TYPES_INTARRAYLIST));
426   CHKLOG(rc, IntArrayListGetSize(intList, &size));
427   for (i = 0; i < size; ++i)
428     CHKLOG(rc, IntArrayListGet(intList, i, &params->melB_scale[i]));
429 
430   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelA", (void **)&intList, TYPES_INTARRAYLIST));
431   CHKLOG(rc, IntArrayListGetSize(intList, &size));
432   for (i = 0; i < size; ++i)
433     CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelA_scale[i]));
434 
435   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelB", (void **)&intList, TYPES_INTARRAYLIST));
436   CHKLOG(rc, IntArrayListGetSize(intList, &size));
437   for (i = 0; i < size; ++i)
438     CHKLOG(rc, IntArrayListGet(intList, i, &params->dmelB_scale[i]));
439 
440   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelA", (void **)&intList, TYPES_INTARRAYLIST));
441   CHKLOG(rc, IntArrayListGetSize(intList, &size));
442   for (i = 0; i < size; ++i)
443     CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelA_scale[i]));
444 
445   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelB", (void **)&intList, TYPES_INTARRAYLIST));
446   CHKLOG(rc, IntArrayListGetSize(intList, &size));
447   for (i = 0; i < size; ++i)
448     CHKLOG(rc, IntArrayListGet(intList, i, &params->ddmelB_scale[i]));
449 
450   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaA", (void **)&intList, TYPES_INTARRAYLIST));
451   CHKLOG(rc, IntArrayListGetSize(intList, &size));
452   for (i = 0; i < size; ++i)
453     CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaA_scale[i]));
454 
455   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaB", (void **)&intList, TYPES_INTARRAYLIST));
456   CHKLOG(rc, IntArrayListGetSize(intList, &size));
457   for (i = 0; i < size; ++i)
458     CHKLOG(rc, IntArrayListGet(intList, i, &params->rastaB_scale[i]));
459 
460   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_detect", &params->voice_margin));
461   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_above", &params->fast_voice_margin));
462   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.ambient_within", &params->tracker_margin));
463   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.start_windback", &params->start_windback));
464   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.utterance_allowance", &params->unsure_duration));
465   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_duration", &params->voice_duration));
466   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.quiet_duration", &params->quiet_duration));
467 
468   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_clip", &params->high_clip));
469   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_clip", &params->low_clip));
470   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_per10000_clip", &params->max_per10000_clip));
471   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_dc_offset", &params->max_dc_offset));
472   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_noise_level_bit", &params->high_noise_level_bit));
473   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_speech_level_bit", &params->low_speech_level_bit));
474   CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.min_samples", &params->min_samples));
475 
476   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_freq", (void **)&intList, TYPES_INTARRAYLIST));
477   if (intList == NULL)
478   {
479     PLogError(L("ESR_INVALID_STATE"));
480     return ESR_INVALID_STATE;
481   }
482   CHKLOG(rc, IntArrayListGetSize(intList, &size));
483   for (i = 0; i < size; ++i)
484   {
485     CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
486     params->spectrum_filter_freq[i] = iValue;
487   }
488 
489   CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_spread", (void **)&intList, TYPES_INTARRAYLIST));
490   if (intList == NULL)
491   {
492     PLogError(L("ESR_INVALID_STATE"));
493     return ESR_INVALID_STATE;
494   }
495   CHKLOG(rc, IntArrayListGetSize(intList, &size));
496   for (i = 0; i < size; ++i)
497   {
498     CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
499     params->spectrum_filter_spread[i] = iValue;
500   }
501   params->is_loaded = ESR_TRUE;
502   return ESR_SUCCESS;
503 CLEANUP:
504   return rc;
505 }
506 
507 /**
508  * Creates frontend components of SR_Recognizer.
509  *
510  * @param impl SR_RecognizerImpl handle
511  */
SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl * impl)512 ESR_ReturnCode SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl* impl)
513 {
514   ESR_ReturnCode rc;
515   CA_FrontendInputParams* frontendParams;
516 
517   /* Create a frontend object */
518   impl->frontend = CA_AllocateFrontend(1, 0, 1);
519   frontendParams = CA_AllocateFrontendParameters();
520   CHKLOG(rc, SR_RecognizerGetFrontendLegacyParametersImpl(frontendParams));
521 
522   CA_ConfigureFrontend(impl->frontend, frontendParams);
523 
524   /* Create a wave object */
525   impl->wavein = CA_AllocateWave('N');
526   if (impl->wavein == NULL)
527   {
528     rc = ESR_OUT_OF_MEMORY;
529     PLogError(ESR_rc2str(rc));
530     goto CLEANUP;
531   }
532   CA_ConfigureWave(impl->wavein, impl->frontend);
533   CA_ConfigureVoicingAnalysis(impl->wavein, frontendParams);
534 
535   CA_LoadCMSParameters(impl->wavein, NULL, frontendParams);
536 
537   /* Create an utterance object */
538   impl->utterance = CA_AllocateUtterance();
539   if (impl->utterance == NULL)
540   {
541     rc = ESR_OUT_OF_MEMORY;
542     PLogError(ESR_rc2str(rc));
543     goto CLEANUP;
544   }
545   CA_InitUtteranceForFrontend(impl->utterance, frontendParams);
546   CA_AttachCMStoUtterance(impl->wavein, impl->utterance);
547   CA_FreeFrontendParameters(frontendParams);
548   return ESR_SUCCESS;
549 
550 CLEANUP:
551   if (impl->frontend != NULL)
552   {
553     CA_UnconfigureFrontend(impl->frontend);
554     CA_FreeFrontend(impl->frontend);
555     impl->frontend = NULL;
556   }
557   if (impl->wavein != NULL)
558   {
559     CA_UnconfigureWave(impl->wavein);
560     CA_FreeWave(impl->wavein);
561     impl->wavein = NULL;
562   }
563   if (impl->utterance != NULL)
564   {
565     CA_ClearUtterance(impl->utterance);
566     CA_FreeUtterance(impl->utterance);
567     impl->utterance = NULL;
568   }
569   if (frontendParams != NULL)
570     CA_FreeFrontendParameters(frontendParams);
571   return rc;
572 }
573 
574 /**
575  * Populates legacy recognizer parameters from the session.
576  *
577  * Replaces setup_pattern_parameters()
578  */
SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams * params)579 ESR_ReturnCode SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams* params)
580 {
581   ESR_ReturnCode rc;
582 
583   passert(params != NULL);
584   params->is_loaded = ESR_FALSE;
585   CHKLOG(rc, ESR_SessionGetBool("CREC.Recognizer.partial_results", &params->do_partial));
586   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.NBest", &params->top_choices));
587   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.eou_threshold", &params->eou_threshold));
588   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_altword_tokens", &params->max_altword_tokens));
589   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_frames", &params->max_frames));
590   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_arcs", &params->max_fsm_arcs));
591   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_nodes", &params->max_fsm_nodes));
592   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsmnode_tokens", &params->max_fsmnode_tokens));
593   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_hmm_tokens", &params->max_hmm_tokens));
594   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_model_states", &params->max_model_states));
595   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_searches", &params->max_searches));
596   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_word_tokens", &params->max_word_tokens));
597   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.non_terminal_timeout", &params->non_terminal_timeout));
598   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.num_wordends_per_frame", &params->num_wordends_per_frame));
599   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.often", &params->traceback_freq));
600   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.optional_terminal_timeout", &params->optional_terminal_timeout));
601   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.reject", &params->reject_score));
602   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.terminal_timeout", &params->terminal_timeout));
603   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.viterbi_prune_thresh", &params->viterbi_prune_thresh));
604   CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.wordpen", &params->word_penalty));
605   params->is_loaded = ESR_TRUE;
606 
607   return ESR_SUCCESS;
608 CLEANUP:
609   return rc;
610 }
611 
SR_RecognizerCreate(SR_Recognizer ** self)612 ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self)
613 {
614   SR_RecognizerImpl* impl;
615   CA_RecInputParams* recogParams = NULL;
616   ESR_ReturnCode rc;
617   LCHAR recHandle[12];
618 
619   if (self == NULL)
620   {
621     PLogError(L("ESR_INVALID_ARGUMENT"));
622     return ESR_INVALID_ARGUMENT;
623   }
624   impl = NEW(SR_RecognizerImpl, MTAG);
625   if (impl == NULL)
626   {
627     PLogError(L("ESR_OUT_OF_MEMORY"));
628     return ESR_OUT_OF_MEMORY;
629   }
630 
631   impl->Interface.start = &SR_RecognizerStartImpl;
632   impl->Interface.stop = &SR_RecognizerStopImpl;
633   impl->Interface.destroy = &SR_RecognizerDestroyImpl;
634   impl->Interface.setup = &SR_RecognizerSetupImpl;
635   impl->Interface.unsetup = &SR_RecognizerUnsetupImpl;
636   impl->Interface.isSetup = &SR_RecognizerIsSetupImpl;
637   impl->Interface.getParameter = &SR_RecognizerGetParameterImpl;
638   impl->Interface.getSize_tParameter = &SR_RecognizerGetSize_tParameterImpl;
639   impl->Interface.getBoolParameter = &SR_RecognizerGetBoolParameterImpl;
640   impl->Interface.setParameter = &SR_RecognizerSetParameterImpl;
641   impl->Interface.setSize_tParameter = &SR_RecognizerSetSize_tParameterImpl;
642   impl->Interface.setBoolParameter = &SR_RecognizerSetBoolParameterImpl;
643   impl->Interface.setLockFunction = &SR_RecognizerSetLockFunctionImpl;
644   impl->Interface.hasSetupRules = &SR_RecognizerHasSetupRulesImpl;
645   impl->Interface.activateRule = &SR_RecognizerActivateRuleImpl;
646   impl->Interface.deactivateRule = &SR_RecognizerDeactivateRuleImpl;
647   impl->Interface.deactivateAllRules = &SR_RecognizerDeactivateAllRulesImpl;
648   impl->Interface.isActiveRule = &SR_RecognizerIsActiveRuleImpl;
649   impl->Interface.setWordAdditionCeiling = &SR_RecognizerSetWordAdditionCeilingImpl;
650   impl->Interface.checkGrammarConsistency = &SR_RecognizerCheckGrammarConsistencyImpl;
651   impl->Interface.getModels = &SR_RecognizerGetModelsImpl;
652   impl->Interface.putAudio = &SR_RecognizerPutAudioImpl;
653   impl->Interface.advance = &SR_RecognizerAdvanceImpl;
654   impl->Interface.loadUtterance = &SR_RecognizerLoadUtteranceImpl;
655   impl->Interface.loadWaveFile = &SR_RecognizerLoadWaveFileImpl;
656   impl->Interface.logEvent = &SR_RecognizerLogEventImpl;
657   impl->Interface.logToken = &SR_RecognizerLogTokenImpl;
658   impl->Interface.logTokenInt = &SR_RecognizerLogTokenIntImpl;
659   impl->Interface.logSessionStart = &SR_RecognizerLogSessionStartImpl;
660   impl->Interface.logSessionEnd = &SR_RecognizerLogSessionEndImpl;
661   impl->Interface.logWaveformData = &SR_RecognizerLogWaveformDataImpl;
662   impl->Interface.isSignalClipping = &SR_RecognizerIsSignalClippingImpl;
663   impl->Interface.isSignalDCOffset = &SR_RecognizerIsSignalDCOffsetImpl;
664   impl->Interface.isSignalNoisy = &SR_RecognizerIsSignalNoisyImpl;
665   impl->Interface.isSignalTooFewSamples = &SR_RecognizerIsSignalTooFewSamplesImpl;
666   impl->Interface.isSignalTooManySamples = &SR_RecognizerIsSignalTooManySamplesImpl;
667   impl->Interface.isSignalTooQuiet = &SR_RecognizerIsSignalTooQuietImpl;
668 
669   impl->frontend = NULL;
670   impl->wavein = NULL;
671   impl->utterance = NULL;
672   impl->confidenceScorer = NULL;
673   impl->recognizer = NULL;
674   impl->models = NULL;
675   impl->grammars = NULL;
676   impl->result = NULL;
677   impl->parameters = NULL;
678   impl->acousticState = NULL;
679   impl->audioBuffer = NULL;
680   impl->buffer = NULL;
681   impl->frames = impl->processed;
682   impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
683   impl->isStarted = ESR_FALSE;
684   impl->isRecognizing = ESR_FALSE;
685   impl->gotLastFrame = ESR_FALSE;
686   impl->sampleRate = 0;
687   impl->lockFunction = NULL;
688   impl->lockData = NULL;
689   impl->eventLog = NULL;
690   impl->osi_log_level = 0;
691   impl->waveformBuffer = NULL;
692   impl->isSignalQualityInitialized = ESR_FALSE;
693   impl->beginningOfSpeechOffset = 0;
694   impl->gatedMode = ESR_TRUE;
695   impl->bgsniff = 0;
696   impl->isSignalClipping       = ESR_FALSE;
697   impl->isSignalDCOffset       = ESR_FALSE;
698   impl->isSignalNoisy          = ESR_FALSE;
699   impl->isSignalTooFewSamples  = ESR_FALSE;
700   impl->isSignalTooManySamples = ESR_FALSE;
701   impl->isSignalTooQuiet       = ESR_FALSE;
702 
703   CHKLOG(rc, ESR_SessionTypeCreate(&impl->parameters));
704   CHKLOG(rc, SR_RecognizerToSessionImpl());
705   CHKLOG(rc, ESR_SessionGetSize_t(L("SREC.Recognizer.osi_log_level"), &impl->osi_log_level));
706 
707   /* create the event log */
708   if (impl->osi_log_level) /* do some logging if non-zero val */
709     CHKLOG(rc, ESR_SessionGetProperty(L("eventlog"), (void **)&impl->eventLog, TYPES_SR_EVENTLOG));
710 
711   /* Record the OSI log event */
712   psprintf(recHandle, L("%p"), impl);
713   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
714   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrst")));
715 
716   CHKLOG(rc, SR_RecognizerFrontendToSessionImpl());
717   CHKLOG(rc, SR_RecognizerCreateFrontendImpl(impl));
718   rc = ESR_SessionGetProperty("recognizer.confidenceScorer", (void **)&impl->confidenceScorer, TYPES_CONFIDENCESCORER);
719   if (rc == ESR_NO_MATCH_ERROR)
720   {
721     impl->confidenceScorer = CA_AllocateConfidenceScorer();
722 
723     if (!CA_LoadConfidenceScorer(impl->confidenceScorer)) {
724       rc = ESR_INVALID_STATE;
725       PLogError(ESR_rc2str(rc));
726       goto CLEANUP;
727     }
728     CHKLOG(rc, ESR_SessionSetProperty("recognizer.confidenceScorer", impl->confidenceScorer, TYPES_CONFIDENCESCORER));
729   }
730   else if (rc != ESR_SUCCESS)
731   {
732     PLogError(ESR_rc2str(rc));
733     goto CLEANUP;
734   }
735 
736   recogParams = CA_AllocateRecognitionParameters();
737   if (recogParams == NULL)
738   {
739     rc = ESR_OUT_OF_MEMORY;
740     PLogError(ESR_rc2str(rc));
741     goto CLEANUP;
742   }
743   CHKLOG(rc, SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams));
744   impl->recognizer = CA_AllocateRecognition();
745   if (impl->recognizer == NULL)
746   {
747     PLogError(ESR_rc2str(rc));
748     goto CLEANUP;
749   }
750   CA_ConfigureRecognition(impl->recognizer, recogParams);
751   CA_FreeRecognitionParameters(recogParams);
752   CHKLOG(rc, HashMapCreate(&impl->grammars));
753   CHKLOG(rc, CircularBufferCreate(sizeof(asr_int16_t) * AUDIO_CIRC_BUFFER_SIZE, MTAG, &impl->buffer));
754   CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &impl->sampleRate));
755 
756   impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
757 
758   if ((impl->audioBuffer = MALLOC(impl->FRAME_SIZE, MTAG)) == NULL)
759   {
760     rc = ESR_OUT_OF_MEMORY;
761     goto CLEANUP;
762   }
763 
764   /* create the waveform buffer */
765   CHKLOG(rc, WaveformBuffer_Create(&impl->waveformBuffer, impl->FRAME_SIZE));
766 
767   CHKLOG(rc, ESR_SessionGetSize_t("SREC.Recognizer.utterance_timeout", &impl->utterance_timeout));
768 
769   /* OSI logging (SUCCESS) */
770   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
771   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
772   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd")));
773 
774   CHKLOG(rc, SR_AcousticStateCreateImpl(&impl->Interface));
775 
776   CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.bgsniff"), &impl->bgsniff));
777   /* gated mode == beginning of speech detection */
778   CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &impl->gatedMode));
779 
780   *self = (SR_Recognizer*) impl;
781   return ESR_SUCCESS;
782 CLEANUP:
783   /* OSI logging (FAILURE) */
784   if (impl->eventLog != NULL)
785   {
786     SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle);
787     SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("FAILURE"), ESR_rc2str(rc));
788     SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd"));
789   }
790 
791   if (recogParams != NULL)
792     CA_FreeRecognitionParameters(recogParams);
793   impl->Interface.destroy(&impl->Interface);
794   return rc;
795 }
796 
SR_RecognizerDestroyImpl(SR_Recognizer * self)797 ESR_ReturnCode SR_RecognizerDestroyImpl(SR_Recognizer* self)
798 {
799   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
800   ESR_BOOL exists; // isSetup;
801   ESR_ReturnCode rc;
802   LCHAR recHandle[12];
803 
804   if (impl->result != NULL)
805   {
806     SR_RecognizerResult_Destroy(impl->result);
807     impl->result = NULL;
808   }
809 
810   if (impl->eventLog != NULL)
811   {
812     /* Record the OSI log event */
813     psprintf(recHandle, L("%p"), impl);
814     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
815     CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesst")));
816   }
817 
818   /* Clean session */
819   CHKLOG(rc, ESR_SessionContains("recognizer.confidenceScorer", &exists));
820   if (exists)
821     CHKLOG(rc, ESR_SessionRemoveProperty("recognizer.confidenceScorer"));
822 
823   if (impl->confidenceScorer != NULL)
824   {
825     CA_FreeConfidenceScorer(impl->confidenceScorer);
826     impl->confidenceScorer = NULL;
827   }
828 
829   /* Clear CMS, CRS_RecognizerClose() */
830   if (impl->wavein != NULL)
831   {
832     ESR_BOOL isAttached, isConfigured;
833 
834     CHKLOG(rc, CA_IsCMSAttachedtoUtterance(impl->wavein, &isAttached));
835     if (isAttached)
836       CA_DetachCMSfromUtterance(impl->wavein, impl->utterance);
837 
838     CHKLOG(rc, CA_IsConfiguredForAgc(impl->wavein, &isConfigured));
839     if (isConfigured)
840       CA_ClearCMSParameters(impl->wavein);
841   }
842 
843   /* Free Utterance */
844   if (impl->utterance != NULL)
845   {
846     CA_ClearUtterance(impl->utterance);
847     CA_FreeUtterance(impl->utterance);
848     impl->utterance = NULL;
849   }
850 
851   /* Free WaveformBuffer */
852   if (impl->waveformBuffer != NULL)
853   {
854     WaveformBuffer_Destroy(impl->waveformBuffer);
855     impl->waveformBuffer = NULL;
856   }
857 
858   /* Free recognizer */
859 /*  CHKLOG(rc, self->isSetup(self, &isSetup));
860   if (isSetup)
861     CHKLOG(rc, self->unsetup(self));*/
862   if (impl->grammars != NULL)
863     CHKLOG(rc, self->deactivateAllRules(self));
864   if (impl->recognizer != NULL)
865   {
866     CA_UnloadRecognitionModels(impl->recognizer);
867     CA_UnconfigureRecognition(impl->recognizer);
868     CA_FreeRecognition(impl->recognizer);
869     impl->recognizer = NULL;
870   }
871 
872   if (impl->grammars != NULL)
873   {
874     CHKLOG(rc, HashMapDestroy(impl->grammars));
875     impl->grammars = NULL;
876   }
877 
878   if (impl->buffer != NULL)
879   {
880     FREE(impl->buffer);
881     impl->buffer = NULL;
882   }
883 
884   if (impl->audioBuffer != NULL)
885   {
886     FREE(impl->audioBuffer);
887     impl->audioBuffer = NULL;
888   }
889 
890   /* Free frontend */
891   if (impl->frontend)
892   {
893     CA_UnconfigureFrontend(impl->frontend);
894     CA_FreeFrontend(impl->frontend);
895     impl->frontend = NULL;
896   }
897 
898   /* Free wave */
899   if (impl->wavein)
900   {
901     CA_UnconfigureWave(impl->wavein);
902     CA_FreeWave(impl->wavein);
903     impl->wavein = NULL;
904   }
905 
906   if (impl->parameters != NULL)
907     CHKLOG(rc, impl->parameters->destroy(impl->parameters));
908 
909   if (impl->eventLog != NULL)
910   {
911     /* OSI logging (SUCCESS) */
912     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
913     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
914     CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesnd")));
915     impl->eventLog = NULL;
916   }
917 
918   if (impl->acousticState != NULL)
919   {
920     impl->acousticState->destroy(self);
921     impl->acousticState = NULL;
922   }
923   FREE(impl);
924   return ESR_SUCCESS;
925 CLEANUP:
926   return rc;
927 }
928 
beginRecognizing(SR_RecognizerImpl * impl)929 ESR_ReturnCode beginRecognizing(SR_RecognizerImpl* impl)
930 {
931   CA_RecInputParams* recogParams;
932   LCHAR tok[80];
933   LCHAR* val;
934   PTimeStamp BORT;
935   size_t i, grammarSize;
936   ESR_ReturnCode rc;
937 
938   /* Setup recognizer for new utterance */
939   recogParams = CA_AllocateRecognitionParameters();
940   if (recogParams == NULL)
941   {
942     rc = ESR_OUT_OF_MEMORY;
943     PLogError(ESR_rc2str(rc));
944     goto CLEANUP;
945   }
946   SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams);
947   CA_BeginRecognition(impl->recognizer, NULL, 1, recogParams);
948   CA_FreeRecognitionParameters(recogParams);
949   impl->isRecognizing = ESR_TRUE;
950 
951   /* OSI log the  grammars */
952   CHKLOG(rc, HashMapGetSize(impl->grammars, &grammarSize));
953   for (i = 0; i < grammarSize; ++i)
954   {
955     psprintf(tok, L("GURI%d"), i);
956     /* use the key as the grammar URI */
957     CHKLOG(rc, HashMapGetKeyAtIndex(impl->grammars, i, &val));
958     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, tok, val));
959   }
960   /* OSI ACST acoustic state reset */
961   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("ACST"), 0));
962   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("LANG"), L("en-us")));
963 
964   /* OSI log the start of recognition */
965   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcst")));
966 
967   /* save the BORT timing (begin of recog) */
968   PTimeStampSet(&BORT);
969   impl->recogLogTimings.BORT = PTimeStampDiff(&BORT, &impl->timestamp);
970 
971   return ESR_SUCCESS;
972 CLEANUP:
973   if (recogParams != NULL)
974     CA_FreeRecognitionParameters(recogParams);
975   return rc;
976 }
977 
SR_RecognizerStartImpl(SR_Recognizer * self)978 ESR_ReturnCode SR_RecognizerStartImpl(SR_Recognizer* self)
979 {
980   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
981   size_t silence_duration_in_frames;
982   size_t end_of_utterance_hold_off_in_frames;
983   size_t grammarCount;
984   ESR_ReturnCode rc;
985   ESR_BOOL enableGetWaveform = ESR_FALSE;
986 
987   CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarCount));
988   if (impl->models == NULL)
989   {
990     PLogError("ESR_INVALID_STATE: No rule has been set up");
991     return ESR_INVALID_STATE;
992   }
993   if (grammarCount < 1)
994   {
995     PLogError("ESR_INVALID_STATE: No rule has been activated");
996     return ESR_INVALID_STATE;
997   }
998 
999   if (!CA_OpenWaveFromDevice(impl->wavein, DEVICE_RAW_PCM, impl->frontend->samplerate, 0, WAVE_DEVICE_RAW))
1000   {
1001     rc = ESR_INVALID_STATE;
1002     PLogError(ESR_rc2str(rc));
1003     goto CLEANUP;
1004   }
1005 
1006   /* Setup utterance */
1007   CA_UnlockUtteranceForInput(impl->utterance);
1008 
1009   /* Setup utterance */
1010   CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.silence_duration_in_frames"), &silence_duration_in_frames));
1011   CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.end_of_utterance_hold_off_in_frames"), &end_of_utterance_hold_off_in_frames));
1012   CA_SetEndOfUtteranceByLevelTimeout(impl->utterance, silence_duration_in_frames, end_of_utterance_hold_off_in_frames);
1013 
1014   CA_ResetVoicing(impl->utterance);
1015 
1016   /*
1017    * NOTE: We don't actually begin the recognizer here, the beginning of speech
1018    * detector will do that.
1019    */
1020 
1021   impl->gotLastFrame = ESR_FALSE;
1022   impl->isStarted = ESR_TRUE;
1023   impl->isRecognizing = ESR_FALSE;
1024   impl->isSignalQualityInitialized = ESR_FALSE;
1025   impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
1026   PTimeStampSet(&impl->timestamp);
1027 
1028   /* reset waveform buffer at start of every recognition */
1029   CHKLOG(rc, WaveformBuffer_Reset(impl->waveformBuffer));
1030 
1031   /* is waveform buffering active? */
1032   rc = ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
1033   // rc = impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
1034   if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR)
1035   {
1036     PLogError(L("%s: could determine whether VoiceEnrollment active or not"), ESR_rc2str(rc));
1037     goto CLEANUP;
1038   }
1039   if (enableGetWaveform)
1040     CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_CIRCULAR));
1041   else
1042     CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_OFF));
1043 
1044   /* I am going to try to open the audio waveform file here */
1045   if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1046   {
1047     /* open a new audio waveform file */
1048     rc = SR_EventLogAudioOpen(impl->eventLog, L("audio/L16"), impl->sampleRate, SAMPLE_SIZE);
1049     if (rc != ESR_SUCCESS)
1050     {
1051       PLogError(L("%s: could not open the RIFF audio file"), ESR_rc2str(rc));
1052       goto CLEANUP;
1053     }
1054   }
1055   impl->frames = impl->processed = 0;
1056   return ESR_SUCCESS;
1057 CLEANUP:
1058 /*  self->stop(self);*/
1059   return rc;
1060 }
1061 
SR_RecognizerStopImpl(SR_Recognizer * self)1062 ESR_ReturnCode SR_RecognizerStopImpl(SR_Recognizer* self)
1063 {
1064   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1065   SR_AcousticModelsImpl* modelsImpl;
1066   ESR_ReturnCode rc;
1067 
1068   PLOG_DBG_API_ENTER();
1069   if (!impl->isStarted)
1070   {
1071     /* In case the user calls stop() twice */
1072     return ESR_SUCCESS;
1073   }
1074   modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1075 
1076   /* Clean-up recognizer and utterance */
1077   switch (impl->internalState)
1078   {
1079     case SR_RECOGNIZER_INTERNAL_BEGIN:
1080       /* Recognizer was never started */
1081       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BEGIN")));
1082       CA_LockUtteranceFromInput(impl->utterance);
1083       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1084       if (impl->eventLog != NULL)
1085       {
1086         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BEGIN -> SR_RECOGNIZER_INTERNAL_END")));
1087         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1088         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1089         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1090       }
1091       break;
1092 
1093     case SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT:
1094       /* Recognizer was never started */
1095       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_TIMEOUT")));
1096       CA_LockUtteranceFromInput(impl->utterance);
1097       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1098       if (impl->eventLog != NULL)
1099       {
1100         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT -> SR_RECOGNIZER_INTERNAL_END")));
1101         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1102         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1103         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1104       }
1105       break;
1106 
1107     case SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH:
1108       /* Recognizer was never started */
1109       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_NO_MATCH")));
1110       CA_LockUtteranceFromInput(impl->utterance);
1111       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1112       if (impl->eventLog != NULL)
1113       {
1114         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH -> SR_RECOGNIZER_INTERNAL_END")));
1115         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1116         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1117         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1118       }
1119       break;
1120 
1121     case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
1122       /* Recognizer was never started */
1123       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_DETECTION")));
1124       CA_LockUtteranceFromInput(impl->utterance);
1125       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1126       if (impl->eventLog != NULL)
1127       {
1128         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1129         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1130         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1131         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1132       }
1133       break;
1134 
1135     case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
1136       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS_DETECTION")));
1137       CA_LockUtteranceFromInput(impl->utterance);
1138       if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1139       {
1140         rc = ESR_INVALID_STATE;
1141         PLogError(ESR_rc2str(rc));
1142         goto CLEANUP;
1143       }
1144       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1145       if (impl->eventLog != NULL)
1146       {
1147         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1148         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1149         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1150         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1151       }
1152       break;
1153 
1154     case SR_RECOGNIZER_INTERNAL_EOI:
1155       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOI")));
1156       CA_LockUtteranceFromInput(impl->utterance);
1157       if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1158       {
1159         rc = ESR_INVALID_STATE;
1160         PLogError(ESR_rc2str(rc));
1161         goto CLEANUP;
1162       }
1163       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1164       if (impl->eventLog != NULL)
1165       {
1166         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOI -> SR_RECOGNIZER_INTERNAL_END")));
1167         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1168         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1169         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1170       }
1171       break;
1172 
1173     case SR_RECOGNIZER_INTERNAL_EOS:
1174       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS")));
1175       CA_LockUtteranceFromInput(impl->utterance);
1176       if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1177       {
1178         rc = ESR_INVALID_STATE;
1179         PLogError(ESR_rc2str(rc));
1180         goto CLEANUP;
1181       }
1182       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1183       if (impl->eventLog != NULL)
1184       {
1185         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS -> SR_RECOGNIZER_INTERNAL_END")));
1186         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1187         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1188         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1189       }
1190       break;
1191 
1192     case SR_RECOGNIZER_INTERNAL_END:
1193       /* Recognizer already shut down */
1194       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("END")));
1195       break;
1196 
1197     default:
1198       /* Shut down recognizer */
1199       CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), impl->internalState));
1200       if (impl->eventLog != NULL)
1201       {
1202         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("unknown state -> SR_RECOGNIZER_INTERNAL_END")));
1203         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1204         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1205         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1206       }
1207       CA_LockUtteranceFromInput(impl->utterance);
1208       if (impl->isRecognizing)
1209       {
1210         if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1211         {
1212           rc = ESR_INVALID_STATE;
1213           PLogError(ESR_rc2str(rc));
1214           goto CLEANUP;
1215         }
1216       }
1217       rc = ESR_INVALID_STATE;
1218       PLogError(L("%s: %d"), ESR_rc2str(rc), impl->internalState);
1219       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1220       goto CLEANUP;
1221   }
1222   if (impl->eventLog != NULL)
1223   {
1224     int n;
1225     LCHAR result[MAX_ENTRY_LENGTH];
1226     result[0] = L('\0');
1227 
1228     n = CA_GetUnprocessedFramesInUtterance(impl->utterance);
1229     CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CA_GetUnprocessedFramesInUtterance() (x10ms)"), n));
1230     CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
1231     CHKLOG(rc, SR_EventLogToken(impl->eventLog, L("CA_FullResultLabel() (x20ms)"), result));
1232     n = CircularBufferGetSize(impl->buffer);
1233     CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CircularBufferGetSize() (samples)"), n / SAMPLE_SIZE));
1234   }
1235   if (impl->lockFunction)
1236     impl->lockFunction(ESR_LOCK, impl->lockData);
1237   CircularBufferReset(impl->buffer);
1238   if (impl->lockFunction)
1239     impl->lockFunction(ESR_UNLOCK, impl->lockData);
1240   if (CA_RecognitionHasResults(impl->recognizer))
1241     CA_ClearResults(impl->recognizer);
1242   CA_FlushUtteranceFrames(impl->utterance);
1243   CA_CalculateCMSParameters(impl->wavein);
1244   CA_CloseDevice(impl->wavein);
1245 
1246   /* record the OSI event */
1247   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIstop")));
1248 
1249   if (impl->result != NULL)
1250   {
1251     CHKLOG(rc, SR_RecognizerResult_Destroy(impl->result));
1252     impl->result = NULL;
1253   }
1254 
1255   if (impl->lockFunction)
1256     impl->lockFunction(ESR_LOCK, impl->lockData);
1257   impl->gotLastFrame = ESR_TRUE;
1258   PLOG_DBG_TRACE((L("SR_Recognizer shutdown occured")));
1259   impl->isStarted = ESR_FALSE;
1260   impl->isRecognizing = ESR_FALSE;
1261   if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1262     SR_EventLogAudioClose(impl->eventLog);
1263 
1264   impl->recogLogTimings.BORT = 0;
1265   impl->recogLogTimings.DURS = 0;
1266   impl->recogLogTimings.EORT = 0;
1267   impl->recogLogTimings.EOSD = 0;
1268   impl->recogLogTimings.EOSS = 0;
1269   impl->recogLogTimings.BOSS = 0;
1270   impl->recogLogTimings.EOST = 0;
1271   impl->eos_reason = L("undefined");
1272 
1273   if (impl->lockFunction)
1274     impl->lockFunction(ESR_UNLOCK, impl->lockData);
1275   PLOG_DBG_API_EXIT(rc);
1276   return rc;
1277 CLEANUP:
1278   PLOG_DBG_API_EXIT(rc);
1279   return rc;
1280 }
1281 
SR_RecognizerSetupImpl(SR_Recognizer * self)1282 ESR_ReturnCode SR_RecognizerSetupImpl(SR_Recognizer* self)
1283 {
1284   ESR_ReturnCode rc;
1285   CA_AcoustInputParams* acousticParams = NULL;
1286   SR_AcousticModelsImpl* modelsImpl;
1287   SR_AcousticModels* models;
1288   SR_RecognizerImpl* recogImpl = NULL;
1289   CA_Acoustic* acoustic;
1290   size_t size, i;
1291   LCHAR           filenames[P_PATH_MAX];
1292   size_t          len;
1293 
1294   len = P_PATH_MAX;
1295   CHKLOG(rc, ESR_SessionGetLCHAR ( L("cmdline.modelfiles"), filenames, &len ));
1296 
1297   CHKLOG(rc, SR_AcousticModelsLoad ( filenames, &models ));
1298 
1299   if (models == NULL)
1300     {
1301       PLogError(L("ESR_INVALID_STATE while finding cmdline.modelfiles"));
1302       return ESR_INVALID_STATE;
1303     }
1304   modelsImpl = (SR_AcousticModelsImpl*) models;
1305   recogImpl = (SR_RecognizerImpl*) self;
1306   acousticParams = NULL;
1307 
1308   CHKLOG(rc, SR_AcousticModelsGetCount(models, &size));
1309   acousticParams = CA_AllocateAcousticParameters();
1310   if (acousticParams == NULL)
1311       {
1312       rc = ESR_OUT_OF_MEMORY;
1313       PLogError(ESR_rc2str(rc));
1314       goto CLEANUP;
1315       }
1316     CHKLOG(rc, modelsImpl->getLegacyParameters(acousticParams));
1317     CHKLOG(rc, ArrayListGetSize(modelsImpl->acoustic, &size));
1318     for (i = 0; i < size; ++i)
1319       {
1320       CHKLOG(rc, ArrayListGet(modelsImpl->acoustic, i, (void **)&acoustic));
1321       CA_LoadModelsInAcoustic(recogImpl->recognizer, acoustic, acousticParams);
1322       }
1323   CA_FreeAcousticParameters(acousticParams);
1324 
1325   recogImpl->models = models;
1326   CHKLOG(rc, modelsImpl->setupPattern(recogImpl->models, self));
1327   return ESR_SUCCESS;
1328  CLEANUP:
1329   if (acousticParams != NULL)
1330     CA_FreeAcousticParameters(acousticParams);
1331   if (recogImpl != NULL)
1332     CA_UnloadRecognitionModels(recogImpl->recognizer);
1333   return rc;
1334 }
1335 
SR_RecognizerUnsetupImpl(SR_Recognizer * self)1336 ESR_ReturnCode SR_RecognizerUnsetupImpl(SR_Recognizer* self)
1337 {
1338   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1339   SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1340   ESR_ReturnCode rc;
1341 
1342   CHKLOG(rc, modelsImpl->unsetupPattern(impl->models));
1343   CA_UnloadRecognitionModels(impl->recognizer);
1344   CHKLOG(rc, SR_AcousticModelsDestroy ( impl->models ));
1345   impl->models = NULL;
1346   return ESR_SUCCESS;
1347  CLEANUP:
1348   return rc;
1349 }
1350 
SR_RecognizerIsSetupImpl(SR_Recognizer * self,ESR_BOOL * isSetup)1351 ESR_ReturnCode SR_RecognizerIsSetupImpl(SR_Recognizer* self, ESR_BOOL* isSetup)
1352 {
1353   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1354 
1355   if (isSetup == NULL)
1356   {
1357     PLogError(L("ESR_INVALID_ARGUMENT"));
1358     return ESR_INVALID_ARGUMENT;
1359   }
1360   *isSetup = impl->models != NULL;
1361   return ESR_SUCCESS;
1362 }
1363 
SR_RecognizerGetParameterImpl(SR_Recognizer * self,const LCHAR * key,LCHAR * value,size_t * len)1364 ESR_ReturnCode SR_RecognizerGetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1365     LCHAR* value, size_t* len)
1366 {
1367   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1368   ESR_ReturnCode rc;
1369 
1370   rc = impl->parameters->getLCHAR(impl->parameters, key, value, len);
1371   if (rc == ESR_NO_MATCH_ERROR)
1372   {
1373     CHKLOG(rc, ESR_SessionGetLCHAR(key, value, len));
1374     return ESR_SUCCESS;
1375   }
1376   else if (rc != ESR_SUCCESS)
1377   {
1378     PLogError(ESR_rc2str(rc));
1379     goto CLEANUP;
1380   }
1381   return ESR_SUCCESS;
1382 CLEANUP:
1383   return rc;
1384 }
1385 
1386 /*
1387  * The get / set code is a mess. Since we only use size_t parameters, that's all
1388  * that I am going to make work. The impl->parameters don't work so you always
1389  * have to get them from the session. The impl always logs an error. SteveR
1390  */
1391 
SR_RecognizerGetSize_tParameterImpl(SR_Recognizer * self,const LCHAR * key,size_t * value)1392 ESR_ReturnCode SR_RecognizerGetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1393     size_t* value)
1394 {
1395   ESR_ReturnCode rc;
1396 
1397   CHKLOG(rc, ESR_SessionGetSize_t(key, value));
1398   return ESR_SUCCESS;
1399 CLEANUP:
1400   return rc;
1401 }
1402 
SR_RecognizerGetBoolParameterImpl(SR_Recognizer * self,const LCHAR * key,ESR_BOOL * value)1403 ESR_ReturnCode SR_RecognizerGetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value)
1404 {
1405   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1406   ESR_ReturnCode rc;
1407 
1408   rc = impl->parameters->getBool(impl->parameters, key, value);
1409   if (rc == ESR_NO_MATCH_ERROR)
1410   {
1411     CHKLOG(rc, ESR_SessionGetBool(key, value));
1412     return ESR_SUCCESS;
1413   }
1414   else if (rc != ESR_SUCCESS)
1415   {
1416     PLogError(ESR_rc2str(rc));
1417     goto CLEANUP;
1418   }
1419   return ESR_SUCCESS;
1420 CLEANUP:
1421   return rc;
1422 }
1423 
SR_RecognizerSetParameterImpl(SR_Recognizer * self,const LCHAR * key,LCHAR * value)1424 ESR_ReturnCode SR_RecognizerSetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1425     LCHAR* value)
1426 {
1427   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1428   LCHAR temp[256];
1429   ESR_ReturnCode rc;
1430   size_t len = 256;
1431 
1432   rc = impl->parameters->getLCHAR(impl->parameters, key, temp, &len);
1433   if (rc == ESR_SUCCESS)
1434   {
1435     if (LSTRCMP(temp, value) == 0)
1436       return ESR_SUCCESS;
1437     CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1438   }
1439   else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1440   {
1441     PLogError(ESR_rc2str(rc));
1442     goto CLEANUP;
1443   }
1444 
1445   CHKLOG(rc, impl->parameters->setLCHAR(impl->parameters, key, value));
1446   return ESR_SUCCESS;
1447 CLEANUP:
1448   return rc;
1449 }
/*
 * The only set param function that is working is for the size_t parameters; and not
 * all of them are working, only the ones specified in the function itself. There are
 * two reasons for this: first, most of the set functions just put the value in an unused
 * table that has no effect; second, many of the changes need to be propagated to a specific
 * part of the code. This needs to be evaluated on a per-parameter basis. SteveR
 */
1457 
/*
 * This function will be used to set parameters in the session. We need to go through
 * the recognizer so as to propagate the values into the recognizer. We will rely on
 * the session to do the right thing. SteveR
 */
1463 
SR_RecognizerSetSize_tParameterImpl(SR_Recognizer * self,const LCHAR * key,size_t value)1464 ESR_ReturnCode SR_RecognizerSetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1465     size_t value)
1466 {
1467   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1468   ESR_ReturnCode rc;
1469 
1470   rc = ESR_SessionSetSize_t ( key, value );
1471 
1472   if (rc == ESR_SUCCESS)
1473   {
1474     if  ( LSTRCMP ( L("SREC.Recognizer.utterance_timeout"), key ) == 0 )
1475     {
1476       impl->utterance_timeout = value;
1477     }
1478     else if  ( LSTRCMP ( L("CREC.Recognizer.terminal_timeout"), key ) == 0 )
1479     {
1480       impl->recognizer->eosd_parms->endnode_timeout = value;
1481     }
1482     else if  ( LSTRCMP ( L("CREC.Recognizer.optional_terminal_timeout"), key ) == 0 )
1483     {
1484       impl->recognizer->eosd_parms->optendnode_timeout = value;
1485     }
1486     else if  ( LSTRCMP ( L("CREC.Recognizer.non_terminal_timeout"), key ) == 0 )
1487     {
1488       impl->recognizer->eosd_parms->internalnode_timeout = value;
1489     }
1490     else if  ( LSTRCMP ( L("CREC.Recognizer.eou_threshold"), key ) == 0 )
1491     {
1492       impl->recognizer->eosd_parms->eos_costdelta = (frameID)value;
1493       impl->recognizer->eosd_parms->opt_eos_costdelta = (frameID)value;
1494     }
1495     else
1496     {
1497       PLogError(L("ESR_INVALID_ARGUMENT"));
1498       rc = ESR_INVALID_ARGUMENT;
1499     }
1500   }
1501   return rc;
1502 }
1503 
1504 
SR_RecognizerSetBoolParameterImpl(SR_Recognizer * self,const LCHAR * key,ESR_BOOL value)1505 ESR_ReturnCode SR_RecognizerSetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value)
1506 {
1507   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1508   ESR_BOOL temp;
1509   ESR_ReturnCode rc;
1510 
1511   rc = impl->parameters->getBool(impl->parameters, key, &temp);
1512   if (rc == ESR_SUCCESS)
1513   {
1514     if (temp == value)
1515       return ESR_SUCCESS;
1516     CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1517   }
1518   else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1519     return rc;
1520 
1521   CHKLOG(rc, impl->parameters->setBool(impl->parameters, key, value));
1522   return ESR_SUCCESS;
1523 CLEANUP:
1524   return rc;
1525 }
1526 
SR_RecognizerHasSetupRulesImpl(SR_Recognizer * self,ESR_BOOL * hasSetupRules)1527 ESR_ReturnCode SR_RecognizerHasSetupRulesImpl(SR_Recognizer* self, ESR_BOOL* hasSetupRules)
1528 {
1529   SR_RecognizerImpl* recogImpl = (SR_RecognizerImpl*) self;
1530   size_t size;
1531   ESR_ReturnCode rc;
1532 
1533   if (hasSetupRules == NULL)
1534   {
1535     PLogError(L("ESR_INVALID_ARGUMENT"));
1536     return ESR_INVALID_ARGUMENT;
1537   }
1538   CHKLOG(rc, HashMapGetSize(recogImpl->grammars, &size));
1539   *hasSetupRules = size > 0;
1540   return ESR_SUCCESS;
1541 CLEANUP:
1542   return rc;
1543 }
1544 
SR_RecognizerActivateRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName,unsigned int weight)1545 ESR_ReturnCode SR_RecognizerActivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1546     const LCHAR* ruleName, unsigned int weight)
1547 {
1548   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1549   SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1550   SR_AcousticModelsImpl* modelsImpl;
1551   LCHAR grammarID[80];
1552   ESR_ReturnCode rc;
1553   char *failure_reason = NULL;
1554 
1555   if (grammar == NULL)
1556   {
1557     if (impl->eventLog)
1558       failure_reason = "badinput";
1559     rc = ESR_INVALID_ARGUMENT;
1560     PLogError(L("ESR_INVALID_ARGUMENT"));
1561     goto CLEANUP;
1562   }
1563 
1564   if (impl->models == NULL)
1565   {
1566     failure_reason = "nomodels";
1567     rc = ESR_INVALID_STATE;
1568     PLogError(L("acoustic models must be configured"));
1569     goto CLEANUP;
1570   }
1571 
1572   modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1573 
1574   if (ruleName == NULL)
1575     psprintf(grammarID, L("%p"), grammar);
1576   else
1577   {
1578     if (LSTRLEN(ruleName) > 80)
1579     {
1580       rc = ESR_BUFFER_OVERFLOW;
1581       PLogError(ESR_rc2str(rc));
1582       goto CLEANUP;
1583     }
1584     LSTRCPY(grammarID, ruleName);
1585   }
1586 
1587   CHKLOG(rc, HashMapPut(impl->grammars, grammarID, grammar));
1588   if (CA_SetupSyntaxForRecognizer(grammarImpl->syntax, impl->recognizer))
1589   {
1590     failure_reason = "cafailed";
1591     rc = ESR_INVALID_STATE;
1592     PLogError(L("ESR_INVALID_STATE"));
1593     goto CLEANUP;
1594   }
1595 
1596    CHKLOG(rc, SR_Grammar_SetupRecognizer(grammar, self));
1597   grammarImpl->isActivated = ESR_TRUE;
1598 
1599   /*
1600    * If we want to log dynamically added words, then we must give the grammar a reference
1601    * to our event log. The grammar logs word additions if and only if its reference to
1602    * eventLog is non-null.
1603    */
1604   if (impl->osi_log_level & OSI_LOG_LEVEL_ADDWD)
1605     grammarImpl->eventLog = impl->eventLog;
1606   else
1607     grammarImpl->eventLog = NULL;
1608 
1609   rc = ESR_SUCCESS;
1610 
1611 CLEANUP:
1612   if (impl->eventLog)
1613   {
1614     if (failure_reason)
1615     {
1616       SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1617       SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1618       SR_EventLogToken(impl->eventLog, L("rslt"), "fail");
1619       SR_EventLogToken(impl->eventLog, L("reason"), failure_reason);
1620       SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1621     }
1622     else
1623     {
1624       SR_EventLogTokenInt(impl->eventLog, L("igrm"), (int) grammar);
1625       SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1626       SR_EventLogToken(impl->eventLog, L("rslt"), "ok");
1627       SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1628     }
1629   }
1630   return rc;
1631 }
1632 
SR_RecognizerDeactivateRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName)1633 ESR_ReturnCode SR_RecognizerDeactivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1634     const LCHAR* ruleName)
1635 {
1636   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1637   SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1638   LCHAR grammarID[MAX_INT_DIGITS+1];
1639   ESR_ReturnCode rc;
1640 
1641   if (ruleName == NULL)
1642   {
1643     psprintf(grammarID, L("%p"), grammar);
1644     CHKLOG(rc, HashMapRemove(impl->grammars, grammarID));
1645   }
1646   else
1647     CHKLOG(rc, HashMapRemove(impl->grammars, ruleName));
1648   grammarImpl->isActivated = ESR_FALSE;
1649   return ESR_SUCCESS;
1650 CLEANUP:
1651   return rc;
1652 }
1653 
SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer * self)1654 ESR_ReturnCode SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer* self)
1655 {
1656   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1657   ESR_ReturnCode rc;
1658 
1659   CHKLOG(rc, HashMapRemoveAll(impl->grammars));
1660   CA_ClearSyntaxForRecognizer(0, impl->recognizer);
1661   return ESR_SUCCESS;
1662 CLEANUP:
1663   return rc;
1664 }
1665 
SR_RecognizerIsActiveRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName,ESR_BOOL * isActiveRule)1666 ESR_ReturnCode SR_RecognizerIsActiveRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1667     const LCHAR* ruleName, ESR_BOOL* isActiveRule)
1668 {
1669   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1670   LCHAR grammarID[MAX_INT_DIGITS+1];
1671   ESR_ReturnCode rc;
1672 
1673   psprintf(grammarID, L("%p"), grammar);
1674   CHKLOG(rc, HashMapContainsKey(impl->grammars, (LCHAR*) &grammarID, isActiveRule));
1675   return ESR_SUCCESS;
1676 CLEANUP:
1677   return rc;
1678 }
1679 
SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer * self,SR_Grammar * grammar)1680 ESR_ReturnCode SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer* self, SR_Grammar* grammar)
1681 {
1682   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1683   SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*)grammar;
1684   int iRc;
1685 
1686   if(!impl || !grammarImpl)
1687     return ESR_INVALID_ARGUMENT;
1688   iRc = CA_CeilingSyntaxForRecognizer( grammarImpl->syntax, impl->recognizer);
1689   if(iRc) return ESR_INVALID_STATE;
1690 
1691   return ESR_SUCCESS;
1692 }
1693 
SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer * self,SR_Grammar * grammar,ESR_BOOL * isConsistent)1694 ESR_ReturnCode SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer* self, SR_Grammar* grammar,
1695     ESR_BOOL* isConsistent)
1696 {
1697   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1698   SR_GrammarImpl* grammarImpl;
1699   SR_RecognizerImpl* impl2;
1700 
1701 
1702   grammarImpl = (SR_GrammarImpl*) grammar;
1703   impl2 = (SR_RecognizerImpl*)grammarImpl->recognizer;
1704   // *isConsistent = grammarImpl->models == impl->models;
1705   *isConsistent = (impl2->models == impl->models);
1706   return ESR_SUCCESS;
1707 }
1708 
SR_RecognizerGetModelsImpl(SR_Recognizer * self,SR_AcousticModels ** pmodels)1709 ESR_ReturnCode SR_RecognizerGetModelsImpl(SR_Recognizer* self, SR_AcousticModels** pmodels)
1710 {
1711   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1712   *pmodels = impl->models;
1713   return ESR_SUCCESS;
1714 }
1715 
SR_RecognizerPutAudioImpl(SR_Recognizer * self,asr_int16_t * buffer,size_t * bufferSize,ESR_BOOL isLast)1716 ESR_ReturnCode SR_RecognizerPutAudioImpl(SR_Recognizer* self, asr_int16_t* buffer, size_t* bufferSize,
1717     ESR_BOOL isLast)
1718 {
1719   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1720   ESR_ReturnCode rc;
1721   int    rcBufWrite;
1722   size_t nbWritten;
1723 
1724   if (isLast == ESR_FALSE && (buffer == NULL || bufferSize == NULL))
1725   {
1726     PLogError(L("ESR_INVALID_ARGUMENT"));
1727     return ESR_INVALID_ARGUMENT;
1728   }
1729 
1730   if (impl->lockFunction)
1731     impl->lockFunction(ESR_LOCK, impl->lockData);
1732   if (!impl->isStarted)
1733   {
1734     if (impl->lockFunction)
1735       impl->lockFunction(ESR_UNLOCK, impl->lockData);
1736     PLogMessage(L("ESR_INVALID_STATE: Tried pushing audio while recognizer was offline"));
1737     return ESR_INVALID_STATE;
1738   }
1739   if (impl->gotLastFrame)
1740   {
1741     if (impl->lockFunction)
1742       impl->lockFunction(ESR_UNLOCK, impl->lockData);
1743     PLogMessage(L("ESR_INVALID_STATE: isLast=TRUE"));
1744     return ESR_INVALID_STATE;
1745   }
1746   if (buffer == NULL && isLast == ESR_FALSE)
1747   {
1748     if (impl->lockFunction)
1749       impl->lockFunction(ESR_UNLOCK, impl->lockData);
1750     PLogError(L("ESR_INVALID_ARGUMENT: got NULL  buffer on non-terminal frame"));
1751     return ESR_INVALID_ARGUMENT;
1752   }
1753 
1754   rcBufWrite = CircularBufferWrite(impl->buffer, buffer, *bufferSize * SAMPLE_SIZE);
1755   if (rcBufWrite < 0)
1756   {
1757     rc = ESR_INVALID_STATE;
1758     PLogError(L("%s: error writing to buffer (buffer=%d, available=%u)"), ESR_rc2str(rc), (int) impl->buffer, CircularBufferGetAvailable(impl->buffer));
1759     goto CLEANUP;
1760   }
1761 
1762   nbWritten = (size_t)rcBufWrite;
1763   if (nbWritten % SAMPLE_SIZE != 0)
1764   {
1765     size_t amountUnwritten;
1766 
1767     /* The buffer is byte-based while we're sample based. Make sure we write entire samples or not at all */
1768     amountUnwritten = CircularBufferUnwrite(impl->buffer, nbWritten % SAMPLE_SIZE);
1769     passert(amountUnwritten == nbWritten % SAMPLE_SIZE);
1770     nbWritten -= amountUnwritten;
1771   }
1772   passert(nbWritten % 2 == 0); /* make sure CircularBufferSize is divisible by 2 */
1773 
1774   if (nbWritten < *bufferSize * SAMPLE_SIZE)
1775   {
1776     rc = ESR_BUFFER_OVERFLOW;
1777 #ifndef NDEBUG
1778     PLOG_DBG_TRACE((L("%s: writing to circular buffer"), ESR_rc2str(rc)));
1779 #endif
1780     *bufferSize = nbWritten / SAMPLE_SIZE;
1781     if (impl->lockFunction)
1782       impl->lockFunction(ESR_UNLOCK, impl->lockData);
1783     goto CLEANUP;
1784   }
1785   if (impl->lockFunction)
1786     impl->lockFunction(ESR_UNLOCK, impl->lockData);
1787 
1788   if (isLast)
1789     impl->gotLastFrame = ESR_TRUE;
1790   return ESR_SUCCESS;
1791 CLEANUP:
1792   return rc;
1793 }
1794 
1795 /* utility function to sort the ArrayList of nbest list results by the score of the first
1796    semantic result */
SemanticResults_SortByScore(ArrayList * results,size_t nbestSize)1797 ESR_ReturnCode SemanticResults_SortByScore(ArrayList *results, size_t nbestSize)
1798 {
1799   ESR_ReturnCode rc;
1800   ArrayList* semanticResultList;
1801   ArrayList* semanticResultList_swap;
1802   SR_SemanticResult* semanticResult_i;
1803   SR_SemanticResult* semanticResult_j;
1804   size_t i, j;
1805   LCHAR scoreStr[MAX_ENTRY_LENGTH] ;
1806   size_t scoreStrLen = MAX_ENTRY_LENGTH ;
1807   int score_i, score_j;
1808 
1809   /* bubble sort */
1810   for (i = 0; i < (size_t)nbestSize; ++i)
1811   {
1812     for (j = i + 1; j < (size_t)nbestSize; ++j)
1813     {
1814       /* get for i */
1815       CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList)); /* nbest index */
1816       CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_i));      /* semresult 0 */
1817 
1818       /* get for j */
1819       CHKLOG(rc, ArrayListGet(results, j, (void **)&semanticResultList)); /* nbest index */
1820       CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_j));      /* semresult 0 */
1821 
1822       scoreStrLen = MAX_ENTRY_LENGTH ;
1823       CHKLOG(rc, semanticResult_i->getValue(semanticResult_i, "raws", scoreStr, &scoreStrLen));
1824       CHKLOG(rc, lstrtoi(scoreStr, &score_i, 10));
1825       scoreStrLen = MAX_ENTRY_LENGTH ;
1826       CHKLOG(rc, semanticResult_j->getValue(semanticResult_j, "raws", scoreStr, &scoreStrLen));
1827       CHKLOG(rc, lstrtoi(scoreStr, &score_j, 10));
1828 
1829       if (score_j < score_i)
1830       {
1831         /* need to swap */
1832         CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList_swap)); /* put i in swap */
1833         CHKLOG(rc, ArrayListSet(results, i, semanticResultList));       /* put j in i    */
1834         CHKLOG(rc, ArrayListSet(results, j, semanticResultList_swap));  /* put swap in j */
1835       }
1836     }
1837   }
1838   return ESR_SUCCESS;
1839 CLEANUP:
1840   return rc;
1841 }
1842 
filter_CA_FullResultLabel(const LCHAR * label,LCHAR * filtered_label,size_t * boss,size_t * eoss)1843 ESR_ReturnCode filter_CA_FullResultLabel(const LCHAR* label, LCHAR *filtered_label, size_t* boss, size_t* eoss)
1844 {
1845   ESR_ReturnCode rc;
1846   enum
1847   {
1848     NO_COPY,
1849     FRAME,
1850     WORD,
1851   } filter_state = WORD;
1852   LCHAR *dst = filtered_label;
1853   LCHAR eosBuf[16]; /* max 9999 + '\0' */
1854   LCHAR bosBuf[16]; /* max 9999 + '\0' */
1855   LCHAR* pBuf = NULL;
1856 
1857   /**
1858    * example: you want to filter this:
1859    *
1860    * "-pau-@23 clock@97 twenty_four@125 hour@145  "
1861    *        ^boss = 23                       ^ eoss = 145
1862    * and get this:
1863    *
1864    * "clock twenty_four hour"
1865    */
1866 
1867   passert(LSTRLEN(label) > 0);
1868   while (*label)
1869   {
1870     switch (filter_state)
1871     {
1872       case NO_COPY:
1873         if (*label == L(' '))
1874           filter_state = WORD;
1875         else if (*label == L('@'))
1876         {
1877           filter_state = FRAME;
1878           if (pBuf == NULL)
1879             pBuf = bosBuf;
1880           else
1881           {
1882             *pBuf = 0;
1883             pBuf = eosBuf;
1884           }
1885         }
1886         break;
1887       case WORD:
1888         if (*label == L('@'))
1889         {
1890           *dst = L(' '); /* insert space */
1891           dst++;
1892           filter_state = FRAME;
1893           if (pBuf == NULL)
1894             pBuf = bosBuf;
1895           else
1896           {
1897             *pBuf = 0;
1898             pBuf = eosBuf;
1899           }
1900         }
1901         else
1902         {
1903           *dst = *label;
1904           dst++;
1905         }
1906         break;
1907       case FRAME:
1908         if (*label == L(' '))
1909           filter_state = WORD;
1910         else
1911         {
1912           *pBuf = *label;
1913           pBuf++;
1914         }
1915         break;
1916     }
1917     label++;
1918   }
1919   *dst = 0; /* term the string */
1920   *pBuf = 0; /* term the string */
1921 
1922   /* trim the end spaces */
1923   dst--;
1924   while (*dst == ' ')
1925     *dst-- = '\0';
1926 
1927   /* set the eos signal indicated by the end pointed data */
1928   if (eosBuf[0] != 0)
1929     CHKLOG(rc, lstrtoui(eosBuf, eoss, 10));
1930   else
1931     eoss = 0;
1932 
1933   if (bosBuf[0] != 0)
1934     CHKLOG(rc, lstrtoui(bosBuf, boss, 10));
1935   else
1936     boss = 0;
1937 
1938   return ESR_SUCCESS;
1939 CLEANUP:
1940   return rc;
1941 }
1942 
1943 /**
1944  * Populates the recognizer result if it can, otherwise it returns NO MATCH cuz no results exist
1945  *
1946  * INPUT STATE: SR_RECOGNIZER_INTERNAL_EOS
1947  *
1948  * @param self SR_Recognizer handle
1949  * @todo break up into smaller functions
1950  */
SR_RecognizerCreateResultImpl(SR_Recognizer * self,SR_RecognizerStatus * status,SR_RecognizerResultType * type)1951 ESR_ReturnCode SR_RecognizerCreateResultImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
1952     SR_RecognizerResultType* type)
1953 {
1954   LCHAR label[MAX_ENTRY_LENGTH * 2];  /* run out of buffer */
1955 #define WORDID_COUNT 48 /* can be quite high for voice enrollment! */
1956   wordID wordIDs[WORDID_COUNT];
1957   LCHAR tok[80];
1958   LCHAR waveformFilename[P_PATH_MAX];
1959   LCHAR* pkey;
1960   SR_GrammarImpl* pgrammar;
1961   asr_int32_t raws; /* raw score */
1962   size_t iBest, nbestSize, jBest, k, grammarSize, semanticResultsSize, grammarIndex_for_iBest;
1963   LCHAR* lValue;
1964   LCHAR* lValue2;
1965   int confValue;
1966   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1967   SR_RecognizerResultImpl* resultImpl = (SR_RecognizerResultImpl*) impl->result;
1968   ESR_BOOL containsKey;
1969   int valid, score, recogID;
1970   LCHAR result[MAX_ENTRY_LENGTH];
1971   size_t len, size;
1972   size_t locale;
1973   int current_choice;
1974 
1975   /**
1976    * Semantic result stuff
1977    */
1978   /* a temp buffer to hold semantic results of a parse (there may be several results) */
1979   SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
1980   ArrayList* semanticList;
1981   ArrayList* semanticList2;
1982   SR_SemanticResultImpl* semanticImpl;
1983   SR_SemanticResultImpl* semanticImpl2;
1984   SR_SemanticResult* semanticResult;
1985   SR_SemanticResult* semanticResult2;
1986   waveform_buffering_state_t buffering_state;
1987 
1988   SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1989   ESR_ReturnCode rc;
1990   PTimeStamp EORT;
1991 
1992   CA_LockUtteranceFromInput(impl->utterance);
1993   if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1994   {
1995     PLogError(L("ESR_INVALID_STATE"));
1996     return ESR_INVALID_STATE;
1997   }
1998 
1999   /* check if the forward search was successful */
2000   valid = CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
2001   CA_GetRecogID(impl->recognizer, &recogID);
2002   CA_FullResultScore(impl->recognizer, &score, 1);
2003 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2004   PLogMessage(L("R: %s type %d score %d from recognizer%d"), result, type, score, valid, recogID);
2005   PLogMessage(L("R: %s score %d from recognizer%d"), result, score, valid, recogID);
2006 #endif
2007 #ifdef _WIN32
2008   //pfprintf(PSTDOUT, ("R: %s type %d score %d from recognizer%d\n"), result, type, score, valid, recogID);
2009 #endif
2010 
2011 
2012   switch (valid)
2013   {
2014     case FULL_RESULT:
2015       CHKLOG(rc, filter_CA_FullResultLabel(result, label, &impl->recogLogTimings.BOSS, &impl->recogLogTimings.EOSS));
2016 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2017       PLogMessage("R: %s", result);
2018 #endif
2019       CA_FullResultScore(impl->recognizer, (int*) &raws, 0);
2020 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2021       PLogMessage("S: %d", raws);
2022 #endif
2023 
2024       /* now that we have an endpointed result, we can parse the result transcription
2025          to see where speech started and ended. Then we can trim off excess parts of the
2026          recorded audio waveform (if exists) so that nametags are just the right amount of
2027          audio
2028       */
2029       CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
2030       if (buffering_state != WAVEFORM_BUFFERING_OFF)
2031       {
2032         CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &size));
2033         if (size > 0)
2034         {
2035           rc = WaveformBuffer_ParseEndPointedResultAndTrim(impl->waveformBuffer, result, impl->FRAME_SIZE);
2036           if (rc == ESR_BUFFER_OVERFLOW)
2037           {
2038             /* Nametag EOS occured beyond end of buffer */
2039           }
2040           else if (rc != ESR_SUCCESS)
2041           {
2042             PLogError(ESR_rc2str(rc));
2043             goto CLEANUP;
2044           }
2045         }
2046       }
2047       break;
2048 
2049     case REJECT_RESULT:
2050 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2051       PLogMessage(L("R: <REJECTED>"));
2052 #endif
2053       break;
2054     default:
2055 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2056       PLogMessage(L("E: No results available"));
2057       PLogMessage(L("R: <FAILED>"));
2058 #endif
2059       break;
2060   }
2061 
2062 
2063   if (valid == FULL_RESULT)
2064   {
2065     /* Populate SR_RecognizerResult */
2066     resultImpl->nbestList = CA_PrepareNBestList(impl->recognizer, 10, &raws);
2067     if (resultImpl->nbestList == NULL)
2068     {
2069       /*
2070        * This is not a failure. It simply means that I have not advanced far
2071        * enough in recognition in order to obtain results (no paths in
2072        * graph). This occurs, for instance, when a eof is reached (no more data)
2073        * and I have not even created any paths in my graph.
2074        */
2075 
2076       *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2077       *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2078       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2079       if (impl->eventLog != NULL)
2080       {
2081         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2082         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2083         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2084         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2085       }
2086       passert(0);
2087       return ESR_SUCCESS;
2088     }
2089 
2090     nbestSize = CA_NBestListCount(resultImpl->nbestList);
2091   }
2092   else
2093     nbestSize = 0;
2094 
2095   if (resultImpl->results != NULL)
2096     ArrayListRemoveAll(resultImpl->results);
2097   else
2098     CHKLOG(rc, ArrayListCreate(&resultImpl->results));
2099   if (nbestSize == 0)
2100   {
2101     /*
2102      * Got empty n-best list even though the recognition was successful.
2103      * We handle this in the same way that recog_startpt does... we consider it a no match.
2104      * We could adjust the CREC.Recognizer.viterbi_prune_thresh to a higher level, but that
2105      * may not fix the problem completely. We need to fix the bug in the astar search!!!
2106      */
2107     *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2108     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2109     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2110     if (impl->eventLog != NULL)
2111     {
2112       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2113       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2114       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2115       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2116     }
2117 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2118     PLogMessage(L("ESR_INVALID_STATE: got empty n-best list even though the recognition was successful"));
2119 #endif
2120     return ESR_SUCCESS; /* we do not want to halt the app in this case */
2121   }
2122   else
2123   {
2124     *status = SR_RECOGNIZER_EVENT_RECOGNITION_RESULT;
2125     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2126     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2127     if (impl->eventLog != NULL)
2128     {
2129       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2130       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2131       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2132       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2133     }
2134   }
2135 
2136   /**
2137    * All grammars associated with the recognizer are considered to be active
2138    * and therefore, I do a semantic parse on each. On the first grammar that
2139    * gives one or more semantic results, I stop parsing the other grammars.
2140    */
2141   CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarSize));
2142   ASSERT( grammarSize == 1);
2143 
2144   for (iBest = 0; iBest < nbestSize; ++iBest)
2145   {
2146     len = WORDID_COUNT;
2147     if (CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) != ESR_SUCCESS)
2148     {
2149       *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2150       *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2151       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2152       if (impl->eventLog != NULL)
2153       {
2154         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2155         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2156         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2157         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2158       }
2159       PLogError(L("ESR_INVALID_STATE: got bad n-best list entry %d"), iBest);
2160       return ESR_INVALID_STATE;
2161     }
2162 
2163     CHKLOG(rc, ArrayListCreate(&semanticList));
2164     CHKLOG(rc, resultImpl->results->add(resultImpl->results, semanticList));
2165 
2166     grammarIndex_for_iBest = 0;
2167     CHKLOG(rc, impl->grammars->getKeyAtIndex(impl->grammars, grammarIndex_for_iBest, &pkey));
2168     CHKLOG(rc, impl->grammars->get(impl->grammars, pkey, (void **)&pgrammar));
2169 
2170     CHKLOG(rc, SR_GrammarGetSize_tParameter((SR_Grammar*) pgrammar, L("locale"), &locale));
2171     resultImpl->locale = locale;
2172 
2173     /* I need to manage my semantic results external to the check parse function */
2174     for (k = 0; k < MAX_SEM_RESULTS; ++k)
2175       SR_SemanticResultCreate(&semanticResults[k]);
2176 
2177     /*
2178        The code here tries to make the voice-enrollment more effective.
2179        The VE grammar decodes a sequence of best phonemes, but the nbest
2180        processing may find a better score for an alternative choice than
2181        the score of the viterbi best choice.  The reason for this is that
2182        alternative choices don't honor cross-word context-dependency quite
2183        accurately.  If we choose an alternative choice then the sequence of
2184        phoneme decoded does not correspond to the sequence of models decoded.
2185        To counter this, we FORCIBLY make sure the top choice here is the
2186        VITERBI top choice.
2187     */
2188 
2189     if (iBest == 0)
2190       {
2191         if (CA_IsEnrollmentSyntax( pgrammar->syntax)) {
2192           /* this was voice enrollment, so let's try to replace */
2193           // 	char* word1 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[0]);
2194           // char* word2 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[1]);
2195           // if (!strncmp(word1,voice_enroll_word_prefix,VEWPLEN)&&!strncmp(word2,voice_enroll_word_prefix,VEWPLEN))
2196           len = WORDID_COUNT;
2197           rc = CA_FullResultWordIDs(impl->recognizer, wordIDs, &len);
2198           if (rc != ESR_SUCCESS)
2199             {
2200               /* in case of problem with viterbi path choice, we revert back */
2201               len = WORDID_COUNT;
2202               rc = CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) ;
2203             }
2204         }
2205       }
2206 
2207     LSTRCPY(label, L(""));
2208     for (k = 0; wordIDs[k] != MAXwordID; ++k)
2209       {
2210         LCHAR* wordk = NULL;
2211         wordk = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[k]);
2212         LSTRCAT(label, wordk);
2213         LSTRCAT(label, L(" "));
2214       }
2215     CHKLOG(rc, CA_ResultStripSlotMarkers(label));
2216     passert(LSTRCMP(label, L("")) != 0);
2217 
2218     /* strip the trailing blank */
2219     k = LSTRLEN(label) - 1;
2220     if (k > 0 && label[k] == L(' '))
2221       label[k] = 0;
2222 
2223     semanticResultsSize = MAX_SEM_RESULTS;
2224 
2225 #if SEMPROC_ACTIVE
2226 
2227     /* set the literal prior to processing so that semproc can read the value
2228        during processing */
2229     CHKLOG(rc, pgrammar->semproc->flush(pgrammar->semproc));
2230     CHKLOG(rc, pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), label));
2231 
2232     rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
2233                                                wordIDs, semanticResults, &semanticResultsSize);
2234 
2235     /* rc = pgrammar->semproc->checkParse(pgrammar->semproc, pgrammar->semgraph,
2236        label, semanticResults, &semanticResultsSize); */
2237 
2238     if (rc != ESR_SUCCESS)
2239       {
2240         for (k = 0; k < MAX_SEM_RESULTS; ++k)
2241           {
2242             semanticResults[k]->destroy(semanticResults[k]);
2243             semanticResults[k] = NULL;
2244           }
2245         goto CLEANUP;
2246       }
2247 #else
2248     semanticResultsSize = 0;
2249 #endif
2250     /* cleanup the empty ones */
2251     for (k = semanticResultsSize; k < MAX_SEM_RESULTS; ++k)
2252       {
2253         CHKLOG(rc, semanticResults[k]->destroy(semanticResults[k]));
2254         semanticResults[k] = NULL;
2255       }
2256 
2257     /* save the good ones */
2258     for (k = 0; k < semanticResultsSize; ++k)
2259       {
2260         /*
2261          * Save the pointer to the semantic result that was created.
2262          * Remember that the semantic result array only holds pointers
2263          * and for each time that the function is called, new semantic results
2264          * are created, and the pointers overwrite old values in the array
2265          */
2266         CHKLOG(rc, semanticList->add(semanticList, semanticResults[k]));
2267       }
2268 
2269 #if SEMPROC_ACTIVE
2270     if (semanticResultsSize > 0)
2271       {
2272         /* OSI log the grammar(s) that was used in recognizing */
2273         psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2274         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2275       }
2276 #else
2277     /* OSI log the grammar(s) that was used in recognizing */
2278     psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2279     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2280 #endif
2281 
2282     /* Populate semantic results for each nbest list entry */
2283     CHKLOG(rc, semanticList->getSize(semanticList, &semanticResultsSize));
2284     if (semanticResultsSize == 0)
2285     {
2286       /*
2287        * If there was no semantic result... then I need to create one so that I can store
2288        * literal, conf, meaning which are default keys that must ALWAYS exist
2289        */
2290       CHKLOG(rc, SR_SemanticResultCreate(&semanticResult));
2291       CHKLOG(rc, semanticList->add(semanticList, semanticResult));
2292       semanticResultsSize = 1;
2293     }
2294 
2295     for (k = 0; k < semanticResultsSize;++k)
2296     {
2297       CHKLOG(rc, semanticList->get(semanticList, k, (void **)&semanticResult));
2298       if (semanticResult == NULL)
2299       {
2300         PLogError(L("nbest entry contained NULL semanticResult"), ESR_INVALID_STATE);
2301         return ESR_INVALID_STATE;
2302       }
2303 
2304       semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2305 
2306       /* put in the literal */
2307       lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2308       if (lValue == NULL)
2309       {
2310         PLogError(L("ESR_OUT_OF_MEMORY"));
2311         return ESR_OUT_OF_MEMORY;
2312       }
2313       LSTRCPY(lValue, label);
2314       CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("literal"), lValue));
2315 
2316       /* if the meaning is not set, then put in the meaning which will be the literal */
2317       CHKLOG(rc, semanticImpl->results->containsKey(semanticImpl->results, L("meaning"), &containsKey));
2318       if (!containsKey)
2319       {
2320         lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2321         if (lValue == NULL)
2322         {
2323           PLogError(L("ESR_OUT_OF_MEMORY"));
2324           return ESR_OUT_OF_MEMORY;
2325         }
2326         LSTRCPY(lValue, label);
2327         CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("meaning"), lValue));
2328       }
2329 
2330       /* put in the raw score */
2331       psprintf(label, L("%d"), raws);
2332       lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2333       if (lValue == NULL)
2334       {
2335         PLogError(L("ESR_OUT_OF_MEMORY"));
2336         return ESR_OUT_OF_MEMORY;
2337       }
2338       LSTRCPY(lValue, label);
2339       CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("raws"), lValue));
2340     }
2341   }
2342 
2343   /* Now I have an nBest list where each entry has at least one semantic result */
2344   /* What I need to do is filter out the nBest list entries which have matching
2345      semantic results for 'meaning' */
2346   /* Once I have filtered out the nBest list based on this criteria, I can calculate the confidence
2347      score and populate the result of the first entry with the raw score */
2348 
2349 #if FILTER_NBEST_BY_SEM_RESULT
2350 
2351   for (iBest = nbestSize-1; iBest>0; iBest--) /* do not filter out nBest entry 0 */
2352   {
2353     /**
2354      * This is the entry (indexed by i) targeted for removal
2355      *
2356      */
2357 
2358     /* get the nBest entry which you wish to remove (if duplicate found) */
2359     CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void **)&semanticList));
2360 
2361     /* get the first sem_result for the entry */
2362     CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2363     semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2364 
2365     /* get the meaning */
2366     CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2367 
2368     /* get the other entries to check against (start with 0, end on the current i entry) */
2369     for (jBest = 0; jBest < iBest; ++jBest)
2370     {
2371       /*
2372        * This is the entry (indexed by jBest) that we will compare with
2373        */
2374 
2375       /* get the nBest entry which you wish to compare with */
2376       CHKLOG(rc, ArrayListGet(resultImpl->results, jBest, (void **)&semanticList2));
2377 
2378       CHKLOG(rc, ArrayListGet(semanticList2, 0, (void **)&semanticResult2));
2379       semanticImpl2 = (SR_SemanticResultImpl*) semanticResult2;
2380 
2381       CHKLOG(rc, semanticImpl2->results->get(semanticImpl2->results, L("meaning"), (void **)&lValue2));
2382       if (LSTRCMP(lValue, lValue2) == 0)
2383       {
2384         /* pfprintf(PSTDOUT,"duplicate sem result found %d == %d\n", iBest, jBest);
2385         pfprintf(PSTDOUT,"removing %d\n", iBest); */
2386 
2387         /* removing from the list indexed by iBest */
2388         CHKLOG(rc, semanticList->remove(semanticList, semanticResult));
2389         CHKLOG(rc, semanticResult->destroy(semanticResult));
2390 
2391         CHKLOG(rc, resultImpl->results->remove(resultImpl->results, semanticList));
2392         CHKLOG(rc, semanticList->destroy(semanticList));
2393 
2394         if (!CA_NBestListRemoveResult(resultImpl->nbestList, iBest))
2395           return ESR_ARGUMENT_OUT_OF_BOUNDS;
2396         break;
2397       }
2398     }
2399   }
2400   nbestSize = CA_NBestListCount(resultImpl->nbestList);
2401 #endif
2402 
2403   CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2404 
2405   if (nbestSize)
2406   {
2407    if(CA_ComputeConfidenceValues(impl->confidenceScorer, impl->recognizer, resultImpl->nbestList))
2408         return ESR_INVALID_STATE;
2409 
2410    for(current_choice=nbestSize-1;current_choice>=0;current_choice--)
2411    {
2412     /* get the nBest entry you want to deal with */
2413     CHKLOG(rc, ArrayListGet(resultImpl->results, current_choice, (void **)&semanticList));
2414     /* get the first sem_result for that entry */
2415     CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2416     semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2417 
2418     /* put in the conf value for that nBest entry */
2419     if(!CA_NBestListGetResultConfidenceValue( resultImpl->nbestList, current_choice, &confValue))
2420       return ESR_ARGUMENT_OUT_OF_BOUNDS;
2421 
2422     psprintf(label, L("%d"), confValue);
2423     lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2424       if (lValue == NULL)
2425       {
2426         PLogError(L("ESR_OUT_OF_MEMORY"));
2427         return ESR_OUT_OF_MEMORY;
2428       }
2429       LSTRCPY(lValue, label);
2430       CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("conf"),lValue));
2431     }
2432   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("CMPT"), 0));
2433   }
2434 
2435   /* OSI log the end of recognition and all buffered tokens */
2436 
2437   /* OSI log end of recognition time */
2438   PTimeStampSet(&EORT);
2439   impl->recogLogTimings.EORT = PTimeStampDiff(&EORT, &impl->timestamp);
2440   impl->recogLogTimings.DURS = impl->processed * MSEC_PER_FRAME;
2441 
2442   /*****************************************/
2443   /* OSI Logging stuff */
2444   /*****************************************/
2445 if( impl->osi_log_level != 0)
2446  {
2447   /* get the nBest size (this size may have changed since previous set cuz of nbest list filtering) */
2448   CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2449   /* OSI log the nBest list size */
2450   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("NBST"), nbestSize));
2451 
2452 
2453   for (iBest = 0; iBest < nbestSize; iBest++) /* loop */
2454   {
2455     /* get the nBest entry */
2456     CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void**)&semanticList));
2457 
2458     /* get the first sem_result for the entry (there may be many, but ignore others) */
2459     CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2460     semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2461 
2462     /* get the meaning and OSI log it */
2463     CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2464     /* OSI log RSLT (meaning) for nbest item */
2465     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSLT"), lValue));
2466 
2467     /* get the literal and OSI log it */
2468     CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("literal"), (void **)&lValue));
2469     /* OSI log RAWT SPOK (literal) for nbest item */
2470     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWT"), lValue));
2471     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SPOK"), lValue));
2472 
2473     /* get the score and OSI log it */
2474     CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("raws"), (void **)&lValue));
2475     /* OSI log RAWS (score) for nbest item */
2476     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWS"), lValue));
2477     /* get the confidence value and OSI log it */
2478     CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("conf"), (void **)&lValue));
2479     /* OSI log CONF (values) for nbest item */
2480     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("CONF"), lValue));
2481   }
2482 
2483   /* log the values */
2484   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BORT"), impl->recogLogTimings.BORT));
2485   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("DURS"), impl->recogLogTimings.DURS));
2486   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EORT"), impl->recogLogTimings.EORT));
2487   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSD"), impl->recogLogTimings.EOSD));
2488   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSS"), impl->recogLogTimings.EOSS));
2489   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOST"), impl->recogLogTimings.EOST));
2490   if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2491   {
2492     len = P_PATH_MAX;
2493     CHKLOG(rc, SR_EventLogAudioGetFilename(impl->eventLog, waveformFilename, &len));
2494     CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("WVNM"), waveformFilename));
2495   }
2496   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSTT"), L("ok")));
2497   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RENR"), L("ok")));
2498   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("ENDR"), impl->eos_reason));
2499   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcnd")));
2500 
2501   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSS"), impl->recogLogTimings.BOSS)); /* extra not in OSI spec */
2502   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRboss")));
2503 
2504   /*
2505    * Record which recognizer was the successful one (male or female)
2506    * this index refers to the order in the swimdllist file.
2507    */
2508   CHKLOG(rc, CA_GetRecogID(impl->recognizer, &recogID));
2509   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("RECOG"), recogID));
2510   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRrcid")));
2511 
2512   /* Record semantic results returned by top nbestlist entry */
2513   if (1)
2514   {
2515 #define MAX_SEMANTIC_KEYS 50
2516     LCHAR* semanticKeys[MAX_SEMANTIC_KEYS];
2517 #define SEMANTIC_VALUE_SIZE 512
2518     LCHAR semanticValue[SEMANTIC_VALUE_SIZE];
2519     size_t num_semanticKeys;
2520 
2521     rc = resultImpl->results->getSize(resultImpl->results, &nbestSize);
2522     if (rc != ESR_SUCCESS)
2523     {
2524       PLogError(ESR_rc2str(rc));
2525       goto DONE;
2526     }
2527     for (iBest = 0; iBest < nbestSize; ++iBest) /* loop2 */
2528     {
2529       rc = resultImpl->results->get(resultImpl->results, iBest, (void **)&semanticList);
2530       if (rc != ESR_SUCCESS)
2531       {
2532         PLogError(ESR_rc2str(rc));
2533         goto DONE;
2534       }
2535 
2536 	  /* semanticResultsSize is the number of semantic meanings, although
2537 		 ambiguous parses are not entirely supported
2538 		 num_semanticKeys    is associated to a particular parse         */
2539 
2540       rc = semanticList->getSize(semanticList, &semanticResultsSize);
2541       if (rc != ESR_SUCCESS)
2542       {
2543         PLogError(ESR_rc2str(rc));
2544         goto DONE;
2545       }
2546       for (k = 0; k < semanticResultsSize; ++k)
2547       {
2548 		size_t iKey;
2549         rc = semanticList->get(semanticList, k, (void **)&semanticResult);
2550         if (rc != ESR_SUCCESS)
2551         {
2552           PLogError(ESR_rc2str(rc));
2553           goto DONE;
2554         }
2555         num_semanticKeys = MAX_SEMANTIC_KEYS;
2556         rc = semanticResult->getKeyList(semanticResult, (LCHAR**) & semanticKeys, &num_semanticKeys);
2557         if (rc != ESR_SUCCESS)
2558         {
2559           PLogError(ESR_rc2str(rc));
2560           goto DONE;
2561         }
2562 
2563         for (iKey=0; iKey<num_semanticKeys; ++iKey)
2564         {
2565           len = SEMANTIC_VALUE_SIZE;
2566           rc = semanticResult->getValue(semanticResult, semanticKeys[iKey], (LCHAR*) &semanticValue, &len);
2567           if (rc != ESR_SUCCESS)
2568           {
2569             PLogError(ESR_rc2str(rc));
2570             goto DONE;
2571           }
2572 
2573           rc = SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, semanticKeys[iKey], semanticValue);
2574           if (rc != ESR_SUCCESS)
2575           {
2576             PLogError(ESR_rc2str(rc));
2577             goto DONE;
2578           }
2579         }
2580       }
2581     }
2582     rc = SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESR_SemanticResult[0]"));
2583     if (rc != ESR_SUCCESS)
2584     {
2585       PLogError(ESR_rc2str(rc));
2586       goto DONE;
2587     }
2588   }
2589 }
2590 DONE:
2591   return ESR_SUCCESS;
2592 CLEANUP:
2593   impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2594   return rc;
2595 }
2596 
2597 /**
2598  * Indicates if it is possible to push data from SREC into the internal recognizer.
2599  * If data can be pushed, ESR_CONTINUE_PROCESSING is returned.
2600  *
2601  * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2602  * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI
2603  */
canPushAudioIntoRecognizer(SR_RecognizerImpl * impl)2604 PINLINE ESR_ReturnCode canPushAudioIntoRecognizer(SR_RecognizerImpl* impl)
2605 {
2606   ESR_ReturnCode rc;
2607 
2608   if (impl->lockFunction)
2609     impl->lockFunction(ESR_LOCK, impl->lockData);
2610 
2611   /* do I have enough to make a frame ? */
2612   if (CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2613   {
2614     /* Not enough data */
2615     if (!impl->gotLastFrame)
2616     {
2617       /* not last frame, so ask for more audio */
2618       if (impl->lockFunction)
2619         impl->lockFunction(ESR_UNLOCK, impl->lockData);
2620       return ESR_SUCCESS;
2621     }
2622     else
2623     {
2624       /* last frame, make do with what you have */
2625       if (impl->lockFunction)
2626         impl->lockFunction(ESR_UNLOCK, impl->lockData);
2627 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2628       PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2629 #endif
2630       impl->isRecognizing = ESR_FALSE;
2631       impl->recogLogTimings.EOSD = impl->frames;
2632       impl->eos_reason = L("EOI");
2633       impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2634       if (impl->eventLog != NULL)
2635       {
2636         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_EOI")));
2637         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2638         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2639         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2640       }
2641       return ESR_CONTINUE_PROCESSING;
2642     }
2643   }
2644   if (impl->lockFunction)
2645     impl->lockFunction(ESR_UNLOCK, impl->lockData);
2646   return ESR_CONTINUE_PROCESSING;
2647 CLEANUP:
2648   return rc;
2649 }
2650 
2651 /**
2652  * Pushes data from SREC into the internal recognizer.
2653  *
2654  * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2655  * OUTPUT STATES: same
2656  */
pushAudioIntoRecognizer(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2657 PINLINE ESR_ReturnCode pushAudioIntoRecognizer(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2658     SR_RecognizerResultType* type,
2659     SR_RecognizerResult* result)
2660 {
2661   size_t count;
2662   ESR_ReturnCode rc;
2663 
2664   if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2665   {
2666     /* Don't push frames unless they're needed */
2667 
2668     /* Check for leaked state */
2669     passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2670     return ESR_CONTINUE_PROCESSING;
2671   }
2672   if (impl->lockFunction)
2673     impl->lockFunction(ESR_LOCK, impl->lockData);
2674   count = CircularBufferRead(impl->buffer, impl->audioBuffer, impl->FRAME_SIZE);
2675   if (impl->lockFunction)
2676     impl->lockFunction(ESR_UNLOCK, impl->lockData);
2677 
2678   WaveformBuffer_Write(impl->waveformBuffer, impl->audioBuffer, count);
2679   if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2680   {
2681     rc = SR_EventLogAudioWrite(impl->eventLog, impl->audioBuffer, count);
2682     if (rc == ESR_BUFFER_OVERFLOW)
2683       rc = ESR_INVALID_STATE;
2684     if (rc != ESR_SUCCESS)
2685     {
2686       PLogError(ESR_rc2str(rc));
2687       if (impl->lockFunction)
2688         impl->lockFunction(ESR_UNLOCK, impl->lockData);
2689       goto CLEANUP;
2690     }
2691   }
2692   if (count < impl->FRAME_SIZE)
2693   {
2694     rc = ESR_INVALID_STATE;
2695     PLogError(L("%s: error reading buffer data (count=%d, frameSize=%d)"), ESR_rc2str(rc), count, impl->FRAME_SIZE);
2696     goto CLEANUP;
2697   }
2698   if (!CA_LoadSamples(impl->wavein, impl->audioBuffer, impl->sampleRate / FRAMERATE))
2699   {
2700     PLogError(L("ESR_INVALID_STATE"));
2701     rc = ESR_INVALID_STATE;
2702     goto CLEANUP;
2703   }
2704 
2705   CA_ConditionSamples(impl->wavein);
2706   /* Check for leaked state */
2707   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2708   return ESR_CONTINUE_PROCESSING;
2709 CLEANUP:
2710   return rc;
2711 }
2712 
2713 /**
2714  * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2715  * OUTPUT STATES: same
2716  */
generateFrameFromAudio(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2717 PINLINE ESR_ReturnCode generateFrameFromAudio(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2718     SR_RecognizerResultType* type,
2719     SR_RecognizerResult* result)
2720 {
2721   if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2722   {
2723     /* Don't create frames unless they're needed */
2724 
2725     /* Check for leaked state */
2726     passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2727     return ESR_CONTINUE_PROCESSING;
2728   }
2729 
2730   /* Try processing one frame */
2731   if (!CA_MakeFrame(impl->frontend, impl->utterance, impl->wavein))
2732   {
2733     /*
2734     * One of three cases occured:
2735     *
2736     * - We don't have enough samples to process one frame. This should be impossible because
2737     * pushAudioIntoRecognizer() is always called before us and will not continue if we don't
2738     * have enough samples.
2739     *
2740     * - The internal recognizer needs a minimum amount of audio before it'll begin generating
2741     *   frames. This is normal and we return with a success value.
2742     *
2743     * - The recognizer skips every even frame number (for performance reasons). This is normal
2744     *   and we return with a success value.
2745     */
2746     *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2747     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2748     return ESR_SUCCESS;
2749   }
2750   ++impl->frames;
2751   /* Check for leaked state */
2752   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2753   return ESR_CONTINUE_PROCESSING;
2754 }
2755 
2756 /**
2757  * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2758  * OUTPUT STATES: same
2759  */
generateFrameStats(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2760 PINLINE ESR_ReturnCode generateFrameStats(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2761                            SR_RecognizerResultType* type,
2762                            SR_RecognizerResult* result)
2763 {
2764   if (impl->frames < impl->bgsniff)
2765   {
2766     /* Wait until we have enough frames to estimate background stats */
2767     *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2768     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2769     return ESR_SUCCESS;
2770   }
2771   else if (impl->frames == impl->bgsniff)
2772     CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->bgsniff);
2773 
2774   /* Check for leaked state */
2775   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2776   return ESR_CONTINUE_PROCESSING;
2777 }
2778 
2779 /**
2780  * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2781  * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS
2782  */
generatePatternFromFrame(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2783 PINLINE ESR_ReturnCode generatePatternFromFrame(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2784     SR_RecognizerResultType* type,
2785     SR_RecognizerResult* result)
2786 {
2787   SR_AcousticModelsImpl* modelsImpl;
2788   ESR_ReturnCode rc;
2789 
2790   /* Run the search */
2791   modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2792   if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2793   {
2794     *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2795     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2796     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2797     if (impl->eventLog != NULL)
2798     {
2799       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_END")));
2800       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2801       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2802       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2803     }
2804     PLogError(L("ESR_INVALID_STATE"));
2805     return ESR_INVALID_STATE;
2806   }
2807   if (!CA_AdvanceUtteranceFrame(impl->utterance))
2808   {
2809     *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2810     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2811     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2812     if (impl->eventLog != NULL)
2813     {
2814       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_END")));
2815       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2816       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2817       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2818     }
2819     PLogError(L("ESR_INVALID_STATE"));
2820     return ESR_INVALID_STATE;
2821   }
2822   CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2823   ++impl->processed;
2824 
2825   if (impl->lockFunction)
2826     impl->lockFunction(ESR_LOCK, impl->lockData);
2827   if (impl->gotLastFrame && CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2828   {
2829     /*
2830      * SREC have run out of data but the underlying recognizer might have some frames
2831      * queued for processing.
2832      */
2833     if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0)
2834     {
2835       /* EOI means end of input */
2836 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2837       PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2838 #endif
2839       impl->isRecognizing = ESR_FALSE;
2840       impl->recogLogTimings.EOSD = impl->frames;
2841       impl->eos_reason = L("EOI");
2842       impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2843       if (impl->eventLog != NULL)
2844       {
2845         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOI")));
2846         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2847         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2848         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2849       }
2850     }
2851     else
2852     {
2853 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2854       PLogMessage("L: Voicing END (EOF) at %d frames (%d processed)", impl->frames, impl->processed);
2855 #endif
2856 
2857       impl->isRecognizing = ESR_FALSE;
2858       impl->recogLogTimings.EOSD = impl->frames;
2859       impl->eos_reason = L("EOF");
2860       impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2861       if (impl->eventLog != NULL)
2862       {
2863         CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOS")));
2864         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2865         CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2866         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2867       }
2868       *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2869       *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2870       passert(impl->processed == impl->frames);
2871       if (impl->lockFunction)
2872         impl->lockFunction(ESR_UNLOCK, impl->lockData);
2873       return ESR_SUCCESS;
2874     }
2875   }
2876   if (impl->lockFunction)
2877     impl->lockFunction(ESR_UNLOCK, impl->lockData);
2878 
2879   /* Check for leaked state */
2880   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2881   return ESR_CONTINUE_PROCESSING;
2882 CLEANUP:
2883   return rc;
2884 }
2885 
2886 /**
2887  * Same as generatePatternFromFrame() only the buffer is known to be empty.
2888  *
2889  * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI
2890  * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2891  */
generatePatternFromFrameEOI(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2892 PINLINE ESR_ReturnCode generatePatternFromFrameEOI(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2893     SR_RecognizerResultType* type,
2894     SR_RecognizerResult* result)
2895 {
2896   SR_AcousticModelsImpl* modelsImpl;
2897   ESR_ReturnCode rc;
2898 
2899   /* Run the search */
2900   modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2901 
2902   if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2903   {
2904     passert(impl->processed == impl->frames);
2905     *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2906     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2907     impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2908     return ESR_SUCCESS;
2909   }
2910 
2911   if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2912   {
2913     *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2914     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2915     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2916     if (impl->eventLog != NULL)
2917     {
2918       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2919       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2920       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2921       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2922     }
2923     PLogError(L("ESR_INVALID_STATE"));
2924     return ESR_INVALID_STATE;
2925   }
2926   if (!CA_AdvanceUtteranceFrame(impl->utterance))
2927   {
2928     *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2929     *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2930     impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2931     if (impl->eventLog != NULL)
2932     {
2933       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2934       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2935       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2936       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2937     }
2938     PLogError(L("ESR_INVALID_STATE"));
2939     return ESR_INVALID_STATE;
2940   }
2941   CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2942   ++impl->processed;
2943 
2944   if (impl->lockFunction)
2945     impl->lockFunction(ESR_LOCK, impl->lockData);
2946 
2947   if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2948   {
2949     passert(impl->processed == impl->frames);
2950     *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2951     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2952     impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2953     if (impl->eventLog != NULL)
2954     {
2955       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_EOS")));
2956       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2957       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2958       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2959     }
2960     if (impl->lockFunction)
2961       impl->lockFunction(ESR_UNLOCK, impl->lockData);
2962     return ESR_SUCCESS;
2963   }
2964   if (impl->lockFunction)
2965     impl->lockFunction(ESR_UNLOCK, impl->lockData);
2966 
2967   /* Check for leaked state */
2968   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2969   return ESR_CONTINUE_PROCESSING;
2970 CLEANUP:
2971   if (impl->lockFunction)
2972     impl->lockFunction(ESR_UNLOCK, impl->lockData);
2973   return rc;
2974 }
2975 
2976 
2977 /**
2978  * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2979  * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2980  */
detectEndOfSpeech(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2981 ESR_ReturnCode detectEndOfSpeech(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2982                                  SR_RecognizerResultType* type,
2983                                  SR_RecognizerResult* result)
2984 {
2985   EOSrc eos; /* eos means end of speech */
2986   int eos_by_level; /* eos means end of speech */
2987   PTimeStamp timestamp;
2988   ESR_ReturnCode rc;
2989   ESR_BOOL enableGetWaveform = ESR_FALSE;
2990 
2991   eos_by_level = CA_UtteranceHasEnded(impl->utterance);
2992   if (eos_by_level)
2993   {
2994     eos = SPEECH_ENDED_BY_LEVEL_TIMEOUT;
2995   }
2996   else
2997   {
2998     eos = CA_IsEndOfUtteranceByResults(impl->recognizer);
2999   }
3000 
3001   ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
3002   //impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
3003 
3004   if (eos == VALID_SPEECH_CONTINUING && enableGetWaveform && impl->waveformBuffer->overflow_count > 0)
3005   {
3006     size_t bufferSize;
3007     CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &bufferSize));
3008     PLogMessage("Forcing EOS due to wfbuf overflow (fr=%d,sz=%d,of=%d)", impl->frames, bufferSize, impl->waveformBuffer->overflow_count);
3009     eos = SPEECH_TOO_LONG;
3010   }
3011 
3012   if (eos != VALID_SPEECH_CONTINUING)
3013   {
3014     switch (eos)
3015     {
3016       case SPEECH_ENDED:
3017         /* normal */
3018         impl->eos_reason = L("itimeout");
3019         break;
3020 
3021       case SPEECH_ENDED_WITH_ERROR:
3022         /* error */
3023         impl->eos_reason = L("err");
3024         break;
3025 
3026       case SPEECH_TOO_LONG:
3027         /* timeout*/
3028         impl->eos_reason = L("ctimeout");
3029         break;
3030 
3031       case SPEECH_MAYBE_ENDED:
3032         /* normal */
3033         impl->eos_reason = L("itimeout");
3034         break;
3035       case SPEECH_ENDED_BY_LEVEL_TIMEOUT:
3036         /* normal */
3037         impl->eos_reason = L("levelTimeout");
3038         break;
3039 
3040       default:
3041         /* error */
3042         impl->eos_reason = L("err");
3043     }
3044 
3045 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3046     PLogMessage("L: Voicing END (EOS) at %d frames, %d processed (reason: %s)\n", impl->frames, impl->processed, impl->eos_reason);
3047 #endif
3048 
3049     impl->recogLogTimings.EOSD = impl->frames; /* how many frames have been sent prior to detect EOS */
3050     PTimeStampSet(&timestamp); /* time it took to detect EOS (in millisec) */
3051     impl->recogLogTimings.EOST = PTimeStampDiff(&timestamp, &impl->timestamp);
3052 
3053     *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
3054     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3055     impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
3056     if (impl->eventLog != NULL)
3057     {
3058       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("detectEndOfSpeech() -> SR_RECOGNIZER_INTERNAL_EOS")));
3059       CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("reason"), impl->eos_reason));
3060       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
3061       CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3062       CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3063     }
3064     impl->isRecognizing = ESR_FALSE;
3065     return ESR_SUCCESS;
3066   }
3067 
3068   /* Check for leaked state */
3069   passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
3070   return ESR_CONTINUE_PROCESSING;
3071 CLEANUP:
3072   return rc;
3073 }
3074 
3075 /**
3076  * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION
3077  * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOI
3078  */
detectBeginningOfSpeech(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)3079 ESR_ReturnCode detectBeginningOfSpeech(SR_RecognizerImpl* impl,
3080                                        SR_RecognizerStatus* status,
3081                                        SR_RecognizerResultType* type,
3082                                        SR_RecognizerResult* result)
3083 {
3084   ESR_ReturnCode rc;
3085   ESR_BOOL gatedMode;
3086   size_t num_windback_bytes, num_windback_frames;
3087   waveform_buffering_state_t buffering_state;
3088 
3089   CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &gatedMode));
3090 
3091   if (gatedMode || (!gatedMode && impl->frames < impl->bgsniff))
3092   {
3093     ESR_BOOL pushable = ESR_FALSE;
3094 
3095     rc = canPushAudioIntoRecognizer(impl);
3096     if (rc == ESR_SUCCESS)
3097     {
3098       /* Not enough samples to process one frame */
3099       if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3100       {
3101         *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3102         *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3103         return ESR_SUCCESS;
3104       }
3105     }
3106     else if (rc != ESR_CONTINUE_PROCESSING)
3107       return rc;
3108     else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3109     {
3110       /* Got end of input before beginning of speech */
3111       *status = SR_RECOGNIZER_EVENT_NO_MATCH;
3112       *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3113       impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH;
3114       CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3115       return ESR_SUCCESS;
3116     }
3117     else
3118       pushable = ESR_TRUE;
3119     if (pushable)
3120     {
3121       rc = pushAudioIntoRecognizer(impl, status, type, result);
3122       /* OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI */
3123       if (rc != ESR_CONTINUE_PROCESSING)
3124       {
3125         /* Not enough samples to process one frame */
3126         return rc;
3127       }
3128       rc = generateFrameFromAudio(impl, status, type, result);
3129       /* OUTPUT STATES: same */
3130       if (rc != ESR_CONTINUE_PROCESSING)
3131       {
3132         /*
3133          * The internal recognizer needs a minimum amount of audio before
3134          * it begins generating frames.
3135          */
3136         return rc;
3137       }
3138     }
3139     if (!CA_AdvanceUtteranceFrame(impl->utterance))
3140     {
3141       PLogError(L("ESR_INVALID_STATE: Failed Advancing Utt Frame %d"), impl->frames);
3142       return ESR_INVALID_STATE;
3143     }
3144     if (CA_UtteranceHasVoicing(impl->utterance))
3145     {
3146       /* Utterance stats for Lombard if enough frames */
3147       if (impl->frames > impl->bgsniff)
3148       {
3149 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3150         PLogMessage("L:  Voicing START at %d frames", impl->frames);
3151 #endif
3152         /* OSI log the endpointed data */
3153 
3154         CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BTIM"), impl->frames * MSEC_PER_FRAME));
3155         CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BRGN"), 0)); /* Barge-in not supported */
3156         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIendp")));
3157 
3158         CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSD"), impl->frames));
3159         CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRbosd")));
3160 
3161         if (gatedMode)
3162           CA_CalculateUtteranceStatistics(impl->utterance, (int)(impl->frames * -1), 0);
3163         else
3164           CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->frames);
3165       }
3166 
3167       /* OK, we've got voicing or the end of input has occured
3168       ** (or both, I suppose).  If we had voicing then progress
3169       ** the recognizer, otherwise skip to the end.
3170       ** Of course, we could be running outside 'Gated Mode'
3171       ** so we won't have any frames processed at all yet -
3172       ** in this case start the recognizer anyway.
3173       */
3174 
3175       /*************************************
3176        ** Run recognition until endOfInput **
3177        *************************************/
3178 
3179       /*
3180        * Initialize both recognizers first
3181        * and disable reporting of results
3182        */
3183       if (gatedMode)
3184       {
3185         /*
3186          * We're in Gated Mode -
3187          * Because we'll have had voicing we wind-back
3188          * until the start of voicing (unsure region)
3189          */
3190         num_windback_frames = CA_SeekStartOfUtterance(impl->utterance);
3191         impl->beginningOfSpeechOffset = impl->frames - num_windback_frames;
3192         num_windback_bytes = num_windback_frames * impl->FRAME_SIZE * 2 /* due to skip even frames */;
3193 
3194         /* pfprintf(PSTDOUT,L("audio buffer windback %d frames == %d bytes\n"), num_windback_frames, num_windback_bytes); */
3195         CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
3196         if (buffering_state != WAVEFORM_BUFFERING_OFF)
3197           CHKLOG(rc, WaveformBuffer_WindBack(impl->waveformBuffer, num_windback_bytes));
3198 
3199         /*
3200          * Only transition to linear if it was previously circular (in other words if
3201          * buffering was active in the first place)
3202          */
3203         if (buffering_state == WAVEFORM_BUFFERING_ON_CIRCULAR)
3204           CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_LINEAR));
3205         impl->frames = CA_GetUnprocessedFramesInUtterance(impl->utterance);
3206       }
3207       else
3208         impl->frames = 0;
3209       /* reset the frames */
3210       impl->processed = 0;
3211       CHKLOG(rc, beginRecognizing(impl));
3212       impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3213       *status = SR_RECOGNIZER_EVENT_START_OF_VOICING;
3214       *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3215       return ESR_SUCCESS;
3216     }
3217     else
3218     {
3219       if (impl->frames > impl->utterance_timeout)
3220       {
3221         /* beginning of speech timeout */
3222         impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT;
3223         *status = SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT;
3224         *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3225         CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3226         return ESR_SUCCESS;
3227       }
3228     }
3229   }
3230   else if (!gatedMode && impl->frames >= impl->bgsniff)
3231   {
3232     /*
3233     * If not gated mode and I have processed enough frames, then start the recognizer
3234     * right away.
3235     */
3236     impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3237     *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3238     *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3239 
3240     /* reset the frames */
3241     impl->frames = impl->processed = 0;
3242     CHKLOG(rc, beginRecognizing(impl));
3243     return ESR_SUCCESS;
3244   }
3245   *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3246   *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3247   return ESR_SUCCESS;
3248 
3249 CLEANUP:
3250   return rc;
3251 }
3252 
SR_RecognizerAdvanceImpl(SR_Recognizer * self,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult ** result)3253 ESR_ReturnCode SR_RecognizerAdvanceImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
3254                                         SR_RecognizerResultType* type,
3255                                         SR_RecognizerResult** result)
3256 {
3257   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3258   ESR_BOOL pushable;
3259   ESR_ReturnCode rc;
3260 
3261   if (status == NULL || type == NULL || result == NULL)
3262   {
3263     PLogError(L("ESR_INVALID_ARGUMENT"));
3264     return ESR_INVALID_ARGUMENT;
3265   }
3266 
3267   /* create the result holder and save the pointer */
3268   /* creation only happens once (due to the if condition) */
3269   if (impl->result == NULL)
3270     CHKLOG(rc, SR_RecognizerResult_Create(&impl->result, impl));
3271   *result = impl->result;
3272 
3273   /*
3274    * The following two lines are used to detect bugs whereby we forget to set
3275    * status or type before returning
3276    */
3277   *status = SR_RECOGNIZER_EVENT_INVALID;
3278   *type = SR_RECOGNIZER_RESULT_TYPE_INVALID;
3279 
3280 MOVE_TO_NEXT_STATE:
3281   switch (impl->internalState)
3282   {
3283     case SR_RECOGNIZER_INTERNAL_BEGIN:
3284       impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_DETECTION;
3285       *status = SR_RECOGNIZER_EVENT_STARTED;
3286       *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3287       return ESR_SUCCESS;
3288 
3289     case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
3290       rc = detectBeginningOfSpeech(impl, status, type, impl->result);
3291       if (rc != ESR_CONTINUE_PROCESSING)
3292       {
3293         /*
3294          * SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION, or
3295          * SR_RECOGNIZER_INTERNAL_EOI
3296          */
3297         return rc;
3298       }
3299       /* Leaked state */
3300       passert(0);
3301       break;
3302 
3303     case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
3304       pushable = ESR_FALSE;
3305       rc = canPushAudioIntoRecognizer(impl);
3306       if (rc == ESR_SUCCESS)
3307       {
3308         /* Not enough samples to process one frame */
3309         if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3310         {
3311           *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3312           *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3313           return ESR_SUCCESS;
3314         }
3315       }
3316       else if (rc != ESR_CONTINUE_PROCESSING)
3317         return rc;
3318       else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3319         goto MOVE_TO_NEXT_STATE;
3320       else
3321         pushable = ESR_TRUE;
3322       if (pushable)
3323       {
3324         rc = pushAudioIntoRecognizer(impl, status, type, impl->result);
3325         if (rc != ESR_CONTINUE_PROCESSING)
3326         {
3327           /* Not enough samples to process one frame */
3328           return rc;
3329         }
3330         if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3331           goto MOVE_TO_NEXT_STATE;
3332         rc = generateFrameFromAudio(impl, status, type, impl->result);
3333         if (rc != ESR_CONTINUE_PROCESSING)
3334         {
3335           /*
3336            * The internal recognizer needs a minimum amount of audio before
3337            * it begins generating frames.
3338            */
3339           return rc;
3340         }
3341       }
3342       rc = generateFrameStats(impl, status, type, impl->result);
3343       if (rc != ESR_CONTINUE_PROCESSING)
3344       {
3345         /* Not enough frames to calculate stats */
3346         return rc;
3347       }
3348       rc = generatePatternFromFrame(impl, status, type, impl->result);
3349       if (rc != ESR_CONTINUE_PROCESSING)
3350       {
3351         /* End of speech detected */
3352         return rc;
3353       }
3354       if (impl->internalState == SR_RECOGNIZER_INTERNAL_END)
3355         goto MOVE_TO_NEXT_STATE;
3356       rc = detectEndOfSpeech(impl, status, type, impl->result);
3357       if (rc != ESR_CONTINUE_PROCESSING)
3358       {
3359         /* End of speech detected */
3360         return rc;
3361       }
3362       *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3363       *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3364       return ESR_SUCCESS;
3365 
3366     case SR_RECOGNIZER_INTERNAL_EOI:
3367       /*
3368        * On EOI (end of input), we need to process the remaining frames that had not
3369        * been processed when PutAudio set the gotLastFrame flag
3370        */
3371       rc = generatePatternFromFrameEOI(impl, status, type, impl->result);
3372       if (rc != ESR_CONTINUE_PROCESSING)
3373       {
3374         /* End of speech detected */
3375         return rc;
3376       }
3377       rc = detectEndOfSpeech(impl, status, type, impl->result);
3378       if (rc != ESR_CONTINUE_PROCESSING)
3379       {
3380         /* End of speech detected */
3381         return rc;
3382       }
3383       *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3384       *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3385       return ESR_SUCCESS;
3386 
3387     case SR_RECOGNIZER_INTERNAL_EOS:
3388       /* On EOS (end of speech detected - not due to end of input), create the result */
3389       if (impl->lockFunction)
3390         impl->lockFunction(ESR_LOCK, impl->lockData);
3391       CircularBufferReset(impl->buffer);
3392       if (impl->lockFunction)
3393         impl->lockFunction(ESR_UNLOCK, impl->lockData);
3394       CHKLOG(rc, SR_RecognizerCreateResultImpl((SR_Recognizer*) impl, status, type));
3395       impl->internalState = SR_RECOGNIZER_INTERNAL_END;
3396       return ESR_SUCCESS;
3397 
3398     case SR_RECOGNIZER_INTERNAL_END:
3399       return ESR_SUCCESS;
3400     default:
3401       PLogError(L("ESR_INVALID_STATE"));
3402       return ESR_INVALID_STATE;
3403   }
3404 CLEANUP:
3405   return rc;
3406 }
3407 
3408 
3409 
SR_RecognizerLoadUtteranceImpl(SR_Recognizer * self,const LCHAR * filename)3410 ESR_ReturnCode SR_RecognizerLoadUtteranceImpl(SR_Recognizer* self, const LCHAR* filename)
3411 {
3412   /* TODO: complete */
3413   return ESR_SUCCESS;
3414 }
3415 
SR_RecognizerLoadWaveFileImpl(SR_Recognizer * self,const LCHAR * filename)3416 ESR_ReturnCode SR_RecognizerLoadWaveFileImpl(SR_Recognizer* self, const LCHAR* filename)
3417 {
3418   /* TODO: complete */
3419   return ESR_SUCCESS;
3420 }
3421 
SR_RecognizerLogEventImpl(SR_Recognizer * self,const LCHAR * event)3422 ESR_ReturnCode SR_RecognizerLogEventImpl(SR_Recognizer* self, const LCHAR* event)
3423 {
3424   ESR_ReturnCode rc;
3425   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3426   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, event));
3427   return ESR_SUCCESS;
3428 CLEANUP:
3429   return rc;
3430 }
3431 
SR_RecognizerLogTokenImpl(SR_Recognizer * self,const LCHAR * token,const LCHAR * value)3432 ESR_ReturnCode SR_RecognizerLogTokenImpl(SR_Recognizer* self, const LCHAR* token, const LCHAR* value)
3433 {
3434   ESR_ReturnCode rc;
3435   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3436   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3437   return ESR_SUCCESS;
3438 CLEANUP:
3439   return rc;
3440 }
3441 
SR_RecognizerLogTokenIntImpl(SR_Recognizer * self,const LCHAR * token,int value)3442 ESR_ReturnCode SR_RecognizerLogTokenIntImpl(SR_Recognizer* self, const LCHAR* token, int value)
3443 {
3444   ESR_ReturnCode rc;
3445   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3446   CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3447   return ESR_SUCCESS;
3448 CLEANUP:
3449   return rc;
3450 }
3451 
SR_RecognizerLogSessionStartImpl(SR_Recognizer * self,const LCHAR * sessionName)3452 ESR_ReturnCode SR_RecognizerLogSessionStartImpl(SR_Recognizer* self, const LCHAR* sessionName)
3453 {
3454   ESR_ReturnCode rc;
3455   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3456   /**
3457   * OSI Platform logging.
3458   * In OSR, these events are logged by the platform. We have no platform in ESR, so we
3459    * log them here.
3460   */
3461 
3462   /* call (session) start, tokens optional */
3463   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclst")));
3464 
3465   /* service start, in this case SRecTest service */
3466   CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SVNM"), sessionName));
3467   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIsvst")));
3468   if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3469     CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3470 
3471   return ESR_SUCCESS;
3472 CLEANUP:
3473   return rc;
3474 }
3475 
SR_RecognizerLogSessionEndImpl(SR_Recognizer * self)3476 ESR_ReturnCode SR_RecognizerLogSessionEndImpl(SR_Recognizer* self)
3477 {
3478   ESR_ReturnCode rc;
3479   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3480 
3481   /* OSI log end of call (session) */
3482   CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclnd")));
3483   if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3484     CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3485   return ESR_SUCCESS;
3486 CLEANUP:
3487   return rc;
3488 }
3489 
3490 
SR_RecognizerLogWaveformDataImpl(SR_Recognizer * self,const LCHAR * waveformFilename,const LCHAR * transcription,const double bos,const double eos,ESR_BOOL isInvocab)3491 ESR_ReturnCode SR_RecognizerLogWaveformDataImpl(SR_Recognizer* self, const LCHAR* waveformFilename,
3492     const LCHAR* transcription, const double bos,
3493     const double eos, ESR_BOOL isInvocab)
3494 {
3495   ESR_ReturnCode rc;
3496   SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3497   LCHAR num[P_PATH_MAX];
3498   int frame;
3499 
3500   CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("FILE"), waveformFilename));
3501   CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("TRANS"), transcription));
3502   sprintf(num, L("%.2f"), bos);
3503   CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_SEC"), num));
3504   sprintf(num, L("%.2f"), eos);
3505   CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_SEC"), num));
3506   CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("FRAMESIZE"), impl->FRAME_SIZE));
3507   CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("SAMPLERATE"), impl->sampleRate));
3508   frame = (int)(bos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3509   CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_FR"), frame));
3510   frame = (int)(eos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3511   CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_FR"), frame));
3512   CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("INVOCAB"), isInvocab));
3513   CHKLOG(rc, SR_EventLogEvent_AUDIO(impl->eventLog, impl->osi_log_level, L("ESRwfrd")));
3514   return ESR_SUCCESS;
3515 CLEANUP:
3516   return rc;
3517 }
3518 
SR_RecognizerSetLockFunctionImpl(SR_Recognizer * self,SR_RecognizerLockFunction function,void * data)3519 ESR_ReturnCode SR_RecognizerSetLockFunctionImpl(SR_Recognizer* self, SR_RecognizerLockFunction function, void* data)
3520 {
3521   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3522 
3523   impl->lockFunction = function;
3524   impl->lockData = data;
3525   return ESR_SUCCESS;
3526 }
3527 
doSignalQualityInit(SR_RecognizerImpl * impl)3528 static ESR_ReturnCode doSignalQualityInit(SR_RecognizerImpl* impl)
3529 {
3530   CA_DoSignalCheck(impl->wavein, &impl->isSignalClipping, &impl->isSignalDCOffset,
3531                    &impl->isSignalNoisy, &impl->isSignalTooQuiet, &impl->isSignalTooFewSamples,
3532                    &impl->isSignalTooManySamples);
3533   impl->isSignalQualityInitialized = ESR_TRUE;
3534   return ESR_SUCCESS;
3535 }
3536 
SR_RecognizerIsSignalClippingImpl(SR_Recognizer * self,ESR_BOOL * isClipping)3537 ESR_ReturnCode SR_RecognizerIsSignalClippingImpl(SR_Recognizer* self, ESR_BOOL* isClipping)
3538 {
3539   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3540   ESR_ReturnCode rc;
3541 
3542   if (isClipping == NULL)
3543   {
3544     PLogError("SR_RecognizerIsSignalClippingImpl", ESR_INVALID_ARGUMENT);
3545     return ESR_INVALID_ARGUMENT;
3546   }
3547   if (!impl->isSignalQualityInitialized)
3548     CHKLOG(rc, doSignalQualityInit(impl));
3549   *isClipping = impl->isSignalClipping;
3550   return ESR_SUCCESS;
3551 CLEANUP:
3552   return rc;
3553 }
3554 
SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer * self,ESR_BOOL * isDCOffset)3555 ESR_ReturnCode SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer* self, ESR_BOOL* isDCOffset)
3556 {
3557   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3558   ESR_ReturnCode rc;
3559 
3560   if (isDCOffset == NULL)
3561   {
3562     PLogError("SR_RecognizerIsSignalDCOffsetImpl", ESR_INVALID_ARGUMENT);
3563     return ESR_INVALID_ARGUMENT;
3564   }
3565   if (!impl->isSignalQualityInitialized)
3566     CHKLOG(rc, doSignalQualityInit(impl));
3567   *isDCOffset = impl->isSignalDCOffset;
3568   return ESR_SUCCESS;
3569 CLEANUP:
3570   return rc;
3571 }
3572 
SR_RecognizerIsSignalNoisyImpl(SR_Recognizer * self,ESR_BOOL * isNoisy)3573 ESR_ReturnCode SR_RecognizerIsSignalNoisyImpl(SR_Recognizer* self, ESR_BOOL* isNoisy)
3574 {
3575   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3576   ESR_ReturnCode rc;
3577 
3578   if (isNoisy == NULL)
3579   {
3580     PLogError("SR_RecognizerIsSignalNoisyImpl", ESR_INVALID_ARGUMENT);
3581     return ESR_INVALID_ARGUMENT;
3582   }
3583   if (!impl->isSignalQualityInitialized)
3584     CHKLOG(rc, doSignalQualityInit(impl));
3585   *isNoisy = impl->isSignalNoisy;
3586   return ESR_SUCCESS;
3587 CLEANUP:
3588   return rc;
3589 }
3590 
SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer * self,ESR_BOOL * isTooQuiet)3591 ESR_ReturnCode SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer* self, ESR_BOOL* isTooQuiet)
3592 {
3593   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3594   ESR_ReturnCode rc;
3595 
3596   if (isTooQuiet == NULL)
3597   {
3598     PLogError("SR_RecognizerIsSignalTooQuietImpl", ESR_INVALID_ARGUMENT);
3599     return ESR_INVALID_ARGUMENT;
3600   }
3601   if (!impl->isSignalQualityInitialized)
3602     CHKLOG(rc, doSignalQualityInit(impl));
3603   *isTooQuiet = impl->isSignalTooQuiet;
3604   return ESR_SUCCESS;
3605 CLEANUP:
3606   return rc;
3607 }
3608 
SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer * self,ESR_BOOL * isTooFewSamples)3609 ESR_ReturnCode SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooFewSamples)
3610 {
3611   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3612   ESR_ReturnCode rc;
3613 
3614   if (isTooFewSamples == NULL)
3615   {
3616     PLogError("SR_RecognizerIsSignalTooFewSamplesImpl", ESR_INVALID_ARGUMENT);
3617     return ESR_INVALID_ARGUMENT;
3618   }
3619   if (!impl->isSignalQualityInitialized)
3620     CHKLOG(rc, doSignalQualityInit(impl));
3621   *isTooFewSamples = impl->isSignalTooFewSamples;
3622   return ESR_SUCCESS;
3623 CLEANUP:
3624   return rc;
3625 }
3626 
SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer * self,ESR_BOOL * isTooManySamples)3627 ESR_ReturnCode SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooManySamples)
3628 {
3629   SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3630   ESR_ReturnCode rc;
3631 
3632   if (isTooManySamples == NULL)
3633   {
3634     PLogError("SR_RecognizerIsSignalTooManySamplesImpl", ESR_INVALID_ARGUMENT);
3635     return ESR_INVALID_ARGUMENT;
3636   }
3637   if (!impl->isSignalQualityInitialized)
3638     CHKLOG(rc, doSignalQualityInit(impl));
3639   *isTooManySamples = impl->isSignalTooManySamples;
3640   return ESR_SUCCESS;
3641 CLEANUP:
3642   return rc;
3643 }
3644 
3645 
3646 
3647 /**************************************/
3648 /* Waveform Buffer stuff              */
3649 /**************************************/
WaveformBuffer_Create(WaveformBuffer ** waveformBuffer,size_t frame_size)3650 ESR_ReturnCode WaveformBuffer_Create(WaveformBuffer** waveformBuffer, size_t frame_size)
3651 {
3652   ESR_ReturnCode rc;
3653   WaveformBuffer *buf;
3654   size_t val_size_t;
3655   int    val_int;
3656   ESR_BOOL   exists;
3657 
3658   buf = NEW(WaveformBuffer, L("SR_RecognizerImpl.wvfmbuf"));
3659   if (buf == NULL)
3660   {
3661     rc = ESR_OUT_OF_MEMORY;
3662     PLogError(L("%s: could not create WaveformBuffer"), ESR_rc2str(rc));
3663     goto CLEANUP;
3664   }
3665 
3666   ESR_SessionContains(L("SREC.voice_enroll.bufsz_kB"), &exists);
3667   if (exists)
3668     ESR_SessionGetSize_t(L("SREC.voice_enroll.bufsz_kB"), &val_size_t);
3669   else
3670     val_size_t = DEFAULT_WAVEFORM_BUFFER_MAX_SIZE;
3671   val_size_t *= 1024; /* convert to kB*/
3672   CHKLOG(rc, CircularBufferCreate(val_size_t, L("SR_RecognizerImpl.wvfmbuf.cbuffer"), &buf->cbuffer));
3673 
3674   ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
3675   if (exists)
3676     ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &val_int);
3677   else
3678     val_int = DEFAULT_WAVEFORM_WINDBACK_FRAMES;
3679   val_int *= frame_size; /* convert frames to bytes */
3680   buf->windback_buffer_sz = (size_t) val_int;
3681   buf->windback_buffer = MALLOC(buf->windback_buffer_sz, L("SR_RecognizerImpl.wvfmbuf.windback"));
3682   if (buf->windback_buffer == NULL)
3683   {
3684     rc = ESR_OUT_OF_MEMORY;
3685     PLogError(L("%s: could not create Waveform windback buffer"), ESR_rc2str(rc));
3686     goto CLEANUP;
3687   }
3688 
3689 
3690   ESR_SessionContains(L("SREC.voice_enroll.eos_comfort_frames"), &exists);
3691   if (exists)
3692     ESR_SessionGetSize_t(L("SREC.voice_enroll.eos_comfort_frames"), &val_size_t);
3693   else
3694     val_size_t = DEFAULT_EOS_COMFORT_FRAMES;
3695   buf->eos_comfort_frames = val_size_t;
3696 
3697   ESR_SessionContains(L("SREC.voice_enroll.bos_comfort_frames"), &exists);
3698   if (exists)
3699     ESR_SessionGetSize_t(L("SREC.voice_enroll.bos_comfort_frames"), &val_size_t);
3700   else
3701     val_size_t = DEFAULT_BOS_COMFORT_FRAMES;
3702   buf->bos_comfort_frames = val_size_t;
3703 
3704   /* initially off */
3705   buf->state = WAVEFORM_BUFFERING_OFF;
3706 
3707   *waveformBuffer = buf;
3708   return ESR_SUCCESS;
3709 CLEANUP:
3710   WaveformBuffer_Destroy(buf);
3711   return rc;
3712 }
3713 
WaveformBuffer_Write(WaveformBuffer * waveformBuffer,void * data,size_t num_bytes)3714 ESR_ReturnCode WaveformBuffer_Write(WaveformBuffer* waveformBuffer, void *data, size_t num_bytes)
3715 {
3716   size_t available_bytes;
3717   size_t done_bytes;
3718 
3719   /* do nothing if not active */
3720   switch (waveformBuffer->state)
3721   {
3722     case WAVEFORM_BUFFERING_OFF:
3723       return ESR_SUCCESS;
3724 
3725     case WAVEFORM_BUFFERING_ON_CIRCULAR:
3726       available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3727       if (available_bytes < num_bytes)
3728       {
3729         done_bytes = CircularBufferSkip(waveformBuffer->cbuffer, num_bytes - available_bytes);
3730         if (done_bytes != num_bytes - available_bytes)
3731         {
3732           PLogError("WaveformBuffer_Write: error when skipping bytes");
3733           return ESR_INVALID_STATE;
3734         }
3735       }
3736       done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3737       if (done_bytes != num_bytes)
3738       {
3739         PLogError("WaveformBuffer_Write: error when writing bytes");
3740         return ESR_INVALID_STATE;
3741       }
3742       return ESR_SUCCESS;
3743 
3744     case WAVEFORM_BUFFERING_ON_LINEAR:
3745       available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3746       if (available_bytes < num_bytes)
3747       {
3748         waveformBuffer->overflow_count += num_bytes;
3749         return ESR_BUFFER_OVERFLOW;
3750       }
3751       done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3752       if (done_bytes != num_bytes)
3753       {
3754         PLogError("WaveformBuffer_Write: error when writing bytes");
3755         return ESR_INVALID_STATE;
3756       }
3757       return ESR_SUCCESS;
3758 
3759     default:
3760       PLogError("WaveformBuffer_Write: bad control path");
3761       return ESR_INVALID_STATE;
3762   }
3763 }
3764 
WaveformBuffer_Read(WaveformBuffer * waveformBuffer,void * data,size_t * num_bytes)3765 ESR_ReturnCode WaveformBuffer_Read(WaveformBuffer* waveformBuffer, void *data, size_t* num_bytes)
3766 {
3767   size_t bytes_to_read;
3768   ESR_ReturnCode rc;
3769 
3770   if (num_bytes == NULL)
3771   {
3772     rc = ESR_INVALID_ARGUMENT;
3773     PLogError(ESR_rc2str(rc));
3774     goto CLEANUP;
3775   }
3776   if (waveformBuffer->overflow_count > 0)
3777   {
3778     memset(data, 0, *num_bytes);
3779     *num_bytes = 0;
3780     PLogError(L("WaveformBuffer_Read: previous overflow causes read to return NULL"));
3781     return ESR_SUCCESS;
3782   }
3783 
3784   if (waveformBuffer->read_size != 0 && *num_bytes > waveformBuffer->read_size)
3785   {
3786     PLogError(L("ESR_OUT_OF_MEMORY: waveform buffer too small for read, increase from %d to %d"), *num_bytes, waveformBuffer->read_size);
3787     return ESR_OUT_OF_MEMORY;
3788   }
3789 
3790   if (waveformBuffer->read_size == 0)
3791     bytes_to_read = *num_bytes;
3792   else
3793     bytes_to_read = MIN(waveformBuffer->read_size, *num_bytes);
3794   waveformBuffer->read_size -= bytes_to_read;
3795   *num_bytes = CircularBufferRead(waveformBuffer->cbuffer, data, bytes_to_read);
3796   if (*num_bytes != bytes_to_read)
3797   {
3798     PLogError("WaveformBuffer_Read: error reading buffer");
3799     return ESR_INVALID_STATE;
3800   }
3801   return ESR_SUCCESS;
3802 CLEANUP:
3803   return rc;
3804 }
3805 
3806 /* WindBack will save the last num_bytes recorded, reset the buffer, and then load the
3807    saved bytes at the beginning of the buffer */
WaveformBuffer_WindBack(WaveformBuffer * waveformBuffer,const size_t num_bytes)3808 ESR_ReturnCode WaveformBuffer_WindBack(WaveformBuffer* waveformBuffer, const size_t num_bytes)
3809 {
3810   ESR_ReturnCode rc;
3811   size_t bufferSize;
3812 
3813   if (num_bytes <= 0)
3814   {
3815     CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3816     return ESR_SUCCESS;
3817   }
3818 
3819   /* make sure windback buffer is big enough */
3820   if (num_bytes > waveformBuffer->windback_buffer_sz)
3821   {
3822     rc = ESR_OUT_OF_MEMORY;
3823     PLogError(L("%s: windback buffer is too small (needed=%d, had=%d)"), ESR_rc2str(rc), num_bytes, waveformBuffer->windback_buffer_sz);
3824     goto CLEANUP;
3825   }
3826 
3827   CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3828   /* skip the first few bytes written */
3829   if (bufferSize < num_bytes)
3830   {
3831     PLogError("bufferSize %d num_bytes %d (ESR_INVALID_STATE)\n", bufferSize, num_bytes);
3832     bufferSize = 0;
3833   }
3834   else
3835   {
3836     bufferSize -= num_bytes;
3837   }
3838   CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, bufferSize));
3839   /* read the last few bytes written */
3840   bufferSize = num_bytes;
3841   CHKLOG(rc, WaveformBuffer_Read(waveformBuffer, waveformBuffer->windback_buffer, &bufferSize));
3842 
3843   /* reset buffer */
3844   CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3845 
3846   /* rewrite the saved bytes at the beginning */
3847   CHKLOG(rc, WaveformBuffer_Write(waveformBuffer, waveformBuffer->windback_buffer, bufferSize));
3848   return ESR_SUCCESS;
3849 CLEANUP:
3850   return rc;
3851 }
3852 
WaveformBuffer_Destroy(WaveformBuffer * waveformBuffer)3853 ESR_ReturnCode WaveformBuffer_Destroy(WaveformBuffer* waveformBuffer)
3854 {
3855   if (waveformBuffer->cbuffer)
3856     FREE(waveformBuffer->cbuffer);
3857   if (waveformBuffer->windback_buffer)
3858     FREE(waveformBuffer->windback_buffer);
3859   if (waveformBuffer)
3860     FREE(waveformBuffer);
3861   return ESR_SUCCESS;
3862 }
3863 
WaveformBuffer_SetBufferingState(WaveformBuffer * waveformBuffer,waveform_buffering_state_t state)3864 ESR_ReturnCode WaveformBuffer_SetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t state)
3865 {
3866   waveformBuffer->state = state;
3867   return ESR_SUCCESS;
3868 }
3869 
WaveformBuffer_GetBufferingState(WaveformBuffer * waveformBuffer,waveform_buffering_state_t * state)3870 ESR_ReturnCode WaveformBuffer_GetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t* state)
3871 {
3872   *state = waveformBuffer->state;
3873   return ESR_SUCCESS;
3874 }
3875 
3876 /**
3877  * @return ESR_BUFFER_OVERFLOW if nametag EOS occured beyond end of buffer
3878  */
WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer * waveformBuffer,const LCHAR * end_pointed_result,const size_t bytes_per_frame)3879 ESR_ReturnCode WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer* waveformBuffer, const LCHAR* end_pointed_result, const size_t bytes_per_frame)
3880 {
3881   const LCHAR *p;
3882   size_t bos_frame, eos_frame, bufferSize, read_start_offset;
3883   ESR_ReturnCode rc;
3884 
3885   /* potential end pointed results
3886 
3887      -pau-@19 tape@36 scan@64 down@88 -pau2-@104
3888      -pau-@19 tape@34 off@55 -pau2-@78
3889      -pau-@19 tape@47 help@66 -pau2-@80
3890      -pau-@16 tape@36 reverse@71 -pau2-@91
3891      -pau-@21 tape@42 scan@59 down@80 -pau2-@91
3892 
3893      what I need to extract is the integer between "-pau-@" and ' '
3894      and the integer between '@' and " -pau2-"
3895   */
3896 
3897 
3898   p = LSTRSTR( end_pointed_result, PREFIX_WORD);
3899   if(p) p+=PREFIX_WORD_LEN; while(p && *p == '@') p++;
3900   rc = p ? lstrtoui(p, &bos_frame, 10) : ESR_INVALID_ARGUMENT;
3901   if (rc == ESR_INVALID_ARGUMENT)
3902   {
3903     PLogError(L("%s: extracting bos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3904     goto CLEANUP;
3905   }
3906   else if (rc != ESR_SUCCESS)
3907     goto CLEANUP;
3908 
3909   p = LSTRSTR( end_pointed_result, SUFFIX_WORD);
3910   while(p && p>end_pointed_result && p[-1]!='@') --p;
3911   rc = p ? lstrtoui(p, &eos_frame, 10) : ESR_INVALID_ARGUMENT;
3912   if (rc == ESR_INVALID_ARGUMENT)
3913   {
3914     PLogError(L("%s: extracting eos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3915     goto CLEANUP;
3916   }
3917   else if (rc != ESR_SUCCESS)
3918     goto CLEANUP;
3919 
3920   bos_frame -= (bos_frame > waveformBuffer->bos_comfort_frames ? waveformBuffer->bos_comfort_frames : 0);
3921   eos_frame += waveformBuffer->eos_comfort_frames;
3922 
3923   /*
3924    * I know where speech started, so I want to skip frames 0 to bos_frame.
3925    * I also know where speech ended so I want to set the amount of frames(bytes) to read for
3926    * the nametag audio buffer (i.e. the read_size)
3927    */
3928 
3929   read_start_offset = bos_frame * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3930   waveformBuffer->read_size = (eos_frame - bos_frame) * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3931 
3932   CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3933   if (read_start_offset + waveformBuffer->read_size > bufferSize)
3934   {
3935     waveformBuffer->overflow_count += read_start_offset + waveformBuffer->read_size - bufferSize;
3936     passert(waveformBuffer->overflow_count > 0);
3937     PLogMessage(L("Warning: Voice Enrollment audio buffer overflow (spoke too much, over by %d bytes)"),
3938                 waveformBuffer->overflow_count);
3939     return ESR_BUFFER_OVERFLOW;
3940   }
3941   CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, read_start_offset));
3942 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3943   PLogMessage(L("Voice Enrollment: bos@%d, eos@%d, therefore sizeof(waveform) should be %d"), bos_frame, eos_frame, waveformBuffer->read_size);
3944 #endif
3945   return ESR_SUCCESS;
3946 CLEANUP:
3947   return rc;
3948 }
3949 
3950 
WaveformBuffer_Reset(WaveformBuffer * waveformBuffer)3951 ESR_ReturnCode WaveformBuffer_Reset(WaveformBuffer* waveformBuffer)
3952 {
3953   CircularBufferReset(waveformBuffer->cbuffer);
3954   waveformBuffer->overflow_count = 0;
3955   waveformBuffer->read_size = 0;
3956   return ESR_SUCCESS;
3957 }
3958 
WaveformBuffer_GetSize(WaveformBuffer * waveformBuffer,size_t * size)3959 ESR_ReturnCode WaveformBuffer_GetSize(WaveformBuffer* waveformBuffer, size_t* size)
3960 {
3961   *size = CircularBufferGetSize(waveformBuffer->cbuffer);
3962   return ESR_SUCCESS;
3963 }
3964 
WaveformBuffer_Skip(WaveformBuffer * waveformBuffer,const size_t bytes)3965 ESR_ReturnCode WaveformBuffer_Skip(WaveformBuffer* waveformBuffer, const size_t bytes)
3966 {
3967   if (CircularBufferSkip(waveformBuffer->cbuffer, bytes) != (int) bytes)
3968     return ESR_INVALID_STATE;
3969   return ESR_SUCCESS;
3970 }
3971 
3972 
3973 
SR_Recognizer_Reset_Buffers(SR_RecognizerImpl * impl)3974 static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl )
3975     {
3976     ESR_ReturnCode  reset_status;
3977 
3978     FREE ( impl->audioBuffer );
3979     impl->audioBuffer = NULL;
3980     impl->audioBuffer = MALLOC ( impl->FRAME_SIZE, MTAG );
3981 
3982     if ( impl->audioBuffer != NULL )
3983         {
3984         WaveformBuffer_Destroy ( impl->waveformBuffer );
3985         impl->waveformBuffer = NULL;
3986         reset_status = WaveformBuffer_Create ( &impl->waveformBuffer, impl->FRAME_SIZE );
3987         }
3988     else
3989         {
3990         reset_status = ESR_OUT_OF_MEMORY;
3991         }
3992     return ( reset_status );
3993     }
3994 
3995 
3996 
SR_Recognizer_Validate_Sample_Rate(size_t sample_rate)3997 static ESR_ReturnCode SR_Recognizer_Validate_Sample_Rate ( size_t sample_rate )
3998     {
3999     ESR_ReturnCode  validate_status;
4000 
4001     switch ( sample_rate )
4002         {
4003         case 8000:
4004         case 11025:
4005         case 16000:
4006         case 22050:
4007             validate_status = ESR_SUCCESS;
4008             break;
4009 
4010         default:
4011             validate_status = ESR_INVALID_ARGUMENT;
4012             break;
4013         }
4014     return ( validate_status );
4015     }
4016 
4017 
4018 
SR_Recognizer_Sample_Rate_Needs_Change(size_t new_sample_rate,ESR_BOOL * needs_changing)4019 static ESR_ReturnCode SR_Recognizer_Sample_Rate_Needs_Change ( size_t new_sample_rate, ESR_BOOL *needs_changing )
4020     {
4021     ESR_ReturnCode  validate_status;
4022     size_t          current_sample_rate;
4023 
4024     validate_status = ESR_SessionGetSize_t ( "CREC.Frontend.samplerate", &current_sample_rate );
4025 
4026     if ( validate_status == ESR_SUCCESS )
4027         {
4028         if ( new_sample_rate != current_sample_rate )
4029             *needs_changing = ESR_TRUE;
4030         else
4031             *needs_changing = ESR_TRUE;
4032         }
4033     return ( validate_status );
4034     }
4035 
4036 
4037 
SR_Recognizer_Change_Sample_Rate_Session_Params_8K(void)4038 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( void )
4039     {
4040     ESR_ReturnCode  change_status;
4041     LCHAR           model_filenames [P_PATH_MAX];
4042     LCHAR           lda_filename [P_PATH_MAX];
4043     size_t          filename_length;
4044 
4045     filename_length = P_PATH_MAX;
4046     change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles8"), model_filenames, &filename_length );
4047 
4048     if ( change_status == ESR_SUCCESS )
4049         {
4050         filename_length = P_PATH_MAX;
4051         change_status = ESR_SessionGetLCHAR ( L("cmdline.lda8"), lda_filename, &filename_length );
4052 
4053 /* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4054         if ( change_status == ESR_SUCCESS )
4055             {
4056             change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", 8000 );
4057             if ( change_status == ESR_SUCCESS )
4058                 {
4059                 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 4000 );
4060 
4061                 if ( change_status == ESR_SUCCESS )
4062                     {
4063                     change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4064 
4065                     if ( change_status == ESR_SUCCESS )
4066                         change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4067                     }
4068                 }
4069             }
4070         else
4071             {
4072             PLogError (L("\nMissing Parameter lda8\n"));
4073             }
4074         }
4075     else
4076         {
4077         PLogError (L("\nMissing Parameter models8\n"));
4078         }
4079     return ( change_status );
4080     }
4081 
4082 
4083 
SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K(size_t sample_rate)4084 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( size_t sample_rate )
4085     {
4086     ESR_ReturnCode  change_status;
4087     LCHAR           model_filenames [P_PATH_MAX];
4088     LCHAR           lda_filename [P_PATH_MAX];
4089     size_t          filename_length;
4090 
4091     filename_length = P_PATH_MAX;
4092     change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles11"), model_filenames, &filename_length );
4093 
4094     if ( change_status == ESR_SUCCESS )
4095         {
4096         filename_length = P_PATH_MAX;
4097         change_status = ESR_SessionGetLCHAR ( L("cmdline.lda11"), lda_filename, &filename_length );
4098 
4099 /* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4100         if ( change_status == ESR_SUCCESS )
4101             {
4102             change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", sample_rate );
4103 
4104             if ( change_status == ESR_SUCCESS )
4105                 {
4106                 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 5500 );
4107 
4108                 if ( change_status == ESR_SUCCESS )
4109                     {
4110                     change_status =  ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4111 
4112                     if ( change_status == ESR_SUCCESS )
4113                         change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4114                     }
4115                 }
4116             }
4117         else
4118             {
4119             PLogError (L("\nMissing Parameter lda11\n"));
4120             }
4121         }
4122     else
4123         {
4124         PLogError (L("\nMissing Parameter models11\n"));
4125         }
4126     return ( change_status );
4127     }
4128 
4129 
4130 
SR_Recognizer_Change_Sample_Rate_Session_Params(size_t new_sample_rate)4131 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params ( size_t new_sample_rate )
4132     {
4133     ESR_ReturnCode  change_status;
4134 
4135     if ( new_sample_rate == 8000 )
4136         change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( );
4137     else
4138         change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( new_sample_rate );
4139 
4140     return ( change_status );
4141     }
4142 
4143 
4144 
SR_Recognizer_Change_Sample_RateImpl(SR_Recognizer * recognizer,size_t new_sample_rate)4145 ESR_ReturnCode SR_Recognizer_Change_Sample_RateImpl ( SR_Recognizer *recognizer, size_t new_sample_rate )
4146     {
4147     ESR_ReturnCode          change_status;
4148     ESR_BOOL                rate_needs_changing;
4149     SR_RecognizerImpl       *impl;
4150     CA_FrontendInputParams  *frontendParams;
4151 
4152     change_status = SR_Recognizer_Validate_Sample_Rate ( new_sample_rate );
4153 
4154     if ( change_status == ESR_SUCCESS )
4155         {
4156         change_status = SR_Recognizer_Sample_Rate_Needs_Change ( new_sample_rate, &rate_needs_changing );
4157 
4158         if ( change_status == ESR_SUCCESS )
4159             {
4160             if ( rate_needs_changing == ESR_TRUE )
4161                 {
4162                 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params ( new_sample_rate );
4163 
4164                 if ( change_status == ESR_SUCCESS )
4165                     { // SR_RecognizerCreateFrontendImpl
4166                     impl = (SR_RecognizerImpl *)recognizer;
4167                     change_status = SR_RecognizerUnsetupImpl( recognizer );
4168 
4169                     if ( change_status == ESR_SUCCESS )
4170                         {
4171                         CA_UnconfigureFrontend ( impl->frontend );
4172                         frontendParams = CA_AllocateFrontendParameters ( );
4173 
4174                         if ( frontendParams != NULL )
4175                             {
4176                             change_status = SR_RecognizerGetFrontendLegacyParametersImpl ( frontendParams );
4177 
4178                             if ( change_status == ESR_SUCCESS )
4179                                 {
4180                                 CA_ConfigureFrontend ( impl->frontend, frontendParams );
4181                                 CA_UnconfigureWave ( impl->wavein );
4182                                 CA_ConfigureWave ( impl->wavein, impl->frontend );
4183                                 impl->sampleRate = new_sample_rate;
4184                                 impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
4185                                 change_status = SR_Recognizer_Reset_Buffers ( impl );
4186 
4187                                 if ( change_status == ESR_SUCCESS )
4188                                     {
4189                                     change_status = SR_RecognizerSetupImpl( recognizer );
4190 
4191                                     if ( change_status == ESR_SUCCESS )
4192                                         change_status = SR_AcousticStateReset ( recognizer );
4193                                     }
4194                                 else
4195                                     {
4196                                     SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4197                                     }
4198                                 }
4199                             CA_FreeFrontendParameters ( frontendParams );
4200                             }
4201                         else
4202                             {
4203                             SR_RecognizerSetupImpl( recognizer );   /* Otherwise recognizer is in bad state */
4204                             change_status = ESR_OUT_OF_MEMORY;
4205                             }
4206                         }
4207                     }
4208                 }
4209             }
4210         }
4211     return ( change_status );
4212     }
4213 
4214 
4215