1 /*---------------------------------------------------------------------------*
2 * RecognizerImpl.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20
21 #include "ESR_Session.h"
22 #include "ESR_SessionTypeImpl.h"
23 #include "IntArrayList.h"
24 #include "LCHAR.h"
25 #include "passert.h"
26 #include "plog.h"
27 #include "pstdio.h"
28 #include "pmemory.h"
29 #include "ptimestamp.h"
30 #include "SR_AcousticModelsImpl.h"
31 #include "SR_AcousticStateImpl.h"
32 #include "SR_GrammarImpl.h"
33 #include "SR_SemprocDefinitions.h"
34 #include "SR_SemanticResult.h"
35 #include "SR_SemanticResultImpl.h"
36 #include "SR_Recognizer.h"
37 #include "SR_RecognizerImpl.h"
38 #include "SR_RecognizerResultImpl.h"
39 #include "SR_SemanticResultImpl.h"
40 #include "SR_EventLog.h"
41 #include "srec.h"
42
43 #define MTAG NULL
44 #define FILTER_NBEST_BY_SEM_RESULT 1
45 #define AUDIO_CIRC_BUFFER_SIZE 20000
46 #define SEMPROC_ACTIVE 1
47 #define SAMPLE_SIZE (16 / CHAR_BIT) /* 16-bits / sample */
48
49 /* milliseconds per FRAME = 1/FRAMERATE * 1000 */
50 /* We multiply by 2 because we skip even frames */
51 #define MSEC_PER_FRAME (2000/FRAMERATE)
52 #define MAX_ENTRY_LENGTH 512
53 #define PREFIX_WORD "-pau-"
54 #define PREFIX_WORD_LEN 5
55 #define SUFFIX_WORD "-pau2-"
56 #define SUFFIX_WORD_LEN 6
57
58
59 static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl );
60
61 /**
62 * Initializes recognizer properties to default values.
63 *
64 * Replaces setup_recognition_parameters()
65 */
SR_RecognizerToSessionImpl()66 ESR_ReturnCode SR_RecognizerToSessionImpl()
67 {
68 ESR_ReturnCode rc;
69
70 /* Old comment: remember to keep "ca_rip.h" up to date with these parameters... */
71
72 /* CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_acoustic_models", 2)); */
73 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Recognizer.partial_results", ESR_FALSE));
74 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.NBest", 1));
75 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.eou_threshold", 100));
76 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_altword_tokens", 400));
77 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_frames", 1000));
78 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_arcs", 3000));
79 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsm_nodes", 3000));
80 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_fsmnode_tokens", 1000));
81 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_hmm_tokens", 1000));
82 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_model_states", 1000));
83 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_searches", 2));
84 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.max_word_tokens", 1000));
85 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.non_terminal_timeout", 50));
86 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.num_wordends_per_frame", 10));
87 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.often", 10));
88 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.optional_terminal_timeout", 30));
89 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.reject", 500));
90 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.terminal_timeout", 10));
91 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.viterbi_prune_thresh", 5000));
92 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Recognizer.wordpen", 0));
93
94 CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("SREC.Recognizer.utterance_timeout", 400));
95
96 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("enableGetWaveform", ESR_FALSE));
97
98 return ESR_SUCCESS;
99 CLEANUP:
100 return rc;
101 }
102
103 /**
104 * Initializes frontend properties to default values.
105 *
106 * Replaces load_up_parameter_list()
107 */
SR_RecognizerFrontendToSessionImpl()108 ESR_ReturnCode SR_RecognizerFrontendToSessionImpl()
109 {
110 IntArrayList* intList = NULL;
111 ESR_ReturnCode rc;
112 ESR_BOOL exists;
113 size_t i;
114
115 /* Old comment: Remember to keep "ca_pip.h" up to date with these parameters... */
116
117 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.mel_dim", 12));
118 CHKLOG(rc, ESR_SessionSetSize_tIfEmpty("CREC.Frontend.samplerate", 8000));
119 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.premel", 0.98f));
120 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lowcut", 260)); /* Hz */
121 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.highcut", 4000)); /* Hz */
122 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.window_factor", 2.0)); /* times the frame size */
123 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_skip_even_frames", ESR_FALSE)); /* 10/20 ms rate */
124 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.offset", 0)); /* additional */
125 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.ddmel", ESR_FALSE)); /* delta-delta mel pars */
126 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.forgetfactor", 40));
127 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.sv6_margin", 10));
128 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rasta", ESR_FALSE));
129 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.rastac0", ESR_FALSE));
130 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.spectral_subtraction", ESR_FALSE));
131 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.spec_sub_dur", 0));
132 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.spec_sub_scale", 1.0));
133 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_dump", ESR_FALSE)); /* Output is filterbank (30 floats) */
134 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_filterbank_input", ESR_FALSE)); /* Input is filterbank (30 floats) in place of audio samples */
135 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.do_smooth_c0", ESR_TRUE));
136 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.plp", ESR_FALSE)); /* Do PLP instead of MEL */
137 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.lpcorder", 12)); /* order of lpc analysis in plp processing */
138 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.warp_scale", 1.0));
139 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.piecewise_start", 1.0));
140 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecayup", -1.0)); /* If +ve, decay factor on peakpicker (low to high) */
141 CHKLOG(rc, ESR_SessionSetFloatIfEmpty("CREC.Frontend.peakdecaydown", -1.0)); /* If +ve, decay factor on peakpicker (high to low) */
142 CHKLOG(rc, ESR_SessionSetBoolIfEmpty("CREC.Frontend.cuberoot", ESR_FALSE)); /* Use cube root instead of log */
143
144 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_offset", &exists));
145 if (!exists)
146 {
147 CHKLOG(rc, IntArrayListCreate(&intList));
148 for (i = 0; i < 32; ++i)
149 CHKLOG(rc, IntArrayListAdd(intList, 0));
150 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_offset", intList, TYPES_INTARRAYLIST));
151 intList = NULL;
152 }
153
154 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.mel_loop", &exists));
155 if (!exists)
156 {
157 CHKLOG(rc, IntArrayListCreate(&intList));
158 for (i = 0; i < 32; ++i)
159 CHKLOG(rc, IntArrayListAdd(intList, 1));
160 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.mel_loop", intList, TYPES_INTARRAYLIST));
161 intList = NULL;
162 }
163
164 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melA", &exists));
165 if (!exists)
166 {
167 CHKLOG(rc, IntArrayListCreate(&intList));
168 CHKLOG(rc, IntArrayListAdd(intList, (int) 13.2911));
169 CHKLOG(rc, IntArrayListAdd(intList, (int) 47.2229));
170 CHKLOG(rc, IntArrayListAdd(intList, (int) 79.2485));
171 CHKLOG(rc, IntArrayListAdd(intList, (int) 92.1967));
172 CHKLOG(rc, IntArrayListAdd(intList, (int) 136.3855));
173 CHKLOG(rc, IntArrayListAdd(intList, (int) 152.2896));
174 CHKLOG(rc, IntArrayListAdd(intList, (int) 183.3601));
175 CHKLOG(rc, IntArrayListAdd(intList, (int) 197.4200));
176 CHKLOG(rc, IntArrayListAdd(intList, (int) 217.8278));
177 CHKLOG(rc, IntArrayListAdd(intList, (int) 225.6556));
178 CHKLOG(rc, IntArrayListAdd(intList, (int) 263.3073));
179 CHKLOG(rc, IntArrayListAdd(intList, (int) 277.193));
180 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melA", intList, TYPES_INTARRAYLIST));
181 intList = NULL;
182 }
183
184 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.melB", &exists));
185 if (!exists)
186 {
187 CHKLOG(rc, IntArrayListCreate(&intList));
188 CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0847));
189 CHKLOG(rc, IntArrayListAdd(intList, (int) 91.3289));
190 CHKLOG(rc, IntArrayListAdd(intList, (int) 113.9995));
191 CHKLOG(rc, IntArrayListAdd(intList, (int) 123.0336));
192 CHKLOG(rc, IntArrayListAdd(intList, (int) 131.2704));
193 CHKLOG(rc, IntArrayListAdd(intList, (int) 128.9942));
194 CHKLOG(rc, IntArrayListAdd(intList, (int) 120.5267));
195 CHKLOG(rc, IntArrayListAdd(intList, (int) 132.0079));
196 CHKLOG(rc, IntArrayListAdd(intList, (int) 129.8076));
197 CHKLOG(rc, IntArrayListAdd(intList, (int) 126.5029));
198 CHKLOG(rc, IntArrayListAdd(intList, (int) 121.8519));
199 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.melB", intList, TYPES_INTARRAYLIST));
200 intList = NULL;
201 }
202
203 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelA", &exists));
204 if (!exists)
205 {
206 CHKLOG(rc, IntArrayListCreate(&intList));
207 CHKLOG(rc, IntArrayListAdd(intList, (int) 91.6305));
208 CHKLOG(rc, IntArrayListAdd(intList, (int) 358.3790));
209 CHKLOG(rc, IntArrayListAdd(intList, (int) 527.5946));
210 CHKLOG(rc, IntArrayListAdd(intList, (int) 536.3163));
211 CHKLOG(rc, IntArrayListAdd(intList, (int) 731.2385));
212 CHKLOG(rc, IntArrayListAdd(intList, (int) 757.8382));
213 CHKLOG(rc, IntArrayListAdd(intList, (int) 939.4460));
214 CHKLOG(rc, IntArrayListAdd(intList, (int) 1028.4136));
215 CHKLOG(rc, IntArrayListAdd(intList, (int) 1071.3193));
216 CHKLOG(rc, IntArrayListAdd(intList, (int) 1183.7922));
217 CHKLOG(rc, IntArrayListAdd(intList, (int) 1303.1014));
218 CHKLOG(rc, IntArrayListAdd(intList, (int) 1447.7766));
219 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelA", intList, TYPES_INTARRAYLIST));
220 intList = NULL;
221 }
222
223 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.dmelB", &exists));
224 if (!exists)
225 {
226 CHKLOG(rc, IntArrayListCreate(&intList));
227 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4785));
228 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3878));
229 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4029));
230 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3182));
231 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.3706));
232 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5394));
233 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5150));
234 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4270));
235 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4871));
236 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4088));
237 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4361));
238 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5449));
239 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.dmelB", intList, TYPES_INTARRAYLIST));
240 intList = NULL;
241 }
242
243 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelA", &exists));
244 if (!exists)
245 {
246 CHKLOG(rc, IntArrayListCreate(&intList));
247 CHKLOG(rc, IntArrayListAdd(intList, (int) 10.7381));
248 CHKLOG(rc, IntArrayListAdd(intList, (int) 32.6775));
249 CHKLOG(rc, IntArrayListAdd(intList, (int) 46.2301));
250 CHKLOG(rc, IntArrayListAdd(intList, (int) 51.5438));
251 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.6636));
252 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0581));
253 CHKLOG(rc, IntArrayListAdd(intList, (int) 65.3696));
254 CHKLOG(rc, IntArrayListAdd(intList, (int) 70.1910));
255 CHKLOG(rc, IntArrayListAdd(intList, (int) 71.6751));
256 CHKLOG(rc, IntArrayListAdd(intList, (int) 78.2364));
257 CHKLOG(rc, IntArrayListAdd(intList, (int) 83.2440));
258 CHKLOG(rc, IntArrayListAdd(intList, (int) 89.6261));
259 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelA", intList, TYPES_INTARRAYLIST));
260 intList = NULL;
261 }
262
263 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.ddmelB", &exists));
264 if (!exists)
265 {
266 CHKLOG(rc, IntArrayListCreate(&intList));
267 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5274));
268 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5098));
269 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5333));
270 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5963));
271 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5132));
272 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5282));
273 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5530));
274 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5682));
275 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4662));
276 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4342));
277 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.5235));
278 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.4061));
279 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.ddmelB", intList, TYPES_INTARRAYLIST));
280 intList = NULL;
281 }
282
283 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaA", &exists));
284 if (!exists)
285 {
286 CHKLOG(rc, IntArrayListCreate(&intList));
287 CHKLOG(rc, IntArrayListAdd(intList, (int) 7.80));
288 CHKLOG(rc, IntArrayListAdd(intList, (int) 37.0));
289 CHKLOG(rc, IntArrayListAdd(intList, (int) 54.0));
290 CHKLOG(rc, IntArrayListAdd(intList, (int) 57.0));
291 CHKLOG(rc, IntArrayListAdd(intList, (int) 84.0));
292 CHKLOG(rc, IntArrayListAdd(intList, (int) 86.5));
293 CHKLOG(rc, IntArrayListAdd(intList, (int) 98.1));
294 CHKLOG(rc, IntArrayListAdd(intList, (int) 127.0));
295 CHKLOG(rc, IntArrayListAdd(intList, (int) 153.0));
296 CHKLOG(rc, IntArrayListAdd(intList, (int) 160.0));
297 CHKLOG(rc, IntArrayListAdd(intList, (int) 188.0));
298 CHKLOG(rc, IntArrayListAdd(intList, (int) 199.0));
299 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaA", intList, TYPES_INTARRAYLIST));
300 intList = NULL;
301 }
302
303 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.rastaB", &exists));
304 if (!exists)
305 {
306 CHKLOG(rc, IntArrayListCreate(&intList));
307 CHKLOG(rc, IntArrayListAdd(intList, 117));
308 CHKLOG(rc, IntArrayListAdd(intList, 121));
309 CHKLOG(rc, IntArrayListAdd(intList, 114));
310 CHKLOG(rc, IntArrayListAdd(intList, 111));
311 CHKLOG(rc, IntArrayListAdd(intList, 113));
312 CHKLOG(rc, IntArrayListAdd(intList, 126));
313 CHKLOG(rc, IntArrayListAdd(intList, 134));
314 CHKLOG(rc, IntArrayListAdd(intList, 130));
315 CHKLOG(rc, IntArrayListAdd(intList, 135));
316 CHKLOG(rc, IntArrayListAdd(intList, 129));
317 CHKLOG(rc, IntArrayListAdd(intList, 139));
318 CHKLOG(rc, IntArrayListAdd(intList, 138));
319 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.rastaB", intList, TYPES_INTARRAYLIST));
320 intList = NULL;
321 }
322
323 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_detect", 18));
324 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_above", 18));
325 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.ambient_within", 12));
326 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.start_windback", 50));
327 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.utterance_allowance", 40));
328 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.speech_duration", 6));
329 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.quiet_duration", 20));
330
331 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_clip", 32767));
332 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_clip", -32768));
333 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_per10000_clip", 10));
334 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.max_dc_offset", 1000));
335 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.high_noise_level_bit", 11));
336 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.low_speech_level_bit", 11));
337 CHKLOG(rc, ESR_SessionSetIntIfEmpty("CREC.Frontend.min_samples", 10000));
338
339 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_freq", &exists));
340 if (!exists)
341 {
342 CHKLOG(rc, IntArrayListCreate(&intList));
343 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_freq", intList, TYPES_INTARRAYLIST));
344 intList = NULL;
345 }
346 CHKLOG(rc, ESR_SessionContains("CREC.Frontend.spectrum_filter_spread", &exists));
347 if (!exists)
348 {
349 CHKLOG(rc, IntArrayListCreate(&intList));
350 CHKLOG(rc, ESR_SessionSetProperty("CREC.Frontend.spectrum_filter_spread", intList, TYPES_INTARRAYLIST));
351 intList = NULL;
352 }
353 return ESR_SUCCESS;
354 CLEANUP:
355 if (intList != NULL)
356 intList->destroy(intList);
357 return rc;
358 }
359
360 /**
361 * Generate legacy frontend parameter structure from ESR_Session.
362 *
363 * @param impl SR_RecognizerImpl handle
364 * @param params Resulting structure
365 */
SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams * params)366 ESR_ReturnCode SR_RecognizerGetFrontendLegacyParametersImpl(CA_FrontendInputParams* params)
367 {
368 ESR_ReturnCode rc;
369 IntArrayList* intList;
370 size_t size, i, size_tValue;
371 int iValue;
372
373 passert(params != NULL);
374 params->is_loaded = ESR_FALSE;
375 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.mel_dim", ¶ms->mel_dim));
376 CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &size_tValue));
377 params->samplerate = (int) size_tValue;
378 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.premel", ¶ms->pre_mel));
379 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lowcut", ¶ms->low_cut));
380 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.highcut", ¶ms->high_cut));
381 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.window_factor", ¶ms->window_factor));
382 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_skip_even_frames", ¶ms->do_skip_even_frames));
383 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.offset", ¶ms->offset));
384 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.ddmel", ¶ms->do_dd_mel));
385 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.forgetfactor", ¶ms->forget_factor));
386 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.sv6_margin", ¶ms->sv6_margin));
387 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.rastac0", ¶ms->do_rastac0));
388 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.spectral_subtraction", ¶ms->do_spectral_sub));
389 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.spec_sub_dur", ¶ms->spectral_sub_frame_dur));
390 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.spec_sub_scale", ¶ms->spec_sub_scale));
391 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_dump", ¶ms->do_filterbank_input));
392 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_filterbank_input", ¶ms->do_filterbank_input));
393 CHKLOG(rc, ESR_SessionGetBool("CREC.Frontend.do_smooth_c0", ¶ms->do_smooth_c0));
394 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.lpcorder", ¶ms->lpc_order));
395 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.warp_scale", ¶ms->warp_scale));
396 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.piecewise_start", ¶ms->piecewise_start));
397 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecayup", ¶ms->peakpickup));
398 CHKLOG(rc, ESR_SessionGetFloat("CREC.Frontend.peakdecaydown", ¶ms->peakpickdown));
399
400 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_offset", (void **)&intList, TYPES_INTARRAYLIST));
401 if (intList == NULL)
402 {
403 PLogError(L("ESR_INVALID_STATE"));
404 return ESR_INVALID_STATE;
405 }
406 CHKLOG(rc, IntArrayListGetSize(intList, &size));
407 for (i = 0; i < size; ++i)
408 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->mel_offset[i]));
409
410 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.mel_loop", (void **)&intList, TYPES_INTARRAYLIST));
411 if (intList == NULL)
412 {
413 PLogError(L("ESR_INVALID_STATE"));
414 return ESR_INVALID_STATE;
415 }
416 CHKLOG(rc, IntArrayListGetSize(intList, &size));
417 for (i = 0; i < size; ++i)
418 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->mel_loop[i]));
419
420 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melA", (void **)&intList, TYPES_INTARRAYLIST));
421 CHKLOG(rc, IntArrayListGetSize(intList, &size));
422 for (i = 0; i < size; ++i)
423 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->melA_scale[i]));
424
425 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.melB", (void **)&intList, TYPES_INTARRAYLIST));
426 CHKLOG(rc, IntArrayListGetSize(intList, &size));
427 for (i = 0; i < size; ++i)
428 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->melB_scale[i]));
429
430 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelA", (void **)&intList, TYPES_INTARRAYLIST));
431 CHKLOG(rc, IntArrayListGetSize(intList, &size));
432 for (i = 0; i < size; ++i)
433 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->dmelA_scale[i]));
434
435 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.dmelB", (void **)&intList, TYPES_INTARRAYLIST));
436 CHKLOG(rc, IntArrayListGetSize(intList, &size));
437 for (i = 0; i < size; ++i)
438 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->dmelB_scale[i]));
439
440 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelA", (void **)&intList, TYPES_INTARRAYLIST));
441 CHKLOG(rc, IntArrayListGetSize(intList, &size));
442 for (i = 0; i < size; ++i)
443 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->ddmelA_scale[i]));
444
445 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.ddmelB", (void **)&intList, TYPES_INTARRAYLIST));
446 CHKLOG(rc, IntArrayListGetSize(intList, &size));
447 for (i = 0; i < size; ++i)
448 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->ddmelB_scale[i]));
449
450 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaA", (void **)&intList, TYPES_INTARRAYLIST));
451 CHKLOG(rc, IntArrayListGetSize(intList, &size));
452 for (i = 0; i < size; ++i)
453 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->rastaA_scale[i]));
454
455 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.rastaB", (void **)&intList, TYPES_INTARRAYLIST));
456 CHKLOG(rc, IntArrayListGetSize(intList, &size));
457 for (i = 0; i < size; ++i)
458 CHKLOG(rc, IntArrayListGet(intList, i, ¶ms->rastaB_scale[i]));
459
460 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_detect", ¶ms->voice_margin));
461 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_above", ¶ms->fast_voice_margin));
462 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.ambient_within", ¶ms->tracker_margin));
463 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.start_windback", ¶ms->start_windback));
464 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.utterance_allowance", ¶ms->unsure_duration));
465 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.speech_duration", ¶ms->voice_duration));
466 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.quiet_duration", ¶ms->quiet_duration));
467
468 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_clip", ¶ms->high_clip));
469 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_clip", ¶ms->low_clip));
470 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_per10000_clip", ¶ms->max_per10000_clip));
471 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.max_dc_offset", ¶ms->max_dc_offset));
472 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.high_noise_level_bit", ¶ms->high_noise_level_bit));
473 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.low_speech_level_bit", ¶ms->low_speech_level_bit));
474 CHKLOG(rc, ESR_SessionGetInt("CREC.Frontend.min_samples", ¶ms->min_samples));
475
476 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_freq", (void **)&intList, TYPES_INTARRAYLIST));
477 if (intList == NULL)
478 {
479 PLogError(L("ESR_INVALID_STATE"));
480 return ESR_INVALID_STATE;
481 }
482 CHKLOG(rc, IntArrayListGetSize(intList, &size));
483 for (i = 0; i < size; ++i)
484 {
485 CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
486 params->spectrum_filter_freq[i] = iValue;
487 }
488
489 CHKLOG(rc, ESR_SessionGetProperty("CREC.Frontend.spectrum_filter_spread", (void **)&intList, TYPES_INTARRAYLIST));
490 if (intList == NULL)
491 {
492 PLogError(L("ESR_INVALID_STATE"));
493 return ESR_INVALID_STATE;
494 }
495 CHKLOG(rc, IntArrayListGetSize(intList, &size));
496 for (i = 0; i < size; ++i)
497 {
498 CHKLOG(rc, IntArrayListGet(intList, i, &iValue));
499 params->spectrum_filter_spread[i] = iValue;
500 }
501 params->is_loaded = ESR_TRUE;
502 return ESR_SUCCESS;
503 CLEANUP:
504 return rc;
505 }
506
507 /**
508 * Creates frontend components of SR_Recognizer.
509 *
510 * @param impl SR_RecognizerImpl handle
511 */
SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl * impl)512 ESR_ReturnCode SR_RecognizerCreateFrontendImpl(SR_RecognizerImpl* impl)
513 {
514 ESR_ReturnCode rc;
515 CA_FrontendInputParams* frontendParams;
516
517 /* Create a frontend object */
518 impl->frontend = CA_AllocateFrontend(1, 0, 1);
519 frontendParams = CA_AllocateFrontendParameters();
520 CHKLOG(rc, SR_RecognizerGetFrontendLegacyParametersImpl(frontendParams));
521
522 CA_ConfigureFrontend(impl->frontend, frontendParams);
523
524 /* Create a wave object */
525 impl->wavein = CA_AllocateWave('N');
526 if (impl->wavein == NULL)
527 {
528 rc = ESR_OUT_OF_MEMORY;
529 PLogError(ESR_rc2str(rc));
530 goto CLEANUP;
531 }
532 CA_ConfigureWave(impl->wavein, impl->frontend);
533 CA_ConfigureVoicingAnalysis(impl->wavein, frontendParams);
534
535 CA_LoadCMSParameters(impl->wavein, NULL, frontendParams);
536
537 /* Create an utterance object */
538 impl->utterance = CA_AllocateUtterance();
539 if (impl->utterance == NULL)
540 {
541 rc = ESR_OUT_OF_MEMORY;
542 PLogError(ESR_rc2str(rc));
543 goto CLEANUP;
544 }
545 CA_InitUtteranceForFrontend(impl->utterance, frontendParams);
546 CA_AttachCMStoUtterance(impl->wavein, impl->utterance);
547 CA_FreeFrontendParameters(frontendParams);
548 return ESR_SUCCESS;
549
550 CLEANUP:
551 if (impl->frontend != NULL)
552 {
553 CA_UnconfigureFrontend(impl->frontend);
554 CA_FreeFrontend(impl->frontend);
555 impl->frontend = NULL;
556 }
557 if (impl->wavein != NULL)
558 {
559 CA_UnconfigureWave(impl->wavein);
560 CA_FreeWave(impl->wavein);
561 impl->wavein = NULL;
562 }
563 if (impl->utterance != NULL)
564 {
565 CA_ClearUtterance(impl->utterance);
566 CA_FreeUtterance(impl->utterance);
567 impl->utterance = NULL;
568 }
569 if (frontendParams != NULL)
570 CA_FreeFrontendParameters(frontendParams);
571 return rc;
572 }
573
574 /**
575 * Populates legacy recognizer parameters from the session.
576 *
577 * Replaces setup_pattern_parameters()
578 */
SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams * params)579 ESR_ReturnCode SR_AcousticModels_LoadLegacyRecognizerParameters(CA_RecInputParams* params)
580 {
581 ESR_ReturnCode rc;
582
583 passert(params != NULL);
584 params->is_loaded = ESR_FALSE;
585 CHKLOG(rc, ESR_SessionGetBool("CREC.Recognizer.partial_results", ¶ms->do_partial));
586 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.NBest", ¶ms->top_choices));
587 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.eou_threshold", ¶ms->eou_threshold));
588 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_altword_tokens", ¶ms->max_altword_tokens));
589 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_frames", ¶ms->max_frames));
590 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_arcs", ¶ms->max_fsm_arcs));
591 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsm_nodes", ¶ms->max_fsm_nodes));
592 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_fsmnode_tokens", ¶ms->max_fsmnode_tokens));
593 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_hmm_tokens", ¶ms->max_hmm_tokens));
594 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_model_states", ¶ms->max_model_states));
595 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_searches", ¶ms->max_searches));
596 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.max_word_tokens", ¶ms->max_word_tokens));
597 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.non_terminal_timeout", ¶ms->non_terminal_timeout));
598 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.num_wordends_per_frame", ¶ms->num_wordends_per_frame));
599 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.often", ¶ms->traceback_freq));
600 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.optional_terminal_timeout", ¶ms->optional_terminal_timeout));
601 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.reject", ¶ms->reject_score));
602 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.terminal_timeout", ¶ms->terminal_timeout));
603 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.viterbi_prune_thresh", ¶ms->viterbi_prune_thresh));
604 CHKLOG(rc, ESR_SessionGetInt("CREC.Recognizer.wordpen", ¶ms->word_penalty));
605 params->is_loaded = ESR_TRUE;
606
607 return ESR_SUCCESS;
608 CLEANUP:
609 return rc;
610 }
611
SR_RecognizerCreate(SR_Recognizer ** self)612 ESR_ReturnCode SR_RecognizerCreate(SR_Recognizer** self)
613 {
614 SR_RecognizerImpl* impl;
615 CA_RecInputParams* recogParams = NULL;
616 ESR_ReturnCode rc;
617 LCHAR recHandle[12];
618
619 if (self == NULL)
620 {
621 PLogError(L("ESR_INVALID_ARGUMENT"));
622 return ESR_INVALID_ARGUMENT;
623 }
624 impl = NEW(SR_RecognizerImpl, MTAG);
625 if (impl == NULL)
626 {
627 PLogError(L("ESR_OUT_OF_MEMORY"));
628 return ESR_OUT_OF_MEMORY;
629 }
630
631 impl->Interface.start = &SR_RecognizerStartImpl;
632 impl->Interface.stop = &SR_RecognizerStopImpl;
633 impl->Interface.destroy = &SR_RecognizerDestroyImpl;
634 impl->Interface.setup = &SR_RecognizerSetupImpl;
635 impl->Interface.unsetup = &SR_RecognizerUnsetupImpl;
636 impl->Interface.isSetup = &SR_RecognizerIsSetupImpl;
637 impl->Interface.getParameter = &SR_RecognizerGetParameterImpl;
638 impl->Interface.getSize_tParameter = &SR_RecognizerGetSize_tParameterImpl;
639 impl->Interface.getBoolParameter = &SR_RecognizerGetBoolParameterImpl;
640 impl->Interface.setParameter = &SR_RecognizerSetParameterImpl;
641 impl->Interface.setSize_tParameter = &SR_RecognizerSetSize_tParameterImpl;
642 impl->Interface.setBoolParameter = &SR_RecognizerSetBoolParameterImpl;
643 impl->Interface.setLockFunction = &SR_RecognizerSetLockFunctionImpl;
644 impl->Interface.hasSetupRules = &SR_RecognizerHasSetupRulesImpl;
645 impl->Interface.activateRule = &SR_RecognizerActivateRuleImpl;
646 impl->Interface.deactivateRule = &SR_RecognizerDeactivateRuleImpl;
647 impl->Interface.deactivateAllRules = &SR_RecognizerDeactivateAllRulesImpl;
648 impl->Interface.isActiveRule = &SR_RecognizerIsActiveRuleImpl;
649 impl->Interface.setWordAdditionCeiling = &SR_RecognizerSetWordAdditionCeilingImpl;
650 impl->Interface.checkGrammarConsistency = &SR_RecognizerCheckGrammarConsistencyImpl;
651 impl->Interface.getModels = &SR_RecognizerGetModelsImpl;
652 impl->Interface.putAudio = &SR_RecognizerPutAudioImpl;
653 impl->Interface.advance = &SR_RecognizerAdvanceImpl;
654 impl->Interface.loadUtterance = &SR_RecognizerLoadUtteranceImpl;
655 impl->Interface.loadWaveFile = &SR_RecognizerLoadWaveFileImpl;
656 impl->Interface.logEvent = &SR_RecognizerLogEventImpl;
657 impl->Interface.logToken = &SR_RecognizerLogTokenImpl;
658 impl->Interface.logTokenInt = &SR_RecognizerLogTokenIntImpl;
659 impl->Interface.logSessionStart = &SR_RecognizerLogSessionStartImpl;
660 impl->Interface.logSessionEnd = &SR_RecognizerLogSessionEndImpl;
661 impl->Interface.logWaveformData = &SR_RecognizerLogWaveformDataImpl;
662 impl->Interface.isSignalClipping = &SR_RecognizerIsSignalClippingImpl;
663 impl->Interface.isSignalDCOffset = &SR_RecognizerIsSignalDCOffsetImpl;
664 impl->Interface.isSignalNoisy = &SR_RecognizerIsSignalNoisyImpl;
665 impl->Interface.isSignalTooFewSamples = &SR_RecognizerIsSignalTooFewSamplesImpl;
666 impl->Interface.isSignalTooManySamples = &SR_RecognizerIsSignalTooManySamplesImpl;
667 impl->Interface.isSignalTooQuiet = &SR_RecognizerIsSignalTooQuietImpl;
668
669 impl->frontend = NULL;
670 impl->wavein = NULL;
671 impl->utterance = NULL;
672 impl->confidenceScorer = NULL;
673 impl->recognizer = NULL;
674 impl->models = NULL;
675 impl->grammars = NULL;
676 impl->result = NULL;
677 impl->parameters = NULL;
678 impl->acousticState = NULL;
679 impl->audioBuffer = NULL;
680 impl->buffer = NULL;
681 impl->frames = impl->processed;
682 impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
683 impl->isStarted = ESR_FALSE;
684 impl->isRecognizing = ESR_FALSE;
685 impl->gotLastFrame = ESR_FALSE;
686 impl->sampleRate = 0;
687 impl->lockFunction = NULL;
688 impl->lockData = NULL;
689 impl->eventLog = NULL;
690 impl->osi_log_level = 0;
691 impl->waveformBuffer = NULL;
692 impl->isSignalQualityInitialized = ESR_FALSE;
693 impl->beginningOfSpeechOffset = 0;
694 impl->gatedMode = ESR_TRUE;
695 impl->bgsniff = 0;
696 impl->isSignalClipping = ESR_FALSE;
697 impl->isSignalDCOffset = ESR_FALSE;
698 impl->isSignalNoisy = ESR_FALSE;
699 impl->isSignalTooFewSamples = ESR_FALSE;
700 impl->isSignalTooManySamples = ESR_FALSE;
701 impl->isSignalTooQuiet = ESR_FALSE;
702
703 CHKLOG(rc, ESR_SessionTypeCreate(&impl->parameters));
704 CHKLOG(rc, SR_RecognizerToSessionImpl());
705 CHKLOG(rc, ESR_SessionGetSize_t(L("SREC.Recognizer.osi_log_level"), &impl->osi_log_level));
706
707 /* create the event log */
708 if (impl->osi_log_level) /* do some logging if non-zero val */
709 CHKLOG(rc, ESR_SessionGetProperty(L("eventlog"), (void **)&impl->eventLog, TYPES_SR_EVENTLOG));
710
711 /* Record the OSI log event */
712 psprintf(recHandle, L("%p"), impl);
713 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
714 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrst")));
715
716 CHKLOG(rc, SR_RecognizerFrontendToSessionImpl());
717 CHKLOG(rc, SR_RecognizerCreateFrontendImpl(impl));
718 rc = ESR_SessionGetProperty("recognizer.confidenceScorer", (void **)&impl->confidenceScorer, TYPES_CONFIDENCESCORER);
719 if (rc == ESR_NO_MATCH_ERROR)
720 {
721 impl->confidenceScorer = CA_AllocateConfidenceScorer();
722
723 if (!CA_LoadConfidenceScorer(impl->confidenceScorer)) {
724 rc = ESR_INVALID_STATE;
725 PLogError(ESR_rc2str(rc));
726 goto CLEANUP;
727 }
728 CHKLOG(rc, ESR_SessionSetProperty("recognizer.confidenceScorer", impl->confidenceScorer, TYPES_CONFIDENCESCORER));
729 }
730 else if (rc != ESR_SUCCESS)
731 {
732 PLogError(ESR_rc2str(rc));
733 goto CLEANUP;
734 }
735
736 recogParams = CA_AllocateRecognitionParameters();
737 if (recogParams == NULL)
738 {
739 rc = ESR_OUT_OF_MEMORY;
740 PLogError(ESR_rc2str(rc));
741 goto CLEANUP;
742 }
743 CHKLOG(rc, SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams));
744 impl->recognizer = CA_AllocateRecognition();
745 if (impl->recognizer == NULL)
746 {
747 PLogError(ESR_rc2str(rc));
748 goto CLEANUP;
749 }
750 CA_ConfigureRecognition(impl->recognizer, recogParams);
751 CA_FreeRecognitionParameters(recogParams);
752 CHKLOG(rc, HashMapCreate(&impl->grammars));
753 CHKLOG(rc, CircularBufferCreate(sizeof(asr_int16_t) * AUDIO_CIRC_BUFFER_SIZE, MTAG, &impl->buffer));
754 CHKLOG(rc, ESR_SessionGetSize_t("CREC.Frontend.samplerate", &impl->sampleRate));
755
756 impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
757
758 if ((impl->audioBuffer = MALLOC(impl->FRAME_SIZE, MTAG)) == NULL)
759 {
760 rc = ESR_OUT_OF_MEMORY;
761 goto CLEANUP;
762 }
763
764 /* create the waveform buffer */
765 CHKLOG(rc, WaveformBuffer_Create(&impl->waveformBuffer, impl->FRAME_SIZE));
766
767 CHKLOG(rc, ESR_SessionGetSize_t("SREC.Recognizer.utterance_timeout", &impl->utterance_timeout));
768
769 /* OSI logging (SUCCESS) */
770 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
771 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
772 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd")));
773
774 CHKLOG(rc, SR_AcousticStateCreateImpl(&impl->Interface));
775
776 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.bgsniff"), &impl->bgsniff));
777 /* gated mode == beginning of speech detection */
778 CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &impl->gatedMode));
779
780 *self = (SR_Recognizer*) impl;
781 return ESR_SUCCESS;
782 CLEANUP:
783 /* OSI logging (FAILURE) */
784 if (impl->eventLog != NULL)
785 {
786 SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle);
787 SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("FAILURE"), ESR_rc2str(rc));
788 SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIcrnd"));
789 }
790
791 if (recogParams != NULL)
792 CA_FreeRecognitionParameters(recogParams);
793 impl->Interface.destroy(&impl->Interface);
794 return rc;
795 }
796
SR_RecognizerDestroyImpl(SR_Recognizer * self)797 ESR_ReturnCode SR_RecognizerDestroyImpl(SR_Recognizer* self)
798 {
799 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
800 ESR_BOOL exists; // isSetup;
801 ESR_ReturnCode rc;
802 LCHAR recHandle[12];
803
804 if (impl->result != NULL)
805 {
806 SR_RecognizerResult_Destroy(impl->result);
807 impl->result = NULL;
808 }
809
810 if (impl->eventLog != NULL)
811 {
812 /* Record the OSI log event */
813 psprintf(recHandle, L("%p"), impl);
814 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
815 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesst")));
816 }
817
818 /* Clean session */
819 CHKLOG(rc, ESR_SessionContains("recognizer.confidenceScorer", &exists));
820 if (exists)
821 CHKLOG(rc, ESR_SessionRemoveProperty("recognizer.confidenceScorer"));
822
823 if (impl->confidenceScorer != NULL)
824 {
825 CA_FreeConfidenceScorer(impl->confidenceScorer);
826 impl->confidenceScorer = NULL;
827 }
828
829 /* Clear CMS, CRS_RecognizerClose() */
830 if (impl->wavein != NULL)
831 {
832 ESR_BOOL isAttached, isConfigured;
833
834 CHKLOG(rc, CA_IsCMSAttachedtoUtterance(impl->wavein, &isAttached));
835 if (isAttached)
836 CA_DetachCMSfromUtterance(impl->wavein, impl->utterance);
837
838 CHKLOG(rc, CA_IsConfiguredForAgc(impl->wavein, &isConfigured));
839 if (isConfigured)
840 CA_ClearCMSParameters(impl->wavein);
841 }
842
843 /* Free Utterance */
844 if (impl->utterance != NULL)
845 {
846 CA_ClearUtterance(impl->utterance);
847 CA_FreeUtterance(impl->utterance);
848 impl->utterance = NULL;
849 }
850
851 /* Free WaveformBuffer */
852 if (impl->waveformBuffer != NULL)
853 {
854 WaveformBuffer_Destroy(impl->waveformBuffer);
855 impl->waveformBuffer = NULL;
856 }
857
858 /* Free recognizer */
859 /* CHKLOG(rc, self->isSetup(self, &isSetup));
860 if (isSetup)
861 CHKLOG(rc, self->unsetup(self));*/
862 if (impl->grammars != NULL)
863 CHKLOG(rc, self->deactivateAllRules(self));
864 if (impl->recognizer != NULL)
865 {
866 CA_UnloadRecognitionModels(impl->recognizer);
867 CA_UnconfigureRecognition(impl->recognizer);
868 CA_FreeRecognition(impl->recognizer);
869 impl->recognizer = NULL;
870 }
871
872 if (impl->grammars != NULL)
873 {
874 CHKLOG(rc, HashMapDestroy(impl->grammars));
875 impl->grammars = NULL;
876 }
877
878 if (impl->buffer != NULL)
879 {
880 FREE(impl->buffer);
881 impl->buffer = NULL;
882 }
883
884 if (impl->audioBuffer != NULL)
885 {
886 FREE(impl->audioBuffer);
887 impl->audioBuffer = NULL;
888 }
889
890 /* Free frontend */
891 if (impl->frontend)
892 {
893 CA_UnconfigureFrontend(impl->frontend);
894 CA_FreeFrontend(impl->frontend);
895 impl->frontend = NULL;
896 }
897
898 /* Free wave */
899 if (impl->wavein)
900 {
901 CA_UnconfigureWave(impl->wavein);
902 CA_FreeWave(impl->wavein);
903 impl->wavein = NULL;
904 }
905
906 if (impl->parameters != NULL)
907 CHKLOG(rc, impl->parameters->destroy(impl->parameters));
908
909 if (impl->eventLog != NULL)
910 {
911 /* OSI logging (SUCCESS) */
912 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("REC"), recHandle));
913 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SUCCESS"), L("ESR_SUCCESS")));
914 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIdesnd")));
915 impl->eventLog = NULL;
916 }
917
918 if (impl->acousticState != NULL)
919 {
920 impl->acousticState->destroy(self);
921 impl->acousticState = NULL;
922 }
923 FREE(impl);
924 return ESR_SUCCESS;
925 CLEANUP:
926 return rc;
927 }
928
beginRecognizing(SR_RecognizerImpl * impl)929 ESR_ReturnCode beginRecognizing(SR_RecognizerImpl* impl)
930 {
931 CA_RecInputParams* recogParams;
932 LCHAR tok[80];
933 LCHAR* val;
934 PTimeStamp BORT;
935 size_t i, grammarSize;
936 ESR_ReturnCode rc;
937
938 /* Setup recognizer for new utterance */
939 recogParams = CA_AllocateRecognitionParameters();
940 if (recogParams == NULL)
941 {
942 rc = ESR_OUT_OF_MEMORY;
943 PLogError(ESR_rc2str(rc));
944 goto CLEANUP;
945 }
946 SR_AcousticModels_LoadLegacyRecognizerParameters(recogParams);
947 CA_BeginRecognition(impl->recognizer, NULL, 1, recogParams);
948 CA_FreeRecognitionParameters(recogParams);
949 impl->isRecognizing = ESR_TRUE;
950
951 /* OSI log the grammars */
952 CHKLOG(rc, HashMapGetSize(impl->grammars, &grammarSize));
953 for (i = 0; i < grammarSize; ++i)
954 {
955 psprintf(tok, L("GURI%zu"), i);
956 /* use the key as the grammar URI */
957 CHKLOG(rc, HashMapGetKeyAtIndex(impl->grammars, i, &val));
958 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, tok, val));
959 }
960 /* OSI ACST acoustic state reset */
961 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("ACST"), 0));
962 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("LANG"), L("en-us")));
963
964 /* OSI log the start of recognition */
965 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcst")));
966
967 /* save the BORT timing (begin of recog) */
968 PTimeStampSet(&BORT);
969 impl->recogLogTimings.BORT = PTimeStampDiff(&BORT, &impl->timestamp);
970
971 return ESR_SUCCESS;
972 CLEANUP:
973 if (recogParams != NULL)
974 CA_FreeRecognitionParameters(recogParams);
975 return rc;
976 }
977
SR_RecognizerStartImpl(SR_Recognizer * self)978 ESR_ReturnCode SR_RecognizerStartImpl(SR_Recognizer* self)
979 {
980 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
981 size_t silence_duration_in_frames;
982 size_t end_of_utterance_hold_off_in_frames;
983 size_t grammarCount;
984 ESR_ReturnCode rc;
985 ESR_BOOL enableGetWaveform = ESR_FALSE;
986
987 CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarCount));
988 if (impl->models == NULL)
989 {
990 PLogError("ESR_INVALID_STATE: No rule has been set up");
991 return ESR_INVALID_STATE;
992 }
993 if (grammarCount < 1)
994 {
995 PLogError("ESR_INVALID_STATE: No rule has been activated");
996 return ESR_INVALID_STATE;
997 }
998
999 if (!CA_OpenWaveFromDevice(impl->wavein, DEVICE_RAW_PCM, impl->frontend->samplerate, 0, WAVE_DEVICE_RAW))
1000 {
1001 rc = ESR_INVALID_STATE;
1002 PLogError(ESR_rc2str(rc));
1003 goto CLEANUP;
1004 }
1005
1006 /* Setup utterance */
1007 CA_UnlockUtteranceForInput(impl->utterance);
1008
1009 /* Setup utterance */
1010 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.silence_duration_in_frames"), &silence_duration_in_frames));
1011 CHKLOG(rc, ESR_SessionGetSize_t(L("cmdline.end_of_utterance_hold_off_in_frames"), &end_of_utterance_hold_off_in_frames));
1012 CA_SetEndOfUtteranceByLevelTimeout(impl->utterance, silence_duration_in_frames, end_of_utterance_hold_off_in_frames);
1013
1014 CA_ResetVoicing(impl->utterance);
1015
1016 /*
1017 * NOTE: We don't actually begin the recognizer here, the beginning of speech
1018 * detector will do that.
1019 */
1020
1021 impl->gotLastFrame = ESR_FALSE;
1022 impl->isStarted = ESR_TRUE;
1023 impl->isRecognizing = ESR_FALSE;
1024 impl->isSignalQualityInitialized = ESR_FALSE;
1025 impl->internalState = SR_RECOGNIZER_INTERNAL_BEGIN;
1026 PTimeStampSet(&impl->timestamp);
1027
1028 /* reset waveform buffer at start of every recognition */
1029 CHKLOG(rc, WaveformBuffer_Reset(impl->waveformBuffer));
1030
1031 /* is waveform buffering active? */
1032 rc = ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
1033 // rc = impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
1034 if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR)
1035 {
1036 PLogError(L("%s: could determine whether VoiceEnrollment active or not"), ESR_rc2str(rc));
1037 goto CLEANUP;
1038 }
1039 if (enableGetWaveform)
1040 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_CIRCULAR));
1041 else
1042 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_OFF));
1043
1044 /* I am going to try to open the audio waveform file here */
1045 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1046 {
1047 /* open a new audio waveform file */
1048 rc = SR_EventLogAudioOpen(impl->eventLog, L("audio/L16"), impl->sampleRate, SAMPLE_SIZE);
1049 if (rc != ESR_SUCCESS)
1050 {
1051 PLogError(L("%s: could not open the RIFF audio file"), ESR_rc2str(rc));
1052 goto CLEANUP;
1053 }
1054 }
1055 impl->frames = impl->processed = 0;
1056 return ESR_SUCCESS;
1057 CLEANUP:
1058 /* self->stop(self);*/
1059 return rc;
1060 }
1061
SR_RecognizerStopImpl(SR_Recognizer * self)1062 ESR_ReturnCode SR_RecognizerStopImpl(SR_Recognizer* self)
1063 {
1064 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1065 SR_AcousticModelsImpl* modelsImpl;
1066 ESR_ReturnCode rc;
1067
1068 PLOG_DBG_API_ENTER();
1069 if (!impl->isStarted)
1070 {
1071 /* In case the user calls stop() twice */
1072 return ESR_SUCCESS;
1073 }
1074 modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1075
1076 /* Clean-up recognizer and utterance */
1077 switch (impl->internalState)
1078 {
1079 case SR_RECOGNIZER_INTERNAL_BEGIN:
1080 /* Recognizer was never started */
1081 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BEGIN")));
1082 CA_LockUtteranceFromInput(impl->utterance);
1083 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1084 if (impl->eventLog != NULL)
1085 {
1086 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BEGIN -> SR_RECOGNIZER_INTERNAL_END")));
1087 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1088 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1089 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1090 }
1091 break;
1092
1093 case SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT:
1094 /* Recognizer was never started */
1095 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_TIMEOUT")));
1096 CA_LockUtteranceFromInput(impl->utterance);
1097 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1098 if (impl->eventLog != NULL)
1099 {
1100 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT -> SR_RECOGNIZER_INTERNAL_END")));
1101 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1102 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1103 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1104 }
1105 break;
1106
1107 case SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH:
1108 /* Recognizer was never started */
1109 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_NO_MATCH")));
1110 CA_LockUtteranceFromInput(impl->utterance);
1111 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1112 if (impl->eventLog != NULL)
1113 {
1114 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH -> SR_RECOGNIZER_INTERNAL_END")));
1115 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1116 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1117 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1118 }
1119 break;
1120
1121 case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
1122 /* Recognizer was never started */
1123 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("BOS_DETECTION")));
1124 CA_LockUtteranceFromInput(impl->utterance);
1125 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1126 if (impl->eventLog != NULL)
1127 {
1128 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_BOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1129 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1130 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1131 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1132 }
1133 break;
1134
1135 case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
1136 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS_DETECTION")));
1137 CA_LockUtteranceFromInput(impl->utterance);
1138 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1139 {
1140 rc = ESR_INVALID_STATE;
1141 PLogError(ESR_rc2str(rc));
1142 goto CLEANUP;
1143 }
1144 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1145 if (impl->eventLog != NULL)
1146 {
1147 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS_DETECTION -> SR_RECOGNIZER_INTERNAL_END")));
1148 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1149 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1150 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1151 }
1152 break;
1153
1154 case SR_RECOGNIZER_INTERNAL_EOI:
1155 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOI")));
1156 CA_LockUtteranceFromInput(impl->utterance);
1157 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1158 {
1159 rc = ESR_INVALID_STATE;
1160 PLogError(ESR_rc2str(rc));
1161 goto CLEANUP;
1162 }
1163 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1164 if (impl->eventLog != NULL)
1165 {
1166 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOI -> SR_RECOGNIZER_INTERNAL_END")));
1167 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1168 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1169 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1170 }
1171 break;
1172
1173 case SR_RECOGNIZER_INTERNAL_EOS:
1174 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("EOS")));
1175 CA_LockUtteranceFromInput(impl->utterance);
1176 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1177 {
1178 rc = ESR_INVALID_STATE;
1179 PLogError(ESR_rc2str(rc));
1180 goto CLEANUP;
1181 }
1182 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1183 if (impl->eventLog != NULL)
1184 {
1185 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RECOGNIZER_INTERNAL_EOS -> SR_RECOGNIZER_INTERNAL_END")));
1186 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1187 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1188 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1189 }
1190 break;
1191
1192 case SR_RECOGNIZER_INTERNAL_END:
1193 /* Recognizer already shut down */
1194 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), L("END")));
1195 break;
1196
1197 default:
1198 /* Shut down recognizer */
1199 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("MODE"), impl->internalState));
1200 if (impl->eventLog != NULL)
1201 {
1202 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("unknown state -> SR_RECOGNIZER_INTERNAL_END")));
1203 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
1204 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
1205 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
1206 }
1207 CA_LockUtteranceFromInput(impl->utterance);
1208 if (impl->isRecognizing)
1209 {
1210 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1211 {
1212 rc = ESR_INVALID_STATE;
1213 PLogError(ESR_rc2str(rc));
1214 goto CLEANUP;
1215 }
1216 }
1217 rc = ESR_INVALID_STATE;
1218 PLogError(L("%s: %d"), ESR_rc2str(rc), impl->internalState);
1219 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
1220 goto CLEANUP;
1221 }
1222 if (impl->eventLog != NULL)
1223 {
1224 int n;
1225 LCHAR result[MAX_ENTRY_LENGTH];
1226 result[0] = L('\0');
1227
1228 n = CA_GetUnprocessedFramesInUtterance(impl->utterance);
1229 CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CA_GetUnprocessedFramesInUtterance() (x10ms)"), n));
1230 CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
1231 CHKLOG(rc, SR_EventLogToken(impl->eventLog, L("CA_FullResultLabel() (x20ms)"), result));
1232 n = CircularBufferGetSize(impl->buffer);
1233 CHKLOG(rc, SR_EventLogTokenInt(impl->eventLog, L("CircularBufferGetSize() (samples)"), n / SAMPLE_SIZE));
1234 }
1235 if (impl->lockFunction)
1236 impl->lockFunction(ESR_LOCK, impl->lockData);
1237 CircularBufferReset(impl->buffer);
1238 if (impl->lockFunction)
1239 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1240 if (CA_RecognitionHasResults(impl->recognizer))
1241 CA_ClearResults(impl->recognizer);
1242 CA_FlushUtteranceFrames(impl->utterance);
1243 CA_CalculateCMSParameters(impl->wavein);
1244 CA_CloseDevice(impl->wavein);
1245
1246 /* record the OSI event */
1247 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIstop")));
1248
1249 if (impl->result != NULL)
1250 {
1251 CHKLOG(rc, SR_RecognizerResult_Destroy(impl->result));
1252 impl->result = NULL;
1253 }
1254
1255 if (impl->lockFunction)
1256 impl->lockFunction(ESR_LOCK, impl->lockData);
1257 impl->gotLastFrame = ESR_TRUE;
1258 PLOG_DBG_TRACE((L("SR_Recognizer shutdown occured")));
1259 impl->isStarted = ESR_FALSE;
1260 impl->isRecognizing = ESR_FALSE;
1261 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
1262 SR_EventLogAudioClose(impl->eventLog);
1263
1264 impl->recogLogTimings.BORT = 0;
1265 impl->recogLogTimings.DURS = 0;
1266 impl->recogLogTimings.EORT = 0;
1267 impl->recogLogTimings.EOSD = 0;
1268 impl->recogLogTimings.EOSS = 0;
1269 impl->recogLogTimings.BOSS = 0;
1270 impl->recogLogTimings.EOST = 0;
1271 impl->eos_reason = L("undefined");
1272
1273 if (impl->lockFunction)
1274 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1275 PLOG_DBG_API_EXIT(rc);
1276 return rc;
1277 CLEANUP:
1278 PLOG_DBG_API_EXIT(rc);
1279 return rc;
1280 }
1281
SR_RecognizerSetupImpl(SR_Recognizer * self)1282 ESR_ReturnCode SR_RecognizerSetupImpl(SR_Recognizer* self)
1283 {
1284 ESR_ReturnCode rc;
1285 CA_AcoustInputParams* acousticParams = NULL;
1286 SR_AcousticModelsImpl* modelsImpl;
1287 SR_AcousticModels* models;
1288 SR_RecognizerImpl* recogImpl = NULL;
1289 CA_Acoustic* acoustic;
1290 size_t size, i;
1291 LCHAR filenames[P_PATH_MAX];
1292 size_t len;
1293
1294 len = P_PATH_MAX;
1295 CHKLOG(rc, ESR_SessionGetLCHAR ( L("cmdline.modelfiles"), filenames, &len ));
1296
1297 CHKLOG(rc, SR_AcousticModelsLoad ( filenames, &models ));
1298
1299 if (models == NULL)
1300 {
1301 PLogError(L("ESR_INVALID_STATE while finding cmdline.modelfiles"));
1302 return ESR_INVALID_STATE;
1303 }
1304 modelsImpl = (SR_AcousticModelsImpl*) models;
1305 recogImpl = (SR_RecognizerImpl*) self;
1306 acousticParams = NULL;
1307
1308 CHKLOG(rc, SR_AcousticModelsGetCount(models, &size));
1309 acousticParams = CA_AllocateAcousticParameters();
1310 if (acousticParams == NULL)
1311 {
1312 rc = ESR_OUT_OF_MEMORY;
1313 PLogError(ESR_rc2str(rc));
1314 goto CLEANUP;
1315 }
1316 CHKLOG(rc, modelsImpl->getLegacyParameters(acousticParams));
1317 CHKLOG(rc, ArrayListGetSize(modelsImpl->acoustic, &size));
1318 for (i = 0; i < size; ++i)
1319 {
1320 CHKLOG(rc, ArrayListGet(modelsImpl->acoustic, i, (void **)&acoustic));
1321 CA_LoadModelsInAcoustic(recogImpl->recognizer, acoustic, acousticParams);
1322 }
1323 CA_FreeAcousticParameters(acousticParams);
1324
1325 recogImpl->models = models;
1326 CHKLOG(rc, modelsImpl->setupPattern(recogImpl->models, self));
1327 return ESR_SUCCESS;
1328 CLEANUP:
1329 if (acousticParams != NULL)
1330 CA_FreeAcousticParameters(acousticParams);
1331 if (recogImpl != NULL)
1332 CA_UnloadRecognitionModels(recogImpl->recognizer);
1333 return rc;
1334 }
1335
SR_RecognizerUnsetupImpl(SR_Recognizer * self)1336 ESR_ReturnCode SR_RecognizerUnsetupImpl(SR_Recognizer* self)
1337 {
1338 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1339 SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1340 ESR_ReturnCode rc;
1341
1342 CHKLOG(rc, modelsImpl->unsetupPattern(impl->models));
1343 CA_UnloadRecognitionModels(impl->recognizer);
1344 CHKLOG(rc, SR_AcousticModelsDestroy ( impl->models ));
1345 impl->models = NULL;
1346 return ESR_SUCCESS;
1347 CLEANUP:
1348 return rc;
1349 }
1350
SR_RecognizerIsSetupImpl(SR_Recognizer * self,ESR_BOOL * isSetup)1351 ESR_ReturnCode SR_RecognizerIsSetupImpl(SR_Recognizer* self, ESR_BOOL* isSetup)
1352 {
1353 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1354
1355 if (isSetup == NULL)
1356 {
1357 PLogError(L("ESR_INVALID_ARGUMENT"));
1358 return ESR_INVALID_ARGUMENT;
1359 }
1360 *isSetup = impl->models != NULL;
1361 return ESR_SUCCESS;
1362 }
1363
SR_RecognizerGetParameterImpl(SR_Recognizer * self,const LCHAR * key,LCHAR * value,size_t * len)1364 ESR_ReturnCode SR_RecognizerGetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1365 LCHAR* value, size_t* len)
1366 {
1367 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1368 ESR_ReturnCode rc;
1369
1370 rc = impl->parameters->getLCHAR(impl->parameters, key, value, len);
1371 if (rc == ESR_NO_MATCH_ERROR)
1372 {
1373 CHKLOG(rc, ESR_SessionGetLCHAR(key, value, len));
1374 return ESR_SUCCESS;
1375 }
1376 else if (rc != ESR_SUCCESS)
1377 {
1378 PLogError(ESR_rc2str(rc));
1379 goto CLEANUP;
1380 }
1381 return ESR_SUCCESS;
1382 CLEANUP:
1383 return rc;
1384 }
1385
1386 /*
1387 * The get / set code is a mess. Since we only use size_t parameters, that's all
1388 * that I am going to make work. The impl->parameters don't work so you always
1389 * have to get them from the session. The impl always logs an error. SteveR
1390 */
1391
SR_RecognizerGetSize_tParameterImpl(SR_Recognizer * self,const LCHAR * key,size_t * value)1392 ESR_ReturnCode SR_RecognizerGetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1393 size_t* value)
1394 {
1395 ESR_ReturnCode rc;
1396
1397 CHKLOG(rc, ESR_SessionGetSize_t(key, value));
1398 return ESR_SUCCESS;
1399 CLEANUP:
1400 return rc;
1401 }
1402
SR_RecognizerGetBoolParameterImpl(SR_Recognizer * self,const LCHAR * key,ESR_BOOL * value)1403 ESR_ReturnCode SR_RecognizerGetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL* value)
1404 {
1405 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1406 ESR_ReturnCode rc;
1407
1408 rc = impl->parameters->getBool(impl->parameters, key, value);
1409 if (rc == ESR_NO_MATCH_ERROR)
1410 {
1411 CHKLOG(rc, ESR_SessionGetBool(key, value));
1412 return ESR_SUCCESS;
1413 }
1414 else if (rc != ESR_SUCCESS)
1415 {
1416 PLogError(ESR_rc2str(rc));
1417 goto CLEANUP;
1418 }
1419 return ESR_SUCCESS;
1420 CLEANUP:
1421 return rc;
1422 }
1423
SR_RecognizerSetParameterImpl(SR_Recognizer * self,const LCHAR * key,LCHAR * value)1424 ESR_ReturnCode SR_RecognizerSetParameterImpl(SR_Recognizer* self, const LCHAR* key,
1425 LCHAR* value)
1426 {
1427 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1428 LCHAR temp[256];
1429 ESR_ReturnCode rc;
1430 size_t len = 256;
1431
1432 rc = impl->parameters->getLCHAR(impl->parameters, key, temp, &len);
1433 if (rc == ESR_SUCCESS)
1434 {
1435 if (LSTRCMP(temp, value) == 0)
1436 return ESR_SUCCESS;
1437 CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1438 }
1439 else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1440 {
1441 PLogError(ESR_rc2str(rc));
1442 goto CLEANUP;
1443 }
1444
1445 CHKLOG(rc, impl->parameters->setLCHAR(impl->parameters, key, value));
1446 return ESR_SUCCESS;
1447 CLEANUP:
1448 return rc;
1449 }
/*
 * The only set param function that is working is for the size_t parameters; and not
 * all of them are working, only the ones specified in the function itself. There are
 * two reasons for this: first, most of the set functions just put the value in an unused
 * table that has no effect; second, many of the changes need to be propagated to a specific
 * part of the code. This needs to be evaluated on a per parameter basis. SteveR
 */
1457
/*
 * This function will be used to set parameters in the session. We need to go through
 * the recognizer so as to propagate the values into the recognizer. We will rely on
 * the session to do the right thing. SteveR
 */
1463
SR_RecognizerSetSize_tParameterImpl(SR_Recognizer * self,const LCHAR * key,size_t value)1464 ESR_ReturnCode SR_RecognizerSetSize_tParameterImpl(SR_Recognizer* self, const LCHAR* key,
1465 size_t value)
1466 {
1467 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1468 ESR_ReturnCode rc;
1469
1470 rc = ESR_SessionSetSize_t ( key, value );
1471
1472 if (rc == ESR_SUCCESS)
1473 {
1474 if ( LSTRCMP ( L("SREC.Recognizer.utterance_timeout"), key ) == 0 )
1475 {
1476 impl->utterance_timeout = value;
1477 }
1478 else if ( LSTRCMP ( L("CREC.Recognizer.terminal_timeout"), key ) == 0 )
1479 {
1480 impl->recognizer->eosd_parms->endnode_timeout = value;
1481 }
1482 else if ( LSTRCMP ( L("CREC.Recognizer.optional_terminal_timeout"), key ) == 0 )
1483 {
1484 impl->recognizer->eosd_parms->optendnode_timeout = value;
1485 }
1486 else if ( LSTRCMP ( L("CREC.Recognizer.non_terminal_timeout"), key ) == 0 )
1487 {
1488 impl->recognizer->eosd_parms->internalnode_timeout = value;
1489 }
1490 else if ( LSTRCMP ( L("CREC.Recognizer.eou_threshold"), key ) == 0 )
1491 {
1492 impl->recognizer->eosd_parms->eos_costdelta = (frameID)value;
1493 impl->recognizer->eosd_parms->opt_eos_costdelta = (frameID)value;
1494 }
1495 else
1496 {
1497 PLogError(L("ESR_INVALID_ARGUMENT"));
1498 rc = ESR_INVALID_ARGUMENT;
1499 }
1500 }
1501 return rc;
1502 }
1503
1504
SR_RecognizerSetBoolParameterImpl(SR_Recognizer * self,const LCHAR * key,ESR_BOOL value)1505 ESR_ReturnCode SR_RecognizerSetBoolParameterImpl(SR_Recognizer* self, const LCHAR* key, ESR_BOOL value)
1506 {
1507 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1508 ESR_BOOL temp;
1509 ESR_ReturnCode rc;
1510
1511 rc = impl->parameters->getBool(impl->parameters, key, &temp);
1512 if (rc == ESR_SUCCESS)
1513 {
1514 if (temp == value)
1515 return ESR_SUCCESS;
1516 CHKLOG(rc, impl->parameters->removeAndFreeProperty(impl->parameters, key));
1517 }
1518 else if (rc != ESR_NO_MATCH_ERROR && rc != ESR_INVALID_RESULT_TYPE)
1519 return rc;
1520
1521 CHKLOG(rc, impl->parameters->setBool(impl->parameters, key, value));
1522 return ESR_SUCCESS;
1523 CLEANUP:
1524 return rc;
1525 }
1526
SR_RecognizerHasSetupRulesImpl(SR_Recognizer * self,ESR_BOOL * hasSetupRules)1527 ESR_ReturnCode SR_RecognizerHasSetupRulesImpl(SR_Recognizer* self, ESR_BOOL* hasSetupRules)
1528 {
1529 SR_RecognizerImpl* recogImpl = (SR_RecognizerImpl*) self;
1530 size_t size;
1531 ESR_ReturnCode rc;
1532
1533 if (hasSetupRules == NULL)
1534 {
1535 PLogError(L("ESR_INVALID_ARGUMENT"));
1536 return ESR_INVALID_ARGUMENT;
1537 }
1538 CHKLOG(rc, HashMapGetSize(recogImpl->grammars, &size));
1539 *hasSetupRules = size > 0;
1540 return ESR_SUCCESS;
1541 CLEANUP:
1542 return rc;
1543 }
1544
SR_RecognizerActivateRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName,unsigned int weight)1545 ESR_ReturnCode SR_RecognizerActivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1546 const LCHAR* ruleName, unsigned int weight)
1547 {
1548 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1549 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1550 SR_AcousticModelsImpl* modelsImpl;
1551 LCHAR grammarID[80];
1552 ESR_ReturnCode rc;
1553 char *failure_reason = NULL;
1554
1555 if (grammar == NULL)
1556 {
1557 if (impl->eventLog)
1558 failure_reason = "badinput";
1559 rc = ESR_INVALID_ARGUMENT;
1560 PLogError(L("ESR_INVALID_ARGUMENT"));
1561 goto CLEANUP;
1562 }
1563
1564 if (impl->models == NULL)
1565 {
1566 failure_reason = "nomodels";
1567 rc = ESR_INVALID_STATE;
1568 PLogError(L("acoustic models must be configured"));
1569 goto CLEANUP;
1570 }
1571
1572 modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1573
1574 if (ruleName == NULL)
1575 psprintf(grammarID, L("%p"), grammar);
1576 else
1577 {
1578 if (LSTRLEN(ruleName) > 80)
1579 {
1580 rc = ESR_BUFFER_OVERFLOW;
1581 PLogError(ESR_rc2str(rc));
1582 goto CLEANUP;
1583 }
1584 LSTRCPY(grammarID, ruleName);
1585 }
1586
1587 CHKLOG(rc, HashMapPut(impl->grammars, grammarID, grammar));
1588 if (CA_SetupSyntaxForRecognizer(grammarImpl->syntax, impl->recognizer))
1589 {
1590 failure_reason = "cafailed";
1591 rc = ESR_INVALID_STATE;
1592 PLogError(L("ESR_INVALID_STATE"));
1593 goto CLEANUP;
1594 }
1595
1596 CHKLOG(rc, SR_Grammar_SetupRecognizer(grammar, self));
1597 grammarImpl->isActivated = ESR_TRUE;
1598
1599 /*
1600 * If we want to log dynamically added words, then we must give the grammar a reference
1601 * to our event log. The grammar logs word additions if and only if its reference to
1602 * eventLog is non-null.
1603 */
1604 if (impl->osi_log_level & OSI_LOG_LEVEL_ADDWD)
1605 grammarImpl->eventLog = impl->eventLog;
1606 else
1607 grammarImpl->eventLog = NULL;
1608
1609 rc = ESR_SUCCESS;
1610
1611 CLEANUP:
1612 if (impl->eventLog)
1613 {
1614 if (failure_reason)
1615 {
1616 SR_EventLogTokenPointer(impl->eventLog, L("igrm"), grammar);
1617 SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1618 SR_EventLogToken(impl->eventLog, L("rslt"), "fail");
1619 SR_EventLogToken(impl->eventLog, L("reason"), failure_reason);
1620 SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1621 }
1622 else
1623 {
1624 SR_EventLogTokenPointer(impl->eventLog, L("igrm"), grammar);
1625 SR_EventLogToken(impl->eventLog, L("rule"), ruleName);
1626 SR_EventLogToken(impl->eventLog, L("rslt"), "ok");
1627 SR_EventLogEvent(impl->eventLog, L("ESRacGrm"));
1628 }
1629 }
1630 return rc;
1631 }
1632
SR_RecognizerDeactivateRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName)1633 ESR_ReturnCode SR_RecognizerDeactivateRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1634 const LCHAR* ruleName)
1635 {
1636 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1637 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*) grammar;
1638 LCHAR grammarID[MAX_INT_DIGITS+1];
1639 ESR_ReturnCode rc;
1640
1641 if (ruleName == NULL)
1642 {
1643 psprintf(grammarID, L("%p"), grammar);
1644 CHKLOG(rc, HashMapRemove(impl->grammars, grammarID));
1645 }
1646 else
1647 CHKLOG(rc, HashMapRemove(impl->grammars, ruleName));
1648 grammarImpl->isActivated = ESR_FALSE;
1649 return ESR_SUCCESS;
1650 CLEANUP:
1651 return rc;
1652 }
1653
SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer * self)1654 ESR_ReturnCode SR_RecognizerDeactivateAllRulesImpl(SR_Recognizer* self)
1655 {
1656 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1657 ESR_ReturnCode rc;
1658
1659 CHKLOG(rc, HashMapRemoveAll(impl->grammars));
1660 CA_ClearSyntaxForRecognizer(0, impl->recognizer);
1661 return ESR_SUCCESS;
1662 CLEANUP:
1663 return rc;
1664 }
1665
SR_RecognizerIsActiveRuleImpl(SR_Recognizer * self,SR_Grammar * grammar,const LCHAR * ruleName,ESR_BOOL * isActiveRule)1666 ESR_ReturnCode SR_RecognizerIsActiveRuleImpl(SR_Recognizer* self, SR_Grammar* grammar,
1667 const LCHAR* ruleName, ESR_BOOL* isActiveRule)
1668 {
1669 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1670 LCHAR grammarID[MAX_INT_DIGITS+1];
1671 ESR_ReturnCode rc;
1672
1673 psprintf(grammarID, L("%p"), grammar);
1674 CHKLOG(rc, HashMapContainsKey(impl->grammars, (LCHAR*) &grammarID, isActiveRule));
1675 return ESR_SUCCESS;
1676 CLEANUP:
1677 return rc;
1678 }
1679
SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer * self,SR_Grammar * grammar)1680 ESR_ReturnCode SR_RecognizerSetWordAdditionCeilingImpl(SR_Recognizer* self, SR_Grammar* grammar)
1681 {
1682 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1683 SR_GrammarImpl* grammarImpl = (SR_GrammarImpl*)grammar;
1684 int iRc;
1685
1686 if(!impl || !grammarImpl)
1687 return ESR_INVALID_ARGUMENT;
1688 iRc = CA_CeilingSyntaxForRecognizer( grammarImpl->syntax, impl->recognizer);
1689 if(iRc) return ESR_INVALID_STATE;
1690
1691 return ESR_SUCCESS;
1692 }
1693
SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer * self,SR_Grammar * grammar,ESR_BOOL * isConsistent)1694 ESR_ReturnCode SR_RecognizerCheckGrammarConsistencyImpl(SR_Recognizer* self, SR_Grammar* grammar,
1695 ESR_BOOL* isConsistent)
1696 {
1697 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1698 SR_GrammarImpl* grammarImpl;
1699 SR_RecognizerImpl* impl2;
1700
1701
1702 grammarImpl = (SR_GrammarImpl*) grammar;
1703 impl2 = (SR_RecognizerImpl*)grammarImpl->recognizer;
1704 // *isConsistent = grammarImpl->models == impl->models;
1705 *isConsistent = (impl2->models == impl->models);
1706 return ESR_SUCCESS;
1707 }
1708
SR_RecognizerGetModelsImpl(SR_Recognizer * self,SR_AcousticModels ** pmodels)1709 ESR_ReturnCode SR_RecognizerGetModelsImpl(SR_Recognizer* self, SR_AcousticModels** pmodels)
1710 {
1711 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1712 *pmodels = impl->models;
1713 return ESR_SUCCESS;
1714 }
1715
SR_RecognizerPutAudioImpl(SR_Recognizer * self,asr_int16_t * buffer,size_t * bufferSize,ESR_BOOL isLast)1716 ESR_ReturnCode SR_RecognizerPutAudioImpl(SR_Recognizer* self, asr_int16_t* buffer, size_t* bufferSize,
1717 ESR_BOOL isLast)
1718 {
1719 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1720 ESR_ReturnCode rc;
1721 int rcBufWrite;
1722 size_t nbWritten;
1723
1724 if (isLast == ESR_FALSE && (buffer == NULL || bufferSize == NULL))
1725 {
1726 PLogError(L("ESR_INVALID_ARGUMENT"));
1727 return ESR_INVALID_ARGUMENT;
1728 }
1729
1730 if (impl->lockFunction)
1731 impl->lockFunction(ESR_LOCK, impl->lockData);
1732 if (!impl->isStarted)
1733 {
1734 if (impl->lockFunction)
1735 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1736 PLogMessage(L("ESR_INVALID_STATE: Tried pushing audio while recognizer was offline"));
1737 return ESR_INVALID_STATE;
1738 }
1739 if (impl->gotLastFrame)
1740 {
1741 if (impl->lockFunction)
1742 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1743 PLogMessage(L("ESR_INVALID_STATE: isLast=TRUE"));
1744 return ESR_INVALID_STATE;
1745 }
1746 if (buffer == NULL && isLast == ESR_FALSE)
1747 {
1748 if (impl->lockFunction)
1749 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1750 PLogError(L("ESR_INVALID_ARGUMENT: got NULL buffer on non-terminal frame"));
1751 return ESR_INVALID_ARGUMENT;
1752 }
1753
1754 rcBufWrite = CircularBufferWrite(impl->buffer, buffer, *bufferSize * SAMPLE_SIZE);
1755 if (rcBufWrite < 0)
1756 {
1757 rc = ESR_INVALID_STATE;
1758 PLogError(L("%s: error writing to buffer (buffer=%p, available=%u)"), ESR_rc2str(rc), impl->buffer, CircularBufferGetAvailable(impl->buffer));
1759 goto CLEANUP;
1760 }
1761
1762 nbWritten = (size_t)rcBufWrite;
1763 if (nbWritten % SAMPLE_SIZE != 0)
1764 {
1765 size_t amountUnwritten;
1766
1767 /* The buffer is byte-based while we're sample based. Make sure we write entire samples or not at all */
1768 amountUnwritten = CircularBufferUnwrite(impl->buffer, nbWritten % SAMPLE_SIZE);
1769 passert(amountUnwritten == nbWritten % SAMPLE_SIZE);
1770 nbWritten -= amountUnwritten;
1771 }
1772 passert(nbWritten % 2 == 0); /* make sure CircularBufferSize is divisible by 2 */
1773
1774 if (nbWritten < *bufferSize * SAMPLE_SIZE)
1775 {
1776 rc = ESR_BUFFER_OVERFLOW;
1777 #ifndef NDEBUG
1778 PLOG_DBG_TRACE((L("%s: writing to circular buffer"), ESR_rc2str(rc)));
1779 #endif
1780 *bufferSize = nbWritten / SAMPLE_SIZE;
1781 if (impl->lockFunction)
1782 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1783 goto CLEANUP;
1784 }
1785 if (impl->lockFunction)
1786 impl->lockFunction(ESR_UNLOCK, impl->lockData);
1787
1788 if (isLast)
1789 impl->gotLastFrame = ESR_TRUE;
1790 return ESR_SUCCESS;
1791 CLEANUP:
1792 return rc;
1793 }
1794
1795 /* utility function to sort the ArrayList of nbest list results by the score of the first
1796 semantic result */
SemanticResults_SortByScore(ArrayList * results,size_t nbestSize)1797 ESR_ReturnCode SemanticResults_SortByScore(ArrayList *results, size_t nbestSize)
1798 {
1799 ESR_ReturnCode rc;
1800 ArrayList* semanticResultList;
1801 ArrayList* semanticResultList_swap;
1802 SR_SemanticResult* semanticResult_i;
1803 SR_SemanticResult* semanticResult_j;
1804 size_t i, j;
1805 LCHAR scoreStr[MAX_ENTRY_LENGTH] ;
1806 size_t scoreStrLen = MAX_ENTRY_LENGTH ;
1807 int score_i, score_j;
1808
1809 /* bubble sort */
1810 for (i = 0; i < (size_t)nbestSize; ++i)
1811 {
1812 for (j = i + 1; j < (size_t)nbestSize; ++j)
1813 {
1814 /* get for i */
1815 CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList)); /* nbest index */
1816 CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_i)); /* semresult 0 */
1817
1818 /* get for j */
1819 CHKLOG(rc, ArrayListGet(results, j, (void **)&semanticResultList)); /* nbest index */
1820 CHKLOG(rc, ArrayListGet(semanticResultList, 0, (void **)&semanticResult_j)); /* semresult 0 */
1821
1822 scoreStrLen = MAX_ENTRY_LENGTH ;
1823 CHKLOG(rc, semanticResult_i->getValue(semanticResult_i, "raws", scoreStr, &scoreStrLen));
1824 CHKLOG(rc, lstrtoi(scoreStr, &score_i, 10));
1825 scoreStrLen = MAX_ENTRY_LENGTH ;
1826 CHKLOG(rc, semanticResult_j->getValue(semanticResult_j, "raws", scoreStr, &scoreStrLen));
1827 CHKLOG(rc, lstrtoi(scoreStr, &score_j, 10));
1828
1829 if (score_j < score_i)
1830 {
1831 /* need to swap */
1832 CHKLOG(rc, ArrayListGet(results, i, (void **)&semanticResultList_swap)); /* put i in swap */
1833 CHKLOG(rc, ArrayListSet(results, i, semanticResultList)); /* put j in i */
1834 CHKLOG(rc, ArrayListSet(results, j, semanticResultList_swap)); /* put swap in j */
1835 }
1836 }
1837 }
1838 return ESR_SUCCESS;
1839 CLEANUP:
1840 return rc;
1841 }
1842
filter_CA_FullResultLabel(const LCHAR * label,LCHAR * filtered_label,size_t * boss,size_t * eoss)1843 ESR_ReturnCode filter_CA_FullResultLabel(const LCHAR* label, LCHAR *filtered_label, size_t* boss, size_t* eoss)
1844 {
1845 ESR_ReturnCode rc;
1846 enum
1847 {
1848 NO_COPY,
1849 FRAME,
1850 WORD,
1851 } filter_state = WORD;
1852 LCHAR *dst = filtered_label;
1853 LCHAR eosBuf[16]; /* max 9999 + '\0' */
1854 LCHAR bosBuf[16]; /* max 9999 + '\0' */
1855 LCHAR* pBuf = NULL;
1856
1857 /**
1858 * example: you want to filter this:
1859 *
1860 * "-pau-@23 clock@97 twenty_four@125 hour@145 "
1861 * ^boss = 23 ^ eoss = 145
1862 * and get this:
1863 *
1864 * "clock twenty_four hour"
1865 */
1866
1867 passert(LSTRLEN(label) > 0);
1868 while (*label)
1869 {
1870 switch (filter_state)
1871 {
1872 case NO_COPY:
1873 if (*label == L(' '))
1874 filter_state = WORD;
1875 else if (*label == L('@'))
1876 {
1877 filter_state = FRAME;
1878 if (pBuf == NULL)
1879 pBuf = bosBuf;
1880 else
1881 {
1882 *pBuf = 0;
1883 pBuf = eosBuf;
1884 }
1885 }
1886 break;
1887 case WORD:
1888 if (*label == L('@'))
1889 {
1890 *dst = L(' '); /* insert space */
1891 dst++;
1892 filter_state = FRAME;
1893 if (pBuf == NULL)
1894 pBuf = bosBuf;
1895 else
1896 {
1897 *pBuf = 0;
1898 pBuf = eosBuf;
1899 }
1900 }
1901 else
1902 {
1903 *dst = *label;
1904 dst++;
1905 }
1906 break;
1907 case FRAME:
1908 if (*label == L(' '))
1909 filter_state = WORD;
1910 else
1911 {
1912 *pBuf = *label;
1913 pBuf++;
1914 }
1915 break;
1916 }
1917 label++;
1918 }
1919 *dst = 0; /* term the string */
1920 *pBuf = 0; /* term the string */
1921
1922 /* trim the end spaces */
1923 dst--;
1924 while (*dst == ' ')
1925 *dst-- = '\0';
1926
1927 /* set the eos signal indicated by the end pointed data */
1928 if (eosBuf[0] != 0)
1929 CHKLOG(rc, lstrtosize_t(eosBuf, eoss, 10));
1930 else
1931 eoss = 0;
1932
1933 if (bosBuf[0] != 0)
1934 CHKLOG(rc, lstrtosize_t(bosBuf, boss, 10));
1935 else
1936 boss = 0;
1937
1938 return ESR_SUCCESS;
1939 CLEANUP:
1940 return rc;
1941 }
1942
1943 /**
1944 * Populates the recognizer result if it can, otherwise it returns NO MATCH cuz no results exist
1945 *
1946 * INPUT STATE: SR_RECOGNIZER_INTERNAL_EOS
1947 *
1948 * @param self SR_Recognizer handle
1949 * @todo break up into smaller functions
1950 */
SR_RecognizerCreateResultImpl(SR_Recognizer * self,SR_RecognizerStatus * status,SR_RecognizerResultType * type)1951 ESR_ReturnCode SR_RecognizerCreateResultImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
1952 SR_RecognizerResultType* type)
1953 {
1954 LCHAR label[MAX_ENTRY_LENGTH * 2]; /* run out of buffer */
1955 #define WORDID_COUNT 48 /* can be quite high for voice enrollment! */
1956 wordID wordIDs[WORDID_COUNT];
1957 LCHAR tok[80];
1958 LCHAR waveformFilename[P_PATH_MAX];
1959 LCHAR* pkey;
1960 SR_GrammarImpl* pgrammar;
1961 asr_int32_t raws; /* raw score */
1962 size_t iBest, nbestSize, jBest, k, grammarSize, semanticResultsSize, grammarIndex_for_iBest;
1963 LCHAR* lValue;
1964 LCHAR* lValue2;
1965 int confValue;
1966 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
1967 SR_RecognizerResultImpl* resultImpl = (SR_RecognizerResultImpl*) impl->result;
1968 ESR_BOOL containsKey;
1969 int valid, score, recogID;
1970 LCHAR result[MAX_ENTRY_LENGTH];
1971 size_t len, size;
1972 size_t locale;
1973 int current_choice;
1974
1975 /**
1976 * Semantic result stuff
1977 */
1978 /* a temp buffer to hold semantic results of a parse (there may be several results) */
1979 SR_SemanticResult* semanticResults[MAX_SEM_RESULTS];
1980 ArrayList* semanticList;
1981 ArrayList* semanticList2;
1982 SR_SemanticResultImpl* semanticImpl;
1983 SR_SemanticResultImpl* semanticImpl2;
1984 SR_SemanticResult* semanticResult;
1985 SR_SemanticResult* semanticResult2;
1986 waveform_buffering_state_t buffering_state;
1987
1988 SR_AcousticModelsImpl* modelsImpl = (SR_AcousticModelsImpl*) impl->models;
1989 ESR_ReturnCode rc;
1990 PTimeStamp EORT;
1991
1992 CA_LockUtteranceFromInput(impl->utterance);
1993 if (!CA_EndRecognition(impl->recognizer, modelsImpl->pattern, impl->utterance))
1994 {
1995 PLogError(L("ESR_INVALID_STATE"));
1996 return ESR_INVALID_STATE;
1997 }
1998
1999 /* check if the forward search was successful */
2000 valid = CA_FullResultLabel(impl->recognizer, result, MAX_ENTRY_LENGTH - 1);
2001 CA_GetRecogID(impl->recognizer, &recogID);
2002 CA_FullResultScore(impl->recognizer, &score, 1);
2003 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2004 PLogMessage(L("R: %s type %d score %d from recognizer%d"), result, type, score, valid, recogID);
2005 PLogMessage(L("R: %s score %d from recognizer%d"), result, score, valid, recogID);
2006 #endif
2007 #ifdef _WIN32
2008 //pfprintf(PSTDOUT, ("R: %s type %d score %d from recognizer%d\n"), result, type, score, valid, recogID);
2009 #endif
2010
2011
2012 switch (valid)
2013 {
2014 case FULL_RESULT:
2015 CHKLOG(rc, filter_CA_FullResultLabel(result, label, &impl->recogLogTimings.BOSS, &impl->recogLogTimings.EOSS));
2016 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2017 PLogMessage("R: %s", result);
2018 #endif
2019 CA_FullResultScore(impl->recognizer, (int*) &raws, 0);
2020 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2021 PLogMessage("S: %d", raws);
2022 #endif
2023
2024 /* now that we have an endpointed result, we can parse the result transcription
2025 to see where speech started and ended. Then we can trim off excess parts of the
2026 recorded audio waveform (if exists) so that nametags are just the right amount of
2027 audio
2028 */
2029 CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
2030 if (buffering_state != WAVEFORM_BUFFERING_OFF)
2031 {
2032 CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &size));
2033 if (size > 0)
2034 {
2035 rc = WaveformBuffer_ParseEndPointedResultAndTrim(impl->waveformBuffer, result, impl->FRAME_SIZE);
2036 if (rc == ESR_BUFFER_OVERFLOW)
2037 {
2038 /* Nametag EOS occured beyond end of buffer */
2039 }
2040 else if (rc != ESR_SUCCESS)
2041 {
2042 PLogError(ESR_rc2str(rc));
2043 goto CLEANUP;
2044 }
2045 }
2046 }
2047 break;
2048
2049 case REJECT_RESULT:
2050 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2051 PLogMessage(L("R: <REJECTED>"));
2052 #endif
2053 break;
2054 default:
2055 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2056 PLogMessage(L("E: No results available"));
2057 PLogMessage(L("R: <FAILED>"));
2058 #endif
2059 break;
2060 }
2061
2062
2063 if (valid == FULL_RESULT)
2064 {
2065 /* Populate SR_RecognizerResult */
2066 resultImpl->nbestList = CA_PrepareNBestList(impl->recognizer, 10, &raws);
2067 if (resultImpl->nbestList == NULL)
2068 {
2069 /*
2070 * This is not a failure. It simply means that I have not advanced far
2071 * enough in recognition in order to obtain results (no paths in
2072 * graph). This occurs, for instance, when a eof is reached (no more data)
2073 * and I have not even created any paths in my graph.
2074 */
2075
2076 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2077 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2078 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2079 if (impl->eventLog != NULL)
2080 {
2081 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2082 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2083 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2084 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2085 }
2086 passert(0);
2087 return ESR_SUCCESS;
2088 }
2089
2090 nbestSize = CA_NBestListCount(resultImpl->nbestList);
2091 }
2092 else
2093 nbestSize = 0;
2094
2095 if (resultImpl->results != NULL)
2096 ArrayListRemoveAll(resultImpl->results);
2097 else
2098 CHKLOG(rc, ArrayListCreate(&resultImpl->results));
2099 if (nbestSize == 0)
2100 {
2101 /*
2102 * Got empty n-best list even though the recognition was successful.
2103 * We handle this in the same way that recog_startpt does... we consider it a no match.
2104 * We could adjust the CREC.Recognizer.viterbi_prune_thresh to a higher level, but that
2105 * may not fix the problem completely. We need to fix the bug in the astar search!!!
2106 */
2107 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2108 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2109 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2110 if (impl->eventLog != NULL)
2111 {
2112 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2113 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2114 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2115 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2116 }
2117 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2118 PLogMessage(L("ESR_INVALID_STATE: got empty n-best list even though the recognition was successful"));
2119 #endif
2120 return ESR_SUCCESS; /* we do not want to halt the app in this case */
2121 }
2122 else
2123 {
2124 *status = SR_RECOGNIZER_EVENT_RECOGNITION_RESULT;
2125 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2126 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2127 if (impl->eventLog != NULL)
2128 {
2129 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2130 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2131 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2132 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2133 }
2134 }
2135
2136 /**
2137 * All grammars associated with the recognizer are considered to be active
2138 * and therefore, I do a semantic parse on each. On the first grammar that
2139 * gives one or more semantic results, I stop parsing the other grammars.
2140 */
2141 CHKLOG(rc, impl->grammars->getSize(impl->grammars, &grammarSize));
2142 ASSERT( grammarSize == 1);
2143
2144 for (iBest = 0; iBest < nbestSize; ++iBest)
2145 {
2146 len = WORDID_COUNT;
2147 if (CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) != ESR_SUCCESS)
2148 {
2149 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2150 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2151 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2152 if (impl->eventLog != NULL)
2153 {
2154 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("SR_RecognizerCreateResultImpl() -> SR_RECOGNIZER_INTERNAL_END")));
2155 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2156 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2157 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2158 }
2159 PLogError(L("ESR_INVALID_STATE: got bad n-best list entry %d"), iBest);
2160 return ESR_INVALID_STATE;
2161 }
2162
2163 CHKLOG(rc, ArrayListCreate(&semanticList));
2164 CHKLOG(rc, resultImpl->results->add(resultImpl->results, semanticList));
2165
2166 grammarIndex_for_iBest = 0;
2167 CHKLOG(rc, impl->grammars->getKeyAtIndex(impl->grammars, grammarIndex_for_iBest, &pkey));
2168 CHKLOG(rc, impl->grammars->get(impl->grammars, pkey, (void **)&pgrammar));
2169
2170 CHKLOG(rc, SR_GrammarGetSize_tParameter((SR_Grammar*) pgrammar, L("locale"), &locale));
2171 resultImpl->locale = locale;
2172
2173 /* I need to manage my semantic results external to the check parse function */
2174 for (k = 0; k < MAX_SEM_RESULTS; ++k)
2175 SR_SemanticResultCreate(&semanticResults[k]);
2176
2177 /*
2178 The code here tries to make the voice-enrollment more effective.
2179 The VE grammar decodes a sequence of best phonemes, but the nbest
2180 processing may find a better score for an alternative choice than
2181 the score of the viterbi best choice. The reason for this is that
2182 alternative choices don't honor cross-word context-dependency quite
2183 accurately. If we choose an alternative choice then the sequence of
2184 phoneme decoded does not correspond to the sequence of models decoded.
2185 To counter this, we FORCIBLY make sure the top choice here is the
2186 VITERBI top choice.
2187 */
2188
2189 if (iBest == 0)
2190 {
2191 if (CA_IsEnrollmentSyntax( pgrammar->syntax)) {
2192 /* this was voice enrollment, so let's try to replace */
2193 // char* word1 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[0]);
2194 // char* word2 = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[1]);
2195 // if (!strncmp(word1,voice_enroll_word_prefix,VEWPLEN)&&!strncmp(word2,voice_enroll_word_prefix,VEWPLEN))
2196 len = WORDID_COUNT;
2197 rc = CA_FullResultWordIDs(impl->recognizer, wordIDs, &len);
2198 if (rc != ESR_SUCCESS)
2199 {
2200 /* in case of problem with viterbi path choice, we revert back */
2201 len = WORDID_COUNT;
2202 rc = CA_NBestListGetResultWordIDs(resultImpl->nbestList, iBest, wordIDs, &len, &raws) ;
2203 }
2204 }
2205 }
2206
2207 LSTRCPY(label, L(""));
2208 for (k = 0; wordIDs[k] != MAXwordID; ++k)
2209 {
2210 LCHAR* wordk = NULL;
2211 wordk = CA_NBestListGetResultWord(resultImpl->nbestList,wordIDs[k]);
2212 LSTRCAT(label, wordk);
2213 LSTRCAT(label, L(" "));
2214 }
2215 CHKLOG(rc, CA_ResultStripSlotMarkers(label));
2216 passert(LSTRCMP(label, L("")) != 0);
2217
2218 /* strip the trailing blank */
2219 k = LSTRLEN(label) - 1;
2220 if (k > 0 && label[k] == L(' '))
2221 label[k] = 0;
2222
2223 semanticResultsSize = MAX_SEM_RESULTS;
2224
2225 #if SEMPROC_ACTIVE
2226
2227 /* set the literal prior to processing so that semproc can read the value
2228 during processing */
2229 CHKLOG(rc, pgrammar->semproc->flush(pgrammar->semproc));
2230 CHKLOG(rc, pgrammar->semproc->setParam(pgrammar->semproc, L("literal"), label));
2231
2232 rc = pgrammar->semproc->checkParseByWordID(pgrammar->semproc, pgrammar->semgraph,
2233 wordIDs, semanticResults, &semanticResultsSize);
2234
2235 /* rc = pgrammar->semproc->checkParse(pgrammar->semproc, pgrammar->semgraph,
2236 label, semanticResults, &semanticResultsSize); */
2237
2238 if (rc != ESR_SUCCESS)
2239 {
2240 for (k = 0; k < MAX_SEM_RESULTS; ++k)
2241 {
2242 semanticResults[k]->destroy(semanticResults[k]);
2243 semanticResults[k] = NULL;
2244 }
2245 goto CLEANUP;
2246 }
2247 #else
2248 semanticResultsSize = 0;
2249 #endif
2250 /* cleanup the empty ones */
2251 for (k = semanticResultsSize; k < MAX_SEM_RESULTS; ++k)
2252 {
2253 CHKLOG(rc, semanticResults[k]->destroy(semanticResults[k]));
2254 semanticResults[k] = NULL;
2255 }
2256
2257 /* save the good ones */
2258 for (k = 0; k < semanticResultsSize; ++k)
2259 {
2260 /*
2261 * Save the pointer to the semantic result that was created.
2262 * Remember that the semantic result array only holds pointers
2263 * and for each time that the function is called, new semantic results
2264 * are created, and the pointers overwrite old values in the array
2265 */
2266 CHKLOG(rc, semanticList->add(semanticList, semanticResults[k]));
2267 }
2268
2269 #if SEMPROC_ACTIVE
2270 if (semanticResultsSize > 0)
2271 {
2272 /* OSI log the grammar(s) that was used in recognizing */
2273 psprintf(tok, L("GURI%zd"), grammarIndex_for_iBest);
2274 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2275 }
2276 #else
2277 /* OSI log the grammar(s) that was used in recognizing */
2278 psprintf(tok, L("GURI%d"), grammarIndex_for_iBest);
2279 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("GRMR"), tok));
2280 #endif
2281
2282 /* Populate semantic results for each nbest list entry */
2283 CHKLOG(rc, semanticList->getSize(semanticList, &semanticResultsSize));
2284 if (semanticResultsSize == 0)
2285 {
2286 /*
2287 * If there was no semantic result... then I need to create one so that I can store
2288 * literal, conf, meaning which are default keys that must ALWAYS exist
2289 */
2290 CHKLOG(rc, SR_SemanticResultCreate(&semanticResult));
2291 CHKLOG(rc, semanticList->add(semanticList, semanticResult));
2292 semanticResultsSize = 1;
2293 }
2294
2295 for (k = 0; k < semanticResultsSize;++k)
2296 {
2297 CHKLOG(rc, semanticList->get(semanticList, k, (void **)&semanticResult));
2298 if (semanticResult == NULL)
2299 {
2300 PLogError(L("nbest entry contained NULL semanticResult"), ESR_INVALID_STATE);
2301 return ESR_INVALID_STATE;
2302 }
2303
2304 semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2305
2306 /* put in the literal */
2307 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2308 if (lValue == NULL)
2309 {
2310 PLogError(L("ESR_OUT_OF_MEMORY"));
2311 return ESR_OUT_OF_MEMORY;
2312 }
2313 LSTRCPY(lValue, label);
2314 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("literal"), lValue));
2315
2316 /* if the meaning is not set, then put in the meaning which will be the literal */
2317 CHKLOG(rc, semanticImpl->results->containsKey(semanticImpl->results, L("meaning"), &containsKey));
2318 if (!containsKey)
2319 {
2320 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2321 if (lValue == NULL)
2322 {
2323 PLogError(L("ESR_OUT_OF_MEMORY"));
2324 return ESR_OUT_OF_MEMORY;
2325 }
2326 LSTRCPY(lValue, label);
2327 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("meaning"), lValue));
2328 }
2329
2330 /* put in the raw score */
2331 psprintf(label, L("%d"), raws);
2332 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2333 if (lValue == NULL)
2334 {
2335 PLogError(L("ESR_OUT_OF_MEMORY"));
2336 return ESR_OUT_OF_MEMORY;
2337 }
2338 LSTRCPY(lValue, label);
2339 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("raws"), lValue));
2340 }
2341 }
2342
2343 /* Now I have an nBest list where each entry has at least one semantic result */
2344 /* What I need to do is filter out the nBest list entries which have matching
2345 semantic results for 'meaning' */
2346 /* Once I have filtered out the nBest list based on this criteria, I can calculate the confidence
2347 score and populate the result of the first entry with the raw score */
2348
2349 #if FILTER_NBEST_BY_SEM_RESULT
2350
2351 for (iBest = nbestSize-1; iBest>0; iBest--) /* do not filter out nBest entry 0 */
2352 {
2353 /**
2354 * This is the entry (indexed by i) targeted for removal
2355 *
2356 */
2357
2358 /* get the nBest entry which you wish to remove (if duplicate found) */
2359 CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void **)&semanticList));
2360
2361 /* get the first sem_result for the entry */
2362 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2363 semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2364
2365 /* get the meaning */
2366 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2367
2368 /* get the other entries to check against (start with 0, end on the current i entry) */
2369 for (jBest = 0; jBest < iBest; ++jBest)
2370 {
2371 /*
2372 * This is the entry (indexed by jBest) that we will compare with
2373 */
2374
2375 /* get the nBest entry which you wish to compare with */
2376 CHKLOG(rc, ArrayListGet(resultImpl->results, jBest, (void **)&semanticList2));
2377
2378 CHKLOG(rc, ArrayListGet(semanticList2, 0, (void **)&semanticResult2));
2379 semanticImpl2 = (SR_SemanticResultImpl*) semanticResult2;
2380
2381 CHKLOG(rc, semanticImpl2->results->get(semanticImpl2->results, L("meaning"), (void **)&lValue2));
2382 if (LSTRCMP(lValue, lValue2) == 0)
2383 {
2384 /* pfprintf(PSTDOUT,"duplicate sem result found %d == %d\n", iBest, jBest);
2385 pfprintf(PSTDOUT,"removing %d\n", iBest); */
2386
2387 /* removing from the list indexed by iBest */
2388 CHKLOG(rc, semanticList->remove(semanticList, semanticResult));
2389 CHKLOG(rc, semanticResult->destroy(semanticResult));
2390
2391 CHKLOG(rc, resultImpl->results->remove(resultImpl->results, semanticList));
2392 CHKLOG(rc, semanticList->destroy(semanticList));
2393
2394 if (!CA_NBestListRemoveResult(resultImpl->nbestList, iBest))
2395 return ESR_ARGUMENT_OUT_OF_BOUNDS;
2396 break;
2397 }
2398 }
2399 }
2400 nbestSize = CA_NBestListCount(resultImpl->nbestList);
2401 #endif
2402
2403 CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2404
2405 if (nbestSize)
2406 {
2407 if(CA_ComputeConfidenceValues(impl->confidenceScorer, impl->recognizer, resultImpl->nbestList))
2408 return ESR_INVALID_STATE;
2409
2410 for(current_choice=nbestSize-1;current_choice>=0;current_choice--)
2411 {
2412 /* get the nBest entry you want to deal with */
2413 CHKLOG(rc, ArrayListGet(resultImpl->results, current_choice, (void **)&semanticList));
2414 /* get the first sem_result for that entry */
2415 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2416 semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2417
2418 /* put in the conf value for that nBest entry */
2419 if(!CA_NBestListGetResultConfidenceValue( resultImpl->nbestList, current_choice, &confValue))
2420 return ESR_ARGUMENT_OUT_OF_BOUNDS;
2421
2422 psprintf(label, L("%d"), confValue);
2423 lValue = MALLOC(sizeof(LCHAR) * (LSTRLEN(label) + 1), MTAG);
2424 if (lValue == NULL)
2425 {
2426 PLogError(L("ESR_OUT_OF_MEMORY"));
2427 return ESR_OUT_OF_MEMORY;
2428 }
2429 LSTRCPY(lValue, label);
2430 CHKLOG(rc, semanticImpl->results->put(semanticImpl->results, L("conf"),lValue));
2431 }
2432 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("CMPT"), 0));
2433 }
2434
2435 /* OSI log the end of recognition and all bufferred tokens */
2436
2437 /* OSI log end of recognition time */
2438 PTimeStampSet(&EORT);
2439 impl->recogLogTimings.EORT = PTimeStampDiff(&EORT, &impl->timestamp);
2440 impl->recogLogTimings.DURS = impl->processed * MSEC_PER_FRAME;
2441
2442 /*****************************************/
2443 /* OSI Logging stuff */
2444 /*****************************************/
2445 if( impl->osi_log_level != 0)
2446 {
2447 /* get the nBest size (this size may have changed since previous set cuz of nbest list filtering) */
2448 CHKLOG(rc, ArrayListGetSize(resultImpl->results, &nbestSize));
2449 /* OSI log the nBest list size */
2450 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("NBST"), nbestSize));
2451
2452
2453 for (iBest = 0; iBest < nbestSize; iBest++) /* loop */
2454 {
2455 /* get the nBest entry */
2456 CHKLOG(rc, ArrayListGet(resultImpl->results, iBest, (void**)&semanticList));
2457
2458 /* get the first sem_result for the entry (ther emay be many, but ignore others) */
2459 CHKLOG(rc, ArrayListGet(semanticList, 0, (void **)&semanticResult));
2460 semanticImpl = (SR_SemanticResultImpl*) semanticResult;
2461
2462 /* get the meaning and OSI log it */
2463 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("meaning"), (void **)&lValue));
2464 /* OSI log RSLT (meaning) for nbest item */
2465 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSLT"), lValue));
2466
2467 /* get the literal and OSI log it */
2468 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("literal"), (void **)&lValue));
2469 /* OSI log RAWT SPOK (literal) for nbest item */
2470 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWT"), lValue));
2471 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SPOK"), lValue));
2472
2473 /* get the score and OSI log it */
2474 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("raws"), (void **)&lValue));
2475 /* OSI log RAWS (score) for nbest item */
2476 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RAWS"), lValue));
2477 /* get the confidence value and OSI log it */
2478 CHKLOG(rc, semanticImpl->results->get(semanticImpl->results, L("conf"), (void **)&lValue));
2479 /* OSI log CONF (values) for nbest item */
2480 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("CONF"), lValue));
2481 }
2482
2483 /* log the values */
2484 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BORT"), impl->recogLogTimings.BORT));
2485 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("DURS"), impl->recogLogTimings.DURS));
2486 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EORT"), impl->recogLogTimings.EORT));
2487 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSD"), impl->recogLogTimings.EOSD));
2488 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOSS"), impl->recogLogTimings.EOSS));
2489 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("EOST"), impl->recogLogTimings.EOST));
2490 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2491 {
2492 len = P_PATH_MAX;
2493 CHKLOG(rc, SR_EventLogAudioGetFilename(impl->eventLog, waveformFilename, &len));
2494 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("WVNM"), waveformFilename));
2495 }
2496 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RSTT"), L("ok")));
2497 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("RENR"), L("ok")));
2498 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("ENDR"), impl->eos_reason));
2499 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIrcnd")));
2500
2501 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSS"), impl->recogLogTimings.BOSS)); /* extra not in OSI spec */
2502 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRboss")));
2503
2504 /*
2505 * Record which recognizer was the successful one (male or female)
2506 * this index refers to the order in the swimdllist file.
2507 */
2508 CHKLOG(rc, CA_GetRecogID(impl->recognizer, &recogID));
2509 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("RECOG"), recogID));
2510 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRrcid")));
2511
2512 /* Record semantic results returned by top nbestlist entry */
2513 if (1)
2514 {
2515 #define MAX_SEMANTIC_KEYS 50
2516 LCHAR* semanticKeys[MAX_SEMANTIC_KEYS];
2517 #define SEMANTIC_VALUE_SIZE 512
2518 LCHAR semanticValue[SEMANTIC_VALUE_SIZE];
2519 size_t num_semanticKeys;
2520
2521 rc = resultImpl->results->getSize(resultImpl->results, &nbestSize);
2522 if (rc != ESR_SUCCESS)
2523 {
2524 PLogError(ESR_rc2str(rc));
2525 goto DONE;
2526 }
2527 for (iBest = 0; iBest < nbestSize; ++iBest) /* loop2 */
2528 {
2529 rc = resultImpl->results->get(resultImpl->results, iBest, (void **)&semanticList);
2530 if (rc != ESR_SUCCESS)
2531 {
2532 PLogError(ESR_rc2str(rc));
2533 goto DONE;
2534 }
2535
2536 /* semanticResultsSize is the number of semantic meanings, although
2537 ambiguous parses are not entirely supported
2538 num_semanticKeys is associated to a particular parse */
2539
2540 rc = semanticList->getSize(semanticList, &semanticResultsSize);
2541 if (rc != ESR_SUCCESS)
2542 {
2543 PLogError(ESR_rc2str(rc));
2544 goto DONE;
2545 }
2546 for (k = 0; k < semanticResultsSize; ++k)
2547 {
2548 size_t iKey;
2549 rc = semanticList->get(semanticList, k, (void **)&semanticResult);
2550 if (rc != ESR_SUCCESS)
2551 {
2552 PLogError(ESR_rc2str(rc));
2553 goto DONE;
2554 }
2555 num_semanticKeys = MAX_SEMANTIC_KEYS;
2556 rc = semanticResult->getKeyList(semanticResult, (LCHAR**) & semanticKeys, &num_semanticKeys);
2557 if (rc != ESR_SUCCESS)
2558 {
2559 PLogError(ESR_rc2str(rc));
2560 goto DONE;
2561 }
2562
2563 for (iKey=0; iKey<num_semanticKeys; ++iKey)
2564 {
2565 len = SEMANTIC_VALUE_SIZE;
2566 rc = semanticResult->getValue(semanticResult, semanticKeys[iKey], (LCHAR*) &semanticValue, &len);
2567 if (rc != ESR_SUCCESS)
2568 {
2569 PLogError(ESR_rc2str(rc));
2570 goto DONE;
2571 }
2572
2573 rc = SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, semanticKeys[iKey], semanticValue);
2574 if (rc != ESR_SUCCESS)
2575 {
2576 PLogError(ESR_rc2str(rc));
2577 goto DONE;
2578 }
2579 }
2580 }
2581 }
2582 rc = SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESR_SemanticResult[0]"));
2583 if (rc != ESR_SUCCESS)
2584 {
2585 PLogError(ESR_rc2str(rc));
2586 goto DONE;
2587 }
2588 }
2589 }
2590 DONE:
2591 return ESR_SUCCESS;
2592 CLEANUP:
2593 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2594 return rc;
2595 }
2596
2597 /**
2598 * Indicates if it is possible to push data from SREC into the internal recognizer.
2599 * If data can be pushed, ESR_CONTINUE_PROCESSING is returned.
2600 *
2601 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2602 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI
2603 */
canPushAudioIntoRecognizer(SR_RecognizerImpl * impl)2604 static PINLINE ESR_ReturnCode canPushAudioIntoRecognizer(SR_RecognizerImpl* impl)
2605 {
2606 ESR_ReturnCode rc;
2607
2608 if (impl->lockFunction)
2609 impl->lockFunction(ESR_LOCK, impl->lockData);
2610
2611 /* do I have enough to make a frame ? */
2612 if (CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2613 {
2614 /* Not enough data */
2615 if (!impl->gotLastFrame)
2616 {
2617 /* not last frame, so ask for more audio */
2618 if (impl->lockFunction)
2619 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2620 return ESR_SUCCESS;
2621 }
2622 else
2623 {
2624 /* last frame, make do with what you have */
2625 if (impl->lockFunction)
2626 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2627 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2628 PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2629 #endif
2630 impl->isRecognizing = ESR_FALSE;
2631 impl->recogLogTimings.EOSD = impl->frames;
2632 impl->eos_reason = L("EOI");
2633 impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2634 if (impl->eventLog != NULL)
2635 {
2636 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_EOI")));
2637 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2638 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2639 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2640 }
2641 return ESR_CONTINUE_PROCESSING;
2642 }
2643 }
2644 if (impl->lockFunction)
2645 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2646 return ESR_CONTINUE_PROCESSING;
2647 CLEANUP:
2648 return rc;
2649 }
2650
2651 /**
2652 * Pushes data from SREC into the internal recognizer.
2653 *
2654 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2655 * OUTPUT STATES: same
2656 */
pushAudioIntoRecognizer(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2657 static PINLINE ESR_ReturnCode pushAudioIntoRecognizer(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2658 SR_RecognizerResultType* type,
2659 SR_RecognizerResult* result)
2660 {
2661 size_t count;
2662 ESR_ReturnCode rc;
2663
2664 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2665 {
2666 /* Don't push frames unless they're needed */
2667
2668 /* Check for leaked state */
2669 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2670 return ESR_CONTINUE_PROCESSING;
2671 }
2672 if (impl->lockFunction)
2673 impl->lockFunction(ESR_LOCK, impl->lockData);
2674 count = CircularBufferRead(impl->buffer, impl->audioBuffer, impl->FRAME_SIZE);
2675 if (impl->lockFunction)
2676 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2677
2678 WaveformBuffer_Write(impl->waveformBuffer, impl->audioBuffer, count);
2679 if (impl->osi_log_level & OSI_LOG_LEVEL_AUDIO)
2680 {
2681 rc = SR_EventLogAudioWrite(impl->eventLog, impl->audioBuffer, count);
2682 if (rc == ESR_BUFFER_OVERFLOW)
2683 rc = ESR_INVALID_STATE;
2684 if (rc != ESR_SUCCESS)
2685 {
2686 PLogError(ESR_rc2str(rc));
2687 if (impl->lockFunction)
2688 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2689 goto CLEANUP;
2690 }
2691 }
2692 if (count < impl->FRAME_SIZE)
2693 {
2694 rc = ESR_INVALID_STATE;
2695 PLogError(L("%s: error reading buffer data (count=%d, frameSize=%d)"), ESR_rc2str(rc), count, impl->FRAME_SIZE);
2696 goto CLEANUP;
2697 }
2698 if (!CA_LoadSamples(impl->wavein, impl->audioBuffer, impl->sampleRate / FRAMERATE))
2699 {
2700 PLogError(L("ESR_INVALID_STATE"));
2701 rc = ESR_INVALID_STATE;
2702 goto CLEANUP;
2703 }
2704
2705 CA_ConditionSamples(impl->wavein);
2706 /* Check for leaked state */
2707 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2708 return ESR_CONTINUE_PROCESSING;
2709 CLEANUP:
2710 return rc;
2711 }
2712
2713 /**
2714 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2715 * OUTPUT STATES: same
2716 */
generateFrameFromAudio(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2717 static PINLINE ESR_ReturnCode generateFrameFromAudio(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2718 SR_RecognizerResultType* type,
2719 SR_RecognizerResult* result)
2720 {
2721 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0 && impl->frames >= impl->bgsniff)
2722 {
2723 /* Don't create frames unless they're needed */
2724
2725 /* Check for leaked state */
2726 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2727 return ESR_CONTINUE_PROCESSING;
2728 }
2729
2730 /* Try processing one frame */
2731 if (!CA_MakeFrame(impl->frontend, impl->utterance, impl->wavein))
2732 {
2733 /*
2734 * One of three cases occured:
2735 *
2736 * - We don't have enough samples to process one frame. This should be impossible because
2737 * pushAudioIntoRecognizer() is always called before us and will not continue if we don't
2738 * have enough samples.
2739 *
2740 * - The internal recognizer needs a minimum amount of audio before it'll begin generating
2741 * frames. This is normal and we return with a success value.
2742 *
2743 * - The recognizer skips every even frame number (for performance reasons). This is normal
2744 * and we return with a success value.
2745 */
2746 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2747 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2748 return ESR_SUCCESS;
2749 }
2750 ++impl->frames;
2751 /* Check for leaked state */
2752 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2753 return ESR_CONTINUE_PROCESSING;
2754 }
2755
2756 /**
2757 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2758 * OUTPUT STATES: same
2759 */
generateFrameStats(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2760 static PINLINE ESR_ReturnCode generateFrameStats(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2761 SR_RecognizerResultType* type,
2762 SR_RecognizerResult* result)
2763 {
2764 if (impl->frames < impl->bgsniff)
2765 {
2766 /* Wait until we have enough frames to estimate background stats */
2767 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
2768 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2769 return ESR_SUCCESS;
2770 }
2771 else if (impl->frames == impl->bgsniff)
2772 CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->bgsniff);
2773
2774 /* Check for leaked state */
2775 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2776 return ESR_CONTINUE_PROCESSING;
2777 }
2778
2779 /**
2780 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2781 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS
2782 */
generatePatternFromFrame(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2783 static PINLINE ESR_ReturnCode generatePatternFromFrame(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2784 SR_RecognizerResultType* type,
2785 SR_RecognizerResult* result)
2786 {
2787 SR_AcousticModelsImpl* modelsImpl;
2788 ESR_ReturnCode rc;
2789
2790 /* Run the search */
2791 modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2792 if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2793 {
2794 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2795 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2796 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2797 if (impl->eventLog != NULL)
2798 {
2799 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_END")));
2800 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2801 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2802 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2803 }
2804 PLogError(L("ESR_INVALID_STATE"));
2805 return ESR_INVALID_STATE;
2806 }
2807 if (!CA_AdvanceUtteranceFrame(impl->utterance))
2808 {
2809 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2810 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2811 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2812 if (impl->eventLog != NULL)
2813 {
2814 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("canPushAudioIntoRecognizer() -> SR_RECOGNIZER_INTERNAL_END")));
2815 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2816 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2817 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2818 }
2819 PLogError(L("ESR_INVALID_STATE"));
2820 return ESR_INVALID_STATE;
2821 }
2822 CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2823 ++impl->processed;
2824
2825 if (impl->lockFunction)
2826 impl->lockFunction(ESR_LOCK, impl->lockData);
2827 if (impl->gotLastFrame && CircularBufferGetSize(impl->buffer) < impl->FRAME_SIZE)
2828 {
2829 /*
2830 * SREC have run out of data but the underlying recognizer might have some frames
2831 * queued for processing.
2832 */
2833 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) > 0)
2834 {
2835 /* EOI means end of input */
2836 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2837 PLogMessage("L: Voicing END (EOI) at %d frames (%d processed)", impl->frames, impl->processed);
2838 #endif
2839 impl->isRecognizing = ESR_FALSE;
2840 impl->recogLogTimings.EOSD = impl->frames;
2841 impl->eos_reason = L("EOI");
2842 impl->internalState = SR_RECOGNIZER_INTERNAL_EOI;
2843 if (impl->eventLog != NULL)
2844 {
2845 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOI")));
2846 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2847 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2848 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2849 }
2850 }
2851 else
2852 {
2853 #ifdef SREC_ENGINE_VERBOSE_LOGGING
2854 PLogMessage("L: Voicing END (EOF) at %d frames (%d processed)", impl->frames, impl->processed);
2855 #endif
2856
2857 impl->isRecognizing = ESR_FALSE;
2858 impl->recogLogTimings.EOSD = impl->frames;
2859 impl->eos_reason = L("EOF");
2860 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2861 if (impl->eventLog != NULL)
2862 {
2863 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrame() -> SR_RECOGNIZER_INTERNAL_EOS")));
2864 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2865 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2866 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2867 }
2868 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2869 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2870 passert(impl->processed == impl->frames);
2871 if (impl->lockFunction)
2872 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2873 return ESR_SUCCESS;
2874 }
2875 }
2876 if (impl->lockFunction)
2877 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2878
2879 /* Check for leaked state */
2880 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2881 return ESR_CONTINUE_PROCESSING;
2882 CLEANUP:
2883 return rc;
2884 }
2885
2886 /**
2887 * Same as generatePatternFromFrame() only the buffer is known to be empty.
2888 *
2889 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI
2890 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2891 */
generatePatternFromFrameEOI(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2892 static PINLINE ESR_ReturnCode generatePatternFromFrameEOI(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2893 SR_RecognizerResultType* type,
2894 SR_RecognizerResult* result)
2895 {
2896 SR_AcousticModelsImpl* modelsImpl;
2897 ESR_ReturnCode rc;
2898
2899 /* Run the search */
2900 modelsImpl = (SR_AcousticModelsImpl*) impl->models;
2901
2902 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2903 {
2904 passert(impl->processed == impl->frames);
2905 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2906 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2907 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2908 return ESR_SUCCESS;
2909 }
2910
2911 if (!CA_MakePatternFrame(modelsImpl->pattern, impl->utterance))
2912 {
2913 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2914 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2915 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2916 if (impl->eventLog != NULL)
2917 {
2918 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2919 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2920 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2921 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2922 }
2923 PLogError(L("ESR_INVALID_STATE"));
2924 return ESR_INVALID_STATE;
2925 }
2926 if (!CA_AdvanceUtteranceFrame(impl->utterance))
2927 {
2928 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
2929 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
2930 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
2931 if (impl->eventLog != NULL)
2932 {
2933 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_END")));
2934 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2935 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2936 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2937 }
2938 PLogError(L("ESR_INVALID_STATE"));
2939 return ESR_INVALID_STATE;
2940 }
2941 CA_AdvanceRecognitionByFrame(impl->recognizer, modelsImpl->pattern, impl->utterance);
2942 ++impl->processed;
2943
2944 if (impl->lockFunction)
2945 impl->lockFunction(ESR_LOCK, impl->lockData);
2946
2947 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
2948 {
2949 passert(impl->processed == impl->frames);
2950 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
2951 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
2952 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
2953 if (impl->eventLog != NULL)
2954 {
2955 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("generatePatternFromFrameEOI() -> SR_RECOGNIZER_INTERNAL_EOS")));
2956 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
2957 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
2958 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
2959 }
2960 if (impl->lockFunction)
2961 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2962 return ESR_SUCCESS;
2963 }
2964 if (impl->lockFunction)
2965 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2966
2967 /* Check for leaked state */
2968 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
2969 return ESR_CONTINUE_PROCESSING;
2970 CLEANUP:
2971 if (impl->lockFunction)
2972 impl->lockFunction(ESR_UNLOCK, impl->lockData);
2973 return rc;
2974 }
2975
2976
2977 /**
2978 * INPUT STATES: SR_RECOGNIZER_INTERNAL_EOI, SR_RECOGNIZER_INTERNAL_EOS_DETECTION
2979 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS
2980 */
detectEndOfSpeech(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)2981 ESR_ReturnCode detectEndOfSpeech(SR_RecognizerImpl* impl, SR_RecognizerStatus* status,
2982 SR_RecognizerResultType* type,
2983 SR_RecognizerResult* result)
2984 {
2985 EOSrc eos; /* eos means end of speech */
2986 int eos_by_level; /* eos means end of speech */
2987 PTimeStamp timestamp;
2988 ESR_ReturnCode rc;
2989 ESR_BOOL enableGetWaveform = ESR_FALSE;
2990
2991 eos_by_level = CA_UtteranceHasEnded(impl->utterance);
2992 if (eos_by_level)
2993 {
2994 eos = SPEECH_ENDED_BY_LEVEL_TIMEOUT;
2995 }
2996 else
2997 {
2998 eos = CA_IsEndOfUtteranceByResults(impl->recognizer);
2999 }
3000
3001 ESR_SessionGetBool(L("enableGetWaveform"), &enableGetWaveform);
3002 //impl->parameters->getBool(impl->parameters, L("enableGetWaveform"), &enableGetWaveform);
3003
3004 if (eos == VALID_SPEECH_CONTINUING && enableGetWaveform && impl->waveformBuffer->overflow_count > 0)
3005 {
3006 size_t bufferSize;
3007 CHKLOG(rc, WaveformBuffer_GetSize(impl->waveformBuffer, &bufferSize));
3008 PLogMessage("Forcing EOS due to wfbuf overflow (fr=%d,sz=%d,of=%d)", impl->frames, bufferSize, impl->waveformBuffer->overflow_count);
3009 eos = SPEECH_TOO_LONG;
3010 }
3011
3012 if (eos != VALID_SPEECH_CONTINUING)
3013 {
3014 switch (eos)
3015 {
3016 case SPEECH_ENDED:
3017 /* normal */
3018 impl->eos_reason = L("itimeout");
3019 break;
3020
3021 case SPEECH_ENDED_WITH_ERROR:
3022 /* error */
3023 impl->eos_reason = L("err");
3024 break;
3025
3026 case SPEECH_TOO_LONG:
3027 /* timeout*/
3028 impl->eos_reason = L("ctimeout");
3029 break;
3030
3031 case SPEECH_MAYBE_ENDED:
3032 /* normal */
3033 impl->eos_reason = L("itimeout");
3034 break;
3035 case SPEECH_ENDED_BY_LEVEL_TIMEOUT:
3036 /* normal */
3037 impl->eos_reason = L("levelTimeout");
3038 break;
3039
3040 default:
3041 /* error */
3042 impl->eos_reason = L("err");
3043 }
3044
3045 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3046 PLogMessage("L: Voicing END (EOS) at %d frames, %d processed (reason: %s)\n", impl->frames, impl->processed, impl->eos_reason);
3047 #endif
3048
3049 impl->recogLogTimings.EOSD = impl->frames; /* how many frames have been sent prior to detect EOS */
3050 PTimeStampSet(×tamp); /* time it took to detect EOS (in millisec) */
3051 impl->recogLogTimings.EOST = PTimeStampDiff(×tamp, &impl->timestamp);
3052
3053 *status = SR_RECOGNIZER_EVENT_END_OF_VOICING;
3054 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3055 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS;
3056 if (impl->eventLog != NULL)
3057 {
3058 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("internalState"), L("detectEndOfSpeech() -> SR_RECOGNIZER_INTERNAL_EOS")));
3059 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("reason"), impl->eos_reason));
3060 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("frames"), impl->frames));
3061 CHKLOG(rc, SR_EventLogTokenSize_t_BASIC(impl->eventLog, impl->osi_log_level, L("processed"), impl->processed));
3062 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SR_Recognizer")));
3063 }
3064 impl->isRecognizing = ESR_FALSE;
3065 return ESR_SUCCESS;
3066 }
3067
3068 /* Check for leaked state */
3069 passert(*status == SR_RECOGNIZER_EVENT_INVALID && *type == SR_RECOGNIZER_RESULT_TYPE_INVALID);
3070 return ESR_CONTINUE_PROCESSING;
3071 CLEANUP:
3072 return rc;
3073 }
3074
3075 /**
3076 * INPUT STATES: SR_RECOGNIZER_INTERNAL_BOS_DETECTION
3077 * OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOI
3078 */
detectBeginningOfSpeech(SR_RecognizerImpl * impl,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult * result)3079 ESR_ReturnCode detectBeginningOfSpeech(SR_RecognizerImpl* impl,
3080 SR_RecognizerStatus* status,
3081 SR_RecognizerResultType* type,
3082 SR_RecognizerResult* result)
3083 {
3084 ESR_ReturnCode rc;
3085 ESR_BOOL gatedMode;
3086 size_t num_windback_bytes, num_windback_frames;
3087 waveform_buffering_state_t buffering_state;
3088
3089 CHKLOG(rc, ESR_SessionGetBool(L("cmdline.gatedmode"), &gatedMode));
3090
3091 if (gatedMode || (!gatedMode && impl->frames < impl->bgsniff))
3092 {
3093 ESR_BOOL pushable = ESR_FALSE;
3094
3095 rc = canPushAudioIntoRecognizer(impl);
3096 if (rc == ESR_SUCCESS)
3097 {
3098 /* Not enough samples to process one frame */
3099 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3100 {
3101 *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3102 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3103 return ESR_SUCCESS;
3104 }
3105 }
3106 else if (rc != ESR_CONTINUE_PROCESSING)
3107 return rc;
3108 else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3109 {
3110 /* Got end of input before beginning of speech */
3111 *status = SR_RECOGNIZER_EVENT_NO_MATCH;
3112 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3113 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_NO_MATCH;
3114 CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3115 return ESR_SUCCESS;
3116 }
3117 else
3118 pushable = ESR_TRUE;
3119 if (pushable)
3120 {
3121 rc = pushAudioIntoRecognizer(impl, status, type, result);
3122 /* OUTPUT STATES: same or SR_RECOGNIZER_INTERNAL_EOI */
3123 if (rc != ESR_CONTINUE_PROCESSING)
3124 {
3125 /* Not enough samples to process one frame */
3126 return rc;
3127 }
3128 rc = generateFrameFromAudio(impl, status, type, result);
3129 /* OUTPUT STATES: same */
3130 if (rc != ESR_CONTINUE_PROCESSING)
3131 {
3132 /*
3133 * The internal recognizer needs a minimum amount of audio before
3134 * it begins generating frames.
3135 */
3136 return rc;
3137 }
3138 }
3139 if (!CA_AdvanceUtteranceFrame(impl->utterance))
3140 {
3141 PLogError(L("ESR_INVALID_STATE: Failed Advancing Utt Frame %d"), impl->frames);
3142 return ESR_INVALID_STATE;
3143 }
3144 if (CA_UtteranceHasVoicing(impl->utterance))
3145 {
3146 /* Utterance stats for Lombard if enough frames */
3147 if (impl->frames > impl->bgsniff)
3148 {
3149 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3150 PLogMessage("L: Voicing START at %d frames", impl->frames);
3151 #endif
3152 /* OSI log the endpointed data */
3153
3154 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BTIM"), impl->frames * MSEC_PER_FRAME));
3155 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BRGN"), 0)); /* Barge-in not supported */
3156 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIendp")));
3157
3158 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, L("BOSD"), impl->frames));
3159 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("ESRbosd")));
3160
3161 if (gatedMode)
3162 CA_CalculateUtteranceStatistics(impl->utterance, (int)(impl->frames * -1), 0);
3163 else
3164 CA_CalculateUtteranceStatistics(impl->utterance, 0, impl->frames);
3165 }
3166
3167 /* OK, we've got voicing or the end of input has occured
3168 ** (or both, I suppose). If we had voicing then progress
3169 ** the recognizer, otherwise skip to the end.
3170 ** Of course, we could be running outside 'Gated Mode'
3171 ** so we won't have any frames processed at all yet -
3172 ** in this case start the recognizer anyway.
3173 */
3174
3175 /*************************************
3176 ** Run recognition until endOfInput **
3177 *************************************/
3178
3179 /*
3180 * Initialize both recognizers first
3181 * and disable reporting of results
3182 */
3183 if (gatedMode)
3184 {
3185 /*
3186 * We're in Gated Mode -
3187 * Because we'll have had voicing we wind-back
3188 * until the start of voicing (unsure region)
3189 */
3190 num_windback_frames = CA_SeekStartOfUtterance(impl->utterance);
3191 impl->beginningOfSpeechOffset = impl->frames - num_windback_frames;
3192 num_windback_bytes = num_windback_frames * impl->FRAME_SIZE * 2 /* due to skip even frames */;
3193
3194 /* pfprintf(PSTDOUT,L("audio buffer windback %d frames == %d bytes\n"), num_windback_frames, num_windback_bytes); */
3195 CHKLOG(rc, WaveformBuffer_GetBufferingState(impl->waveformBuffer, &buffering_state));
3196 if (buffering_state != WAVEFORM_BUFFERING_OFF)
3197 CHKLOG(rc, WaveformBuffer_WindBack(impl->waveformBuffer, num_windback_bytes));
3198
3199 /*
3200 * Only transition to linear if it was previously circular (in other words if
3201 * buffering was active in the first place)
3202 */
3203 if (buffering_state == WAVEFORM_BUFFERING_ON_CIRCULAR)
3204 CHKLOG(rc, WaveformBuffer_SetBufferingState(impl->waveformBuffer, WAVEFORM_BUFFERING_ON_LINEAR));
3205 impl->frames = CA_GetUnprocessedFramesInUtterance(impl->utterance);
3206 }
3207 else
3208 impl->frames = 0;
3209 /* reset the frames */
3210 impl->processed = 0;
3211 CHKLOG(rc, beginRecognizing(impl));
3212 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3213 *status = SR_RECOGNIZER_EVENT_START_OF_VOICING;
3214 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3215 return ESR_SUCCESS;
3216 }
3217 else
3218 {
3219 if (impl->frames > impl->utterance_timeout)
3220 {
3221 /* beginning of speech timeout */
3222 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_TIMEOUT;
3223 *status = SR_RECOGNIZER_EVENT_START_OF_UTTERANCE_TIMEOUT;
3224 *type = SR_RECOGNIZER_RESULT_TYPE_COMPLETE;
3225 CHKLOG(rc, impl->Interface.stop(&impl->Interface));
3226 return ESR_SUCCESS;
3227 }
3228 }
3229 }
3230 else if (!gatedMode && impl->frames >= impl->bgsniff)
3231 {
3232 /*
3233 * If not gated mode and I have processed enough frames, then start the recognizer
3234 * right away.
3235 */
3236 impl->internalState = SR_RECOGNIZER_INTERNAL_EOS_DETECTION;
3237 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3238 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3239
3240 /* reset the frames */
3241 impl->frames = impl->processed = 0;
3242 CHKLOG(rc, beginRecognizing(impl));
3243 return ESR_SUCCESS;
3244 }
3245 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3246 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3247 return ESR_SUCCESS;
3248
3249 CLEANUP:
3250 return rc;
3251 }
3252
SR_RecognizerAdvanceImpl(SR_Recognizer * self,SR_RecognizerStatus * status,SR_RecognizerResultType * type,SR_RecognizerResult ** result)3253 ESR_ReturnCode SR_RecognizerAdvanceImpl(SR_Recognizer* self, SR_RecognizerStatus* status,
3254 SR_RecognizerResultType* type,
3255 SR_RecognizerResult** result)
3256 {
3257 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3258 ESR_BOOL pushable;
3259 ESR_ReturnCode rc;
3260
3261 if (status == NULL || type == NULL || result == NULL)
3262 {
3263 PLogError(L("ESR_INVALID_ARGUMENT"));
3264 return ESR_INVALID_ARGUMENT;
3265 }
3266
3267 /* create the result holder and save the pointer */
3268 /* creation only happens once (due to the if condition) */
3269 if (impl->result == NULL)
3270 CHKLOG(rc, SR_RecognizerResult_Create(&impl->result, impl));
3271 *result = impl->result;
3272
3273 /*
3274 * The following two lines are used to detect bugs whereby we forget to set
3275 * status or type before returning
3276 */
3277 *status = SR_RECOGNIZER_EVENT_INVALID;
3278 *type = SR_RECOGNIZER_RESULT_TYPE_INVALID;
3279
3280 MOVE_TO_NEXT_STATE:
3281 switch (impl->internalState)
3282 {
3283 case SR_RECOGNIZER_INTERNAL_BEGIN:
3284 impl->internalState = SR_RECOGNIZER_INTERNAL_BOS_DETECTION;
3285 *status = SR_RECOGNIZER_EVENT_STARTED;
3286 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3287 return ESR_SUCCESS;
3288
3289 case SR_RECOGNIZER_INTERNAL_BOS_DETECTION:
3290 rc = detectBeginningOfSpeech(impl, status, type, impl->result);
3291 if (rc != ESR_CONTINUE_PROCESSING)
3292 {
3293 /*
3294 * SR_RECOGNIZER_INTERNAL_BOS_DETECTION, SR_RECOGNIZER_INTERNAL_EOS_DETECTION, or
3295 * SR_RECOGNIZER_INTERNAL_EOI
3296 */
3297 return rc;
3298 }
3299 /* Leaked state */
3300 passert(0);
3301 break;
3302
3303 case SR_RECOGNIZER_INTERNAL_EOS_DETECTION:
3304 pushable = ESR_FALSE;
3305 rc = canPushAudioIntoRecognizer(impl);
3306 if (rc == ESR_SUCCESS)
3307 {
3308 /* Not enough samples to process one frame */
3309 if (CA_GetUnprocessedFramesInUtterance(impl->utterance) <= 0)
3310 {
3311 *status = SR_RECOGNIZER_EVENT_NEED_MORE_AUDIO;
3312 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3313 return ESR_SUCCESS;
3314 }
3315 }
3316 else if (rc != ESR_CONTINUE_PROCESSING)
3317 return rc;
3318 else if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3319 goto MOVE_TO_NEXT_STATE;
3320 else
3321 pushable = ESR_TRUE;
3322 if (pushable)
3323 {
3324 rc = pushAudioIntoRecognizer(impl, status, type, impl->result);
3325 if (rc != ESR_CONTINUE_PROCESSING)
3326 {
3327 /* Not enough samples to process one frame */
3328 return rc;
3329 }
3330 if (impl->internalState == SR_RECOGNIZER_INTERNAL_EOI)
3331 goto MOVE_TO_NEXT_STATE;
3332 rc = generateFrameFromAudio(impl, status, type, impl->result);
3333 if (rc != ESR_CONTINUE_PROCESSING)
3334 {
3335 /*
3336 * The internal recognizer needs a minimum amount of audio before
3337 * it begins generating frames.
3338 */
3339 return rc;
3340 }
3341 }
3342 rc = generateFrameStats(impl, status, type, impl->result);
3343 if (rc != ESR_CONTINUE_PROCESSING)
3344 {
3345 /* Not enough frames to calculate stats */
3346 return rc;
3347 }
3348 rc = generatePatternFromFrame(impl, status, type, impl->result);
3349 if (rc != ESR_CONTINUE_PROCESSING)
3350 {
3351 /* End of speech detected */
3352 return rc;
3353 }
3354 if (impl->internalState == SR_RECOGNIZER_INTERNAL_END)
3355 goto MOVE_TO_NEXT_STATE;
3356 rc = detectEndOfSpeech(impl, status, type, impl->result);
3357 if (rc != ESR_CONTINUE_PROCESSING)
3358 {
3359 /* End of speech detected */
3360 return rc;
3361 }
3362 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3363 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3364 return ESR_SUCCESS;
3365
3366 case SR_RECOGNIZER_INTERNAL_EOI:
3367 /*
3368 * On EOI (end of input), we need to process the remaining frames that had not
3369 * been processed when PutAudio set the gotLastFrame flag
3370 */
3371 rc = generatePatternFromFrameEOI(impl, status, type, impl->result);
3372 if (rc != ESR_CONTINUE_PROCESSING)
3373 {
3374 /* End of speech detected */
3375 return rc;
3376 }
3377 rc = detectEndOfSpeech(impl, status, type, impl->result);
3378 if (rc != ESR_CONTINUE_PROCESSING)
3379 {
3380 /* End of speech detected */
3381 return rc;
3382 }
3383 *status = SR_RECOGNIZER_EVENT_INCOMPLETE;
3384 *type = SR_RECOGNIZER_RESULT_TYPE_NONE;
3385 return ESR_SUCCESS;
3386
3387 case SR_RECOGNIZER_INTERNAL_EOS:
3388 /* On EOS (end of speech detected - not due to end of input), create the result */
3389 if (impl->lockFunction)
3390 impl->lockFunction(ESR_LOCK, impl->lockData);
3391 CircularBufferReset(impl->buffer);
3392 if (impl->lockFunction)
3393 impl->lockFunction(ESR_UNLOCK, impl->lockData);
3394 CHKLOG(rc, SR_RecognizerCreateResultImpl((SR_Recognizer*) impl, status, type));
3395 impl->internalState = SR_RECOGNIZER_INTERNAL_END;
3396 return ESR_SUCCESS;
3397
3398 case SR_RECOGNIZER_INTERNAL_END:
3399 return ESR_SUCCESS;
3400 default:
3401 PLogError(L("ESR_INVALID_STATE"));
3402 return ESR_INVALID_STATE;
3403 }
3404 CLEANUP:
3405 return rc;
3406 }
3407
3408
3409
SR_RecognizerLoadUtteranceImpl(SR_Recognizer * self,const LCHAR * filename)3410 ESR_ReturnCode SR_RecognizerLoadUtteranceImpl(SR_Recognizer* self, const LCHAR* filename)
3411 {
3412 /* TODO: complete */
3413 return ESR_SUCCESS;
3414 }
3415
SR_RecognizerLoadWaveFileImpl(SR_Recognizer * self,const LCHAR * filename)3416 ESR_ReturnCode SR_RecognizerLoadWaveFileImpl(SR_Recognizer* self, const LCHAR* filename)
3417 {
3418 /* TODO: complete */
3419 return ESR_SUCCESS;
3420 }
3421
SR_RecognizerLogEventImpl(SR_Recognizer * self,const LCHAR * event)3422 ESR_ReturnCode SR_RecognizerLogEventImpl(SR_Recognizer* self, const LCHAR* event)
3423 {
3424 ESR_ReturnCode rc;
3425 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3426 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, event));
3427 return ESR_SUCCESS;
3428 CLEANUP:
3429 return rc;
3430 }
3431
SR_RecognizerLogTokenImpl(SR_Recognizer * self,const LCHAR * token,const LCHAR * value)3432 ESR_ReturnCode SR_RecognizerLogTokenImpl(SR_Recognizer* self, const LCHAR* token, const LCHAR* value)
3433 {
3434 ESR_ReturnCode rc;
3435 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3436 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3437 return ESR_SUCCESS;
3438 CLEANUP:
3439 return rc;
3440 }
3441
SR_RecognizerLogTokenIntImpl(SR_Recognizer * self,const LCHAR * token,int value)3442 ESR_ReturnCode SR_RecognizerLogTokenIntImpl(SR_Recognizer* self, const LCHAR* token, int value)
3443 {
3444 ESR_ReturnCode rc;
3445 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3446 CHKLOG(rc, SR_EventLogTokenInt_BASIC(impl->eventLog, impl->osi_log_level, token, value));
3447 return ESR_SUCCESS;
3448 CLEANUP:
3449 return rc;
3450 }
3451
SR_RecognizerLogSessionStartImpl(SR_Recognizer * self,const LCHAR * sessionName)3452 ESR_ReturnCode SR_RecognizerLogSessionStartImpl(SR_Recognizer* self, const LCHAR* sessionName)
3453 {
3454 ESR_ReturnCode rc;
3455 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3456 /**
3457 * OSI Platform logging.
3458 * In OSR, these events are logged by the platform. We have no platform in ESR, so we
3459 * log them here.
3460 */
3461
3462 /* call (session) start, tokens optional */
3463 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclst")));
3464
3465 /* service start, in this case SRecTest service */
3466 CHKLOG(rc, SR_EventLogToken_BASIC(impl->eventLog, impl->osi_log_level, L("SVNM"), sessionName));
3467 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIsvst")));
3468 if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3469 CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3470
3471 return ESR_SUCCESS;
3472 CLEANUP:
3473 return rc;
3474 }
3475
SR_RecognizerLogSessionEndImpl(SR_Recognizer * self)3476 ESR_ReturnCode SR_RecognizerLogSessionEndImpl(SR_Recognizer* self)
3477 {
3478 ESR_ReturnCode rc;
3479 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3480
3481 /* OSI log end of call (session) */
3482 CHKLOG(rc, SR_EventLogEvent_BASIC(impl->eventLog, impl->osi_log_level, L("SWIclnd")));
3483 if (impl->osi_log_level & OSI_LOG_LEVEL_BASIC)
3484 CHKLOG(rc, SR_EventLogEventSession(impl->eventLog));
3485 return ESR_SUCCESS;
3486 CLEANUP:
3487 return rc;
3488 }
3489
3490
SR_RecognizerLogWaveformDataImpl(SR_Recognizer * self,const LCHAR * waveformFilename,const LCHAR * transcription,const double bos,const double eos,ESR_BOOL isInvocab)3491 ESR_ReturnCode SR_RecognizerLogWaveformDataImpl(SR_Recognizer* self, const LCHAR* waveformFilename,
3492 const LCHAR* transcription, const double bos,
3493 const double eos, ESR_BOOL isInvocab)
3494 {
3495 ESR_ReturnCode rc;
3496 SR_RecognizerImpl *impl = (SR_RecognizerImpl*) self;
3497 LCHAR num[P_PATH_MAX];
3498 int frame;
3499
3500 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("FILE"), waveformFilename));
3501 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("TRANS"), transcription));
3502 sprintf(num, L("%.2f"), bos);
3503 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_SEC"), num));
3504 sprintf(num, L("%.2f"), eos);
3505 CHKLOG(rc, SR_EventLogToken_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_SEC"), num));
3506 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("FRAMESIZE"), impl->FRAME_SIZE));
3507 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("SAMPLERATE"), impl->sampleRate));
3508 frame = (int)(bos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3509 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("BOS_FR"), frame));
3510 frame = (int)(eos * impl->sampleRate * 2 /* 2 bytes per sample */) / impl->FRAME_SIZE;
3511 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("EOS_FR"), frame));
3512 CHKLOG(rc, SR_EventLogTokenInt_AUDIO(impl->eventLog, impl->osi_log_level, L("INVOCAB"), isInvocab));
3513 CHKLOG(rc, SR_EventLogEvent_AUDIO(impl->eventLog, impl->osi_log_level, L("ESRwfrd")));
3514 return ESR_SUCCESS;
3515 CLEANUP:
3516 return rc;
3517 }
3518
SR_RecognizerSetLockFunctionImpl(SR_Recognizer * self,SR_RecognizerLockFunction function,void * data)3519 ESR_ReturnCode SR_RecognizerSetLockFunctionImpl(SR_Recognizer* self, SR_RecognizerLockFunction function, void* data)
3520 {
3521 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3522
3523 impl->lockFunction = function;
3524 impl->lockData = data;
3525 return ESR_SUCCESS;
3526 }
3527
doSignalQualityInit(SR_RecognizerImpl * impl)3528 static ESR_ReturnCode doSignalQualityInit(SR_RecognizerImpl* impl)
3529 {
3530 CA_DoSignalCheck(impl->wavein, &impl->isSignalClipping, &impl->isSignalDCOffset,
3531 &impl->isSignalNoisy, &impl->isSignalTooQuiet, &impl->isSignalTooFewSamples,
3532 &impl->isSignalTooManySamples);
3533 impl->isSignalQualityInitialized = ESR_TRUE;
3534 return ESR_SUCCESS;
3535 }
3536
SR_RecognizerIsSignalClippingImpl(SR_Recognizer * self,ESR_BOOL * isClipping)3537 ESR_ReturnCode SR_RecognizerIsSignalClippingImpl(SR_Recognizer* self, ESR_BOOL* isClipping)
3538 {
3539 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3540 ESR_ReturnCode rc;
3541
3542 if (isClipping == NULL)
3543 {
3544 PLogError("SR_RecognizerIsSignalClippingImpl", ESR_INVALID_ARGUMENT);
3545 return ESR_INVALID_ARGUMENT;
3546 }
3547 if (!impl->isSignalQualityInitialized)
3548 CHKLOG(rc, doSignalQualityInit(impl));
3549 *isClipping = impl->isSignalClipping;
3550 return ESR_SUCCESS;
3551 CLEANUP:
3552 return rc;
3553 }
3554
SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer * self,ESR_BOOL * isDCOffset)3555 ESR_ReturnCode SR_RecognizerIsSignalDCOffsetImpl(SR_Recognizer* self, ESR_BOOL* isDCOffset)
3556 {
3557 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3558 ESR_ReturnCode rc;
3559
3560 if (isDCOffset == NULL)
3561 {
3562 PLogError("SR_RecognizerIsSignalDCOffsetImpl", ESR_INVALID_ARGUMENT);
3563 return ESR_INVALID_ARGUMENT;
3564 }
3565 if (!impl->isSignalQualityInitialized)
3566 CHKLOG(rc, doSignalQualityInit(impl));
3567 *isDCOffset = impl->isSignalDCOffset;
3568 return ESR_SUCCESS;
3569 CLEANUP:
3570 return rc;
3571 }
3572
SR_RecognizerIsSignalNoisyImpl(SR_Recognizer * self,ESR_BOOL * isNoisy)3573 ESR_ReturnCode SR_RecognizerIsSignalNoisyImpl(SR_Recognizer* self, ESR_BOOL* isNoisy)
3574 {
3575 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3576 ESR_ReturnCode rc;
3577
3578 if (isNoisy == NULL)
3579 {
3580 PLogError("SR_RecognizerIsSignalNoisyImpl", ESR_INVALID_ARGUMENT);
3581 return ESR_INVALID_ARGUMENT;
3582 }
3583 if (!impl->isSignalQualityInitialized)
3584 CHKLOG(rc, doSignalQualityInit(impl));
3585 *isNoisy = impl->isSignalNoisy;
3586 return ESR_SUCCESS;
3587 CLEANUP:
3588 return rc;
3589 }
3590
SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer * self,ESR_BOOL * isTooQuiet)3591 ESR_ReturnCode SR_RecognizerIsSignalTooQuietImpl(SR_Recognizer* self, ESR_BOOL* isTooQuiet)
3592 {
3593 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3594 ESR_ReturnCode rc;
3595
3596 if (isTooQuiet == NULL)
3597 {
3598 PLogError("SR_RecognizerIsSignalTooQuietImpl", ESR_INVALID_ARGUMENT);
3599 return ESR_INVALID_ARGUMENT;
3600 }
3601 if (!impl->isSignalQualityInitialized)
3602 CHKLOG(rc, doSignalQualityInit(impl));
3603 *isTooQuiet = impl->isSignalTooQuiet;
3604 return ESR_SUCCESS;
3605 CLEANUP:
3606 return rc;
3607 }
3608
SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer * self,ESR_BOOL * isTooFewSamples)3609 ESR_ReturnCode SR_RecognizerIsSignalTooFewSamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooFewSamples)
3610 {
3611 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3612 ESR_ReturnCode rc;
3613
3614 if (isTooFewSamples == NULL)
3615 {
3616 PLogError("SR_RecognizerIsSignalTooFewSamplesImpl", ESR_INVALID_ARGUMENT);
3617 return ESR_INVALID_ARGUMENT;
3618 }
3619 if (!impl->isSignalQualityInitialized)
3620 CHKLOG(rc, doSignalQualityInit(impl));
3621 *isTooFewSamples = impl->isSignalTooFewSamples;
3622 return ESR_SUCCESS;
3623 CLEANUP:
3624 return rc;
3625 }
3626
SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer * self,ESR_BOOL * isTooManySamples)3627 ESR_ReturnCode SR_RecognizerIsSignalTooManySamplesImpl(SR_Recognizer* self, ESR_BOOL* isTooManySamples)
3628 {
3629 SR_RecognizerImpl* impl = (SR_RecognizerImpl*) self;
3630 ESR_ReturnCode rc;
3631
3632 if (isTooManySamples == NULL)
3633 {
3634 PLogError("SR_RecognizerIsSignalTooManySamplesImpl", ESR_INVALID_ARGUMENT);
3635 return ESR_INVALID_ARGUMENT;
3636 }
3637 if (!impl->isSignalQualityInitialized)
3638 CHKLOG(rc, doSignalQualityInit(impl));
3639 *isTooManySamples = impl->isSignalTooManySamples;
3640 return ESR_SUCCESS;
3641 CLEANUP:
3642 return rc;
3643 }
3644
3645
3646
3647 /**************************************/
3648 /* Waveform Buffer stuff */
3649 /**************************************/
WaveformBuffer_Create(WaveformBuffer ** waveformBuffer,size_t frame_size)3650 ESR_ReturnCode WaveformBuffer_Create(WaveformBuffer** waveformBuffer, size_t frame_size)
3651 {
3652 ESR_ReturnCode rc;
3653 WaveformBuffer *buf;
3654 size_t val_size_t;
3655 int val_int;
3656 ESR_BOOL exists;
3657
3658 buf = NEW(WaveformBuffer, L("SR_RecognizerImpl.wvfmbuf"));
3659 if (buf == NULL)
3660 {
3661 rc = ESR_OUT_OF_MEMORY;
3662 PLogError(L("%s: could not create WaveformBuffer"), ESR_rc2str(rc));
3663 goto CLEANUP;
3664 }
3665
3666 ESR_SessionContains(L("SREC.voice_enroll.bufsz_kB"), &exists);
3667 if (exists)
3668 ESR_SessionGetSize_t(L("SREC.voice_enroll.bufsz_kB"), &val_size_t);
3669 else
3670 val_size_t = DEFAULT_WAVEFORM_BUFFER_MAX_SIZE;
3671 val_size_t *= 1024; /* convert to kB*/
3672 CHKLOG(rc, CircularBufferCreate(val_size_t, L("SR_RecognizerImpl.wvfmbuf.cbuffer"), &buf->cbuffer));
3673
3674 ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
3675 if (exists)
3676 ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &val_int);
3677 else
3678 val_int = DEFAULT_WAVEFORM_WINDBACK_FRAMES;
3679 val_int *= frame_size; /* convert frames to bytes */
3680 buf->windback_buffer_sz = (size_t) val_int;
3681 buf->windback_buffer = MALLOC(buf->windback_buffer_sz, L("SR_RecognizerImpl.wvfmbuf.windback"));
3682 if (buf->windback_buffer == NULL)
3683 {
3684 rc = ESR_OUT_OF_MEMORY;
3685 PLogError(L("%s: could not create Waveform windback buffer"), ESR_rc2str(rc));
3686 goto CLEANUP;
3687 }
3688
3689
3690 ESR_SessionContains(L("SREC.voice_enroll.eos_comfort_frames"), &exists);
3691 if (exists)
3692 ESR_SessionGetSize_t(L("SREC.voice_enroll.eos_comfort_frames"), &val_size_t);
3693 else
3694 val_size_t = DEFAULT_EOS_COMFORT_FRAMES;
3695 buf->eos_comfort_frames = val_size_t;
3696
3697 ESR_SessionContains(L("SREC.voice_enroll.bos_comfort_frames"), &exists);
3698 if (exists)
3699 ESR_SessionGetSize_t(L("SREC.voice_enroll.bos_comfort_frames"), &val_size_t);
3700 else
3701 val_size_t = DEFAULT_BOS_COMFORT_FRAMES;
3702 buf->bos_comfort_frames = val_size_t;
3703
3704 /* initially off */
3705 buf->state = WAVEFORM_BUFFERING_OFF;
3706
3707 *waveformBuffer = buf;
3708 return ESR_SUCCESS;
3709 CLEANUP:
3710 WaveformBuffer_Destroy(buf);
3711 return rc;
3712 }
3713
WaveformBuffer_Write(WaveformBuffer * waveformBuffer,void * data,size_t num_bytes)3714 ESR_ReturnCode WaveformBuffer_Write(WaveformBuffer* waveformBuffer, void *data, size_t num_bytes)
3715 {
3716 size_t available_bytes;
3717 size_t done_bytes;
3718
3719 /* do nothing if not active */
3720 switch (waveformBuffer->state)
3721 {
3722 case WAVEFORM_BUFFERING_OFF:
3723 return ESR_SUCCESS;
3724
3725 case WAVEFORM_BUFFERING_ON_CIRCULAR:
3726 available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3727 if (available_bytes < num_bytes)
3728 {
3729 done_bytes = CircularBufferSkip(waveformBuffer->cbuffer, num_bytes - available_bytes);
3730 if (done_bytes != num_bytes - available_bytes)
3731 {
3732 PLogError("WaveformBuffer_Write: error when skipping bytes");
3733 return ESR_INVALID_STATE;
3734 }
3735 }
3736 done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3737 if (done_bytes != num_bytes)
3738 {
3739 PLogError("WaveformBuffer_Write: error when writing bytes");
3740 return ESR_INVALID_STATE;
3741 }
3742 return ESR_SUCCESS;
3743
3744 case WAVEFORM_BUFFERING_ON_LINEAR:
3745 available_bytes = CircularBufferGetAvailable(waveformBuffer->cbuffer);
3746 if (available_bytes < num_bytes)
3747 {
3748 waveformBuffer->overflow_count += num_bytes;
3749 return ESR_BUFFER_OVERFLOW;
3750 }
3751 done_bytes = CircularBufferWrite(waveformBuffer->cbuffer, data, num_bytes);
3752 if (done_bytes != num_bytes)
3753 {
3754 PLogError("WaveformBuffer_Write: error when writing bytes");
3755 return ESR_INVALID_STATE;
3756 }
3757 return ESR_SUCCESS;
3758
3759 default:
3760 PLogError("WaveformBuffer_Write: bad control path");
3761 return ESR_INVALID_STATE;
3762 }
3763 }
3764
WaveformBuffer_Read(WaveformBuffer * waveformBuffer,void * data,size_t * num_bytes)3765 ESR_ReturnCode WaveformBuffer_Read(WaveformBuffer* waveformBuffer, void *data, size_t* num_bytes)
3766 {
3767 size_t bytes_to_read;
3768 ESR_ReturnCode rc;
3769
3770 if (num_bytes == NULL)
3771 {
3772 rc = ESR_INVALID_ARGUMENT;
3773 PLogError(ESR_rc2str(rc));
3774 goto CLEANUP;
3775 }
3776 if (waveformBuffer->overflow_count > 0)
3777 {
3778 memset(data, 0, *num_bytes);
3779 *num_bytes = 0;
3780 PLogError(L("WaveformBuffer_Read: previous overflow causes read to return NULL"));
3781 return ESR_SUCCESS;
3782 }
3783
3784 if (waveformBuffer->read_size != 0 && *num_bytes > waveformBuffer->read_size)
3785 {
3786 PLogError(L("ESR_OUT_OF_MEMORY: waveform buffer too small for read, increase from %d to %d"), *num_bytes, waveformBuffer->read_size);
3787 return ESR_OUT_OF_MEMORY;
3788 }
3789
3790 if (waveformBuffer->read_size == 0)
3791 bytes_to_read = *num_bytes;
3792 else
3793 bytes_to_read = MIN(waveformBuffer->read_size, *num_bytes);
3794 waveformBuffer->read_size -= bytes_to_read;
3795 *num_bytes = CircularBufferRead(waveformBuffer->cbuffer, data, bytes_to_read);
3796 if (*num_bytes != bytes_to_read)
3797 {
3798 PLogError("WaveformBuffer_Read: error reading buffer");
3799 return ESR_INVALID_STATE;
3800 }
3801 return ESR_SUCCESS;
3802 CLEANUP:
3803 return rc;
3804 }
3805
3806 /* WindBack will save the last num_bytes recorded, reset the buffer, and then load the
3807 saved bytes at the beginning of the buffer */
WaveformBuffer_WindBack(WaveformBuffer * waveformBuffer,const size_t num_bytes)3808 ESR_ReturnCode WaveformBuffer_WindBack(WaveformBuffer* waveformBuffer, const size_t num_bytes)
3809 {
3810 ESR_ReturnCode rc;
3811 size_t bufferSize;
3812
3813 if (num_bytes <= 0)
3814 {
3815 CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3816 return ESR_SUCCESS;
3817 }
3818
3819 /* make sure windback buffer is big enough */
3820 if (num_bytes > waveformBuffer->windback_buffer_sz)
3821 {
3822 rc = ESR_OUT_OF_MEMORY;
3823 PLogError(L("%s: windback buffer is too small (needed=%d, had=%d)"), ESR_rc2str(rc), num_bytes, waveformBuffer->windback_buffer_sz);
3824 goto CLEANUP;
3825 }
3826
3827 CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3828 /* skip the first few bytes written */
3829 if (bufferSize < num_bytes)
3830 {
3831 PLogError("bufferSize %d num_bytes %d (ESR_INVALID_STATE)\n", bufferSize, num_bytes);
3832 bufferSize = 0;
3833 }
3834 else
3835 {
3836 bufferSize -= num_bytes;
3837 }
3838 CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, bufferSize));
3839 /* read the last few bytes written */
3840 bufferSize = num_bytes;
3841 CHKLOG(rc, WaveformBuffer_Read(waveformBuffer, waveformBuffer->windback_buffer, &bufferSize));
3842
3843 /* reset buffer */
3844 CHKLOG(rc, WaveformBuffer_Reset(waveformBuffer));
3845
3846 /* rewrite the saved bytes at the beginning */
3847 CHKLOG(rc, WaveformBuffer_Write(waveformBuffer, waveformBuffer->windback_buffer, bufferSize));
3848 return ESR_SUCCESS;
3849 CLEANUP:
3850 return rc;
3851 }
3852
WaveformBuffer_Destroy(WaveformBuffer * waveformBuffer)3853 ESR_ReturnCode WaveformBuffer_Destroy(WaveformBuffer* waveformBuffer)
3854 {
3855 if (waveformBuffer->cbuffer)
3856 FREE(waveformBuffer->cbuffer);
3857 if (waveformBuffer->windback_buffer)
3858 FREE(waveformBuffer->windback_buffer);
3859 if (waveformBuffer)
3860 FREE(waveformBuffer);
3861 return ESR_SUCCESS;
3862 }
3863
WaveformBuffer_SetBufferingState(WaveformBuffer * waveformBuffer,waveform_buffering_state_t state)3864 ESR_ReturnCode WaveformBuffer_SetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t state)
3865 {
3866 waveformBuffer->state = state;
3867 return ESR_SUCCESS;
3868 }
3869
WaveformBuffer_GetBufferingState(WaveformBuffer * waveformBuffer,waveform_buffering_state_t * state)3870 ESR_ReturnCode WaveformBuffer_GetBufferingState(WaveformBuffer* waveformBuffer, waveform_buffering_state_t* state)
3871 {
3872 *state = waveformBuffer->state;
3873 return ESR_SUCCESS;
3874 }
3875
3876 /**
3877 * @return ESR_BUFFER_OVERFLOW if nametag EOS occured beyond end of buffer
3878 */
WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer * waveformBuffer,const LCHAR * end_pointed_result,const size_t bytes_per_frame)3879 ESR_ReturnCode WaveformBuffer_ParseEndPointedResultAndTrim(WaveformBuffer* waveformBuffer, const LCHAR* end_pointed_result, const size_t bytes_per_frame)
3880 {
3881 const LCHAR *p;
3882 size_t bos_frame, eos_frame, bufferSize, read_start_offset;
3883 ESR_ReturnCode rc;
3884
3885 /* potential end pointed results
3886
3887 -pau-@19 tape@36 scan@64 down@88 -pau2-@104
3888 -pau-@19 tape@34 off@55 -pau2-@78
3889 -pau-@19 tape@47 help@66 -pau2-@80
3890 -pau-@16 tape@36 reverse@71 -pau2-@91
3891 -pau-@21 tape@42 scan@59 down@80 -pau2-@91
3892
3893 what I need to extract is the integer between "-pau-@" and ' '
3894 and the integer between '@' and " -pau2-"
3895 */
3896
3897
3898 p = LSTRSTR( end_pointed_result, PREFIX_WORD);
3899 if(p) p+=PREFIX_WORD_LEN; while(p && *p == '@') p++;
3900 rc = p ? lstrtosize_t(p, &bos_frame, 10) : ESR_INVALID_ARGUMENT;
3901 if (rc == ESR_INVALID_ARGUMENT)
3902 {
3903 PLogError(L("%s: extracting bos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3904 goto CLEANUP;
3905 }
3906 else if (rc != ESR_SUCCESS)
3907 goto CLEANUP;
3908
3909 p = LSTRSTR( end_pointed_result, SUFFIX_WORD);
3910 while(p && p>end_pointed_result && p[-1]!='@') --p;
3911 rc = p ? lstrtosize_t(p, &eos_frame, 10) : ESR_INVALID_ARGUMENT;
3912 if (rc == ESR_INVALID_ARGUMENT)
3913 {
3914 PLogError(L("%s: extracting eos from text=%s"), ESR_rc2str(rc), end_pointed_result);
3915 goto CLEANUP;
3916 }
3917 else if (rc != ESR_SUCCESS)
3918 goto CLEANUP;
3919
3920 bos_frame -= (bos_frame > waveformBuffer->bos_comfort_frames ? waveformBuffer->bos_comfort_frames : 0);
3921 eos_frame += waveformBuffer->eos_comfort_frames;
3922
3923 /*
3924 * I know where speech started, so I want to skip frames 0 to bos_frame.
3925 * I also know where speech ended so I want to set the amount of frames(bytes) to read for
3926 * the nametag audio buffer (i.e. the read_size)
3927 */
3928
3929 read_start_offset = bos_frame * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3930 waveformBuffer->read_size = (eos_frame - bos_frame) * bytes_per_frame * 2 /* times 2 because of skip even frames */;
3931
3932 CHKLOG(rc, WaveformBuffer_GetSize(waveformBuffer, &bufferSize));
3933 if (read_start_offset + waveformBuffer->read_size > bufferSize)
3934 {
3935 waveformBuffer->overflow_count += read_start_offset + waveformBuffer->read_size - bufferSize;
3936 passert(waveformBuffer->overflow_count > 0);
3937 PLogMessage(L("Warning: Voice Enrollment audio buffer overflow (spoke too much, over by %d bytes)"),
3938 waveformBuffer->overflow_count);
3939 return ESR_BUFFER_OVERFLOW;
3940 }
3941 CHKLOG(rc, WaveformBuffer_Skip(waveformBuffer, read_start_offset));
3942 #ifdef SREC_ENGINE_VERBOSE_LOGGING
3943 PLogMessage(L("Voice Enrollment: bos@%d, eos@%d, therefore sizeof(waveform) should be %d"), bos_frame, eos_frame, waveformBuffer->read_size);
3944 #endif
3945 return ESR_SUCCESS;
3946 CLEANUP:
3947 return rc;
3948 }
3949
3950
WaveformBuffer_Reset(WaveformBuffer * waveformBuffer)3951 ESR_ReturnCode WaveformBuffer_Reset(WaveformBuffer* waveformBuffer)
3952 {
3953 CircularBufferReset(waveformBuffer->cbuffer);
3954 waveformBuffer->overflow_count = 0;
3955 waveformBuffer->read_size = 0;
3956 return ESR_SUCCESS;
3957 }
3958
WaveformBuffer_GetSize(WaveformBuffer * waveformBuffer,size_t * size)3959 ESR_ReturnCode WaveformBuffer_GetSize(WaveformBuffer* waveformBuffer, size_t* size)
3960 {
3961 *size = CircularBufferGetSize(waveformBuffer->cbuffer);
3962 return ESR_SUCCESS;
3963 }
3964
WaveformBuffer_Skip(WaveformBuffer * waveformBuffer,const size_t bytes)3965 ESR_ReturnCode WaveformBuffer_Skip(WaveformBuffer* waveformBuffer, const size_t bytes)
3966 {
3967 if (CircularBufferSkip(waveformBuffer->cbuffer, bytes) != (int) bytes)
3968 return ESR_INVALID_STATE;
3969 return ESR_SUCCESS;
3970 }
3971
3972
3973
SR_Recognizer_Reset_Buffers(SR_RecognizerImpl * impl)3974 static ESR_ReturnCode SR_Recognizer_Reset_Buffers ( SR_RecognizerImpl *impl )
3975 {
3976 ESR_ReturnCode reset_status;
3977
3978 FREE ( impl->audioBuffer );
3979 impl->audioBuffer = NULL;
3980 impl->audioBuffer = MALLOC ( impl->FRAME_SIZE, MTAG );
3981
3982 if ( impl->audioBuffer != NULL )
3983 {
3984 WaveformBuffer_Destroy ( impl->waveformBuffer );
3985 impl->waveformBuffer = NULL;
3986 reset_status = WaveformBuffer_Create ( &impl->waveformBuffer, impl->FRAME_SIZE );
3987 }
3988 else
3989 {
3990 reset_status = ESR_OUT_OF_MEMORY;
3991 }
3992 return ( reset_status );
3993 }
3994
3995
3996
SR_Recognizer_Validate_Sample_Rate(size_t sample_rate)3997 static ESR_ReturnCode SR_Recognizer_Validate_Sample_Rate ( size_t sample_rate )
3998 {
3999 ESR_ReturnCode validate_status;
4000
4001 switch ( sample_rate )
4002 {
4003 case 8000:
4004 case 11025:
4005 case 16000:
4006 case 22050:
4007 validate_status = ESR_SUCCESS;
4008 break;
4009
4010 default:
4011 validate_status = ESR_INVALID_ARGUMENT;
4012 break;
4013 }
4014 return ( validate_status );
4015 }
4016
4017
4018
SR_Recognizer_Sample_Rate_Needs_Change(size_t new_sample_rate,ESR_BOOL * needs_changing)4019 static ESR_ReturnCode SR_Recognizer_Sample_Rate_Needs_Change ( size_t new_sample_rate, ESR_BOOL *needs_changing )
4020 {
4021 ESR_ReturnCode validate_status;
4022 size_t current_sample_rate;
4023
4024 validate_status = ESR_SessionGetSize_t ( "CREC.Frontend.samplerate", ¤t_sample_rate );
4025
4026 if ( validate_status == ESR_SUCCESS )
4027 {
4028 if ( new_sample_rate != current_sample_rate )
4029 *needs_changing = ESR_TRUE;
4030 else
4031 *needs_changing = ESR_TRUE;
4032 }
4033 return ( validate_status );
4034 }
4035
4036
4037
SR_Recognizer_Change_Sample_Rate_Session_Params_8K(void)4038 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( void )
4039 {
4040 ESR_ReturnCode change_status;
4041 LCHAR model_filenames [P_PATH_MAX];
4042 LCHAR lda_filename [P_PATH_MAX];
4043 size_t filename_length;
4044
4045 filename_length = P_PATH_MAX;
4046 change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles8"), model_filenames, &filename_length );
4047
4048 if ( change_status == ESR_SUCCESS )
4049 {
4050 filename_length = P_PATH_MAX;
4051 change_status = ESR_SessionGetLCHAR ( L("cmdline.lda8"), lda_filename, &filename_length );
4052
4053 /* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4054 if ( change_status == ESR_SUCCESS )
4055 {
4056 change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", 8000 );
4057 if ( change_status == ESR_SUCCESS )
4058 {
4059 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 4000 );
4060
4061 if ( change_status == ESR_SUCCESS )
4062 {
4063 change_status = ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4064
4065 if ( change_status == ESR_SUCCESS )
4066 change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4067 }
4068 }
4069 }
4070 else
4071 {
4072 PLogError (L("\nMissing Parameter lda8\n"));
4073 }
4074 }
4075 else
4076 {
4077 PLogError (L("\nMissing Parameter models8\n"));
4078 }
4079 return ( change_status );
4080 }
4081
4082
4083
SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K(size_t sample_rate)4084 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( size_t sample_rate )
4085 {
4086 ESR_ReturnCode change_status;
4087 LCHAR model_filenames [P_PATH_MAX];
4088 LCHAR lda_filename [P_PATH_MAX];
4089 size_t filename_length;
4090
4091 filename_length = P_PATH_MAX;
4092 change_status = ESR_SessionGetLCHAR ( L("cmdline.modelfiles11"), model_filenames, &filename_length );
4093
4094 if ( change_status == ESR_SUCCESS )
4095 {
4096 filename_length = P_PATH_MAX;
4097 change_status = ESR_SessionGetLCHAR ( L("cmdline.lda11"), lda_filename, &filename_length );
4098
4099 /* From this point on, if an error occurs, we're screwed and recovery is probably impossible */
4100 if ( change_status == ESR_SUCCESS )
4101 {
4102 change_status = ESR_SessionSetSize_t ( "CREC.Frontend.samplerate", sample_rate );
4103
4104 if ( change_status == ESR_SUCCESS )
4105 {
4106 change_status = ESR_SessionSetInt ( "CREC.Frontend.highcut", 5500 );
4107
4108 if ( change_status == ESR_SUCCESS )
4109 {
4110 change_status = ESR_SessionSetLCHAR ( L("cmdline.modelfiles"), model_filenames );
4111
4112 if ( change_status == ESR_SUCCESS )
4113 change_status = ESR_SessionSetLCHAR ( L("cmdline.lda"), lda_filename );
4114 }
4115 }
4116 }
4117 else
4118 {
4119 PLogError (L("\nMissing Parameter lda11\n"));
4120 }
4121 }
4122 else
4123 {
4124 PLogError (L("\nMissing Parameter models11\n"));
4125 }
4126 return ( change_status );
4127 }
4128
4129
4130
SR_Recognizer_Change_Sample_Rate_Session_Params(size_t new_sample_rate)4131 static ESR_ReturnCode SR_Recognizer_Change_Sample_Rate_Session_Params ( size_t new_sample_rate )
4132 {
4133 ESR_ReturnCode change_status;
4134
4135 if ( new_sample_rate == 8000 )
4136 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_8K ( );
4137 else
4138 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params_11K_to_22K ( new_sample_rate );
4139
4140 return ( change_status );
4141 }
4142
4143
4144
SR_Recognizer_Change_Sample_RateImpl(SR_Recognizer * recognizer,size_t new_sample_rate)4145 ESR_ReturnCode SR_Recognizer_Change_Sample_RateImpl ( SR_Recognizer *recognizer, size_t new_sample_rate )
4146 {
4147 ESR_ReturnCode change_status;
4148 ESR_BOOL rate_needs_changing;
4149 SR_RecognizerImpl *impl;
4150 CA_FrontendInputParams *frontendParams;
4151
4152 change_status = SR_Recognizer_Validate_Sample_Rate ( new_sample_rate );
4153
4154 if ( change_status == ESR_SUCCESS )
4155 {
4156 change_status = SR_Recognizer_Sample_Rate_Needs_Change ( new_sample_rate, &rate_needs_changing );
4157
4158 if ( change_status == ESR_SUCCESS )
4159 {
4160 if ( rate_needs_changing == ESR_TRUE )
4161 {
4162 change_status = SR_Recognizer_Change_Sample_Rate_Session_Params ( new_sample_rate );
4163
4164 if ( change_status == ESR_SUCCESS )
4165 { // SR_RecognizerCreateFrontendImpl
4166 impl = (SR_RecognizerImpl *)recognizer;
4167 change_status = SR_RecognizerUnsetupImpl( recognizer );
4168
4169 if ( change_status == ESR_SUCCESS )
4170 {
4171 CA_UnconfigureFrontend ( impl->frontend );
4172 frontendParams = CA_AllocateFrontendParameters ( );
4173
4174 if ( frontendParams != NULL )
4175 {
4176 change_status = SR_RecognizerGetFrontendLegacyParametersImpl ( frontendParams );
4177
4178 if ( change_status == ESR_SUCCESS )
4179 {
4180 CA_ConfigureFrontend ( impl->frontend, frontendParams );
4181 CA_UnconfigureWave ( impl->wavein );
4182 CA_ConfigureWave ( impl->wavein, impl->frontend );
4183 impl->sampleRate = new_sample_rate;
4184 impl->FRAME_SIZE = impl->sampleRate / FRAMERATE * SAMPLE_SIZE;
4185 change_status = SR_Recognizer_Reset_Buffers ( impl );
4186
4187 if ( change_status == ESR_SUCCESS )
4188 {
4189 change_status = SR_RecognizerSetupImpl( recognizer );
4190
4191 if ( change_status == ESR_SUCCESS )
4192 change_status = SR_AcousticStateReset ( recognizer );
4193 }
4194 else
4195 {
4196 SR_RecognizerSetupImpl( recognizer ); /* Otherwise recognizer is in bad state */
4197 }
4198 }
4199 CA_FreeFrontendParameters ( frontendParams );
4200 }
4201 else
4202 {
4203 SR_RecognizerSetupImpl( recognizer ); /* Otherwise recognizer is in bad state */
4204 change_status = ESR_OUT_OF_MEMORY;
4205 }
4206 }
4207 }
4208 }
4209 }
4210 }
4211 return ( change_status );
4212 }
4213
4214
4215