1 /*---------------------------------------------------------------------------*
2 * VocabularyImpl.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include "ESR_Session.h"
21 #include "SR_Vocabulary.h"
22 #include "SR_VocabularyImpl.h"
23 #include "passert.h"
24 #include "plog.h"
25 #include "ptypes.h"
26 #include "pmemory.h"
27
28 //#define DEBUG 1
29 #define MAX_PRON_LEN 256
30 #define MAX_WORD_LEN 40
31 #define MTAG NULL
32 #define MAX_PHONE_LEN 4
33 #define DO_DEFER_LOADING_UNTIL_LOOKUPS 1
34
35 static PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end);
36 static PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end);
37 static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length);
38
39 #define MAX_NUM_PRONS 4
40 #define LSTRDUP(src) LSTRCPY(CALLOC(LSTRLEN(src)+1, sizeof(LCHAR), "srec.Vocabulary.LSTRDUP"), (src))
41 #define LSTRFREE(src) FREE(src)
42
/**
 * Sets up the G2P engine handle of a vocabulary from the session's
 * "G2P.Available" and "G2P.Data" settings.
 *
 * @param self SR_Vocabulary handle
 */
48 #ifdef USE_TTP
SR_CreateG2P(SR_Vocabulary * self)49 ESR_ReturnCode SR_CreateG2P(SR_Vocabulary* self)
50 {
51 ESR_ReturnCode rc = ESR_SUCCESS;
52 SWIsltsResult res = SWIsltsSuccess;
53 SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
54 LCHAR szG2PDataFile[P_PATH_MAX];
55 size_t len = P_PATH_MAX;
56 ESR_BOOL bG2P = ESR_TRUE;
57
58 rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
59 if ( rc != ESR_SUCCESS )
60 {
61 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
62 return rc;
63 }
64 if ( bG2P == ESR_FALSE )
65 {
66 impl->hSlts = NULL;
67 return ESR_SUCCESS;
68 }
69
70 rc = ESR_SessionGetLCHAR ( L("G2P.Data"), szG2PDataFile, &len );
71 if ( rc != ESR_SUCCESS )
72 {
73 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetLCHAR() - G2P.Data fails with return code %d\n"), rc);
74 return rc;
75 }
76 rc = ESR_SessionPrefixWithBaseDirectory(szG2PDataFile, &len);
77 if ( rc != ESR_SUCCESS )
78 {
79 PLogError(L("ESR_FATAL_ERROR: ESR_SessionPrefixWithBaseDirectory() - G2P.Data fails with return code %d\n"), rc);
80 return rc;
81 }
82
83 res = SWIsltsInit();
84 if (res == SWIsltsSuccess)
85 {
86 /* data_file: en-US-ttp.data */
87 res = SWIsltsOpen(&(impl->hSlts), szG2PDataFile);
88 if (res != SWIsltsSuccess)
89 {
90 PLogError(L("ESR_FATAL_ERROR: SWIsltsOpen( ) fails with return code %d\n"), res);
91 FREE(impl);
92 return ESR_FATAL_ERROR;
93 }
94 }
95 else
96 {
97 PLogError(L("ESR_FATAL_ERROR: SWIsltsInit( ) fails with return code %d\n"), res);
98 FREE(impl);
99 return ESR_FATAL_ERROR;
100 }
101 return rc;
102 }
103
SR_DestroyG2P(SR_Vocabulary * self)104 ESR_ReturnCode SR_DestroyG2P(SR_Vocabulary* self)
105 {
106 ESR_ReturnCode rc = ESR_SUCCESS;
107 SWIsltsResult res = SWIsltsSuccess;
108 SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
109 ESR_BOOL bG2P = ESR_TRUE;
110
111 rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
112 if ( rc != ESR_SUCCESS )
113 {
114 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
115 return rc;
116 }
117 if ( bG2P == ESR_FALSE || impl->hSlts == NULL)
118 {
119 return ESR_SUCCESS;
120 }
121
122 res = SWIsltsClose(impl->hSlts);
123 if (res == SWIsltsSuccess)
124 {
125 res = SWIsltsTerm();
126 if (res != SWIsltsSuccess)
127 {
128 PLogError(L("ESR_FATAL_ERROR: SWIsltsTerm( ) fails with return code %d\n"), res);
129 rc = ESR_FATAL_ERROR;
130 }
131 }
132 else
133 {
134 PLogError(L("ESR_FATAL_ERROR: SWIsltsClose( ) fails with return code %d\n"), res);
135 rc = ESR_FATAL_ERROR;
136 }
137 return rc;
138 }
139 #endif /* USE_TTP */
140
141 /**
142 * Creates a new vocabulary but does not set the locale.
143 *
144 * @param self SR_Vocabulary handle
145 */
SR_VocabularyCreateImpl(SR_Vocabulary ** self)146 ESR_ReturnCode SR_VocabularyCreateImpl(SR_Vocabulary** self)
147 {
148 SR_VocabularyImpl* impl;
149
150 if (self==NULL)
151 {
152 PLogError(L("ESR_INVALID_ARGUMENT"));
153 return ESR_INVALID_ARGUMENT;
154 }
155 impl = NEW(SR_VocabularyImpl, MTAG);
156 if (impl==NULL)
157 {
158 PLogError(L("ESR_OUT_OF_MEMORY"));
159 return ESR_OUT_OF_MEMORY;
160 }
161
162 impl->Interface.save = &SR_VocabularySaveImpl;
163 impl->Interface.getPronunciation = &SR_VocabularyGetPronunciationImpl;
164 impl->Interface.getLanguage = &SR_VocabularyGetLanguageImpl;
165 impl->Interface.destroy = &SR_VocabularyDestroyImpl;
166 impl->vocabulary = NULL;
167
168 *self = (SR_Vocabulary*) impl;
169 impl->hSlts = NULL;
170 return ESR_SUCCESS;
171 }
172
SR_VocabularyDestroyImpl(SR_Vocabulary * self)173 ESR_ReturnCode SR_VocabularyDestroyImpl(SR_Vocabulary* self)
174 {
175 SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
176
177 #ifdef USE_TTP
178 SR_DestroyG2P(self);
179 #endif
180
181 if (impl->vocabulary!=NULL)
182 {
183 CA_UnloadDictionary(impl->vocabulary);
184 CA_FreeVocabulary(impl->vocabulary);
185 impl->vocabulary = NULL;
186 }
187 LSTRFREE(impl->filename);
188 FREE(impl);
189 return ESR_SUCCESS;
190 }
191
sr_vocabularyloadimpl_for_real(SR_VocabularyImpl * impl)192 ESR_ReturnCode sr_vocabularyloadimpl_for_real(SR_VocabularyImpl* impl)
193 {
194 ESR_ReturnCode rc = ESR_SUCCESS;
195 ESR_BOOL sessionExists = ESR_FALSE;
196 LCHAR vocabulary[P_PATH_MAX];
197 size_t len;
198
199 impl->vocabulary = CA_AllocateVocabulary();
200 if (impl->vocabulary==NULL)
201 {
202 rc = ESR_OUT_OF_MEMORY;
203 PLogError(ESR_rc2str(rc));
204 goto CLEANUP;
205 }
206
207 CHKLOG(rc, ESR_SessionExists(&sessionExists));
208
209 if (sessionExists)
210 {
211 LSTRCPY(vocabulary, impl->filename);
212 len = P_PATH_MAX;
213 CHKLOG(rc, ESR_SessionPrefixWithBaseDirectory(vocabulary, &len));
214 }
215 else
216 LSTRCPY(vocabulary, impl->filename);
217
218 CA_LoadDictionary(impl->vocabulary, vocabulary, L(""), &impl->locale);
219 if(impl->vocabulary->is_loaded == False /*(booldata)*/ ) {
220 CHKLOG(rc, ESR_INVALID_ARGUMENT);
221 }
222 impl->ttp_lang = TTP_LANG(impl->locale);
223
224 #ifdef USE_TTP
225 rc = SR_CreateG2P((SR_Vocabulary*)impl);
226 if (rc != ESR_SUCCESS) {
227 goto CLEANUP;
228 }
229 #endif
230
231 CLEANUP:
232 return rc;
233 }
234
SR_VocabularyLoadImpl(const LCHAR * filename,SR_Vocabulary ** self)235 ESR_ReturnCode SR_VocabularyLoadImpl(const LCHAR* filename, SR_Vocabulary** self)
236 {
237 SR_Vocabulary* Interface;
238 SR_VocabularyImpl* impl;
239 ESR_ReturnCode rc;
240
241 CHK(rc, SR_VocabularyCreateImpl(&Interface));
242 impl = (SR_VocabularyImpl*) Interface;
243 #if DO_DEFER_LOADING_UNTIL_LOOKUPS
244 impl->vocabulary = NULL;
245 impl->ttp_lang = NULL;
246 impl->filename = LSTRDUP( filename);
247 impl->locale = ESR_LOCALE_EN_US; // default really
248 impl->hSlts = NULL;
249 #else
250 impl->filename = LSTRDUP( filename);
251 CHKLOG( rc, sr_vocabularyloadimpl_for_real( impl));
252 #endif
253
254 *self = Interface;
255 return ESR_SUCCESS;
256 CLEANUP:
257 Interface->destroy(Interface);
258 return rc;
259 }
260
SR_VocabularySaveImpl(SR_Vocabulary * self,const LCHAR * filename)261 ESR_ReturnCode SR_VocabularySaveImpl(SR_Vocabulary* self, const LCHAR* filename)
262 {
263 /* TODO: complete */
264 return ESR_SUCCESS;
265 }
266
267 /* internal util function prototype */
268 /* we split the string on all non-alphanum and "'" which
269 is handled below */
270 #define LSINGLEQUOTE L('\'')
/**
 * Splits a string in place into words by overwriting every separator with
 * a null character.
 *
 * Any non-alphanumeric character is a separator. For ESR_LOCALE_EN_US a
 * single quote is kept inside the word when it is followed by 't' or 's'
 * and the character after that is not alphanumeric.
 *
 * @param toSplit null-terminated string, modified in place
 * @param end [out] set to the position just past the last separator that
 *            was replaced, or to the terminating null when the string does
 *            not end with a separator; for an empty string it is set to
 *            toSplit
 * @param locale locale that decides the single-quote handling
 * @return number of separators replaced; 0 if toSplit or end is NULL
 */
int split_on_nonalphanum(LCHAR* toSplit, LCHAR** end, const ESR_Locale locale)
{
  int nsplits = 0;
  LCHAR* cursor = toSplit;

  if (toSplit == NULL || end == NULL)
    return 0;

  *end = cursor; /* defined result even when the loop never runs */
  while (*cursor)
  {
    /* advance to the end of the current word */
    for (;;)
    {
      if (*cursor == LSINGLEQUOTE && locale == ESR_LOCALE_EN_US)
      {
        if (cursor[1] != 't' && cursor[1] != 's')
          break;
        if (LISALNUM(cursor[2])) // LISDIGIT
          break;
        cursor++; /* keep the quote as part of the word */
        continue;
      }
      if (!*cursor || !LISALNUM(*cursor))
        break;
      cursor++;
    }

    /* check if I am at the last word or not */
    if (*cursor)
    {
      *cursor = 0; /* replace split_char with '\0' */
      nsplits++;
      cursor++;    /* point to first char of next word */
    }
    *end = cursor;
  }
  return nsplits;
}
301
/**
 * Replaces every null character in the range [toJoin, end) with join_char.
 *
 * @param toJoin first character of the range
 * @param end one past the last character of the range
 * @param join_char character written over each null
 */
void join(LCHAR* toJoin, LCHAR* end, LCHAR join_char)
{
  LCHAR* cursor = toJoin;

  while (cursor < end)
  {
    if (*cursor == 0)
      *cursor = join_char;
    cursor++;
  }
}
308
/**
 * Indexes a list of prons stored back to back, each null-terminated, with
 * an empty string marking the end of the list.
 *
 * @param word_prons the list; may be NULL
 * @param word_pron_ptr [out] receives a pointer to the start of each pron
 * @param max_num_prons capacity of word_pron_ptr; indexing stops once it
 *                      is reached
 * @return number of pointers stored; 0 for a NULL or empty list, a NULL
 *         output array or a non-positive capacity
 */
size_t get_num_prons( const LCHAR* word_prons, const LCHAR** word_pron_ptr, int max_num_prons)
{
  size_t num_prons = 0;

  if (word_prons == NULL || word_pron_ptr == NULL || max_num_prons <= 0)
    return 0;

  /* capacity is tested first so nothing is read or stored once full */
  while (num_prons < (size_t) max_num_prons && *word_prons)
  {
    word_pron_ptr[num_prons++] = word_prons;
    while (*word_prons)
      word_prons++;
    word_prons++; /* step over this pron's terminator */
  }
  return num_prons;
}
320
321 /* This function is used from multi-word phrases, such as "mike smith". We
322 build up the pronunication of the phrase, by appending the pronunciation
323 of each word. We need to handle the cases of multiple prons for "mike"
324 and multiple prons for "smith". For simple cases we try to run faster
325 code. */
326
append_to_each_with_joiner(LCHAR * phrase_prons,const LCHAR * word_prons,const LCHAR joiner,size_t max_len,size_t * len)327 int append_to_each_with_joiner( LCHAR* phrase_prons, const LCHAR* word_prons, const LCHAR joiner, size_t max_len, size_t* len)
328 {
329 LCHAR* word_pron_ptr[MAX_NUM_PRONS];
330 LCHAR* phrase_pron_ptr[MAX_NUM_PRONS];
331 LCHAR *dst, *max_dst;
332 const LCHAR *src;
333 size_t nphrase_prons = get_num_prons( phrase_prons, (const LCHAR**)phrase_pron_ptr, MAX_NUM_PRONS);
334 size_t nword_prons = get_num_prons( word_prons, (const LCHAR**)word_pron_ptr, MAX_NUM_PRONS);
335 max_dst = phrase_prons+max_len-3;
336
337 if( nword_prons == 0)
338 return 0;
339 else if(nphrase_prons == 0) {
340 for(src=word_prons,dst=phrase_prons; src && *src; ) {
341 for( ; *src && dst<max_dst; ) {
342 *dst++ = *src++;
343 }
344 *dst++ = *src++; // copy the null
345 }
346 *dst = 0; // add a double-null
347 *len = dst-phrase_prons;
348 return 0;
349 }
350 else if(nphrase_prons == 1 && nword_prons == 1) {
351 for(dst=phrase_prons; *dst; ) dst++;
352 if(joiner!=L('\0')) *dst++ = joiner;
353 for(src=word_prons; *src && dst<max_dst; ) *dst++ = *src++;
354 *dst++ = 0;
355 *dst = 0; // add a double-null
356 *len = dst-phrase_prons;
357 return 0;
358 }
359 else {
360 size_t i,j;
361 LCHAR *phrase_pron_dups[MAX_NUM_PRONS];
362 LCHAR *dst_good_end = phrase_prons+1;
363 for(i=0;i<nphrase_prons; i++)
364 phrase_pron_dups[i] = LSTRDUP( phrase_pron_ptr[i]);
365 dst = phrase_prons;
366 for(i=0;i<nphrase_prons; i++) {
367 for(j=0; j<nword_prons; j++) {
368 for(src=phrase_pron_dups[i]; *src && dst<max_dst; ) *dst++=*src++;
369 if(dst>max_dst) break;
370 if(joiner!=L('\0')) *dst++ = joiner;
371 for(src=word_pron_ptr[j]; *src && dst<max_dst; ) *dst++=*src++;
372 if(dst>max_dst) break;
373 *dst++ = 0;
374 dst_good_end = dst;
375 }
376 }
377 *dst_good_end++ = 0; // double-null terminator
378 for(i=0; i<nphrase_prons; i++) LSTRFREE( phrase_pron_dups[i]);
379 return 0;
380 }
381 }
382
get_first_word(LCHAR * curr,LCHAR * end)383 PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end)
384 {
385 while(*curr==L('\0') && curr<end) curr++;
386 return curr;
387 }
388
get_next_word(LCHAR * curr,LCHAR * end)389 PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end)
390 {
391 while(*curr) curr++;
392 if(curr<end) curr++;
393 while( !*curr && curr<end) curr++;
394 return curr;
395 }
396
397 /*
398 For each word in a phrase (words separated by spaces)
399
400 if the complete word is in the dictionary
401 return pron
402 else
403 if the word contains '_', split the word into parts
404 and check if parts are in the dictionary.
405 if none of the parts are in the dictionary,
406 reassemble the parts and pass the whole thing to TTP
407 else
408 build the pron by concat of TTP pron and dictionary pron for individual parts
409 */
SR_VocabularyGetPronunciationImpl(SR_Vocabulary * self,const LCHAR * phrase,LCHAR * pronunciation,size_t * pronunciation_len)410 ESR_ReturnCode SR_VocabularyGetPronunciationImpl(SR_Vocabulary* self, const LCHAR* phrase, LCHAR* pronunciation, size_t* pronunciation_len)
411 {
412 SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
413 /* copy of phrase */
414 LCHAR copy_of_phrase[MAX_PRON_LEN];
415
416 /* pointer to curr phoneme output */
417 LCHAR* curr_phoneme = pronunciation;
418 // size_t pronunciation_len = *len;
419
420 ESR_ReturnCode nEsrRes = ESR_SUCCESS;
421 int text_length;
422 size_t len;
423 int nsplits;
424
425 #ifdef USE_TTP
426 SWIsltsResult res = SWIsltsSuccess;
427 SWIsltsTranscription *pTranscriptions = NULL;
428 int nNbrOfTranscriptions = 0;
429 #endif /* USE_TTP */
430 /* full inf pron after conversion */
431 LCHAR infpron[MAX_PRON_LEN];
432 LCHAR* p_infpron;
433 LCHAR* curr; /* pointer to current word */
434 LCHAR* end = 0; /* pointer to end of phrase */
435
436 if(self == NULL || phrase == NULL)
437 {
438 PLogError(L("ESR_INVALID_ARGUMENT"));
439 return ESR_INVALID_ARGUMENT;
440 }
441
442 if( LSTRLEN(phrase) >= MAX_PRON_LEN)
443 return ESR_ARGUMENT_OUT_OF_BOUNDS;
444
445 #if DO_DEFER_LOADING_UNTIL_LOOKUPS
446 if( impl->vocabulary == NULL) {
447 CHKLOG( nEsrRes, sr_vocabularyloadimpl_for_real( impl));
448 }
449 #endif
450
451 /* by default, check the whole word entry first (regardless of underscores) */
452 if( CA_GetEntryInDictionary(impl->vocabulary, phrase, pronunciation, (int*)&len, MAX_PRON_LEN)) {
453 // len includes the final null, but not the double-null
454 *pronunciation_len = LSTRLEN(pronunciation)+1;
455 // look for double-null terminator
456 while( pronunciation[ (*pronunciation_len)] != L('\0'))
457 *pronunciation_len += LSTRLEN( pronunciation + (*pronunciation_len)) + 1;
458
459 return ESR_SUCCESS;
460 }
461
462 /*************************/
463 /* split digit strings */
464 text_length = MAX_PRON_LEN;
465 nEsrRes = run_ttt(phrase, copy_of_phrase, &text_length);
466 if (nEsrRes != ESR_SUCCESS)
467 {
468 PLogError(L("ESR_FATAL_ERROR: run_ttt( ) fails with return code %d\n"), nEsrRes);
469 return nEsrRes;
470 }
471
472 len = 0;
473 *curr_phoneme = L('\0');
474 if( *pronunciation_len>=12) curr_phoneme[1] = L('\0');
475 else return ESR_INVALID_ARGUMENT;
476
477 /*************************/
478 /* split into word parts */
479 nsplits = split_on_nonalphanum(copy_of_phrase, &end, impl->locale);
480
481 /******************************************************/
482 /* if none of the words are found in the dictionary, then
483 reassemble and get the TTP pron for the whole thing */
484 curr=get_first_word(copy_of_phrase,end);
485 /* check if there are any valid characters at all */
486 if(!curr || !*curr)
487 return ESR_INVALID_ARGUMENT;
488 /* now loop over all words in the phrase */
489 for( ; *curr; curr = get_next_word(curr,end))
490 {
491 LCHAR* squote = NULL;
492 p_infpron = infpron;
493
494 /* by default, check the whole word entry first (regardless of LSINGLEQUOTE) */
495 if(CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN))
496 {
497 /* concatenate, and insert join_char between words */
498 append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
499 }
500 else {
501 p_infpron[0] = 0;
502 /* if this is English AND we're dealing with a quote (possessive or a
503 contraction), then we use the dictionary for the stuff before the
504 quote, and use the TTP to find out what single phoneme should
505 correspond the the thing after the quote ('s' or 't'). This keeps
506 the code clean (no phoneme codes here), and maps 's' to 's' or 'z'
507 with the intelligence of the G2P engine */
508 if( impl->locale == ESR_LOCALE_EN_US) {
509 if( (squote=LSTRCHR(curr,LSINGLEQUOTE))==NULL) {}
510 else {
511 *squote = L('\0'); // temporary
512 if( CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN)) {
513 } else
514 p_infpron[0] = 0;
515 *squote = LSINGLEQUOTE; // undo temporary
516 }
517 }
518 #ifdef USE_TTP
519 pTranscriptions = NULL;
520 if (impl->hSlts)
521 {
522 res = SWIsltsG2PGetWordTranscriptions(impl->hSlts, curr, &pTranscriptions, &nNbrOfTranscriptions);
523 if (res != SWIsltsSuccess) {
524 PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PGetWordTranscriptions( ) fails with return code %d\n"), res);
525 return ESR_FATAL_ERROR;
526 }
527 if( impl->locale == ESR_LOCALE_EN_US && p_infpron[0] && squote!=L('\0')) {
528 const LCHAR* lastPhoneme = pTranscriptions[0].pBuffer;
529 while(lastPhoneme && *lastPhoneme && lastPhoneme[1]!=L('\0'))
530 lastPhoneme++;
531 append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
532 append_to_each_with_joiner( pronunciation, lastPhoneme, L('\0'), MAX_PRON_LEN, &len);
533 } else {
534 /* only one transcription available from seti */
535 p_infpron = pTranscriptions[0].pBuffer;
536 append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
537 #if defined(SREC_ENGINE_VERBOSE_LOGGING)
538 PLogError("L: used G2P for %s", curr);
539 #endif
540
541 }
542 if (pTranscriptions) {
543 res = SWIsltsG2PFreeWordTranscriptions(impl->hSlts, pTranscriptions);
544 pTranscriptions = NULL;
545 if (res != SWIsltsSuccess) {
546 PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PFreeWordTranscriptions( ) fails with return code %d\n"), res);
547 return ESR_FATAL_ERROR;
548 }
549 }
550 } else {
551 nEsrRes = ESR_INVALID_ARGUMENT;
552 PLogError(L("ESR_INVALID_ARGUMENT: impl->hSlts was not configured!"));
553 return nEsrRes;
554 }
555 #else /* USE_TTP */
556 nEsrRes = ESR_INVALID_ARGUMENT;
557 PLogError(L("ESR_INVALID_ARGUMENT: need USE_TTP build to guess pronunciations!"));
558 return nEsrRes;
559 #endif
560 } /* multi-word phrase */
561 } /* loop over words in phrase */
562 len = LSTRLEN(pronunciation)+1;
563 // look for double-null terminator
564 while( pronunciation[ len] != L('\0'))
565 len += LSTRLEN( pronunciation + len) + 1;
566 *pronunciation_len = len;
567 nEsrRes = ESR_SUCCESS;
568 CLEANUP:
569 return nEsrRes;
570 }
571
SR_VocabularyGetLanguageImpl(SR_Vocabulary * self,ESR_Locale * locale)572 ESR_ReturnCode SR_VocabularyGetLanguageImpl(SR_Vocabulary* self, ESR_Locale* locale)
573 {
574 SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
575
576 *locale = impl->locale;
577 return ESR_SUCCESS;
578 }
579
580 /* simple text normalization rountine for splitting up any digit string */
run_ttt(const LCHAR * input_sentence,LCHAR * output_sentence,int * text_length)581 static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length)
582 {
583 ESR_ReturnCode nRes = ESR_SUCCESS;
584 int num_out = 0;
585 int max_text_length = *text_length / sizeof(LCHAR) - 1;
586 ESR_BOOL bDigit = False;
587
588 while (*input_sentence != L('\0')) {
589 if (num_out + 2 >= max_text_length) {
590 nRes = ESR_FATAL_ERROR;
591 goto CLEAN_UP;
592 }
593
594 if (L('0') <= *input_sentence && *input_sentence <= L('9')) {
595 if (num_out > 0 && !LISSPACE(output_sentence[num_out-1]) ) {
596 // put 1 space before digits
597 output_sentence[num_out] = L(' ');
598 num_out++;
599 while( LISSPACE(*input_sentence) ) input_sentence++;
600 }
601 output_sentence[num_out] = *input_sentence;
602 num_out++;
603 bDigit = True;
604 }
605 else {
606 if (bDigit == True && !LISSPACE(output_sentence[num_out-1])) {
607 // put 1 space after digits
608 output_sentence[num_out] = L(' ');
609 num_out++;
610 while( LISSPACE(*input_sentence)) input_sentence++;
611 }
612 output_sentence[num_out] = *input_sentence;
613 num_out++;
614 bDigit = False;
615 }
616 input_sentence++;
617 if( LISSPACE(output_sentence[num_out-1]))
618 while(LISSPACE(*input_sentence )) input_sentence++; // remove repeated spaces
619 }
620
621 output_sentence[num_out] = L('\0');
622 *text_length = num_out * sizeof(LCHAR);
623 return ESR_SUCCESS;
624
625 CLEAN_UP:
626
627 *output_sentence = L('\0');
628 *text_length = 0;
629 return nRes;
630 }
631