• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  VocabularyImpl.c                                                         *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "ESR_Session.h"
21 #include "SR_Vocabulary.h"
22 #include "SR_VocabularyImpl.h"
23 #include "passert.h"
24 #include "plog.h"
25 #include "ptypes.h"
26 #include "pmemory.h"
27 
28 //#define DEBUG 1
29 #define MAX_PRON_LEN 256
30 #define MAX_WORD_LEN    40
31 #define MTAG NULL
32 #define MAX_PHONE_LEN 4
33 #define DO_DEFER_LOADING_UNTIL_LOOKUPS 1
34 
35 static PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end);
36 static PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end);
37 static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length);
38 
39 #define MAX_NUM_PRONS 4
40 #define LSTRDUP(src) LSTRCPY(CALLOC(LSTRLEN(src)+1, sizeof(LCHAR), "srec.Vocabulary.LSTRDUP"), (src))
41 #define LSTRFREE(src) FREE(src)
42 
/**
 * Grapheme-to-phoneme (G2P) engine setup and teardown for a vocabulary.
 * These helpers are compiled only when USE_TTP is defined.
 *
 * @param self SR_Vocabulary handle
 */
48 #ifdef USE_TTP
SR_CreateG2P(SR_Vocabulary * self)49 ESR_ReturnCode SR_CreateG2P(SR_Vocabulary* self)
50 {
51   ESR_ReturnCode      rc = ESR_SUCCESS;
52   SWIsltsResult       res = SWIsltsSuccess;
53   SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
54   LCHAR               szG2PDataFile[P_PATH_MAX];
55   size_t              len = P_PATH_MAX;
56   ESR_BOOL                bG2P = ESR_TRUE;
57 
58      rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
59      if ( rc != ESR_SUCCESS )
60        {
61 	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
62 	 return rc;
63        }
64      if ( bG2P == ESR_FALSE )
65        {
66 	 impl->hSlts = NULL;
67 	 return ESR_SUCCESS;
68        }
69 
70      rc = ESR_SessionGetLCHAR ( L("G2P.Data"), szG2PDataFile, &len );
71      if ( rc != ESR_SUCCESS )
72        {
73 	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetLCHAR() - G2P.Data fails with return code %d\n"), rc);
74 	 return rc;
75      }
76      rc = ESR_SessionPrefixWithBaseDirectory(szG2PDataFile, &len);
77      if ( rc != ESR_SUCCESS )
78        {
79 	 PLogError(L("ESR_FATAL_ERROR: ESR_SessionPrefixWithBaseDirectory() - G2P.Data fails with return code %d\n"), rc);
80 	 return rc;
81        }
82 
83      res = SWIsltsInit();
84      if (res == SWIsltsSuccess)
85        {
86 	 /* data_file: en-US-ttp.data */
87 	 res = SWIsltsOpen(&(impl->hSlts), szG2PDataFile);
88 	 if (res != SWIsltsSuccess)
89 	   {
90 	     PLogError(L("ESR_FATAL_ERROR: SWIsltsOpen( ) fails with return code %d\n"), res);
91 	     FREE(impl);
92 	     return ESR_FATAL_ERROR;
93 	   }
94        }
95      else
96      {
97        PLogError(L("ESR_FATAL_ERROR: SWIsltsInit( ) fails with return code %d\n"), res);
98        FREE(impl);
99        return ESR_FATAL_ERROR;
100      }
101      return rc;
102 }
103 
SR_DestroyG2P(SR_Vocabulary * self)104 ESR_ReturnCode SR_DestroyG2P(SR_Vocabulary* self)
105 {
106   ESR_ReturnCode      rc = ESR_SUCCESS;
107   SWIsltsResult       res = SWIsltsSuccess;
108   SR_VocabularyImpl * impl = (SR_VocabularyImpl*) self;
109   ESR_BOOL                bG2P = ESR_TRUE;
110 
111   rc = ESR_SessionGetBool ( L("G2P.Available"), &bG2P );
112   if ( rc != ESR_SUCCESS )
113      {
114        PLogError(L("ESR_FATAL_ERROR: ESR_SessionGetBool() - G2P.Available fails with return code %d\n"), rc);
115        return rc;
116      }
117   if ( bG2P == ESR_FALSE || impl->hSlts == NULL)
118     {
119       return ESR_SUCCESS;
120     }
121 
122   res = SWIsltsClose(impl->hSlts);
123   if (res == SWIsltsSuccess)
124     {
125       res = SWIsltsTerm();
126       if (res != SWIsltsSuccess)
127 	{
128 	  PLogError(L("ESR_FATAL_ERROR: SWIsltsTerm( ) fails with return code %d\n"), res);
129 	  rc = ESR_FATAL_ERROR;
130           }
131     }
132   else
133     {
134       PLogError(L("ESR_FATAL_ERROR: SWIsltsClose( ) fails with return code %d\n"), res);
135       rc = ESR_FATAL_ERROR;
136     }
137   return rc;
138 }
139 #endif /* USE_TTP */
140 
141 /**
142  * Creates a new vocabulary but does not set the locale.
143  *
144  * @param self SR_Vocabulary handle
145  */
SR_VocabularyCreateImpl(SR_Vocabulary ** self)146 ESR_ReturnCode SR_VocabularyCreateImpl(SR_Vocabulary** self)
147 {
148   SR_VocabularyImpl* impl;
149 
150   if (self==NULL)
151     {
152       PLogError(L("ESR_INVALID_ARGUMENT"));
153       return ESR_INVALID_ARGUMENT;
154     }
155   impl = NEW(SR_VocabularyImpl, MTAG);
156   if (impl==NULL)
157     {
158       PLogError(L("ESR_OUT_OF_MEMORY"));
159       return ESR_OUT_OF_MEMORY;
160     }
161 
162   impl->Interface.save = &SR_VocabularySaveImpl;
163   impl->Interface.getPronunciation = &SR_VocabularyGetPronunciationImpl;
164      impl->Interface.getLanguage = &SR_VocabularyGetLanguageImpl;
165      impl->Interface.destroy = &SR_VocabularyDestroyImpl;
166      impl->vocabulary = NULL;
167 
168      *self = (SR_Vocabulary*) impl;
169      impl->hSlts = NULL;
170      return ESR_SUCCESS;
171 }
172 
SR_VocabularyDestroyImpl(SR_Vocabulary * self)173 ESR_ReturnCode SR_VocabularyDestroyImpl(SR_Vocabulary* self)
174 {
175   SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
176 
177 #ifdef USE_TTP
178   SR_DestroyG2P(self);
179 #endif
180 
181      if (impl->vocabulary!=NULL)
182        {
183 	 CA_UnloadDictionary(impl->vocabulary);
184 	 CA_FreeVocabulary(impl->vocabulary);
185 	 impl->vocabulary = NULL;
186        }
187 	   LSTRFREE(impl->filename);
188      FREE(impl);
189      return ESR_SUCCESS;
190 }
191 
sr_vocabularyloadimpl_for_real(SR_VocabularyImpl * impl)192 ESR_ReturnCode sr_vocabularyloadimpl_for_real(SR_VocabularyImpl* impl)
193 {
194 	ESR_ReturnCode rc = ESR_SUCCESS;
195 	ESR_BOOL sessionExists = ESR_FALSE;
196   LCHAR vocabulary[P_PATH_MAX];
197   size_t len;
198 
199      impl->vocabulary = CA_AllocateVocabulary();
200      if (impl->vocabulary==NULL)
201        {
202 	 rc = ESR_OUT_OF_MEMORY;
203 	 PLogError(ESR_rc2str(rc));
204 	 goto CLEANUP;
205        }
206 
207      CHKLOG(rc, ESR_SessionExists(&sessionExists));
208 
209      if (sessionExists)
210        {
211           LSTRCPY(vocabulary, impl->filename);
212           len = P_PATH_MAX;
213           CHKLOG(rc, ESR_SessionPrefixWithBaseDirectory(vocabulary, &len));
214        }
215      else
216        LSTRCPY(vocabulary, impl->filename);
217 
218      CA_LoadDictionary(impl->vocabulary, vocabulary, L(""), &impl->locale);
219      if(impl->vocabulary->is_loaded == False /*(booldata)*/ ) {
220        CHKLOG(rc, ESR_INVALID_ARGUMENT);
221      }
222      impl->ttp_lang = TTP_LANG(impl->locale);
223 
224 #ifdef USE_TTP
225      rc = SR_CreateG2P((SR_Vocabulary*)impl);
226 	 if (rc != ESR_SUCCESS) {
227           goto CLEANUP;
228      }
229 #endif
230 
231 CLEANUP:
232 	 return rc;
233 }
234 
SR_VocabularyLoadImpl(const LCHAR * filename,SR_Vocabulary ** self)235 ESR_ReturnCode SR_VocabularyLoadImpl(const LCHAR* filename, SR_Vocabulary** self)
236 {
237   SR_Vocabulary* Interface;
238   SR_VocabularyImpl* impl;
239   ESR_ReturnCode rc;
240 
241      CHK(rc, SR_VocabularyCreateImpl(&Interface));
242      impl = (SR_VocabularyImpl*) Interface;
243 #if DO_DEFER_LOADING_UNTIL_LOOKUPS
244 	 impl->vocabulary = NULL;
245 	 impl->ttp_lang = NULL;
246 	 impl->filename = LSTRDUP( filename);
247 	 impl->locale = ESR_LOCALE_EN_US; // default really
248 	 impl->hSlts = NULL;
249 #else
250 	 impl->filename = LSTRDUP( filename);
251 	 CHKLOG( rc, sr_vocabularyloadimpl_for_real( impl));
252 #endif
253 
254      *self = Interface;
255      return ESR_SUCCESS;
256  CLEANUP:
257      Interface->destroy(Interface);
258      return rc;
259 }
260 
SR_VocabularySaveImpl(SR_Vocabulary * self,const LCHAR * filename)261 ESR_ReturnCode SR_VocabularySaveImpl(SR_Vocabulary* self, const LCHAR* filename)
262 {
263   /* TODO: complete */
264   return ESR_SUCCESS;
265 }
266 
267 /* internal util function prototype */
268 /* we split the string on all non-alphanum and "'" which
269 is handled below */
270 #define LSINGLEQUOTE L('\'')
split_on_nonalphanum(LCHAR * toSplit,LCHAR ** end,const ESR_Locale locale)271 int split_on_nonalphanum(LCHAR* toSplit, LCHAR** end, const ESR_Locale locale)
272 {
273   int nsplits = 0;
274   LCHAR* _next = toSplit;
275     while(*_next)
276     {
277 		do {
278 			if(*_next == LSINGLEQUOTE && locale == ESR_LOCALE_EN_US) {
279 				if(_next[1] != 't' && _next[1] != 's') break;
280 				else if( LISALNUM(_next[2])) break; // LISDIGIT
281 				else { *_next++; continue; }
282 			}
283 			if(!*_next || !LISALNUM(*_next)) break;
284 			*_next++;
285 		} while(1);
286       // FORMERLY:  while(*_next && LISALNUM(*_next))     _next++;
287 
288       /* check if I am at the last word or not */
289       if(*_next)
290       {
291         *_next = 0; /* replace split_char with '\0' the word */
292 		nsplits++;
293         _next++;    /* point to first char of next word */
294 		*end = _next; /* we'll be push forward later, if there's content here!*/
295       }
296       else
297         *end = _next;
298     }
299 	return nsplits;
300 }
301 
/* Replaces every null character in the range [toJoin, end) with join_char,
   turning a sequence of null-separated pieces back into one string. */
void join(LCHAR* toJoin, LCHAR* end, LCHAR join_char)
{
  LCHAR* cursor = toJoin;

  while (cursor < end)
  {
    if (*cursor == 0)
      *cursor = join_char;
    cursor++;
  }
}
308 
/* Collects pointers to the individual pronunciations stored back to back in
   word_prons (each one null-terminated, the list ended by an empty string).

   word_prons     pronunciation list; may be NULL
   word_pron_ptr  [out] receives up to max_num_prons pointers into word_prons
   max_num_prons  capacity of word_pron_ptr

   Returns the number of pointers stored. Nothing is stored when the list is
   NULL or empty, or when max_num_prons is not positive. */
size_t get_num_prons( const LCHAR* word_prons, const LCHAR** word_pron_ptr, int max_num_prons)
{
  size_t num_prons = 0;
  size_t limit;

  if (word_prons == NULL || word_pron_ptr == NULL || max_num_prons <= 0)
    return 0;
  limit = (size_t) max_num_prons;

  while (*word_prons) {
    word_pron_ptr[num_prons++] = word_prons;
    if (num_prons >= limit) break;
    /* skip to the character after this pronunciation's terminator */
    while (*word_prons) word_prons++;
    word_prons++;
  }
  return num_prons;
}
320 
321 /* This function is used from multi-word phrases, such as "mike smith".  We
322    build up the pronunication of the phrase, by appending the pronunciation
323    of each word.  We need to handle the cases of multiple prons for "mike"
324    and multiple prons for "smith".  For simple cases we try to run faster
325    code. */
326 
append_to_each_with_joiner(LCHAR * phrase_prons,const LCHAR * word_prons,const LCHAR joiner,size_t max_len,size_t * len)327 int append_to_each_with_joiner( LCHAR* phrase_prons, const LCHAR* word_prons, const LCHAR joiner, size_t max_len, size_t* len)
328 {
329   LCHAR* word_pron_ptr[MAX_NUM_PRONS];
330   LCHAR* phrase_pron_ptr[MAX_NUM_PRONS];
331   LCHAR *dst, *max_dst;
332   const LCHAR *src;
333   size_t nphrase_prons = get_num_prons( phrase_prons, (const LCHAR**)phrase_pron_ptr, MAX_NUM_PRONS);
334   size_t nword_prons = get_num_prons( word_prons, (const LCHAR**)word_pron_ptr, MAX_NUM_PRONS);
335   max_dst = phrase_prons+max_len-3;
336 
337   if( nword_prons == 0)
338     return 0;
339   else if(nphrase_prons == 0) {
340 	for(src=word_prons,dst=phrase_prons; src && *src; ) {
341 		for( ; *src && dst<max_dst; ) {
342 			*dst++ = *src++;
343 		}
344       *dst++ = *src++; // copy the null
345     }
346     *dst = 0; // add a double-null
347 	*len = dst-phrase_prons;
348     return 0;
349   }
350   else if(nphrase_prons == 1 && nword_prons == 1) {
351     for(dst=phrase_prons; *dst; ) dst++;
352     if(joiner!=L('\0')) *dst++ = joiner;
353     for(src=word_prons; *src && dst<max_dst; ) *dst++ = *src++;
354     *dst++ = 0;
355     *dst = 0; // add a double-null
356 	*len = dst-phrase_prons;
357     return 0;
358   }
359   else  {
360     size_t i,j;
361     LCHAR *phrase_pron_dups[MAX_NUM_PRONS];
362     LCHAR *dst_good_end = phrase_prons+1;
363     for(i=0;i<nphrase_prons; i++)
364       phrase_pron_dups[i] = LSTRDUP( phrase_pron_ptr[i]);
365     dst = phrase_prons;
366     for(i=0;i<nphrase_prons; i++) {
367       for(j=0; j<nword_prons; j++) {
368 	for(src=phrase_pron_dups[i]; *src && dst<max_dst; ) *dst++=*src++;
369 	if(dst>max_dst) break;
370 	if(joiner!=L('\0')) *dst++ = joiner;
371 	for(src=word_pron_ptr[j]; *src && dst<max_dst; ) *dst++=*src++;
372 	if(dst>max_dst) break;
373 	*dst++ = 0;
374 	dst_good_end = dst;
375       }
376     }
377     *dst_good_end++ = 0; // double-null terminator
378     for(i=0; i<nphrase_prons; i++) LSTRFREE( phrase_pron_dups[i]);
379     return 0;
380   }
381 }
382 
get_first_word(LCHAR * curr,LCHAR * end)383 PINLINE LCHAR* get_first_word(LCHAR* curr, LCHAR* end)
384 {
385   while(*curr==L('\0') && curr<end) curr++;
386   return curr;
387 }
388 
get_next_word(LCHAR * curr,LCHAR * end)389 PINLINE LCHAR* get_next_word(LCHAR* curr, LCHAR* end)
390 {
391   while(*curr) curr++;
392   if(curr<end)  curr++;
393   while( !*curr && curr<end) curr++;
394   return curr;
395 }
396 
397 /*
398   For each word in a phrase (words separated by spaces)
399 
400   if the complete word is in the dictionary
401   return pron
402   else
403   if the word contains '_', split the word into parts
404   and check if parts are in the dictionary.
405   if none of the parts are in the dictionary,
406   reassemble the parts and pass the whole thing to TTP
407   else
408   build the pron by concat of TTP pron and dictionary pron for individual parts
409 */
SR_VocabularyGetPronunciationImpl(SR_Vocabulary * self,const LCHAR * phrase,LCHAR * pronunciation,size_t * pronunciation_len)410 ESR_ReturnCode SR_VocabularyGetPronunciationImpl(SR_Vocabulary* self, const LCHAR* phrase, LCHAR* pronunciation, size_t* pronunciation_len)
411 {
412   SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
413   /* copy of phrase */
414   LCHAR copy_of_phrase[MAX_PRON_LEN];
415 
416   /* pointer to curr phoneme output */
417   LCHAR* curr_phoneme = pronunciation;
418   // size_t pronunciation_len = *len;
419 
420   ESR_ReturnCode nEsrRes = ESR_SUCCESS;
421   int text_length;
422   size_t len;
423   int nsplits;
424 
425 #ifdef USE_TTP
426   SWIsltsResult      res = SWIsltsSuccess;
427   SWIsltsTranscription  *pTranscriptions = NULL;
428   int nNbrOfTranscriptions = 0;
429 #endif /* USE_TTP */
430   /* full inf pron after conversion */
431   LCHAR infpron[MAX_PRON_LEN];
432   LCHAR* p_infpron;
433   LCHAR* curr;     /* pointer to current word */
434   LCHAR* end = 0;   /* pointer to end of phrase */
435 
436   if(self == NULL || phrase == NULL)
437     {
438       PLogError(L("ESR_INVALID_ARGUMENT"));
439       return ESR_INVALID_ARGUMENT;
440     }
441 
442   if( LSTRLEN(phrase) >= MAX_PRON_LEN)
443 	return ESR_ARGUMENT_OUT_OF_BOUNDS;
444 
445 #if DO_DEFER_LOADING_UNTIL_LOOKUPS
446   if( impl->vocabulary == NULL) {
447     CHKLOG( nEsrRes, sr_vocabularyloadimpl_for_real( impl));
448   }
449 #endif
450 
451   /* by default, check the whole word entry first (regardless of underscores) */
452   if( CA_GetEntryInDictionary(impl->vocabulary, phrase, pronunciation, (int*)&len, MAX_PRON_LEN)) {
453     // len includes the final null, but not the double-null
454     *pronunciation_len = LSTRLEN(pronunciation)+1;
455     // look for double-null terminator
456     while( pronunciation[ (*pronunciation_len)] != L('\0'))
457       *pronunciation_len += LSTRLEN( pronunciation + (*pronunciation_len)) + 1;
458 
459     return ESR_SUCCESS;
460   }
461 
462   /*************************/
463   /* split digit strings */
464   text_length = MAX_PRON_LEN;
465   nEsrRes = run_ttt(phrase, copy_of_phrase, &text_length);
466   if (nEsrRes != ESR_SUCCESS)
467     {
468       PLogError(L("ESR_FATAL_ERROR: run_ttt( ) fails with return code %d\n"), nEsrRes);
469       return nEsrRes;
470     }
471 
472   len = 0;
473   *curr_phoneme = L('\0');
474   if( *pronunciation_len>=12) curr_phoneme[1] = L('\0');
475   else return ESR_INVALID_ARGUMENT;
476 
477   /*************************/
478   /* split into word parts */
479   nsplits = split_on_nonalphanum(copy_of_phrase, &end, impl->locale);
480 
481   /******************************************************/
482   /* if none of the words are found in the dictionary, then
483      reassemble and get the TTP pron for the whole thing */
484   curr=get_first_word(copy_of_phrase,end);
485   /* check if there are any valid characters at all */
486   if(!curr || !*curr)
487     return ESR_INVALID_ARGUMENT;
488   /* now loop over all words in the phrase */
489   for(   ; *curr; curr = get_next_word(curr,end))
490     {
491       LCHAR* squote = NULL;
492       p_infpron = infpron;
493 
494       /* by default, check the whole word entry first (regardless of LSINGLEQUOTE) */
495       if(CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN))
496         {
497           /* concatenate, and insert join_char between words */
498           append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
499         }
500       else {
501         p_infpron[0] = 0;
502         /* if this is English AND we're dealing with a quote (possessive or a
503            contraction), then we use the dictionary for the stuff before the
504            quote, and use the TTP to find out what single phoneme should
505            correspond the the thing after the quote ('s' or 't').  This keeps
506            the code clean (no phoneme codes here), and maps 's' to 's' or 'z'
507            with the intelligence of the G2P engine */
508         if( impl->locale == ESR_LOCALE_EN_US) {
509           if( (squote=LSTRCHR(curr,LSINGLEQUOTE))==NULL) {}
510           else {
511             *squote = L('\0');   // temporary
512             if( CA_GetEntryInDictionary(impl->vocabulary, curr, p_infpron, (int*)&len, MAX_PRON_LEN)) {
513             } else
514               p_infpron[0] = 0;
515             *squote = LSINGLEQUOTE; // undo temporary
516           }
517         }
518 #ifdef USE_TTP
519         pTranscriptions = NULL;
520         if (impl->hSlts)
521           {
522             res = SWIsltsG2PGetWordTranscriptions(impl->hSlts, curr, &pTranscriptions, &nNbrOfTranscriptions);
523             if (res != SWIsltsSuccess) {
524               PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PGetWordTranscriptions( ) fails with return code %d\n"), res);
525               return ESR_FATAL_ERROR;
526             }
527             if( impl->locale == ESR_LOCALE_EN_US && p_infpron[0] && squote!=L('\0')) {
528               const LCHAR* lastPhoneme = pTranscriptions[0].pBuffer;
529               while(lastPhoneme && *lastPhoneme && lastPhoneme[1]!=L('\0'))
530                 lastPhoneme++;
531               append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
532               append_to_each_with_joiner( pronunciation, lastPhoneme, L('\0'), MAX_PRON_LEN, &len);
533             } else {
534               /* only one transcription available from seti */
535               p_infpron = pTranscriptions[0].pBuffer;
536               append_to_each_with_joiner( pronunciation, p_infpron, OPTSILENCE_CODE, MAX_PRON_LEN, &len);
537 #if defined(SREC_ENGINE_VERBOSE_LOGGING)
538               PLogError("L: used G2P for %s", curr);
539 #endif
540 
541             }
542             if (pTranscriptions) {
543               res = SWIsltsG2PFreeWordTranscriptions(impl->hSlts, pTranscriptions);
544               pTranscriptions = NULL;
545               if (res != SWIsltsSuccess) {
546                 PLogError(L("ESR_FATAL_ERROR: SWIsltsG2PFreeWordTranscriptions( ) fails with return code %d\n"), res);
547                 return ESR_FATAL_ERROR;
548               }
549             }
550           } else {
551             nEsrRes = ESR_INVALID_ARGUMENT;
552             PLogError(L("ESR_INVALID_ARGUMENT: impl->hSlts was not configured!"));
553             return nEsrRes;
554           }
555 #else /* USE_TTP */
556         nEsrRes = ESR_INVALID_ARGUMENT;
557         PLogError(L("ESR_INVALID_ARGUMENT: need USE_TTP build to guess pronunciations!"));
558         return nEsrRes;
559 #endif
560       } /* multi-word phrase */
561     } /* loop over words in phrase */
562   len = LSTRLEN(pronunciation)+1;
563   // look for double-null terminator
564   while( pronunciation[ len] != L('\0'))
565     len += LSTRLEN( pronunciation + len) + 1;
566   *pronunciation_len = len;
567   nEsrRes = ESR_SUCCESS;
568  CLEANUP:
569   return nEsrRes;
570 }
571 
SR_VocabularyGetLanguageImpl(SR_Vocabulary * self,ESR_Locale * locale)572 ESR_ReturnCode SR_VocabularyGetLanguageImpl(SR_Vocabulary* self, ESR_Locale* locale)
573 {
574   SR_VocabularyImpl* impl = (SR_VocabularyImpl*) self;
575 
576   *locale = impl->locale;
577   return ESR_SUCCESS;
578 }
579 
580 /* simple text normalization rountine for splitting up any digit string */
run_ttt(const LCHAR * input_sentence,LCHAR * output_sentence,int * text_length)581 static ESR_ReturnCode run_ttt(const LCHAR *input_sentence, LCHAR *output_sentence, int *text_length)
582 {
583   ESR_ReturnCode         nRes = ESR_SUCCESS;
584   int                    num_out = 0;
585   int                    max_text_length = *text_length / sizeof(LCHAR) - 1;
586   ESR_BOOL                   bDigit = False;
587 
588   while (*input_sentence != L('\0')) {
589     if (num_out + 2 >= max_text_length) {
590       nRes = ESR_FATAL_ERROR;
591       goto CLEAN_UP;
592     }
593 
594     if (L('0') <= *input_sentence && *input_sentence <= L('9')) {
595       if (num_out > 0 && !LISSPACE(output_sentence[num_out-1]) ) {
596 		  // put 1 space before digits
597         output_sentence[num_out] = L(' ');
598         num_out++;
599 		while( LISSPACE(*input_sentence) ) input_sentence++;
600       }
601       output_sentence[num_out] = *input_sentence;
602       num_out++;
603       bDigit = True;
604     }
605     else {
606       if (bDigit == True && !LISSPACE(output_sentence[num_out-1])) {
607 		// put 1 space after digits
608         output_sentence[num_out] = L(' ');
609         num_out++;
610 		while( LISSPACE(*input_sentence)) input_sentence++;
611       }
612 		output_sentence[num_out] = *input_sentence;
613 		num_out++;
614       bDigit = False;
615     }
616     input_sentence++;
617 	if( LISSPACE(output_sentence[num_out-1]))
618 		while(LISSPACE(*input_sentence )) input_sentence++; // remove repeated spaces
619   }
620 
621   output_sentence[num_out] = L('\0');
622   *text_length = num_out * sizeof(LCHAR);
623   return ESR_SUCCESS;
624 
625  CLEAN_UP:
626 
627   *output_sentence = L('\0');
628   *text_length = 0;
629   return nRes;
630 }
631