• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  run_seq_lts.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 
22 #include <stdlib.h>
23 #include <string.h>
24 #include <math.h>
25 #include <ctype.h>
26 
27 #ifndef NO_STDERR
28 #include <stdio.h>
29 #else
30 extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3);
31 #endif
32 
33 #include "passert.h"
34 #include "pmemory.h"
35 #include "plog.h"
36 #include "phashtable.h"
37 #include "lts_error.h"
38 #include "lts.h"
39 #include "lts_seq_internal.h"
40 #include "port_fileio.h"
41 #include "platform_utils.h" /* strdup, safe_strtok, etc */
42 
43 #define ASSERT(x) passert(x)
44 
45 #ifdef TI_DSP
46 #include "tidsp_defines.h"
47 #endif
48 
49 #ifdef _DEBUG
50 #define PRINT_LOAD_TREE_SUMMARY 0
51 #define PRINT_LOAD_TREE 0
52 #define PRINT_CONS_COMB 0
53 #define PRINT_DP_LETTER 0
54 #define PRINT_LTS_WORD 0
55 #define PRINT_DICT_LOOKUP 0
56 #endif
57 
58 #define LTS_MARKER_WORD_START "WS"
59 #define LTS_MARKER_PRON_START "PS"
60 #define LTS_MARKER_SYLL_START "SS"
61 #define LTS_MARKER_SYLL_START_DD "SS%d"
62 #define LTS_MARKER_PIPESEP "|"
63 #define LTS_MARKER_PIPESEP_CHAR '|'
64 
65 static int load_int(PORT_FILE *fp);
66 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp);
67 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions);
68 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap);
69 static SWIsltsResult free_letter_mapping(LM *lm);
70 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap);
71 static SWIsltsResult free_phone_mapping(PM *pm);
72 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp);
73 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num);
74 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
75                               LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp);
76 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm);
77 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp);
78 static SWIsltsResult free_allowable_cons_comb(LTS *lts);
79 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp);
80 static SWIsltsResult free_question_strings(LTS* lts);
/* Look up the letter-map index for a character (case-insensitive).
   The character is converted to unsigned char before toupper(): passing a
   negative char to toupper() is undefined and would also produce a negative
   index into letter_index_for_letter.  Both arguments are parenthesized so
   that arbitrary expressions can be passed. */
#define find_letter_index( myLet, myLM) ((myLM)->letter_index_for_letter[ toupper((unsigned char)(myLet))])
82 int find_phone(const char *ph, PM *pm);
83 int find_best_string(const char *str, LTS* lts);
84 int find_best_prefix_string(const char *str, LTS* lts);
85 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone);
86 #define in_list(myV, myQ)   (bitarray_read_bit( myQ->membership, myV))
87 #define qmatches(myQ, myU)  (in_list( myU->properties[ myQ->type], myQ))
88 int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ;
89 int find_output_for_dp(LTS *lts, int *pbackoff_output);
90 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length);
91 int is_allowable_cons_comb(LTS *lts, const char *cons_string);
92 void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length);
93 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out);
94 
95 /*------------
96  *
97  * bitarray
98  *
99  *-----------*/
100 
/* Test bit iBiT of a bit array stored as 16-bit words.
   Evaluates to non-zero when the bit is set and to zero otherwise.
   iBiT is expanded twice, so pass side-effect-free expressions.  The
   arguments are parenthesized so that expressions such as `base + k`
   select the intended word. */
#define bitarray_read_bit( biTs, iBiT) ( (biTs)[(iBiT)/16] & (1<<((iBiT)%16)) )
106 
/* Set (iVal != 0) or clear (iVal == 0) bit iBit of a bit array stored as
   16-bit words.  The array is expected to hold 256 bits; the assertion now
   also rejects negative indices, which previously passed the `< 256` check
   and indexed in front of the array. */
void bitarray_write_bit( unsigned short* bits, int iBit, int iVal)
{
  unsigned short sect;
  ASSERT( iBit>=0 && iBit<256);
  sect = bits[iBit/16];
  if(iVal) { sect |= (1<<(iBit%16)); }
  else { sect &= ~(1<<(iBit%16)); }
  bits[ iBit/16] = sect;
}
/* Reset a 256-bit membership array and set one bit for every byte in list.
 *
 *   bits    - array of 16 unsigned shorts (256 bits, 16 bits per word, the
 *             same layout bitarray_read_bit / bitarray_write_bit use)
 *   list    - byte values to mark as members
 *   listlen - number of bytes in list; values <= 0 leave the array empty
 *
 * Two defects of the previous version are fixed here:
 *  - the clear loop ran UCHAR_MAX/sizeof(unsigned short)/8 == 15 times, so
 *    the last word (byte values 240..255) was never cleared;
 *  - list[i] was used as a plain char, so on platforms where char is signed
 *    any byte >= 0x80 became a negative bit index.
 * Each byte is converted to unsigned char, which keeps the index inside
 * 0..255, so the bit is set directly rather than through the asserting
 * bitarray_write_bit helper.
 */
void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen)
{
  enum { NUM_BITS = 256, BITS_PER_WORD = 16 };
  int i;

  for (i = 0; i < NUM_BITS / BITS_PER_WORD; i++)
    bits[i] = 0;

  for (i = 0; i < listlen; i++) {
    unsigned int value = (unsigned char)list[i];
    if (value >= NUM_BITS)
      continue; /* only possible where char is wider than 8 bits */
    bits[value / BITS_PER_WORD] |= (unsigned short)(1u << (value % BITS_PER_WORD));
  }
}
124 
125 /*-----------
126  *
127  * PHashTable
128  *
129  *-----------*/
130 
/* Key comparison callback for the hash tables built in this file: both keys
   are treated as NUL-terminated C strings and ordered with strcmp(). */
static int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
{
  const char *lhs = (const char *)key1;
  const char *rhs = (const char *)key2;

  return strcmp(lhs, rhs);
}
/* Hash callback for string keys: folds the characters of the NUL-terminated
   string into an unsigned accumulator as h = 31*h + c.  An empty string
   hashes to 0. */
static unsigned int HashGetCode(const void *key)
{
  const char *cursor;
  unsigned int hash = 0;

  for (cursor = (const char *)key; *cursor != '\0'; cursor++)
    hash = 31*hash + (unsigned int)*cursor;

  return hash;
}
my_PHashTableCreate_FromStrings(const char * strings[],int num_strings,const LCHAR * hashName)141 void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings,
142 				       const LCHAR* hashName)
143 {
144   PHashTable* table = NULL;
145   ESR_ReturnCode       rc = ESR_SUCCESS;
146   PHashTableArgs       hashArgs;
147   int i;
148   hashArgs.capacity = 63;
149   hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
150   hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
151   hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
152   rc = PHashTableCreate( &hashArgs, hashName, &table);
153   for(i=0; i<num_strings; i++) {
154     void* old;
155     /* formerly the code used linear lookup, so let's avoid dups to match up */
156     rc = PHashTableGetValue( table, strings[i], (void**)&old);
157     if(rc != ESR_SUCCESS) {
158       rc = PHashTablePutValue( table, strings[i], (const void *)i, NULL );
159     }
160   }
161   return table;
162 }
163 
164 /*---------
165  *
166  * i/o
167  *
168  *---------*/
169 
/* Read one integer from fp via PORT_FREAD_INT16 and return it.
   The value starts at 0 so that a failed or short read yields a defined
   result instead of whatever happened to be on the stack; callers use the
   result as an element count for allocations and loops.
   NOTE(review): the read's own status is not checked because this function
   has no way to report an error - confirm whether callers need one. */
static int load_int(PORT_FILE *fp)
{
  int v = 0;

  PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp);

  return v;
}
178 
load_lquestions(LQUESTION *** pquestions,int * pnum_questions,PORT_FILE * fp)179 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp)
180 {
181   int                  i, num_questions;
182   LQUESTION         ** questions;
183   SWIsltsResult          nRes = SWIsltsSuccess;
184 
185   num_questions = load_int(fp);
186 
187 #if PRINT_LOAD_TREE_SUMMARY
188   pfprintf(PSTDOUT,"loading %d questions\n", num_questions);
189 #endif
190 
191   *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*));
192   if (questions == NULL) {
193     nRes = SWIsltsErrAllocResource;
194     goto CLEAN_UP;
195   }
196 
197   for (i=0;i<num_questions;i++) {
198     questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION));
199     if (questions[i] == NULL) {
200       nRes = SWIsltsErrAllocResource;
201       goto CLEAN_UP;
202     }
203 
204 #if PRINT_LOAD_TREE
205     pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i);
206 #endif
207 
208     PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp);
209     PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp);
210 
211     questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char));
212     if (questions[i]->list == NULL) {
213       nRes = SWIsltsErrAllocResource;
214       goto CLEAN_UP;
215     }
216 
217     PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp);
218 
219     bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list);
220   }
221 
222   *pnum_questions = num_questions;
223   return SWIsltsSuccess;
224 
225  CLEAN_UP:
226 
227   free_lquestions(questions, num_questions);
228   *pnum_questions = 0;
229   *pquestions = NULL;
230   return nRes;
231 }
232 
233 /* deallocate questions */
free_lquestions(LQUESTION ** questions,int num_questions)234 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions)
235 {
236   SWIsltsResult          nRes = SWIsltsSuccess;
237   int                  i;
238 
239   if (questions) {
240     for (i=0; i<num_questions; i++) {
241       if (questions[i]->list) {
242         FREE(questions[i]->list);
243         questions[i]->list = NULL;
244       }
245       FREE(questions[i]);
246       questions[i] = NULL;
247     }
248     FREE(questions);
249   }
250   return nRes;
251 }
252 
load_letter_mapping(PORT_FILE * fp,LM ** ppLetterMap)253 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap)
254 {
255   SWIsltsResult          nRes = SWIsltsSuccess;
256   unsigned char        len;
257   LM                 * lm;
258   int                  i;
259 
260   /*  pfprintf(PSTDOUT,"got len %d\n", len);*/
261   lm = (LM*) lts_alloc(1, sizeof(LM));
262   if (lm == NULL) {
263     nRes = SWIsltsErrAllocResource;
264     goto CLEAN_UP;
265   }
266 
267   PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
268   lm->num_letters = len;
269 
270   lm->letters = (char*) lts_alloc(len, sizeof(char));
271   if (lm->letters == NULL) {
272     nRes = SWIsltsErrAllocResource;
273     goto CLEAN_UP;
274   }
275 
276   lm->type = (char*) lts_alloc(len, sizeof(char));
277   if (lm->type == NULL) {
278     nRes = SWIsltsErrAllocResource;
279     goto CLEAN_UP;
280   }
281 
282   PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp);
283   PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp);
284 
285   {
286     unsigned int letter;
287     for (letter=0; letter <= UCHAR_MAX; letter++)
288       lm->letter_index_for_letter[letter] = LTS_MAXCHAR;
289   }
290 
291   for (i=0;i<len;i++) {
292     char letter = toupper(lm->letters[i]);
293     lm->letters[i] = letter;
294     lm->letter_index_for_letter[(unsigned char)letter] = i;
295   }
296   *ppLetterMap = lm;
297   return SWIsltsSuccess;
298 
299  CLEAN_UP:
300   free_letter_mapping(lm);
301   *ppLetterMap = NULL;
302   return nRes;
303 }
304 
305 /* deallocate letter mapping */
free_letter_mapping(LM * lm)306 static SWIsltsResult free_letter_mapping(LM *lm)
307 {
308   SWIsltsResult          nRes = SWIsltsSuccess;
309 
310   if (lm) {
311     if (lm->letters) {
312       FREE(lm->letters);
313       lm->letters = NULL;
314     }
315     if (lm->type) {
316       FREE(lm->type);
317       lm->type = NULL;
318     }
319     lm->num_letters = 0;
320     FREE(lm);
321   }
322   return nRes;
323 }
324 
load_phone_mapping(PORT_FILE * fp,PM ** ppPhoneMap)325 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap)
326 {
327   SWIsltsResult          nRes = SWIsltsSuccess;
328   PM                 * pm;
329   int                  i;
330   unsigned char        len;
331   char               * ph;
332 
333   pm = (PM*) lts_alloc(1, sizeof(PM));
334   if (pm == NULL) {
335     nRes = SWIsltsErrAllocResource;
336     goto CLEAN_UP;
337   }
338 
339   pm->num_phones = load_int(fp);
340 
341   pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*));
342   if (pm->phones == NULL) {
343     nRes = SWIsltsErrAllocResource;
344     goto CLEAN_UP;
345   }
346 
347   for (i=0;i<pm->num_phones;i++) {
348     PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp);
349 
350     pm->phoneH = NULL;
351     pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char));
352     if (ph == NULL) {
353       nRes = SWIsltsErrAllocResource;
354       goto CLEAN_UP;
355     }
356 
357     PORT_FREAD_CHAR(ph, sizeof(char), len, fp);
358     ph[len] = '\0';
359   }
360   pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones,
361 						pm->num_phones,
362 						L("lts.phoneH"));
363   if(pm->phoneH == NULL) {
364     nRes = SWIsltsErrAllocResource;
365     goto CLEAN_UP;
366   }
367   *ppPhoneMap = pm;
368   return SWIsltsSuccess;
369 
370  CLEAN_UP:
371   free_phone_mapping(pm);
372   *ppPhoneMap = NULL;
373 
374   return nRes;
375 }
376 
377 /* deallocate phone mapping */
free_phone_mapping(PM * pm)378 static SWIsltsResult free_phone_mapping(PM *pm)
379 {
380   SWIsltsResult          nRes = SWIsltsSuccess;
381   int                  i;
382 
383   if (pm) {
384     if (pm->phones) {
385       for (i=0; i<pm->num_phones; i++) {
386         if (pm->phones[i]) {
387           FREE(pm->phones[i]);
388           pm->phones[i] = NULL;
389         }
390       }
391       FREE(pm->phones);
392       pm->phones = NULL;
393     }
394     if(pm->phoneH)
395       PHashTableDestroy( (PHashTable*)pm->phoneH);
396     pm->phoneH = NULL;
397     FREE(pm);
398   }
399   return nRes;
400 }
401 
402 
load_outputs(char *** poutputs,char *** pinputs,int * pnum,PORT_FILE * fp)403 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp)
404 {
405   SWIsltsResult        nRes = SWIsltsSuccess;
406   int                  i;
407   char              ** outputs = NULL;
408   char              ** inputs = NULL;
409   int                  num;
410   unsigned char        olen;
411   char               * out;
412   unsigned char        ilen;
413   char               * in;
414 
415   num = load_int(fp);
416 
417   *poutputs = outputs = (char **) lts_alloc(num, sizeof(char*));
418   if (outputs == NULL) {
419     nRes = SWIsltsErrAllocResource;
420     goto CLEAN_UP;
421   }
422 
423   *pinputs = inputs = (char **) lts_alloc(num, sizeof(char*));
424   if (inputs == NULL) {
425     nRes = SWIsltsErrAllocResource;
426     goto CLEAN_UP;
427   }
428 
429   for (i=0;i<num;i++) {
430     PORT_FREAD_CHAR(&olen, sizeof(char), 1, fp);
431     out = outputs[i] = lts_alloc(olen + 1, sizeof(char));
432     if (out == NULL) {
433       nRes = SWIsltsErrAllocResource;
434       goto CLEAN_UP;
435     }
436 
437     if (olen > 0) {
438       PORT_FREAD_CHAR(out, sizeof(char), olen, fp);
439     }
440     out[olen] = '\0';
441     PORT_FREAD_CHAR(&ilen, sizeof(char), 1, fp);
442     in = inputs[i] = lts_alloc(ilen + 1, sizeof(char));
443     if (in == NULL) {
444       nRes = SWIsltsErrAllocResource;
445       goto CLEAN_UP;
446     }
447 
448     if (ilen > 0) {
449       PORT_FREAD_CHAR(in, sizeof(char), ilen, fp);
450     }
451     in[ilen] = '\0';
452 #if PRINT_LOAD_TREE
453     if (ilen > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in, outputs[i]);
454     pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", i, olen, outputs[i], outputs[i]);
455 #endif
456   }
457 
458   *pnum = num;
459   return SWIsltsSuccess;
460 
461  CLEAN_UP:
462 
463   free_outputs(outputs, inputs, num);
464   *poutputs = NULL;
465   *pinputs = NULL;
466   *pnum = 0;
467 
468   return nRes;
469 }
470 
free_outputs(char ** outputs,char ** inputs,int num)471 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num)
472 {
473   SWIsltsResult          nRes = SWIsltsSuccess;
474   int                  i;
475 
476   if (outputs) {
477     for (i=0; i<num; i++) {
478       if (outputs[i]) {
479         FREE(outputs[i]);
480         outputs[i] = NULL;
481       }
482     }
483     FREE(outputs);
484   }
485 
486   if (inputs) {
487     for (i=0; i<num; i++) {
488       if (inputs[i]) {
489         FREE(inputs[i]);
490         inputs[i] = NULL;
491       }
492     }
493     FREE(inputs);
494   }
495   return nRes;
496 }
497 
load_trees(RT_LTREE *** ptrees,int * num_letters,LQUESTION *** pquestions,int * num_questions,LM ** plm,PORT_FILE * fp)498 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
499                       LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp)
500 {
501   SWIsltsResult          nRes = SWIsltsSuccess;
502   int                  let, i;
503   RT_LTREE           * tree = NULL;
504   RT_LTREE          ** trees = NULL;
505 
506 #if PRINT_LOAD_TREE_SUMMARY
507   pfprintf(PSTDOUT,"loading letter mapping\n");
508 #endif
509   *ptrees = NULL;
510   *pquestions = NULL;
511   *plm = NULL;
512 
513   nRes = load_letter_mapping(fp, plm);
514   if (nRes != SWIsltsSuccess) {
515     goto CLEAN_UP;
516   }
517 
518 #if PRINT_LOAD_TREE_SUMMARY
519   pfprintf(PSTDOUT,"loading questions\n");
520 #endif
521 
522   nRes = load_lquestions(pquestions, num_questions, fp);
523   if (nRes != SWIsltsSuccess) {
524     goto CLEAN_UP;
525   }
526 
527   *num_letters = load_int(fp);
528 
529   if (*num_letters != (*plm)->num_letters) {
530 #ifndef NO_STDERR
531     PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"),
532             *num_letters, (*plm)->num_letters);
533 #endif
534     nRes = SWIsltsInternalErr;
535     goto CLEAN_UP;
536   }
537 
538   *ptrees = trees = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*));
539   if (trees == NULL) {
540     nRes = SWIsltsErrAllocResource;
541     goto CLEAN_UP;
542   }
543 
544   for (let=0;let<*num_letters;let++) {
545     /*    pfprintf(PSTDOUT,"loading for t %d\n", t);*/
546 
547     trees[let] = tree = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE));
548     if (tree == NULL) {
549       nRes = SWIsltsErrAllocResource;
550       goto CLEAN_UP;
551     }
552 
553     tree->num_nodes = load_int(fp);
554 
555     tree->values_or_question1 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
556     if (tree->values_or_question1 == NULL) {
557       nRes = SWIsltsErrAllocResource;
558       goto CLEAN_UP;
559     }
560 
561     tree->question2 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
562     if (tree->question2 == NULL) {
563       nRes = SWIsltsErrAllocResource;
564       goto CLEAN_UP;
565     }
566 
567     tree->left_nodes = (short *) lts_alloc(tree->num_nodes, sizeof(short));
568     if (tree->left_nodes == NULL) {
569       nRes = SWIsltsErrAllocResource;
570       goto CLEAN_UP;
571     }
572 
573 #if PRINT_LOAD_TREE
574     pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", let, tree->num_nodes);
575 #endif
576 
577     for (i=0;i<tree->num_nodes;i++) {
578       PORT_FREAD_INT16(&(tree->left_nodes[i]), sizeof(short), 1, fp);
579       PORT_FREAD_INT16(&(tree->values_or_question1[i]), sizeof(short), 1, fp);
580 
581 #if PRINT_LOAD_TREE
582       pfprintf(PSTDOUT,"LOAD_TREE:  node[%d] %d %d", i, tree->left_nodes[i], tree->values_or_question1[i]);
583 #endif
584 
585       PORT_FREAD_INT16(&(tree->question2[i]), sizeof(short), 1, fp);
586       if (tree->left_nodes[i] != NO_NODE) {
587         if (tree->question2[i] == -1) tree->question2[i] = 0;
588 #if PRINT_LOAD_TREE
589         pfprintf(PSTDOUT," %x", (unsigned short) tree->question2[i]);
590 #endif
591       }
592 
593 #if PRINT_LOAD_TREE
594       pfprintf(PSTDOUT,"\n");
595 #endif
596     }
597   }
598 
599   return SWIsltsSuccess;
600 
601  CLEAN_UP:
602 
603   free_trees(trees, *num_letters, *pquestions, *num_questions, *plm);
604   *ptrees = NULL;
605   *pquestions = NULL;
606   *plm = NULL;
607   *num_letters = 0;
608   *num_questions = 0;
609 
610   return nRes;
611 }
612 
613 /* deallocate trees */
free_trees(RT_LTREE ** trees,int num_letters,LQUESTION ** questions,int num_questions,LM * lm)614 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters,
615                        LQUESTION **questions, int num_questions, LM *lm)
616 {
617   SWIsltsResult          nRes = SWIsltsSuccess;
618   int                  i;
619   RT_LTREE           * tree;
620 
621   if (lm) {
622     free_letter_mapping(lm);
623   }
624   if (questions) {
625     free_lquestions(questions, num_questions);
626   }
627 
628   if (trees) {
629     for (i=0; i<num_letters; i++) {
630       if (trees[i]) {
631         tree = trees[i];
632         if (tree->values_or_question1) {
633           FREE(tree->values_or_question1);
634           tree->values_or_question1 = NULL;
635         }
636         if (tree->question2) {
637           FREE(tree->question2);
638           tree->question2 = NULL;
639         }
640         if (tree->left_nodes) {
641           FREE(tree->left_nodes);
642           tree->left_nodes = NULL;
643         }
644         FREE(trees[i]);
645         trees[i] = NULL;
646       }
647     }
648     FREE(trees);
649   }
650   return nRes;
651 }
652 
load_allowable_cons_comb(LTS * lts,PORT_FILE * fp)653 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp)
654 {
655   SWIsltsResult          nRes = SWIsltsSuccess;
656   char                line[50];
657   char                tempstr[50];
658   char              * tok;
659   int                 i, toklen;
660   int                 count;
661   char          seps[] = " 	\n";
662 
663   lts->num_cons_comb = 0;
664   lts->allowable_cons_combH = NULL;
665 
666   while (PORT_FGETS(line, 50, fp)) {
667 
668 #ifndef TI_DSP
669 
670     /*need to get rid of sme crud at the end of the line because it is being read in binary mode*/
671     for (i=strlen(line)-1;i>=0;i--) {
672       if (!isalpha(line[i])) line[i] = ' ';
673     }
674 #endif
675     count = 0;
676     tok = safe_strtok(line, seps, &toklen);
677     tempstr[0] = '\0';
678 
679     /* get all available sequence of tokens */
680     while(tok && toklen > 0){
681       count += toklen;
682       strncat(tempstr, tok, toklen);
683       tempstr[count+1] = '\0';
684       strcat(tempstr, " ");
685       count++;
686 
687       tok = safe_strtok(tok+toklen, seps, &toklen);
688     }
689     if (count > 0) {
690 
691         /* delete the final space */
692         tempstr[count-1] = '\0';
693 
694         lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char));
695         if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) {
696           nRes = SWIsltsErrAllocResource;
697           goto CLEAN_UP;
698         }
699 
700         strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr);
701 
702 #if PRINT_CONS_COMB
703         pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr);
704 #endif
705 
706         lts->num_cons_comb++;
707         if (lts->num_cons_comb >= MAX_CONS_COMB) {
708 #ifndef NO_STDERR
709             PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB);
710 #endif
711           nRes = SWIsltsInternalErr;
712           goto CLEAN_UP;
713         }
714     }
715   }
716   if (lts->num_cons_comb == 0) {
717 #ifndef NO_STDERR
718     PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n"));
719 #endif
720   }
721   lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH"));
722   if(lts->allowable_cons_combH == NULL) {
723     nRes = SWIsltsErrAllocResource;
724     goto CLEAN_UP;
725   }
726 
727 #if PRINT_LOAD_TREE_SUMMARY
728   pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb);
729 #endif
730 
731   return SWIsltsSuccess;
732 
733  CLEAN_UP:
734 
735   free_allowable_cons_comb(lts);
736 
737   return nRes;
738 }
739 
free_allowable_cons_comb(LTS * lts)740 static SWIsltsResult free_allowable_cons_comb(LTS *lts)
741 {
742   SWIsltsResult          nRes = SWIsltsSuccess;
743   int                  i;
744 
745   for (i=0; i<lts->num_cons_comb; i++) {
746     if (lts->allowable_cons_comb[i]) {
747       FREE(lts->allowable_cons_comb[i]);
748       lts->allowable_cons_comb[i] = NULL;
749     }
750   }
751   if(lts->allowable_cons_combH)
752     PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH);
753   lts->allowable_cons_combH = NULL;
754   return nRes;
755 }
756 
load_question_strings(LTS * lts,PORT_FILE * fp)757 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp)
758 {
759   SWIsltsResult          nRes = SWIsltsSuccess;
760   int                  i;
761   int                  num;
762   unsigned char        len;
763   char              ** strings;
764   char               * str;
765 
766   num = load_int(fp);
767 
768   lts->strings = strings = (char **) lts_alloc(num, sizeof(char*));
769   lts->string_lens = (char*)lts_alloc(num, sizeof(char));
770 
771   if (strings == NULL || lts->string_lens == NULL ) {
772     nRes = SWIsltsErrAllocResource;
773     goto CLEAN_UP;
774   }
775 
776   for (i=0;i<num;i++) {
777     PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
778 
779     str = strings[i] = lts_alloc(len + 1, sizeof(char));
780     if (str == NULL) {
781       nRes = SWIsltsErrAllocResource;
782       goto CLEAN_UP;
783     }
784 
785     if (len > 0) {
786       PORT_FREAD_CHAR(str, sizeof(char), len, fp);
787     }
788     str[len] = '\0';
789 
790     bitarray_populate_from_list( lts->membership, lts->strings[i], len);
791     lts->string_lens[i] = strlen(lts->strings[i]);
792   }
793 
794   // *pnum = num;
795   lts->num_strings = num;
796 
797   return SWIsltsSuccess;
798 
799  CLEAN_UP:
800 
801   free_question_strings(lts);
802 
803   return nRes;
804 }
805 
806 /* deallocate question strings */
free_question_strings(LTS * lts)807 static SWIsltsResult free_question_strings(LTS* lts)
808 {
809   SWIsltsResult          nRes = SWIsltsSuccess;
810   int                  i;
811 
812   if (lts->strings) {
813     for (i=0;i<lts->num_strings;i++) {
814       if (lts->strings[i]) {
815         FREE(lts->strings[i]);
816         lts->strings[i] = NULL;
817       }
818     }
819     FREE(lts->strings);
820     if(lts->string_lens) FREE(lts->string_lens);
821     lts->strings = NULL;
822     lts->string_lens = NULL;
823   }
824   return nRes;
825 }
826 
827 
create_lts(char * data_filename,LTS_HANDLE * phLts)828 SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts)
829 {
830   SWIsltsResult          nRes = SWIsltsSuccess;
831   LTS                * lts;
832 
833 #ifdef USE_STATIC_SLTS
834   /* TODO: language-specific ID here? */
835   lts = &g_lts;
836 
837 #else /* !USE_STATIC_SLTS */
838 
839   PORT_FILE *fp;
840 
841   lts = (LTS*) lts_alloc(1, sizeof(LTS));
842   if (lts == NULL) {
843     nRes = SWIsltsErrAllocResource;
844     goto CLEAN_UP;
845   }
846 
847   fp = PORT_FOPEN(data_filename, "rb");
848   if (fp == NULL) {
849 #ifndef NO_STDERR
850     PLogError(L("Cannot open %s\n"), data_filename);
851 #endif
852     nRes = SWIsltsFileOpenErr;
853     goto CLEAN_UP;
854   }
855    nRes = load_phone_mapping(fp, &lts->phone_mapping);
856    if (nRes != SWIsltsSuccess) {
857      PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes);
858      goto CLEAN_UP;
859    }
860 
861    nRes = load_question_strings(lts, fp);
862    if (nRes != SWIsltsSuccess) {
863      PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes);
864      goto CLEAN_UP;
865    }
866 
867    nRes  = load_outputs(&(lts->outputs), &(lts->input_for_output), &lts->num_outputs, fp);
868    if (nRes != SWIsltsSuccess) {
869      PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes);
870      goto CLEAN_UP;
871    }
872 
873 #if PRINT_LOAD_TREE
874   pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs);
875 #endif
876 
877   nRes = load_trees(&(lts->trees), &(lts->num_letters),
878                  &(lts->questions), &(lts->num_questions),
879                  &(lts->letter_mapping),
880                  fp);
881   if (nRes != SWIsltsSuccess) {
882     PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes);
883     goto CLEAN_UP;
884   }
885 
886   nRes = load_allowable_cons_comb(lts, fp);
887   if (nRes != SWIsltsSuccess) {
888     PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes);
889     goto CLEAN_UP;
890   }
891 
892   PORT_FCLOSE(fp);
893 
894 #endif /* !USE_STATIC_SLTS */
895 
896   *phLts = lts;
897   return SWIsltsSuccess;
898 
899  CLEAN_UP:
900 
901   free_lts(lts);
902   *phLts = NULL;
903   return nRes;
904 }
905 
906 /* deallocates LTS */
free_lts(LTS_HANDLE hlts)907 SWIsltsResult free_lts(LTS_HANDLE hlts)
908 {
909   SWIsltsResult          nRes = SWIsltsSuccess;
910   LTS                * lts = (LTS *)hlts;
911 
912   if (lts) {
913 
914 #ifndef USE_STATIC_SLTS
915     free_phone_mapping(lts->phone_mapping);
916     free_question_strings(lts);
917     lts->strings = NULL;
918     lts->phone_mapping = NULL;
919 
920     free_outputs(lts->outputs, lts->input_for_output, lts->num_outputs);
921     lts->input_for_output = lts->outputs = NULL;
922 
923     free_trees(lts->trees, lts->num_letters,
924                lts->questions, lts->num_questions,
925                lts->letter_mapping);
926     lts->trees = NULL;
927     lts->questions = NULL;
928     lts->letter_mapping = NULL;
929 
930     free_allowable_cons_comb(lts);
931     FREE(lts);
932 #endif /* !USE_STATIC_LTS */
933   }
934 
935   return nRes;
936 }
937 
938 
/* Return the index of phone name ph in the phone map, or -1 (after logging
 * an error) when the name is not in the table.
 *
 * The table stores each index as a pointer-sized value.  The lookup result
 * is therefore received in a void* and narrowed afterwards; previously the
 * table wrote a full pointer through the address of an int, which overruns
 * the int wherever pointers are wider than int.
 *
 * NOTE(review): the log call passes a pointer for a %x conversion; the
 * message is left unchanged because PLogError's format support is not
 * visible here - confirm and switch to %p if it is supported.
 */
int find_phone(const char *ph, PM *pm)
{
  ESR_ReturnCode rc;
  void *value = NULL;

  rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, &value);
  if (rc != ESR_SUCCESS) {
    PLogError("error while in find_phone(%s,%x)\n", ph, pm);
    return -1;
  }
  return (int)(size_t)value;
}
948 
/* Find the longest question string that str starts with.
   Returns its index in lts->strings, or -1 when str is empty or no
   non-empty question string matches the beginning of str.  Among matches of
   equal length the lowest index wins. */
int find_best_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int input_len;
  int k;

  if (*str == '\0')
    return -1;

  input_len = strlen(str);

  for (k = 0; k < lts->num_strings; k++) {
    int cand_len = lts->string_lens[k];

    /* candidates longer than the input, or not longer than the best match
       so far, cannot improve the result */
    if (cand_len <= input_len && cand_len > best_len
        && strncmp(str, lts->strings[k], cand_len) == 0) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
973 
/* Find the longest question string that str ends with (the comparison is
   made against the last len characters of str).
   Returns its index in lts->strings, or -1 when no non-empty question
   string matches.  Among matches of equal length the lowest index wins. */
int find_best_prefix_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int input_len = strlen(str);
  int k;

  for (k = 0; k < lts->num_strings; k++) {
    int cand_len = lts->string_lens[k];
    const char *tail;

    if (cand_len > input_len)
      continue; /* cannot fit inside str */
    if (cand_len <= best_len)
      continue; /* would not beat the current best */

    tail = str + (input_len - cand_len);
    if (strncmp(tail, lts->strings[k], cand_len) == 0) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
1000 
fill_up_dp_for_letter(LTS * lts,const char * input_word,int word_len,int index,int root_start,int root_end,int left_phone)1001 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone)
1002 {
1003   int i,j;
1004   LDP *dp;
1005   unsigned char letter;
1006   int hit_wb;
1007   LM *lm;
1008   unsigned char word[MAX_WORD_LEN];
1009   char tempstr[MAX_WORD_LEN];
1010   int first_syl_end;
1011   int last_syl_start;
1012 
1013   dp = &(lts->dp);
1014   lm = lts->letter_mapping;
1015 
1016   /* the LTS decision tree does not seem to be well trained at all for
1017      the letter ' when followed by "s"  ... It seems to result in the
1018 	 phoneme 'm', which is wrong.   "'t" seems to be OK though.
1019 	 BAD: Kevin's : k6v6nmz ...  pal's : palmz ... paul's : p{lz
1020 	 BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz
1021 	 BAD: jake's house : jAk6mz&h?s
1022 	 Ignoring ' as below we get ...
1023      BETTER: Kevin's : kev6nz  ... pal's : palz ... paul's : p{lz
1024 	 BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s
1025 	 BETTER: jake's house : jAk6s&h?s
1026 	 The proper solution requires a legitimate text normalizer with special
1027 	 handling of cases like 's which would always put a "z" there,
1028 	 except if preceded by an unvoiced stop (ptk) which requires a "s" there.
1029 	 For now let's just skip the ' letter, which testing shows to be generally
1030 	 safe (janice's, jake's etc are better but still not quite right). */
1031 
1032   if(input_word[index] == '\'')
1033     return 1; // same as unknown character
1034 
1035   letter = find_letter_index(input_word[index], lm);
1036 
1037   if (letter == LTS_MAXCHAR) {
1038   /* lisa - we need to decide how to handle this case.  Do we just silently skip unknown
1039     characters or warn the app or user somehow*/
1040 #ifdef NO_STDERR
1041     PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL);
1042 #else
1043     PLogError(L("unknown character on input %c - skipping\n"), input_word[index]);
1044 #endif
1045     return 1;
1046   }
1047 
1048   hit_wb = 0;
1049 
1050   /*pfprintf(PSTDOUT,"left context\n");*/
1051 
1052   for (j=0;j<5;j++) {
1053     if (hit_wb) {
1054       dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1055     } else {
1056       i = index - (j+1);
1057       if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1058       else {
1059         dp->properties[ Left1+j] = find_letter_index(input_word[i], lm);
1060         if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1061           dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1062           hit_wb = 1;
1063         }
1064       }
1065     }
1066   }
1067 
1068   /*pfprintf(PSTDOUT,"right context\n");*/
1069 
1070   hit_wb = 0;
1071   for (j=0;j<5;j++) {
1072     if (hit_wb) {
1073       dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1074     } else {
1075       i = index + (j+1);
1076       if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1077       else {
1078         dp->properties[ Right1+j] = find_letter_index(input_word[i], lm);
1079         if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1080           dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1081           hit_wb = 1;
1082         }
1083       }
1084     }
1085   }
1086 
1087   dp->letter = letter; // properties[ Letter] = letter;
1088 
1089   dp->properties[ LeftPhone1] = left_phone;
1090 
1091   /*pfprintf(PSTDOUT,"word stuff\n"); */
1092 
1093   /*find word start and end - use unknown character as word boundaries*/
1094 
1095   dp->properties[ WordLen] = word_len;
1096 
1097   if (index == 0) dp->properties[ LetInWord] = 0;
1098   else if (index == word_len-1) dp->properties[ LetInWord] = 2;
1099   else dp->properties[ LetInWord] = 1;
1100 
1101   for (i=0;i<word_len;i++) {
1102     word[i] = find_letter_index(input_word[i], lm);
1103   }
1104 
1105   /*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/
1106   /*  pfprintf(PSTDOUT,"syl stuff\n");*/
1107 
1108   first_syl_end = word_len;
1109   for (i=0;i<word_len;i++) {
1110     if (lm->type[word[i]] == 1) {
1111       for (j=i+1;j<word_len;j++) {
1112         if (lm->type[word[j]] != 1) break;
1113       }
1114       first_syl_end = j;
1115       break;
1116     }
1117   }
1118   last_syl_start = 0;
1119   for (i=word_len-1;i>=0;i--) {
1120     if (lm->type[word[i]] == 1) {
1121       for (j=i-1;j>=0;j--) {
1122         if (lm->type[word[j]] != 1) break;
1123       }
1124       last_syl_start = j;
1125       break;
1126     }
1127   }
1128 
1129 #if PRINT_DP_LETTER
1130   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1131 #endif
1132 
1133   if (index > last_syl_start) dp->properties[ SylInWord] = 2;
1134   else if (index < first_syl_end) dp->properties[ SylInWord] = 0;
1135   else dp->properties[ SylInWord] = 1;
1136 
1137   first_syl_end = word_len;
1138   for (i=0;i<word_len;i++) {
1139     if (lm->type[word[i]] == 1) {
1140       for (j=i+1;j<word_len;j++) {
1141         if (lm->type[word[j]] != 1) break;
1142       }
1143       for (;j<word_len;j++) {
1144         if (lm->type[word[j]] == 1) break;
1145       }
1146       first_syl_end = j;
1147       break;
1148     }
1149   }
1150   last_syl_start = 0;
1151   for (i=word_len-1;i>=0;i--) {
1152     if (lm->type[word[i]] == 1) {
1153       for (j=i-1;j>=0;j--) {
1154         if (lm->type[word[j]] != 1) break;
1155       }
1156       for (;j>=0;j--) {
1157         if (lm->type[word[j]] == 1) break;
1158       }
1159       last_syl_start = j;
1160       break;
1161     }
1162   }
1163 
1164 #if PRINT_DP_LETTER
1165   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1166 #endif
1167 
1168   if (index > last_syl_start) dp->properties[ Syl2InWord] = 2;
1169   else if (index  < first_syl_end) dp->properties[ Syl2InWord] = 0;
1170   else dp->properties[Syl2InWord] = 1;
1171 
1172 
1173   first_syl_end = word_len;
1174   for (i=root_start;i<root_end;i++) {
1175     if (lm->type[word[i]] == 1) {
1176       for (j=i+1;j<word_len;j++) {
1177         if (lm->type[word[j]] != 1) break;
1178       }
1179       first_syl_end = j;
1180       break;
1181     }
1182   }
1183   last_syl_start = 0;
1184   for (i=root_end-1;i>=root_start;i--) {
1185     if (lm->type[word[i]] == 1) {
1186       for (j=i-1;j>=0;j--) {
1187         if (lm->type[word[j]] != 1) break;
1188       }
1189       last_syl_start = j;
1190       break;
1191     }
1192   }
1193 
1194 #if PRINT_DP_LETTER
1195   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1196 #endif
1197 
1198   if (index > last_syl_start) dp->properties[SylInRoot] = 2;
1199   else if (index < first_syl_end) dp->properties[ SylInRoot] = 0;
1200   else dp->properties[ SylInRoot] = 1;
1201 
1202   first_syl_end = word_len;
1203   for (i=root_start;i<root_end;i++) {
1204     if (lm->type[word[i]] == 1) {
1205       for (j=i+1;j<word_len;j++) {
1206         if (lm->type[word[j]] != 1) break;
1207       }
1208       for (;j<word_len;j++) {
1209         if (lm->type[word[j]] == 1) break;
1210       }
1211       first_syl_end = j;
1212       break;
1213     }
1214   }
1215   last_syl_start = 0;
1216   for (i=root_end-1;i>=root_start;i--) {
1217     if (lm->type[word[i]] == 1) {
1218       for (j=i-1;j>=0;j--) {
1219         if (lm->type[word[j]] != 1) break;
1220       }
1221       for (;j>=0;j--) {
1222         if (lm->type[word[j]] == 1) break;
1223       }
1224       last_syl_start = j;
1225       break;
1226     }
1227   }
1228 
1229 #if PRINT_DP_LETTER
1230   pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1231 #endif
1232 
1233   if (index > last_syl_start) dp->properties[Syl2InRoot] = 2;
1234   else if (index  < first_syl_end) dp->properties[Syl2InRoot] = 0;
1235   else dp->properties[Syl2InRoot] = 1;
1236 
1237 
1238   dp->properties[Left_DFRE] = index - root_start;
1239   dp->properties[Right_DFRE] = (root_end - index) - 1;
1240 
1241 
1242   /*  pfprintf(PSTDOUT,"strings\n");*/
1243 #if PRINT_DP_LETTER
1244   pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings);
1245 #endif
1246 
1247   dp->properties[RightString] = find_best_string(input_word+index+1, lts);
1248   strcpy(tempstr, input_word);
1249   tempstr[index] = '\0';
1250 
1251   dp->properties[LeftString] = find_best_prefix_string(tempstr, lts);
1252 
1253 #if PRINT_DP_LETTER
1254   pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]);
1255 
1256   for (i=0;i<word_len;i++) {
1257     pfprintf(PSTDOUT,"%c", lm->letters[word[i]]);
1258   }
1259   pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n",
1260          lm->letters[dp->left_context[2]],
1261          lm->letters[dp->left_context[1]],
1262          lm->letters[dp->left_context[0]],
1263          lm->letters[dp->letter],
1264          lm->letters[dp->right_context[0]],
1265          lm->letters[dp->right_context[1]],
1266          lm->letters[dp->right_context[2]],
1267          dp->let_in_word,
1268          dp->syl_in_word,
1269          dp->syl2_in_word,
1270          dp->word_len,
1271          dp->syl_in_root,
1272          dp->syl2_in_root,
1273          dp->left_DFRE, dp->right_DFRE);
1274 #endif
1275 
1276   return 0;
1277 }
1278 
/* Evaluate a (possibly compound) tree question against a data point.
 *
 *   type 0:  q1            (q2 is not evaluated)
 *   type 1:  q1 && q2
 *   type 2:  q1 && !q2
 *   type 3: !q1 && q2
 *   type 4: !q1 && !q2
 *
 * For types 1..4 both questions are always evaluated.  Any other type yields
 * -1; note that -1 is non-zero, so a caller that only tests truthiness will
 * treat it as a match.
 */
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp)
{
  int first;
  int second;

  if (type == 0)
    return qmatches(q1, dp);

  if (type < 1 || type > 4)
    return -1;

  first = qmatches(q1, dp);
  second = qmatches(q2, dp);

  if (type == 1)
    return (first && second);
  if (type == 2)
    return (first && !second);
  if (type == 3)
    return (!first && second);

  /* type == 4 */
  return (!first && !second);
}
1307 
/* Walk the decision tree of the current letter (lts->dp.letter) from the
 * root down to a leaf, using lts->dp to answer the question at each node.
 *
 * At an inner node, values_or_question1 is the index of the first question;
 * question2 packs the second question index in its low 13 bits and the
 * combination type (see matches()) in the next 3 bits.  A match descends to
 * left_nodes[node], a mismatch to the slot right after it.
 *
 * Returns the leaf's values_or_question1 entry and stores the leaf's
 * question2 entry in *pbackoff_output.
 */
int find_output_for_dp(LTS *lts, int *pbackoff_output)
{
  LDP *datapoint = &(lts->dp);
  RT_LTREE *ltree = lts->trees[datapoint->letter]; // properties[Letter]];
  int node = 0;
  int child;

  for (child = ltree->left_nodes[node]; child != NO_NODE; child = ltree->left_nodes[node]) {
    LQUESTION *primary = lts->questions[ltree->values_or_question1[node]];
    int secondary_index = ltree->question2[node] & 0x1FFF;
    int combination = (ltree->question2[node] & 0xE000) >> 13;
    LQUESTION *secondary = lts->questions[secondary_index];

    node = matches(primary, secondary, combination, datapoint) ? child : child+1;
  }

  /* no left child: this is a leaf node */
  *pbackoff_output = ltree->question2[node];
  return ltree->values_or_question1[node];
}
add_output(char * output,char ** output_phone_string,int out_len,int max_phone_length)1344 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length)
1345 {
1346   char *tok;
1347   int toklen;
1348   char seps[] = " ";
1349 
1350   if (strlen(output) == 0) return out_len;
1351 
1352   tok = safe_strtok(output, seps, &toklen);
1353   while (tok && toklen) {
1354     if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) {
1355 
1356       if (isdigit(tok[toklen-1])) {
1357         /*means it's a vowel.  So, add a syllable boundary.  It's position
1358           gets adjusted later by adjust_syllable_boundaries()*/
1359         strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START);
1360         if (out_len >= max_phone_length) return max_phone_length;
1361       }
1362       strncpy(output_phone_string[out_len], tok, toklen);
1363       output_phone_string[out_len++][toklen] = '\0';
1364       if (out_len >= max_phone_length) return max_phone_length;
1365     }
1366     tok = safe_strtok(tok+toklen, seps, &toklen);
1367   }
1368   return out_len;
1369 }
1370 
/* Tell whether cons_string is a key of the lts->allowable_cons_combH hash
 * table.
 *
 * Returns 1 when the lookup succeeds, 0 otherwise.  The value stored under
 * the key is not used.
 */
int is_allowable_cons_comb(LTS *lts, const char *cons_string)
{
  void* ignored = NULL;
  ESR_ReturnCode status;

  status = PHashTableGetValue( (PHashTable*)lts->allowable_cons_combH, cons_string, &ignored);

  return (status == ESR_SUCCESS) ? 1 : 0;
}
1390 
1391 
1392 
1393 
1394 
/* True when s is non-empty and its last character is a decimal digit. */
static int syl_ends_with_digit(const char *s)
{
  size_t n = strlen(s);

  /* <ctype.h> functions need an unsigned char value */
  return (n > 0) && isdigit((unsigned char)s[n-1]);
}

/* Build the key "p1 p2" (or "p1 p2 p3" when p3 is not NULL) and look it up
   with is_allowable_cons_comb().  A key that does not fit the local buffer
   is reported as not allowable rather than written past the buffer. */
static int syl_onset_allowed(LTS *lts, const char *p1, const char *p2, const char *p3)
{
  char key[20];
  size_t need = strlen(p1) + 1 + strlen(p2);

  if (p3 != NULL) need += 1 + strlen(p3);
  if (need >= sizeof(key)) return 0;

  if (p3 != NULL) sprintf(key, "%s %s %s", p1, p2, p3);
  else sprintf(key, "%s %s", p1, p2);

  return is_allowable_cons_comb(lts, key);
}

/* Move the provisional syllable markers to their final positions.
 *
 *   lts                  model; supplies the allowable consonant combinations
 *   output_phone_string  phone array, rewritten in place
 *   num_out              number of used entries
 *   max_phone_length     not used
 *
 * Three passes over output_phone_string[0..num_out):
 *   1. every bare syllable marker (add_output() put one in front of each
 *      vowel) is shifted left over the longest run of consonants that forms
 *      an allowable combination, and rewritten with the stress digit taken
 *      from the vowel that follows it;
 *   2. the stress digit is stripped from the vowels;
 *   3. where a word marker is not directly followed by a syllable marker,
 *      the next syllable marker is moved to directly after the word marker.
 */
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length)
{
  char *out;
  int i,j;
  int syl_start;
  int stress = 0;
  int first_syl_bound;

  (void)max_phone_length;

  /*there should already be a syllable boundary before each vowel (add_output put one there)*/
  /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/

  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if (strcmp(out, LTS_MARKER_SYLL_START) == 0) { /*means there is a syllable boundary
                                                     find start of allowable sequence*/

      syl_start = 0;

      for (j=i-1;j>0;j--) {
        out = output_phone_string[j];
        if (syl_ends_with_digit(out)) {
          syl_start = j+1;
          break; /*means it's a vowel*/
        }
        if (strcmp(out, LTS_MARKER_WORD_START) == 0) {
          syl_start = j+1;
          break; /*don't push syl boundaries before word boundaries*/
        }
        if (strcmp(out, LTS_MARKER_PRON_START) == 0) {
          syl_start = j+1;
          break; /*don't push syl boundaries before phrase boundaries*/
        }

        /* when two phones precede j, try the 3-phone onset first and fall
           back to the 2-phone onset; otherwise only the 2-phone onset can be
           checked.  The boundary stops at j when neither is allowable. */
        if (!((j > 1 && syl_onset_allowed(lts, output_phone_string[j-2], output_phone_string[j-1], output_phone_string[j]))
              || syl_onset_allowed(lts, output_phone_string[j-1], output_phone_string[j], NULL))) {
#if PRINT_CONS_COMB
          pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
            output_phone_string[j]);
#endif
          syl_start = j;
          break;
        }
      } /* end for j=i-1 */

      /*shift over stuff between syl_start a gap*/
      for (j=i;j>syl_start;j--) {
        strcpy(output_phone_string[j], output_phone_string[j-1]);
      }
      /*now find stress level from phone (and remove it) and add it to syl bound*/

      if (i<num_out-1) {
        out = output_phone_string[i+1];

        if (syl_ends_with_digit(out)) {
          stress = atoi(out + strlen(out)-1);
        } else {
          stress = 0; /*should not happen*/
        }
      } else {
        stress = 0; /*should not happen*/
      }

      sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
    } /* end if (strcmp(out, LTS_MARKER_SYLL_START) == 0) */
  } /* end for i=0 */

  /*remove all the stress marking from the vowels*/
  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && syl_ends_with_digit(out)) {
      out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/
    }
  }

  /* word boundary must be followed by syllable boundary
    if no syllable boundary exists after a word boundary, move the first
    syllable boundary to after the word boundary */
  first_syl_bound = -1;
  syl_start = -1;
  for (i=1;i<num_out;i++) {
    if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) &&
      (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) {

      syl_start = i;
      /* search for first occurance of syllable boundary */
      for(j=syl_start+1;j<num_out; j++){
        out = output_phone_string[j];
        if(strncmp(out, LTS_MARKER_SYLL_START, 2) == 0 && syl_ends_with_digit(out)){
            stress = atoi(out + strlen(out)-1);
            first_syl_bound = j;
            break;
        }
      }

      /* swap entries until syl bound reaches word bound */
      if(first_syl_bound >= 0){
        /* j still equals first_syl_bound here */
        for(; j>syl_start; j--){
          strcpy(output_phone_string[j], output_phone_string[j-1]);
        }
        /* put syllable boundary after word boundary */
        sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);

        /* advance i, reset variables */
        i = first_syl_bound;
        first_syl_bound = syl_start = -1;

      }
    }
  }

}
1529 
1530 
lts_for_word(LTS * lts,char * word,int word_len,char ** output_phone_string,int max_phone_length,int * pnum_out)1531 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out)
1532 {
1533   SWIsltsResult          nRes = SWIsltsSuccess;
1534   int                  i,j;
1535   int                  root_start;
1536   int                  root_end;
1537   int                  output_index;
1538   int                  left_phone;
1539   char               * input_seq;
1540   int                  found_match;
1541   int                  start_num_out;
1542   int                  backoff_output;
1543   int                  num_out;
1544 
1545   start_num_out = num_out = *pnum_out;
1546 
1547   root_start = 0;
1548   root_end = word_len;
1549 
1550   for (i=0;i<word_len;i++) {
1551 
1552     if ((i == 0) || (num_out == 0)) {
1553       /*      pfprintf(PSTDOUT,"about to call find_phone1\n");*/
1554       left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1555 
1556 #if PRINT_LTS_WORD
1557       pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone);
1558 #endif
1559       if (left_phone < 0) {
1560 
1561 #ifdef NO_STDERR
1562         PrintError("Error, cannot find | in phone mappings\n", NULL, NULL, NULL);
1563 #else
1564         PLogError(L("Error, cannot find | in phone mappings\n"));
1565 #endif
1566         nRes = SWIsltsInternalErr;
1567         goto CLEAN_UP;
1568       }
1569     } else {
1570 
1571 #if PRINT_LTS_WORD
1572       pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out);
1573       pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]);
1574 #endif
1575 
1576       if (strcmp(output_phone_string[num_out-1], LTS_MARKER_PRON_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1577       else if (strcmp(output_phone_string[num_out-1], LTS_MARKER_WORD_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1578       else left_phone = find_phone(output_phone_string[num_out-1], lts->phone_mapping);
1579 
1580 #if PRINT_LTS_WORD
1581       pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]);
1582 #endif
1583 
1584       if (left_phone < 0) {
1585 
1586 #ifdef NO_STDERR
1587         PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], NULL, NULL);
1588 #else
1589         PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]);
1590 #endif
1591         nRes = SWIsltsInternalErr;
1592         goto CLEAN_UP;
1593       }
1594     }
1595 
1596     /*    pfprintf(PSTDOUT,"calling fill up dp\n");*/
1597     if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue;
1598 
1599     /*    pfprintf(PSTDOUT,"calling find output\n");*/
1600     output_index = find_output_for_dp(lts, &backoff_output);
1601 
1602 #if PRINT_LTS_WORD
1603     pfprintf(PSTDOUT,"got output %d\n", output_index);
1604 #endif
1605 
1606     found_match = 1;
1607 
1608     if (strlen(lts->input_for_output[output_index]) > 0) {
1609         /*some extra input string to use up*/
1610 #if PRINT_LTS_WORD
1611       pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]);
1612 #endif
1613 
1614       input_seq = lts->input_for_output[output_index];
1615       if (input_seq[0] == '=') {
1616         root_end = i;
1617         input_seq = input_seq+1; /*skip suffix indicator*/
1618       }
1619       for (j=i+1;;j++) {
1620         if (input_seq[j-(i+1)] == '\0') break;
1621         if (input_seq[j-(i+1)] == '-') {
1622           root_start = j;
1623           break;
1624         }
1625         if (j >= word_len) {
1626           found_match = 0;
1627           break;
1628         }
1629 
1630         if (input_seq[j-(i+1)] != word[j]) {
1631           found_match = 0;
1632           break;
1633         }
1634       }
1635       if (found_match) {
1636         i = j-1;
1637       }
1638     }
1639 
1640     if (!found_match) {
1641 #if PRINT_LTS_WORD
1642       pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n",
1643                lts->outputs[backoff_output],
1644                ts->outputs[output_index]);
1645 #endif
1646 
1647       num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length);
1648     }
1649     else {
1650       num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length);
1651     }
1652     if (num_out >= max_phone_length) {
1653       nRes = SWIsltsMaxInputExceeded;
1654       goto CLEAN_UP;
1655     }
1656   }
1657 
1658   *pnum_out = num_out;
1659   return SWIsltsSuccess;
1660 
1661  CLEAN_UP:
1662 
1663   *pnum_out = 0;
1664   return nRes;
1665 }
1666 
1667 
1668 
run_lts(LTS_HANDLE h,FSM_DICT_HANDLE hdict,char * input_sentence,char ** output_phone_string,int * phone_length)1669 SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length)
1670 {
1671   SWIsltsResult            nRes = SWIsltsSuccess;
1672   int                    i;
1673   int                    len;
1674   int                    num_out = 0;
1675   LTS                  * lts;
1676   int                    was_in_phrase;
1677   char                   word[MAX_WORD_LEN];
1678   int                    num_in_word;
1679   int                    max_phone_length;
1680   int                    pron_len;
1681 
1682   max_phone_length = *phone_length;
1683 
1684   len = strlen(input_sentence);
1685 
1686   lts = (LTS*) h;
1687 
1688   was_in_phrase = 0;
1689 
1690   /*add a phrase start then word start at beginning*/
1691 
1692   strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1693   if (num_out >= max_phone_length) {
1694     nRes = SWIsltsMaxInputExceeded;
1695     goto CLEAN_UP;
1696   }
1697 
1698   num_in_word = 0;
1699   pron_len = 1;    // for the first time through
1700 
1701   for (i=0;i<=len;i++) {
1702 
1703 #if PRINT_LTS_WORD
1704     pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]);
1705 #endif
1706 
1707     /* Treat hyphen as word delimiter.  Not quite right for German
1708        hyphenated compounds, but still an improvement. */
1709     if ((input_sentence[i] == ' ') || (input_sentence[i] == '-') || (input_sentence[i] == '\t') || (i == len)) {
1710       if (num_in_word>0 ) {
1711         strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1712         if (num_out >= max_phone_length) {
1713           nRes = SWIsltsMaxInputExceeded;
1714           goto CLEAN_UP;
1715         }
1716 
1717         word[num_in_word] = '\0';
1718 
1719         if (1) {
1720 
1721 #if PRINT_DICT_LOOKUP
1722           pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word);
1723 #endif
1724 		  pron_len = -num_out;
1725           nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1726 		  pron_len += num_out; // now pron_len is the number of phonemes/markers added
1727 		  if(pron_len == 0)
1728 			  num_out--; // to backspace on the LTS_MARKER_WORD_START !!
1729           if (nRes != SWIsltsSuccess) {
1730             goto CLEAN_UP;
1731           }
1732         }
1733         num_in_word = 0;
1734       }
1735     }
1736     else if ( (input_sentence[i] == '.')
1737                 || (input_sentence[i] == ',')
1738                 || (input_sentence[i] == '!')
1739                 || (input_sentence[i] == '?')
1740                 || (input_sentence[i] == '\n')) {
1741       if (was_in_phrase) {
1742         /*add a phrase boundary after lts is called*/
1743         if (num_in_word > 0) {
1744           strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1745           if (num_out >= max_phone_length) {
1746             nRes = SWIsltsMaxInputExceeded;
1747             goto CLEAN_UP;
1748           }
1749 
1750           word[num_in_word] = '\0';
1751 
1752           if (1) {
1753             nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1754             if (nRes != SWIsltsSuccess) {
1755               goto CLEAN_UP;
1756             }
1757           }
1758           num_in_word = 0;
1759         }
1760         strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1761         if (num_out >= max_phone_length) {
1762           nRes = SWIsltsMaxInputExceeded;
1763           goto CLEAN_UP;
1764         }
1765         was_in_phrase = 0;
1766       }
1767     }
1768     else {
1769       if (num_in_word < MAX_WORD_LEN-1) {
1770         word[num_in_word++] = toupper(input_sentence[i]);
1771         was_in_phrase = 1;
1772       }
1773     }
1774   }
1775   /*adjust syllable boundaries*/
1776   adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length);
1777 
1778   *phone_length = num_out;
1779   return SWIsltsSuccess;
1780 
1781  CLEAN_UP:
1782 
1783   *phone_length = 0;
1784   return nRes;
1785 }
1786 
1787 #ifdef USE_STATIC_SLTS
lts_alloc(int num,int size)1788 void *lts_alloc(int num, int size)
1789 {
1790 #ifdef NO_STDERR
1791     PrintError("USE_STATIC_SLTS: lts_alloc should not be called", NULL, NULL, NULL);
1792 #else
1793     PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called"));
1794 #endif
1795   return NULL;
1796 }
1797 #else
1798 
lts_alloc(int num,int size)1799 void *lts_alloc(int num, int size)
1800 {
1801   void *p;
1802   p = CALLOC(num, size, MTAG);
1803   return p;
1804 }
1805 #endif /* USE_STATIC_SLTS */
1806