1 /*---------------------------------------------------------------------------*
2 * run_seq_lts.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20
21
22 #include <stdlib.h>
23 #include <string.h>
24 #include <math.h>
25 #include <ctype.h>
26
27 #ifndef NO_STDERR
28 #include <stdio.h>
29 #else
30 extern void PrintError(char *msg, unsigned long p1, unsigned long p2, unsigned long p3);
31 #endif
32
33 #include "passert.h"
34 #include "pmemory.h"
35 #include "plog.h"
36 #include "phashtable.h"
37 #include "lts_error.h"
38 #include "lts.h"
39 #include "lts_seq_internal.h"
40 #include "port_fileio.h"
41 #include "platform_utils.h" /* strdup, safe_strtok, etc */
42
/* File-local assertion shorthand; expands to passert(). */
#define ASSERT(x) passert(x)

#ifdef TI_DSP
#include "tidsp_defines.h"
#endif

/* Trace switches, defined only in _DEBUG builds. Every switch is 0 here, so
   the "#if PRINT_..." sections in this file are compiled out unless one of
   these values is changed to nonzero. */
#ifdef _DEBUG
#define PRINT_LOAD_TREE_SUMMARY 0
#define PRINT_LOAD_TREE 0
#define PRINT_CONS_COMB 0
#define PRINT_DP_LETTER 0
#define PRINT_LTS_WORD 0
#define PRINT_DICT_LOOKUP 0
#endif

/* Marker tokens. LTS_MARKER_PIPESEP_CHAR is looked up in the letter map as
   the stand-in letter for a word boundary (see fill_up_dp_for_letter).
   NOTE(review): the remaining markers are presumably emitted into the phone
   output by code further down the file -- confirm. */
#define LTS_MARKER_WORD_START "WS"
#define LTS_MARKER_PRON_START "PS"
#define LTS_MARKER_SYLL_START "SS"
#define LTS_MARKER_SYLL_START_DD "SS%d"
#define LTS_MARKER_PIPESEP "|"
#define LTS_MARKER_PIPESEP_CHAR '|'

/* Forward declarations: file-local loaders, each paired with the deallocator
   that releases what it builds. */
static int load_int(PORT_FILE *fp);
static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp);
static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions);
static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap);
static SWIsltsResult free_letter_mapping(LM *lm);
static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap);
static SWIsltsResult free_phone_mapping(PM *pm);
static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp);
static SWIsltsResult free_outputs(char **outputs, char **inputs, int num);
static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
                                LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp);
static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters, LQUESTION **questions, int num_questions, LM *lm);
static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp);
static SWIsltsResult free_allowable_cons_comb(LTS *lts);
static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp);
static SWIsltsResult free_question_strings(LTS* lts);
/* Letter index for character myLet, read from the letter map's lookup table
   after upper-casing the character. The arguments are not parenthesized and
   myLet is handed to toupper() as-is, so pass simple expressions only. */
#define find_letter_index( myLet, myLM) (myLM->letter_index_for_letter[ toupper(myLet)])
/* Non-static helpers defined later in this file. */
int find_phone(const char *ph, PM *pm);
int find_best_string(const char *str, LTS* lts);
int find_best_prefix_string(const char *str, LTS* lts);
int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone);
/* Nonzero when value myV has its bit set in question myQ's membership bit array. */
#define in_list(myV, myQ) (bitarray_read_bit( myQ->membership, myV))
/* Nonzero when the property of myU selected by myQ->type is a member of question myQ. */
#define qmatches(myQ, myU) (in_list( myU->properties[ myQ->type], myQ))
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp) ;
int find_output_for_dp(LTS *lts, int *pbackoff_output);
int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length);
int is_allowable_cons_comb(LTS *lts, const char *cons_string);
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length);
SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *num_out);
94
95 /*------------
96 *
97 * bitarray
98 *
99 *-----------*/
100
/* Nonzero when bit iBiT is set in the bit array biTs, which stores 16 bits
   per unsigned short. The result is the masked bit itself, not 0/1.
   Both arguments are fully parenthesized so that expression arguments
   (e.g. "base + 3" or "bits + 1") index the intended word; iBiT is evaluated
   twice, so it must not have side effects. */
#define bitarray_read_bit( biTs, iBiT) ( (biTs)[(iBiT)/16] & (1<<((iBiT)%16)) )
106
/* Set (iVal != 0) or clear (iVal == 0) bit iBit in the bit array `bits`,
   which stores 16 bits per unsigned short.
   iBit must lie in [0, 255]; the lower bound is asserted as well because a
   negative index would address memory in front of the array. */
void bitarray_write_bit( unsigned short* bits, int iBit, int iVal)
{
  unsigned short mask;

  ASSERT( iBit>=0 && iBit<256);
  mask = (unsigned short)(1u << (iBit%16));
  if (iVal) {
    bits[iBit/16] |= mask;
  } else {
    bits[iBit/16] &= (unsigned short)~mask;
  }
}
/* Rebuild a 256-bit membership array from a list of byte values.
   bits    - array of 16 unsigned shorts (16 bits each); fully cleared first
   list    - byte values whose bits are to be set
   listlen - number of entries in list; nothing is set when it is <= 0
   All (UCHAR_MAX+1)/16 == 16 words are cleared; the previous bound,
   UCHAR_MAX/sizeof(unsigned short)/8, evaluates to 15 and left the last word
   untouched. Each element is read as unsigned char so that values >= 0x80 do
   not turn into negative bit indices where plain char is signed. */
void bitarray_populate_from_list(unsigned short* bits, char* list, int listlen)
{
  enum { BITS_PER_WORD = 16, NUM_WORDS = (UCHAR_MAX + 1) / BITS_PER_WORD };
  int i;

  for (i = 0; i < NUM_WORDS; i++)
    bits[i] = 0;

  for (i = 0; i < listlen; i++) {
    unsigned int value = (unsigned char)list[i];   /* always 0..255 */
    bits[value / BITS_PER_WORD] |= (unsigned short)(1u << (value % BITS_PER_WORD));
  }
}
124
125 /*-----------
126 *
127 * PHashTable
128 *
129 *-----------*/
130
/* Key comparator installed in the hash tables built by this file: the keys
   are treated as NUL-terminated char strings and compared with strcmp(),
   so the result is 0 exactly when both keys are equal. */
static int HashCmpWord(const LCHAR *key1, const LCHAR *key2)
{
  const char *lhs = (const char*)key1;
  const char *rhs = (const char*)key2;

  return strcmp(lhs, rhs);
}
/* Hash function installed in the hash tables built by this file.
   key - NUL-terminated char string
   Returns the polynomial hash h = 31*h + c accumulated over every character
   of the key, in unsigned arithmetic (wraps on overflow); 0 for "". */
static unsigned int HashGetCode(const void *key)
{
  const char *cursor = (const char*)key;
  unsigned int hash = 0;

  while (*cursor != '\0') {
    hash = 31*hash + (unsigned int)*cursor;
    cursor++;
  }
  return hash;
}
my_PHashTableCreate_FromStrings(const char * strings[],int num_strings,const LCHAR * hashName)141 void* my_PHashTableCreate_FromStrings( const char* strings[], int num_strings,
142 const LCHAR* hashName)
143 {
144 PHashTable* table = NULL;
145 ESR_ReturnCode rc = ESR_SUCCESS;
146 PHashTableArgs hashArgs;
147 int i;
148 hashArgs.capacity = 63;
149 hashArgs.compFunction = HashCmpWord; // PHASH_TABLE_DEFAULT_COMP_FUNCTION;
150 hashArgs.hashFunction = HashGetCode; // PHASH_TABLE_DEFAULT_HASH_FUNCTION;
151 hashArgs.maxLoadFactor = PHASH_TABLE_DEFAULT_MAX_LOAD_FACTOR;
152 rc = PHashTableCreate( &hashArgs, hashName, &table);
153 for(i=0; i<num_strings; i++) {
154 void* old;
155 /* formerly the code used linear lookup, so let's avoid dups to match up */
156 rc = PHashTableGetValue( table, strings[i], (void**)&old);
157 if(rc != ESR_SUCCESS) {
158 rc = PHashTablePutValue( table, strings[i], (const void *)i, NULL );
159 }
160 }
161 return table;
162 }
163
164 /*---------
165 *
166 * i/o
167 *
168 *---------*/
169
/* Read one integer from fp through PORT_FREAD_INT16 and return it.
   The result starts out as 0 so that a read which stores nothing yields a
   defined value instead of an indeterminate one.
   NOTE(review): the buffer is passed as uint16* while the element size is
   sizeof(int); how PORT_FREAD_INT16 interprets that pair, and what it
   returns, is defined in port_fileio.h -- confirm before adding a
   return-value check here. */
static int load_int(PORT_FILE *fp)
{
  int v = 0;

  PORT_FREAD_INT16((uint16 *)&v, sizeof(int), 1, fp);

  return v;
}
178
load_lquestions(LQUESTION *** pquestions,int * pnum_questions,PORT_FILE * fp)179 static SWIsltsResult load_lquestions(LQUESTION ***pquestions, int *pnum_questions, PORT_FILE *fp)
180 {
181 int i, num_questions;
182 LQUESTION ** questions;
183 SWIsltsResult nRes = SWIsltsSuccess;
184
185 num_questions = load_int(fp);
186
187 #if PRINT_LOAD_TREE_SUMMARY
188 pfprintf(PSTDOUT,"loading %d questions\n", num_questions);
189 #endif
190
191 *pquestions = questions = (LQUESTION**) lts_alloc(num_questions, sizeof(LQUESTION*));
192 if (questions == NULL) {
193 nRes = SWIsltsErrAllocResource;
194 goto CLEAN_UP;
195 }
196
197 for (i=0;i<num_questions;i++) {
198 questions[i] = (LQUESTION*) lts_alloc(1, sizeof(LQUESTION));
199 if (questions[i] == NULL) {
200 nRes = SWIsltsErrAllocResource;
201 goto CLEAN_UP;
202 }
203
204 #if PRINT_LOAD_TREE
205 pfprintf(PSTDOUT,"LOAD_TREE: loading question %d\n", i);
206 #endif
207
208 PORT_FREAD_CHAR(&(questions[i]->type), sizeof(char), 1, fp);
209 PORT_FREAD_CHAR(&(questions[i]->num_list), sizeof(char), 1, fp);
210
211 questions[i]->list = (unsigned char*) lts_alloc(questions[i]->num_list, sizeof(unsigned char));
212 if (questions[i]->list == NULL) {
213 nRes = SWIsltsErrAllocResource;
214 goto CLEAN_UP;
215 }
216
217 PORT_FREAD_CHAR(questions[i]->list, sizeof(char), (questions[i]->num_list), fp);
218
219 bitarray_populate_from_list( questions[i]->membership, (char*) questions[i]->list, questions[i]->num_list);
220 }
221
222 *pnum_questions = num_questions;
223 return SWIsltsSuccess;
224
225 CLEAN_UP:
226
227 free_lquestions(questions, num_questions);
228 *pnum_questions = 0;
229 *pquestions = NULL;
230 return nRes;
231 }
232
233 /* deallocate questions */
free_lquestions(LQUESTION ** questions,int num_questions)234 static SWIsltsResult free_lquestions(LQUESTION ** questions, int num_questions)
235 {
236 SWIsltsResult nRes = SWIsltsSuccess;
237 int i;
238
239 if (questions) {
240 for (i=0; i<num_questions; i++) {
241 if (questions[i]->list) {
242 FREE(questions[i]->list);
243 questions[i]->list = NULL;
244 }
245 FREE(questions[i]);
246 questions[i] = NULL;
247 }
248 FREE(questions);
249 }
250 return nRes;
251 }
252
load_letter_mapping(PORT_FILE * fp,LM ** ppLetterMap)253 static SWIsltsResult load_letter_mapping(PORT_FILE *fp, LM **ppLetterMap)
254 {
255 SWIsltsResult nRes = SWIsltsSuccess;
256 unsigned char len;
257 LM * lm;
258 int i;
259
260 /* pfprintf(PSTDOUT,"got len %d\n", len);*/
261 lm = (LM*) lts_alloc(1, sizeof(LM));
262 if (lm == NULL) {
263 nRes = SWIsltsErrAllocResource;
264 goto CLEAN_UP;
265 }
266
267 PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
268 lm->num_letters = len;
269
270 lm->letters = (char*) lts_alloc(len, sizeof(char));
271 if (lm->letters == NULL) {
272 nRes = SWIsltsErrAllocResource;
273 goto CLEAN_UP;
274 }
275
276 lm->type = (char*) lts_alloc(len, sizeof(char));
277 if (lm->type == NULL) {
278 nRes = SWIsltsErrAllocResource;
279 goto CLEAN_UP;
280 }
281
282 PORT_FREAD_CHAR(lm->letters, sizeof(char), len, fp);
283 PORT_FREAD_CHAR(lm->type, sizeof(char), len, fp);
284
285 {
286 unsigned int letter;
287 for (letter=0; letter <= UCHAR_MAX; letter++)
288 lm->letter_index_for_letter[letter] = LTS_MAXCHAR;
289 }
290
291 for (i=0;i<len;i++) {
292 char letter = toupper(lm->letters[i]);
293 lm->letters[i] = letter;
294 lm->letter_index_for_letter[(unsigned char)letter] = i;
295 }
296 *ppLetterMap = lm;
297 return SWIsltsSuccess;
298
299 CLEAN_UP:
300 free_letter_mapping(lm);
301 *ppLetterMap = NULL;
302 return nRes;
303 }
304
305 /* deallocate letter mapping */
free_letter_mapping(LM * lm)306 static SWIsltsResult free_letter_mapping(LM *lm)
307 {
308 SWIsltsResult nRes = SWIsltsSuccess;
309
310 if (lm) {
311 if (lm->letters) {
312 FREE(lm->letters);
313 lm->letters = NULL;
314 }
315 if (lm->type) {
316 FREE(lm->type);
317 lm->type = NULL;
318 }
319 lm->num_letters = 0;
320 FREE(lm);
321 }
322 return nRes;
323 }
324
load_phone_mapping(PORT_FILE * fp,PM ** ppPhoneMap)325 static SWIsltsResult load_phone_mapping(PORT_FILE *fp, PM **ppPhoneMap)
326 {
327 SWIsltsResult nRes = SWIsltsSuccess;
328 PM * pm;
329 int i;
330 unsigned char len;
331 char * ph;
332
333 pm = (PM*) lts_alloc(1, sizeof(PM));
334 if (pm == NULL) {
335 nRes = SWIsltsErrAllocResource;
336 goto CLEAN_UP;
337 }
338
339 pm->num_phones = load_int(fp);
340
341 pm->phones = (char**) lts_alloc(pm->num_phones, sizeof(char*));
342 if (pm->phones == NULL) {
343 nRes = SWIsltsErrAllocResource;
344 goto CLEAN_UP;
345 }
346
347 for (i=0;i<pm->num_phones;i++) {
348 PORT_FREAD_CHAR(&len, sizeof(unsigned char), 1, fp);
349
350 pm->phoneH = NULL;
351 pm->phones[i] = ph = (char*) lts_alloc(len+1, sizeof(char));
352 if (ph == NULL) {
353 nRes = SWIsltsErrAllocResource;
354 goto CLEAN_UP;
355 }
356
357 PORT_FREAD_CHAR(ph, sizeof(char), len, fp);
358 ph[len] = '\0';
359 }
360 pm->phoneH = my_PHashTableCreate_FromStrings( (const char**)pm->phones,
361 pm->num_phones,
362 L("lts.phoneH"));
363 if(pm->phoneH == NULL) {
364 nRes = SWIsltsErrAllocResource;
365 goto CLEAN_UP;
366 }
367 *ppPhoneMap = pm;
368 return SWIsltsSuccess;
369
370 CLEAN_UP:
371 free_phone_mapping(pm);
372 *ppPhoneMap = NULL;
373
374 return nRes;
375 }
376
377 /* deallocate phone mapping */
free_phone_mapping(PM * pm)378 static SWIsltsResult free_phone_mapping(PM *pm)
379 {
380 SWIsltsResult nRes = SWIsltsSuccess;
381 int i;
382
383 if (pm) {
384 if (pm->phones) {
385 for (i=0; i<pm->num_phones; i++) {
386 if (pm->phones[i]) {
387 FREE(pm->phones[i]);
388 pm->phones[i] = NULL;
389 }
390 }
391 FREE(pm->phones);
392 pm->phones = NULL;
393 }
394 if(pm->phoneH)
395 PHashTableDestroy( (PHashTable*)pm->phoneH);
396 pm->phoneH = NULL;
397 FREE(pm);
398 }
399 return nRes;
400 }
401
402
load_outputs(char *** poutputs,char *** pinputs,int * pnum,PORT_FILE * fp)403 static SWIsltsResult load_outputs(char ***poutputs, char ***pinputs, int *pnum, PORT_FILE *fp)
404 {
405 SWIsltsResult nRes = SWIsltsSuccess;
406 int i;
407 char ** outputs = NULL;
408 char ** inputs = NULL;
409 int num;
410 unsigned char olen;
411 char * out;
412 unsigned char ilen;
413 char * in;
414
415 num = load_int(fp);
416
417 *poutputs = outputs = (char **) lts_alloc(num, sizeof(char*));
418 if (outputs == NULL) {
419 nRes = SWIsltsErrAllocResource;
420 goto CLEAN_UP;
421 }
422
423 *pinputs = inputs = (char **) lts_alloc(num, sizeof(char*));
424 if (inputs == NULL) {
425 nRes = SWIsltsErrAllocResource;
426 goto CLEAN_UP;
427 }
428
429 for (i=0;i<num;i++) {
430 PORT_FREAD_CHAR(&olen, sizeof(char), 1, fp);
431 out = outputs[i] = lts_alloc(olen + 1, sizeof(char));
432 if (out == NULL) {
433 nRes = SWIsltsErrAllocResource;
434 goto CLEAN_UP;
435 }
436
437 if (olen > 0) {
438 PORT_FREAD_CHAR(out, sizeof(char), olen, fp);
439 }
440 out[olen] = '\0';
441 PORT_FREAD_CHAR(&ilen, sizeof(char), 1, fp);
442 in = inputs[i] = lts_alloc(ilen + 1, sizeof(char));
443 if (in == NULL) {
444 nRes = SWIsltsErrAllocResource;
445 goto CLEAN_UP;
446 }
447
448 if (ilen > 0) {
449 PORT_FREAD_CHAR(in, sizeof(char), ilen, fp);
450 }
451 in[ilen] = '\0';
452 #if PRINT_LOAD_TREE
453 if (ilen > 0) pfprintf(PSTDOUT,"LOAD_TREE: got input %s out %s\n", in, outputs[i]);
454 pfprintf(PSTDOUT,"LOAD_TREE: outputs[%d] len %d out %x out %s\n", i, olen, outputs[i], outputs[i]);
455 #endif
456 }
457
458 *pnum = num;
459 return SWIsltsSuccess;
460
461 CLEAN_UP:
462
463 free_outputs(outputs, inputs, num);
464 *poutputs = NULL;
465 *pinputs = NULL;
466 *pnum = 0;
467
468 return nRes;
469 }
470
free_outputs(char ** outputs,char ** inputs,int num)471 static SWIsltsResult free_outputs(char **outputs, char **inputs, int num)
472 {
473 SWIsltsResult nRes = SWIsltsSuccess;
474 int i;
475
476 if (outputs) {
477 for (i=0; i<num; i++) {
478 if (outputs[i]) {
479 FREE(outputs[i]);
480 outputs[i] = NULL;
481 }
482 }
483 FREE(outputs);
484 }
485
486 if (inputs) {
487 for (i=0; i<num; i++) {
488 if (inputs[i]) {
489 FREE(inputs[i]);
490 inputs[i] = NULL;
491 }
492 }
493 FREE(inputs);
494 }
495 return nRes;
496 }
497
load_trees(RT_LTREE *** ptrees,int * num_letters,LQUESTION *** pquestions,int * num_questions,LM ** plm,PORT_FILE * fp)498 static SWIsltsResult load_trees(RT_LTREE ***ptrees, int *num_letters,
499 LQUESTION ***pquestions, int *num_questions, LM **plm, PORT_FILE *fp)
500 {
501 SWIsltsResult nRes = SWIsltsSuccess;
502 int let, i;
503 RT_LTREE * tree = NULL;
504 RT_LTREE ** trees = NULL;
505
506 #if PRINT_LOAD_TREE_SUMMARY
507 pfprintf(PSTDOUT,"loading letter mapping\n");
508 #endif
509 *ptrees = NULL;
510 *pquestions = NULL;
511 *plm = NULL;
512
513 nRes = load_letter_mapping(fp, plm);
514 if (nRes != SWIsltsSuccess) {
515 goto CLEAN_UP;
516 }
517
518 #if PRINT_LOAD_TREE_SUMMARY
519 pfprintf(PSTDOUT,"loading questions\n");
520 #endif
521
522 nRes = load_lquestions(pquestions, num_questions, fp);
523 if (nRes != SWIsltsSuccess) {
524 goto CLEAN_UP;
525 }
526
527 *num_letters = load_int(fp);
528
529 if (*num_letters != (*plm)->num_letters) {
530 #ifndef NO_STDERR
531 PLogError(L("Error loading data, num_letters %d doesn't match num from mapping %d\n"),
532 *num_letters, (*plm)->num_letters);
533 #endif
534 nRes = SWIsltsInternalErr;
535 goto CLEAN_UP;
536 }
537
538 *ptrees = trees = (RT_LTREE**) lts_alloc(*num_letters, sizeof(RT_LTREE*));
539 if (trees == NULL) {
540 nRes = SWIsltsErrAllocResource;
541 goto CLEAN_UP;
542 }
543
544 for (let=0;let<*num_letters;let++) {
545 /* pfprintf(PSTDOUT,"loading for t %d\n", t);*/
546
547 trees[let] = tree = (RT_LTREE*) lts_alloc(1, sizeof(RT_LTREE));
548 if (tree == NULL) {
549 nRes = SWIsltsErrAllocResource;
550 goto CLEAN_UP;
551 }
552
553 tree->num_nodes = load_int(fp);
554
555 tree->values_or_question1 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
556 if (tree->values_or_question1 == NULL) {
557 nRes = SWIsltsErrAllocResource;
558 goto CLEAN_UP;
559 }
560
561 tree->question2 = (short*) lts_alloc(tree->num_nodes, sizeof(short));
562 if (tree->question2 == NULL) {
563 nRes = SWIsltsErrAllocResource;
564 goto CLEAN_UP;
565 }
566
567 tree->left_nodes = (short *) lts_alloc(tree->num_nodes, sizeof(short));
568 if (tree->left_nodes == NULL) {
569 nRes = SWIsltsErrAllocResource;
570 goto CLEAN_UP;
571 }
572
573 #if PRINT_LOAD_TREE
574 pfprintf(PSTDOUT,"LOAD_TREE: Tree for let %d num_nodes %d\n", let, tree->num_nodes);
575 #endif
576
577 for (i=0;i<tree->num_nodes;i++) {
578 PORT_FREAD_INT16(&(tree->left_nodes[i]), sizeof(short), 1, fp);
579 PORT_FREAD_INT16(&(tree->values_or_question1[i]), sizeof(short), 1, fp);
580
581 #if PRINT_LOAD_TREE
582 pfprintf(PSTDOUT,"LOAD_TREE: node[%d] %d %d", i, tree->left_nodes[i], tree->values_or_question1[i]);
583 #endif
584
585 PORT_FREAD_INT16(&(tree->question2[i]), sizeof(short), 1, fp);
586 if (tree->left_nodes[i] != NO_NODE) {
587 if (tree->question2[i] == -1) tree->question2[i] = 0;
588 #if PRINT_LOAD_TREE
589 pfprintf(PSTDOUT," %x", (unsigned short) tree->question2[i]);
590 #endif
591 }
592
593 #if PRINT_LOAD_TREE
594 pfprintf(PSTDOUT,"\n");
595 #endif
596 }
597 }
598
599 return SWIsltsSuccess;
600
601 CLEAN_UP:
602
603 free_trees(trees, *num_letters, *pquestions, *num_questions, *plm);
604 *ptrees = NULL;
605 *pquestions = NULL;
606 *plm = NULL;
607 *num_letters = 0;
608 *num_questions = 0;
609
610 return nRes;
611 }
612
613 /* deallocate trees */
free_trees(RT_LTREE ** trees,int num_letters,LQUESTION ** questions,int num_questions,LM * lm)614 static SWIsltsResult free_trees(RT_LTREE **trees, int num_letters,
615 LQUESTION **questions, int num_questions, LM *lm)
616 {
617 SWIsltsResult nRes = SWIsltsSuccess;
618 int i;
619 RT_LTREE * tree;
620
621 if (lm) {
622 free_letter_mapping(lm);
623 }
624 if (questions) {
625 free_lquestions(questions, num_questions);
626 }
627
628 if (trees) {
629 for (i=0; i<num_letters; i++) {
630 if (trees[i]) {
631 tree = trees[i];
632 if (tree->values_or_question1) {
633 FREE(tree->values_or_question1);
634 tree->values_or_question1 = NULL;
635 }
636 if (tree->question2) {
637 FREE(tree->question2);
638 tree->question2 = NULL;
639 }
640 if (tree->left_nodes) {
641 FREE(tree->left_nodes);
642 tree->left_nodes = NULL;
643 }
644 FREE(trees[i]);
645 trees[i] = NULL;
646 }
647 }
648 FREE(trees);
649 }
650 return nRes;
651 }
652
load_allowable_cons_comb(LTS * lts,PORT_FILE * fp)653 static SWIsltsResult load_allowable_cons_comb(LTS *lts, PORT_FILE *fp)
654 {
655 SWIsltsResult nRes = SWIsltsSuccess;
656 char line[50];
657 char tempstr[50];
658 char * tok;
659 int i, toklen;
660 int count;
661 char seps[] = " \n";
662
663 lts->num_cons_comb = 0;
664 lts->allowable_cons_combH = NULL;
665
666 while (PORT_FGETS(line, 50, fp)) {
667
668 #ifndef TI_DSP
669
670 /*need to get rid of sme crud at the end of the line because it is being read in binary mode*/
671 for (i=strlen(line)-1;i>=0;i--) {
672 if (!isalpha(line[i])) line[i] = ' ';
673 }
674 #endif
675 count = 0;
676 tok = safe_strtok(line, seps, &toklen);
677 tempstr[0] = '\0';
678
679 /* get all available sequence of tokens */
680 while(tok && toklen > 0){
681 count += toklen;
682 strncat(tempstr, tok, toklen);
683 tempstr[count+1] = '\0';
684 strcat(tempstr, " ");
685 count++;
686
687 tok = safe_strtok(tok+toklen, seps, &toklen);
688 }
689 if (count > 0) {
690
691 /* delete the final space */
692 tempstr[count-1] = '\0';
693
694 lts->allowable_cons_comb[lts->num_cons_comb] = (char*) lts_alloc(strlen(tempstr)+1, sizeof(char));
695 if (lts->allowable_cons_comb[lts->num_cons_comb] == NULL) {
696 nRes = SWIsltsErrAllocResource;
697 goto CLEAN_UP;
698 }
699
700 strcpy(lts->allowable_cons_comb[lts->num_cons_comb], tempstr);
701
702 #if PRINT_CONS_COMB
703 pfprintf(PSTDOUT,"LOAD_TREE: allowable_cons_comb[%d]: %s\n", lts->num_cons_comb, tempstr);
704 #endif
705
706 lts->num_cons_comb++;
707 if (lts->num_cons_comb >= MAX_CONS_COMB) {
708 #ifndef NO_STDERR
709 PLogError(L("MAX_CONS_COMB %d exceeded\n"), MAX_CONS_COMB);
710 #endif
711 nRes = SWIsltsInternalErr;
712 goto CLEAN_UP;
713 }
714 }
715 }
716 if (lts->num_cons_comb == 0) {
717 #ifndef NO_STDERR
718 PLogError(L("Warning: the data file is missing consonant combinations - syllable boundaries will be incorrect\n"));
719 #endif
720 }
721 lts->allowable_cons_combH = my_PHashTableCreate_FromStrings( (const char**)lts->allowable_cons_comb, lts->num_cons_comb, L("lts.allowable_cons_combH"));
722 if(lts->allowable_cons_combH == NULL) {
723 nRes = SWIsltsErrAllocResource;
724 goto CLEAN_UP;
725 }
726
727 #if PRINT_LOAD_TREE_SUMMARY
728 pfprintf(PSTDOUT,"loaded %d cons combinations\n", lts->num_cons_comb);
729 #endif
730
731 return SWIsltsSuccess;
732
733 CLEAN_UP:
734
735 free_allowable_cons_comb(lts);
736
737 return nRes;
738 }
739
free_allowable_cons_comb(LTS * lts)740 static SWIsltsResult free_allowable_cons_comb(LTS *lts)
741 {
742 SWIsltsResult nRes = SWIsltsSuccess;
743 int i;
744
745 for (i=0; i<lts->num_cons_comb; i++) {
746 if (lts->allowable_cons_comb[i]) {
747 FREE(lts->allowable_cons_comb[i]);
748 lts->allowable_cons_comb[i] = NULL;
749 }
750 }
751 if(lts->allowable_cons_combH)
752 PHashTableDestroy( (PHashTable*)lts->allowable_cons_combH);
753 lts->allowable_cons_combH = NULL;
754 return nRes;
755 }
756
load_question_strings(LTS * lts,PORT_FILE * fp)757 static SWIsltsResult load_question_strings(LTS* lts, PORT_FILE* fp)
758 {
759 SWIsltsResult nRes = SWIsltsSuccess;
760 int i;
761 int num;
762 unsigned char len;
763 char ** strings;
764 char * str;
765
766 num = load_int(fp);
767
768 lts->strings = strings = (char **) lts_alloc(num, sizeof(char*));
769 lts->string_lens = (char*)lts_alloc(num, sizeof(char));
770
771 if (strings == NULL || lts->string_lens == NULL ) {
772 nRes = SWIsltsErrAllocResource;
773 goto CLEAN_UP;
774 }
775
776 for (i=0;i<num;i++) {
777 PORT_FREAD_CHAR(&len, sizeof(char), 1, fp);
778
779 str = strings[i] = lts_alloc(len + 1, sizeof(char));
780 if (str == NULL) {
781 nRes = SWIsltsErrAllocResource;
782 goto CLEAN_UP;
783 }
784
785 if (len > 0) {
786 PORT_FREAD_CHAR(str, sizeof(char), len, fp);
787 }
788 str[len] = '\0';
789
790 bitarray_populate_from_list( lts->membership, lts->strings[i], len);
791 lts->string_lens[i] = strlen(lts->strings[i]);
792 }
793
794 // *pnum = num;
795 lts->num_strings = num;
796
797 return SWIsltsSuccess;
798
799 CLEAN_UP:
800
801 free_question_strings(lts);
802
803 return nRes;
804 }
805
806 /* deallocate question strings */
free_question_strings(LTS * lts)807 static SWIsltsResult free_question_strings(LTS* lts)
808 {
809 SWIsltsResult nRes = SWIsltsSuccess;
810 int i;
811
812 if (lts->strings) {
813 for (i=0;i<lts->num_strings;i++) {
814 if (lts->strings[i]) {
815 FREE(lts->strings[i]);
816 lts->strings[i] = NULL;
817 }
818 }
819 FREE(lts->strings);
820 if(lts->string_lens) FREE(lts->string_lens);
821 lts->strings = NULL;
822 lts->string_lens = NULL;
823 }
824 return nRes;
825 }
826
827
create_lts(char * data_filename,LTS_HANDLE * phLts)828 SWIsltsResult create_lts(char *data_filename, LTS_HANDLE *phLts)
829 {
830 SWIsltsResult nRes = SWIsltsSuccess;
831 LTS * lts;
832
833 #ifdef USE_STATIC_SLTS
834 /* TODO: language-specific ID here? */
835 lts = &g_lts;
836
837 #else /* !USE_STATIC_SLTS */
838
839 PORT_FILE *fp;
840
841 lts = (LTS*) lts_alloc(1, sizeof(LTS));
842 if (lts == NULL) {
843 nRes = SWIsltsErrAllocResource;
844 goto CLEAN_UP;
845 }
846
847 fp = PORT_FOPEN(data_filename, "rb");
848 if (fp == NULL) {
849 #ifndef NO_STDERR
850 PLogError(L("Cannot open %s\n"), data_filename);
851 #endif
852 nRes = SWIsltsFileOpenErr;
853 goto CLEAN_UP;
854 }
855 nRes = load_phone_mapping(fp, <s->phone_mapping);
856 if (nRes != SWIsltsSuccess) {
857 PLogError(L("SWIsltsErr: load_phone_mapping() failed: Err_code = %d\n"), nRes);
858 goto CLEAN_UP;
859 }
860
861 nRes = load_question_strings(lts, fp);
862 if (nRes != SWIsltsSuccess) {
863 PLogError(L("SWIsltsErr: load_question_strings() failed: Err_code = %d\n"), nRes);
864 goto CLEAN_UP;
865 }
866
867 nRes = load_outputs(&(lts->outputs), &(lts->input_for_output), <s->num_outputs, fp);
868 if (nRes != SWIsltsSuccess) {
869 PLogError(L("SWIsltsErr: load_outputs() failed: Err_code = %d\n"), nRes);
870 goto CLEAN_UP;
871 }
872
873 #if PRINT_LOAD_TREE
874 pfprintf(PSTDOUT,"LOAD_TREE: got %d outputs, loading trees\n", lts->num_outputs);
875 #endif
876
877 nRes = load_trees(&(lts->trees), &(lts->num_letters),
878 &(lts->questions), &(lts->num_questions),
879 &(lts->letter_mapping),
880 fp);
881 if (nRes != SWIsltsSuccess) {
882 PLogError(L("SWIsltsErr: load_trees() failed: Err_code = %d\n"), nRes);
883 goto CLEAN_UP;
884 }
885
886 nRes = load_allowable_cons_comb(lts, fp);
887 if (nRes != SWIsltsSuccess) {
888 PLogError(L("SWIsltsErr: load_allowable_cons_comb() failed: Err_code = %d\n"), nRes);
889 goto CLEAN_UP;
890 }
891
892 PORT_FCLOSE(fp);
893
894 #endif /* !USE_STATIC_SLTS */
895
896 *phLts = lts;
897 return SWIsltsSuccess;
898
899 CLEAN_UP:
900
901 free_lts(lts);
902 *phLts = NULL;
903 return nRes;
904 }
905
906 /* deallocates LTS */
free_lts(LTS_HANDLE hlts)907 SWIsltsResult free_lts(LTS_HANDLE hlts)
908 {
909 SWIsltsResult nRes = SWIsltsSuccess;
910 LTS * lts = (LTS *)hlts;
911
912 if (lts) {
913
914 #ifndef USE_STATIC_SLTS
915 free_phone_mapping(lts->phone_mapping);
916 free_question_strings(lts);
917 lts->strings = NULL;
918 lts->phone_mapping = NULL;
919
920 free_outputs(lts->outputs, lts->input_for_output, lts->num_outputs);
921 lts->input_for_output = lts->outputs = NULL;
922
923 free_trees(lts->trees, lts->num_letters,
924 lts->questions, lts->num_questions,
925 lts->letter_mapping);
926 lts->trees = NULL;
927 lts->questions = NULL;
928 lts->letter_mapping = NULL;
929
930 free_allowable_cons_comb(lts);
931 FREE(lts);
932 #endif /* !USE_STATIC_LTS */
933 }
934
935 return nRes;
936 }
937
938
/* Look up the index of phone string `ph` in the phone map's hash table.
   ph - NUL-terminated phone string
   pm - phone map whose phoneH table maps phone strings to their index
   Returns the index, or -1 (after logging an error) when the lookup fails.
   The table hands back a pointer-sized value, so it is received in a void*
   and narrowed afterwards; receiving it directly through the address of an
   int wrote past the int wherever pointers are wider than int. */
int find_phone(const char *ph, PM *pm)
{
  ESR_ReturnCode rc;
  void *value = NULL;

  rc = PHashTableGetValue((PHashTable*)pm->phoneH, ph, &value);
  if (rc != ESR_SUCCESS) {
    PLogError("error while in find_phone(%s,%x)\n", ph, pm);
    return -1;
  }
  /* the stored "pointer" is the phone index itself */
  return (int)(size_t)value;
}
948
/* Find the longest question string that `str` starts with.
   str - NUL-terminated text to match
   lts - supplies strings[], string_lens[] and num_strings
   Returns the index of the longest string in lts->strings that is a prefix
   of str (the first one wins among equal lengths), or -1 when str is empty
   or no non-empty string matches. */
int find_best_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int input_len;
  int k;

  if (*str == '\0')
    return -1;

  input_len = strlen(str);

  for (k = 0; k < lts->num_strings; ++k) {
    int cand_len = lts->string_lens[k];

    /* a candidate longer than the input cannot be its prefix */
    if (cand_len <= input_len
        && strncmp(str, lts->strings[k], cand_len) == 0
        && cand_len > best_len) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
973
/* Find the longest question string that `str` ends with.
   str - NUL-terminated text to match
   lts - supplies strings[], string_lens[] and num_strings
   Each candidate is compared against the last `len` characters of str.
   Returns the index of the longest matching string (the first one wins among
   equal lengths), or -1 when no non-empty string matches. */
int find_best_prefix_string(const char *str, LTS* lts)
{
  int best_index = -1;
  int best_len = 0;
  int total_len = strlen(str);
  int k;

  for (k = 0; k < lts->num_strings; ++k) {
    int cand_len = lts->string_lens[k];
    const char *tail;

    if (cand_len > total_len)
      continue;   /* longer than the whole text */

    tail = str + (total_len - cand_len);
    if (strncmp(tail, lts->strings[k], cand_len) != 0)
      continue;

    if (cand_len > best_len) {
      best_len = cand_len;
      best_index = k;
    }
  }
  return best_index;
}
1000
fill_up_dp_for_letter(LTS * lts,const char * input_word,int word_len,int index,int root_start,int root_end,int left_phone)1001 int fill_up_dp_for_letter(LTS *lts, const char *input_word, int word_len, int index, int root_start, int root_end, int left_phone)
1002 {
1003 int i,j;
1004 LDP *dp;
1005 unsigned char letter;
1006 int hit_wb;
1007 LM *lm;
1008 unsigned char word[MAX_WORD_LEN];
1009 char tempstr[MAX_WORD_LEN];
1010 int first_syl_end;
1011 int last_syl_start;
1012
1013 dp = &(lts->dp);
1014 lm = lts->letter_mapping;
1015
1016 /* the LTS decision tree does not seem to be well trained at all for
1017 the letter ' when followed by "s" ... It seems to result in the
1018 phoneme 'm', which is wrong. "'t" seems to be OK though.
1019 BAD: Kevin's : k6v6nmz ... pal's : palmz ... paul's : p{lz
1020 BAD: janice's : jan6s6mz ... tom's house : t)mmz&h?s ... tonya's : t)ny6mz
1021 BAD: jake's house : jAk6mz&h?s
1022 Ignoring ' as below we get ...
1023 BETTER: Kevin's : kev6nz ... pal's : palz ... paul's : p{lz
1024 BETTER: janice's : jan6s6s ... tom's house : t)mz&h?s ... tonya's : t)ny6s
1025 BETTER: jake's house : jAk6s&h?s
1026 The proper solution requires a legitimate text normalizer with special
1027 handling of cases like 's which would always put a "z" there,
1028 except if preceded by an unvoiced stop (ptk) which requires a "s" there.
1029 For now let's just skip the ' letter, which testing shows to be generally
1030 safe (janice's, jake's etc are better but still not quite right). */
1031
1032 if(input_word[index] == '\'')
1033 return 1; // same as unknown character
1034
1035 letter = find_letter_index(input_word[index], lm);
1036
1037 if (letter == LTS_MAXCHAR) {
1038 /* lisa - we need to decide how to handle this case. Do we just silently skip unknown
1039 characters or warn the app or user somehow*/
1040 #ifdef NO_STDERR
1041 PrintError("unknown character on input %c - skipping\n", input_word[index], NULL, NULL);
1042 #else
1043 PLogError(L("unknown character on input %c - skipping\n"), input_word[index]);
1044 #endif
1045 return 1;
1046 }
1047
1048 hit_wb = 0;
1049
1050 /*pfprintf(PSTDOUT,"left context\n");*/
1051
1052 for (j=0;j<5;j++) {
1053 if (hit_wb) {
1054 dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1055 } else {
1056 i = index - (j+1);
1057 if (i < 0) dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1058 else {
1059 dp->properties[ Left1+j] = find_letter_index(input_word[i], lm);
1060 if (dp->properties[ Left1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1061 dp->properties[ Left1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1062 hit_wb = 1;
1063 }
1064 }
1065 }
1066 }
1067
1068 /*pfprintf(PSTDOUT,"right context\n");*/
1069
1070 hit_wb = 0;
1071 for (j=0;j<5;j++) {
1072 if (hit_wb) {
1073 dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1074 } else {
1075 i = index + (j+1);
1076 if (i >= word_len) dp->properties[Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1077 else {
1078 dp->properties[ Right1+j] = find_letter_index(input_word[i], lm);
1079 if (dp->properties[ Right1+j] == LTS_MAXCHAR) { /*assume an unknown character is a word boundary*/
1080 dp->properties[ Right1+j] = find_letter_index(LTS_MARKER_PIPESEP_CHAR, lm);
1081 hit_wb = 1;
1082 }
1083 }
1084 }
1085 }
1086
1087 dp->letter = letter; // properties[ Letter] = letter;
1088
1089 dp->properties[ LeftPhone1] = left_phone;
1090
1091 /*pfprintf(PSTDOUT,"word stuff\n"); */
1092
1093 /*find word start and end - use unknown character as word boundaries*/
1094
1095 dp->properties[ WordLen] = word_len;
1096
1097 if (index == 0) dp->properties[ LetInWord] = 0;
1098 else if (index == word_len-1) dp->properties[ LetInWord] = 2;
1099 else dp->properties[ LetInWord] = 1;
1100
1101 for (i=0;i<word_len;i++) {
1102 word[i] = find_letter_index(input_word[i], lm);
1103 }
1104
1105 /*figure out syllable in word - not really syllables - just looks to see if is or at first or last vowel*/
1106 /* pfprintf(PSTDOUT,"syl stuff\n");*/
1107
1108 first_syl_end = word_len;
1109 for (i=0;i<word_len;i++) {
1110 if (lm->type[word[i]] == 1) {
1111 for (j=i+1;j<word_len;j++) {
1112 if (lm->type[word[j]] != 1) break;
1113 }
1114 first_syl_end = j;
1115 break;
1116 }
1117 }
1118 last_syl_start = 0;
1119 for (i=word_len-1;i>=0;i--) {
1120 if (lm->type[word[i]] == 1) {
1121 for (j=i-1;j>=0;j--) {
1122 if (lm->type[word[j]] != 1) break;
1123 }
1124 last_syl_start = j;
1125 break;
1126 }
1127 }
1128
1129 #if PRINT_DP_LETTER
1130 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1131 #endif
1132
1133 if (index > last_syl_start) dp->properties[ SylInWord] = 2;
1134 else if (index < first_syl_end) dp->properties[ SylInWord] = 0;
1135 else dp->properties[ SylInWord] = 1;
1136
1137 first_syl_end = word_len;
1138 for (i=0;i<word_len;i++) {
1139 if (lm->type[word[i]] == 1) {
1140 for (j=i+1;j<word_len;j++) {
1141 if (lm->type[word[j]] != 1) break;
1142 }
1143 for (;j<word_len;j++) {
1144 if (lm->type[word[j]] == 1) break;
1145 }
1146 first_syl_end = j;
1147 break;
1148 }
1149 }
1150 last_syl_start = 0;
1151 for (i=word_len-1;i>=0;i--) {
1152 if (lm->type[word[i]] == 1) {
1153 for (j=i-1;j>=0;j--) {
1154 if (lm->type[word[j]] != 1) break;
1155 }
1156 for (;j>=0;j--) {
1157 if (lm->type[word[j]] == 1) break;
1158 }
1159 last_syl_start = j;
1160 break;
1161 }
1162 }
1163
1164 #if PRINT_DP_LETTER
1165 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1166 #endif
1167
1168 if (index > last_syl_start) dp->properties[ Syl2InWord] = 2;
1169 else if (index < first_syl_end) dp->properties[ Syl2InWord] = 0;
1170 else dp->properties[Syl2InWord] = 1;
1171
1172
1173 first_syl_end = word_len;
1174 for (i=root_start;i<root_end;i++) {
1175 if (lm->type[word[i]] == 1) {
1176 for (j=i+1;j<word_len;j++) {
1177 if (lm->type[word[j]] != 1) break;
1178 }
1179 first_syl_end = j;
1180 break;
1181 }
1182 }
1183 last_syl_start = 0;
1184 for (i=root_end-1;i>=root_start;i--) {
1185 if (lm->type[word[i]] == 1) {
1186 for (j=i-1;j>=0;j--) {
1187 if (lm->type[word[j]] != 1) break;
1188 }
1189 last_syl_start = j;
1190 break;
1191 }
1192 }
1193
1194 #if PRINT_DP_LETTER
1195 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1196 #endif
1197
1198 if (index > last_syl_start) dp->properties[SylInRoot] = 2;
1199 else if (index < first_syl_end) dp->properties[ SylInRoot] = 0;
1200 else dp->properties[ SylInRoot] = 1;
1201
1202 first_syl_end = word_len;
1203 for (i=root_start;i<root_end;i++) {
1204 if (lm->type[word[i]] == 1) {
1205 for (j=i+1;j<word_len;j++) {
1206 if (lm->type[word[j]] != 1) break;
1207 }
1208 for (;j<word_len;j++) {
1209 if (lm->type[word[j]] == 1) break;
1210 }
1211 first_syl_end = j;
1212 break;
1213 }
1214 }
1215 last_syl_start = 0;
1216 for (i=root_end-1;i>=root_start;i--) {
1217 if (lm->type[word[i]] == 1) {
1218 for (j=i-1;j>=0;j--) {
1219 if (lm->type[word[j]] != 1) break;
1220 }
1221 for (;j>=0;j--) {
1222 if (lm->type[word[j]] == 1) break;
1223 }
1224 last_syl_start = j;
1225 break;
1226 }
1227 }
1228
1229 #if PRINT_DP_LETTER
1230 pfprintf(PSTDOUT,"first_syl_end %d last_syl_start %d\n", first_syl_end, last_syl_start);
1231 #endif
1232
1233 if (index > last_syl_start) dp->properties[Syl2InRoot] = 2;
1234 else if (index < first_syl_end) dp->properties[Syl2InRoot] = 0;
1235 else dp->properties[Syl2InRoot] = 1;
1236
1237
1238 dp->properties[Left_DFRE] = index - root_start;
1239 dp->properties[Right_DFRE] = (root_end - index) - 1;
1240
1241
1242 /* pfprintf(PSTDOUT,"strings\n");*/
1243 #if PRINT_DP_LETTER
1244 pfprintf(PSTDOUT,"input word %s num_strings %d\n", input_word, lts->num_strings);
1245 #endif
1246
1247 dp->properties[RightString] = find_best_string(input_word+index+1, lts);
1248 strcpy(tempstr, input_word);
1249 tempstr[index] = '\0';
1250
1251 dp->properties[LeftString] = find_best_prefix_string(tempstr, lts);
1252
1253 #if PRINT_DP_LETTER
1254 pfprintf(PSTDOUT,"dp %c ", lm->letters[dp->letter]);
1255
1256 for (i=0;i<word_len;i++) {
1257 pfprintf(PSTDOUT,"%c", lm->letters[word[i]]);
1258 }
1259 pfprintf(PSTDOUT," %c%c%c {%c} %c%c%c liw %d siw %d s2iw %d nw %d sir %d s2ir %d left_DFRE %d right_DFRE %d\n",
1260 lm->letters[dp->left_context[2]],
1261 lm->letters[dp->left_context[1]],
1262 lm->letters[dp->left_context[0]],
1263 lm->letters[dp->letter],
1264 lm->letters[dp->right_context[0]],
1265 lm->letters[dp->right_context[1]],
1266 lm->letters[dp->right_context[2]],
1267 dp->let_in_word,
1268 dp->syl_in_word,
1269 dp->syl2_in_word,
1270 dp->word_len,
1271 dp->syl_in_root,
1272 dp->syl2_in_root,
1273 dp->left_DFRE, dp->right_DFRE);
1274 #endif
1275
1276 return 0;
1277 }
1278
/*
 * Evaluate a one- or two-question test against a data point.
 *
 * type selects how the question results are combined:
 *   0 : q1 only (q2 is not evaluated)
 *   1 : q1 && q2
 *   2 : q1 && !q2
 *   3 : !q1 && q2
 *   4 : !q1 && !q2
 * For types 1-4 both questions are always evaluated.
 *
 * Returns the result of qmatches(q1, dp) for type 0, 0 or 1 for types 1-4,
 * and -1 for any other type.
 */
int matches(LQUESTION *q1, LQUESTION *q2, int type, LDP *dp)
{
  int first;
  int second;

  if (type == 0) {
    return qmatches(q1, dp);
  }
  if ((type < 1) || (type > 4)) {
    return -1;
  }

  first = qmatches(q1, dp);
  second = qmatches(q2, dp);

  /* types 3 and 4 negate the first question, types 2 and 4 the second */
  if ((type == 3) || (type == 4)) first = !first;
  if ((type == 2) || (type == 4)) second = !second;

  return (first && second);
}

1307
/*
 * Walk the decision tree selected by the letter of lts->dp until a leaf is hit.
 *
 * At an internal node, values_or_question1[] holds the index of the first
 * question; question2[] packs the second question index in its low 13 bits
 * and the combination type (see matches()) in the three bits above them.
 * When the test matches, the walk continues at left_nodes[node], otherwise
 * at the slot right after it.
 *
 * At a leaf (left_nodes[node] == NO_NODE), question2[] is stored into
 * *pbackoff_output and values_or_question1[] is returned.
 */
int find_output_for_dp(LTS *lts, int *pbackoff_output)
{
  LDP *dp = &(lts->dp);
  RT_LTREE *tree = lts->trees[dp->letter];
  int node = 0;

  for (;;) {
    LQUESTION *first_q;
    LQUESTION *second_q;
    int second_q_index;
    int combine;
    int match_child = tree->left_nodes[node];

    if (match_child == NO_NODE) {
      /* leaf node */
      *pbackoff_output = tree->question2[node];
      return tree->values_or_question1[node];
    }

    first_q = lts->questions[tree->values_or_question1[node]];
    second_q_index = tree->question2[node] & 0x1FFF;
    combine = (tree->question2[node] & 0xE000) >> 13;
    second_q = lts->questions[second_q_index];

    node = matches(first_q, second_q, combine, dp) ? match_child : match_child + 1;
  }
}
add_output(char * output,char ** output_phone_string,int out_len,int max_phone_length)1344 int add_output(char *output, char **output_phone_string, int out_len, int max_phone_length)
1345 {
1346 char *tok;
1347 int toklen;
1348 char seps[] = " ";
1349
1350 if (strlen(output) == 0) return out_len;
1351
1352 tok = safe_strtok(output, seps, &toklen);
1353 while (tok && toklen) {
1354 if ((toklen > 0) && (strncmp(tok, "null", 4) != 0)) {
1355
1356 if (isdigit(tok[toklen-1])) {
1357 /*means it's a vowel. So, add a syllable boundary. It's position
1358 gets adjusted later by adjust_syllable_boundaries()*/
1359 strcpy(output_phone_string[out_len++], LTS_MARKER_SYLL_START);
1360 if (out_len >= max_phone_length) return max_phone_length;
1361 }
1362 strncpy(output_phone_string[out_len], tok, toklen);
1363 output_phone_string[out_len++][toklen] = '\0';
1364 if (out_len >= max_phone_length) return max_phone_length;
1365 }
1366 tok = safe_strtok(tok+toklen, seps, &toklen);
1367 }
1368 return out_len;
1369 }
1370
/*
 * Look up cons_string in lts->allowable_cons_combH.
 * Returns 1 when PHashTableGetValue() reports ESR_SUCCESS, 0 otherwise.
 * The value stored under the key is not used.
 */
int is_allowable_cons_comb(LTS *lts, const char *cons_string)
{
  void *ignored_value = NULL;
  ESR_ReturnCode status;

  status = PHashTableGetValue((PHashTable*)lts->allowable_cons_combH, cons_string, &ignored_value);
  return (status == ESR_SUCCESS) ? 1 : 0;
}
1390
1391
1392
1393
1394
/*
 * Returns nonzero when the last character of s is a decimal digit.
 * An empty string yields 0 (no read before the start of the buffer), and the
 * character is converted to unsigned char before being handed to isdigit().
 */
static int lts_ends_with_digit(const char *s)
{
  size_t len = strlen(s);

  return (len > 0) && isdigit((unsigned char)s[len-1]);
}

/*
 * Join two phones (p3 == NULL) or three phones with single spaces and look
 * the result up with is_allowable_cons_comb().  A combination that does not
 * fit in the local key buffer is reported as not allowed.
 */
static int lts_cons_comb_allowed(LTS *lts, const char *p1, const char *p2, const char *p3)
{
  char key[64];
  int n;

  if (p3 != NULL) {
    n = snprintf(key, sizeof(key), "%s %s %s", p1, p2, p3);
  } else {
    n = snprintf(key, sizeof(key), "%s %s", p1, p2);
  }
  if ((n < 0) || ((size_t)n >= sizeof(key))) return 0;

  return is_allowable_cons_comb(lts, key);
}

/*
 * Move the syllable markers written by add_output() to their final place.
 *
 * Pass 1: each bare LTS_MARKER_SYLL_START is shifted back over the phones in
 *         front of it for as long as they form an allowable combination,
 *         stopping after a phone ending in a digit or after a word/phrase
 *         marker.  The marker is rewritten as LTS_MARKER_SYLL_START_DD with
 *         the trailing digit of the phone that follows it.
 * Pass 2: the trailing digit is removed from every entry that does not start
 *         with LTS_MARKER_SYLL_START.
 * Pass 3: when a word marker is not directly followed by a syllable marker,
 *         the next syllable marker found is moved to just after it.
 *
 * max_phone_length is not used.
 */
void adjust_syllable_boundaries(LTS *lts, char **output_phone_string, int num_out, int max_phone_length)
{
  char *out;
  int i,j;
  int syl_start;
  int stress = 0;
  int first_syl_bound;
  int allowed;

  /*there should already be a syllable boundary before each vowel (add_output put one there)*/
  /*so just find these, then shift back by allowable consonant combinations and move the syllable mark*/

  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if (strcmp(out, LTS_MARKER_SYLL_START) != 0) continue;

    /*found a syllable boundary: find the start of the allowable sequence*/
    syl_start = 0;

    for (j=i-1;j>0;j--) {
      out = output_phone_string[j];
      if (lts_ends_with_digit(out)) {
        syl_start = j+1;
        break; /*trailing digit (add_output treats these as vowels)*/
      }
      if (strcmp(out, LTS_MARKER_WORD_START) == 0) {
        syl_start = j+1;
        break; /*don't push syl boundaries before word boundaries*/
      }
      if (strcmp(out, LTS_MARKER_PRON_START) == 0) {
        syl_start = j+1;
        break; /*don't push syl boundaries before phrase boundaries*/
      }

      /* try the three-phone combination first (when there is room for it),
         then fall back to the two-phone combination */
      allowed = 0;
      if (j > 1) {
        allowed = lts_cons_comb_allowed(lts, output_phone_string[j-2], output_phone_string[j-1],
                                        output_phone_string[j]);
      }
      if (!allowed) {
        allowed = lts_cons_comb_allowed(lts, output_phone_string[j-1], output_phone_string[j], NULL);
      }
      if (!allowed) {
#if PRINT_CONS_COMB
        pfprintf(PSTDOUT,"cons comb %s %s not allowed\n", output_phone_string[j-1],
                 output_phone_string[j]);
#endif
        syl_start = j;
        break;
      }
    } /* end for j=i-1 */

    /*shift entries syl_start..i-1 up by one to open a gap at syl_start*/
    for (j=i;j>syl_start;j--) {
      strcpy(output_phone_string[j], output_phone_string[j-1]);
    }

    /*take the stress level from the phone after the old marker position*/
    stress = 0; /*stays 0 if there is no such phone or it has no digit - should not happen*/
    if (i<num_out-1) {
      out = output_phone_string[i+1];
      if (lts_ends_with_digit(out)) {
        stress = atoi(out + strlen(out)-1);
      }
    }

    sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);
  } /* end for i=0 */

  /*remove all the stress marking from the vowels*/
  for (i=0;i<num_out;i++) {
    out = output_phone_string[i];
    if ((strncmp(out, LTS_MARKER_SYLL_START, 2) != 0) && lts_ends_with_digit(out)) {
      out[strlen(out)-1] = '\0'; /*remove the stress from the vowel*/
    }
  }

  /* word boundary must be followed by syllable boundary
     if no syllable boundary exists after a word boundary, move the first
     syllable boundary to after the word boundary */
  first_syl_bound = -1;
  syl_start = -1;
  for (i=1;i<num_out;i++) {
    if ((strcmp(output_phone_string[i-1], LTS_MARKER_WORD_START) == 0) &&
        (strncmp(output_phone_string[i], LTS_MARKER_SYLL_START, 2) != 0)) {

      syl_start = i;
      /* search for first occurrence of a syllable boundary */
      for (j=syl_start+1;j<num_out;j++) {
        out = output_phone_string[j];
        if ((strncmp(out, LTS_MARKER_SYLL_START, 2) == 0) && lts_ends_with_digit(out)) {
          stress = atoi(out + strlen(out)-1);
          first_syl_bound = j;
          break;
        }
      }

      /* shift entries up until the gap reaches the word boundary
         (j still equals first_syl_bound here) */
      if (first_syl_bound >= 0) {
        for (;j>syl_start;j--) {
          strcpy(output_phone_string[j], output_phone_string[j-1]);
        }
        /* put syllable boundary after word boundary */
        sprintf(output_phone_string[syl_start], LTS_MARKER_SYLL_START_DD, stress);

        /* advance i, reset variables */
        i = first_syl_bound;
        first_syl_bound = syl_start = -1;
      }
    }
  }
}
1529
1530
lts_for_word(LTS * lts,char * word,int word_len,char ** output_phone_string,int max_phone_length,int * pnum_out)1531 SWIsltsResult lts_for_word(LTS *lts, char *word, int word_len, char **output_phone_string, int max_phone_length, int *pnum_out)
1532 {
1533 SWIsltsResult nRes = SWIsltsSuccess;
1534 int i,j;
1535 int root_start;
1536 int root_end;
1537 int output_index;
1538 int left_phone;
1539 char * input_seq;
1540 int found_match;
1541 int start_num_out;
1542 int backoff_output;
1543 int num_out;
1544
1545 start_num_out = num_out = *pnum_out;
1546
1547 root_start = 0;
1548 root_end = word_len;
1549
1550 for (i=0;i<word_len;i++) {
1551
1552 if ((i == 0) || (num_out == 0)) {
1553 /* pfprintf(PSTDOUT,"about to call find_phone1\n");*/
1554 left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1555
1556 #if PRINT_LTS_WORD
1557 pfprintf(PSTDOUT,"got phone %d for initial | (LTS_MARKER_PIPESEP)\n", left_phone);
1558 #endif
1559 if (left_phone < 0) {
1560
1561 #ifdef NO_STDERR
1562 PrintError("Error, cannot find | in phone mappings\n", NULL, NULL, NULL);
1563 #else
1564 PLogError(L("Error, cannot find | in phone mappings\n"));
1565 #endif
1566 nRes = SWIsltsInternalErr;
1567 goto CLEAN_UP;
1568 }
1569 } else {
1570
1571 #if PRINT_LTS_WORD
1572 pfprintf(PSTDOUT,"about to call find_phone2 num_out %d\n", num_out);
1573 pfprintf(PSTDOUT,"out[%d] %s\n", num_out-1, output_phone_string[num_out-1]);
1574 #endif
1575
1576 if (strcmp(output_phone_string[num_out-1], LTS_MARKER_PRON_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1577 else if (strcmp(output_phone_string[num_out-1], LTS_MARKER_WORD_START) == 0) left_phone = find_phone(LTS_MARKER_PIPESEP, lts->phone_mapping);
1578 else left_phone = find_phone(output_phone_string[num_out-1], lts->phone_mapping);
1579
1580 #if PRINT_LTS_WORD
1581 pfprintf(PSTDOUT,"got phone %d for %s\n", left_phone, output_phone_string[num_out-1]);
1582 #endif
1583
1584 if (left_phone < 0) {
1585
1586 #ifdef NO_STDERR
1587 PrintError("Error, cannot find %s in phone mappings\n", (unsigned long)output_phone_string[num_out-1], NULL, NULL);
1588 #else
1589 PLogError(L("Error, cannot find %s in phone mappings\n"), output_phone_string[num_out-1]);
1590 #endif
1591 nRes = SWIsltsInternalErr;
1592 goto CLEAN_UP;
1593 }
1594 }
1595
1596 /* pfprintf(PSTDOUT,"calling fill up dp\n");*/
1597 if (fill_up_dp_for_letter(lts, word, word_len, i, root_start, root_end, left_phone)) continue;
1598
1599 /* pfprintf(PSTDOUT,"calling find output\n");*/
1600 output_index = find_output_for_dp(lts, &backoff_output);
1601
1602 #if PRINT_LTS_WORD
1603 pfprintf(PSTDOUT,"got output %d\n", output_index);
1604 #endif
1605
1606 found_match = 1;
1607
1608 if (strlen(lts->input_for_output[output_index]) > 0) {
1609 /*some extra input string to use up*/
1610 #if PRINT_LTS_WORD
1611 pfprintf(PSTDOUT,"GOT INPUT %s for %s letter %c\n", lts->input_for_output[output_index], word, word[i]);
1612 #endif
1613
1614 input_seq = lts->input_for_output[output_index];
1615 if (input_seq[0] == '=') {
1616 root_end = i;
1617 input_seq = input_seq+1; /*skip suffix indicator*/
1618 }
1619 for (j=i+1;;j++) {
1620 if (input_seq[j-(i+1)] == '\0') break;
1621 if (input_seq[j-(i+1)] == '-') {
1622 root_start = j;
1623 break;
1624 }
1625 if (j >= word_len) {
1626 found_match = 0;
1627 break;
1628 }
1629
1630 if (input_seq[j-(i+1)] != word[j]) {
1631 found_match = 0;
1632 break;
1633 }
1634 }
1635 if (found_match) {
1636 i = j-1;
1637 }
1638 }
1639
1640 if (!found_match) {
1641 #if PRINT_LTS_WORD
1642 pfprintf(PSTDOUT,"using backoff output %s instead of regular %s\n",
1643 lts->outputs[backoff_output],
1644 ts->outputs[output_index]);
1645 #endif
1646
1647 num_out = add_output(lts->outputs[backoff_output], output_phone_string, num_out, max_phone_length);
1648 }
1649 else {
1650 num_out = add_output(lts->outputs[output_index], output_phone_string, num_out, max_phone_length);
1651 }
1652 if (num_out >= max_phone_length) {
1653 nRes = SWIsltsMaxInputExceeded;
1654 goto CLEAN_UP;
1655 }
1656 }
1657
1658 *pnum_out = num_out;
1659 return SWIsltsSuccess;
1660
1661 CLEAN_UP:
1662
1663 *pnum_out = 0;
1664 return nRes;
1665 }
1666
1667
1668
run_lts(LTS_HANDLE h,FSM_DICT_HANDLE hdict,char * input_sentence,char ** output_phone_string,int * phone_length)1669 SWIsltsResult run_lts(LTS_HANDLE h, FSM_DICT_HANDLE hdict, char *input_sentence, char **output_phone_string, int *phone_length)
1670 {
1671 SWIsltsResult nRes = SWIsltsSuccess;
1672 int i;
1673 int len;
1674 int num_out = 0;
1675 LTS * lts;
1676 int was_in_phrase;
1677 char word[MAX_WORD_LEN];
1678 int num_in_word;
1679 int max_phone_length;
1680 int pron_len;
1681
1682 max_phone_length = *phone_length;
1683
1684 len = strlen(input_sentence);
1685
1686 lts = (LTS*) h;
1687
1688 was_in_phrase = 0;
1689
1690 /*add a phrase start then word start at beginning*/
1691
1692 strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1693 if (num_out >= max_phone_length) {
1694 nRes = SWIsltsMaxInputExceeded;
1695 goto CLEAN_UP;
1696 }
1697
1698 num_in_word = 0;
1699 pron_len = 1; // for the first time through
1700
1701 for (i=0;i<=len;i++) {
1702
1703 #if PRINT_LTS_WORD
1704 pfprintf(PSTDOUT,"WORKING on letter %d %c\n", i, input_sentence[i]);
1705 #endif
1706
1707 /* Treat hyphen as word delimiter. Not quite right for German
1708 hyphenated compounds, but still an improvement. */
1709 if ((input_sentence[i] == ' ') || (input_sentence[i] == '-') || (input_sentence[i] == '\t') || (i == len)) {
1710 if (num_in_word>0 ) {
1711 strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1712 if (num_out >= max_phone_length) {
1713 nRes = SWIsltsMaxInputExceeded;
1714 goto CLEAN_UP;
1715 }
1716
1717 word[num_in_word] = '\0';
1718
1719 if (1) {
1720
1721 #if PRINT_DICT_LOOKUP
1722 pfprintf(PSTDOUT,"Did not find %s in dictionary\n", word);
1723 #endif
1724 pron_len = -num_out;
1725 nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1726 pron_len += num_out; // now pron_len is the number of phonemes/markers added
1727 if(pron_len == 0)
1728 num_out--; // to backspace on the LTS_MARKER_WORD_START !!
1729 if (nRes != SWIsltsSuccess) {
1730 goto CLEAN_UP;
1731 }
1732 }
1733 num_in_word = 0;
1734 }
1735 }
1736 else if ( (input_sentence[i] == '.')
1737 || (input_sentence[i] == ',')
1738 || (input_sentence[i] == '!')
1739 || (input_sentence[i] == '?')
1740 || (input_sentence[i] == '\n')) {
1741 if (was_in_phrase) {
1742 /*add a phrase boundary after lts is called*/
1743 if (num_in_word > 0) {
1744 strcpy(output_phone_string[num_out++], LTS_MARKER_WORD_START);
1745 if (num_out >= max_phone_length) {
1746 nRes = SWIsltsMaxInputExceeded;
1747 goto CLEAN_UP;
1748 }
1749
1750 word[num_in_word] = '\0';
1751
1752 if (1) {
1753 nRes = lts_for_word(lts, word, num_in_word, output_phone_string, max_phone_length, &num_out);
1754 if (nRes != SWIsltsSuccess) {
1755 goto CLEAN_UP;
1756 }
1757 }
1758 num_in_word = 0;
1759 }
1760 strcpy(output_phone_string[num_out++], LTS_MARKER_PRON_START);
1761 if (num_out >= max_phone_length) {
1762 nRes = SWIsltsMaxInputExceeded;
1763 goto CLEAN_UP;
1764 }
1765 was_in_phrase = 0;
1766 }
1767 }
1768 else {
1769 if (num_in_word < MAX_WORD_LEN-1) {
1770 word[num_in_word++] = toupper(input_sentence[i]);
1771 was_in_phrase = 1;
1772 }
1773 }
1774 }
1775 /*adjust syllable boundaries*/
1776 adjust_syllable_boundaries(lts, output_phone_string, num_out, max_phone_length);
1777
1778 *phone_length = num_out;
1779 return SWIsltsSuccess;
1780
1781 CLEAN_UP:
1782
1783 *phone_length = 0;
1784 return nRes;
1785 }
1786
1787 #ifdef USE_STATIC_SLTS
lts_alloc(int num,int size)1788 void *lts_alloc(int num, int size)
1789 {
1790 #ifdef NO_STDERR
1791 PrintError("USE_STATIC_SLTS: lts_alloc should not be called", NULL, NULL, NULL);
1792 #else
1793 PLogError(L("USE_STATIC_SLTS: lts_alloc should not be called"));
1794 #endif
1795 return NULL;
1796 }
1797 #else
1798
lts_alloc(int num,int size)1799 void *lts_alloc(int num, int size)
1800 {
1801 void *p;
1802 p = CALLOC(num, size, MTAG);
1803 return p;
1804 }
1805 #endif /* USE_STATIC_SLTS */
1806