1 /*---------------------------------------------------------------------------*
2 * srec_arb.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #include "pstdio.h"
21 #include "passert.h"
22 #include "portable.h"
23
24 #include<string.h>
25
26 #include"portable.h"
27
28 #include"sizes.h"
29 #include"hmm_desc.h"
30 #include"search_network.h" /* for EPSILON_OFFSET */
31 #include"srec_arb.h"
32
/* DEBUG_PRONS gates the "#if DEBUG_PRONS" diagnostic printing inside
   get_modelids_for_pron(); it is 0, so that printing is compiled out. */
33 #define DEBUG_PRONS 0
/* IF_DEBUG_PRONS(X) expands to nothing, so any statement wrapped in it
   is dropped by the preprocessor. */
34 #define IF_DEBUG_PRONS(X)
35
/* Revision-control identification string for this file.
   NOTE(review): the never-taken "0 ? &rcsid" arm references the variable
   itself, presumably to silence unused-variable warnings -- confirm. */
36 static const char *rcsid = 0 ? (const char *) &rcsid :
37 "$Id: srec_arb.c,v 1.27.4.15 2007/12/14 22:03:51 dahan Exp $";
38
/**
 * Answer one tree question for a phoneme in context.
 *
 * @param quest  question to evaluate; its qtype selects which neighbour
 *               is examined
 * @param lphon  phoneme to the left of the centre phoneme
 * @param cphon  centre phoneme (not consulted by this function)
 * @param rphon  phoneme to the right of the centre phoneme
 * @return ANSWER_PASS or ANSWER_FAIL
 */
int question_check(srec_question* quest, phonemeID lphon, phonemeID cphon, phonemeID rphon)
{
  /* BIT_ADDRESS splits a phoneme id into an index into membership_bits
     (idx) and a value that is ANDed with that word (mask). */
  asr_int16_t idx = 0;
  asr_int16_t mask = 0;

  if (quest->qtype == QUESTION_LEFT)
  {
    BIT_ADDRESS(lphon, idx, mask);
  }
  else
  {
#if USE_WWTRIPHONE
    /* word-boundary questions are answered directly, without the bit set */
    if (quest->qtype == QUESTION_WBLEFT)
    {
      return (lphon == WBPHONEME_CODE) ? ANSWER_PASS : ANSWER_FAIL;
    }
    if (quest->qtype == QUESTION_WBRIGHT)
    {
      return (rphon == WBPHONEME_CODE) ? ANSWER_PASS : ANSWER_FAIL;
    }
#endif
    /* anything else is expected to be a right-context question */
    ASSERT(quest->qtype == QUESTION_RIGHT);
    BIT_ADDRESS(rphon, idx, mask);
  }

  if (quest->membership_bits[idx] & mask)
  {
    return ANSWER_PASS;
  }
  return ANSWER_FAIL;
}
64
65 /* get model id for phoneme in context */
get_modelid_for_pic(srec_arbdata * allotree,phonemeID lphon,phonemeID cphon,phonemeID rphon)66 int get_modelid_for_pic(srec_arbdata* allotree, phonemeID lphon, phonemeID cphon, phonemeID rphon)
67 {
68 int ans;
69 tree_node* tnode = allotree->pdata[cphon].model_nodes;
70 while (tnode->node.quest_index >= 0)
71 {
72 ans = question_check(&allotree->questions[tnode->node.quest_index],
73 lphon, cphon, rphon);
74 tnode = (ans == ANSWER_FAIL ? (tree_node*)tnode->node.fail : (tree_node*)tnode->node.pass);
75 }
76 return tnode->term.pelid;
77 }
78
/**
 * Read num_questions question records from fp into the memory at *buffer.
 *
 * @param pquestions     receives the address of the first question
 *                       (the value *buffer had on entry)
 * @param num_questions  number of records to read
 * @param buffer         in/out cursor into the destination image; advanced
 *                       by num_questions * sizeof(srec_question)
 * @param fp             input file, positioned at the first record
 *
 * For each record, one 16-bit qtype is read followed by
 * PSET_BIT_ARRAY_SIZE 16-bit membership words. The results of pfread()
 * are not checked.
 */
void read_questions(srec_question** pquestions, asr_int16_t num_questions, char **buffer, PFile *fp)
{
  srec_question *cur = (srec_question*)(*buffer);
  asr_int16_t remaining;

  *pquestions = cur;

  /* reserve the whole array in the image before filling it in */
  *buffer += num_questions * sizeof(srec_question);

  for (remaining = num_questions; remaining > 0; remaining--, cur++)
  {
    pfread(&(cur->qtype), sizeof(asr_uint16_t), 1, fp);
    pfread(&(cur->membership_bits), sizeof(asr_uint16_t), PSET_BIT_ARRAY_SIZE, fp);
  }
}
93
/* We need to handle the interword silence here somehow.
   Proposal: we create one supermodel which combines the
   model preceding the silence and the model following it, so for
   "boston&mass" we'll have "n&m" as a single "supermodel".
   We'll put that supermodel in the graph but then overlay the
   actual models on it; the overlay only needs to be done
   once. The number of possible supermodels is 113655, which is
   larger than what an ilabel can hold; the solution to that is
   to also encode on the "cost" of the supermodel arc.
103
104 /------SUPER(a&b)---\
105 o----a1---o----b1----o
106 \--a2--o--#--o--b2--/
107
108 cost is 16bits, ilabel is 16bits
109 on ilabel we encode the a1,(a2-a1)
110 on cost we encode b1,(b2-b1)
111 ... a1,b1 use 9 bits (400 models)
112 ... deltas use 6 bits (+/-31 range)
113 That leaves 1 bit left over, which is the top bit to signal this encoding,
114 and make sure the cost is very high.
115 */
116
117
/**
 * Map a pronunciation (a sequence of phoneme codes) to acoustic model ids.
 *
 * Each phoneme is looked up in context: its left neighbour, itself and its
 * right neighbour. The first phoneme's left context and the last phoneme's
 * right context are SILENCE_CODE (or WBPHONEME_CODE when USE_WWTRIPHONE is
 * set).
 *
 * @param allotree            tree data; NULL is rejected
 * @param phonemes            phoneme codes, one byte each
 * @param num_phonemes        number of entries in phonemes
 * @param acoustic_model_ids  output array with room for num_phonemes ids
 * @return 0 on success (including num_phonemes == 0); 1 if allotree is
 *         NULL or a phoneme maps to MAXphonemeID in phoneme_index
 */
int get_modelids_for_pron(srec_arbdata* allotree,
                          const char* phonemes, int num_phonemes,
                          modelID* acoustic_model_ids)
{
  int i;
#if DEBUG_PRONS
  int j;
#endif
  modelID modelid;
  phonemeID lphon, cphon, rphon;

  if (allotree == NULL)
    return 1;

  if (num_phonemes == 0)
    return 0;

  IF_DEBUG_PRONS(printf("%s get_modelids_for_pron pronunciation %s\n", __FILE__, (char*)phonemes));

  /* Phoneme codes are converted through unsigned char before indexing:
     where plain char is signed, a byte >= 0x80 cast straight to unsigned
     becomes a huge value and would index far outside phoneme_index. */
#if !USE_WWTRIPHONE
  lphon = (phonemeID)allotree->phoneme_index[SILENCE_CODE];
  cphon = (phonemeID)allotree->phoneme_index[(unsigned char)phonemes[0]];
#else
  lphon = WBPHONEME_CODE; /* (phonemeID)allotree->phoneme_index[ WBPHONEME_CODE]; */
  cphon = (phonemeID)allotree->phoneme_index[(unsigned char)phonemes[0]];
#endif
  if (cphon == MAXphonemeID)
    return 1; /* bad phoneme */

  for (i = 0; i < num_phonemes; i++)
  {
#if !USE_WWTRIPHONE
    rphon = (i == num_phonemes - 1 ?
             (phonemeID)allotree->phoneme_index[SILENCE_CODE] :
             (phonemeID)allotree->phoneme_index[(unsigned char)phonemes[i+1]]);
#else
    rphon = (i == num_phonemes - 1 ?
             WBPHONEME_CODE /*(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE] */ :
             (phonemeID)allotree->phoneme_index[(unsigned char)phonemes[i+1]]);
#endif
    if (rphon == MAXphonemeID)
      return 1; /* bad phoneme */

    modelid = (modelID) get_modelid_for_pic(allotree, lphon, cphon, rphon);
    acoustic_model_ids[i] = modelid;
#if DEBUG_PRONS
    printf("%c%c%c hmm%d states", allotree->pdata[lphon].code,
           allotree->pdata[cphon].code, allotree->pdata[rphon].code,
           acoustic_model_ids[i]);
    for (j = 0; j < allotree->hmm_infos[modelid].num_states; j++)
      printf(" %d", allotree->hmm_infos[modelid].state_indices[j]);
    printf("\n");
#endif
    /* slide the context window one phoneme to the right */
    lphon = cphon;
    cphon = rphon;
  }
  return 0;
}
171
172 /*-----------------------------------------------------------------------*
173 * *
174 * phoneme data stream functions *
175 * *
176 *-----------------------------------------------------------------------*/
177
read_tree_node_f(char ** buffer,PFile * fp)178 tree_node* read_tree_node_f(char **buffer, PFile *fp)
179 {
180 tree_node* tnode = (tree_node*) * buffer;
181 pfread(&(tnode->node.quest_index), sizeof(asr_int16_t), 1, fp);
182 pfread(&(tnode->term.pelid), sizeof(asr_int16_t), 1, fp);
183 pfread(&(tnode->node.fail), sizeof(tree_branch_info*), 1, fp);
184 pfread(&(tnode->node.pass), sizeof(tree_branch_info*), 1, fp);
185
186 /* because tree_node is a union, the actual size maybe large than we have read */
187 ASSERT(sizeof(asr_int16_t)*2 + sizeof(tree_branch_info *)*2 == sizeof(tree_node));
188
189 *buffer += sizeof(tree_node);
190 if (tnode->node.quest_index >= 0)
191 {
192 tnode->node.fail = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
193 tnode->node.pass = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
194 }
195 return tnode;
196 }
197
/**
 * Read the phoneme table, then each phoneme's model tree, from fp into the
 * memory at *buffer.
 *
 * @param pdata         receives the address of the first phoneme_data
 *                      record (the value *buffer had on entry)
 * @param num_phonemes  number of phoneme records to read
 * @param buffer        in/out cursor into the destination image; advanced
 *                      past the table and past every tree node read
 * @param fp            input file positioned at the first phoneme record
 *
 * The tree pointers stored in the file are read and later replaced by the
 * addresses returned from read_tree_node_f(). The results of pfread() are
 * not checked.
 */
void read_phoneme_data(phoneme_data** pdata, asr_int16_t num_phonemes, char **buffer, PFile *fp)
{
  int i, ptr;
#if STATE_NODES_NEEDED_AT_RUNTIME
  int j;
#endif
  phoneme_data *pd;

  pd = *pdata = (phoneme_data*)(*buffer);

  for (i = 0; i < num_phonemes; i++)
  {
    pfread(&(pd->name), sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    pfread(&(pd->code), sizeof(asr_uint16_t), 1, fp);
    /* 16-bit filler word, read and discarded
       (NOTE(review): presumably struct padding -- confirm) */
    pfread(&ptr, sizeof(asr_int16_t), 1, fp);
    pfread(&(pd->model_nodes), sizeof(tree_node *), 1, fp);
    pfread(&(pd->num_states), sizeof(asr_uint16_t), 1, fp);
    pfread(&ptr, sizeof(asr_int16_t), 1, fp); /* another discarded filler word */
    pfread(&(pd->state_nodes), sizeof(tree_node *), MAX_PHONE_STATES, fp);
    pd++;
  }
  /* the reads above must account for every byte of phoneme_data */
  ASSERT(sizeof(phoneme_data) == MAX_PHONEME_NAME_LEN + sizeof(asr_int16_t)*4 + sizeof(tree_node *)*(1 + MAX_PHONE_STATES));
  (*buffer) += num_phonemes * sizeof(phoneme_data) / BYTES_PER_ATOM;
  ASSERT((char *)pd == *buffer);

  for (i = 0; i < num_phonemes; i++)
  {
#if STATE_NODES_NEEDED_AT_RUNTIME
    /* This branch previously could not compile (undeclared j, missing fp
       argument); it now matches read_tree_node_f()'s signature. */
    for (j = 0; j < (*pdata)[i].num_states; j++)
      (*pdata)[i].state_nodes[j] = read_tree_node_f(buffer, fp);
#endif
    (*pdata)[i].model_nodes = read_tree_node_f(buffer, fp);
  }
}
229
230 /*-----------------------------------------------------------------------*
231 * *
232 * hmm info stream functions *
233 * *
234 *-----------------------------------------------------------------------*/
235
/**
 * Read the HMM info table and the shared state-index array from fp into
 * the memory at *buffer, and attach the table to allotree->hmm_infos.
 *
 * @param allotree  supplies num_hmms and num_states; receives hmm_infos
 * @param buffer    in/out cursor into the destination image; advanced past
 *                  the HMMInfo table and past the state-index array
 * @param fp        input file positioned at the first HMMInfo record
 *
 * The state_indices pointers stored in the file are not used. Each HMM's
 * state_indices is recomputed as an offset into the shared array, and the
 * offset only starts to grow from HMM number
 * HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1 onward. The results of
 * pfread() are not checked.
 */
void read_hmminfos(srec_arbdata* allotree, char** buffer, PFile *fp)
{
  int hmm_count = allotree->num_hmms;
  int skip_bytes;
  int filler;
  int running;
  int k;
  HMMInfo* table = (HMMInfo*)(*buffer);
  asr_int16_t* state_pool;

  /* reserve the HMMInfo table in the image */
  skip_bytes = sizeof(HMMInfo) * hmm_count / BYTES_PER_ATOM;
  (*buffer) += skip_bytes;

  for (k = 0; k < hmm_count; k++)
  {
    pfread(&table[k].name[0], sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    pfread(&(table[k].num_states), sizeof(asr_int16_t), 1, fp);
    pfread(&filler, sizeof(asr_int16_t), 1, fp); /* discarded 16-bit word */
    pfread(&(table[k].state_indices), sizeof(asr_int16_t*), 1, fp);
  }

  /* The saved state_indices pointers could not be used to recover the
     offsets (see the original author's note), so the offsets are rebuilt
     from num_states instead. */
  pfread(*buffer, sizeof(asr_int16_t), allotree->num_states, fp);

  state_pool = (asr_int16_t*)(*buffer);
  table[0].state_indices = state_pool;
  skip_bytes = sizeof(table[0].state_indices[0]) * allotree->num_states / BYTES_PER_ATOM;
  (*buffer) += skip_bytes;

  running = 0;
  for (k = 0; k < hmm_count; k++)
  {
    table[k].state_indices = state_pool + running;
    /* earlier HMMs all point at the start of the pool */
    if (k >= HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1)
    {
      running += table[k].num_states;
    }
  }

  allotree->hmm_infos = table;
}
276
277 /*-----------------------------------------------------------------------*
278 * *
279 * top level stream functions *
280 * *
281 *-----------------------------------------------------------------------*/
282
/**
 * Load an allophone-tree image from a file into one heap buffer.
 *
 * The buffer is sized to the file length. The srec_arbdata header, the
 * questions, the phoneme data (with model trees) and the HMM infos are
 * read into it in that order, and the pointers inside the image are
 * re-based onto the buffer.
 *
 * @param pallotree    receives the loaded structure, which is also the
 *                     start of the allocated buffer (allotree->image);
 *                     set to NULL on failure
 * @param filename     path of the image file
 * @param buffer_size  expected number of bytes consumed; only checked with
 *                     ASSERT
 * @return always 0. Failure (file cannot be opened, its size cannot be
 *         determined or is zero, or the allocation fails) is reported only
 *         by *pallotree == NULL, matching the existing open-failure path.
 */
int read_arbdata_from_stream(srec_arbdata** pallotree, char* filename, int buffer_size)
{
  char* pbuf;
  srec_arbdata* allotree;
  int ptr; /* scratch target for filler words */

  PFile* fp;
  long fpos;
  char* buffer;

  fp = file_must_open(NULL, (char*)filename, L("rb"), ESR_TRUE);
  if (!fp)
  {
    *pallotree = NULL;
    return 0;
  }

  /* size the buffer from the file length */
  pfseek(fp, 0, SEEK_END);
  fpos = pftell(fp);
  if (fpos <= 0)
  {
    /* tell failed or the file is empty: nothing valid can be loaded */
    pfclose(fp);
    *pallotree = NULL;
    return 0;
  }
  buffer = (char*)CALLOC_CLR(fpos, sizeof(char), "srec.arbdata");
  if (!buffer)
  {
    pfclose(fp);
    *pallotree = NULL;
    return 0;
  }
  pfseek(fp, 0, SEEK_SET);

  /* buffer_size = fpos; */
  pbuf = buffer;

  allotree = (srec_arbdata*)buffer;
  /* ASSERT(allotree->image_size == buffer_size); hack for now */

  /* read structure arbdata from file */
  pfread(&allotree->image, sizeof(char *), 1, fp);                 /* image */
  pfread(&allotree->image_size, sizeof(asr_uint16_t), 1, fp);      /* image_size */
  pfread(&allotree->num_phonemes, sizeof(asr_int16_t), 1, fp);     /* num_phonemes */
  pfread(&allotree->pdata, sizeof(phoneme_data *), 1, fp);         /* pdata */
  pfread(&allotree->num_questions, sizeof(asr_int16_t), 1, fp);    /* num_questions */

  pfread(&ptr, sizeof(asr_int16_t), 1, fp);                        /* alignment problem */

  pfread(&allotree->questions, sizeof(srec_question *), 1, fp);    /* questions */
  pfread(&allotree->num_states, sizeof(asr_int16_t), 1, fp);       /* num_states */
  pfread(&allotree->num_hmms, sizeof(asr_int16_t), 1, fp);         /* num_hmms */
  pfread(&allotree->hmm_infos, sizeof(HMMInfo *), 1, fp);          /* hmm_infos */
  pfread(allotree->phoneme_index, sizeof(asr_uint16_t), NUM_PHONEME_INDICES, fp); /* phoneme_index */

  /* the pointer stored in the file is meaningless here; re-base it */
  allotree->image = buffer;

  pbuf += sizeof(*allotree) / BYTES_PER_ATOM;
  pbuf -= sizeof(void*); /* PCPinfo */

  ASSERT(pftell(fp) == pbuf - buffer);

#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read allotree done %d\n", (int)(pbuf - buffer));
#endif

  allotree->questions = (srec_question *)pbuf;
  read_questions(&allotree->questions, allotree->num_questions, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_questions done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  /* read phoneme_data */
  read_phoneme_data(&allotree->pdata, allotree->num_phonemes, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_phoneme_data done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  read_hmminfos(allotree, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_hmminfos done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  *pallotree = allotree;
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read arbdata image size %d\n", allotree->image_size);
#endif
  ASSERT(pbuf - buffer == buffer_size);

  pfclose(fp);

  return 0;
}
365
/**
 * Summary of one tree, filled in by traverse_tree().
 *
 * Only nnodes and the four low/high fields are used in this file; the
 * remaining fields are not referenced here.
 */
typedef struct {
  unsigned short nnodes;         /* nodes visited; traverse_tree() stops once this exceeds 255 */
  unsigned long  size;           /* not referenced in this file */
  long           phoneme;        /* not referenced in this file */
  unsigned short node_pos;       /* not referenced in this file */
  unsigned long  node_off;       /* not referenced in this file */
  short          low_genone_no;  /* set together with low_pel_no */
  short          high_genone_no; /* set together with high_pel_no */
  short          low_pel_no;     /* smallest terminal pelid seen */
  short          high_pel_no;    /* largest terminal pelid seen */
} tree_head;
382
/**
 * Walk a tree depth-first, counting nodes and tracking the range of pelids
 * found in terminal nodes.
 *
 * @param node                root of the (sub)tree to visit
 * @param tree_topo           accumulator: nnodes is incremented per visited
 *                            node and the low/high pel and genone fields
 *                            are widened by every terminal node
 * @param num_terminal_nodes  optional counter, incremented per terminal
 *                            node; may be NULL
 * @return always 0
 *
 * The walk refuses to visit anything once nnodes exceeds 255, so nnodes
 * never goes past 256; this guards against looping on a damaged tree.
 */
static int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes)
{
  if (node == NULL || tree_topo == NULL)
  {
    return 0; /* should not happen */
  }
  if (tree_topo->nnodes > 255)
  {
    return 0; /* should not happen, might indicate infinite looping */
  }

  tree_topo->nnodes++;

  if (node->node.quest_index >= 0)
  {
    /* question node: visit the fail branch, then the pass branch */
    traverse_tree((tree_node*)node->node.fail, tree_topo, num_terminal_nodes);
    traverse_tree((tree_node*)node->node.pass, tree_topo, num_terminal_nodes);
    return 0;
  }

  /* terminal node */
  if (num_terminal_nodes != NULL)
  {
    ++*num_terminal_nodes;
  }
  if (node->term.pelid < tree_topo->low_pel_no)
  {
    tree_topo->low_pel_no = node->term.pelid;
    tree_topo->low_genone_no = node->term.pelid;
  }
  if (node->term.pelid > tree_topo->high_pel_no)
  {
    tree_topo->high_pel_no = node->term.pelid;
    tree_topo->high_genone_no = node->term.pelid;
  }
  return 0;
}
417
#if 0
/* Disabled helper: counts the nodes of a tree through traverse_tree() and
   reports the number of terminal nodes through *num_terminal_nodes.
   Only nnodes is initialised before the walk; the low/high fields of the
   summary are left unset. */
static int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes)
{
  tree_head summary;

  summary.nnodes = 0;
  *num_terminal_nodes = 0;
  traverse_tree(node, &summary, num_terminal_nodes);
  return summary.nnodes;
}
#endif
428
/**
 * Fold one value into a running checksum.
 *
 * @param ics   checksum so far
 * @param data  value to mix in
 * @return ics with its bits shifted left by 3 and its bits shifted right
 *         by 29 OR-ed together (a 3-bit left rotation when unsigned int is
 *         32 bits wide), plus data
 */
static unsigned int version_arbdata_add(unsigned int ics, int data)
{
  const unsigned int rotated = (ics >> 29) | (ics << 3);

  return rotated + data;
}
435
436
/**
 * Compute a checksum over the model trees of all phonemes.
 *
 * For each phoneme the model tree is walked with traverse_tree() and the
 * lowest terminal pelid found is folded into the checksum with
 * version_arbdata_add().
 *
 * @param a  tree data whose pdata[0 .. num_phonemes-1] trees are summarised
 * @return the checksum, or 0 as soon as one tree hits the 256-node limit
 *         of traverse_tree()
 */
unsigned int version_arbdata_models(srec_arbdata* a)
{
  unsigned int checksum = 0;
  tree_head topo;
  int phon;

  for (phon = 0; phon < a->num_phonemes; ++phon)
  {
    int terminal_count = 0;

    topo.nnodes = 0;
    /* NOTE(review): 32567 may be a typo for 32767; it is kept as is
       because it feeds the checksum */
    topo.low_pel_no = 32567;
    topo.high_pel_no = 0;

    traverse_tree(a->pdata[phon].model_nodes, &topo, &terminal_count);

    if (topo.nnodes == 256)
    {
      return 0; /* walk was cut short, so the tree cannot be trusted */
    }
    checksum = version_arbdata_add(checksum, topo.low_pel_no);
  }
  return checksum;
}
459
460
461
462
463