• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  srec_arb.c                                                               *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include "pstdio.h"
21 #include "passert.h"
22 #include "portable.h"
23 
24 #include<string.h>
25 
26 #include"portable.h"
27 
28 #include"sizes.h"
29 #include"hmm_desc.h"
30 #include"search_network.h"     /* for EPSILON_OFFSET */
31 #include"srec_arb.h"
32 
/* Set to a non-zero value to compile the per-phoneme debug printout that is
   guarded by "#if DEBUG_PRONS" further down. */
#define DEBUG_PRONS       0
/* Expands to nothing, so any statement wrapped in IF_DEBUG_PRONS() is
   compiled out. */
#define IF_DEBUG_PRONS(X)

/* RCS revision string.  The "0 ? ... :" arm of the initializer is never
   selected; it only mentions rcsid itself -- presumably so compilers do not
   warn that the variable is unused (TODO confirm). */
static const char *rcsid = 0 ? (const char *) &rcsid :
"$Id: srec_arb.c,v 1.27.4.15 2007/12/14 22:03:51 dahan Exp $";
38 
/* Answer one decision-tree question for a phoneme in context.
 *
 * quest  question to evaluate; its qtype selects which neighbour is tested
 * lphon  left-context phoneme id
 * cphon  centre phoneme id (not consulted by this function)
 * rphon  right-context phoneme id
 *
 * Returns ANSWER_PASS or ANSWER_FAIL.  For left/right questions the selected
 * phoneme id is split by BIT_ADDRESS into a word index and a bit mask
 * (original note: phon = a*16+b) and tested against quest->membership_bits.
 */
int question_check(srec_question* quest, phonemeID lphon, phonemeID cphon, phonemeID rphon)
{
  asr_int16_t word = 0, mask = 0;

  if (quest->qtype != QUESTION_LEFT)
  {
#if USE_WWTRIPHONE
    /* word-boundary questions are answered directly, without the bit table */
    if (quest->qtype == QUESTION_WBLEFT)
      return (lphon == WBPHONEME_CODE) ? ANSWER_PASS : ANSWER_FAIL;
    if (quest->qtype == QUESTION_WBRIGHT)
      return (rphon == WBPHONEME_CODE) ? ANSWER_PASS : ANSWER_FAIL;
#endif
    /* anything that is left over has to be a right-context question */
    ASSERT(quest->qtype == QUESTION_RIGHT);
    BIT_ADDRESS(rphon, word, mask);
  }
  else
  {
    BIT_ADDRESS(lphon, word, mask);
  }

  if (quest->membership_bits[word] & mask)
    return ANSWER_PASS;
  return ANSWER_FAIL;
}
64 
65 /* get model id for phoneme in context */
get_modelid_for_pic(srec_arbdata * allotree,phonemeID lphon,phonemeID cphon,phonemeID rphon)66 int get_modelid_for_pic(srec_arbdata* allotree, phonemeID lphon, phonemeID cphon, phonemeID rphon)
67 {
68   int ans;
69   tree_node* tnode = allotree->pdata[cphon].model_nodes;
70   while (tnode->node.quest_index >= 0)
71   {
72     ans = question_check(&allotree->questions[tnode->node.quest_index],
73                          lphon, cphon, rphon);
74     tnode = (ans == ANSWER_FAIL ? (tree_node*)tnode->node.fail : (tree_node*)tnode->node.pass);
75   }
76   return tnode->term.pelid;
77 }
78 
/* Read num_questions question records from fp into the memory at *buffer.
 *
 * pquestions     receives the address of the first record (the old *buffer)
 * num_questions  number of records to read
 * buffer         in/out cursor; advanced past the whole question array
 * fp             source stream
 *
 * Each record is read as one 16-bit qtype followed by PSET_BIT_ARRAY_SIZE
 * 16-bit membership words.  The results of pfread are not checked.
 */
void read_questions(srec_question** pquestions, asr_int16_t num_questions, char **buffer, PFile *fp)
{
  srec_question *table = (srec_question*)(*buffer);
  asr_int16_t k;

  *pquestions = table;
  *buffer += num_questions * sizeof(srec_question);

  for (k = 0; k < num_questions; k++)
  {
    pfread(&(table[k].qtype), sizeof(asr_uint16_t), 1, fp);
    pfread(&(table[k].membership_bits), sizeof(asr_uint16_t), PSET_BIT_ARRAY_SIZE, fp);
  }
}
93 
/* We need to handle the interword silence here somehow.
   Proposal: we create one supermodel which combines the model that
   precedes silence and the model that follows silence, so for
   "boston&mass" we'll have "n&m" as a single "supermodel".
   We'll put that supermodel in the graph but then overlay the
   actual models thereon.  The overlay only needs to be done
   once.  The number of possible supermodels is 113655, which is
   larger than what an ilabel can hold; the solution to that is
   to encode also on the "cost" of the supermodel arc.

   /------SUPER(a&b)---\
   o----a1---o----b1----o
   \--a2--o--#--o--b2--/

   cost is 16bits, ilabel is 16bits
   on ilabel we encode the a1,(a2-a1)
   on cost we encode b1,(b2-b1)
   ... a1,b1 use 9 bits (400 models)
   ... deltas use 6 bits (+/-31 range)
   That leaves 1 bit left over, which is the top bit to signal this encoding,
   and make sure the cost is very high.
*/
116 
117 
get_modelids_for_pron(srec_arbdata * allotree,const char * phonemes,int num_phonemes,modelID * acoustic_model_ids)118 int get_modelids_for_pron(srec_arbdata* allotree,
119                           const char* phonemes, int num_phonemes,
120                           modelID* acoustic_model_ids)
121 {
122   int i;
123   modelID modelid;
124   phonemeID lphon, cphon, rphon;
125 
126   if( allotree == NULL)
127 	  return 1;
128 
129   if (num_phonemes == 0)
130     return 0;
131 
132   IF_DEBUG_PRONS(printf("%s get_modelids_for_pron pronunciation %s\n", __FILE__, (char*)phonemes));
133 
134 #if !USE_WWTRIPHONE
135   lphon = (phonemeID)allotree->phoneme_index[ SILENCE_CODE];
136   cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
137 #else
138   lphon = WBPHONEME_CODE; //(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE];
139   cphon = (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[0]];
140 #endif
141   if(cphon == MAXphonemeID)
142     return 1; /* bad phoneme */
143   for(i=0; i<num_phonemes; i++) {
144 #if !USE_WWTRIPHONE
145     rphon = (i==num_phonemes-1 ?
146 	     (phonemeID)allotree->phoneme_index[ SILENCE_CODE] :
147 	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
148 #else
149     rphon = (i==num_phonemes-1 ?
150 	     WBPHONEME_CODE /*(phonemeID)allotree->phoneme_index[ WBPHONEME_CODE] */ :
151 	     (phonemeID)allotree->phoneme_index[ (unsigned)phonemes[i+1] ] ) ;
152 #endif
153     if (rphon == MAXphonemeID)
154       return 1; /* bad phoneme */
155 
156     modelid = (modelID) get_modelid_for_pic(allotree, lphon, cphon, rphon);
157     acoustic_model_ids[i] = modelid;
158 #if DEBUG_PRONS
159     printf("%c%c%c hmm%d states", allotree->pdata[lphon].code,
160            allotree->pdata[cphon].code, allotree->pdata[rphon].code,
161            acoustic_model_ids[i]);
162     for (j = 0; j < allotree->hmm_infos[modelid].num_states; j++)
163       printf(" %d", allotree->hmm_infos[modelid].state_indices[j]);
164     printf("\n");
165 #endif
166     lphon = cphon;
167     cphon = rphon;
168   }
169   return 0;
170 }
171 
172 /*-----------------------------------------------------------------------*
173  *                                                                       *
174  * phoneme data stream functions                                         *
175  *                                                                       *
176  *-----------------------------------------------------------------------*/
177 
read_tree_node_f(char ** buffer,PFile * fp)178 tree_node* read_tree_node_f(char **buffer, PFile *fp)
179 {
180   tree_node* tnode = (tree_node*) * buffer;
181   pfread(&(tnode->node.quest_index), sizeof(asr_int16_t), 1, fp);
182   pfread(&(tnode->term.pelid), sizeof(asr_int16_t), 1, fp);
183   pfread(&(tnode->node.fail), sizeof(tree_branch_info*), 1, fp);
184   pfread(&(tnode->node.pass), sizeof(tree_branch_info*), 1, fp);
185 
186   /* because tree_node is a union, the actual size maybe large than we have read */
187   ASSERT(sizeof(asr_int16_t)*2 + sizeof(tree_branch_info *)*2 == sizeof(tree_node));
188 
189   *buffer += sizeof(tree_node);
190   if (tnode->node.quest_index >= 0)
191   {
192     tnode->node.fail = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
193     tnode->node.pass = (struct tree_branch_info*)read_tree_node_f(buffer, fp);
194   }
195   return tnode;
196 }
197 
/* Read the phoneme table and the model tree of every phoneme from fp.
 *
 * pdata         receives the address of the first phoneme_data record
 * num_phonemes  number of records to read
 * buffer        in/out cursor; advanced past the records and past every
 *               tree node read afterwards
 * fp            source stream
 *
 * First pass: the fixed-size records are read field by field.  Two 16-bit
 * values per record are read into a scratch variable and discarded
 * (presumably structure padding in the stored image -- TODO confirm).
 * Second pass: the model_nodes pointer of each record is replaced with the
 * tree read by read_tree_node_f().
 */
void read_phoneme_data(phoneme_data** pdata, asr_int16_t num_phonemes, char **buffer,  PFile *fp)
{
  int i, ptr;
#if STATE_NODES_NEEDED_AT_RUNTIME
  int j; /* state index, only used when state trees are loaded */
#endif
  phoneme_data *pd;

  pd = *pdata = (phoneme_data*)(*buffer);

  for (i = 0; i < num_phonemes; i++)
  {
    pfread(&(pd->name), sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    pfread(&(pd->code), sizeof(asr_uint16_t), 1, fp);
    pfread(&ptr, sizeof(asr_int16_t), 1, fp);  /* discarded */
    pfread(&(pd->model_nodes), sizeof(tree_node *), 1, fp);
    pfread(&(pd->num_states), sizeof(asr_uint16_t), 1, fp);
    pfread(&ptr, sizeof(asr_int16_t), 1, fp);  /* discarded */
    pfread(&(pd->state_nodes), sizeof(tree_node *), MAX_PHONE_STATES, fp);
    pd++;
  }
  /* the field-by-field reads above must add up to exactly one record */
  ASSERT(sizeof(phoneme_data) == MAX_PHONEME_NAME_LEN + sizeof(asr_int16_t)*4 + sizeof(tree_node *)*(1 + MAX_PHONE_STATES));
  (*buffer) += num_phonemes * sizeof(phoneme_data) / BYTES_PER_ATOM;
  ASSERT((char *)pd == *buffer);

  for (i = 0; i < num_phonemes; i++)
  {
#if STATE_NODES_NEEDED_AT_RUNTIME
    /* previously this branch could not compile: j was undeclared and
       read_tree_node_f() was called without its stream argument */
    for (j = 0; j < (*pdata)[i].num_states; j++)
      (*pdata)[i].state_nodes[j] = read_tree_node_f(buffer, fp);
#endif
    (*pdata)[i].model_nodes = read_tree_node_f(buffer, fp);
  }
}
229 
230 /*-----------------------------------------------------------------------*
231  *                                                                       *
232  * hmm info stream functions                                             *
233  *                                                                       *
234  *-----------------------------------------------------------------------*/
235 
/* Read the HMM info table and the shared state-index pool from fp.
 *
 * allotree  supplies num_hmms and num_states; its hmm_infos is set on return
 * buffer    in/out cursor; advanced past the table and past the pool
 * fp        source stream
 *
 * The state_indices pointers stored in the file are read but not used.
 * Instead each entry is pointed into the pool at a running offset that is
 * rebuilt from num_states.  The offset only starts to grow at index
 * HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1, so every entry up to and
 * including that index points at the start of the pool.
 */
void read_hmminfos(srec_arbdata* allotree, char** buffer, PFile *fp)
{
  const int hmm_count = allotree->num_hmms;
  HMMInfo* const table = (HMMInfo*)(*buffer);
  asr_int16_t* state_pool;
  int k, running, span, pad;

  /* reserve room for the table itself */
  span = sizeof(HMMInfo) * hmm_count / BYTES_PER_ATOM;
  (*buffer) += span;

  for (k = 0; k < hmm_count; k++)
  {
    HMMInfo* info = &table[k];

    pfread(&info->name[0], sizeof(char), MAX_PHONEME_NAME_LEN, fp);
    pfread(&(info->num_states), sizeof(asr_int16_t), 1, fp);
    pfread(&pad, sizeof(asr_int16_t), 1, fp); /* read and discarded */
    pfread(&(info->state_indices), sizeof(asr_int16_t*), 1, fp);
  }

  /* The original author tried to derive offsets from the saved pointers but
     reports that it did not work, so offsets are recovered from num_states
     below. */
  pfread(*buffer, sizeof(asr_int16_t), allotree->num_states, fp);

  state_pool = (asr_int16_t*)(*buffer);
  table[0].state_indices = state_pool;
  span = sizeof(table[0].state_indices[0]) * allotree->num_states / BYTES_PER_ATOM;
  (*buffer) += span;

  running = 0;
  for (k = 0; k < hmm_count; k++)
  {
    table[k].state_indices = state_pool + running;
    if (k >= HMM_COUNTER_OFFSET + NUM_SILENCE_HMMS - 1)
      running += table[k].num_states;
  }

  allotree->hmm_infos = table;
}
276 
277 /*-----------------------------------------------------------------------*
278  *                                                                       *
279  * top level stream functions                                            *
280  *                                                                       *
281  *-----------------------------------------------------------------------*/
282 
/* Load an arbdata image from a file into one freshly allocated buffer.
 *
 * pallotree    receives the loaded structure, or NULL on failure
 * filename     path of the image file
 * buffer_size  ignored on input; it is overwritten with the file size
 *
 * The buffer is sized to the file.  The srec_arbdata header is read field by
 * field into the start of that buffer, followed by the questions, the
 * phoneme data and the HMM infos.  allotree->image is set to the buffer.
 *
 * Always returns 0; callers detect failure through *pallotree == NULL.
 * Failure cases: the file cannot be opened, its size cannot be determined or
 * is too small to hold the header, or the buffer cannot be allocated.
 */
int read_arbdata_from_stream(srec_arbdata** pallotree, char* filename, int buffer_size)
{
  char* pbuf;
  srec_arbdata* allotree;
  int ptr;

  PFile* fp;
  long fpos;
  long header_bytes;
  char* buffer;

  fp = file_must_open(NULL, (char*)filename, L("rb"), ESR_TRUE);
  if (!fp)
  {
    *pallotree = NULL;
    return 0;
  }
  pfseek(fp, 0, SEEK_END);
  fpos = pftell(fp);

  /* number of header bytes stored in the file: the structure minus its
     trailing pointer (see the pbuf adjustment below) */
  header_bytes = (long)(sizeof(*allotree) / BYTES_PER_ATOM - sizeof(void*));
  if (fpos <= 0 || fpos < header_bytes)
  {
    /* pftell failed, or the file cannot even hold the header; reading on
       would write past the end of the buffer */
    pfclose(fp);
    *pallotree = NULL;
    return 0;
  }

  buffer = (char*)CALLOC_CLR(fpos, sizeof(char), "srec.arbdata");
  if (!buffer)
  {
    /* mirror the open-failure path instead of dereferencing NULL */
    pfclose(fp);
    *pallotree = NULL;
    return 0;
  }
  pfseek(fp, 0, SEEK_SET);

  buffer_size = fpos;
  pbuf = buffer;

  allotree = (srec_arbdata*)buffer;
  /* ASSERT(allotree->image_size == buffer_size); hack for now */

  /* read structure arbdata from file */
  pfread(&allotree->image, sizeof(char *), 1, fp);             /* image */
  pfread(&allotree->image_size, sizeof(asr_uint16_t), 1, fp);       /* image_size */
  pfread(&allotree->num_phonemes, sizeof(asr_int16_t), 1, fp);      /* num_phonemes */
  pfread(&allotree->pdata, sizeof(phoneme_data *), 1, fp);     /* pdata */
  pfread(&allotree->num_questions, sizeof(asr_int16_t), 1, fp);     /* num_questions */

  pfread(&ptr, sizeof(asr_int16_t), 1, fp);     /* alignment problem */

  pfread(&allotree->questions, sizeof(srec_question *), 1, fp);/* questions */
  pfread(&allotree->num_states, sizeof(asr_int16_t), 1, fp);        /* num_states */
  pfread(&allotree->num_hmms, sizeof(asr_int16_t), 1, fp);          /* num_hmms */
  pfread(&allotree->hmm_infos, sizeof(HMMInfo *), 1, fp);      /* hmm_infos */
  pfread(allotree->phoneme_index, sizeof(asr_uint16_t), NUM_PHONEME_INDICES, fp); /* phoneme_index */

  /* the pointer read from the file is meaningless here; point at our buffer */
  allotree->image = buffer;

  pbuf += sizeof(*allotree) / BYTES_PER_ATOM;
  pbuf -= sizeof(void*); /* PCPinfo */

  ASSERT(pftell(fp) == pbuf - buffer);

#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read allotree done %d\n", (int)(pbuf - buffer));
#endif

  allotree->questions = (srec_question *)pbuf;
  read_questions(&allotree->questions, allotree->num_questions, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_questions done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  /* read phoneme_data */
  read_phoneme_data(&allotree->pdata, allotree->num_phonemes, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_phoneme_data done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  read_hmminfos(allotree, &pbuf, fp);
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read_hmminfos done %d\n", (int)(pbuf - buffer));
#endif
  ASSERT(pftell(fp) == pbuf - buffer);

  *pallotree = allotree;
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("read arbdata image size %d\n", allotree->image_size);
#endif
  /* the whole file must have been consumed */
  ASSERT(pbuf - buffer == buffer_size);

  pfclose(fp);

  return 0;
}
365 
/**
 * Summary collected while walking one decision tree with traverse_tree().
 *
 * In the code visible here only nnodes and the low/high pel and genone
 * fields are written; size, phoneme, node_pos and node_off are never
 * referenced, so their intended meaning cannot be told from this code.
 */
typedef struct
{
  unsigned short nnodes;         /* nodes visited so far; the walk stops once this exceeds 255 */
  unsigned long  size;           /* not referenced in the visible code */
  long    phoneme;               /* not referenced in the visible code */
  unsigned short node_pos;       /* not referenced in the visible code */
  unsigned long  node_off;       /* not referenced in the visible code */
  short    low_genone_no;        /* set together with low_pel_no, to the same pelid */
  short    high_genone_no;       /* set together with high_pel_no, to the same pelid */
  short    low_pel_no;           /* smallest terminal pelid seen; caller seeds it */
  short    high_pel_no;          /* largest terminal pelid seen; caller seeds it */
}
tree_head;
382 
/* Walk a decision tree depth-first and accumulate statistics in tree_topo.
 *
 * node                root of the (sub)tree to visit
 * tree_topo           accumulator: nnodes is incremented per visited node and
 *                     the low/high pel and genone numbers track the smallest
 *                     and largest terminal pelid
 * num_terminal_nodes  optional counter, incremented per terminal node
 *
 * The walk refuses to go on once more than 255 nodes have been counted,
 * which guards against endless recursion on a corrupt tree.
 * Always returns 0.
 */
static int traverse_tree(tree_node* node, tree_head *tree_topo, int *num_terminal_nodes)
{
  if (node == NULL || tree_topo == NULL)
    return 0; /* should not happen */
  if (tree_topo->nnodes > 255)
    return 0; /* should not happen, might indicate infinite looping */

  tree_topo->nnodes++;

  if (node->node.quest_index >= 0)
  {
    /* interior node: visit the fail branch, then the pass branch */
    traverse_tree((tree_node*)node->node.fail, tree_topo, num_terminal_nodes);
    traverse_tree((tree_node*)node->node.pass, tree_topo, num_terminal_nodes);
    return 0;
  }

  /* terminal node */
  if (num_terminal_nodes != NULL)
    ++*num_terminal_nodes;

  if (tree_topo->low_pel_no > node->term.pelid)
  {
    tree_topo->low_pel_no = node->term.pelid;
    tree_topo->low_genone_no = node->term.pelid;
  }
  if (tree_topo->high_pel_no < node->term.pelid)
  {
    tree_topo->high_pel_no = node->term.pelid;
    tree_topo->high_genone_no = node->term.pelid;
  }
  return 0;
}
417 
#if 0
/* Disabled helper (compiled out by the surrounding "#if 0").
   Counts the nodes of the tree rooted at node with traverse_tree(), returns
   that count and reports the number of terminal nodes through
   *num_terminal_nodes.
   NOTE(review): topo.low_pel_no and topo.high_pel_no are left uninitialized
   here although traverse_tree() compares against them; initialize them
   before re-enabling this code. */
static int num_nodes_in_tree(tree_node* node, int *num_terminal_nodes)
{
  tree_head topo;
  *num_terminal_nodes = 0;
  topo.nnodes = 0;
  traverse_tree(node, &topo, num_terminal_nodes);
  return topo.nnodes;
}
#endif
428 
/* Fold one value into a running checksum.
 *
 * ics   checksum so far
 * data  value to mix in
 *
 * The old checksum is rotated left by 3 bits (the 29-bit right shift assumes
 * a 32-bit unsigned int) and data is added with unsigned wrap-around.
 * Returns the new checksum.
 */
static unsigned int version_arbdata_add(unsigned int ics, int data)
{
  const unsigned int carried_out = ics >> 29; /* top 3 bits wrap to the bottom */
  const unsigned int shifted_up = ics << 3;

  return (shifted_up | carried_out) + data;
}
435 
436 
/* Compute a checksum over the model trees of all phonemes in a.
 *
 * For each phoneme the model tree is walked with traverse_tree() and the
 * smallest terminal pelid found is folded into the checksum with
 * version_arbdata_add().
 *
 * Returns the checksum, or 0 as soon as a tree walk hits the 256-node limit
 * of traverse_tree().
 */
unsigned int version_arbdata_models(srec_arbdata* a)
{
  unsigned int sum = 0;
  int ph;

  for (ph = 0; ph < a->num_phonemes; ph++)
  {
    tree_head summary;
    int leaf_count = 0;

    /* NOTE(review): 32567 looks as if 32767 may have been intended; it is
       kept unchanged because the value can feed into the checksum. */
    summary.low_pel_no = 32567;
    summary.high_pel_no = 0;
    summary.nnodes = 0;

    traverse_tree(a->pdata[ph].model_nodes, &summary, &leaf_count);

    if (summary.nnodes == 256)
      return 0; /* node limit reached: tree too large or corrupt */

    sum = version_arbdata_add(sum, summary.low_pel_no);
  }
  return sum;
}
459 
460 
461 
462 
463