• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  priority_q.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include <stdlib.h>
21 #include <string.h>
22 #include <math.h>
23 #include "passert.h"
24 
25 #include "portable.h"
26 
27 #include "hmm_desc.h"
28 #include "utteranc.h"
29 #include "hmmlib.h"
30 
31 #include "srec_sizes.h"
32 #include "search_network.h"
33 #include "srec.h"
34 #include "word_lattice.h"
35 
36 #define PRINT_SEARCH_DETAILS 0
37 
38 /*this is just implemented as a list so far - FIX this!!*/
39 
40 /*allocates priority_q to handle max_n entries*/
allocate_priority_q(int max_n)41 priority_q* allocate_priority_q(int max_n)
42 {
43   priority_q *pq;
44 
45   pq = (priority_q*) CALLOC(1, sizeof(priority_q), "search.srec.priority_q");
46   pq->max_cost_in_q = MAXcostdata;
47   pq->word_token_list = MAXwordID;
48   pq->max_in_q = (miscdata)max_n;
49   pq->num_in_q = 0;
50   return pq;
51 }
52 
/* Releases a priority_q created by allocate_priority_q().
   NOTE(review): only the container is freed here; any word tokens still
   linked from pq->word_token_list live in the recognizer's word_token_array
   pool and are presumably reclaimed elsewhere — confirm against callers. */
void free_priority_q(priority_q* pq)
{
  FREE(pq);
}
57 
58 /*empties out priority_q*/
59 
/* Resets the queue to its empty state: zero tokens, empty list sentinel,
   and the "accept anything" cost threshold. */
void clear_priority_q(priority_q *pq)
{
  pq->num_in_q = 0;
  pq->word_token_list = MAXwordID;
  pq->max_cost_in_q = MAXcostdata;
  /* Jean: what about the list of free tokens? */
}
67 /* returns the head of a linked list of all words in the priority_q.
68    Return MAXwtokenID if list is empty */
69 
get_word_token_list(priority_q * pq,word_token * word_token_array)70 wtokenID get_word_token_list(priority_q *pq, word_token *word_token_array)
71 {
72   return pq->word_token_list;
73 }
74 
/* Drops every queued word token whose end node differs from end_node,
   returning the dropped tokens to the recognizer's free pool, and rebuilds
   the queue's element count and cached worst cost from the survivors. */
void remove_non_end_word_from_q(srec *rec, priority_q *pq, word_token *word_token_array, nodeID end_node)
{
  wtokenID *link;

  pq->num_in_q = 0;
  pq->max_cost_in_q = MAXcostdata;
  link = &(pq->word_token_list);

  while (*link != MAXwtokenID)
  {
    word_token *wt = &(word_token_array[*link]);
    if (wt->end_node == end_node)
    {
      /* survivor: count it, track the highest cost seen, advance */
      pq->num_in_q++;
      if ((pq->max_cost_in_q == MAXcostdata) || (wt->cost > pq->max_cost_in_q))
      {
        pq->max_cost_in_q = wt->cost;
      }
      link = &(wt->next_token_index);
    }
    else
    {
      /* unlink this token and recycle it; the link now points past it */
      wtokenID doomed = *link;
      *link = wt->next_token_index;
      free_word_token(rec, doomed);
      pq->max_cost_in_q = MAXcostdata; /* fix: sep9 */
    }
  }
}
106 
/* Decides whether two word tokens can be treated as duplicates by the
   priority queue.  Returns 0 when word, end node, and one-step history
   (previous word and its end time) all match; 1 otherwise. */
int compare_histories(word_token* token1, word_token* token2,
                      word_token* word_token_array)
{
  int h1 = 0;
  int h2 = 0;

  /* compare_histories() was an attempt to be smart about the priority_q:
     two word_tokens for the same word, ending at the same frame with the
     same word history, need not both be stored.  This happens for a digit
     with multiple end nodes due to context-dependency.  If "history"
     ignored the end_node we could keep just one word_token yet keep
     propagating all paths from the end nodes — but that "continue
     propagating" part is not done.  THE OTHER PROBLEM: the two nodes may
     differ not merely as CD end models but from digit shifting, so
     dropping a path is unsafe unless we compare all the way back to the
     start of the utterance. */

  if (token1->word != token2->word)
    return 1;
  if (token1->end_node != token2->end_node)
    return 1;

  /* Fold (end_node, previous word, previous end time) into one integer
     fingerprint per token; tokens with no backtrace keep fingerprint 0. */
  if (token1->backtrace != MAXwordID)
  {
    h1 = token1->end_node * 1000000
         + word_token_array[token1->backtrace].word * 10000
         + word_token_array[token1->backtrace].end_time;
  }

  if (token2->backtrace != MAXwordID)
  {
    h2 = token2->end_node * 1000000
         + word_token_array[token2->backtrace].word * 10000
         + word_token_array[token2->backtrace].end_time;
  }

#if PRINT_SEARCH_DETAILS
  printf("comparing history_for_token1 %d history_for_token2 %d\n",
         h1, h2);
#endif

  return (h1 != h2);
}
158 
#if PRINT_SEARCH_DETAILS
/* Debug-only consistency check: walks the queue's linked list and asserts
   that the stored element count matches the actual list length, and that
   when the queue is full the cached worst cost equals the head token's
   cost (the head is the highest-cost entry). */
void sanity_check_priority_q(priority_q* pq, word_token *word_token_array)
{
  int n = 0;
  wtokenID token_index;
  word_token* token;
  n = 0;
  token_index = pq->word_token_list;
  /* NOTE(review): this walker uses MAXwordID as the end-of-list sentinel
     while remove_non_end_word_from_q uses MAXwtokenID — presumably the
     same numeric value; confirm in srec_sizes.h. */
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    token_index = token->next_token_index;
    n++;
  }
  ASSERT(n == pq->num_in_q);
  if (pq->num_in_q == pq->max_in_q)
  {
    token = &(word_token_array[pq->word_token_list]);
    ASSERT(pq->max_cost_in_q == token->cost);
  }
}
#endif
181 
182 /*adds a word token to the priority_q.  Returns the index of the word to
183   remove.
184   if nothing needs to be removed, returns MAXwtokenID.
185   if no room on priority_q, returns the one being put on */
186 
/* Adds the word token at token_index_to_add to the priority queue.  The
   queue is a linked list ordered by decreasing cost: the head is the worst
   (highest-cost) entry and is the first displaced on overflow.

   Returns the index of a word token the caller must now dispose of:
   - MAXwtokenID when nothing needs to be removed;
   - token_index_to_add itself when the new token was not admitted (queue
     full of better entries, or a better-scoring duplicate already queued);
   - otherwise the index of a displaced entry (an over-capacity head, or a
     worse-scoring duplicate with the same word and history). */
wtokenID add_word_token_to_priority_q(priority_q *pq, wtokenID token_index_to_add, word_token *word_token_array)
{
  word_token *token;
  word_token *token_to_add;
  wtokenID token_index, return_token_index;
  costdata cost_to_add;
  wtokenID *ptoken_index;   /* address of the link currently being examined */
  wtokenID *pplace_to_add;  /* address of the link where the token goes */
  wtokenID *pdelete_index;  /* address of the link to a worse duplicate */
  word_token *token_to_delete;

  token_to_add = &(word_token_array[token_index_to_add]);
  cost_to_add = token_to_add->cost;

#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ tokenid %d cost %d\n", token_index_to_add, cost_to_add);
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    printf("WORDADD PQ token %d word %d cost %d\n", token_index, token->word, token->cost);
    token_index = token->next_token_index;
  }
#endif

  /* fast reject: queue is full and this token is no better than the
     current worst (head) entry */
  if (cost_to_add >= pq->max_cost_in_q && pq->num_in_q >= pq->max_in_q)
  {
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ - rejecting because cost too high cost_to_add(%d) max_cost_in_q(%d) num_in_q(%d)\n",
           cost_to_add, pq->max_cost_in_q, pq->num_in_q);
    printf("WORDADD PQ (D) returning %d\n", token_index_to_add);
    sanity_check_priority_q(pq, word_token_array);
#endif
    return token_index_to_add;
  }

  /* single pass over the list: find the insertion point (before the first
     entry cheaper than us, keeping the list cost-descending) while also
     searching for a duplicate (same word, same history) */
  ptoken_index = &(pq->word_token_list);
  pplace_to_add = NULL;
  pdelete_index = NULL;
  while ((*ptoken_index) != MAXwordID)
  {
    token = &word_token_array[(*ptoken_index)];

    if (token->word == token_to_add->word
        && !compare_histories(token, token_to_add, word_token_array))
    {
      if (token->cost < cost_to_add)
      {
        /* don't bother adding, there's another like it on the list
           with a better score! */
#if PRINT_SEARCH_DETAILS
        printf("WORDADD PQ - rejecting because another like it is on the list\n");
#endif
        /* TODO: when returning back on the basis that something else is better,
           we should let the caller know what to use instead, ie, make the
           distinction between no-space and something-else-better */
        token = &word_token_array[token_index_to_add];
        token->next_token_index = (*ptoken_index);
        return token_index_to_add;
      }
      else
      {
        /* the queued duplicate scores worse; replace it with this one */
        pdelete_index = ptoken_index;
      }
    }
    if (token->cost < cost_to_add && pplace_to_add == NULL)
    {
      pplace_to_add = ptoken_index;
      /* do not break, 'cuz we're still searching for a possible duplicates */
    }
    ptoken_index = &(token->next_token_index);
  }
  if (!pplace_to_add)
    pplace_to_add = ptoken_index;  /* cheapest so far: append at the tail */

  /* splice the new token into the list */
  token_index = *pplace_to_add;
  *pplace_to_add = token_index_to_add;
  token_to_add->next_token_index = token_index;
  pq->num_in_q++;
  if (pplace_to_add == &pq->word_token_list && pq->num_in_q >= pq->max_in_q)
    pq->max_cost_in_q = cost_to_add;

  /* fix: initialize once so a deleted duplicate's index survives even when
     the queue ends up within capacity (it was previously clobbered back to
     MAXwtokenID below, leaking the unlinked token) */
  return_token_index = MAXwtokenID;

  /* now delete any duplicate that was found */
  if (pdelete_index)
  {
    token_index = *pdelete_index;
    token_to_delete = &word_token_array[token_index];
    *pdelete_index = token_to_delete->next_token_index;
    pq->num_in_q--;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (B) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }

  /* now check for max length in the queue */
  if (pq->num_in_q > pq->max_in_q)
  { /* really expecting just 1 over */
    token_index = pq->word_token_list;
    token = &(word_token_array[token_index]);
    pq->num_in_q--;
    pq->word_token_list = token->next_token_index;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (C) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }

  /* refresh the cached admission threshold from the worst (head) entry */
  if (pq->num_in_q >= pq->max_in_q)
  {
    token_index = pq->word_token_list;
    token = &(word_token_array[token_index]);
    pq->max_cost_in_q = token->cost;
  }
  else
  { /* pq->num_in_q < pq->max_in_q, fixed sep9 */
    pq->max_cost_in_q = MAXcostdata;
  }
#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ (A) returning %d\n", return_token_index);
  sanity_check_priority_q(pq, word_token_array);
#endif
  return return_token_index;
}
321 
322 
323 /*returns the cost threshold for the end of the priority queue.
324   If words have greater cost than this, no need to try to put them on the
325   queue*/
326 
get_priority_q_threshold(priority_q * pq,word_token * word_token_array)327 costdata get_priority_q_threshold(priority_q *pq, word_token *word_token_array)
328 {
329   return pq->max_cost_in_q;
330 }
331 
332 
333 
334 
335 
336