/*---------------------------------------------------------------------------*
 *  priority_q.c                                                             *
 *                                                                           *
 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
 *                                                                           *
 *  Licensed under the Apache License, Version 2.0 (the "License");          *
 *  you may not use this file except in compliance with the License.         *
 *                                                                           *
 *  You may obtain a copy of the License at                                  *
 *  http://www.apache.org/licenses/LICENSE-2.0                               *
 *                                                                           *
 *  Unless required by applicable law or agreed to in writing, software      *
 *  distributed under the License is distributed on an "AS IS" BASIS,        *
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
 *  See the License for the specific language governing permissions and      *
 *  limitations under the License.                                           *
 *                                                                           *
 *---------------------------------------------------------------------------*/

#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "passert.h"

#include "portable.h"

#include "hmm_desc.h"
#include "utteranc.h"
#include "hmmlib.h"

#include "srec_sizes.h"
#include "search_network.h"
#include "srec.h"
#include "word_lattice.h"

#define PRINT_SEARCH_DETAILS 0

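/* Rough usage sketch (illustrative only; the real call sites live in the srec
   search code, and the variable and field names below are assumptions made
   for illustration).  Note that the token returned by
   add_word_token_to_priority_q may be the token just offered, when there was
   no room for it:

     priority_q* pq = allocate_priority_q(max_word_ends);

     if (token->cost < get_priority_q_threshold(pq, rec->word_token_array))
     {
       wtokenID evicted =
           add_word_token_to_priority_q(pq, token_index, rec->word_token_array);
       if (evicted != MAXwtokenID)
         free_word_token(rec, evicted);
     }

     clear_priority_q(pq);
     free_priority_q(pq);
 */
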
/*this is just implemented as a list so far - FIX this!!*/

/*allocates priority_q to handle max_n entries*/
priority_q* allocate_priority_q(int max_n)
{
  priority_q *pq;

  pq = (priority_q*) CALLOC(1, sizeof(priority_q), "search.srec.priority_q");
  pq->max_cost_in_q = MAXcostdata;
  pq->word_token_list = MAXwordID;
  pq->max_in_q = (miscdata)max_n;
  pq->num_in_q = 0;
  return pq;
}

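/* frees the priority_q structure itself; the word tokens it indexes are owned
   by the recognizer's word_token_array and are not released here */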
void free_priority_q(priority_q* pq)
{
  FREE(pq);
}

/*empties out priority_q*/

void clear_priority_q(priority_q *pq)
{
  pq->max_cost_in_q = MAXcostdata;
  pq->word_token_list = MAXwordID;
  pq->num_in_q = 0;
  /* Jean: what about the list of free tokens? */
}
/* returns the head of a linked list of all words in the priority_q;
   returns MAXwtokenID if the list is empty */

wtokenID get_word_token_list(priority_q *pq, word_token *word_token_array)
{
  return pq->word_token_list;
}

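/* removes from the priority_q every word token whose end node differs from
   end_node, releasing each one with free_word_token(); num_in_q and
   max_cost_in_q are recomputed from the tokens that remain */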
void remove_non_end_word_from_q(srec *rec, priority_q *pq, word_token *word_token_array, nodeID end_node)
{
  word_token *token;
  wtokenID *ptoken_index;
  wtokenID old_token_index;

  pq->max_cost_in_q = MAXcostdata;
  pq->num_in_q = 0;
  ptoken_index = &(pq->word_token_list);

  while (*ptoken_index != MAXwtokenID)
  {
    token = &(word_token_array[*ptoken_index]);
    if (token->end_node != end_node)
    {
      old_token_index = *ptoken_index;
      *ptoken_index = token->next_token_index;
      free_word_token(rec, old_token_index);
      pq->max_cost_in_q = MAXcostdata; /* fix: sep9 */
    }
    else
    {
      pq->num_in_q++;
      if ((pq->max_cost_in_q == MAXcostdata) || (token->cost > pq->max_cost_in_q))
      {
        pq->max_cost_in_q = token->cost;
      }
      ptoken_index = &(token->next_token_index);
    }
  }
}

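/* returns 0 when token1 and token2 appear to share the same history (same
   word, same end node, and a matching previous word and end time in the
   one-word backtrace), 1 otherwise; used by add_word_token_to_priority_q to
   spot duplicates */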
int compare_histories(word_token* token1, word_token* token2,
                      word_token* word_token_array)
{
  int history_for_token1 = 0;
  int history_for_token2 = 0;

  /* compare_histories() was an attempt to be smart about the priority_q,
     in that we don't need to store two word_tokens when the two tokens
     are the same word (obviously ending at the same frame), and with the
     same word history.  This happens for a digit that has multiple end nodes
     due to context-dependency.  When "history_for_token" ignores the end_node,
     then we're all clear to save just 1 word_token, but continue propagating
     all paths from the end nodes.  That bit of "continue propagating" is not
     done.  THE OTHER PROBLEM is that the two nodes may NOT be
     simply different CD end models, they may be different from digit shifting!
     We're screwed if we drop the path, unless we compare all the way back to
     the start of utterance. */

  if (token1->word != token2->word)
    return 1;
  if (token1->end_node != token2->end_node)
    return 1;

  if (token1->backtrace != MAXwordID)
  {
    history_for_token1 += token1->end_node * 1000000;
    history_for_token1 += word_token_array[token1->backtrace].word * 10000;
    history_for_token1 += word_token_array[token1->backtrace].end_time;
  }

  if (token2->backtrace != MAXwordID)
  {
    history_for_token2 += token2->end_node * 1000000;
    history_for_token2 += word_token_array[token2->backtrace].word * 10000;
    history_for_token2 += word_token_array[token2->backtrace].end_time;
  }

#if PRINT_SEARCH_DETAILS
  printf("comparing history_for_token1 %d history_for_token2 %d\n",
         history_for_token1, history_for_token2);
#endif

  if (history_for_token1 == history_for_token2)
  {
    return 0;
  }
  else
  {
    return 1;
  }
}

#if PRINT_SEARCH_DETAILS
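/* debug-only consistency check: walks the list to confirm num_in_q matches
   the actual list length and, when the queue is full, that max_cost_in_q
   equals the cost at the head of the list */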
void sanity_check_priority_q(priority_q* pq, word_token *word_token_array)
{
  int n = 0;
  wtokenID token_index;
  word_token* token;
  n = 0;
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    token_index = token->next_token_index;
    n++;
  }
  ASSERT(n == pq->num_in_q);
  if (pq->num_in_q == pq->max_in_q)
  {
    token = &(word_token_array[pq->word_token_list]);
    ASSERT(pq->max_cost_in_q == token->cost);
  }
}
#endif

/* adds a word token to the priority_q and returns the index of the word
   token to remove.
   If nothing needs to be removed, returns MAXwtokenID.
   If there is no room on the priority_q, returns the token being put on. */

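/* the queue is a linked list kept sorted by cost, with the worst (highest)
   cost at the head; once the queue is full, max_cost_in_q mirrors the head
   cost and serves as the pruning threshold */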
wtokenID add_word_token_to_priority_q(priority_q *pq, wtokenID token_index_to_add, word_token *word_token_array)
{
  word_token *token;
  word_token *token_to_add;
  wtokenID token_index, return_token_index;
  wordID word_to_add;
  costdata cost_to_add;
  wtokenID *ptoken_index;
  wtokenID *pplace_to_add;
  wtokenID *pdelete_index;
  word_token *token_to_delete;

  token_to_add = &(word_token_array[token_index_to_add]);
  cost_to_add = token_to_add->cost;

#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ tokenid %d cost %d\n", token_index_to_add, cost_to_add);
  token_index = pq->word_token_list;
  while (token_index != MAXwordID)
  {
    token = &(word_token_array[token_index]);
    printf("WORDADD PQ token %d word %d cost %d\n", token_index, token->word, token->cost);
    token_index = token->next_token_index;
  }
#endif

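  /* the queue is full and the new cost is no better than the current worst:
     reject the token outright */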
  if (cost_to_add >= pq->max_cost_in_q && pq->num_in_q >= pq->max_in_q)
  {
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ - rejecting because cost too high cost_to_add(%d) max_cost_in_q(%d) num_in_q(%d)\n",
           cost_to_add, pq->max_cost_in_q, pq->num_in_q);
#endif
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (D) returning %d\n", token_index_to_add);
    sanity_check_priority_q(pq, word_token_array);
#endif
    return token_index_to_add;
  }

  word_to_add = token_to_add->word;
  /* search for duplicate words first */
  ptoken_index = &(pq->word_token_list);
  pplace_to_add = NULL;
  pdelete_index = NULL;
  while ((*ptoken_index) != MAXwordID)
  {
    token = &word_token_array[(*ptoken_index)];

    if (token->word == token_to_add->word
        && !compare_histories(token, token_to_add, word_token_array))
    {
      if (token->cost < cost_to_add)
      {
        /* don't bother adding, there's another like it on the list,
           with a better score! */
#if PRINT_SEARCH_DETAILS
        printf("WORDADD PQ - rejecting because another like it is on the list\n");
#endif
        /* TODO: when returning back on the basis that something else is better,
           we should let the caller know what to use instead, ie, make the
           distinction between no-space and something-else-better */
        token = &word_token_array[token_index_to_add];
        token->next_token_index = (*ptoken_index);
        return token_index_to_add;
      }
      else
      {
        /* ok, replace the one on the list with this better scoring one! */
        pdelete_index = ptoken_index;
      }
    }
    if (token->cost < cost_to_add && pplace_to_add == NULL)
    {
      pplace_to_add = ptoken_index;
      /* do not break, 'cuz we're still searching for possible duplicates */
    }
    ptoken_index = &(token->next_token_index);
  }
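  /* no entry with a lower cost was found, so the new token is at least as
     good as everything in the queue and goes at the tail */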
  if (!pplace_to_add)
    pplace_to_add = ptoken_index;

  /* add the token by inserting in the linked list */
  token_index = *pplace_to_add;
  *pplace_to_add = token_index_to_add;
  token_to_add->next_token_index = token_index;
  pq->num_in_q++;
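  /* if the new token went in at the head of a now-full queue, its cost becomes
     the new pruning threshold */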
  if (pplace_to_add == &pq->word_token_list && pq->num_in_q >= pq->max_in_q)
    pq->max_cost_in_q = cost_to_add;

  /* now delete any duplicate that was found */
  if (pdelete_index)
  {
    token_index = *pdelete_index;
    token_to_delete = &word_token_array[token_index];
    *pdelete_index = token_to_delete->next_token_index;
    pq->num_in_q--;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (B) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }

  /* now check for max length in the queue */
  if (pq->num_in_q > pq->max_in_q)
  { /* really expecting just 1 over */
    token_index = pq->word_token_list;
    token = &(word_token_array[token_index]);
    pq->num_in_q--;
    pq->word_token_list = token->next_token_index;
#if PRINT_SEARCH_DETAILS
    printf("WORDADD PQ (C) returning %d\n", token_index);
#endif
    return_token_index = token_index;
  }
  else
  {
    return_token_index = MAXwtokenID;
  }
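  /* refresh the threshold: when the queue is full, max_cost_in_q tracks the
     head (worst) cost; otherwise leave it wide open so nothing gets pruned */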
  if (pq->num_in_q >= pq->max_in_q)
  {
    token_index = pq->word_token_list;
    token = &(word_token_array[token_index]);
    pq->max_cost_in_q = token->cost;
  }
  else
  { /* pq->num_in_q < pq->max_in_q, fixed sep9 */
    pq->max_cost_in_q = MAXcostdata;
  }
#if PRINT_SEARCH_DETAILS
  printf("WORDADD PQ (A) returning %d\n", return_token_index);
  sanity_check_priority_q(pq, word_token_array);
#endif
  return return_token_index;
}


/* returns the cost threshold for the end of the priority queue;
   if a word has a cost greater than this, there is no need to try to put it
   on the queue */

costdata get_priority_q_threshold(priority_q *pq, word_token *word_token_array)
{
  return pq->max_cost_in_q;
}