1 /*---------------------------------------------------------------------------*
2 * srec_initialize.c *
3 * *
4 * Copyright 2007, 2008 Nuance Communciations, Inc. *
5 * *
6 * Licensed under the Apache License, Version 2.0 (the 'License'); *
7 * you may not use this file except in compliance with the License. *
8 * *
9 * You may obtain a copy of the License at *
10 * http://www.apache.org/licenses/LICENSE-2.0 *
11 * *
12 * Unless required by applicable law or agreed to in writing, software *
13 * distributed under the License is distributed on an 'AS IS' BASIS, *
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15 * See the License for the specific language governing permissions and *
16 * limitations under the License. *
17 * *
18 *---------------------------------------------------------------------------*/
19
20 #ifndef _RTT
21 #include "pstdio.h"
22 #endif
23 #include <stdlib.h>
24 #include <string.h>
25 #include <math.h>
26 #include "passert.h"
27
28 #include "portable.h"
29
30 #include "hmm_desc.h"
31 #include "utteranc.h"
32 #include "hmmlib.h"
33
34 #include "srec_sizes.h"
35 #include "srec.h"
36 #include "word_lattice.h"
37 #include "swimodel.h"
38
39 #include "c42mul.h"
40
41 /*this file contains code which handles the initialization of the srec data structures*/
42
/* Allocates an srec.

   Input args come from config and are:

   int viterbi_prune_thresh;    score-based pruning threshold - only keep paths
                                within this delta of the best cost

   int max_hmm_tokens;          controls the maximum number of HMMs alive in any
                                frame. If the number is exceeded, pruning gets
                                tightened. So, this threshold can be used to
                                trade off accuracy for computation and memory
   int max_fsmnode_tokens;      controls the maximum number of FSMs alive in any
                                frame. If the number is exceeded, pruning gets
                                tightened. So, this threshold can be used to
                                trade off accuracy for computation and memory
   int max_word_tokens;         controls the maximum number of word tokens kept
                                in the word lattice. If the number is exceeded,
                                the word lattice is pruned more tightly (fewer
                                word ends per frame)

   int max_altword_tokens;      controls the maximum number of alternative paths
                                to propagate for proper nbest

   int num_wordends_per_frame;  controls the size of the word lattice - the
                                number of word ends to keep at each time frame
   int max_fsm_nodes;           allocation size of a few arrays in the search -
                                needs to be big enough to handle any grammar
                                that the search needs to run. Initialization
                                fails if the number is exceeded
   int max_fsm_arcs;            allocation size of a few arrays in the search -
                                needs to be big enough to handle any grammar
                                that the search needs to run. Initialization
                                fails if the number is exceeded

*/
71
/* Allocate the per-search working storage of a single srec.
 *
 * Every array is obtained through CALLOC_CLR and its capacity is recorded
 * next to it in the srec. The caller-supplied int limits are narrowed to the
 * srec ID types (modelID, stokenID, wtokenID, ftokenID, frameID, costdata)
 * by plain casts; no range checking happens in this function.
 *
 *   rec                    - srec to populate
 *   viterbi_prune_thresh   - stored as rec->prune_delta
 *   max_hmm_tokens         - capacity of fsmarc_token_array, also max_new_states
 *   max_fsmnode_tokens     - capacity of fsmnode_token_array
 *   max_word_tokens        - capacity of word_token_array and its flags array
 *   max_altword_tokens     - capacity of altword_token_array
 *   num_wordends_per_frame - passed to allocate_priority_q
 *   max_frames             - capacity of best_model_cost_for_frame, and the
 *                            size handed to allocate_word_lattice
 *   max_model_states       - capacity of current_model_scores
 */
static void allocate_recognition1(srec *rec,
                                  int viterbi_prune_thresh,
                                  int max_hmm_tokens,
                                  int max_fsmnode_tokens,
                                  int max_word_tokens,
                                  int max_altword_tokens,
                                  int num_wordends_per_frame,
                                  int max_frames,
                                  int max_model_states)
{
#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("allocating recognition arrays2 prune %d max_hmm_tokens %d max_fsmnode_tokens %d max_word_tokens %d max_altword_tokens %d max_wordends_per_frame %d\n",
              viterbi_prune_thresh, max_hmm_tokens, max_fsmnode_tokens,
              max_word_tokens, max_altword_tokens, num_wordends_per_frame);
#endif

  /* Model score buffer.
     FIX - either get NUM_MODELS from acoustic models, or check this
     someplace to make sure we have enough room. */
  rec->current_model_scores = (costdata*) CALLOC_CLR(max_model_states, sizeof(costdata), "search.srec.current_model_scores");
  rec->num_model_slots_allocated = (modelID)max_model_states;

  /* FSM arc (HMM) tokens: the element count used for the allocation is the
     value after narrowing to stokenID, not the raw int. */
  rec->fsmarc_token_array_size = (stokenID)max_hmm_tokens;
  rec->fsmarc_token_array = (fsmarc_token*) CALLOC_CLR(rec->fsmarc_token_array_size, sizeof(fsmarc_token), "search.srec.fsmarc_token_array");
  rec->max_new_states = (stokenID)max_hmm_tokens;

  /* Word tokens plus one 16-bit flag word per token.
     todo: change the flags to a bit array later. */
  rec->word_token_array = (word_token*) CALLOC_CLR(max_word_tokens, sizeof(word_token), "search.srec.word_token_array");
  rec->word_token_array_size = (wtokenID)max_word_tokens;
  rec->word_token_array_flags = (asr_int16_t*) CALLOC_CLR(max_word_tokens, sizeof(asr_int16_t), "search.srec.word_token_array_flags");

  /* FSM node tokens. */
  rec->fsmnode_token_array = (fsmnode_token*) CALLOC_CLR(max_fsmnode_tokens, sizeof(fsmnode_token), "search.srec.fsmnode_token_array");
  rec->fsmnode_token_array_size = (ftokenID)max_fsmnode_tokens;

  /* Alternative-word tokens. */
  rec->altword_token_array = (altword_token*) CALLOC_CLR(max_altword_tokens, sizeof(altword_token), "search.srec.altword_token_array");
  rec->altword_token_array_size = (wtokenID)max_altword_tokens;

  rec->prune_delta = (costdata)viterbi_prune_thresh;

  /* Per-frame storage and the word lattice. */
  rec->max_frames = (frameID)max_frames;
  rec->best_model_cost_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.best_model_cost_for_frame");
  rec->word_lattice = allocate_word_lattice((frameID)max_frames);

  rec->word_priority_q = allocate_priority_q(num_wordends_per_frame);
  rec->best_fsmarc_token = MAXstokenID;

  /* The A* stack is created last, after the fields above are filled in,
     because astar_stack_make receives the srec itself. */
#define ASTAR_NBEST_LEN 10
  rec->astar_stack = astar_stack_make(rec, ASTAR_NBEST_LEN);
  rec->context = NULL;
}
123
/* Check that a configuration value lies in the closed range [parmin, parmax].
 *
 *   parval  - value to validate
 *   parmin  - smallest accepted value (inclusive)
 *   parmax  - largest accepted value (inclusive)
 *   parname - parameter name, used only in the error report
 *
 * Returns 0 when parval is in range. Otherwise reports the error through
 * log_report and returns 1.
 *
 * Both bounds are enforced: callers feed the validated values to allocation
 * sizes and narrowing casts, so a value below parmin (zero or negative) must
 * be rejected just like one above parmax.
 */
static int check_parameter_range(int parval, int parmin, int parmax, const char* parname)
{
  if (parval < parmin || parval > parmax)
  {
    log_report("Error: %s value %d is out-of-range [%d,%d]\n", parname,
               parval, parmin, parmax);
    return 1;
  }
  return 0;
}
137
/* Allocate a multi_srec: the storage shared by all searches plus
 * max_searches individual srec structures.
 *
 * The limits are first validated with check_parameter_range, in the order
 * listed below; the first failing check ends validation and the function
 * returns 1 without allocating anything. max_word_tokens and
 * num_wordends_per_frame are not range-checked here.
 *
 * The results of CALLOC_CLR are not checked in this function.
 *
 * Returns 0 on success, 1 if a parameter check fails.
 */
int allocate_recognition(multi_srec *rec,
                         int viterbi_prune_thresh, /* score-based pruning threshold - only keep paths within this delta of best cost */
                         int max_hmm_tokens,
                         int max_fsmnode_tokens,
                         int max_word_tokens,
                         int max_altword_tokens,
                         int num_wordends_per_frame,
                         int max_fsm_nodes,
                         int max_fsm_arcs,
                         int max_frames,
                         int max_model_states,
                         int max_searches)
{
  int n;

  /* Short-circuit evaluation stops at the first check that fails, so at
     most one error is reported. */
  if (check_parameter_range(max_fsm_nodes, 1, MAXnodeID, "max_fsm_nodes")
      || check_parameter_range(max_fsm_arcs, 1, MAXarcID, "max_fsm_arcs")
      || check_parameter_range(max_frames, 1, MAXframeID, "max_frames")
      || check_parameter_range(max_model_states, 1, MAXmodelID, "max_model_states")
      || check_parameter_range(max_hmm_tokens, 1, MAXstokenID, "max_hmm_tokens")
      || check_parameter_range(max_fsmnode_tokens, 1, MAXftokenID, "max_fsmnode_tokens")
      || check_parameter_range(viterbi_prune_thresh, 1, MAXcostdata, "viterbi_prune_thresh")
      || check_parameter_range(max_altword_tokens, 0, MAXftokenID, "max_altword_tokens")
      || check_parameter_range(max_searches, 1, 2, "max_searches"))
  {
    return 1;
  }

  rec->rec = (srec*)CALLOC_CLR(max_searches, sizeof(srec), "search.srec.base");
  rec->num_allocated_recs = max_searches;
  rec->num_swimodels = 0;

  /* best_token_for_arc and best_token_for_node are shared across
     multiple searches. */
  rec->best_token_for_arc = (stokenID*)CALLOC_CLR(max_fsm_arcs, sizeof(stokenID), "search.srec.best_token_for_arc");
  rec->max_fsm_arcs = (arcID)max_fsm_arcs;

  rec->best_token_for_node = (ftokenID*)CALLOC_CLR(max_fsm_nodes, sizeof(ftokenID), "search.srec.best_token_for_node");
  rec->max_fsm_nodes = (nodeID)max_fsm_nodes;

  /* Cost offsets and accumulated cost offsets are pooled for all the
     different searches; this saves memory and enables each search to
     know its total scores. */
  rec->cost_offset_for_frame = (costdata*)CALLOC_CLR(max_frames, sizeof(costdata), "search.srec.current_best_costs");
  rec->accumulated_cost_offset = (bigcostdata*)CALLOC_CLR(max_frames, sizeof(bigcostdata), "search.srec.accumulated_cost_offset");
  rec->max_frames = (frameID)max_frames;
  for (n = 0; n < max_frames; n++)
  {
    rec->accumulated_cost_offset[n] = 0;
  }

  /* Allocate each search's private arrays, then point it at the shared
     data set up above. */
  for (n = 0; n < rec->num_allocated_recs; n++)
  {
    srec *search = &rec->rec[n];

    allocate_recognition1(search, viterbi_prune_thresh, max_hmm_tokens,
                          max_fsmnode_tokens, max_word_tokens,
                          max_altword_tokens, num_wordends_per_frame,
                          max_frames, max_model_states);

    search->best_token_for_node = rec->best_token_for_node;
    search->max_fsm_nodes = rec->max_fsm_nodes;
    search->best_token_for_arc = rec->best_token_for_arc;
    search->max_fsm_arcs = rec->max_fsm_arcs;
    search->max_frames = rec->max_frames;
    search->cost_offset_for_frame = rec->cost_offset_for_frame;
    search->accumulated_cost_offset = rec->accumulated_cost_offset;
    search->id = (asr_int16_t)n;
  }

  rec->eos_status = VALID_SPEECH_NOT_YET_DETECTED;
  return 0;
}
209
210
/* Release everything that allocate_recognition1 attached to one srec.
 *
 * Only per-search storage is released here. The pointers an srec shares with
 * its owning multi_srec (best_token_for_node, best_token_for_arc,
 * cost_offset_for_frame, accumulated_cost_offset) are left alone.
 * The srec structure itself is not freed.
 */
static void free_recognition1(srec *rec)
{
  /* Token arrays. */
  FREE(rec->fsmarc_token_array);
  FREE(rec->fsmnode_token_array);
  FREE(rec->word_token_array);
  FREE(rec->word_token_array_flags);
  FREE(rec->altword_token_array);

  /* Score buffers. */
  FREE(rec->current_model_scores);
  FREE(rec->best_model_cost_for_frame);

  /* Objects that have their own destructor routines. */
  destroy_word_lattice(rec->word_lattice);
  free_priority_q(rec->word_priority_q);
  astar_stack_destroy(rec);
}
224
/* Release a multi_srec set up by allocate_recognition.
 *
 * The private storage of each of the rec->num_allocated_recs searches is
 * released first, then the arrays shared between searches, and finally the
 * array of srec structures. The multi_srec structure itself is not freed,
 * and its pointers and counters are not reset.
 */
void free_recognition(multi_srec *rec)
{
  int n;

  for (n = 0; n < rec->num_allocated_recs; n++)
  {
    free_recognition1(rec->rec + n);
  }

  /* Shared per-frame and per-arc/node arrays. */
  FREE(rec->accumulated_cost_offset);
  FREE(rec->cost_offset_for_frame);
  FREE(rec->best_token_for_node);
  FREE(rec->best_token_for_arc);

  /* The srec array goes last, after its elements have been torn down. */
  FREE(rec->rec);
}
236
237