• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  pre_desc.h  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 
22 #ifndef _h_pre_desc_
23 #define _h_pre_desc_
24 
/*
 * RCS/CVS revision stamp for this header. It is only compiled in when the
 * including translation unit defines SET_RCSID; because it is static, each
 * such unit gets its own private copy.
 */
#ifdef SET_RCSID
static const char pre_desc_h[] = "$Id: pre_desc.h,v 1.3.6.10 2008/03/07 19:41:39 dahan Exp $";
#endif
28 
29 
30 #include "all_defs.h"
31 #include "hmm_type.h"
32 #include "specnorm.h"
33 #ifndef _RTT
34 #include "duk_io.h"
35 #endif
36 
/*
 * Compile-time switch: when non-zero, the extra SNR fields of endpoint_info
 * and the spectral-inverse fields of preprocessed are compiled in.
 */
#define DO_SUBTRACTED_SEGMENTATION  0

/* "No option selected" value; only defined here if nobody defined it first. */
#ifndef NONE
#define NONE   0
#endif

/*
 * Front-end option flags. Every value is a distinct power of two, so the
 * flags can be OR-ed together into a single int.
 * NOTE(review): presumably these are the values stored in
 * preprocessed.post_proc - confirm against the code that reads that field.
 */
#define SCALE       1 /* Scaling the channels */
#define LIN_TRAN    2 /* Linear Transformation */
#define VFR         4 /* Variable frame rate */
#define USE_MULTAB  8 /* Set up multable distance calculations */
46 
/**
 * A single mul-table record ("mul-table data types" in the original note).
 *
 * NOTE(review): this header does not show how the three members relate to
 * each other (for instance whether num is the element count of pdf); confirm
 * against the code that builds and reads these tables.
 */
typedef struct
{
  unsigned short sigma; /* presumably a spread/deviation value - TODO confirm */
  int   num;            /* a count; what is being counted is not visible here */
  short *pdf;           /* pointer to short table data; ownership not visible here */
}
mul_table;
57 
58 /**
59  * @todo document
60  */
61 typedef struct
62 {
63   unsigned short num_dev8_index;
64   unsigned char  *dev8_index;
65   unsigned short *wt_index;
66   short    *gauss_dist_table;
67   short    **dist_ptr;
68   prdata    multable_factor; /* euclidean to multable */
69   prdata    multable_factor_gaussian; /* euclidean to multable */
70   prdata    grand_mod_cov; /* grand covariance modulus */
71   prdata    grand_mod_cov_gaussian; /* grand covariance modulus */
72 }
73 mul_table_info;
74 
75 /**
76  * @todo document
77  */
78 typedef struct
79 {
80   const prdata *table;
81   prdata add_log_limit;
82   prdata scale;   /* X - scale to log function */
83   prdata inv_scale;
84   float logscale;  /* Y - scale to log function */
85 }
86 logadd_table_info;
87 
88 /**
89  * @todo document
90  */
91 typedef struct
92 {
93   unsigned long num;
94   accdata **between;
95   accdata *bmean;
96   accdata **within;
97   accdata *wmean;
98 }
99 transform_info;
100 
/**
 * Segmentation (endpointing) parameters.
 *
 * Every member is a plain int. The two SNR members at the end exist only when
 * DO_SUBTRACTED_SEGMENTATION is non-zero, so the struct layout depends on
 * that macro.
 *
 * NOTE(review): the first seven members carry no description in the original
 * source; their units and meaning must be taken from the code that uses them.
 */
typedef struct
{   /* Segmentation parameters */
  int rel_low;
  int rel_high;
  int gap_period;
  int click_period;
  int breath_period;
  int extend_annotation;
  int param;
  int min_initial_quiet_frames;               /* num silence frames needed before input */
  int min_annotation_frames;                  /* minimum overall length */
  int max_annotation_frames;                  /* maximum overall length */
  int delete_leading_segments;                /* num segments to delete. 0=no action */
  int leading_segment_accept_if_not_found;    /* Do not reject segmentation if not found */
  int leading_segment_min_frames;             /* remove unless shorter */
  int leading_segment_max_frames;             /* remove unless exceeded */
  int leading_segment_min_silence_gap_frames; /* remove if good silence gap to next segment */
  int beep_size;                              /* X201 beep filter */
  int beep_threshold;                         /* X201 beep filter */
  int min_segment_rel_c0; /* Any segment gets deleted whose peak c0 is < max - min_segment_rel_c0 */

#if DO_SUBTRACTED_SEGMENTATION
  int snr_holdoff;        /* Ignore first n frames when estimating speech level for SNR measure */
  int min_acceptable_snr; /* for an acceptable segmentation */
#endif
}
endpoint_info;
131 
132 
133 /**
134  * @todo document
135  */
136 typedef struct
137 {  /* processed speech data/front end output */
138   int  ref_count; /* reference counts */
139   /* Pattern vector section */
140   int  dim;  /* dimension of frame vector */
141   int  use_dim; /* dimension used for recognition */
142   int  whole_dim; /* reduced feature use. Set unused to 127 (0) on model construction */
143   int  use_from; /* first channel used for recognition */
144   featdata *last_frame; /* last frame processed in frame buffer */
145   imeldata *seq;  /* current valid frame */
146   imeldata *seq_unnorm; /* current valid frame, for whole-word models */
147   prdata seq_sq_sum; /* sum of the squared of frames */
148   prdata seq_sq_sum_whole; /* sum of the squared of frames, for wholeword */
149   prdata seq_unnorm_sq_sum_whole; /* sum of the squared of frames, for wholeword */
150   int  voicing_status; /* voicing code */
151   int  post_proc; /* post processing functions */
152   imeldata *offset; /* offset vector with transformation */
153   imeldata **matrix; /* linear transformation matrix */
154   int  imel_shift; /* Imelda scale factor (in shifts) */
155   covdata **imelda; /* linear transformation matrix, PMC or RN */
156   imeldata **invmat; /* inverse transformation matrix */
157   int  inv_shift; /* inverse Imelda scale factor (in shifts) */
158   covdata **inverse; /* inverse linear transformation matrix, PMC or RN */
159 #if PARTIAL_DISTANCE_APPROX /* Gaussian tail approximation? */
160   int  partial_distance_calc_dim;  /* number of params to calc distance over, before approximating if beyond threshold */
161   scodata partial_distance_threshold;
162   prdata partial_distance_calc_threshold;
163   prdata partial_distance_offset;
164   prdata global_distance_over_n_params;
165   int  global_model_means[MAX_DIMEN];
166   prdata partial_mean_sq_sum;
167   prdata partial_seq_sq_sum;
168   prdata partial_seq_unnorm_sq_sum;
169 #endif
170   imeldata *chan_offset;
171   /* Channel Normalization etc */
172 
173   /* Tables */
174   prdata exp_wt[MAX_WTS]; /* weights exp lookup table */
175   mul_table_info mul;  /* Mul-table */
176   logadd_table_info add; /* logadd-table */
177   /* ENC */
178   booldata is_setup_for_noise;
179   booldata do_whole_enc; /* to enable ENC */
180   booldata do_sub_enc; /* to enable ENC */
181   booldata enc_count;
182   booldata ambient_valid; /* ambient estimates valid */
183   imeldata **pmc_fixmat; /* ENC matrix */
184   imeldata **pmc_fixinv; /* inverse ENC matrix */
185   covdata **pmc_matrix; /* ENC matrix in float */
186   covdata **pmc_inverse; /* inverse ENC matrix in float */
187   int  pmc_matshift; /* scaling */
188   int  pmc_invshift; /* scaling */
189   imeldata    *ambient_mean; /* ambient mean vector */
190   imeldata    *ambient_prof; /* ambient estimates, pseudo space */
191   imeldata    *ambient_prof_unnorm; /* ambient estimates, unnormalised */
192   logadd_table_info fbadd; /* logadd-table for ENC */
193 #if DO_SUBTRACTED_SEGMENTATION
194   int  mel_dim;
195   covdata **spec_inverse;
196   imeldata **spec_fixinv;
197   int  spec_invshift;
198   int  *cep_offset;
199 #endif
200   /* Parameters */
201   prdata mix_score_scale; /* Mixture score scaling constant */
202   prdata uni_score_scale; /* Unimodal score scaling constant */
203   prdata uni_score_offset; /* Unimodal score offset constant */
204   prdata imelda_scale;  /* Imelda grand variance */
205   /* Endpoint data */
206   endpoint_info end;
207 
208 }
209 preprocessed;
210 
211 /**
212  * @todo document
213  */
214 typedef struct
215 {
216   preprocessed    *prep; /* The preprocessed data structure */
217   /* The following stuff cannot be cloned */
218   booldata do_imelda; /* Alignment based accumulation */
219   transform_info  imelda_acc;
220 }
221 pattern_info;
222 
223 #endif /* _h_pre_desc_ */
224