• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  get_fram.c  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 #include <stdlib.h>
22 #ifndef _RTT
23 #include "pstdio.h"
24 #endif
25 #include <limits.h>
26 #include <math.h>
27 #include <string.h>
28 #include "passert.h"
29 
30 #include "c42mul.h"
31 #include "portable.h"
32 
33 #include "../clib/fpi_tgt.inl"
34 
/* Set to non-zero to compile in the log_report() diagnostics that are
 * guarded by "#if DEBUG" in this file. */
#define DEBUG   0
/* Multiplier applied to imelda_scale when init_preprocessed() derives
 * mul.multable_factor and mul.grand_mod_cov. */
#define FUDGE_FACTOR 1.2f

/* Numerically sqrt(pi / 2); appears inside the log() terms computed by
 * init_preprocessed(). */
const float root_pi_over_2 = (float) 1.2533141;

/* Revision-control identification string for this file. */
static const char get_fram[] = "$Id: get_fram.c,v 1.7.6.13 2007/10/15 18:06:24 dahan Exp $";

/* File-local helpers; their definitions follow the public functions. */
static void create_cepstrum_offsets(preprocessed *prep);
static void destroy_cepstrum_offsets(preprocessed *prep);
static void apply_channel_offset(preprocessed *prep);
static int compare_cached_frame(preprocessed *prep, utterance_info *utt);
46 
/*
**  Initialise an utterance_info structure.
**
**  Records the utterance type, feature dimension and channel count,
**  creates the frame buffer via createFrameBuffer(), and calls
**  setup_ambient_estimation() for the back channels.
**
**  buffer_size must not be smaller than keep_frames; otherwise
**  BAD_ARGUMENT is raised through SERVICE_ERROR.  The frame buffer is
**  released again by free_utterance().
*/
void init_utterance(utterance_info *utt, int utt_type, int dimen,
                    int buffer_size, int keep_frames, int num_chan, int do_voicing)
{
  ASSERT(utt);
  ASSERT(dimen > 0);

  if (keep_frames > buffer_size)
    SERVICE_ERROR(BAD_ARGUMENT);

  utt->utt_type = utt_type;
  utt->gen_utt.dim = dimen;
  utt->gen_utt.num_chan = num_chan;

  /*  Construct the frame buffer (and voicing buffer when requested)  */
  utt->gen_utt.frame = createFrameBuffer(buffer_size, dimen,
                                         keep_frames, do_voicing);

  setup_ambient_estimation(utt->gen_utt.backchan,
                           utt->gen_utt.num_chan, 100);
}
69 
/*
**  Store the voicing-detection parameters in the utterance.
**
**  The four values are copied unchanged into utt->gen_utt; this file later
**  passes voice_duration, quiet_duration and unsure_duration on to
**  utterance_detection_fixup() from get_utterance_frame().
*/
void set_voicing_durations(utterance_info *utt, int voice_duration,
                           int quiet_duration, int unsure_duration,
                           int start_windback)
{
  /*  Same NULL guard as every other public entry point in this file  */
  ASSERT(utt);

  utt->gen_utt.voice_duration = voice_duration;
  utt->gen_utt.quiet_duration = quiet_duration;
  utt->gen_utt.unsure_duration = unsure_duration;
  utt->gen_utt.start_windback = start_windback;
  return;
}
80 
/*
**  Release the resources held by an utterance: clears the ambient
**  estimation state and destroys the frame buffer if one is attached.
**  The frame pointer is reset to NULL afterwards.
*/
void free_utterance(utterance_info *utt)
{
  ASSERT(utt);

  /*  NOTE(review): init_utterance() sets this state up with num_chan,
  **  but it is cleared here with dim - confirm which count
  **  clear_ambient_estimation() expects.
  */
  clear_ambient_estimation(utt->gen_utt.backchan, utt->gen_utt.dim);

  if (utt->gen_utt.frame == NULL)
    return;

  destroyFrameBuffer(utt->gen_utt.frame);
  utt->gen_utt.frame = NULL;
}
98 
/*
**  Set up a preprocessed structure for frames of `dimen` features.
**
**  Allocates the seq, seq_unnorm and last_frame buffers (dimen elements
**  each), derives the distance-calculation constants in prep->add and
**  prep->mul from imelda_scale, and allocates the channel-offset table.
**
**  Reads prep->uni_score_scale, prep->uni_score_offset, prep->whole_dim
**  and prep->use_dim, so the caller must have filled those in beforehand.
**  imelda_scale must be strictly positive: it is used as a divisor and
**  inside log().
**
**  Undo with clear_preprocessed().
*/
void init_preprocessed(preprocessed *prep, int dimen, float imelda_scale)
{

  ASSERT(prep);
  ASSERT(dimen > 0);
  /*  A zero scale divides by zero below and a non-positive one is outside
  **  the domain of log(); reject both before any constant is derived.
  */
  ASSERT(imelda_scale > 0);
  prep->dim = dimen;
  prep->seq = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
                                        "srec.prep->seq");
  prep->seq_unnorm = (imeldata *) CALLOC(prep->dim, sizeof(imeldata),
                     "srec.prep->seq_unnorm");
  prep->last_frame = (featdata *) CALLOC(prep->dim, sizeof(featdata),
                     "srec.prep->last_frame");

  /*  Setup constants for distance calculation.
  **  Each value is formed as (prdata)(x + 0.5) - (prdata)0.5; the
  **  expressions are kept exactly as written because the result depends on
  **  the definition of prdata, which is not visible from this file.
  */
  prep->add.scale = (prdata)((2 * imelda_scale * imelda_scale) / MUL_SCALE
                             + 0.5) - (prdata)0.5;
  prep->add.inv_scale = (prdata)(((float)(0x01 << 12) * MUL_SCALE) /
                                 (2 * imelda_scale * imelda_scale) + 0.5) -
                        (prdata)0.5;
  prep->mul.multable_factor_gaussian = 1;
  prep->mul.multable_factor = (prdata)(((MUL_SCALE * (0x01 << EUCLID_SHIFT)
                                         * prep->uni_score_scale)
                                        / (2 * (imelda_scale * imelda_scale
                                                * FUDGE_FACTOR * FUDGE_FACTOR))) / 128 + 0.5)
                              - (prdata)0.5;
  prep->mul.grand_mod_cov = (prdata)((MUL_SCALE * prep->uni_score_scale *
                                      prep->whole_dim *
                                      log((imelda_scale * FUDGE_FACTOR) /
                                          (SIGMA_BIAS * root_pi_over_2))) / 128 + 0.5)
                            - (prdata)0.5 - prep->uni_score_offset;
  prep->mul.grand_mod_cov_gaussian = (prdata)(2 * imelda_scale * imelda_scale *
                                     prep->use_dim *
                                     log(imelda_scale /
                                         (SIGMA_BIAS * root_pi_over_2)) + 0.5)
                                     - (prdata)0.5;
#if DEBUG
  log_report("grand_mod_cov %.1f, grand_mod_cov_gaussian %.1f\n",
             (float)prep->mul.grand_mod_cov,
             (float)prep->mul.grand_mod_cov_gaussian);
  log_report("multable_factor %f, multable_factor_gaussian %f\n",
             (float)prep->mul.multable_factor,
             (float)prep->mul.multable_factor_gaussian);
#endif


  create_cepstrum_offsets(prep);
  return;
}
152 
/*
**  Tear down a preprocessed structure set up by init_preprocessed().
**
**  Frees the channel-offset table and the last_frame, seq and seq_unnorm
**  buffers, and resets dim to 0.  Every freed pointer is set to NULL so a
**  stale pointer is never left behind in the structure.
*/
void clear_preprocessed(preprocessed *prep)
{
  ASSERT(prep);
  destroy_cepstrum_offsets(prep);
  prep->dim = 0;

  FREE((char *)prep->last_frame);
  prep->last_frame = NULL;

  FREE((char *)prep->seq);
  prep->seq = NULL;

  FREE((char *)prep->seq_unnorm);
  prep->seq_unnorm = NULL;
  return;
}
166 
/*
**  Fetch the current frame into prep and run the preprocessing steps.
**
**  Steps, in order:
**    - refresh the imelda channel adjustment if it is marked invalid;
**    - check that utterance and prep dimensions agree
**      (UTTERANCE_DIMEN_MISMATCH otherwise);
**    - read the frame with get_utterance_frame();
**    - add the channel offsets, apply the linear transform when LIN_TRAN
**      is set, keep a copy in seq_unnorm, then apply channel normalization.
**
**  Returns 0 when get_utterance_frame() delivered no frame, and 1 when a
**  frame is available.  When get_utterance_frame() reports -1 (frame
**  identical to the cached one) 1 is returned without redoing the
**  post-processing.  With VFR set, a status <= 0 is passed through
**  unchanged; any other status is reported as FEATURE_NOT_SUPPORTED.
*/
int get_data_frame(preprocessed *prep, utterance_info *utt)
{
  int vfr_requested;
  int frame_status;

  ASSERT(prep);
  ASSERT(utt);

  if (utt->gen_utt.channorm != NULL && !utt->gen_utt.channorm->adj_valid)
    convert_adjustment_to_imelda(utt->gen_utt.channorm, prep);

  if (utt->gen_utt.dim != prep->dim)
    SERVICE_ERROR(UTTERANCE_DIMEN_MISMATCH);

  vfr_requested = (prep->post_proc & VFR) != 0;
  frame_status = get_utterance_frame(prep, utt);

  if (vfr_requested)
  {
    if (frame_status <= 0)
      return frame_status;

    log_report("get_data_frame vfr not supported\n");
    SERVICE_ERROR(FEATURE_NOT_SUPPORTED);
  }
  else
  {
    switch (frame_status)
    {
      case 0:
        return 0;
      case -1:
        return 1;
      default:
        break;
    }
  }

  if (prep->chan_offset != NULL)
    apply_channel_offset(prep);

  /*  Linear transformation, only when requested  */
  if (prep->post_proc & LIN_TRAN)
    linear_transform_frame(prep, prep->seq, True);

  /*  Keep the frame as it was before channel normalization  */
  memcpy(prep->seq_unnorm, prep->seq, prep->dim * sizeof(imeldata));

  if (utt->gen_utt.channorm != NULL)
    apply_channel_normalization_in_imelda(utt->gen_utt.channorm,
                                          prep->seq, prep->seq_unnorm,
                                          utt->gen_utt.channorm->dim);
  return 1;
}
211 
/*
**  Copy the current frame of the utterance's frame buffer into prep->seq.
**
**  Returns
**     1  a frame was copied into prep->seq;
**     0  no frame is pending (frame gap not positive) or the buffer
**        returned no current frame;
**    -1  prep is shared (ref_count > 1) and the current frame equals the
**        one cached in prep->last_frame, so nothing was copied.
**
**  When prep is shared, the raw frame is also cached in prep->last_frame.
**  If the frame buffer carries voicing information, the voicing fix-up is
**  run and prep->voicing_status is refreshed.  The read pointer is not
**  moved here.
*/
int get_utterance_frame(preprocessed *prep, utterance_info *utt)
{
  featdata *cur_frame;
  int k;

  ASSERT(prep);
  ASSERT(utt);

  /*  Nothing pending in the frame buffer  */
  if (getFrameGap(utt->gen_utt.frame) <= 0)
    return 0;

  /*  Shared (cloned) prep: skip the work if this frame is already cached  */
  if (prep->ref_count > 1 && compare_cached_frame(prep, utt))
    return -1;

  cur_frame = currentRECframePtr(utt->gen_utt.frame);
  if (cur_frame == NULL)
    return 0;

  if (prep->ref_count > 1)
  {
    ASSERT(prep->last_frame);
    memcpy(prep->last_frame, cur_frame, prep->dim * sizeof(featdata));
  }

  k = 0;
  while (k < utt->gen_utt.dim)
  {
    prep->seq[k] = (imeldata)cur_frame[k];
    k++;
  }

  /*  Apply fast-voice corrections if necessary  */
  if (utt->gen_utt.frame->haveVoiced)
  {
    utterance_detection_fixup(utt->gen_utt.frame,
                              &utt->gen_utt.last_push,
                              utt->gen_utt.voice_duration,
                              utt->gen_utt.quiet_duration,
                              utt->gen_utt.unsure_duration);
    prep->voicing_status = rec_frame_voicing_status(utt->gen_utt.frame);
  }
  return 1;
}
260 
261 
/*
**  Move the utterance's frame-buffer read pointer forward by one frame.
**
**  Returns 1 when incRECframePtr() returned False, and 0 when it returned
**  anything else or when the frame gap is not positive.
*/
int advance_utterance_frame(utterance_info *utt)
{
  ASSERT(utt);

  if (getFrameGap(utt->gen_utt.frame) <= 0)
    return 0;

  return (incRECframePtr(utt->gen_utt.frame) != False) ? 0 : 1;
}
280 
/*
**  Move the utterance's frame-buffer read pointer back by one frame.
**
**  Returns 1 when decRECframePtr() returned False, and 0 when it returned
**  anything else or when the block gap is not positive.
*/
int retreat_utterance_frame(utterance_info *utt)
{
  ASSERT(utt);

  if (getBlockGap(utt->gen_utt.frame) <= 0)
    return 0;

  return (decRECframePtr(utt->gen_utt.frame) != False) ? 0 : 1;
}
296 
/*
**  Precompute the negated sums of squares for the current frame.
**
**    seq_sq_sum_whole         = -sum of seq[i]^2        for i < whole_dim
**    seq_sq_sum               = -sum of seq[i]^2        for i < use_dim
**    seq_unnorm_sq_sum_whole  = -sum of seq_unnorm[i]^2 for i < whole_dim
**
**  Requires whole_dim <= use_dim.
*/
void prepare_data_frame(preprocessed *prep)
{
  int ii;
  prdata sum_sq;

  ASSERT(prep);

  sum_sq = 0;

  for (ii = 0; ii < prep->whole_dim; ii++)
    sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
  prep->seq_sq_sum_whole = -sum_sq;

  /*  sum_sq already holds the first whole_dim terms, so carry on from
  **  whole_dim; restarting at 0 would add those terms a second time.
  */
  ASSERT(prep->whole_dim <= prep->use_dim);
  for (; ii < prep->use_dim; ii++)
    sum_sq += (prdata) SQR((prdata)prep->seq[ii]);
  prep->seq_sq_sum = -sum_sq;

  sum_sq = 0;

  for (ii = 0; ii < prep->whole_dim; ii++)
    sum_sq += (prdata) SQR((prdata)prep->seq_unnorm[ii]);
  prep->seq_unnorm_sq_sum_whole = -sum_sq;

  return;
}
321 
/*
**  Returns True when the frame buffer has both its haveVoiced and
**  voicingDetected flags set, False otherwise.
*/
int utterance_started(utterance_info *utt)
{
  ASSERT(utt);

  return (utt->gen_utt.frame->haveVoiced
          && utt->gen_utt.frame->voicingDetected) ? True : False;
}
331 
/*
**  Returns the utt_ended flag of the utterance's frame buffer.
*/
int utterance_ended(utterance_info *utt)
{
  int ended;

  ASSERT(utt);

  ended = utt->gen_utt.frame->utt_ended;
  return ended;
}
337 
/*
**  Push one externally supplied frame into the utterance's frame buffer.
**
**  pUttFrame must provide at least uttDim bytes (uttDim is taken from the
**  frame buffer); each byte is converted to featdata before the frame is
**  handed to pushSingleFEPframe() together with the voicing value.
**
**  Returns 1 when pushSingleFEPframe() returned False, 0 otherwise.
*/
int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing)
{
  featdata framdata[MAX_DIMEN];
  int      ii;

  ASSERT(utt);
  ASSERT(pUttFrame);
  /*  framdata lives on the stack with room for MAX_DIMEN entries; make
  **  sure the copy below cannot run past it.
  */
  ASSERT(utt->gen_utt.frame->uttDim <= MAX_DIMEN);

  for (ii = 0; ii < utt->gen_utt.frame->uttDim; ii++)
    framdata[ii] = (featdata) pUttFrame[ii];

  if (pushSingleFEPframe(utt->gen_utt.frame, framdata, voicing) != False)
    return (0);

  return (1);
}
354 
/*
**  Push the current frame of inutt, together with its voicing code, into
**  the frame buffer of oututt.
**
**  Returns 0 when inutt has no current frame or when
**  pushSingleFEPframe() returned something other than False; 1 otherwise.
*/
int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt)
{
  featdata *src_frame;
  int      voicing_code;

  ASSERT(oututt);
  ASSERT(inutt);

  src_frame = currentRECframePtr(inutt->gen_utt.frame);
  if (src_frame == NULL)
    return 0;

  voicing_code = getVoicingCode(inutt->gen_utt.frame, src_frame);

  return (pushSingleFEPframe(oututt->gen_utt.frame, src_frame, voicing_code)
          != False) ? 0 : 1;
}
373 
/*
**  Push the frame currently held in prep->seq into the frame buffer of
**  oututt, using prep->voicing_status as the voicing value.
**
**  Each of the first oututt->gen_utt.dim elements is limited to 0..255
**  with RANGE() and converted to featdata before being pushed.
**
**  Returns 1 when pushSingleFEPframe() returned False, 0 otherwise.
*/
int copy_pattern_frame(utterance_info *oututt, preprocessed *prep)
{
  featdata clipped[MAX_DIMEN];
  int      k;

  ASSERT(oututt);
  ASSERT(prep);
  ASSERT(oututt->gen_utt.dim < MAX_DIMEN);

  k = 0;
  while (k < oututt->gen_utt.dim)
  {
    clipped[k] = (featdata) RANGE(prep->seq[k], 0, 255);
    k++;
  }

  return (pushSingleFEPframe(oututt->gen_utt.frame, clipped,
                             prep->voicing_status) != False) ? 0 : 1;
}
389 
/*
**  Allocate the channel-offset table (prep->dim entries) with CALLOC_CLR
**  and attach it to prep->chan_offset.
*/
static void create_cepstrum_offsets(preprocessed *prep)
{
  imeldata *offsets;

  ASSERT(prep);

  offsets = (imeldata *) CALLOC_CLR(prep->dim,
                                    sizeof(imeldata), "srec.chan_offset");
  prep->chan_offset = offsets;
}
397 
/*
**  Store `value` (converted to imeldata) as the channel offset for the
**  feature at `index`.  The offset table must exist and index must lie in
**  [0, prep->dim).
*/
void set_cepstrum_offset(preprocessed *prep, int index, int value)
{
  imeldata offset;

  ASSERT(prep);
  ASSERT(prep->chan_offset);
  ASSERT(index >= 0 && index < prep->dim);

  offset = (imeldata) value;
  prep->chan_offset[index] = offset;
}
406 
/*
**  Free the channel-offset table and clear prep->chan_offset so that
**  get_data_frame() no longer applies offsets.
*/
static void destroy_cepstrum_offsets(preprocessed *prep)
{
  ASSERT(prep);

  FREE((char *)prep->chan_offset);
  prep->chan_offset = NULL;
}
414 
/*
**  Add the per-feature channel offset to every element of prep->seq,
**  in place.  The caller checks that prep->chan_offset is set.
*/
static void apply_channel_offset(preprocessed *prep)
{
  int k = 0;

  while (k < prep->dim)
  {
    prep->seq[k] += prep->chan_offset[k];
    k++;
  }
}
423 
/*
**  Compare the utterance's current frame with the copy cached in
**  prep->last_frame.
**
**  Returns True when all utt->gen_utt.dim elements match, and False when
**  any element differs or the frame buffer has no current frame.
*/
static int compare_cached_frame(preprocessed *prep, utterance_info *utt)
{
  featdata *cur_frame;
  int      k;

  cur_frame = currentRECframePtr(utt->gen_utt.frame);
  if (cur_frame == NULL)
    return False;

  k = 0;
  while (k < utt->gen_utt.dim)
  {
    if (prep->last_frame[k] != cur_frame[k])
      return False;
    k++;
  }
  return True;
}
437 
/*
**  Recompute norm->imelda_adjust from norm->adjust.
**
**  The first 12 entries of norm->adjust are copied into a scratch frame,
**  the remaining entries up to prep->dim are zeroed, the frame is passed
**  through linear_transform_frame(prep, ..., False), and the first
**  prep->dim results are stored in norm->imelda_adjust.  norm->adj_valid
**  is then set to True.
**
**  prep->dim must not exceed MAX_DIMEN, the size of the scratch frame.
*/
void convert_adjustment_to_imelda(norm_info *norm, preprocessed *prep)
{
  int      ii;
  imeldata fram[MAX_DIMEN];

  ASSERT(prep);
  ASSERT(norm);
  /*  fram is a stack array of MAX_DIMEN entries and is written up to
  **  prep->dim below; refuse dimensions that would overrun it.
  */
  ASSERT(prep->dim <= MAX_DIMEN);

  for (ii = 0; ii < 12; ii++)      /* TODO: fix dimension properly, and sort out rounding/type */
    fram[ii] = (imeldata) norm->adjust[ii]; /* TODO: review types */
  for (; ii < prep->dim; ii++)
    fram[ii] = 0;

  linear_transform_frame(prep, fram, False);

  for (ii = 0; ii < prep->dim; ii++)
    norm->imelda_adjust[ii] = fram[ii];
#if DEBUG
  log_report("NORM AUX: ");
  for (ii = 0; ii < norm->dim; ii++)
    log_report("%d ", (int)norm->imelda_adjust[ii]);
  log_report("\n");
#endif
  norm->adj_valid = True;
  return;
}
463