• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  swicms.c                                                                 *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include <string.h>
21 #include"swicms.h"
22 #include"srec_sizes.h"
23 #include"prelib.h"
24 
25 #include "passert.h"
26 #include "ESR_Session.h"
27 #include "ESR_SessionType.h"
28 #include "IntArrayList.h"
29 #include "portable.h"
30 
/* Debug helper: formats HEAD, the address of PTR and the first NN elements of
 * PTR (each printed with the printf-style format FMT) into a single line and
 * hands it to PLogMessage().
 *
 * - Wrapped in do { } while (0) so it behaves as one statement.
 * - The address is printed with %p instead of being truncated through (int).
 * - Appending stops once fewer than 16 characters remain in the scratch
 *   buffer (assumes a single formatted element needs fewer than 16
 *   characters), so wide vectors are truncated rather than overrunning it.
 * - The finished text is logged through "%s" so it is never interpreted as a
 *   format string.
 * HEAD is expected to be a short string literal. */
#define printf_vector(HEAD, FMT, PTR, NN) \
  do { \
    int pv_i; \
    LCHAR pv_buf[256]; \
    const size_t pv_cap = sizeof(pv_buf) / sizeof(pv_buf[0]); \
    sprintf(pv_buf, "%s", HEAD); \
    sprintf(pv_buf + LSTRLEN(pv_buf), " %p", (const void *)(PTR)); \
    for (pv_i = 0; pv_i < (NN); ++pv_i) { \
      if (LSTRLEN(pv_buf) + 16 >= pv_cap) break; \
      sprintf(pv_buf + LSTRLEN(pv_buf), FMT, (PTR)[pv_i]); \
    } \
    PLogMessage("%s", pv_buf); \
  } while (0)
32 
33 /* Cross-utterance CMN calculation:
34    We try to normalize the speech frames before they get to the recognizer.
35    The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
36    We collect these speech frames during recognition. At the end of
37    recognition we exclude the silence frames from the collected data, and
38    generate a new channel average based on the previous average and the new
39    data, using an exponential decay formula.
40 
41    In-utterance CMN calculation:
42    A new short-term average mechanism was introduced, with faster update,
43    to improve recognition on the very first recognition after init or reset.
44    We wait for a minimum number of new data frames to apply this. We also
45    disable the fast updater after some frames, because we assume the
46    cross-utterance estimator to be more reliable, particularly in its
47    ability to exclude silence frames from the calculation.
48 */
49 
50 /* Default settings for cross-utterance CMS; swicms_init() applies them before reading any session overrides. */
51 #define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frames of history */
52 #define SWICMS_SBINDEX_DEFAULT              100 /* percent weight of the speech average vs. the background average; 100 = speech frames only */
53 /* #define SWICMS_CACHE_RESOLUTION_DEFAULT  see swicms.h */
54 /* #define SWICMS_CACHE_SIZE_DEFAULT        see swicms.h */
55 
56 /* Default settings for in-utterance CMS */
57 #define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535 /* any large number; in-utterance CMS is skipped while forget_factor2 equals this value */
58 #define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
59 /* disable the in-utterance updater once the cross-utterance estimate becomes more reliable */
60 #define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
61 /* wait this many accumulated frames while the in-utterance estimate is still poor */
62 #define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10
63 
64 /**
65  * Logging configuration and file-scope debug state.
66  */
67 #define LOG_LEVEL 2
68 #define MODULE_NAME L("swicms.c")
69 //static const char* MTAG = MODULE_NAME;
70 
71 static const char *rcsid = 0 ? (const char *) &rcsid : /* condition is constant 0, so the $Id string below is always selected */
72                            "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $"; /* the self-reference presumably only marks rcsid as used - TODO confirm */
73 
74 static ESR_BOOL SWICMS_DEBUG = ESR_FALSE; /* set by swicms_init() from CREC.Frontend.swicms.debug; gates the vector dumps in swicms_update() and swicms_lda_process() */
75 
76 /* Built-in cmn/tmn default vectors (good values taken from cmn/tmn files). swicms_init() copies the *_8 tables when the sample rate is 8000 and the *_11 tables otherwise. */
77 static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
78   {
79     158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
80     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
81     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
82   };
83 
84 static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
85   {
86     163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
87     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
88     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
89   };
90 
91 static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
92   {
93     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
95     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
96   };
97 
98 static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
99   {
100     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
102     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
103   };
104 
GetSomeIntsIfAny(const LCHAR * parname,imeldata * parvalue,size_t reqSize)105 static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
106 {
107   size_t i, size;
108   ESR_ReturnCode rc;
109   ESR_BOOL exists;
110   IntArrayList* intList = 0;
111 
112   CHKLOG(rc, ESR_SessionContains(parname, &exists));
113   if (exists) {
114     rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
115     if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
116       /* no match will revert to default data already in static array */
117       PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
118       return ESR_FATAL_ERROR;
119     }
120     else if (rc == ESR_SUCCESS) {
121       CHKLOG(rc, IntArrayListGetSize(intList, &size));
122       if(size != reqSize) {
123 	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
124 	return ESR_FATAL_ERROR;
125       }
126       if(reqSize == 1)
127 	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
128       else {
129 	for (i=0; i<size; ++i)
130 	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
131       }
132     }
133   }
134   return ESR_SUCCESS;
135  CLEANUP:
136   return rc;
137 }
138 
/**
 * Initializes a channel-normalization state block.
 *
 * Loads the compiled-in defaults (choosing the 8 kHz tables when the session
 * sample rate is 8000 and the 11 kHz tables otherwise) and then, if an ESR
 * session exists, applies any CREC.Frontend.swicms.* overrides found in it.
 * lda_cmn/lda_tmn are zeroed and is_valid is cleared; swicms_lda_process()
 * fills them in later.
 *
 * NOTE(review): swicms->_prep is not touched here, although swicms_get_cmn()
 * and swicms_set_cmn() test it against NULL - presumably callers pass in a
 * zero-initialized structure; confirm.
 *
 * @param swicms State block to initialize.
 * @return 0 on success, otherwise the ESR_ReturnCode of the failing session
 *         access.
 */
int swicms_init(swicms_norm_info* swicms)
{
  ESR_ReturnCode    rc = ESR_SUCCESS;
  size_t            i;
  ESR_BOOL          exists = ESR_FALSE;
  ESR_BOOL          sessionExists = ESR_FALSE;
  size_t            sample_rate = 11025; /* used when there is no session */
  const imeldata*   default_cmn;
  const imeldata*   default_tmn;

  /* defaults */
  swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
  swicms->cached_num_frames = 0;
  swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
  swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
  swicms->num_frames_in_cmn = 0;

  CHKLOG(rc, ESR_SessionExists(&sessionExists));

  if (sessionExists)
  {
    /* We'll assume this rate is valid or someone else will be complaining. */
    rc = ESR_SessionGetSize_t(L("CREC.Frontend.samplerate"), &sample_rate);
    if (rc != ESR_SUCCESS)
      return rc;
  }

  /* copy the static tables so the originals stay available for a later reset */
  if (sample_rate == 8000)
  {
    default_cmn = gswicms_cmn1_8;
    default_tmn = gswicms_tmn1_8;
  }
  else
  {
    default_cmn = gswicms_cmn1_11;
    default_tmn = gswicms_tmn1_11;
  }
  for (i = 0; i < MAX_CHAN_DIM; i++)
  {
    swicms->cmn[i] = default_cmn[i];
    swicms->tmn[i] = default_tmn[i];
    swicms->lda_cmn[i] = 0; /* calculated by swicms_lda_process() */
    swicms->lda_tmn[i] = 0; /* calculated by swicms_lda_process() */
  }

  if (sessionExists)
  {
    const LCHAR* parname = L("CREC.Frontend.swicms.debug");
    /* the rate-specific vector is read after the generic one, so it wins */
    const LCHAR* rate_cmn_parname = (sample_rate == 8000)
                                    ? L("CREC.Frontend.swicms.cmn8")
                                    : L("CREC.Frontend.swicms.cmn11");

    CHKLOG(rc, ESR_SessionContains(parname, &exists));
    if (exists) {
      rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
      if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
        PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
        return rc;
      }
    }

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.forget_factor"),
                          &swicms->forget_factor, 1);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.sbindex"),
                          &swicms->sbindex, 1);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.cmn"),
                          &swicms->cmn[0], MAX_CHAN_DIM);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(rate_cmn_parname, &swicms->cmn[0], MAX_CHAN_DIM);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.tmn"),
                          &swicms->tmn[0], MAX_CHAN_DIM);
    if (rc != ESR_SUCCESS) return rc;
  }

  swicms->is_valid = 0;
  for (i = 0; i < MAX_CHAN_DIM; i++)
    swicms->adjust[i] = 255;

#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
  PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
  PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
#endif

  /* in-utt cms parameters */
  swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
  swicms->inutt.disable_after  = SWICMS_INUTT_DISABLE_AFTER_FRAMES;
  swicms->inutt.enable_after   = SWICMS_INUTT_ENABLE_AFTER_FRAMES; /* in-utt is less reliable */
  swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
  swicms->inutt.num_frames_since_bou = 0;
  swicms->inutt.num_frames_in_accum = 0;
  for (i = 0; i < MAX_CHAN_DIM; i++)
    swicms->inutt.accum[i] = 0;

  if (sessionExists) {
    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
                          &swicms->inutt.forget_factor2, 1);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
                          &swicms->inutt.disable_after, 1);
    if (rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
                          &swicms->inutt.enable_after, 1);
    if (rc != ESR_SUCCESS) return rc;

    /* we need to estimate the in-utt cmn from speech frames only! so let's
       make sure to skip some frames before collecting data.  This lookup is
       best-effort: if it fails, the default skip count above is kept (and a
       stale 'exists' from an earlier query is never consulted). */
    exists = ESR_FALSE;
    if (ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists) == ESR_SUCCESS
        && exists) {
      ESR_BOOL do_skip_even_frames = ESR_TRUE;
      ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
      ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
      if (do_skip_even_frames)
        swicms->inutt.num_bou_frames_to_skip /= 2;
      swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
    }
  }

  return 0;
 CLEANUP:
  return rc;
}
283 
284 
swicms_get_cmn(swicms_norm_info * swicms,LCHAR * cmn_params,size_t * len)285 ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
286 {
287   int dim_count;
288   int i;
289   imeldata temp[MAX_CHAN_DIM];
290   const size_t INT_LENGTH = 12;
291 
292   if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
293   {
294     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
295       temp [dim_count] = swicms->lda_cmn [dim_count];
296     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
297   }
298   else	/* lda does not exist give them raw cmn values */
299   {
300     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
301       temp [dim_count] = swicms->cmn [dim_count];
302   }
303 
304   for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
305   {
306     i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
307     if (i + INT_LENGTH >= *len) {
308         *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
309         return ESR_BUFFER_OVERFLOW;
310     }
311   }
312 
313   return ESR_SUCCESS;
314 }
315 
316 
swicms_set_cmn(swicms_norm_info * swicms,const char * cmn_params)317 ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
318 {
319   ESR_ReturnCode    set_status;
320   int               length_of_params;
321   int               dim_count;
322   int               got_word;
323   int               current_position;
324   char              *copy_of_params;
325   char              *parsed_strings [MAX_CHAN_DIM];
326   int               temp_cmn [MAX_CHAN_DIM];
327 
328   length_of_params = strlen ( cmn_params ) + 1;
329   copy_of_params = (char*)MALLOC ( length_of_params, NULL );
330 
331   if ( copy_of_params != NULL )
332   {
333     set_status = ESR_SUCCESS;
334     memcpy ( copy_of_params, cmn_params, length_of_params );
335     dim_count = 0;
336     current_position = 0;
337     got_word = 0;
338     parsed_strings [dim_count] = copy_of_params + current_position;
339 
340     while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
341     {
342       switch ( *( copy_of_params + current_position ) )
343       {
344         case '\0':
345           if ( got_word == 1 )
346           {
347             if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
348               dim_count++;
349             else
350             {
351               PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
352               set_status = ESR_INVALID_ARGUMENT;
353             }
354           }
355           else
356           {
357             PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
358             set_status = ESR_INVALID_ARGUMENT;
359           }
360           break;
361 
362         case ',':
363           if ( got_word == 1 )
364           {
365             if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
366             {
367               dim_count++;
368               *( copy_of_params + current_position) = '\0';
369               current_position++;
370 
371               if ( current_position == length_of_params )
372               {
373                 PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
374                 set_status = ESR_INVALID_ARGUMENT;
375               }
376               parsed_strings [dim_count] = copy_of_params + current_position;
377               got_word = 0;
378             }
379             else
380             {
381               PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
382               set_status = ESR_INVALID_ARGUMENT;
383             }
384           }
385           else
386           {
387             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
388             set_status = ESR_INVALID_ARGUMENT;
389           }
390           break;
391 
392         case '0':
393         case '1':
394         case '2':
395         case '3':
396         case '4':
397         case '5':
398         case '6':
399         case '7':
400         case '8':
401         case '9':
402           got_word = 1;
403           current_position++;
404 
405           if ( current_position == length_of_params )
406           {
407             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
408             set_status = ESR_INVALID_ARGUMENT;
409           }
410           break;
411 
412         default:
413           PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
414           set_status = ESR_INVALID_ARGUMENT;
415           break;
416       }
417     }
418     if ( set_status == ESR_SUCCESS )
419     {
420       dim_count = 0;
421 
422       while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
423       {
424         temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
425 
426         if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
427         {
428           set_status = ESR_INVALID_ARGUMENT;
429         }
430       }
431       if ( set_status == ESR_SUCCESS )
432       {
433         for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
434           swicms->cmn [dim_count] = temp_cmn [dim_count];
435         if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
436           linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
437       }
438     }
439     FREE ( copy_of_params );
440   }
441   else
442   {
443     PLogError ( "Channel Normalization Out Of Memory Error\n" );
444     set_status = ESR_OUT_OF_MEMORY;
445   }
446   swicms->num_frames_in_cmn = 0;
447   return ( set_status );
448 }
449 
450 
/**
 * Adds one frame to the running per-section sums used by swicms_update().
 *
 * Frames are grouped into sections of cache_resolution frames each; a
 * section's sum is cleared when its first frame arrives.  Once
 * SWICMS_CACHE_SIZE_DEFAULT sections are full, further frames are ignored.
 *
 * @param swicms Normalization state owning the cache.
 * @param frame  Frame to accumulate (MAX_CHAN_DIM values are read).
 * @param dimen  Frame dimension; asserted to equal MAX_CHAN_DIM.
 * @return Always 0.
 */
int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
{
  int section;
  int k;
  imeldata *sums;

  ASSERT(dimen == MAX_CHAN_DIM);

  section = swicms->cached_num_frames / swicms->cache_resolution;
  if (section >= SWICMS_CACHE_SIZE_DEFAULT)
    return 0; /* cache is full: drop the frame */

  sums = swicms->cached_sections[section];

  /* first frame of a section: start its sum from zero */
  if (swicms->cached_num_frames % swicms->cache_resolution == 0)
  {
    for (k = 0; k < MAX_CHAN_DIM; k++)
      sums[k] = 0;
  }

  for (k = 0; k < MAX_CHAN_DIM; k++)
    sums[k] += frame[k];

  swicms->cached_num_frames++;
  return 0;
}
473 
/**
 * Normalizes one frame: oframe[k] = MAKEBYTE(iframe[k] + swicms->adjust[k]).
 *
 * When in-utterance CMS is active (forget_factor2 is not the DISABLE
 * sentinel), the frame is first folded into the in-utterance accumulator,
 * provided the accumulator holds fewer than disable_after frames and at least
 * num_bou_frames_to_skip frames have been seen since the utterance began.
 * Once more than enable_after frames are accumulated, adjust[] is recomputed
 * from a weighted average of lda_cmn and the accumulator before the frame is
 * normalized.
 *
 * @param swicms Normalization state (adjust[] and inutt counters may change).
 * @param oframe Output frame, dimen values.
 * @param iframe Input frame, dimen values.
 * @param dimen  Frame dimension; asserted to equal MAX_CHAN_DIM.
 * @return Always 0.
 */
int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
    imeldata* oframe,
    imeldata* iframe, int dimen)
{
  int k;
  ASSERT(dimen == MAX_CHAN_DIM);

  if (swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
  {
    /* not yet switched off in favour of the cross-utterance estimate */
    const int still_active =
      swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after;
    /* already past the leading silence frames */
    const int past_silence =
      swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip;

    if (still_active && past_silence)
    {
      swicms->inutt.num_frames_in_accum++;
      for (k = 0; k < dimen; k++)
        swicms->inutt.accum[k] += iframe[k];

      /* enough (presumably speech) frames seen: update adjust[] in-line */
      if (swicms->inutt.num_frames_in_accum > swicms->inutt.enable_after)
      {
        const imeldata weight = swicms->inutt.forget_factor2;
        const imeldata denom = weight + swicms->inutt.num_frames_in_accum;

        for (k = 0; k < dimen; k++)
        {
          /* rounded weighted average of the old lda_cmn and the new accum */
          imeldata blended = (swicms->lda_cmn[k] * weight
                              + swicms->inutt.accum[k] + denom / 2) / denom;
          swicms->adjust[k] = swicms->lda_tmn[k] - blended;
        }
      }
    }
    swicms->inutt.num_frames_since_bou++;
  }

  for (k = 0; k < dimen; k++)
    oframe[k] = MAKEBYTE(iframe[k] + swicms->adjust[k]);
  return 0;
}
511 
/**
 * End-of-utterance update of the cross-utterance channel mean.
 *
 * Splits the cached section sums into a "speech" part (sections covering
 * speech_start..speech_end) and a "background" part (all other cached
 * sections), averages each, blends them with sbindex (percent weight of the
 * speech average) and folds the result into lda_cmn with forget_factor as the
 * weight of the existing estimate.  adjust[] is refreshed to
 * lda_tmn - lda_cmn.  The frame cache and the in-utterance frame counter are
 * reset on every call.
 *
 * If only one of the two parts contains frames, its average stands in for the
 * other part in every dimension; the speech/background classification is
 * decided once, before the per-dimension loop.  Frame counts are validated
 * before any cache section is read, and a negative speech_start is treated
 * as 0.
 *
 * @param swicms       Normalization state.
 * @param speech_start First speech frame of the utterance.
 * @param speech_end   End-of-speech frame, or MAXframeID if unknown.
 * @return 0 when the estimate was updated or when no usable frame counts
 *         exist; 1 when the speech bounds are unusable or the utterance mean
 *         is rejected as bad.
 */
int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
{
  int i, j;
  asr_int32_t avg[MAX_CHAN_DIM];
  int ff;
  int nn, speech_nn, backgr_nn;
  int have_speech, have_backgr;
  int num_frames = swicms->cached_num_frames;
  int cache_start, cache_end, backgr_cache_end;
  int sbindex = swicms->sbindex;

  /* init for utterance */
  swicms->inutt.num_frames_since_bou = 0;
  swicms->cached_num_frames = 0;

  if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
  {
    if (speech_end != 0 || speech_start != 0)
      PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
                speech_start, speech_end, num_frames);
    return 1;
  }

  /* translate frame indices into whole cache sections (rounding down) */
  cache_start = speech_start / swicms->cache_resolution;
  if (cache_start < 0)
    cache_start = 0; /* never index in front of the cache */
  cache_end = ((speech_end < num_frames) ? speech_end : num_frames) / swicms->cache_resolution;
  backgr_cache_end = num_frames / swicms->cache_resolution;

  speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
  backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;

  /* nothing usable (or inconsistent bounds): bail out before touching the cache */
  if (speech_nn < 0 || backgr_nn < 0 || (speech_nn == 0 && backgr_nn == 0))
    return 0;

  have_speech = (speech_nn > 0);
  have_backgr = (backgr_nn > 0);

  for (i = 0; i < MAX_CHAN_DIM; i++)
  {
    asr_int32_t speech_avg = 0;
    asr_int32_t backgr_avg = 0;

    for (j = cache_start; j < cache_end; j++)
      speech_avg += swicms->cached_sections[j][i];
    for (j = 0; j < cache_start; j++)
      backgr_avg += swicms->cached_sections[j][i];
    for (j = cache_end; j < backgr_cache_end; j++)
      backgr_avg += swicms->cached_sections[j][i];

    if (have_speech && have_backgr)
    {
      speech_avg /= speech_nn;
      backgr_avg /= backgr_nn;
    }
    else if (have_backgr)
    {
      /* no speech sections: the background average stands in for speech */
      backgr_avg /= backgr_nn;
      speech_avg = backgr_avg;
    }
    else
    {
      /* no background sections: the speech average stands in for background */
      speech_avg /= speech_nn;
      backgr_avg = speech_avg;
    }

    avg[i] = (sbindex * speech_avg + (100 - sbindex) * backgr_avg + 50) / 100;
  }

  /* the stand-in also lends its frame count to the missing part */
  if (!have_speech)
    speech_nn = backgr_nn;
  if (!have_backgr)
    backgr_nn = speech_nn;
  nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;

  for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
  {
    ff += (swicms->lda_tmn[i] - avg[i]);
  }
  ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
  if (ff > 5)
  {
    PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
    return 1;
  }

  ff = swicms->forget_factor;
  if (ff < 9999)
  {
    for (i = 0; i < MAX_CHAN_DIM; i++)
    {
      /* rounded weighted mean: ff frames of history vs. nn new frames */
      swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
      swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    }
  }

  if (SWICMS_DEBUG)
  {
    imeldata temp[MAX_CHAN_DIM];
    PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);

    for (i = 0; i < MAX_CHAN_DIM; i++)
      temp[i] = swicms->lda_cmn[i];
    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    /* use this dump, to put back into CREC.Frontend.swicms.cmn */
    printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
  }

  swicms->num_frames_in_cmn += nn;
  return 0;
}
641 
/**
 * Derives the transformed normalization vectors from the raw ones.
 *
 * Copies tmn/cmn into lda_tmn/lda_cmn, runs each through
 * linear_transform_frame() with prep, sets adjust[] to lda_tmn - lda_cmn,
 * marks the state valid and remembers prep in swicms->_prep.  With
 * SWICMS_DEBUG set, the raw, transformed and round-tripped cmn vectors are
 * logged.
 *
 * @param swicms Normalization state to update.
 * @param prep   Preprocessor handed to the transform routines and stored.
 * @return Always 0.
 */
int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
{
  int k;

  /* seed the transformed vectors with the raw ones, then transform in place */
  for (k = 0; k < MAX_CHAN_DIM; k++)
  {
    swicms->lda_tmn[k] = swicms->tmn[k];
    swicms->lda_cmn[k] = swicms->cmn[k];
  }
  linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
  linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);

  for (k = 0; k < MAX_CHAN_DIM; k++)
    swicms->adjust[k] = swicms->lda_tmn[k] - swicms->lda_cmn[k];

  swicms->is_valid = 1;
  swicms->_prep = prep;

  if (SWICMS_DEBUG)
  {
    imeldata temp[MAX_CHAN_DIM];

    printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
    printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);

    /* round-trip lda_cmn through the inverse transform and log the result */
    for (k = 0; k < MAX_CHAN_DIM; k++)
      temp[k] = swicms->lda_cmn[k];
    inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
    printf_vector("swicms->cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
  }

  return 0;
}
680 
681 
682 
683