• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  swicms.c                                                                 *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #include <string.h>
21 #include"swicms.h"
22 #include"srec_sizes.h"
23 #include"prelib.h"
24 
25 #include "passert.h"
26 #include "ESR_Session.h"
27 #include "ESR_SessionType.h"
28 #include "IntArrayList.h"
29 #include "portable.h"
30 
/* Debug helper: formats HEAD (used as a printf format without arguments), the
   address of PTR, and the first NN elements of PTR (each rendered with FMT)
   into one bounded stack buffer, then logs the result as a single message.
   Output that does not fit in the buffer is truncated instead of overrunning
   it. The buffer is logged through an explicit "%s" so that its contents are
   never interpreted as a format string. NN and PTR are evaluated more than
   once; do not pass expressions with side effects. */
#define printf_vector(HEAD, FMT, PTR, NN) \
  do { \
    LCHAR pv_buf[256]; \
    const size_t pv_cap = sizeof(pv_buf) / sizeof(pv_buf[0]); \
    size_t pv_len; \
    int pv_i; \
    snprintf(pv_buf, pv_cap, HEAD); \
    pv_len = LSTRLEN(pv_buf); \
    snprintf(pv_buf + pv_len, pv_cap - pv_len, " %p", (void *)(PTR)); \
    for (pv_i = 0; pv_i < (NN); ++pv_i) { \
      pv_len = LSTRLEN(pv_buf); \
      if (pv_len + 1 >= pv_cap) break; /* buffer full: stop appending */ \
      snprintf(pv_buf + pv_len, pv_cap - pv_len, FMT, (PTR)[pv_i]); \
    } \
    PLogMessage(L("%s"), pv_buf); \
  } while (0)
32 
33 /* Cross-utterance CMN calculation:
34    We try to normalize the speech frames before they get to the recognizer.
35    The speech frames are LDA-processed mfcc-with-dynamic feature vectors.
36    We collect these speech frames during recognition. At the end of
37    recognition we exclude the silence frames from the collected data, and
38    generate a new channel average based on the previous average and the new
39    data, using an exponential decay formula.
40 
41    In-utterance CMN calculation:
42    A new short-term average mechanism was introduced, with faster update,
43    to improve recognition on the very first recognition after init or reset.
44    We wait for a minimum number of new data frames to apply this. We also
45    disable the fast updater after some frames, because we assume the
46    cross-utterance estimator to be more reliable, particularly in its
47    ability to exclude silence frames from the calculation.
48 */
49 
/*
 * Cross-utterance CMS defaults.
 * SWICMS_CACHE_RESOLUTION_DEFAULT and SWICMS_CACHE_SIZE_DEFAULT are defined
 * in swicms.h.
 */
#define SWICMS_SBINDEX_DEFAULT              100 /* use speech frames only */
#define SWICMS_FORGET_FACTOR_DEFAULT        400 /* effective frames of history */

/*
 * In-utterance CMS defaults.
 */
/* any large number; forget_factor2 equal to this value turns in-utt CMS off */
#define SWICMS_INUTT_FORGET_FACTOR2_DISABLE 65535
#define SWICMS_INUTT_FORGET_FACTOR2_DEFAULT SWICMS_INUTT_FORGET_FACTOR2_DISABLE
/* wait while the estimate is poor */
#define SWICMS_INUTT_ENABLE_AFTER_FRAMES    10
/* disable this when the cross-utt estimate becomes more reliable */
#define SWICMS_INUTT_DISABLE_AFTER_FRAMES   200
63 
64 /**
65  * Logging Stuff
66  */
67 #define LOG_LEVEL 2
68 #define MODULE_NAME L("swicms.c")
69 //static const char* MTAG = MODULE_NAME;
70 
71 static const char *rcsid = 0 ? (const char *) &rcsid :
72                            "$Id: swicms.c,v 1.21.6.16 2008/06/05 19:00:55 stever Exp $";
73 
74 static ESR_BOOL SWICMS_DEBUG = ESR_FALSE;
75 
76 /* these are good values from cmn/tmn files */
77 static const imeldata gswicms_cmn1_8 [MAX_CHAN_DIM] =
78   {
79     158, 141,  99, 125, 101, 162, 113, 138, 128, 143, 123, 141,
80     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
81     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
82   };
83 
84 static const imeldata gswicms_cmn1_11 [MAX_CHAN_DIM] =
85   {
86     163, 121, 120, 114, 124, 139, 144, 108, 150, 119, 146, 124,
87     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
88     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
89   };
90 
91 static const imeldata gswicms_tmn1_8 [MAX_CHAN_DIM] =
92   {
93     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
94     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
95     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
96   };
97 
98 static const imeldata gswicms_tmn1_11 [MAX_CHAN_DIM] =
99   {
100     108, 138, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
101     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
102     127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127
103   };
104 
GetSomeIntsIfAny(const LCHAR * parname,imeldata * parvalue,size_t reqSize)105 static ESR_ReturnCode GetSomeIntsIfAny( const LCHAR* parname, imeldata* parvalue, size_t reqSize)
106 {
107   size_t i, size;
108   ESR_ReturnCode rc;
109   ESR_BOOL exists;
110   IntArrayList* intList = 0;
111 
112   CHKLOG(rc, ESR_SessionContains(parname, &exists));
113   if (exists) {
114     rc = ESR_SessionGetProperty(parname, (void**)&intList, TYPES_INTARRAYLIST);
115     if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
116       /* no match will revert to default data already in static array */
117       PLogError(L("Error reading %s from session: %s"), parname, ESR_rc2str(rc));
118       return ESR_FATAL_ERROR;
119     }
120     else if (rc == ESR_SUCCESS) {
121       CHKLOG(rc, IntArrayListGetSize(intList, &size));
122       if(size != reqSize) {
123 	PLogError(L("Error reading %s from session, expected len %d: %s"), parname, reqSize, ESR_rc2str(rc));
124 	return ESR_FATAL_ERROR;
125       }
126       if(reqSize == 1)
127 	CHKLOG(rc, IntArrayListGet(intList, 0, parvalue));
128       else {
129 	for (i=0; i<size; ++i)
130 	  CHKLOG(rc, IntArrayListGet(intList, i, &parvalue[i]));
131       }
132     }
133   }
134   return ESR_SUCCESS;
135  CLEANUP:
136   return rc;
137 }
138 
/**
 * Initializes a channel-normalization state block.
 *
 * Loads the built-in defaults (cmn/tmn tables chosen by sample rate, forget
 * factors, in-utterance thresholds) and then, when an ESR session exists,
 * applies any overrides found under CREC.Frontend.swicms.*. The lda_cmn and
 * lda_tmn vectors are zeroed here and recalculated later by
 * swicms_lda_process(); is_valid stays 0 until then.
 *
 * @param swicms state block to initialize
 * @return 0 (ESR_SUCCESS) on success, otherwise the ESR_ReturnCode of the
 *         session call that failed
 */
int swicms_init(swicms_norm_info* swicms)
{
  ESR_ReturnCode    rc = ESR_SUCCESS;
  size_t            i;
  ESR_BOOL          exists = ESR_FALSE;
  ESR_BOOL          sessionExists;
  size_t            sample_rate;
  const imeldata*   default_cmn;
  const imeldata*   default_tmn;

  /* defaults */
  swicms->sbindex          = SWICMS_SBINDEX_DEFAULT;
  swicms->cached_num_frames = 0;
  swicms->forget_factor    = SWICMS_FORGET_FACTOR_DEFAULT;
  swicms->cache_resolution = SWICMS_CACHE_RESOLUTION_DEFAULT;
  swicms->num_frames_in_cmn = 0;

  /* queried once; the result is reused for every session lookup below */
  CHKLOG(rc, ESR_SessionExists(&sessionExists));

  if (sessionExists)
  {  /* We'll assume this rate is valid or someone else will be complaining.   SteveR */
    rc = ESR_SessionGetSize_t ( L ( "CREC.Frontend.samplerate" ), &sample_rate );

    if ( rc != ESR_SUCCESS )
      return ( rc );
  }
  else
    sample_rate = 11025;

  /* init the data structures by copying the static data so that we can have a copy if we need to reset */
  if ( sample_rate == 8000 )
  {
    default_cmn = gswicms_cmn1_8;
    default_tmn = gswicms_tmn1_8;
  }
  else
  {
    default_cmn = gswicms_cmn1_11;
    default_tmn = gswicms_tmn1_11;
  }
  for ( i = 0; i < MAX_CHAN_DIM; i++ )
  {
    swicms->cmn [i] = default_cmn [i];
    swicms->tmn [i] = default_tmn [i];
    swicms->lda_cmn [i] = 0; /* calculated by swicms_lda_process() */
    swicms->lda_tmn [i] = 0; /* calculated by swicms_lda_process() */
  }

  if (sessionExists)
  {
    const LCHAR* parname = L("CREC.Frontend.swicms.debug");
    CHKLOG(rc, ESR_SessionContains(parname, &exists));
    if (exists) {
      rc = ESR_SessionGetBool(parname, &SWICMS_DEBUG);
      if (rc != ESR_SUCCESS && rc != ESR_NO_MATCH_ERROR) {
        PLOG_DBG_ERROR((L("Error reading %s from session: %s"), parname, ESR_rc2str(rc)));
        return rc;
      }
    }

    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.forget_factor"),
                           &swicms->forget_factor, 1);
    if(rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.sbindex"),
                           &swicms->sbindex, 1);
    if(rc != ESR_SUCCESS) return rc;

    /* generic override first, then the sample-rate specific one on top */
    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.cmn"),
                           &swicms->cmn[0], MAX_CHAN_DIM);
    if(rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny( sample_rate == 8000 ? L("CREC.Frontend.swicms.cmn8")
                                               : L("CREC.Frontend.swicms.cmn11"),
                           &swicms->cmn[0], MAX_CHAN_DIM);
    if(rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny( L("CREC.Frontend.swicms.tmn"),
                           &swicms->tmn[0], MAX_CHAN_DIM);
    if(rc != ESR_SUCCESS) return rc;
  }

  swicms->is_valid = 0;
  for (i = 0; i < MAX_CHAN_DIM; i++)
    swicms->adjust[i] = 255;

#ifdef SREC_ENGINE_VERBOSE_LOGGING
  PLogMessage("swicms->forget_factor    = %d\n", swicms->forget_factor);
  PLogMessage("swicms->cache_resolution = %d\n", swicms->cache_resolution);
  PLogMessage("swicms->sbindex          = %d\n", swicms->sbindex);
#endif

  /* in-utt cms parameters */
  swicms->inutt.forget_factor2 = SWICMS_INUTT_FORGET_FACTOR2_DEFAULT;
  swicms->inutt.disable_after  = SWICMS_INUTT_DISABLE_AFTER_FRAMES;
  swicms->inutt.enable_after   = SWICMS_INUTT_ENABLE_AFTER_FRAMES; /* in-utt is less reliable */
  swicms->inutt.num_bou_frames_to_skip = 20; /* silence frames! see windback */
  swicms->inutt.num_frames_since_bou = 0;
  swicms->inutt.num_frames_in_accum = 0;
  for(i=0; i<MAX_CHAN_DIM; i++) swicms->inutt.accum[i] = 0;

  if (sessionExists) {
    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.forget_factor2"),
                          &swicms->inutt.forget_factor2, 1);
    if(rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.disable_after"),
                          &swicms->inutt.disable_after, 1);
    if(rc != ESR_SUCCESS) return rc;

    rc = GetSomeIntsIfAny(L("CREC.Frontend.swicms.inutt.enable_after"),
                          &swicms->inutt.enable_after, 1);
    if(rc != ESR_SUCCESS) return rc;

    /* we need to estimate the in-utt cmn from speech frames only! so let's
       make sure to skip some frames before collecting data.
       These lookups are deliberately best-effort (return codes ignored), so
       reset 'exists' first: a failed query must not reuse the answer left
       over from the earlier "swicms.debug" lookup. */
    exists = ESR_FALSE;
    ESR_SessionContains(L("CREC.Frontend.start_windback"), &exists);
    if (exists) {
      ESR_BOOL do_skip_even_frames = ESR_TRUE;
      ESR_SessionGetBool(L("CREC.Frontend.do_skip_even_frames"), &do_skip_even_frames);
      ESR_SessionGetInt(L("CREC.Frontend.start_windback"), &swicms->inutt.num_bou_frames_to_skip);
      if( do_skip_even_frames)
        swicms->inutt.num_bou_frames_to_skip /= 2;
      swicms->inutt.num_bou_frames_to_skip -= 5; /* ensure spch frames only */
    }
  }

  return 0;
 CLEANUP:
  return rc;
}
283 
284 
swicms_get_cmn(swicms_norm_info * swicms,LCHAR * cmn_params,size_t * len)285 ESR_ReturnCode swicms_get_cmn ( swicms_norm_info* swicms, LCHAR *cmn_params, size_t* len )
286 {
287   int dim_count;
288   int i;
289   imeldata temp[MAX_CHAN_DIM];
290   const size_t INT_LENGTH = 12;
291 
292   if (  swicms->_prep != NULL )	/* lda exists give them transformed lda. */
293   {
294     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
295       temp [dim_count] = swicms->lda_cmn [dim_count];
296     inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
297   }
298   else	/* lda does not exist give them raw cmn values */
299   {
300     for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
301       temp [dim_count] = swicms->cmn [dim_count];
302   }
303 
304   for ( dim_count = 0, i = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
305   {
306     i += sprintf( cmn_params + i, dim_count==0 ? "%d" : ",%d", temp [dim_count] );
307     if (i + INT_LENGTH >= *len) {
308         *len = MAX_CHAN_DIM * (INT_LENGTH + 2) * sizeof(LCHAR);
309         return ESR_BUFFER_OVERFLOW;
310     }
311   }
312 
313   return ESR_SUCCESS;
314 }
315 
316 
swicms_set_cmn(swicms_norm_info * swicms,const char * cmn_params)317 ESR_ReturnCode swicms_set_cmn ( swicms_norm_info* swicms, const char *cmn_params )
318 {
319   ESR_ReturnCode    set_status;
320   int               length_of_params;
321   int               dim_count;
322   int               got_word;
323   int               current_position;
324   char              *copy_of_params;
325   char              *parsed_strings [MAX_CHAN_DIM];
326   int               temp_cmn [MAX_CHAN_DIM];
327 
328   length_of_params = strlen ( cmn_params ) + 1;
329   copy_of_params = (char*)MALLOC ( length_of_params, NULL );
330 
331   if ( copy_of_params != NULL )
332   {
333     set_status = ESR_SUCCESS;
334     memcpy ( copy_of_params, cmn_params, length_of_params );
335     dim_count = 0;
336     current_position = 0;
337     got_word = 0;
338     parsed_strings [dim_count] = copy_of_params + current_position;
339 
340     while ( ( dim_count < MAX_CHAN_DIM ) && ( set_status == ESR_SUCCESS ) )
341     {
342       switch ( *( copy_of_params + current_position ) )
343       {
344         case '\0':
345           if ( got_word == 1 )
346           {
347             if ( dim_count == ( MAX_CHAN_DIM - 1 ) )
348               dim_count++;
349             else
350             {
351               PLogError ( "Channel Normalization : Missing Params Must Contain %d Params\n", MAX_CHAN_DIM );
352               set_status = ESR_INVALID_ARGUMENT;
353             }
354           }
355           else
356           {
357             PLogError ( "Channel Normalization : Missing Params Mus Contain %d Params\n", MAX_CHAN_DIM );
358             set_status = ESR_INVALID_ARGUMENT;
359           }
360           break;
361 
362         case ',':
363           if ( got_word == 1 )
364           {
365             if ( dim_count < ( MAX_CHAN_DIM - 1 ) )
366             {
367               dim_count++;
368               *( copy_of_params + current_position) = '\0';
369               current_position++;
370 
371               if ( current_position == length_of_params )
372               {
373                 PLogError ( "Channel Normalization : Delimiter At End Of Param String\n" );
374                 set_status = ESR_INVALID_ARGUMENT;
375               }
376               parsed_strings [dim_count] = copy_of_params + current_position;
377               got_word = 0;
378             }
379             else
380             {
381               PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
382               set_status = ESR_INVALID_ARGUMENT;
383             }
384           }
385           else
386           {
387             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
388             set_status = ESR_INVALID_ARGUMENT;
389           }
390           break;
391 
392         case '0':
393         case '1':
394         case '2':
395         case '3':
396         case '4':
397         case '5':
398         case '6':
399         case '7':
400         case '8':
401         case '9':
402           got_word = 1;
403           current_position++;
404 
405           if ( current_position == length_of_params )
406           {
407             PLogError ( "Channel Normalization : Too Many Params Must Contain %d Params\n", MAX_CHAN_DIM );
408             set_status = ESR_INVALID_ARGUMENT;
409           }
410           break;
411 
412         default:
413           PLogError ( "Channel Normalization : Invalid Param : %c : Params Must Contain Only Digits\n" );
414           set_status = ESR_INVALID_ARGUMENT;
415           break;
416       }
417     }
418     if ( set_status == ESR_SUCCESS )
419     {
420       dim_count = 0;
421 
422       while ( ( dim_count < MAX_CHAN_DIM ) && (  set_status == ESR_SUCCESS ) )
423       {
424         temp_cmn [dim_count] = atoi ( parsed_strings [dim_count] );
425 
426         if ( ( temp_cmn [dim_count] < 0 ) || ( temp_cmn [dim_count] > 255 ) )
427         {
428           set_status = ESR_INVALID_ARGUMENT;
429         }
430 
431         dim_count++;
432       }
433       if ( set_status == ESR_SUCCESS )
434       {
435         for ( dim_count = 0; dim_count < MAX_CHAN_DIM; dim_count++ )
436           swicms->cmn [dim_count] = temp_cmn [dim_count];
437         if ( swicms->_prep != NULL )	/* Set now if NULL it will automatically be set on first utterance */
438           linear_transform_frame(swicms->_prep, swicms->lda_cmn, 1 /*do_shift*/);
439       }
440     }
441     FREE ( copy_of_params );
442   }
443   else
444   {
445     PLogError ( "Channel Normalization Out Of Memory Error\n" );
446     set_status = ESR_OUT_OF_MEMORY;
447   }
448   swicms->num_frames_in_cmn = 0;
449   return ( set_status );
450 }
451 
452 
/**
 * Accumulates one frame into the per-section sums used by swicms_update().
 *
 * Frames are grouped into sections of cache_resolution frames each. The sum
 * for a section is cleared when its first frame arrives. Once
 * SWICMS_CACHE_SIZE_DEFAULT sections are full, further frames are ignored.
 *
 * @param swicms normalization state owning the cache
 * @param frame  feature vector with MAX_CHAN_DIM elements
 * @param dimen  must equal MAX_CHAN_DIM (asserted)
 * @return always 0
 */
int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen)
{
  int section;
  int k;
  imeldata *slot;

  ASSERT(dimen == MAX_CHAN_DIM);

  section = swicms->cached_num_frames / swicms->cache_resolution;
  if (section >= SWICMS_CACHE_SIZE_DEFAULT)
    return 0;   /* cache is full: drop the frame */

  slot = swicms->cached_sections[section];

  /* first frame of a section: start its running sum from zero */
  if (swicms->cached_num_frames % swicms->cache_resolution == 0)
  {
    for (k = 0; k < MAX_CHAN_DIM; k++)
      slot[k] = 0;
  }

  for (k = 0; k < MAX_CHAN_DIM; k++)
    slot[k] += frame[k];

  swicms->cached_num_frames++;
  return 0;
}
475 
/**
 * Normalizes one frame: oframe[k] = MAKEBYTE(iframe[k] + adjust[k]).
 *
 * If in-utterance CMS is enabled (forget_factor2 differs from
 * SWICMS_INUTT_FORGET_FACTOR2_DISABLE), the frame is first folded into the
 * in-utterance accumulator, provided that fewer than disable_after frames
 * have been accumulated and at least num_bou_frames_to_skip frames have been
 * seen since the last swicms_update(). Once more than enable_after frames are
 * accumulated, adjust[] is recomputed from a weighted average of lda_cmn and
 * the accumulator before it is applied.
 *
 * @param swicms normalization state
 * @param oframe output frame, dimen elements
 * @param iframe input frame, dimen elements
 * @param dimen  must equal MAX_CHAN_DIM (asserted)
 * @return always 0
 */
int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
    imeldata* oframe,
    imeldata* iframe, int dimen)
{
  int k;
  ASSERT(dimen == MAX_CHAN_DIM);

  if (swicms->inutt.forget_factor2 != SWICMS_INUTT_FORGET_FACTOR2_DISABLE)
  {
    /* still within the in-utt window, and past the leading silence frames? */
    if (swicms->inutt.num_frames_in_accum < swicms->inutt.disable_after
        && swicms->inutt.num_frames_since_bou >= swicms->inutt.num_bou_frames_to_skip)
    {
      swicms->inutt.num_frames_in_accum++;
      for (k = 0; k < dimen; k++)
        swicms->inutt.accum[k] += iframe[k];

      /* enough frames collected: update the adjustment in-line */
      if (swicms->inutt.num_frames_in_accum > swicms->inutt.enable_after)
      {
        const imeldata weight = swicms->inutt.forget_factor2;
        const imeldata denom = weight + swicms->inutt.num_frames_in_accum;

        for (k = 0; k < dimen; k++)
        {
          /* rounded weighted average of the old lda_cmn and the new accum */
          imeldata mean = (swicms->lda_cmn[k] * weight
                           + swicms->inutt.accum[k] + denom / 2) / denom;
          swicms->adjust[k] = swicms->lda_tmn[k] - mean;
        }
      }
    }
    swicms->inutt.num_frames_since_bou++;
  }

  for (k = 0; k < dimen; k++)
    oframe[k] = MAKEBYTE(iframe[k] + swicms->adjust[k]);

  return 0;
}
513 
/**
 * End-of-utterance update of the cross-utterance channel mean.
 *
 * Splits the cached section sums into a "speech" part (sections between
 * speech_start and speech_end) and a "background" part (all other cached
 * sections), averages each, mixes them according to sbindex (100 = speech
 * only), and folds the result into lda_cmn using forget_factor as the weight
 * of the previous estimate. adjust[] is then refreshed from lda_tmn - lda_cmn.
 * The frame cache and the in-utterance frame counter are reset on every call.
 *
 * If one of the two parts is empty, its mean falls back to the other part's
 * mean for every dimension.
 *
 * @param swicms       normalization state
 * @param speech_start first speech frame of the utterance
 * @param speech_end   end-of-speech frame, or MAXframeID if unknown
 * @return 0 if the mean was updated or the bounds left nothing to average;
 *         1 if the utterance was rejected (no cached frames, degenerate or
 *         unknown speech bounds, or an utterance mean too far below the target)
 */
int swicms_update(swicms_norm_info* swicms, int speech_start, int speech_end)
{
  int i, j;
  asr_int32_t speech_avg[MAX_CHAN_DIM], backgr_avg[MAX_CHAN_DIM], avg[MAX_CHAN_DIM];
  int ff;
  int nn, speech_nn, backgr_nn;
  int have_speech, have_backgr;
  int num_frames = swicms->cached_num_frames;
  int cache_start, cache_end, backgr_cache_end;
  int sbindex = swicms->sbindex;

  /* init for utterance */
  swicms->inutt.num_frames_since_bou = 0;

  swicms->cached_num_frames = 0;

  /* convert frame bounds to cache-section indices */
  cache_start = speech_start;
  cache_start -= (cache_start % swicms->cache_resolution);
  cache_start /= swicms->cache_resolution;

  if (speech_end == MAXframeID)
  {
    cache_end = SWICMS_CACHE_SIZE_DEFAULT;
  }
  else
  {
    if (speech_end < num_frames)
      cache_end = speech_end;
    else
      cache_end = num_frames;
    cache_end -= (cache_end % swicms->cache_resolution);
    cache_end /= swicms->cache_resolution;
  }

  if (num_frames == 0 || speech_end == 0 || speech_start == speech_end || speech_end == MAXframeID)
  {
    if (speech_end != 0 || speech_start != 0)
      PLogError("Warning: speech_bounds (%d,%d) swicms->cached_num_frames (%d)\n",
                speech_start, speech_end, num_frames);
    return 1;
  }

  backgr_cache_end = (num_frames - num_frames % swicms->cache_resolution) / swicms->cache_resolution;

  speech_nn = (cache_end - cache_start) * swicms->cache_resolution;
  backgr_nn = backgr_cache_end * swicms->cache_resolution - speech_nn;

  /* Validate the bounds BEFORE touching the cache: inconsistent bounds (for
     example speech_start beyond the cached frames) would otherwise index
     outside cached_sections. Nothing can be averaged in these cases. */
  if (cache_start < 0 || speech_nn < 0 || backgr_nn < 0
      || (speech_nn == 0 && backgr_nn == 0))
    return 0;

  /* Decide once, for all dimensions, which of the two parts has data. */
  have_speech = (speech_nn > 0);
  have_backgr = (backgr_nn > 0);

  for (i = 0; i < MAX_CHAN_DIM; i++)
  {
    speech_avg[i] = 0;
    backgr_avg[i] = 0;
    for (j = cache_start; j < cache_end; j++)
      speech_avg[i] += swicms->cached_sections[j][i];
    for (j = 0; j < cache_start; j++)
      backgr_avg[i] += swicms->cached_sections[j][i];
    for (j = cache_end; j < backgr_cache_end; j++)
      backgr_avg[i] += swicms->cached_sections[j][i];

    if (have_speech)
      speech_avg[i] /= speech_nn;
    if (have_backgr)
      backgr_avg[i] /= backgr_nn;

    /* an empty part borrows the mean of the other part */
    if (!have_speech)
      speech_avg[i] = backgr_avg[i];
    if (!have_backgr)
      backgr_avg[i] = speech_avg[i];

    avg[i] = (sbindex * speech_avg[i] + (100 - sbindex) * backgr_avg[i] + 50) / 100;
  }

  /* the borrowed mean carries the frame count of its source as well */
  if (!have_speech)
    speech_nn = backgr_nn;
  if (!have_backgr)
    backgr_nn = speech_nn;
  nn = (sbindex * speech_nn + (100 - sbindex) * backgr_nn + 50) / 100;

  for (i = 0, ff = 0; i < MAX_CHAN_DIM; i++)
  {
    ff += (swicms->lda_tmn[i] - avg[i]);
  }
  ff /= MAX_CHAN_DIM; /* sum is now the average offset from TMN */
  if (ff > 5)
  {
    PLogError("Warning: bad utt mean during swicms_update() (moffs=%d)\n", ff);
    return 1;
  }

  ff = swicms->forget_factor;
  if (ff < 9999)
  {
    for (i = 0; i < MAX_CHAN_DIM; i++)
    {
      /* rounded average of the old mean (weight ff) and the new one (weight nn) */
      swicms->lda_cmn[i] = (swicms->lda_cmn[i] * ff + avg[i] * nn + (ff + nn) / 2)  / (ff + nn);
      swicms->adjust[i] = swicms->lda_tmn[i] - swicms->lda_cmn[i];
    }
  }

  if (SWICMS_DEBUG)
    {
      imeldata temp[MAX_CHAN_DIM];
      PLogMessage("swicms_update() used %d frames (%d-%d)", nn, speech_start, speech_end);

      for(i=0;i<MAX_CHAN_DIM;i++) temp[i]=swicms->lda_cmn[i];
      inverse_transform_frame( swicms->_prep, temp, 1 /*do_shift*/);
      /* use this dump, to put back into CREC.Frontend.swicms.cmn */
      printf_vector("swicms.cmn(r)  ", " %d", temp, MAX_CHAN_DIM);
    }

  swicms->num_frames_in_cmn += nn;
  return 0;
}
643 
/**
 * Attaches an LDA front end and derives the transformed normalization vectors.
 *
 * Copies tmn and cmn into lda_tmn and lda_cmn, runs both through
 * linear_transform_frame(), sets adjust = lda_tmn - lda_cmn, marks the state
 * valid and remembers prep for later use. When SWICMS_DEBUG is set, the raw
 * cmn, the transformed cmn and its inverse-transformed round trip are logged.
 *
 * @param swicms normalization state to update
 * @param prep   front-end object passed to the transform routines
 * @return always 0
 */
int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep)
{
  int k;

  for (k = 0; k < MAX_CHAN_DIM; k++)
  {
    swicms->lda_tmn[k] = swicms->tmn[k];
    swicms->lda_cmn[k] = swicms->cmn[k];
  }

  linear_transform_frame(prep, swicms->lda_tmn, 1 /*do_shift*/);
  linear_transform_frame(prep, swicms->lda_cmn, 1 /*do_shift*/);

  for (k = 0; k < MAX_CHAN_DIM; k++)
    swicms->adjust[k] = swicms->lda_tmn[k] - swicms->lda_cmn[k];

  swicms->is_valid = 1;
  swicms->_prep = prep;

  if (SWICMS_DEBUG)
  {
    imeldata roundtrip[MAX_CHAN_DIM];

    printf_vector("swicms->cmn     ", " %d", swicms->cmn,     MAX_CHAN_DIM);
    printf_vector("swicms->lda_cmn ", " %d", swicms->lda_cmn, MAX_CHAN_DIM);

    /* map the transformed mean back, to compare against the raw cmn above */
    for (k = 0; k < MAX_CHAN_DIM; k++)
      roundtrip[k] = swicms->lda_cmn[k];
    inverse_transform_frame( swicms->_prep, roundtrip, 1 /*do_shift*/);
    printf_vector("swicms->cmn(r)  ", " %d", roundtrip, MAX_CHAN_DIM);
  }

  return 0;
}
682 
683 
684 
685