• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  swicms.h                                                                 *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 #ifndef __SWICMS_H__
21 #define __SWICMS_H__
22 
23 #include"all_defs.h"
24 #include"sizes.h"
25 #include"fronttyp.h"
26 #include"pre_desc.h"
27 
28 #define DEBUG_SWICMS        0   /* nonzero: swicms_compare()/swicms_dump_stats() are declared as real functions; zero: they are empty macros */
29 #define MAX_CACHED_FRAMES 800   /* matches SWICMS_CACHE_SIZE_DEFAULT * SWICMS_CACHE_RESOLUTION_DEFAULT (100 * 8) */
30 #define SWICMS_CACHE_RESOLUTION_DEFAULT   8   /* presumably the default for swicms_norm_info.cache_resolution (frames averaged per section) -- TODO confirm in the implementation */
31 #define SWICMS_CACHE_SIZE_DEFAULT         100 /* equals #frames/resolution (800 / 8); first dimension of swicms_norm_info.cached_sections */
32 
33 /**
34  * Debugger convenience type: wraps a single MAX_CHAN_DIM-element imeldata array in a struct so that a vector such as tmn can be inspected by casting, e.g. (imelvec*)tmn.
35  */
36 typedef struct
37 {
38   imeldata vec[MAX_CHAN_DIM];   /* same element type and length as the tmn/cmn arrays in swicms_norm_info */
39 }
40 imelvec;
41 
42 /**
43  * State for channel normalization done without using fine recognition segmentation.  It remembers
44  * the frames of speech and uses them as the channel mean for the next utterance.  A forget_factor
45  * is used to weigh the new speech mean estimate against an older one.
46  */
47 typedef struct
48 {
49   imeldata tmn [MAX_CHAN_DIM];                 /* target mean */
50   imeldata cmn [MAX_CHAN_DIM];                 /* channel mean */
51 
52   imeldata lda_tmn [MAX_CHAN_DIM];                 /* target mean; presumably the LDA-space counterpart of tmn -- TODO confirm */
53   imeldata lda_cmn [MAX_CHAN_DIM];                 /* channel mean; presumably the LDA-space counterpart of cmn -- TODO confirm */
54 
55   imeldata adjust[MAX_CHAN_DIM]; /* target less channel, i.e. target mean minus channel mean */
56 
57   int is_valid;                /* NOTE(review): not documented here; presumably nonzero once this state is usable -- confirm */
58   int forget_factor;           /* in frames, mass of cmn average */
59   int sbindex;                 /* speech to background index
60         100 -> use only speech to calculate CMN
61         000 -> use only background to calculate CMN
62         050 -> use half/half ..
63         all numbers in between are acceptable */
64 
65   int num_frames_in_cmn; /* num frames used to estimate cmn (or lda_cmn) */
66 
67   /* for in-utterance channel normalization */
68   struct {
69     int forget_factor2;     /* cmn is given this weight to start off */
70     int disable_after;      /* we disable in-utt cms after this many frames */
71     int enable_after;       /* we enable in-utt cms after this many frames */
72     int num_bou_frames_to_skip;   /* don't start accum 'til this many frames */
73     int num_frames_since_bou;     /* counter for above, bou=begin-of-utt     */
74     int num_frames_in_accum;      /* number of frames in accum */
75     imeldata accum[MAX_CHAN_DIM]; /* accumulates frames of the current utt */
76   } inutt;
77 
78   int cached_num_frames;       /* we cache frames, until recognition is done
79         and can calculate speech mean from these */
80   int cache_resolution;        /* we'll avg this many frames per section */
81   imeldata cached_sections[SWICMS_CACHE_SIZE_DEFAULT][MAX_CHAN_DIM];   /* SWICMS_CACHE_SIZE_DEFAULT sections, one MAX_CHAN_DIM vector each */
82   /*const*/ preprocessed* _prep;   /* pointer to a preprocessed object; const is commented out, so the pointee is writable via this member. NOTE(review): ownership not visible here */
83 }
84 swicms_norm_info;
85 
86 int swicms_init(swicms_norm_info* swicms);   /* takes the normalization state; NOTE(review): meaning of the int result is not visible in this header */
87 int swicms_cache_frame(swicms_norm_info* swicms, imeldata* frame, int dimen);   /* takes one frame and its dimension count; presumably stores it in the frame cache -- confirm */
88 int apply_channel_normalization_in_swicms(swicms_norm_info *swicms,
89     imeldata* oframe, imeldata* iframe,
90     int dimen);   /* per the parameter names: iframe is the input frame, oframe the output frame, dimen the element count -- confirm */
91 int swicms_lda_process(swicms_norm_info* swicms, preprocessed* prep);   /* takes the state and a preprocessed object; presumably fills lda_tmn/lda_cmn -- confirm */
92 
93 int swicms_update(swicms_norm_info* swicms, int speech_start_frame, int speech_end_frame);   /* presumably the frame arguments delimit the speech region used for the update -- confirm */
94 
95 ESR_ReturnCode swicms_set_cmn(swicms_norm_info *swicms, const LCHAR *new_cmn_params );   /* takes CMN parameters as a read-only string; string format not visible in this header */
96 ESR_ReturnCode swicms_get_cmn(swicms_norm_info *swicms, LCHAR *cmn_params, size_t* len );   /* writes to caller buffer cmn_params; len presumably carries buffer size in and length out -- confirm */
97 
98 #if DEBUG_SWICMS   /* debug build: diagnostics are real functions returning int */
99 int swicms_compare(swicms_norm_info* swicms, imeldata* imelda_adjust);
100 int swicms_dump_stats(swicms_norm_info* swicms);
101 #else   /* non-debug build: calls are removed by the preprocessor */
102 #define swicms_compare(swicms,ia)   /* expands to nothing, so arguments are not evaluated. NOTE(review): call only as a statement; using the int result compiles only when DEBUG_SWICMS is nonzero */
103 #define swicms_dump_stats(swicms)   /* expands to nothing; same statement-only restriction as swicms_compare */
104 #endif   /* DEBUG_SWICMS */
105 
106 #endif
107 
108