• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*---------------------------------------------------------------------------*
2  *  utteranc.h  *
3  *                                                                           *
4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
5  *                                                                           *
6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
7  *  you may not use this file except in compliance with the License.         *
8  *                                                                           *
9  *  You may obtain a copy of the License at                                  *
10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
11  *                                                                           *
12  *  Unless required by applicable law or agreed to in writing, software      *
13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
15  *  See the License for the specific language governing permissions and      *
16  *  limitations under the License.                                           *
17  *                                                                           *
18  *---------------------------------------------------------------------------*/
19 
20 
21 
22 #ifndef _h_utteranc_
23 #define _h_utteranc_
24 
/* RCS identification string for this header. It is compiled in only when
 * SET_RCSID is defined. Because it is declared static, every translation
 * unit that includes this header with SET_RCSID set gets its own copy. */
25 #ifdef SET_RCSID
26 static const char utteranc_h[] = "$Id: utteranc.h,v 1.3.6.7 2007/08/31 17:44:53 dahan Exp $";
27 #endif
28 
29 
30 
31 #include "all_defs.h"
32 #include "hmm_type.h"
33 #include "fpi_tgt.h"
34 #include "voicing.h"
35 #include "specnorm.h"
36 #include "channorm.h"
37 #include "swicms.h"
38 #ifndef _RTT
39 #include "duk_io.h"
40 #endif
41 
/* Frame-buffer sizing constants; both values are counts of frames.
 * NOTE(review): the names match the buffer_size and keep_frames parameters
 * of init_utterance() declared in this header; presumably these are the
 * default values passed there -- confirm against callers. */
42 #define DEFAULT_BUFFER_SIZE 100 /* in frames */
43 #define KEEP_FRAMES   40 /* in frames, past frames kept */
44 
45 /*  Functions supported are
46 **  new, delete (by source)
47 **  open file/device, close file/device
48 **  attach and detach sink
49 **  read/store samples - including the header
50 */
51 
52 /**
53  * Label (annotation) record: a label string, a begin/end pair, an extra
 *  string and a one-byte flag.
 *
 *  Within this header it is the element type of utt_file_info.utb_table and
 *  the tag type taken by read_tcp(), read_lst(), read_utb_table() and
 *  save_tcp().
 *
 *  NOTE(review): the units of begin/end (frames, samples, ...), the meaning
 *  of flag, and who owns the label/extra strings are not visible in this
 *  header -- confirm against the implementation.
54  */
55 typedef struct
56 {                /* label structure */
57   char *label;            /* label string */
58   long begin;             /* begin position of the labelled span (units: TODO confirm) */
59   long end;               /* end position of the labelled span (units: TODO confirm) */
60   char *extra;            /* additional string carried with the label */
61   unsigned char flag;     /* meaning not visible in this header */
62 }
63 annotate;
64 
65 
66 /**
67  * Utterance state that is independent of where the data comes from.
 *
 *  This is the gen_utt member of the utterance_info union.
 *  file_utterance_info and live_utterance_info declare exactly the same
 *  members, in the same order, as their leading members, so the three types
 *  share a common initial sequence and these members can be read through
 *  utterance_info.gen_utt whichever union member is in use.
 *
 *  Any member added, removed or reordered here must be changed identically
 *  in file_utterance_info and live_utterance_info.
68  */
69 typedef struct
70 {
71   int   utt_type;         /* same slot as utterance_info.utt_type: live or from file */
72   int   dim;              /* presumably the feature dimension (cf. dimen in init_utterance) -- TODO confirm */
73   fepFramePkt  *frame;    /* frame packet; fepFramePkt is declared outside this header */
74   int   num_chan;         /* number of channels (cf. num_chan in init_utterance) */
75   int   do_channorm;      /* presumably a flag enabling channel normalisation -- TODO confirm */
76   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
77   norm_info   *channorm; /*  Mirrored from the Wave object */
78   swicms_norm_info     *swicms;    /* copy of wave obj pointer */
79   spect_dist_info *backchan[MAX_CHAN_DIM]; /* one pointer per channel dimension, up to MAX_CHAN_DIM */
80   featdata  *last_push;
   /* The four members below have the same names as the parameters of
    * set_voicing_durations(); presumably that function sets them -- confirm. */
81   int   voice_duration;
82   int   quiet_duration;
83   int   unsure_duration;
84   int   start_windback;
85 }
86 utt_generic_info;
87 
88 #ifndef _RTT
89 /**
90  * File-specific part of a file-backed utterance: sample format, byte order,
 *  the open PFile and its name, and the UTB table information.
 *
 *  Embedded as the `file` member of file_utterance_info. Only declared when
 *  _RTT is not defined.
 *
 *  NOTE(review): the unit of len and the ownership of utb_table are not
 *  visible in this header -- confirm against the implementation.
91  */
92 typedef struct
93 {
94   char  typ;  /* s (16 bit), c (8 bit), u (newton .utb) */
95   int   endian;  /* 0 is little 1 is big */
96   int   do_skip; /* skip every other frame */
97   unsigned long len;  /* length of file/utterance */
98   PFile* file;  /* pointer to file */
99   char  name[MAX_LABEL]; /* file name */
100   /*    int   op;  read or write */
101   int   num_utts; /* no. of utterances in utb file */
102   annotate  *utb_table; /* utb file header information */
103 }
104 utt_file_info;
105 
106 /**
107  * Utterance read from (or written to) a file: the members of
 *  utt_generic_info, in the same order, followed by the file-specific
 *  utt_file_info.
 *
 *  This is the file_utt member of the utterance_info union. The leading
 *  members must stay identical to utt_generic_info and live_utterance_info,
 *  because the union lets them be read through any of the three types.
 *  Only declared when _RTT is not defined.
108  */
109 typedef struct
110 {
   /* ---- begin members shared with utt_generic_info (keep in sync) ---- */
111   int   utt_type;
112   int   dim;
113   fepFramePkt  *frame;
114   int   num_chan;
115   int   do_channorm;
116   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
117   norm_info   *channorm; /*  Mirrored from the Wave object */
118   swicms_norm_info    *swicms;          /* copy of wave obj pointer */
119   spect_dist_info *backchan[MAX_CHAN_DIM];
120   featdata  *last_push;
121   int   voice_duration;
122   int   quiet_duration;
123   int   unsure_duration;
124   int   start_windback;
   /* ---- end of shared members ---- */
125   /*    voicing_info voice; */
126   utt_file_info file;     /* file-specific state, see utt_file_info */
127 }
128 file_utterance_info;
129 #endif
130 
131 /**
132  * Live utterance: the live_utt member of the utterance_info union.
 *
 *  As declared here it has exactly the members of utt_generic_info, in the
 *  same order, and nothing else. Keep it in sync with utt_generic_info and
 *  with the leading members of file_utterance_info, because the union lets
 *  them be read through any of the three types.
133  */
134 typedef struct
135 {
136   int   utt_type;
137   int   dim;
138   fepFramePkt  *frame;
139   int   num_chan;
140   int   do_channorm;
141   spect_dist_info **spchchan; /*  Mirrored from the Wave object */
142   norm_info   *channorm; /*  Mirrored from the Wave object */
143   swicms_norm_info    *swicms;        /* copy of wave obj pointer */
144   spect_dist_info *backchan[MAX_CHAN_DIM];
145   featdata  *last_push;
146   int   voice_duration;
147   int   quiet_duration;
148   int   unsure_duration;
149   int   start_windback;
150 }
151 live_utterance_info;
152 
153 /**
154  * An utterance of any kind, as a union of the generic, file and live forms.
 *
 *  Every struct member of the union starts with `int utt_type`, so the bare
 *  utt_type member below occupies the same storage as gen_utt.utt_type,
 *  file_utt.utt_type and live_utt.utt_type.
 *
 *  file_utt exists only when _RTT is not defined, so the size of this union
 *  can differ between _RTT and non-_RTT builds.
 *
 *  NOTE(review): the values utt_type may take are not defined in this
 *  header -- confirm where they are declared.
155  */
156 typedef union
157 {
158   int   utt_type; /* live or from file */
159   utt_generic_info    gen_utt; /* generic one */
160 #ifndef _RTT
161   file_utterance_info file_utt;
162 #endif
163   live_utterance_info live_utt;
164 } utterance_info;
165 
166 
167 /*
168 **  Size of the utb file headers and details
169 */
170 
171 #ifndef _RTT
/* On-disk format constants for UTT/UTB files. The sizes are sizes on disk
 * (presumably in bytes -- TODO confirm), not sizeof() of an in-memory type:
 * the UttHeader struct declared in this header has 8 unsigned short and
 * 3 unsigned long members, i.e. at least 28 bytes in memory.
 * NOTE(review): presumably UTT_VERSION is the value stored in
 * UttHeader.version and UTB_HEADER_USED is the occupied part of the
 * UTB_HEADER_SIZE-sized header -- confirm against the reader/writer code. */
172 #define UTT_VERSION 2
173 #define UTT_HEADER_SIZE 16        /*Size on disk*/
174 #define UTB_HEADER_SIZE 32        /*Size on disk*/
175 #define UTB_HEADER_USED 16        /*Size on disk*/   /* SAL */
176 
177 /**
178  * UTB file header.
 *
 *  In-memory form of the header; it is the type passed to read_utt_head(),
 *  write_utt_head() and init_utt_v5_header().
 *
 *  NOTE(review): unsigned long is 4 bytes on some platforms and 8 on others,
 *  and the compiler may insert padding, so sizeof(UttHeader) is not a fixed
 *  on-disk size and does not equal UTT_HEADER_SIZE or UTB_HEADER_SIZE.
 *  Confirm that the reader and writer transfer the header member by member
 *  rather than as one raw block.
 *
 *  NOTE(review): the checksum comment mentions "version 5" and there is an
 *  init_utt_v5_header() function, while UTT_VERSION is defined as 2 --
 *  confirm which version numbering applies to the version member.
179  */
180 typedef struct _UttHeader
181 {
182 	/**
183 	 * The size of the header in bytes.
184 	 */
185   unsigned short headerSize;
186 	/**
187 	 * The version of the file format.
188 	 */
189   unsigned short version;
190 	/**
191 	 * The size of the payload in bytes.
192 	 */
193   unsigned long  nBytes;
194 	/**
195 	 * The number of parameters per frame.
196 	 */
197   unsigned short nParametersPerFrame;
198 	/**
199 	 * 0=unknown, 1=none, 2=amp-based, 3=harmonicity-based, 4=mrec style
200 	 */
201   unsigned short channelNormalization;
202   /**
203 	 * 0=unknown, 1=no, 2=yes
204 	 */
205   unsigned short speakerNormalization;
206   /**
207 	 * 0=unknown, 1=no, 2=yes
208 	 */
209   unsigned short imeldaization;
210 	/**
211 	 * The number of parameters before imelda truncation.
212 	 */
213   unsigned short nOriginalParameters;
214 	/**
215 	 * The number of samples per frame.
216 	 */
217   unsigned short samplesPerFrame;
218 	/**
219 	 * The audio sample rate.
220 	 */
221   unsigned long  sampleRate;
222 	/**
223 	 * not used in version 5.
224 	 */
225   unsigned long  checksum;
226 }
227 UttHeader;
228 
/*
 * Data-file and UTB-file routines. They are declared only when _RTT is not
 * defined, and all but the last three take the file_utterance_info to
 * operate on as a parameter.
 *
 * NOTE(review): only the prototypes are visible in this header. What the
 * int return values mean, how errors are reported, and whether the frame
 * positions (begin, f_begin, f_end, position) count frames or bytes are not
 * shown here -- confirm against the implementation before relying on them.
 */
229 int    update_utb_header(file_utterance_info *utt, int frames, int samplerate,
230                          int framerate);
231 void    init_utt_v5_header(UttHeader *uhead, int dim, int samplerate, int framerate);
/* init_data_file and new_data_file take the same arguments, except that only
 * init_data_file has do_skip. */
232 int init_data_file(char *filename, file_utterance_info *utt, int dimen,
233                    char typ, int endian, int do_skip);
234 int new_data_file(char *filename, file_utterance_info *utt, int dimen,
235                   char typ, int endian);
236 int set_data_frame(file_utterance_info *utt, long begin);
237 int buffer_data_frames(file_utterance_info *utt, long f_begin, long f_end);
238 void more_data_frames(file_utterance_info *utt);
239 int save_data_frames(file_utterance_info *utt);
240 void close_data_stream(file_utterance_info *utt);
/* table is an output parameter in init_utb_file (pointer to pointer) and a
 * plain annotate pointer in position_utb_file. */
241 int init_utb_file(file_utterance_info *utt, annotate **table);
242 int position_utb_file(file_utterance_info *utt, long position, annotate *table);
/* load/save pairs for the "utb" and "short" data kinds; cf. the typ codes
 * documented on utt_file_info.typ. */
243 int load_utb_data(file_utterance_info *utt, int num_frames, int do_skip);
244 int load_short_data(file_utterance_info *utt, int num_frames, int do_skip);
245 int save_utb_data(file_utterance_info *utt, int num_frames);
246 int save_short_data(file_utterance_info *utt, int num_frames);
/* The header routines take the UttHeader and the PFile explicitly instead of
 * a file_utterance_info. */
247 int read_utt_head(UttHeader *head, PFile* datafile);
248 int write_utt_head(UttHeader *head, PFile* datafile);
249 int check_for_utb(char* filename);
250 
251 /*  TCP reading routines
 *
 *  Routines that take a file name and an annotate tag table. They are
 *  declared only when _RTT is not defined.
 *
 *  read_tcp and read_utb_table receive the table through an
 *  annotate pointer-to-pointer (output parameter). read_lst and save_tcp
 *  take an existing table together with its entry count, ntags.
 *
 *  NOTE(review): "TCP" appears to name a kind of tag file here (save_tcp
 *  takes a file name, tcpnam), not the network protocol -- confirm. The
 *  meaning of the int return values, the ownership of tables returned
 *  through tag_base, and the buffer size compose_tcp_name_of_utt expects
 *  for tcpname are not visible in this header.
252 */
253 int     read_tcp(char *filename, annotate **tag_base);
254 int     read_lst(char *filename, annotate *tag_base, int ntags);
255 int     read_utb_table(char *filename, annotate **tag_base);
256 void    save_tcp(char *tcpnam, annotate *tag, int ntags);
257 void compose_tcp_name_of_utt(char* uttname , char* tcpname);
258 
259 #endif
260 
/*
 * Utterance routines available in every build (they sit outside the
 * `#ifndef _RTT` sections). All of them operate on the utterance_info
 * union rather than on one specific utterance kind.
 *
 * NOTE(review): only the prototypes are visible in this header; the meaning
 * of the int return values and the layout expected behind pUttFrame are not
 * shown here -- confirm against the implementation.
 */
261 void init_utterance(utterance_info *utt, int utt_type, int dimen,
262                     int buffer_size, int keep_frames, int num_chan, int do_voicing);
/* The parameter names match the voice_duration, quiet_duration,
 * unsure_duration and start_windback members of the utterance structs;
 * presumably the values are stored there -- confirm. */
263 void set_voicing_durations(utterance_info *utt, int voice_duration,
264                            int quiet_duration, int unsure_duration,
265                            int start_windback);
266 void free_utterance(utterance_info *utt);
267 int utterance_started(utterance_info *utt);
268 int utterance_ended(utterance_info *utt);
269 int load_utterance_frame(utterance_info *utt, unsigned char* pUttFrame, int voicing);
270 int copy_utterance_frame(utterance_info *oututt, utterance_info *inutt);
271 
272 #endif /* _h_utteranc_ */
273