1 /* Copyright (C) 2002 Jean-Marc Valin
2 File: vbr.c
3
4 VBR-related routines
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions
8 are met:
9
10 - Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12
13 - Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 - Neither the name of the Xiph.org Foundation nor the names of its
18 contributors may be used to endorse or promote products derived from
19 this software without specific prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33 */
34
35 #ifdef HAVE_CONFIG_H
36 #include "config.h"
37 #endif
38
39 #include "vbr.h"
40 #include <math.h>
41
42
/* Square of x. The argument is expanded twice, so it must be free of side
   effects. */
#define sqr(x) ((x)*(x))

/* Constant added to the frame energy before its log is taken, and the value
   the log-energy history is seeded with in vbr_init(). It is also used as an
   energy threshold guarding some of the noise-estimate updates. */
#define MIN_ENERGY 6000
/* Exponent applied to the frame energy (pow(ener, NOISE_POW)) before it is
   compared with, or accumulated into, the noise-level estimate. */
#define NOISE_POW .3
47
48 #ifndef DISABLE_VBR
49
/* VBR threshold table, 9 rows of 11 entries. Each row is labelled with the
   nominal bit-rate it stands for.
   NOTE(review): the "nb" prefix presumably means narrowband, and the 11
   columns are presumably indexed by quality setting 0..10 -- confirm
   against the callers. */
const float vbr_nb_thresh[9][11]={
   /* CNG     */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f},
   /* 6 kbps  */ {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f},
   /* 8 kbps  */ {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f},
   /* 11 kbps */ {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f},
   /* 15 kbps */ {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f},
   /* 18 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f},
   /* 24 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f},
   /* 4 kbps  */ { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}
};
61
62
/* VBR threshold table, 5 rows of 11 entries. Each row is labelled with the
   nominal bit-rate it stands for.
   NOTE(review): the "hb" prefix presumably means high-band, and the 11
   columns are presumably indexed by quality setting 0..10 -- confirm
   against the callers. */
const float vbr_hb_thresh[5][11]={
   /* silence */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 6 kbps  */ {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f},
   /* 10 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f},
   /* 18 kbps */ {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}
};
70
/* VBR threshold table, 2 rows of 11 entries. Each row is labelled with the
   nominal bit-rate it stands for.
   NOTE(review): the "uhb" prefix presumably means ultra-high-band, and the
   11 columns are presumably indexed by quality setting 0..10 -- confirm
   against the callers. */
const float vbr_uhb_thresh[2][11]={
   /* silence */ {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps  */ { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}
};
75
/*
  Put a VBRState into its initial condition.

  The energy trackers, pitch smoothing and previous-quality memory are
  reset, the noise estimate is seeded from MIN_ENERGY, and every slot of
  the log-energy history is set to log(MIN_ENERGY).
*/
void vbr_init(VBRState *vbr)
{
   /* Same value for every history slot, so evaluate the log only once. */
   const double floor_log_energy = log(MIN_ENERGY);
   int slot;

   /* Energy tracking */
   vbr->average_energy = 0;
   vbr->last_energy = 1;
   vbr->accum_sum = 0;
   vbr->energy_alpha = .1;

   /* Pitch and quality memory */
   vbr->soft_pitch = 0;
   vbr->last_pitch_coef = 0;
   vbr->last_quality = 0;

   /* Noise estimate: weighted sum and weight, both starting at a weight of
      .05; the level is their ratio as stored in the state. */
   vbr->consec_noise = 0;
   vbr->noise_accum_count = .05;
   vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW);
   vbr->noise_level = vbr->noise_accum/vbr->noise_accum_count;

   for (slot = VBR_MEMORY_SIZE-1; slot >= 0; slot--)
      vbr->last_log_energy[slot] = floor_log_energy;
}
97
98
/*
  This function should analyse the signal and decide how critical the
  coding error will be perceptually. The following factors should be
  taken into account:

  -Attacks (positive energy derivative) should be coded with more bits

  -Stationary voiced segments should receive more bits

  -Segments with (very) low absolute energy should receive fewer bits (maybe
   only shaped noise?)

  -DTX for near-zero energy?

  -Stationary fricative segments should have fewer bits

  -Temporal masking: when energy slope is decreasing, decrease the bit-rate

  -Decrease bit-rate for males (low pitch)?

  -(wideband only) fewer bits in the high-band when signal is very
   non-stationary (harder to notice high-frequency noise)???

*/
123
/*
  Analyse one frame and return the VBR quality estimate for it.

  vbr        - analysis state, updated in place: average/last energy, noise
               accumulator and level, consecutive-noise counter, smoothed
               pitch coefficient, last quality and the log-energy history
  sig        - frame samples; sig[0] .. sig[len-1] are read
  len        - number of samples in sig
  pitch      - not used by this implementation (kept for the interface)
  pitch_coef - pitch coefficient of the frame; it is compared against .4 to
               derive the voicing measure and the pitch-related quality bonus

  Returns the quality estimate, which is always within [-1, 10].
*/
float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float pitch_coef)
{
   int i;
   float ener=0, ener1=0, ener2=0;
   float qual=7;
   float log_energy;
   float non_st=0;
   float voicing;
   float pow_ener;
   /* log(3 + consec_noise) - log(3); only evaluated for noise frames */
   double noise_penalty=0;

   (void)pitch;

   /* Energy of the first and second half of the frame */
   for (i=0;i<len>>1;i++)
      ener1 += ((float)sig[i])*sig[i];

   for (i=len>>1;i<len;i++)
      ener2 += ((float)sig[i])*sig[i];
   ener=ener1+ener2;

   /* Non-stationarity: scaled squared distance between this frame's
      log-energy and each remembered one, capped at 1 */
   log_energy = log(ener+MIN_ENERGY);
   for (i=0;i<VBR_MEMORY_SIZE;i++)
      non_st += sqr(log_energy-vbr->last_log_energy[i]);
   non_st = non_st/(30*VBR_MEMORY_SIZE);
   if (non_st>1)
      non_st=1;

   /* Signed square of (pitch_coef-.4): negative below .4, positive above */
   voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4);
   vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
   vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
   pow_ener = pow(ener,NOISE_POW);
   /* While the accumulator still carries (almost) only its initial weight,
      re-seed it from the current frame */
   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
      vbr->noise_accum = .05*pow_ener;

   /* Noise / voice-activity decision: weakly voiced, stationary frames whose
      energy is close to the noise level count as noise */
   if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
       || (voicing<0 && non_st < .05))
   {
      float tmp;
      vbr->consec_noise++;
      /* Limit a single frame's contribution to 3x the current level */
      if (pow_ener > 3*vbr->noise_level)
         tmp = 3*vbr->noise_level;
      else
         tmp = pow_ener;
      /* Only adapt the estimate once the noise run is long enough */
      if (vbr->consec_noise>=4)
      {
         vbr->noise_accum = .95*vbr->noise_accum + .05*tmp;
         vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
      }
   } else {
      vbr->consec_noise=0;
   }

   /* Frames quieter than the estimate always pull it down */
   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
   {
      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
   }

   /* Checking for very low absolute energy */
   if (ener < 30000)
   {
      qual -= .7;
      if (ener < 10000)
         qual-=.7;
      if (ener < 3000)
         qual-=.7;
   } else {
      float short_diff, long_diff;
      /* Log energy ratio against the previous frame and the running average */
      short_diff = log((ener+1)/(1+vbr->last_energy));
      long_diff = log((ener+1)/(1+vbr->average_energy));

      if (long_diff<-5)
         long_diff=-5;
      if (long_diff>2)
         long_diff=2;

      if (long_diff>0)
         qual += .6*long_diff;
      if (long_diff<0)
         qual += .5*long_diff;
      if (short_diff>0)
      {
         if (short_diff>5)
            short_diff=5;
         qual += .5*short_diff;
      }
      /* Checking for energy increases */
      if (ener2 > 1.6*ener1)
         qual += .5;
   }
   vbr->last_energy = ener;
   vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef;
   qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4));

   /* Quality may rise immediately but falls only half-way per frame */
   if (qual < vbr->last_quality)
      qual = .5*qual + .5*vbr->last_quality;
   if (qual<4)
      qual=4;
   if (qual>10)
      qual=10;

   if (vbr->consec_noise>=3)
      qual=4;

   /* Penalty growing with the length of the current noise run */
   if (vbr->consec_noise)
   {
      noise_penalty = log(3.0 + vbr->consec_noise)-log(3);
      qual -= 1.0 * noise_penalty;
   }
   if (qual<0)
      qual=0;

   if (ener<60000)
   {
      /* consec_noise>2 implies noise_penalty was computed above */
      if (vbr->consec_noise>2)
         qual-=0.5*noise_penalty;
      if (ener<10000&&vbr->consec_noise>2)
         qual-=0.5*noise_penalty;
      if (qual<0)
         qual=0;
      qual += .3*log(.0001+ener/60000.0);
   }
   if (qual<-1)
      qual=-1;

   vbr->last_pitch_coef = pitch_coef;
   vbr->last_quality = qual;

   /* Shift the log-energy history and store this frame at the front */
   for (i=VBR_MEMORY_SIZE-1;i>0;i--)
      vbr->last_log_energy[i] = vbr->last_log_energy[i-1];
   vbr->last_log_energy[0] = log_energy;

   return qual;
}
270
/* Counterpart of vbr_init(). It performs no work: the body is intentionally
   empty and the state is left untouched. */
void vbr_destroy(VBRState *vbr)
{
   (void)vbr;
}
274
275 #endif /* #ifndef DISABLE_VBR */
276