1 /*
2 ** Copyright 2003-2010, VisualOn, Inc.
3 **
4 ** Licensed under the Apache License, Version 2.0 (the "License");
5 ** you may not use this file except in compliance with the License.
6 ** You may obtain a copy of the License at
7 **
8 ** http://www.apache.org/licenses/LICENSE-2.0
9 **
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 */
16
17 /***********************************************************************
18 * File: dtx.c *
19 * *
20 * Description:DTX functions *
21 * *
22 ************************************************************************/
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include "typedef.h"
27 #include "basic_op.h"
28 #include "oper_32b.h"
29 #include "math_op.h"
30 #include "cnst.h"
31 #include "acelp.h" /* prototype of functions */
32 #include "bits.h"
33 #include "dtx.h"
34 #include "log2.h"
35 #include "mem_align.h"
36
37 static void aver_isf_history(
38 Word16 isf_old[],
39 Word16 indices[],
40 Word32 isf_aver[]
41 );
42
43 static void find_frame_indices(
44 Word16 isf_old_tx[],
45 Word16 indices[],
46 dtx_encState * st
47 );
48
49 static Word16 dithering_control(
50 dtx_encState * st
51 );
52
53 /* excitation energy adjustment depending on speech coder mode used, Q7 */
54 static Word16 en_adjust[9] =
55 {
56 230, /* mode0 = 7k : -5.4dB */
57 179, /* mode1 = 9k : -4.2dB */
58 141, /* mode2 = 12k : -3.3dB */
59 128, /* mode3 = 14k : -3.0dB */
60 122, /* mode4 = 16k : -2.85dB */
61 115, /* mode5 = 18k : -2.7dB */
62 115, /* mode6 = 20k : -2.7dB */
63 115, /* mode7 = 23k : -2.7dB */
64 115 /* mode8 = 24k : -2.7dB */
65 };
66
67 /**************************************************************************
68 *
69 * Function : dtx_enc_init
70 *
71 **************************************************************************/
dtx_enc_init(dtx_encState ** st,Word16 isf_init[],VO_MEM_OPERATOR * pMemOP)72 Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP)
73 {
74 dtx_encState *s;
75
76 if (st == (dtx_encState **) NULL)
77 {
78 fprintf(stderr, "dtx_enc_init: invalid parameter\n");
79 return -1;
80 }
81 *st = NULL;
82
83 /* allocate memory */
84 if ((s = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB)) == NULL)
85 {
86 fprintf(stderr, "dtx_enc_init: can not malloc state structure\n");
87 return -1;
88 }
89 dtx_enc_reset(s, isf_init);
90 *st = s;
91 return 0;
92 }
93
94 /**************************************************************************
95 *
96 * Function : dtx_enc_reset
97 *
98 **************************************************************************/
dtx_enc_reset(dtx_encState * st,Word16 isf_init[])99 Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[])
100 {
101 Word32 i;
102
103 if (st == (dtx_encState *) NULL)
104 {
105 fprintf(stderr, "dtx_enc_reset: invalid parameter\n");
106 return -1;
107 }
108 st->hist_ptr = 0;
109 st->log_en_index = 0;
110
111 /* Init isf_hist[] */
112 for (i = 0; i < DTX_HIST_SIZE; i++)
113 {
114 Copy(isf_init, &st->isf_hist[i * M], M);
115 }
116 st->cng_seed = RANDOM_INITSEED;
117
118 /* Reset energy history */
119 Set_zero(st->log_en_hist, DTX_HIST_SIZE);
120
121 st->dtxHangoverCount = DTX_HANG_CONST;
122 st->decAnaElapsedCount = 32767;
123
124 for (i = 0; i < 28; i++)
125 {
126 st->D[i] = 0;
127 }
128
129 for (i = 0; i < DTX_HIST_SIZE - 1; i++)
130 {
131 st->sumD[i] = 0;
132 }
133
134 return 1;
135 }
136
137 /**************************************************************************
138 *
139 * Function : dtx_enc_exit
140 *
141 **************************************************************************/
dtx_enc_exit(dtx_encState ** st,VO_MEM_OPERATOR * pMemOP)142 void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP)
143 {
144 if (st == NULL || *st == NULL)
145 return;
146 /* deallocate memory */
147 mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB);
148 *st = NULL;
149 return;
150 }
151
152
153 /**************************************************************************
154 *
155 * Function : dtx_enc
156 *
157 **************************************************************************/
dtx_enc(dtx_encState * st,Word16 isf[M],Word16 * exc2,Word16 ** prms)158 Word16 dtx_enc(
159 dtx_encState * st, /* i/o : State struct */
160 Word16 isf[M], /* o : CN ISF vector */
161 Word16 * exc2, /* o : CN excitation */
162 Word16 ** prms
163 )
164 {
165 Word32 i, j;
166 Word16 indice[7];
167 Word16 log_en, gain, level, exp, exp0, tmp;
168 Word16 log_en_int_e, log_en_int_m;
169 Word32 L_isf[M], ener32, level32;
170 Word16 isf_order[3];
171 Word16 CN_dith;
172
173 /* VOX mode computation of SID parameters */
174 log_en = 0;
175 for (i = 0; i < M; i++)
176 {
177 L_isf[i] = 0;
178 }
179 /* average energy and isf */
180 for (i = 0; i < DTX_HIST_SIZE; i++)
181 {
182 /* Division by DTX_HIST_SIZE = 8 has been done in dtx_buffer. log_en is in Q10 */
183 log_en = add(log_en, st->log_en_hist[i]);
184
185 }
186 find_frame_indices(st->isf_hist, isf_order, st);
187 aver_isf_history(st->isf_hist, isf_order, L_isf);
188
189 for (j = 0; j < M; j++)
190 {
191 isf[j] = (Word16)(L_isf[j] >> 3); /* divide by 8 */
192 }
193
194 /* quantize logarithmic energy to 6 bits (-6 : 66 dB) which corresponds to -2:22 in log2(E). */
195 /* st->log_en_index = (short)( (log_en + 2.0) * 2.625 ); */
196
197 /* increase dynamics to 7 bits (Q8) */
198 log_en = (log_en >> 2);
199
200 /* Add 2 in Q8 = 512 to get log2(E) between 0:24 */
201 log_en = add(log_en, 512);
202
203 /* Multiply by 2.625 to get full 6 bit range. 2.625 = 21504 in Q13. The result is in Q6 */
204 log_en = mult(log_en, 21504);
205
206 /* Quantize Energy */
207 st->log_en_index = shr(log_en, 6);
208
209 if(st->log_en_index > 63)
210 {
211 st->log_en_index = 63;
212 }
213 if (st->log_en_index < 0)
214 {
215 st->log_en_index = 0;
216 }
217 /* Quantize ISFs */
218 Qisf_ns(isf, isf, indice);
219
220
221 Parm_serial(indice[0], 6, prms);
222 Parm_serial(indice[1], 6, prms);
223 Parm_serial(indice[2], 6, prms);
224 Parm_serial(indice[3], 5, prms);
225 Parm_serial(indice[4], 5, prms);
226
227 Parm_serial((st->log_en_index), 6, prms);
228
229 CN_dith = dithering_control(st);
230 Parm_serial(CN_dith, 1, prms);
231
232 /* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) ); */
233 /* log2(E) in Q9 (log2(E) lies in between -2:22) */
234 log_en = shl(st->log_en_index, 15 - 6);
235
236 /* Divide by 2.625; log_en will be between 0:24 */
237 log_en = mult(log_en, 12483);
238 /* the result corresponds to log2(gain) in Q10 */
239
240 /* Find integer part */
241 log_en_int_e = (log_en >> 10);
242
243 /* Find fractional part */
244 log_en_int_m = (Word16) (log_en & 0x3ff);
245 log_en_int_m = shl(log_en_int_m, 5);
246
247 /* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */
248 /* Add 16 in order to have the result of pow2 in Q16 */
249 log_en_int_e = add(log_en_int_e, 16 - 1);
250
251 level32 = Pow2(log_en_int_e, log_en_int_m); /* Q16 */
252 exp0 = norm_l(level32);
253 level32 = (level32 << exp0); /* level in Q31 */
254 exp0 = (15 - exp0);
255 level = extract_h(level32); /* level in Q15 */
256
257 /* generate white noise vector */
258 for (i = 0; i < L_FRAME; i++)
259 {
260 exc2[i] = (Random(&(st->cng_seed)) >> 4);
261 }
262
263 /* gain = level / sqrt(ener) * sqrt(L_FRAME) */
264
265 /* energy of generated excitation */
266 ener32 = Dot_product12(exc2, exc2, L_FRAME, &exp);
267
268 Isqrt_n(&ener32, &exp);
269
270 gain = extract_h(ener32);
271
272 gain = mult(level, gain); /* gain in Q15 */
273
274 exp = add(exp0, exp);
275
276 /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
277 exp += 4;
278
279 for (i = 0; i < L_FRAME; i++)
280 {
281 tmp = mult(exc2[i], gain); /* Q0 * Q15 */
282 exc2[i] = shl(tmp, exp);
283 }
284
285 return 0;
286 }
287
288 /**************************************************************************
289 *
290 * Function : dtx_buffer Purpose : handles the DTX buffer
291 *
292 **************************************************************************/
dtx_buffer(dtx_encState * st,Word16 isf_new[],Word32 enr,Word16 codec_mode)293 Word16 dtx_buffer(
294 dtx_encState * st, /* i/o : State struct */
295 Word16 isf_new[], /* i : isf vector */
296 Word32 enr, /* i : residual energy (in L_FRAME) */
297 Word16 codec_mode
298 )
299 {
300 Word16 log_en;
301
302 Word16 log_en_e;
303 Word16 log_en_m;
304 st->hist_ptr = add(st->hist_ptr, 1);
305 if(st->hist_ptr == DTX_HIST_SIZE)
306 {
307 st->hist_ptr = 0;
308 }
309 /* copy lsp vector into buffer */
310 Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M);
311
312 /* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f); */
313 Log2(enr, &log_en_e, &log_en_m);
314
315 /* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */
316 log_en = shl(log_en_e, 7); /* Q7 */
317 log_en = add(log_en, shr(log_en_m, 15 - 7));
318
319 /* Find energy per sample by multiplying with 0.0059322, i.e subtract log2(1/0.0059322) = 7.39722 The
320 * constant 0.0059322 takes into account windowings and analysis length from autocorrelation
321 * computations; 7.39722 in Q7 = 947 */
322 /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
323 /* log_en = sub( log_en, 947 + en_adjust[codec_mode] ); */
324
325 /* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0 (1024 in Q7) */
326 /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */
327
328 log_en = sub(log_en, add(1024, en_adjust[codec_mode]));
329
330 /* Insert into the buffer */
331 st->log_en_hist[st->hist_ptr] = log_en;
332 return 0;
333 }
334
335 /**************************************************************************
336 *
337 * Function : tx_dtx_handler Purpose : adds extra speech hangover
338 * to analyze speech on
339 * the decoding side.
340 **************************************************************************/
tx_dtx_handler(dtx_encState * st,Word16 vad_flag,Word16 * usedMode)341 void tx_dtx_handler(dtx_encState * st, /* i/o : State struct */
342 Word16 vad_flag, /* i : vad decision */
343 Word16 * usedMode /* i/o : mode changed or not */
344 )
345 {
346
347 /* this state machine is in synch with the GSMEFR txDtx machine */
348 st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1);
349
350 if (vad_flag != 0)
351 {
352 st->dtxHangoverCount = DTX_HANG_CONST;
353 } else
354 { /* non-speech */
355 if (st->dtxHangoverCount == 0)
356 { /* out of decoder analysis hangover */
357 st->decAnaElapsedCount = 0;
358 *usedMode = MRDTX;
359 } else
360 { /* in possible analysis hangover */
361 st->dtxHangoverCount = sub(st->dtxHangoverCount, 1);
362
363 /* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH */
364 if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount),
365 DTX_ELAPSED_FRAMES_THRESH) < 0)
366 {
367 *usedMode = MRDTX;
368 /* if short time since decoder update, do not add extra HO */
369 }
370 /* else override VAD and stay in speech mode *usedMode and add extra hangover */
371 }
372 }
373
374 return;
375 }
376
377
378
/*
 * Sums the DTX_HIST_SIZE buffered ISF vectors component-wise.
 *
 * isf_old  : ISF history, DTX_HIST_SIZE vectors of M values each; modified
 *            while summing and restored before returning
 * indices  : indices[0] and indices[1] select history vectors that are
 *            replaced by the vector selected by indices[2] for the duration
 *            of the summation; a value of -1 means "no replacement"
 * isf_aver : receives the M sums (not divided by the number of vectors)
 */
static void aver_isf_history(
        Word16 isf_old[],
        Word16 indices[],
        Word32 isf_aver[]
        )
{
    Word16 saved[2 * M];
    Word32 slot, coef, frame;
    Word32 acc;

    /* Save the vectors that are about to be replaced and overwrite them
     * with the median vector (the one selected by indices[2]) */
    for (slot = 0; slot < 2; slot++)
    {
        Word16 *victim;
        Word16 *median;
        Word16 *backup;

        if (indices[slot] == -1)
        {
            continue;
        }

        victim = &isf_old[indices[slot] * M];
        median = &isf_old[indices[2] * M];
        backup = &saved[slot * M];

        for (coef = 0; coef < M; coef++)
        {
            backup[coef] = victim[coef];
            victim[coef] = median[coef];
        }
    }

    /* Sum each ISF component over the whole history */
    for (coef = 0; coef < M; coef++)
    {
        acc = 0;
        for (frame = 0; frame < DTX_HIST_SIZE; frame++)
        {
            acc = L_add(acc, L_deposit_l(isf_old[frame * M + coef]));
        }
        isf_aver[coef] = acc;
    }

    /* Put the saved vectors back so the history is left as it was */
    for (slot = 0; slot < 2; slot++)
    {
        Word16 *victim;
        Word16 *backup;

        if (indices[slot] == -1)
        {
            continue;
        }

        victim = &isf_old[indices[slot] * M];
        backup = &saved[slot * M];

        for (coef = 0; coef < M; coef++)
        {
            victim[coef] = backup[coef];
        }
    }
}
429
/*
 * Updates the ISF distance bookkeeping kept in st (D[] and sumD[]) for the
 * newest history entry and selects the history slots used by the ISF
 * averaging.
 *
 * isf_old_tx : ISF history, DTX_HIST_SIZE vectors of M values each
 * indices    : output; indices[0] = slot with the largest distance sum,
 *              indices[1] = slot with the second largest one, indices[2] =
 *              slot with the smallest one. indices[0] / indices[1] are set
 *              to -1 when the threshold test at the end decides that no
 *              replacement is to be performed for them.
 * st         : encoder state; D[], sumD[] are updated, hist_ptr is read
 */
static void find_frame_indices(
        Word16 isf_old_tx[],
        Word16 indices[],
        dtx_encState * st
        )
{
    Word32 dist, sum_min, sum_max, sum_max2nd;
    Word16 col, row;
    Word16 stride, run, diff, norm;
    Word16 slot;

    /* Remove the effect of the oldest frame from the column sums          */
    /* sumD[0 .. DTX_HIST_SIZE_MIN_ONE - 1]. The element                   */
    /* sumD[DTX_HIST_SIZE_MIN_ONE] is not updated since it is overwritten  */
    /* by the shift below.                                                 */
    stride = DTX_HIST_SIZE_MIN_ONE;
    row = -1;
    for (col = 0; col < DTX_HIST_SIZE_MIN_ONE; col++)
    {
        row = add(row, stride);
        st->sumD[col] = L_sub(st->sumD[col], st->D[row]);
        stride = sub(stride, 1);
    }

    /* Shift the column sums by one position; the sum belonging to the     */
    /* oldest frame drops out. sumD[0], the sum of the distances between   */
    /* the latest isf and the other isfs, is computed during this call     */
    /* and therefore starts at zero.                                       */
    for (col = DTX_HIST_SIZE_MIN_ONE; col > 0; col--)
    {
        st->sumD[col] = st->sumD[col - 1];
    }
    st->sumD[0] = 0;

    /* Remove the oldest frame from the distance matrix. Note that the     */
    /* distance matrix is stored as a one-dimensional array to save        */
    /* static memory.                                                      */
    run = 0;
    for (col = 27; col >= 12; col = (Word16) (col - run))
    {
        run = add(run, 1);
        for (row = run; row > 0; row--)
        {
            st->D[col - row + 1] = st->D[col - row - run];
        }
    }

    /* Compute the first column of the distance matrix D (squared          */
    /* Euclidean distances from the latest isf to the other isfs),         */
    /* walking backwards through the circular history.                     */
    slot = st->hist_ptr;
    for (col = 1; col < DTX_HIST_SIZE; col++)
    {
        slot = sub(slot, 1);
        if (slot < 0)
        {
            slot = DTX_HIST_SIZE_MIN_ONE;
        }

        dist = 0;
        for (row = 0; row < M; row++)
        {
            diff = sub(isf_old_tx[st->hist_ptr * M + row], isf_old_tx[slot * M + row]);
            dist = L_mac(dist, diff, diff);
        }
        st->D[col - 1] = dist;

        /* Update also the column sums. */
        st->sumD[0] = L_add(st->sumD[0], st->D[col - 1]);
        st->sumD[col] = L_add(st->sumD[col], st->D[col - 1]);
    }

    /* Find the columns with the maximum and minimum distance sums */
    sum_max = st->sumD[0];
    sum_min = st->sumD[0];
    indices[0] = 0;
    indices[2] = 0;
    for (col = 1; col < DTX_HIST_SIZE; col++)
    {
        if (L_sub(st->sumD[col], sum_max) > 0)
        {
            indices[0] = col;
            sum_max = st->sumD[col];
        }
        if (L_sub(st->sumD[col], sum_min) < 0)
        {
            indices[2] = col;
            sum_min = st->sumD[col];
        }
    }

    /* Find the second largest distance sum (any column but indices[0]) */
    sum_max2nd = -2147483647L;
    indices[1] = -1;
    for (col = 0; col < DTX_HIST_SIZE; col++)
    {
        if ((L_sub(st->sumD[col], sum_max2nd) > 0) && (sub(col, indices[0]) != 0))
        {
            indices[1] = col;
            sum_max2nd = st->sumD[col];
        }
    }

    /* Convert the column numbers (age relative to the newest frame) into  */
    /* slots of the circular history buffer.                               */
    for (col = 0; col < 3; col++)
    {
        indices[col] = sub(st->hist_ptr, indices[col]);
        if (indices[col] < 0)
        {
            indices[col] = add(indices[col], DTX_HIST_SIZE);
        }
    }

    /* If maximum distance/MED_THRESH is smaller than minimum distance     */
    /* then the median ISF vector replacement is not performed             */
    norm = norm_l(sum_max);
    sum_max = (sum_max << norm);
    sum_min = (sum_min << norm);
    dist = L_mult(voround(sum_max), INV_MED_THRESH);
    if (dist <= sum_min)
    {
        indices[0] = -1;
    }

    /* If second largest distance/MED_THRESH is smaller than minimum       */
    /* distance then the median ISF vector replacement is not performed    */
    sum_max2nd = L_shl(sum_max2nd, norm);
    dist = L_mult(voround(sum_max2nd), INV_MED_THRESH);
    if (dist <= sum_min)
    {
        indices[1] = -1;
    }
}
565
dithering_control(dtx_encState * st)566 static Word16 dithering_control(
567 dtx_encState * st
568 )
569 {
570 Word16 tmp, mean, CN_dith, gain_diff;
571 Word32 i, ISF_diff;
572
573 /* determine how stationary the spectrum of background noise is */
574 ISF_diff = 0;
575 for (i = 0; i < 8; i++)
576 {
577 ISF_diff = L_add(ISF_diff, st->sumD[i]);
578 }
579 if ((ISF_diff >> 26) > 0)
580 {
581 CN_dith = 1;
582 } else
583 {
584 CN_dith = 0;
585 }
586
587 /* determine how stationary the energy of background noise is */
588 mean = 0;
589 for (i = 0; i < DTX_HIST_SIZE; i++)
590 {
591 mean = add(mean, st->log_en_hist[i]);
592 }
593 mean = (mean >> 3);
594 gain_diff = 0;
595 for (i = 0; i < DTX_HIST_SIZE; i++)
596 {
597 tmp = abs_s(sub(st->log_en_hist[i], mean));
598 gain_diff = add(gain_diff, tmp);
599 }
600 if (gain_diff > GAIN_THR)
601 {
602 CN_dith = 1;
603 }
604 return CN_dith;
605 }
606