• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ** Copyright 2003-2010, VisualOn, Inc.
3  **
4  ** Licensed under the Apache License, Version 2.0 (the "License");
5  ** you may not use this file except in compliance with the License.
6  ** You may obtain a copy of the License at
7  **
8  **     http://www.apache.org/licenses/LICENSE-2.0
9  **
10  ** Unless required by applicable law or agreed to in writing, software
11  ** distributed under the License is distributed on an "AS IS" BASIS,
12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  ** See the License for the specific language governing permissions and
14  ** limitations under the License.
15  */
16 
17 /***********************************************************************
18 *       File: dtx.c                                                    *
19 *                                                                      *
20 *       Description:DTX functions                                  *
21 *                                                                      *
22 ************************************************************************/
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include "typedef.h"
27 #include "basic_op.h"
28 #include "oper_32b.h"
29 #include "math_op.h"
30 #include "cnst.h"
31 #include "acelp.h"                         /* prototype of functions    */
32 #include "bits.h"
33 #include "dtx.h"
34 #include "log2.h"
35 #include "mem_align.h"
36 
37 static void aver_isf_history(
38         Word16 isf_old[],
39         Word16 indices[],
40         Word32 isf_aver[]
41         );
42 
43 static void find_frame_indices(
44         Word16 isf_old_tx[],
45         Word16 indices[],
46         dtx_encState * st
47         );
48 
49 static Word16 dithering_control(
50         dtx_encState * st
51         );
52 
53 /* excitation energy adjustment depending on speech coder mode used, Q7 */
54 static Word16 en_adjust[9] =
55 {
56     230,                                   /* mode0 = 7k  :  -5.4dB  */
57     179,                                   /* mode1 = 9k  :  -4.2dB  */
58     141,                                   /* mode2 = 12k :  -3.3dB  */
59     128,                                   /* mode3 = 14k :  -3.0dB  */
60     122,                                   /* mode4 = 16k :  -2.85dB */
61     115,                                   /* mode5 = 18k :  -2.7dB  */
62     115,                                   /* mode6 = 20k :  -2.7dB  */
63     115,                                   /* mode7 = 23k :  -2.7dB  */
64     115                                    /* mode8 = 24k :  -2.7dB  */
65 };
66 
67 /**************************************************************************
68 *
69 * Function    : dtx_enc_init
70 *
71 **************************************************************************/
/*
 * Allocate a DTX encoder state through pMemOP and bring it to its reset
 * condition.
 *
 *   st       : o   : receives the new state; set to NULL on any failure
 *   isf_init : i   : ISF vector forwarded to dtx_enc_reset()
 *   pMemOP   : i   : memory operator handed to mem_malloc()
 *
 * Returns 0 on success, -1 when st is NULL or the allocation fails.
 */
Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP)
{
    dtx_encState *state;

    if (st == (dtx_encState **) NULL)
    {
        fprintf(stderr, "dtx_enc_init: invalid parameter\n");
        return -1;
    }

    /* The caller must never see a stale pointer if the allocation fails. */
    *st = NULL;

    state = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB);
    if (state == NULL)
    {
        fprintf(stderr, "dtx_enc_init: can not malloc state structure\n");
        return -1;
    }

    dtx_enc_reset(state, isf_init);
    *st = state;

    return 0;
}
93 
94 /**************************************************************************
95 *
96 * Function    : dtx_enc_reset
97 *
98 **************************************************************************/
/*
 * Put a DTX encoder state into its initial condition.
 *
 *   st       : i/o : state to reset
 *   isf_init : i   : ISF vector (M values) copied into every history slot
 *
 * Returns 1 on success, -1 when st is NULL.
 */
Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[])
{
    Word32 i;

    if (st == (dtx_encState *) NULL)
    {
        fprintf(stderr, "dtx_enc_reset: invalid parameter\n");
        return -1;
    }
    st->hist_ptr = 0;
    st->log_en_index = 0;

    /* Init isf_hist[]: every history slot starts out as isf_init */
    for (i = 0; i < DTX_HIST_SIZE; i++)
    {
        Copy(isf_init, &st->isf_hist[i * M], M);
    }
    st->cng_seed = RANDOM_INITSEED;

    /* Reset energy history */
    Set_zero(st->log_en_hist, DTX_HIST_SIZE);

    st->dtxHangoverCount = DTX_HANG_CONST;
    st->decAnaElapsedCount = 32767;

    /* Clear the distance matrix (stored as a one-dimensional array) */
    for (i = 0; i < 28; i++)
    {
        st->D[i] = 0;
    }

    /* Clear every column sum. find_frame_indices() and dithering_control() */
    /* index sumD[] up to DTX_HIST_SIZE - 1, so the last element must not   */
    /* be left holding whatever the allocator returned.                     */
    for (i = 0; i < DTX_HIST_SIZE; i++)
    {
        st->sumD[i] = 0;
    }

    return 1;
}
136 
137 /**************************************************************************
138 *
139 * Function    : dtx_enc_exit
140 *
141 **************************************************************************/
/*
 * Release a DTX encoder state through pMemOP and clear the caller's pointer.
 * Does nothing when st or *st is NULL.
 */
void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP)
{
    if (st != NULL && *st != NULL)
    {
        /* deallocate memory and drop the dangling reference */
        mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB);
        *st = NULL;
    }
}
151 
152 
153 /**************************************************************************
154 *
155 * Function    : dtx_enc
156 *
157 **************************************************************************/
/*
 * Compute and serialise the SID (comfort noise) parameters and build the
 * comfort-noise excitation for the current frame.
 *
 *   st   : i/o : State struct
 *   isf  : o   : CN ISF vector
 *   exc2 : o   : CN excitation (L_FRAME samples)
 *   prms : i/o : parameter stream handed to Parm_serial()
 *
 * Always returns 0.
 */
Word16 dtx_enc(
        dtx_encState * st,
        Word16 isf[M],
        Word16 * exc2,
        Word16 ** prms
          )
{
    /* Bit widths of the five ISF indices, in the order they are serialised. */
    static const Word16 isf_index_bits[5] = { 6, 6, 6, 5, 5 };

    Word32 n;
    Word16 isf_indices[7];
    Word16 en_log, scale, cn_level, gain_exp, level_exp, norm, scaled;
    Word16 en_int, en_frac;
    Word32 isf_sum[M], exc_energy, level_q;
    Word16 frame_order[3];
    Word16 dither_flag;

    /* ---- VOX mode computation of SID parameters ---- */
    for (n = 0; n < M; n++)
    {
        isf_sum[n] = 0;
    }

    /* Accumulate the energy history. The division by DTX_HIST_SIZE = 8 has */
    /* been done in dtx_buffer, so the sum is the average in Q10.            */
    en_log = 0;
    for (n = 0; n < DTX_HIST_SIZE; n++)
    {
        en_log = add(en_log, st->log_en_hist[n]);
    }

    /* Average the ISF history */
    find_frame_indices(st->isf_hist, frame_order, st);
    aver_isf_history(st->isf_hist, frame_order, isf_sum);

    for (n = 0; n < M; n++)
    {
        isf[n] = (Word16)(isf_sum[n] >> 3);  /* divide by 8 */
    }

    /* Quantize logarithmic energy to 6 bits (-6 : 66 dB), which corresponds */
    /* to -2:22 in log2(E):                                                  */
    /*     st->log_en_index = (short)( (log_en + 2.0) * 2.625 );             */

    en_log = (en_log >> 2);                /* increase dynamics to 7 bits (Q8) */
    en_log = add(en_log, 512);             /* + 2 in Q8 -> log2(E) in 0:24     */
    en_log = mult(en_log, 21504);          /* * 2.625 (Q13), result in Q6      */

    st->log_en_index = shr(en_log, 6);
    if (st->log_en_index > 63)
    {
        st->log_en_index = 63;
    }
    else if (st->log_en_index < 0)
    {
        st->log_en_index = 0;
    }

    /* Quantize ISFs (in place) */
    Qisf_ns(isf, isf, isf_indices);

    /* ---- Serialise: ISF indices, energy index, dithering flag ---- */
    for (n = 0; n < 5; n++)
    {
        Parm_serial(isf_indices[n], isf_index_bits[n], prms);
    }

    Parm_serial((st->log_en_index), 6, prms);

    dither_flag = dithering_control(st);
    Parm_serial(dither_flag, 1, prms);

    /* ---- Rebuild the level from the quantized index ---- */
    /* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) ); */

    /* log2(E) in Q9 (log2(E) lies in between -2:22) */
    en_log = shl(st->log_en_index, 15 - 6);

    /* Divide by 2.625; the result corresponds to log2(gain) in Q10 */
    en_log = mult(en_log, 12483);

    /* Split into integer and fractional parts */
    en_int = (en_log >> 10);
    en_frac = (Word16) (en_log & 0x3ff);
    en_frac = shl(en_frac, 5);

    /* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */
    /* Add 16 in order to have the result of pow2 in Q16 */
    en_int = add(en_int, 16 - 1);

    level_q = Pow2(en_int, en_frac);       /* Q16 */
    norm = norm_l(level_q);
    level_q = (level_q << norm);           /* level in Q31 */
    level_exp = (15 - norm);
    cn_level = extract_h(level_q);         /* level in Q15 */

    /* ---- Generate the white noise vector ---- */
    for (n = 0; n < L_FRAME; n++)
    {
        exc2[n] = (Random(&(st->cng_seed)) >> 4);
    }

    /* gain = level / sqrt(ener) * sqrt(L_FRAME) */

    /* energy of generated excitation */
    exc_energy = Dot_product12(exc2, exc2, L_FRAME, &gain_exp);

    Isqrt_n(&exc_energy, &gain_exp);

    scale = extract_h(exc_energy);
    scale = mult(cn_level, scale);         /* gain in Q15 */

    gain_exp = add(level_exp, gain_exp);

    /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */
    gain_exp += 4;

    for (n = 0; n < L_FRAME; n++)
    {
        scaled = mult(exc2[n], scale);     /* Q0 * Q15 */
        exc2[n] = shl(scaled, gain_exp);
    }

    return 0;
}
287 
288 /**************************************************************************
289 *
290 * Function    : dtx_buffer Purpose     : handles the DTX buffer
291 *
292 **************************************************************************/
/*
 * Push the current frame's ISF vector and log2 energy into the DTX history.
 *
 *   st         : i/o : State struct
 *   isf_new    : i   : isf vector (M values)
 *   enr        : i   : residual energy (in L_FRAME)
 *   codec_mode : i   : index into en_adjust[]
 *
 * Returns 0 on success. Returns -1, leaving the state untouched, when
 * codec_mode has no entry in en_adjust[].
 */
Word16 dtx_buffer(
        dtx_encState * st,                    /* i/o : State struct                    */
        Word16 isf_new[],                     /* i   : isf vector                      */
        Word32 enr,                           /* i   : residual energy (in L_FRAME)    */
        Word16 codec_mode
        )
{
    Word16 log_en;

    Word16 log_en_e;
    Word16 log_en_m;

    /* en_adjust[] is indexed directly by codec_mode below; reject anything  */
    /* outside the table before the history is modified.                     */
    if (codec_mode < 0 || codec_mode >= (int)(sizeof(en_adjust) / sizeof(en_adjust[0])))
    {
        return -1;
    }

    /* Advance the circular history pointer */
    st->hist_ptr = add(st->hist_ptr, 1);
    if(st->hist_ptr == DTX_HIST_SIZE)
    {
        st->hist_ptr = 0;
    }
    /* copy isf vector into buffer */
    Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M);

    /* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f);  */
    Log2(enr, &log_en_e, &log_en_m);

    /* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */
    log_en = shl(log_en_e, 7);             /* Q7 */
    log_en = add(log_en, shr(log_en_m, 15 - 7));

    /* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0  (1024 in Q7), */
    /* then apply the mode-dependent adjustment from en_adjust[] (Q7).                             */
    log_en = sub(log_en, add(1024, en_adjust[codec_mode]));

    /* Insert into the buffer */
    st->log_en_hist[st->hist_ptr] = log_en;
    return 0;
}
334 
335 /**************************************************************************
336 *
337 * Function    : tx_dtx_handler Purpose     : adds extra speech hangover
338 *                                            to analyze speech on
339 *                                            the decoding side.
340 **************************************************************************/
/*
 * DTX state machine step: decides whether this frame is sent as MRDTX and
 * maintains the hangover counters.
 *
 *   st       : i/o : State struct
 *   vad_flag : i   : vad decision (non-zero = speech)
 *   usedMode : i/o : overwritten with MRDTX when the frame is sent as DTX
 */
void tx_dtx_handler(dtx_encState * st,     /* i/o : State struct           */
        Word16 vad_flag,                      /* i   : vad decision           */
        Word16 * usedMode                     /* i/o : mode changed or not    */
        )
{
    /* this state machine is in synch with the GSMEFR txDtx machine */
    st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1);

    /* Speech: just re-arm the hangover counter. */
    if (vad_flag != 0)
    {
        st->dtxHangoverCount = DTX_HANG_CONST;
        return;
    }

    /* Non-speech, out of decoder analysis hangover. */
    if (st->dtxHangoverCount == 0)
    {
        st->decAnaElapsedCount = 0;
        *usedMode = MRDTX;
        return;
    }

    /* Non-speech, in possible analysis hangover. */
    st->dtxHangoverCount = sub(st->dtxHangoverCount, 1);

    /* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH:   */
    /* short time since decoder update, so do not add extra hangover.       */
    /* Otherwise the VAD is overridden and *usedMode stays in speech mode.  */
    if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount),
                DTX_ELAPSED_FRAMES_THRESH) < 0)
    {
        *usedMode = MRDTX;
    }
}
376 
377 
378 
/*
 * Sum the ISF history column by column into isf_aver[] (M sums, not yet
 * divided). For each of indices[0] and indices[1] that is not -1, that
 * frame is temporarily replaced by frame indices[2] while the sums are
 * taken. isf_old[] is restored before returning.
 */
static void aver_isf_history(
        Word16 isf_old[],
        Word16 indices[],
        Word32 isf_aver[]
        )
{
    Word32 frame, coef, slot;
    Word16 saved[2 * M];
    Word32 acc;

    /* Save the frames about to be overwritten, then substitute frame indices[2] */
    for (slot = 0; slot < 2; slot++)
    {
        if (indices[slot] == -1)
        {
            continue;
        }
        for (coef = 0; coef < M; coef++)
        {
            saved[slot * M + coef] = isf_old[indices[slot] * M + coef];
            isf_old[indices[slot] * M + coef] = isf_old[indices[2] * M + coef];
        }
    }

    /* Column sums over the whole history */
    for (coef = 0; coef < M; coef++)
    {
        acc = 0;
        for (frame = 0; frame < DTX_HIST_SIZE; frame++)
        {
            acc = L_add(acc, L_deposit_l(isf_old[frame * M + coef]));
        }
        isf_aver[coef] = acc;
    }

    /* Put the saved frames back */
    for (slot = 0; slot < 2; slot++)
    {
        if (indices[slot] == -1)
        {
            continue;
        }
        for (coef = 0; coef < M; coef++)
        {
            isf_old[indices[slot] * M + coef] = saved[slot * M + coef];
        }
    }
}
429 
/*
 * Update the inter-frame ISF distance bookkeeping (st->D, st->sumD) for the
 * newest frame and pick three history slots:
 *
 *   indices[0] : frame with the largest distance sum, or -1
 *   indices[1] : frame with the second largest distance sum, or -1
 *   indices[2] : frame with the smallest distance sum
 *
 * indices[0] / indices[1] are set to -1 when the corresponding distance sum
 * scaled by INV_MED_THRESH does not exceed the minimum sum.
 */
static void find_frame_indices(
        Word16 isf_old_tx[],
        Word16 indices[],
        dtx_encState * st
        )
{
    Word32 dist, sum_lo, sum_hi, sum_hi2;
    Word16 a, b;
    Word16 step, diff, shift;
    Word16 frame;

    /* Remove the effect of the oldest frame from the column sums           */
    /* sumD[0..DTX_HIST_SIZE-2]. The last column sum is not updated since   */
    /* it is dropped by the shift below.                                    */
    step = DTX_HIST_SIZE_MIN_ONE;
    b = -1;
    for (a = 0; a < DTX_HIST_SIZE_MIN_ONE; a++)
    {
        b = add(b, step);
        st->sumD[a] = L_sub(st->sumD[a], st->D[b]);
        step = sub(step, 1);
    }

    /* Shift the column sums by one frame. The element belonging to the     */
    /* oldest frame falls off the end; sumD[0] (distances from the latest   */
    /* isf to the others) is rebuilt during this call, so it starts at 0.   */
    for (a = DTX_HIST_SIZE_MIN_ONE; a > 0; a--)
    {
        st->sumD[a] = st->sumD[a - 1];
    }
    st->sumD[0] = 0;

    /* Remove the oldest frame from the distance matrix. The matrix is      */
    /* kept as a one-dimensional array to save static memory.               */
    step = 0;
    for (a = 27; a >= 12; a = (Word16) (a - step))
    {
        step = add(step, 1);
        for (b = step; b > 0; b--)
        {
            st->D[a - b + 1] = st->D[a - b - step];
        }
    }

    /* Compute the first column of the distance matrix D: squared Euclidean */
    /* distances from the latest isf to every other isf in the history.     */
    frame = st->hist_ptr;
    for (a = 1; a < DTX_HIST_SIZE; a++)
    {
        /* Walk backwards through the circular history */
        frame = sub(frame, 1);
        if (frame < 0)
        {
            frame = DTX_HIST_SIZE_MIN_ONE;
        }

        dist = 0;
        for (b = 0; b < M; b++)
        {
            diff = sub(isf_old_tx[st->hist_ptr * M + b], isf_old_tx[frame * M + b]);
            dist = L_mac(dist, diff, diff);
        }
        st->D[a - 1] = dist;

        /* Update also the column sums. */
        st->sumD[0] = L_add(st->sumD[0], st->D[a - 1]);
        st->sumD[a] = L_add(st->sumD[a], st->D[a - 1]);
    }

    /* Find the minimum and maximum distance sums */
    sum_hi = st->sumD[0];
    sum_lo = st->sumD[0];
    indices[0] = 0;
    indices[2] = 0;
    for (a = 1; a < DTX_HIST_SIZE; a++)
    {
        if (L_sub(st->sumD[a], sum_hi) > 0)
        {
            indices[0] = a;
            sum_hi = st->sumD[a];
        }
        if (L_sub(st->sumD[a], sum_lo) < 0)
        {
            indices[2] = a;
            sum_lo = st->sumD[a];
        }
    }

    /* Find the second largest distance sum (any frame but indices[0]) */
    sum_hi2 = -2147483647L;
    indices[1] = -1;
    for (a = 0; a < DTX_HIST_SIZE; a++)
    {
        if ((L_sub(st->sumD[a], sum_hi2) > 0) && (sub(a, indices[0]) != 0))
        {
            indices[1] = a;
            sum_hi2 = st->sumD[a];
        }
    }

    /* Convert "frames ago" into positions in the circular history */
    for (a = 0; a < 3; a++)
    {
        indices[a] = sub(st->hist_ptr, indices[a]);
        if (indices[a] < 0)
        {
            indices[a] = add(indices[a], DTX_HIST_SIZE);
        }
    }

    /* If maximum distance/MED_THRESH is smaller than minimum distance */
    /* then the median ISF vector replacement is not performed         */
    shift = norm_l(sum_hi);
    sum_hi = (sum_hi << shift);
    sum_lo = (sum_lo << shift);
    dist = L_mult(voround(sum_hi), INV_MED_THRESH);
    if (dist <= sum_lo)
    {
        indices[0] = -1;
    }

    /* Same test for the second largest distance */
    sum_hi2 = L_shl(sum_hi2, shift);
    dist = L_mult(voround(sum_hi2), INV_MED_THRESH);
    if (dist <= sum_lo)
    {
        indices[1] = -1;
    }
}
565 
dithering_control(dtx_encState * st)566 static Word16 dithering_control(
567         dtx_encState * st
568         )
569 {
570     Word16 tmp, mean, CN_dith, gain_diff;
571     Word32 i, ISF_diff;
572 
573     /* determine how stationary the spectrum of background noise is */
574     ISF_diff = 0;
575     for (i = 0; i < 8; i++)
576     {
577         ISF_diff = L_add(ISF_diff, st->sumD[i]);
578     }
579     if ((ISF_diff >> 26) > 0)
580     {
581         CN_dith = 1;
582     } else
583     {
584         CN_dith = 0;
585     }
586 
587     /* determine how stationary the energy of background noise is */
588     mean = 0;
589     for (i = 0; i < DTX_HIST_SIZE; i++)
590     {
591         mean = add(mean, st->log_en_hist[i]);
592     }
593     mean = (mean >> 3);
594     gain_diff = 0;
595     for (i = 0; i < DTX_HIST_SIZE; i++)
596     {
597         tmp = abs_s(sub(st->log_en_hist[i], mean));
598         gain_diff = add(gain_diff, tmp);
599     }
600     if (gain_diff > GAIN_THR)
601     {
602         CN_dith = 1;
603     }
604     return CN_dith;
605 }
606