1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_processing/aecm/aecm_core.h"
12
13 #include <assert.h>
14
15 #include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
16 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
17
18 static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
19 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
20 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
21 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
22 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
23 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
24 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
25 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
26 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
27 };
28
// Noise-estimation tuning constants. Neither is referenced in the functions
// defined before WebRtcAecm_ProcessBlock in this file; presumably they are
// consumed by ComfortNoise() -- TODO confirm against its definition.
// NOTE(review): name suggests the Q-domain (fractional bits) of the noise
// estimate -- confirm.
static const int16_t kNoiseEstQDomain = 15;
// NOTE(review): name suggests a count controlling how the noise estimate is
// incremented -- confirm.
static const int16_t kNoiseEstIncCount = 5;
31
// Store offsets used by WindowAndFFT() when scattering windowed samples into
// the FFT buffer. Entries are BYTE offsets from the start of the int16_t fft
// buffer (they are added to the buffer address before a 16-bit store).
//
// The table is consumed in pairs, one pair per loop iteration i (0..63):
//   coefTable[2 * i]     -> where the windowed sample i is stored
//   coefTable[2 * i + 1] -> where the windowed sample i + 64 is stored
// Every offset is a multiple of 4, i.e. it addresses the first int16_t of a
// two-element (real, imag) slot. The ordering matches a 7-bit bit-reversal of
// the sample index times 4.
//
// The table is read-only, so it is declared const.
static const int16_t coefTable[] = {
  0, 4, 256, 260, 128, 132, 384, 388,
  64, 68, 320, 324, 192, 196, 448, 452,
  32, 36, 288, 292, 160, 164, 416, 420,
  96, 100, 352, 356, 224, 228, 480, 484,
  16, 20, 272, 276, 144, 148, 400, 404,
  80, 84, 336, 340, 208, 212, 464, 468,
  48, 52, 304, 308, 176, 180, 432, 436,
  112, 116, 368, 372, 240, 244, 496, 500,
  8, 12, 264, 268, 136, 140, 392, 396,
  72, 76, 328, 332, 200, 204, 456, 460,
  40, 44, 296, 300, 168, 172, 424, 428,
  104, 108, 360, 364, 232, 236, 488, 492,
  24, 28, 280, 284, 152, 156, 408, 412,
  88, 92, 344, 348, 216, 220, 472, 476,
  56, 60, 312, 316, 184, 188, 440, 444,
  120, 124, 376, 380, 248, 252, 504, 508
};
50
// Store offsets used by InverseFFTAndWindow() when scattering the input bins
// into the IFFT buffer. Entries are BYTE offsets from the start of the int16_t
// fft buffer, consumed as one (even, odd) pair per input bin k (0..63):
//   coefTable_ifft[2 * k]     -> slot that receives (real, -imag) of bin k
//   coefTable_ifft[2 * k + 1] -> slot that receives (real,  imag) of bin k
// Note the second entry, 512: for bin 0 the (real, imag) pair is written at
// byte offset 512, so the fft buffer must be writable up to byte 515.
// Laid out two pairs per row.
static int16_t coefTable_ifft[] = {
  0, 512, 256, 508,
  128, 252, 384, 380,
  64, 124, 320, 444,
  192, 188, 448, 316,
  32, 60, 288, 476,
  160, 220, 416, 348,
  96, 92, 352, 412,
  224, 156, 480, 284,
  16, 28, 272, 492,
  144, 236, 400, 364,
  80, 108, 336, 428,
  208, 172, 464, 300,
  48, 44, 304, 460,
  176, 204, 432, 332,
  112, 76, 368, 396,
  240, 140, 496, 268,
  8, 12, 264, 500,
  136, 244, 392, 372,
  72, 116, 328, 436,
  200, 180, 456, 308,
  40, 52, 296, 468,
  168, 212, 424, 340,
  104, 84, 360, 404,
  232, 148, 488, 276,
  24, 20, 280, 484,
  152, 228, 408, 356,
  88, 100, 344, 420,
  216, 164, 472, 292,
  56, 36, 312, 452,
  184, 196, 440, 324,
  120, 68, 376, 388,
  248, 132, 504, 260
};
69
// Forward declaration of the file-local comfort-noise routine. Its definition
// is not among the functions preceding WebRtcAecm_ProcessBlock, so the
// parameter roles below are inferred from names only -- TODO confirm:
//   aecm    presumably the AECM instance state
//   dfa     presumably a near-end magnitude spectrum
//   out     presumably the complex spectrum the noise is written/added to
//   lambda  presumably per-bin suppression gains
static void ComfortNoise(AecmCore_t* aecm,
                         const uint16_t* dfa,
                         complex16_t* out,
                         const int16_t* lambda);
74
// Windows a block of time-domain samples with WebRtcAecm_kSqrtHanning, runs
// WebRtcSpl_ComplexFFT on the result and copies the conjugated output into
// |freq_signal|.
//
// aecm                 [in]  Not referenced inside this function.
// fft                  [out] Work buffer. Its first PART_LEN4 int16_t values
//                            are zeroed, then the windowed samples are
//                            scattered into it at the byte offsets listed in
//                            coefTable before the FFT is run on it.
// time_signal          [in]  Samples to transform. The windowing loop runs 64
//                            iterations and reads sample i and sample i + 64
//                            in each, i.e. 128 samples in total.
// freq_signal          [out] Receives 128 int16_t values (64 real/imag pairs);
//                            each imaginary part is negated on copy.
// time_signal_scaling  [in]  Scaling exponent. Every sample*window product is
//                            shifted by (time_signal_scaling - 14): left when
//                            that value is >= 0, arithmetic right otherwise.
static void WindowAndFFT(AecmCore_t* aecm,
                         int16_t* fft,
                         const int16_t* time_signal,
                         complex16_t* freq_signal,
                         int time_signal_scaling) {
  int i, j;
  int32_t tmp1, tmp2, tmp3, tmp4;
  int16_t* pfrfi;
  complex16_t* pfreq_signal;
  int16_t f_coef, s_coef;
  int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
  int32_t hann, hann1, coefs;

  // Only the real slots are written below, so the imaginary slots must start
  // out as zero.
  memset(fft, 0, sizeof(int16_t) * PART_LEN4);

  // FFT of signal
  // Windowing loop. |hann| walks the window upwards from entry 0 while |hann1|
  // walks it downwards from entry 64 (byte offset 128). Label 1 is the
  // left-shift variant (shift >= 0), label 2 the right-shift variant
  // (shift < 0, using shift1 = -shift). The indented instructions sit in
  // branch delay slots (.set noreorder).
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[shift], %[time_signal_scaling], -14 \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "addiu %[load_ptr], %[time_signal], 0 \n\t"
    "addiu %[hann], %[hanning], 0 \n\t"
    "addiu %[hann1], %[hanning], 128 \n\t"
    "addiu %[coefs], %[coefTable], 0 \n\t"
    "bltz %[shift], 2f \n\t"
    " negu %[shift1], %[shift] \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[load_ptr]) \n\t"
    "lh %[tmp2], 0(%[hann]) \n\t"
    "lh %[tmp3], 128(%[load_ptr]) \n\t"
    "lh %[tmp4], 0(%[hann1]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "lh %[f_coef], 0(%[coefs]) \n\t"
    "lh %[s_coef], 2(%[coefs]) \n\t"
    "addiu %[load_ptr], %[load_ptr], 2 \n\t"
    "addiu %[hann], %[hann], 2 \n\t"
    "addiu %[hann1], %[hann1], -2 \n\t"
    "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
    "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
    "sllv %[tmp1], %[tmp1], %[shift] \n\t"
    "sllv %[tmp3], %[tmp3], %[shift] \n\t"
    "sh %[tmp1], 0(%[store_ptr1]) \n\t"
    "sh %[tmp3], 0(%[store_ptr2]) \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[coefs], %[coefs], 4 \n\t"
    "b 3f \n\t"
    " nop \n\t"
    "2: \n\t"
    "lh %[tmp1], 0(%[load_ptr]) \n\t"
    "lh %[tmp2], 0(%[hann]) \n\t"
    "lh %[tmp3], 128(%[load_ptr]) \n\t"
    "lh %[tmp4], 0(%[hann1]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "lh %[f_coef], 0(%[coefs]) \n\t"
    "lh %[s_coef], 2(%[coefs]) \n\t"
    "addiu %[load_ptr], %[load_ptr], 2 \n\t"
    "addiu %[hann], %[hann], 2 \n\t"
    "addiu %[hann1], %[hann1], -2 \n\t"
    "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
    "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
    "srav %[tmp1], %[tmp1], %[shift1] \n\t"
    "srav %[tmp3], %[tmp3], %[shift1] \n\t"
    "sh %[tmp1], 0(%[store_ptr1]) \n\t"
    "sh %[tmp3], 0(%[store_ptr2]) \n\t"
    "bgtz %[i], 2b \n\t"
    " addiu %[coefs], %[coefs], 4 \n\t"
    "3: \n\t"
    ".set pop \n\t"
    : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),
      [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),
      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),
      [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),
      [store_ptr2] "=&r" (store_ptr2)
    : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),
      [time_signal_scaling] "r" (time_signal_scaling),
      [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)
    : "memory", "hi", "lo"
  );

  WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
  pfrfi = fft;
  pfreq_signal = freq_signal;

  // Copy loop: 16 iterations, each moving four (real, imag) pairs from |fft|
  // to |freq_signal| and negating every imaginary part (subu from $zero).
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[j], $zero, 128 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[pfrfi]) \n\t"
    "lh %[tmp2], 2(%[pfrfi]) \n\t"
    "lh %[tmp3], 4(%[pfrfi]) \n\t"
    "lh %[tmp4], 6(%[pfrfi]) \n\t"
    "subu %[tmp2], $zero, %[tmp2] \n\t"
    "sh %[tmp1], 0(%[pfreq_signal]) \n\t"
    "sh %[tmp2], 2(%[pfreq_signal]) \n\t"
    "subu %[tmp4], $zero, %[tmp4] \n\t"
    "sh %[tmp3], 4(%[pfreq_signal]) \n\t"
    "sh %[tmp4], 6(%[pfreq_signal]) \n\t"
    "lh %[tmp1], 8(%[pfrfi]) \n\t"
    "lh %[tmp2], 10(%[pfrfi]) \n\t"
    "lh %[tmp3], 12(%[pfrfi]) \n\t"
    "lh %[tmp4], 14(%[pfrfi]) \n\t"
    "addiu %[j], %[j], -8 \n\t"
    "subu %[tmp2], $zero, %[tmp2] \n\t"
    "sh %[tmp1], 8(%[pfreq_signal]) \n\t"
    "sh %[tmp2], 10(%[pfreq_signal]) \n\t"
    "subu %[tmp4], $zero, %[tmp4] \n\t"
    "sh %[tmp3], 12(%[pfreq_signal]) \n\t"
    "sh %[tmp4], 14(%[pfreq_signal]) \n\t"
    "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t"
    "bgtz %[j], 1b \n\t"
    " addiu %[pfrfi], %[pfrfi], 16 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),
      [tmp4] "=&r" (tmp4)
    :
    : "memory"
  );
}
201
// Builds the IFFT input from |efw|, runs WebRtcSpl_ComplexIFFT, windows the
// result with WebRtcAecm_kSqrtHanning, overlap-adds it with aecm->outBuf and
// writes the block to |output|. Finally shifts the second half of the far-end
// and near-end time buffers into their first half.
//
// aecm          [in/out] Reads dfaCleanQDomain; reads and rewrites outBuf;
//                        shifts xBuf, dBufNoisy and (conditionally) dBufClean.
// fft           [in/out] Work buffer for the IFFT. The scatter loop stores
//                        two int16_t values at byte offset 512 (see entry 1
//                        of coefTable_ifft), so the buffer must be writable
//                        up to byte 515.
//                        NOTE(review): presumably this is why the caller's
//                        fft_buf is declared with "+2" -- confirm.
// efw           [in]     Spectrum to invert; bins 0..63 are read by the
//                        scatter loop and bin PART_LEN separately.
// output        [out]    Receives 64 int16_t output samples.
// nearendClean  [in]     Only tested against NULL to decide whether
//                        aecm->dBufClean is shifted.
static void InverseFFTAndWindow(AecmCore_t* aecm,
                                int16_t* fft,
                                complex16_t* efw,
                                int16_t* output,
                                const int16_t* nearendClean) {
  int i, outCFFT;
  int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
  int16_t* pcoefTable_ifft = coefTable_ifft;
  int16_t* pfft = fft;
  int16_t* ppfft = fft;
  complex16_t* pefw = efw;
  int32_t out_aecm;
  int16_t* paecm_buf = aecm->outBuf;
  const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;
  const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];
  int16_t* output1 = output;

  // Scatter loop: 16 iterations, four input bins each. For every bin the
  // table supplies two byte offsets: (real, imag) is stored at the second one
  // and (real, -imag) at the first one.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 0(%[pefw]) \n\t"
    "lh %[tmp_im], 2(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 4(%[pefw]) \n\t"
    "lh %[tmp_im], 6(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 8(%[pefw]) \n\t"
    "lh %[tmp_im], 10(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 12(%[pefw]) \n\t"
    "lh %[tmp_im], 14(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t"
    "addiu %[i], %[i], -4 \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[pefw], %[pefw], 16 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),
      [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),
      [fft] "+r" (fft)
    :
    : "memory"
  );

  // Bin PART_LEN is not covered by the loop above; it goes into the second
  // (real, imag) slot of the buffer with the imaginary part negated.
  fft[2] = efw[PART_LEN].real;
  fft[3] = -efw[PART_LEN].imag;

  outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
  pfft = fft;

  // Compaction loop: 32 iterations, each copying four values taken from every
  // other int16_t of the IFFT output (byte offsets 0, 4, 8, 12 -- the first
  // element of each pair) to consecutive positions at the front of |fft|.
  // 128 values are kept in total. Reads always run ahead of writes.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 128 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[ppfft]) \n\t"
    "lh %[tmp2], 4(%[ppfft]) \n\t"
    "lh %[tmp3], 8(%[ppfft]) \n\t"
    "lh %[tmp4], 12(%[ppfft]) \n\t"
    "addiu %[i], %[i], -4 \n\t"
    "sh %[tmp1], 0(%[pfft]) \n\t"
    "sh %[tmp2], 2(%[pfft]) \n\t"
    "sh %[tmp3], 4(%[pfft]) \n\t"
    "sh %[tmp4], 6(%[pfft]) \n\t"
    "addiu %[ppfft], %[ppfft], 16 \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[pfft], %[pfft], 8 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
      [ppfft] "+r" (ppfft)
    :
    : "memory"
  );

  pfft = fft;
  // Net shift applied to the windowed samples below: left when >= 0,
  // arithmetic right by the negated value otherwise.
  out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);

  // Window / overlap-add loop: 32 iterations, two samples each. Per sample i:
  //   first half:  t = (fft[i] * window[i] + 8192) >> 14, shifted by out_aecm,
  //                plus outBuf[i], saturated to int16_t; stored to fft[i] and
  //                output[i].
  //   second half: t = (fft[64 + i] * window[64 - i]) >> 14 (no rounding
  //                term), shifted by out_aecm, saturated; stored to outBuf[i].
  // Saturation uses shll_s.w/sra when MIPS_DSP_R1_LE is defined; otherwise it
  // compares (x >> 31) with (x >> 15) and, on mismatch, replaces x with
  // 0x7fff XOR the sign mask.
  // NOTE(review): the [WebRtcAecm_kSqrtHanning] input operand is never
  // referenced by the assembly template; it only occupies a register.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "11: \n\t"
    "lh %[tmp1], 0(%[pfft]) \n\t"
    "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t"
    "addiu %[i], %[i], -2 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "lh %[tmp3], 2(%[pfft]) \n\t"
    "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "addiu %[tmp1], %[tmp1], 8192 \n\t"
    "sra %[tmp1], %[tmp1], 14 \n\t"
    "addiu %[tmp3], %[tmp3], 8192 \n\t"
    "sra %[tmp3], %[tmp3], 14 \n\t"
    "bgez %[out_aecm], 1f \n\t"
    " negu %[tmp2], %[out_aecm] \n\t"
    "srav %[tmp1], %[tmp1], %[tmp2] \n\t"
    "b 2f \n\t"
    " srav %[tmp3], %[tmp3], %[tmp2] \n\t"
    "1: \n\t"
    "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
    "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
    "2: \n\t"
    "lh %[tmp4], 0(%[paecm_buf]) \n\t"
    "lh %[tmp2], 2(%[paecm_buf]) \n\t"
    "addu %[tmp3], %[tmp3], %[tmp2] \n\t"
    "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
    "sra %[tmp1], %[tmp1], 16 \n\t"
    "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
    "sra %[tmp3], %[tmp3], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
    "sra %[tmp4], %[tmp1], 31 \n\t"
    "sra %[tmp2], %[tmp1], 15 \n\t"
    "beq %[tmp4], %[tmp2], 3f \n\t"
    " ori %[tmp2], $zero, 0x7fff \n\t"
    "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
    "3: \n\t"
    "sra %[tmp2], %[tmp3], 31 \n\t"
    "sra %[tmp4], %[tmp3], 15 \n\t"
    "beq %[tmp2], %[tmp4], 4f \n\t"
    " ori %[tmp4], $zero, 0x7fff \n\t"
    "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
    "4: \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
    "sh %[tmp1], 0(%[pfft]) \n\t"
    "sh %[tmp1], 0(%[output1]) \n\t"
    "sh %[tmp3], 2(%[pfft]) \n\t"
    "sh %[tmp3], 2(%[output1]) \n\t"
    "lh %[tmp1], 128(%[pfft]) \n\t"
    "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "lh %[tmp3], 130(%[pfft]) \n\t"
    "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "sra %[tmp1], %[tmp1], 14 \n\t"
    "sra %[tmp3], %[tmp3], 14 \n\t"
    "bgez %[out_aecm], 5f \n\t"
    " negu %[tmp2], %[out_aecm] \n\t"
    "srav %[tmp3], %[tmp3], %[tmp2] \n\t"
    "b 6f \n\t"
    " srav %[tmp1], %[tmp1], %[tmp2] \n\t"
    "5: \n\t"
    "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
    "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
    "6: \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
    "sra %[tmp1], %[tmp1], 16 \n\t"
    "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
    "sra %[tmp3], %[tmp3], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
    "sra %[tmp4], %[tmp1], 31 \n\t"
    "sra %[tmp2], %[tmp1], 15 \n\t"
    "beq %[tmp4], %[tmp2], 7f \n\t"
    " ori %[tmp2], $zero, 0x7fff \n\t"
    "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
    "7: \n\t"
    "sra %[tmp2], %[tmp3], 31 \n\t"
    "sra %[tmp4], %[tmp3], 15 \n\t"
    "beq %[tmp2], %[tmp4], 8f \n\t"
    " ori %[tmp4], $zero, 0x7fff \n\t"
    "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
    "8: \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
    "sh %[tmp1], 0(%[paecm_buf]) \n\t"
    "sh %[tmp3], 2(%[paecm_buf]) \n\t"
    "addiu %[output1], %[output1], 4 \n\t"
    "addiu %[paecm_buf], %[paecm_buf], 4 \n\t"
    "addiu %[pfft], %[pfft], 4 \n\t"
    "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t"
    "bgtz %[i], 11b \n\t"
    " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
      [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),
      [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),
      [p_kSqrtHanning] "+r" (p_kSqrtHanning)
    : [out_aecm] "r" (out_aecm),
      [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)
    : "hi", "lo","memory"
  );

  // Copy the current block to the old position
  // (aecm->outBuf is shifted elsewhere)
  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
  memcpy(aecm->dBufNoisy,
         aecm->dBufNoisy + PART_LEN,
         sizeof(int16_t) * PART_LEN);
  if (nearendClean != NULL) {
    memcpy(aecm->dBufClean,
           aecm->dBufClean + PART_LEN,
           sizeof(int16_t) * PART_LEN);
  }
}
434
// Computes the per-bin echo estimate from the stored channel and accumulates
// three energy sums over bins 0..PART_LEN.
//
// aecm                [in]     Supplies channelStored (read as signed 16-bit)
//                              and channelAdapt16 (read as unsigned 16-bit in
//                              the assembly loop).
// far_spectrum        [in]     Far-end magnitude spectrum, PART_LEN + 1 values.
// echo_est            [out]    echo_est[i] = channelStored[i] * far_spectrum[i]
//                              for i = 0..PART_LEN.
// far_energy          [in/out] Incremented by the sum of far_spectrum[i].
// echo_energy_adapt   [in/out] Incremented by the sum of
//                              channelAdapt16[i] * far_spectrum[i].
// echo_energy_stored  [in/out] Incremented by the sum of echo_est[i].
//
// The three sums start from the values already stored behind the pointers and
// are written back once at the end.
void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm,
                                        const uint16_t* far_spectrum,
                                        int32_t* echo_est,
                                        uint32_t* far_energy,
                                        uint32_t* echo_energy_adapt,
                                        uint32_t* echo_energy_stored) {
  int i;
  uint32_t par1 = (*far_energy);
  uint32_t par2 = (*echo_energy_adapt);
  uint32_t par3 = (*echo_energy_stored);
  int16_t* ch_stored_p = &(aecm->channelStored[0]);
  int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
  // The cast drops const only so the pointer can be an in/out asm operand; the
  // assembly never stores through it.
  uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
  int32_t* echo_p = &(echo_est[0]);
  int32_t temp0, stored0, echo0, adept0, spectrum0;
  int32_t stored1, adept1, spectrum1, echo1, temp1;

  // Get energy for the delayed far end signal and estimated
  // echo using both stored and adapted channels.
  // Each pass of the C loop handles four bins (two pairs inside the asm).
  // echo_p is advanced by 16 bytes early, so the stores use negative offsets.
  for (i = 0; i < PART_LEN; i+= 4) {
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "lh %[stored0], 0(%[ch_stored_p]) \n\t"
      "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"
      "lh %[stored1], 2(%[ch_stored_p]) \n\t"
      "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"
      "mul %[echo0], %[stored0], %[spectrum0] \n\t"
      "mul %[temp0], %[adept0], %[spectrum0] \n\t"
      "mul %[echo1], %[stored1], %[spectrum1] \n\t"
      "mul %[temp1], %[adept1], %[spectrum1] \n\t"
      "addu %[par1], %[par1], %[spectrum0] \n\t"
      "addu %[par1], %[par1], %[spectrum1] \n\t"
      "addiu %[echo_p], %[echo_p], 16 \n\t"
      "addu %[par3], %[par3], %[echo0] \n\t"
      "addu %[par2], %[par2], %[temp0] \n\t"
      "addu %[par3], %[par3], %[echo1] \n\t"
      "addu %[par2], %[par2], %[temp1] \n\t"
      "usw %[echo0], -16(%[echo_p]) \n\t"
      "usw %[echo1], -12(%[echo_p]) \n\t"
      "lh %[stored0], 4(%[ch_stored_p]) \n\t"
      "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"
      "lh %[stored1], 6(%[ch_stored_p]) \n\t"
      "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"
      "mul %[echo0], %[stored0], %[spectrum0] \n\t"
      "mul %[temp0], %[adept0], %[spectrum0] \n\t"
      "mul %[echo1], %[stored1], %[spectrum1] \n\t"
      "mul %[temp1], %[adept1], %[spectrum1] \n\t"
      "addu %[par1], %[par1], %[spectrum0] \n\t"
      "addu %[par1], %[par1], %[spectrum1] \n\t"
      "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"
      "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"
      "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"
      "addu %[par3], %[par3], %[echo0] \n\t"
      "addu %[par2], %[par2], %[temp0] \n\t"
      "addu %[par3], %[par3], %[echo1] \n\t"
      "addu %[par2], %[par2], %[temp1] \n\t"
      "usw %[echo0], -8(%[echo_p]) \n\t"
      "usw %[echo1], -4(%[echo_p]) \n\t"
      ".set pop \n\t"
      : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),
        [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),
        [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),
        [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),
        [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),
        [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),
        [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),
        [spectrum_p] "+r" (spectrum_p)
      :
      : "hi", "lo", "memory"
    );
  }

  // The loop covers bins 0..PART_LEN-1; the last bin is handled in plain C.
  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
                                             far_spectrum[PART_LEN]);
  par1 += (uint32_t)(far_spectrum[PART_LEN]);
  par2 += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[PART_LEN],
                                far_spectrum[PART_LEN]);
  par3 += (uint32_t)echo_est[PART_LEN];

  (*far_energy) = par1;
  (*echo_energy_adapt) = par2;
  (*echo_energy_stored) = par3;
}
523
524 #if defined(MIPS_DSP_R1_LE)
// Copies the adaptive channel into the stored channel and recomputes the echo
// estimate from the new stored channel. Only compiled when MIPS_DSP_R1_LE is
// defined.
//
// aecm          [in/out] channelAdapt16 is copied into channelStored
//                        (PART_LEN1 values).
// far_spectrum  [in]     Far-end magnitude spectrum, PART_LEN + 1 values.
// echo_est      [out]    Receives PART_LEN + 1 products of channelStored and
//                        far_spectrum.
void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm,
                                          const uint16_t* far_spectrum,
                                          int32_t* echo_est) {
  int i;
  int16_t* temp1;
  uint16_t* temp8;
  int32_t temp0, temp2, temp3, temp4, temp5, temp6;
  int32_t* temp7 = &(echo_est[0]);
  temp1 = &(aecm->channelStored[0]);
  // const is cast away only so the pointer can be an in/out asm operand; the
  // assembly does not store through it.
  temp8 = (uint16_t*)(&far_spectrum[0]);

  // During startup we store the channel every block.
  memcpy(aecm->channelStored, aecm->channelAdapt16,
         sizeof(int16_t) * PART_LEN1);
  // Recalculate echo estimate
  // Four bins per pass: each ulw fetches two 16-bit values at once, the
  // .phl/.phr multiplies take the upper/lower halfword respectively, and the
  // following "sra ..., 1" halves each product again. The upper-halfword
  // result is stored at the higher address of each pair.
  // NOTE(review): muleq_s.w.ph* is a signed fractional halfword multiply in
  // the MIPS DSP ASE, whereas far_spectrum is uint16_t and the tail below uses
  // WEBRTC_SPL_MUL_16_U16. Results would differ for spectrum values >= 0x8000
  // -- confirm the expected input range.
  for (i = 0; i < PART_LEN; i += 4) {
    __asm __volatile (
      "ulw %[temp0], 0(%[temp8]) \n\t"
      "ulw %[temp2], 0(%[temp1]) \n\t"
      "ulw %[temp4], 4(%[temp8]) \n\t"
      "ulw %[temp5], 4(%[temp1]) \n\t"
      "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"
      "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"
      "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"
      "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"
      "addiu %[temp7], %[temp7], 16 \n\t"
      "addiu %[temp1], %[temp1], 8 \n\t"
      "addiu %[temp8], %[temp8], 8 \n\t"
      "sra %[temp3], %[temp3], 1 \n\t"
      "sra %[temp0], %[temp0], 1 \n\t"
      "sra %[temp6], %[temp6], 1 \n\t"
      "sra %[temp4], %[temp4], 1 \n\t"
      "usw %[temp3], -12(%[temp7]) \n\t"
      "usw %[temp0], -16(%[temp7]) \n\t"
      "usw %[temp6], -4(%[temp7]) \n\t"
      "usw %[temp4], -8(%[temp7]) \n\t"
      : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
        [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)
      :
      : "hi", "lo", "memory"
    );
  }
  // i equals PART_LEN here: handle the one remaining bin in plain C.
  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
                                      far_spectrum[i]);
}
571
// Overwrites the adaptive channel with the stored channel, both in its 16-bit
// form (channelAdapt16) and its 32-bit form (channelAdapt32). Only compiled
// when MIPS_DSP_R1_LE is defined.
//
// aecm  [in/out] channelStored is read; channelAdapt16 and channelAdapt32 are
//                rewritten for indices 0..PART_LEN.
void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm) {
  int i;
  int32_t* temp3;
  int16_t* temp0;
  int32_t temp1, temp2, temp4, temp5;

  temp0 = &(aecm->channelStored[0]);
  temp3 = &(aecm->channelAdapt32[0]);

  // The stored channel has a significantly lower MSE than the adaptive one for
  // two consecutive calculations. Reset the adaptive channel.
  memcpy(aecm->channelAdapt16,
         aecm->channelStored,
         sizeof(int16_t) * PART_LEN1);

  // Restore the W32 channel
  // Four entries per pass: each ulw fetches two 16-bit values and
  // preceq.w.phl/.phr expand the upper/lower halfword to a 32-bit word; the
  // upper-halfword result goes to the higher address of each pair.
  // NOTE(review): per the MIPS DSP ASE, preceq.w.ph* places the halfword in
  // the upper 16 bits, which would match the "<< 16" used for the tail entry
  // below -- confirm against the instruction reference.
  for (i = 0; i < PART_LEN; i += 4) {
    __asm __volatile (
      "ulw %[temp1], 0(%[temp0]) \n\t"
      "ulw %[temp4], 4(%[temp0]) \n\t"
      "preceq.w.phl %[temp2], %[temp1] \n\t"
      "preceq.w.phr %[temp1], %[temp1] \n\t"
      "preceq.w.phl %[temp5], %[temp4] \n\t"
      "preceq.w.phr %[temp4], %[temp4] \n\t"
      "addiu %[temp0], %[temp0], 8 \n\t"
      "usw %[temp2], 4(%[temp3]) \n\t"
      "usw %[temp1], 0(%[temp3]) \n\t"
      "usw %[temp5], 12(%[temp3]) \n\t"
      "usw %[temp4], 8(%[temp3]) \n\t"
      "addiu %[temp3], %[temp3], 16 \n\t"
      : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
        [temp3] "+r" (temp3), [temp0] "+r" (temp0)
      :
      : "memory"
    );
  }

  // i equals PART_LEN here: convert the one remaining entry in plain C.
  aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
      (int32_t)aecm->channelStored[i], 16);
}
613 #endif // #if defined(MIPS_DSP_R1_LE)
614
// Transforms a time domain signal into the frequency domain, outputting the
// complex valued signal, absolute value and sum of absolute values.
//
// aecm                 [in]  Passed through to WindowAndFFT().
// time_signal          [in]  Pointer to time domain signal
// freq_signal          [out] Pointer to the complex frequency domain array
//                            (real and imaginary parts interleaved); entries
//                            0..PART_LEN are written.
// freq_signal_abs      [out] Pointer to absolute value of frequency domain
//                            array, entries 0..PART_LEN
// freq_signal_sum_abs  [out] Pointer to the sum of all absolute values in
//                            the frequency domain array
// return value         The scaling applied to the time signal: 0 unless
//                      AECM_DYNAMIC_Q is defined, in which case it is
//                      WebRtcSpl_NormW16() of the largest absolute sample.
//
static int TimeToFrequencyDomain(AecmCore_t* aecm,
                                 const int16_t* time_signal,
                                 complex16_t* freq_signal,
                                 uint16_t* freq_signal_abs,
                                 uint32_t* freq_signal_sum_abs)
{
  int i = 0;
  int time_signal_scaling = 0;

  // In fft_buf, +16 for 32-byte alignment.
  int16_t fft_buf[PART_LEN4 + 16];
  // Round the buffer address up to the next multiple of 32.
  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);

  int16_t tmp16no1;
#if !defined(MIPS_DSP_R2_LE)
  int32_t tmp32no1;
  int32_t tmp32no2;
  int16_t tmp16no2;
#else
  int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;
  int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;
  int16_t* freqp;
  uint16_t* freqabsp;
  uint32_t freqt0, freqt1, freqt2, freqt3;
  uint32_t freqs;
#endif

#ifdef AECM_DYNAMIC_Q
  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
#endif

  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);

  // Extract imaginary and real part,
  // calculate the magnitude for all frequency bins
  // Bins 0 and PART_LEN get a zero imaginary part, so their magnitude is just
  // |real|. Bin PART_LEN is taken straight from the FFT buffer because
  // WindowAndFFT() only copies bins 0..PART_LEN-1.
  freq_signal[0].imag = 0;
  freq_signal[PART_LEN].imag = 0;
  freq_signal[PART_LEN].real = fft[PART_LEN2];
  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
      freq_signal[PART_LEN].real);
  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
      (uint32_t)(freq_signal_abs[PART_LEN]);

#if !defined(MIPS_DSP_R2_LE)
  for (i = 1; i < PART_LEN; i++) {
    if (freq_signal[i].real == 0)
    {
      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
          freq_signal[i].imag);
    }
    else if (freq_signal[i].imag == 0)
    {
      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
          freq_signal[i].real);
    }
    else
    {
      // Magnitude of the complex fft output:
      // magn = floor(sqrt(real^2 + imag^2))
      // The sum of squares uses a saturating 32-bit add before the
      // integer square root is taken.
      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
      tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
      tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
      tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2);
      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);

      freq_signal_abs[i] = (uint16_t)tmp32no1;
    }
    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
  }
#else // #if !defined(MIPS_DSP_R2_LE)
  // DSP R2 path: the running sum is kept in |freqs| and written back once at
  // the end. Each 32-bit load fetches one (real, imag) pair.
  freqs = (uint32_t)(freq_signal_abs[0]) +
      (uint32_t)(freq_signal_abs[PART_LEN]);
  freqp = &(freq_signal[1].real);

  // Bins 1..3 are handled up front so the main loop can start at bin 4 and
  // process four bins per pass. "mult $acN, $zero, $zero" clears accumulator
  // N; dpaq_s.w.ph then accumulates the pair multiplied with itself and
  // "extr.w ..., 1" extracts the accumulator shifted right by one bit.
  // NOTE(review): per the MIPS DSP ASE the dpaq_s.w.ph products are doubled
  // (fractional multiply), so the shift by one would yield real^2 + imag^2
  // -- confirm against the instruction reference.
  __asm __volatile (
    "lw %[freqt0], 0(%[freqp]) \n\t"
    "lw %[freqt1], 4(%[freqp]) \n\t"
    "lw %[freqt2], 8(%[freqp]) \n\t"
    "mult $ac0, $zero, $zero \n\t"
    "mult $ac1, $zero, $zero \n\t"
    "mult $ac2, $zero, $zero \n\t"
    "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
    "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
    "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
    "addiu %[freqp], %[freqp], 12 \n\t"
    "extr.w %[tmp32no20], $ac0, 1 \n\t"
    "extr.w %[tmp32no21], $ac1, 1 \n\t"
    "extr.w %[tmp32no22], $ac2, 1 \n\t"
    : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
      [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),
      [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
      [tmp32no22] "=r" (tmp32no22)
    :
    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"
  );

  tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
  tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
  tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
  freq_signal_abs[1] = (uint16_t)tmp32no10;
  freq_signal_abs[2] = (uint16_t)tmp32no11;
  freq_signal_abs[3] = (uint16_t)tmp32no12;
  freqs += (uint32_t)tmp32no10;
  freqs += (uint32_t)tmp32no11;
  freqs += (uint32_t)tmp32no12;
  freqabsp = &(freq_signal_abs[4]);
  for (i = 4; i < PART_LEN; i+=4)
  {
    // Same computation as above for four bins at once. freqabsp is advanced
    // by four entries here, which is why the stores further down use
    // negative offsets.
    __asm __volatile (
      "ulw %[freqt0], 0(%[freqp]) \n\t"
      "ulw %[freqt1], 4(%[freqp]) \n\t"
      "ulw %[freqt2], 8(%[freqp]) \n\t"
      "ulw %[freqt3], 12(%[freqp]) \n\t"
      "mult $ac0, $zero, $zero \n\t"
      "mult $ac1, $zero, $zero \n\t"
      "mult $ac2, $zero, $zero \n\t"
      "mult $ac3, $zero, $zero \n\t"
      "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
      "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
      "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
      "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"
      "addiu %[freqp], %[freqp], 16 \n\t"
      "addiu %[freqabsp], %[freqabsp], 8 \n\t"
      "extr.w %[tmp32no20], $ac0, 1 \n\t"
      "extr.w %[tmp32no21], $ac1, 1 \n\t"
      "extr.w %[tmp32no22], $ac2, 1 \n\t"
      "extr.w %[tmp32no23], $ac3, 1 \n\t"
      : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
        [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),
        [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
        [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),
        [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)
      :
      : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
        "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    );

    tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
    tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
    tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
    tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);

    // Store the four magnitudes just behind the advanced freqabsp and add
    // them to the running sum.
    __asm __volatile (
      "sh %[tmp32no10], -8(%[freqabsp]) \n\t"
      "sh %[tmp32no11], -6(%[freqabsp]) \n\t"
      "sh %[tmp32no12], -4(%[freqabsp]) \n\t"
      "sh %[tmp32no13], -2(%[freqabsp]) \n\t"
      "addu %[freqs], %[freqs], %[tmp32no10] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no11] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no12] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no13] \n\t"
      : [freqs] "+r" (freqs)
      : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),
        [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),
        [freqabsp] "r" (freqabsp)
      : "memory"
    );
  }

  (*freq_signal_sum_abs) = freqs;
#endif

  return time_signal_scaling;
}
798
WebRtcAecm_ProcessBlock(AecmCore_t * aecm,const int16_t * farend,const int16_t * nearendNoisy,const int16_t * nearendClean,int16_t * output)799 int WebRtcAecm_ProcessBlock(AecmCore_t* aecm,
800 const int16_t* farend,
801 const int16_t* nearendNoisy,
802 const int16_t* nearendClean,
803 int16_t* output) {
804 int i;
805 uint32_t xfaSum;
806 uint32_t dfaNoisySum;
807 uint32_t dfaCleanSum;
808 uint32_t echoEst32Gained;
809 uint32_t tmpU32;
810 int32_t tmp32no1;
811
812 uint16_t xfa[PART_LEN1];
813 uint16_t dfaNoisy[PART_LEN1];
814 uint16_t dfaClean[PART_LEN1];
815 uint16_t* ptrDfaClean = dfaClean;
816 const uint16_t* far_spectrum_ptr = NULL;
817
818 // 32 byte aligned buffers (with +8 or +16).
819 int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
820 int32_t echoEst32_buf[PART_LEN1 + 8];
821 int32_t dfw_buf[PART_LEN2 + 8];
822 int32_t efw_buf[PART_LEN2 + 8];
823
824 int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31);
825 int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31);
826 complex16_t* dfw = (complex16_t*)(((uint32_t)dfw_buf + 31) & ~ 31);
827 complex16_t* efw = (complex16_t*)(((uint32_t)efw_buf + 31) & ~ 31);
828
829 int16_t hnl[PART_LEN1];
830 int16_t numPosCoef = 0;
831 int delay;
832 int16_t tmp16no1;
833 int16_t tmp16no2;
834 int16_t mu;
835 int16_t supGain;
836 int16_t zeros32, zeros16;
837 int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
838 int far_q;
839 int16_t resolutionDiff, qDomainDiff;
840
841 const int kMinPrefBand = 4;
842 const int kMaxPrefBand = 24;
843 int32_t avgHnl32 = 0;
844
845 int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
846 int16_t* ptr;
847 int16_t* ptr1;
848 int16_t* er_ptr;
849 int16_t* dr_ptr;
850
851 ptr = &hnl[0];
852 ptr1 = &hnl[0];
853 er_ptr = &efw[0].real;
854 dr_ptr = &dfw[0].real;
855
856 // Determine startup state. There are three states:
857 // (0) the first CONV_LEN blocks
858 // (1) another CONV_LEN blocks
859 // (2) the rest
860
861 if (aecm->startupState < 2) {
862 aecm->startupState = (aecm->totCount >= CONV_LEN) +
863 (aecm->totCount >= CONV_LEN2);
864 }
865 // END: Determine startup state
866
867 // Buffer near and far end signals
868 memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
869 memcpy(aecm->dBufNoisy + PART_LEN,
870 nearendNoisy,
871 sizeof(int16_t) * PART_LEN);
872 if (nearendClean != NULL) {
873 memcpy(aecm->dBufClean + PART_LEN,
874 nearendClean,
875 sizeof(int16_t) * PART_LEN);
876 }
877
878 // Transform far end signal from time domain to frequency domain.
879 far_q = TimeToFrequencyDomain(aecm,
880 aecm->xBuf,
881 dfw,
882 xfa,
883 &xfaSum);
884
885 // Transform noisy near end signal from time domain to frequency domain.
886 zerosDBufNoisy = TimeToFrequencyDomain(aecm,
887 aecm->dBufNoisy,
888 dfw,
889 dfaNoisy,
890 &dfaNoisySum);
891 aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
892 aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
893
894 if (nearendClean == NULL) {
895 ptrDfaClean = dfaNoisy;
896 aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
897 aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
898 dfaCleanSum = dfaNoisySum;
899 } else {
900 // Transform clean near end signal from time domain to frequency domain.
901 zerosDBufClean = TimeToFrequencyDomain(aecm,
902 aecm->dBufClean,
903 dfw,
904 dfaClean,
905 &dfaCleanSum);
906 aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
907 aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
908 }
909
910 // Get the delay
911 // Save far-end history and estimate delay
912 WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
913
914 if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1,
915 far_q) == -1) {
916 return -1;
917 }
918 delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
919 dfaNoisy,
920 PART_LEN1,
921 zerosDBufNoisy);
922 if (delay == -1) {
923 return -1;
924 }
925 else if (delay == -2) {
926 // If the delay is unknown, we assume zero.
927 // NOTE: this will have to be adjusted if we ever add lookahead.
928 delay = 0;
929 }
930
931 if (aecm->fixedDelay >= 0) {
932 // Use fixed delay
933 delay = aecm->fixedDelay;
934 }
935
936 // Get aligned far end spectrum
937 far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
938 zerosXBuf = (int16_t) far_q;
939
940 if (far_spectrum_ptr == NULL) {
941 return -1;
942 }
943
944 // Calculate log(energy) and update energy threshold levels
945 WebRtcAecm_CalcEnergies(aecm,
946 far_spectrum_ptr,
947 zerosXBuf,
948 dfaNoisySum,
949 echoEst32);
950 // Calculate stepsize
951 mu = WebRtcAecm_CalcStepSize(aecm);
952
953 // Update counters
954 aecm->totCount++;
955
956 // This is the channel estimation algorithm.
957 // It is base on NLMS but has a variable step length,
958 // which was calculated above.
959 WebRtcAecm_UpdateChannel(aecm,
960 far_spectrum_ptr,
961 zerosXBuf,
962 dfaNoisy,
963 mu,
964 echoEst32);
965
966 supGain = WebRtcAecm_CalcSuppressionGain(aecm);
967
968 // Calculate Wiener filter hnl[]
969 for (i = 0; i < PART_LEN1; i++) {
970 // Far end signal through channel estimate in Q8
971 // How much can we shift right to preserve resolution
972 tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
973 aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(
974 WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8);
975
976 zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
977 zeros16 = WebRtcSpl_NormW16(supGain) + 1;
978 if (zeros32 + zeros16 > 16) {
979 // Multiplication is safe
980 // Result in
981 // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
982 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
983 (uint16_t)supGain);
984 resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
985 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
986 } else {
987 tmp16no1 = 17 - zeros32 - zeros16;
988 resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
989 RESOLUTION_SUPGAIN;
990 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
991 if (zeros32 > tmp16no1) {
992 echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
993 (uint32_t)aecm->echoFilt[i],
994 (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain, tmp16no1));
995 } else {
996 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
997 echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
998 (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i],
999 tmp16no1),
1000 (uint16_t)supGain);
1001 }
1002 }
1003
1004 zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
1005 if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld))
1006 & (aecm->nearFilt[i])) {
1007 tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16);
1008 qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld;
1009 tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff);
1010 } else {
1011 tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i],
1012 aecm->dfaCleanQDomain
1013 - aecm->dfaCleanQDomainOld);
1014 qDomainDiff = 0;
1015 tmp16no2 = ptrDfaClean[i];
1016 }
1017
1018 tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
1019 tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4);
1020 tmp16no2 += tmp16no1;
1021 zeros16 = WebRtcSpl_NormW16(tmp16no2);
1022 if ((tmp16no2) & (-qDomainDiff > zeros16)) {
1023 aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
1024 } else {
1025 aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff);
1026 }
1027
1028 // Wiener filter coefficients, resulting hnl in Q14
1029 if (echoEst32Gained == 0) {
1030 hnl[i] = ONE_Q14;
1031 numPosCoef++;
1032 } else if (aecm->nearFilt[i] == 0) {
1033 hnl[i] = 0;
1034 } else {
1035 // Multiply the suppression gain
1036 // Rounding
1037 echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
1038 tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
1039 (uint16_t)aecm->nearFilt[i]);
1040
1041 // Current resolution is
1042 // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN
1043 // - max(0, 17 - zeros16 - zeros32))
1044 // Make sure we are in Q14
1045 tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
1046 if (tmp32no1 > ONE_Q14) {
1047 hnl[i] = 0;
1048 } else if (tmp32no1 < 0) {
1049 hnl[i] = ONE_Q14;
1050 numPosCoef++;
1051 } else {
1052 // 1-echoEst/dfa
1053 hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
1054 if (hnl[i] <= 0) {
1055 hnl[i] = 0;
1056 } else {
1057 numPosCoef++;
1058 }
1059 }
1060 }
1061 }
1062
1063 // Only in wideband. Prevent the gain in upper band from being larger than
1064 // in lower band.
1065 if (aecm->mult == 2) {
1066 // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
1067 // speech distortion in double-talk.
1068 for (i = 0; i < (PART_LEN1 >> 3); i++) {
1069 __asm __volatile (
1070 "lh %[temp1], 0(%[ptr1]) \n\t"
1071 "lh %[temp2], 2(%[ptr1]) \n\t"
1072 "lh %[temp3], 4(%[ptr1]) \n\t"
1073 "lh %[temp4], 6(%[ptr1]) \n\t"
1074 "lh %[temp5], 8(%[ptr1]) \n\t"
1075 "lh %[temp6], 10(%[ptr1]) \n\t"
1076 "lh %[temp7], 12(%[ptr1]) \n\t"
1077 "lh %[temp8], 14(%[ptr1]) \n\t"
1078 "mul %[temp1], %[temp1], %[temp1] \n\t"
1079 "mul %[temp2], %[temp2], %[temp2] \n\t"
1080 "mul %[temp3], %[temp3], %[temp3] \n\t"
1081 "mul %[temp4], %[temp4], %[temp4] \n\t"
1082 "mul %[temp5], %[temp5], %[temp5] \n\t"
1083 "mul %[temp6], %[temp6], %[temp6] \n\t"
1084 "mul %[temp7], %[temp7], %[temp7] \n\t"
1085 "mul %[temp8], %[temp8], %[temp8] \n\t"
1086 "sra %[temp1], %[temp1], 14 \n\t"
1087 "sra %[temp2], %[temp2], 14 \n\t"
1088 "sra %[temp3], %[temp3], 14 \n\t"
1089 "sra %[temp4], %[temp4], 14 \n\t"
1090 "sra %[temp5], %[temp5], 14 \n\t"
1091 "sra %[temp6], %[temp6], 14 \n\t"
1092 "sra %[temp7], %[temp7], 14 \n\t"
1093 "sra %[temp8], %[temp8], 14 \n\t"
1094 "sh %[temp1], 0(%[ptr1]) \n\t"
1095 "sh %[temp2], 2(%[ptr1]) \n\t"
1096 "sh %[temp3], 4(%[ptr1]) \n\t"
1097 "sh %[temp4], 6(%[ptr1]) \n\t"
1098 "sh %[temp5], 8(%[ptr1]) \n\t"
1099 "sh %[temp6], 10(%[ptr1]) \n\t"
1100 "sh %[temp7], 12(%[ptr1]) \n\t"
1101 "sh %[temp8], 14(%[ptr1]) \n\t"
1102 "addiu %[ptr1], %[ptr1], 16 \n\t"
1103 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
1104 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
1105 [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1)
1106 :
1107 : "memory", "hi", "lo"
1108 );
1109 }
1110 for(i = 0; i < (PART_LEN1 & 7); i++) {
1111 __asm __volatile (
1112 "lh %[temp1], 0(%[ptr1]) \n\t"
1113 "mul %[temp1], %[temp1], %[temp1] \n\t"
1114 "sra %[temp1], %[temp1], 14 \n\t"
1115 "sh %[temp1], 0(%[ptr1]) \n\t"
1116 "addiu %[ptr1], %[ptr1], 2 \n\t"
1117 : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1)
1118 :
1119 : "memory", "hi", "lo"
1120 );
1121 }
1122
1123 for (i = kMinPrefBand; i <= kMaxPrefBand; i++) {
1124 avgHnl32 += (int32_t)hnl[i];
1125 }
1126
1127 assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
1128 avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
1129
1130 for (i = kMaxPrefBand; i < PART_LEN1; i++) {
1131 if (hnl[i] > (int16_t)avgHnl32) {
1132 hnl[i] = (int16_t)avgHnl32;
1133 }
1134 }
1135 }
1136
1137 // Calculate NLP gain, result is in Q14
1138 if (aecm->nlpFlag) {
1139 if (numPosCoef < 3) {
1140 for (i = 0; i < PART_LEN1; i++) {
1141 efw[i].real = 0;
1142 efw[i].imag = 0;
1143 hnl[i] = 0;
1144 }
1145 } else {
1146 for (i = 0; i < PART_LEN1; i++) {
1147 #if defined(MIPS_DSP_R1_LE)
1148 __asm __volatile (
1149 ".set push \n\t"
1150 ".set noreorder \n\t"
1151 "lh %[temp1], 0(%[ptr]) \n\t"
1152 "lh %[temp2], 0(%[dr_ptr]) \n\t"
1153 "slti %[temp4], %[temp1], 0x4001 \n\t"
1154 "beqz %[temp4], 3f \n\t"
1155 " lh %[temp3], 2(%[dr_ptr]) \n\t"
1156 "slti %[temp5], %[temp1], 3277 \n\t"
1157 "bnez %[temp5], 2f \n\t"
1158 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
1159 "mul %[temp2], %[temp2], %[temp1] \n\t"
1160 "mul %[temp3], %[temp3], %[temp1] \n\t"
1161 "shra_r.w %[temp2], %[temp2], 14 \n\t"
1162 "shra_r.w %[temp3], %[temp3], 14 \n\t"
1163 "b 4f \n\t"
1164 " nop \n\t"
1165 "2: \n\t"
1166 "addu %[temp1], $zero, $zero \n\t"
1167 "addu %[temp2], $zero, $zero \n\t"
1168 "addu %[temp3], $zero, $zero \n\t"
1169 "b 1f \n\t"
1170 " nop \n\t"
1171 "3: \n\t"
1172 "addiu %[temp1], $0, 0x4000 \n\t"
1173 "1: \n\t"
1174 "sh %[temp1], 0(%[ptr]) \n\t"
1175 "4: \n\t"
1176 "sh %[temp2], 0(%[er_ptr]) \n\t"
1177 "sh %[temp3], 2(%[er_ptr]) \n\t"
1178 "addiu %[ptr], %[ptr], 2 \n\t"
1179 "addiu %[er_ptr], %[er_ptr], 4 \n\t"
1180 ".set pop \n\t"
1181 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
1182 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
1183 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
1184 :
1185 : "memory", "hi", "lo"
1186 );
1187 #else
1188 __asm __volatile (
1189 ".set push \n\t"
1190 ".set noreorder \n\t"
1191 "lh %[temp1], 0(%[ptr]) \n\t"
1192 "lh %[temp2], 0(%[dr_ptr]) \n\t"
1193 "slti %[temp4], %[temp1], 0x4001 \n\t"
1194 "beqz %[temp4], 3f \n\t"
1195 " lh %[temp3], 2(%[dr_ptr]) \n\t"
1196 "slti %[temp5], %[temp1], 3277 \n\t"
1197 "bnez %[temp5], 2f \n\t"
1198 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t"
1199 "mul %[temp2], %[temp2], %[temp1] \n\t"
1200 "mul %[temp3], %[temp3], %[temp1] \n\t"
1201 "addiu %[temp2], %[temp2], 0x2000 \n\t"
1202 "addiu %[temp3], %[temp3], 0x2000 \n\t"
1203 "sra %[temp2], %[temp2], 14 \n\t"
1204 "sra %[temp3], %[temp3], 14 \n\t"
1205 "b 4f \n\t"
1206 " nop \n\t"
1207 "2: \n\t"
1208 "addu %[temp1], $zero, $zero \n\t"
1209 "addu %[temp2], $zero, $zero \n\t"
1210 "addu %[temp3], $zero, $zero \n\t"
1211 "b 1f \n\t"
1212 " nop \n\t"
1213 "3: \n\t"
1214 "addiu %[temp1], $0, 0x4000 \n\t"
1215 "1: \n\t"
1216 "sh %[temp1], 0(%[ptr]) \n\t"
1217 "4: \n\t"
1218 "sh %[temp2], 0(%[er_ptr]) \n\t"
1219 "sh %[temp3], 2(%[er_ptr]) \n\t"
1220 "addiu %[ptr], %[ptr], 2 \n\t"
1221 "addiu %[er_ptr], %[er_ptr], 4 \n\t"
1222 ".set pop \n\t"
1223 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
1224 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr),
1225 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr)
1226 :
1227 : "memory", "hi", "lo"
1228 );
1229 #endif
1230 }
1231 }
1232 }
1233 else {
1234 // multiply with Wiener coefficients
1235 for (i = 0; i < PART_LEN1; i++) {
1236 efw[i].real = (int16_t)
1237 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
1238 hnl[i],
1239 14));
1240 efw[i].imag = (int16_t)
1241 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
1242 hnl[i],
1243 14));
1244 }
1245 }
1246
1247 if (aecm->cngMode == AecmTrue) {
1248 ComfortNoise(aecm, ptrDfaClean, efw, hnl);
1249 }
1250
1251 InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
1252
1253 return 0;
1254 }
1255
1256 // Generate comfort noise and add to output signal.
ComfortNoise(AecmCore_t * aecm,const uint16_t * dfa,complex16_t * out,const int16_t * lambda)1257 static void ComfortNoise(AecmCore_t* aecm,
1258 const uint16_t* dfa,
1259 complex16_t* out,
1260 const int16_t* lambda) {
1261 int16_t i;
1262 int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2;
1263 int32_t tmp32, tmp321, tnoise, tnoise1;
1264 int32_t tmp322, tmp323, *tmp1;
1265 int16_t* dfap;
1266 int16_t* lambdap;
1267 const int32_t c2049 = 2049;
1268 const int32_t c359 = 359;
1269 const int32_t c114 = ONE_Q14;
1270
1271 int16_t randW16[PART_LEN];
1272 int16_t uReal[PART_LEN1];
1273 int16_t uImag[PART_LEN1];
1274 int32_t outLShift32;
1275
1276 int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
1277 int16_t minTrackShift = 9;
1278
1279 assert(shiftFromNearToNoise >= 0);
1280 assert(shiftFromNearToNoise < 16);
1281
1282 if (aecm->noiseEstCtr < 100) {
1283 // Track the minimum more quickly initially.
1284 aecm->noiseEstCtr++;
1285 minTrackShift = 6;
1286 }
1287
1288 // Generate a uniform random array on [0 2^15-1].
1289 WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
1290 int16_t* randW16p = (int16_t*)randW16;
1291 #if defined (MIPS_DSP_R1_LE)
1292 int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable;
1293 int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable;
1294 #endif // #if defined(MIPS_DSP_R1_LE)
1295 tmp1 = (int32_t*)aecm->noiseEst + 1;
1296 dfap = (int16_t*)dfa + 1;
1297 lambdap = (int16_t*)lambda + 1;
1298 // Estimate noise power.
1299 for (i = 1; i < PART_LEN1; i+=2) {
1300 // Shift to the noise domain.
1301 __asm __volatile (
1302 "lh %[tmp32], 0(%[dfap]) \n\t"
1303 "lw %[tnoise], 0(%[tmp1]) \n\t"
1304 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
1305 : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32),
1306 [tnoise] "=&r" (tnoise)
1307 : [tmp1] "r" (tmp1), [dfap] "r" (dfap),
1308 [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
1309 : "memory"
1310 );
1311
1312 if (outLShift32 < tnoise) {
1313 // Reset "too low" counter
1314 aecm->noiseEstTooLowCtr[i] = 0;
1315 // Track the minimum.
1316 if (tnoise < (1 << minTrackShift)) {
1317 // For small values, decrease noiseEst[i] every
1318 // |kNoiseEstIncCount| block. The regular approach below can not
1319 // go further down due to truncation.
1320 aecm->noiseEstTooHighCtr[i]++;
1321 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) {
1322 tnoise--;
1323 aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
1324 }
1325 } else {
1326 __asm __volatile (
1327 "subu %[tmp32], %[tnoise], %[outLShift32] \n\t"
1328 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
1329 "subu %[tnoise], %[tnoise], %[tmp32] \n\t"
1330 : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise)
1331 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
1332 );
1333 }
1334 } else {
1335 // Reset "too high" counter
1336 aecm->noiseEstTooHighCtr[i] = 0;
1337 // Ramp slowly upwards until we hit the minimum again.
1338 if ((tnoise >> 19) <= 0) {
1339 if ((tnoise >> 11) > 0) {
1340 // Large enough for relative increase
1341 __asm __volatile (
1342 "mul %[tnoise], %[tnoise], %[c2049] \n\t"
1343 "sra %[tnoise], %[tnoise], 11 \n\t"
1344 : [tnoise] "+r" (tnoise)
1345 : [c2049] "r" (c2049)
1346 : "hi", "lo"
1347 );
1348 } else {
1349 // Make incremental increases based on size every
1350 // |kNoiseEstIncCount| block
1351 aecm->noiseEstTooLowCtr[i]++;
1352 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) {
1353 __asm __volatile (
1354 "sra %[tmp32], %[tnoise], 9 \n\t"
1355 "addi %[tnoise], %[tnoise], 1 \n\t"
1356 "addu %[tnoise], %[tnoise], %[tmp32] \n\t"
1357 : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32)
1358 :
1359 );
1360 aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
1361 }
1362 }
1363 } else {
1364 // Avoid overflow.
1365 // Multiplication with 2049 will cause wrap around. Scale
1366 // down first and then multiply
1367 __asm __volatile (
1368 "sra %[tnoise], %[tnoise], 11 \n\t"
1369 "mul %[tnoise], %[tnoise], %[c2049] \n\t"
1370 : [tnoise] "+r" (tnoise)
1371 : [c2049] "r" (c2049)
1372 : "hi", "lo"
1373 );
1374 }
1375 }
1376
1377 // Shift to the noise domain.
1378 __asm __volatile (
1379 "lh %[tmp32], 2(%[dfap]) \n\t"
1380 "lw %[tnoise1], 4(%[tmp1]) \n\t"
1381 "addiu %[dfap], %[dfap], 4 \n\t"
1382 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t"
1383 : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap),
1384 [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1)
1385 : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
1386 : "memory"
1387 );
1388
1389 if (outLShift32 < tnoise1) {
1390 // Reset "too low" counter
1391 aecm->noiseEstTooLowCtr[i + 1] = 0;
1392 // Track the minimum.
1393 if (tnoise1 < (1 << minTrackShift)) {
1394 // For small values, decrease noiseEst[i] every
1395 // |kNoiseEstIncCount| block. The regular approach below can not
1396 // go further down due to truncation.
1397 aecm->noiseEstTooHighCtr[i + 1]++;
1398 if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) {
1399 tnoise1--;
1400 aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter
1401 }
1402 } else {
1403 __asm __volatile (
1404 "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t"
1405 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t"
1406 "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
1407 : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1)
1408 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift)
1409 );
1410 }
1411 } else {
1412 // Reset "too high" counter
1413 aecm->noiseEstTooHighCtr[i + 1] = 0;
1414 // Ramp slowly upwards until we hit the minimum again.
1415 if ((tnoise1 >> 19) <= 0) {
1416 if ((tnoise1 >> 11) > 0) {
1417 // Large enough for relative increase
1418 __asm __volatile (
1419 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
1420 "sra %[tnoise1], %[tnoise1], 11 \n\t"
1421 : [tnoise1] "+r" (tnoise1)
1422 : [c2049] "r" (c2049)
1423 : "hi", "lo"
1424 );
1425 } else {
1426 // Make incremental increases based on size every
1427 // |kNoiseEstIncCount| block
1428 aecm->noiseEstTooLowCtr[i + 1]++;
1429 if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) {
1430 __asm __volatile (
1431 "sra %[tmp32], %[tnoise1], 9 \n\t"
1432 "addi %[tnoise1], %[tnoise1], 1 \n\t"
1433 "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t"
1434 : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32)
1435 :
1436 );
1437 aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter
1438 }
1439 }
1440 } else {
1441 // Avoid overflow.
1442 // Multiplication with 2049 will cause wrap around. Scale
1443 // down first and then multiply
1444 __asm __volatile (
1445 "sra %[tnoise1], %[tnoise1], 11 \n\t"
1446 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t"
1447 : [tnoise1] "+r" (tnoise1)
1448 : [c2049] "r" (c2049)
1449 : "hi", "lo"
1450 );
1451 }
1452 }
1453
1454 __asm __volatile (
1455 "lh %[tmp16], 0(%[lambdap]) \n\t"
1456 "lh %[tmp161], 2(%[lambdap]) \n\t"
1457 "sw %[tnoise], 0(%[tmp1]) \n\t"
1458 "sw %[tnoise1], 4(%[tmp1]) \n\t"
1459 "subu %[tmp16], %[c114], %[tmp16] \n\t"
1460 "subu %[tmp161], %[c114], %[tmp161] \n\t"
1461 "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t"
1462 "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t"
1463 "addiu %[lambdap], %[lambdap], 4 \n\t"
1464 "addiu %[tmp1], %[tmp1], 8 \n\t"
1465 : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1),
1466 [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap)
1467 : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114),
1468 [shiftFromNearToNoise] "r" (shiftFromNearToNoise)
1469 : "memory"
1470 );
1471
1472 if (tmp32 > 32767) {
1473 tmp32 = 32767;
1474 aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise);
1475 }
1476 if (tmp321 > 32767) {
1477 tmp321 = 32767;
1478 aecm->noiseEst[i+1] = WEBRTC_SPL_LSHIFT_W32(tmp321, shiftFromNearToNoise);
1479 }
1480
1481 __asm __volatile (
1482 "mul %[tmp32], %[tmp32], %[tmp16] \n\t"
1483 "mul %[tmp321], %[tmp321], %[tmp161] \n\t"
1484 "sra %[nrsh1], %[tmp32], 14 \n\t"
1485 "sra %[nrsh2], %[tmp321], 14 \n\t"
1486 : [nrsh1] "=r" (nrsh1), [nrsh2] "=r" (nrsh2)
1487 : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32),
1488 [tmp321] "r" (tmp321)
1489 : "memory", "hi", "lo"
1490 );
1491
1492 __asm __volatile (
1493 "lh %[tmp32], 0(%[randW16p]) \n\t"
1494 "lh %[tmp321], 2(%[randW16p]) \n\t"
1495 "addiu %[randW16p], %[randW16p], 4 \n\t"
1496 "mul %[tmp32], %[tmp32], %[c359] \n\t"
1497 "mul %[tmp321], %[tmp321], %[c359] \n\t"
1498 "sra %[tmp16], %[tmp32], 15 \n\t"
1499 "sra %[tmp161], %[tmp321], 15 \n\t"
1500 : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32),
1501 [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321)
1502 : [c359] "r" (c359)
1503 : "memory", "hi", "lo"
1504 );
1505
1506 #if !defined(MIPS_DSP_R1_LE)
1507 tmp32 = WebRtcAecm_kCosTable[tmp16];
1508 tmp321 = WebRtcAecm_kSinTable[tmp16];
1509 tmp322 = WebRtcAecm_kCosTable[tmp161];
1510 tmp323 = WebRtcAecm_kSinTable[tmp161];
1511 #else
1512 __asm __volatile (
1513 "sll %[tmp16], %[tmp16], 1 \n\t"
1514 "sll %[tmp161], %[tmp161], 1 \n\t"
1515 "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t"
1516 "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t"
1517 "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t"
1518 "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t"
1519 : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321),
1520 [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323)
1521 : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16),
1522 [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep)
1523 : "memory"
1524 );
1525 #endif
1526 __asm __volatile (
1527 "mul %[tmp32], %[tmp32], %[nrsh1] \n\t"
1528 "negu %[tmp162], %[nrsh1] \n\t"
1529 "mul %[tmp322], %[tmp322], %[nrsh2] \n\t"
1530 "negu %[tmp163], %[nrsh2] \n\t"
1531 "sra %[tmp32], %[tmp32], 13 \n\t"
1532 "mul %[tmp321], %[tmp321], %[tmp162] \n\t"
1533 "sra %[tmp322], %[tmp322], 13 \n\t"
1534 "mul %[tmp323], %[tmp323], %[tmp163] \n\t"
1535 "sra %[tmp321], %[tmp321], 13 \n\t"
1536 "sra %[tmp323], %[tmp323], 13 \n\t"
1537 : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162),
1538 [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163)
1539 : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2)
1540 : "hi", "lo"
1541 );
1542 // Tables are in Q13.
1543 uReal[i] = (int16_t)tmp32;
1544 uImag[i] = (int16_t)tmp321;
1545 uReal[i + 1] = (int16_t)tmp322;
1546 uImag[i + 1] = (int16_t)tmp323;
1547 }
1548
1549 int32_t tt, sgn;
1550 tt = out[0].real;
1551 sgn = ((int)tt) >> 31;
1552 out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1553 tt = out[0].imag;
1554 sgn = ((int)tt) >> 31;
1555 out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1556 for (i = 1; i < PART_LEN; i++) {
1557 tt = out[i].real + uReal[i];
1558 sgn = ((int)tt) >> 31;
1559 out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1560 tt = out[i].imag + uImag[i];
1561 sgn = ((int)tt) >> 31;
1562 out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1563 }
1564 tt = out[PART_LEN].real + uReal[PART_LEN];
1565 sgn = ((int)tt) >> 31;
1566 out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1567 tt = out[PART_LEN].imag;
1568 sgn = ((int)tt) >> 31;
1569 out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
1570 }
1571
1572