1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <string.h>
13
14 #include "webrtc/modules/audio_processing/ns/noise_suppression_x.h"
15 #include "webrtc/modules/audio_processing/ns/nsx_core.h"
16
// Lookup table for the sigmoid (tanh) indicator maps evaluated in
// WebRtcNsx_SpeechNoiseProb(). The table holds 17 entries so that both
// [index] and [index + 1] are valid for index 0..15 when the caller
// interpolates linearly between neighbouring entries.
static const int16_t kIndicatorTable[17] = {
  0,    2017, 3809, 5227,
  6258, 6963, 7424, 7718,
  7901, 8014, 8084, 8126,
  8152, 8168, 8177, 8183,
  8187
};
21
22 // Compute speech/noise probability
23 // speech/noise probability is returned in: probSpeechFinal
24 //snrLocPrior is the prior SNR for each frequency (in Q11)
25 //snrLocPost is the post SNR for each frequency (in Q11)
WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC * inst,uint16_t * nonSpeechProbFinal,uint32_t * priorLocSnr,uint32_t * postLocSnr)26 void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
27 uint16_t* nonSpeechProbFinal,
28 uint32_t* priorLocSnr,
29 uint32_t* postLocSnr) {
30 uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
31 int32_t indPriorFX, tmp32no1;
32 int32_t logLrtTimeAvgKsumFX;
33 int16_t indPriorFX16;
34 int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
35 size_t i;
36 int normTmp, nShifts;
37
38 int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
39 int32_t const_max = 0x7fffffff;
40 int32_t const_neg43 = -43;
41 int32_t const_5412 = 5412;
42 int32_t const_11rsh12 = (11 << 12);
43 int32_t const_178 = 178;
44
45
46 // compute feature based on average LR factor
47 // this is the average over all frequencies of the smooth log LRT
48 logLrtTimeAvgKsumFX = 0;
49 for (i = 0; i < inst->magnLen; i++) {
50 r0 = postLocSnr[i]; // Q11
51 r1 = priorLocSnr[i];
52 r2 = inst->logLrtTimeAvgW32[i];
53
54 __asm __volatile(
55 ".set push \n\t"
56 ".set noreorder \n\t"
57 "clz %[r3], %[r0] \n\t"
58 "clz %[r5], %[r1] \n\t"
59 "slti %[r4], %[r3], 32 \n\t"
60 "slti %[r6], %[r5], 32 \n\t"
61 "movz %[r3], $0, %[r4] \n\t"
62 "movz %[r5], $0, %[r6] \n\t"
63 "slti %[r4], %[r3], 11 \n\t"
64 "addiu %[r6], %[r3], -11 \n\t"
65 "neg %[r7], %[r6] \n\t"
66 "sllv %[r6], %[r1], %[r6] \n\t"
67 "srav %[r7], %[r1], %[r7] \n\t"
68 "movn %[r6], %[r7], %[r4] \n\t"
69 "sllv %[r1], %[r1], %[r5] \n\t"
70 "and %[r1], %[r1], %[const_max] \n\t"
71 "sra %[r1], %[r1], 19 \n\t"
72 "mul %[r7], %[r1], %[r1] \n\t"
73 "sllv %[r3], %[r0], %[r3] \n\t"
74 "divu %[r8], %[r3], %[r6] \n\t"
75 "slti %[r6], %[r6], 1 \n\t"
76 "mul %[r7], %[r7], %[const_neg43] \n\t"
77 "sra %[r7], %[r7], 19 \n\t"
78 "movz %[r3], %[r8], %[r6] \n\t"
79 "subu %[r0], %[r0], %[r3] \n\t"
80 "movn %[r0], $0, %[r6] \n\t"
81 "mul %[r1], %[r1], %[const_5412] \n\t"
82 "sra %[r1], %[r1], 12 \n\t"
83 "addu %[r7], %[r7], %[r1] \n\t"
84 "addiu %[r1], %[r7], 37 \n\t"
85 "addiu %[r5], %[r5], -31 \n\t"
86 "neg %[r5], %[r5] \n\t"
87 "sll %[r5], %[r5], 12 \n\t"
88 "addu %[r5], %[r5], %[r1] \n\t"
89 "subu %[r7], %[r5], %[const_11rsh12] \n\t"
90 "mul %[r7], %[r7], %[const_178] \n\t"
91 "sra %[r7], %[r7], 8 \n\t"
92 "addu %[r7], %[r7], %[r2] \n\t"
93 "sra %[r7], %[r7], 1 \n\t"
94 "subu %[r2], %[r2], %[r7] \n\t"
95 "addu %[r2], %[r2], %[r0] \n\t"
96 ".set pop \n\t"
97 : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
98 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
99 [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
100 : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
101 [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
102 [const_178] "r" (const_178)
103 : "hi", "lo"
104 );
105 inst->logLrtTimeAvgW32[i] = r2;
106 logLrtTimeAvgKsumFX += r2;
107 }
108
109 inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
110 (inst->stages + 11);
111
112 // done with computation of LR factor
113
114 //
115 // compute the indicator functions
116 //
117
118 // average LRT feature
119 // FLOAT code
120 // indicator0 = 0.5 * (tanh(widthPrior *
121 // (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
122 tmpIndFX = 16384; // Q14(1.0)
123 tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12
124 nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5;
125 //use larger width in tanh map for pause regions
126 if (tmp32no1 < 0) {
127 tmpIndFX = 0;
128 tmp32no1 = -tmp32no1;
129 //widthPrior = widthPrior * 2.0;
130 nShifts++;
131 }
132 tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14
133 // compute indicator function: sigmoid map
134 tableIndex = (int16_t)(tmp32no1 >> 14);
135 if ((tableIndex < 16) && (tableIndex >= 0)) {
136 tmp16no2 = kIndicatorTable[tableIndex];
137 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
138 frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14
139 tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
140 if (tmpIndFX == 0) {
141 tmpIndFX = 8192 - tmp16no2; // Q14
142 } else {
143 tmpIndFX = 8192 + tmp16no2; // Q14
144 }
145 }
146 indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14
147
148 //spectral flatness feature
149 if (inst->weightSpecFlat) {
150 tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10
151 tmpIndFX = 16384; // Q14(1.0)
152 //use larger width in tanh map for pause regions
153 tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10
154 nShifts = 4;
155 if (inst->thresholdSpecFlat < tmpU32no1) {
156 tmpIndFX = 0;
157 tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat;
158 //widthPrior = widthPrior * 2.0;
159 nShifts++;
160 }
161 tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14
162 // compute indicator function: sigmoid map
163 // FLOAT code
164 // indicator1 = 0.5 * (tanh(sgnMap * widthPrior *
165 // (threshPrior1 - tmpFloat1)) + 1.0);
166 tableIndex = (int16_t)(tmpU32no1 >> 14);
167 if (tableIndex < 16) {
168 tmp16no2 = kIndicatorTable[tableIndex];
169 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
170 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
171 tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14);
172 if (tmpIndFX) {
173 tmpIndFX = 8192 + tmp16no2; // Q14
174 } else {
175 tmpIndFX = 8192 - tmp16no2; // Q14
176 }
177 }
178 indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14
179 }
180
181 //for template spectral-difference
182 if (inst->weightSpecDiff) {
183 tmpU32no1 = 0;
184 if (inst->featureSpecDiff) {
185 normTmp = WEBRTC_SPL_MIN(20 - inst->stages,
186 WebRtcSpl_NormU32(inst->featureSpecDiff));
187 assert(normTmp >= 0);
188 tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages)
189 tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp);
190 if (tmpU32no2 > 0) {
191 // Q(20 - inst->stages)
192 tmpU32no1 /= tmpU32no2;
193 } else {
194 tmpU32no1 = (uint32_t)(0x7fffffff);
195 }
196 }
197 tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25;
198 tmpU32no2 = tmpU32no1 - tmpU32no3;
199 nShifts = 1;
200 tmpIndFX = 16384; // Q14(1.0)
201 //use larger width in tanh map for pause regions
202 if (tmpU32no2 & 0x80000000) {
203 tmpIndFX = 0;
204 tmpU32no2 = tmpU32no3 - tmpU32no1;
205 //widthPrior = widthPrior * 2.0;
206 nShifts--;
207 }
208 tmpU32no1 = tmpU32no2 >> nShifts;
209 // compute indicator function: sigmoid map
210 /* FLOAT code
211 indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0);
212 */
213 tableIndex = (int16_t)(tmpU32no1 >> 14);
214 if (tableIndex < 16) {
215 tmp16no2 = kIndicatorTable[tableIndex];
216 tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex];
217 frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14
218 tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
219 tmp16no1, frac, 14);
220 if (tmpIndFX) {
221 tmpIndFX = 8192 + tmp16no2;
222 } else {
223 tmpIndFX = 8192 - tmp16no2;
224 }
225 }
226 indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14
227 }
228
229 //combine the indicator function with the feature weights
230 // FLOAT code
231 // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 *
232 // indicator1 + weightIndPrior2 * indicator2);
233 indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14
234 // done with computing indicator function
235
236 //compute the prior probability
237 // FLOAT code
238 // inst->priorNonSpeechProb += PRIOR_UPDATE *
239 // (indPriorNonSpeech - inst->priorNonSpeechProb);
240 tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14
241 inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14);
242
243 //final speech probability: combine prior model with LR factor:
244
245 memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen);
246
247 if (inst->priorNonSpeechProb > 0) {
248 r0 = inst->priorNonSpeechProb;
249 r1 = 16384 - r0;
250 int32_t const_23637 = 23637;
251 int32_t const_44 = 44;
252 int32_t const_84 = 84;
253 int32_t const_1 = 1;
254 int32_t const_neg8 = -8;
255 for (i = 0; i < inst->magnLen; i++) {
256 r2 = inst->logLrtTimeAvgW32[i];
257 if (r2 < 65300) {
258 __asm __volatile(
259 ".set push \n\t"
260 ".set noreorder \n\t"
261 "mul %[r2], %[r2], %[const_23637] \n\t"
262 "sll %[r6], %[r1], 16 \n\t"
263 "clz %[r7], %[r6] \n\t"
264 "clo %[r8], %[r6] \n\t"
265 "slt %[r9], %[r6], $0 \n\t"
266 "movn %[r7], %[r8], %[r9] \n\t"
267 "sra %[r2], %[r2], 14 \n\t"
268 "andi %[r3], %[r2], 0xfff \n\t"
269 "mul %[r4], %[r3], %[r3] \n\t"
270 "mul %[r3], %[r3], %[const_84] \n\t"
271 "sra %[r2], %[r2], 12 \n\t"
272 "slt %[r5], %[r2], %[const_neg8] \n\t"
273 "movn %[r2], %[const_neg8], %[r5] \n\t"
274 "mul %[r4], %[r4], %[const_44] \n\t"
275 "sra %[r3], %[r3], 7 \n\t"
276 "addiu %[r7], %[r7], -1 \n\t"
277 "slti %[r9], %[r7], 31 \n\t"
278 "movz %[r7], $0, %[r9] \n\t"
279 "sra %[r4], %[r4], 19 \n\t"
280 "addu %[r4], %[r4], %[r3] \n\t"
281 "addiu %[r3], %[r2], 8 \n\t"
282 "addiu %[r2], %[r2], -4 \n\t"
283 "neg %[r5], %[r2] \n\t"
284 "sllv %[r6], %[r4], %[r2] \n\t"
285 "srav %[r5], %[r4], %[r5] \n\t"
286 "slt %[r2], %[r2], $0 \n\t"
287 "movn %[r6], %[r5], %[r2] \n\t"
288 "sllv %[r3], %[const_1], %[r3] \n\t"
289 "addu %[r2], %[r3], %[r6] \n\t"
290 "clz %[r4], %[r2] \n\t"
291 "clo %[r5], %[r2] \n\t"
292 "slt %[r8], %[r2], $0 \n\t"
293 "movn %[r4], %[r5], %[r8] \n\t"
294 "addiu %[r4], %[r4], -1 \n\t"
295 "slt %[r5], $0, %[r2] \n\t"
296 "or %[r5], %[r5], %[r7] \n\t"
297 "movz %[r4], $0, %[r5] \n\t"
298 "addiu %[r6], %[r7], -7 \n\t"
299 "addu %[r6], %[r6], %[r4] \n\t"
300 "bltz %[r6], 1f \n\t"
301 " nop \n\t"
302 "addiu %[r4], %[r6], -8 \n\t"
303 "neg %[r3], %[r4] \n\t"
304 "srav %[r5], %[r2], %[r3] \n\t"
305 "mul %[r5], %[r5], %[r1] \n\t"
306 "mul %[r2], %[r2], %[r1] \n\t"
307 "slt %[r4], %[r4], $0 \n\t"
308 "srav %[r5], %[r5], %[r6] \n\t"
309 "sra %[r2], %[r2], 8 \n\t"
310 "movn %[r2], %[r5], %[r4] \n\t"
311 "sll %[r3], %[r0], 8 \n\t"
312 "addu %[r2], %[r0], %[r2] \n\t"
313 "divu %[r3], %[r3], %[r2] \n\t"
314 "1: \n\t"
315 ".set pop \n\t"
316 : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4),
317 [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
318 [r8] "=&r" (r8), [r9] "=&r" (r9)
319 : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637),
320 [const_neg8] "r" (const_neg8), [const_84] "r" (const_84),
321 [const_1] "r" (const_1), [const_44] "r" (const_44)
322 : "hi", "lo"
323 );
324 nonSpeechProbFinal[i] = r3;
325 }
326 }
327 }
328 }
329
330 // Update analysis buffer for lower band, and window data before FFT.
WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC * inst,int16_t * out,int16_t * new_speech)331 void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
332 int16_t* out,
333 int16_t* new_speech) {
334 int iters, after;
335 int anaLen = (int)inst->anaLen;
336 int *window = (int*)inst->window;
337 int *anaBuf = (int*)inst->analysisBuffer;
338 int *outBuf = (int*)out;
339 int r0, r1, r2, r3, r4, r5, r6, r7;
340 #if defined(MIPS_DSP_R1_LE)
341 int r8;
342 #endif
343
344 // For lower band update analysis buffer.
345 memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
346 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer));
347 memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech,
348 inst->blockLen10ms * sizeof(*inst->analysisBuffer));
349
350 // Window data before FFT.
351 #if defined(MIPS_DSP_R1_LE)
352 __asm __volatile(
353 ".set push \n\t"
354 ".set noreorder \n\t"
355 "sra %[iters], %[anaLen], 3 \n\t"
356 "1: \n\t"
357 "blez %[iters], 2f \n\t"
358 " nop \n\t"
359 "lw %[r0], 0(%[window]) \n\t"
360 "lw %[r1], 0(%[anaBuf]) \n\t"
361 "lw %[r2], 4(%[window]) \n\t"
362 "lw %[r3], 4(%[anaBuf]) \n\t"
363 "lw %[r4], 8(%[window]) \n\t"
364 "lw %[r5], 8(%[anaBuf]) \n\t"
365 "lw %[r6], 12(%[window]) \n\t"
366 "lw %[r7], 12(%[anaBuf]) \n\t"
367 "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t"
368 "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t"
369 "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t"
370 "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t"
371 "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t"
372 "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t"
373 "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t"
374 "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t"
375 #if defined(MIPS_DSP_R2_LE)
376 "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t"
377 "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t"
378 "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t"
379 "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t"
380 "sw %[r8], 0(%[outBuf]) \n\t"
381 "sw %[r1], 4(%[outBuf]) \n\t"
382 "sw %[r3], 8(%[outBuf]) \n\t"
383 "sw %[r5], 12(%[outBuf]) \n\t"
384 #else
385 "shra_r.w %[r8], %[r8], 15 \n\t"
386 "shra_r.w %[r0], %[r0], 15 \n\t"
387 "shra_r.w %[r1], %[r1], 15 \n\t"
388 "shra_r.w %[r2], %[r2], 15 \n\t"
389 "shra_r.w %[r3], %[r3], 15 \n\t"
390 "shra_r.w %[r4], %[r4], 15 \n\t"
391 "shra_r.w %[r5], %[r5], 15 \n\t"
392 "shra_r.w %[r6], %[r6], 15 \n\t"
393 "sll %[r0], %[r0], 16 \n\t"
394 "sll %[r2], %[r2], 16 \n\t"
395 "sll %[r4], %[r4], 16 \n\t"
396 "sll %[r6], %[r6], 16 \n\t"
397 "packrl.ph %[r0], %[r8], %[r0] \n\t"
398 "packrl.ph %[r2], %[r1], %[r2] \n\t"
399 "packrl.ph %[r4], %[r3], %[r4] \n\t"
400 "packrl.ph %[r6], %[r5], %[r6] \n\t"
401 "sw %[r0], 0(%[outBuf]) \n\t"
402 "sw %[r2], 4(%[outBuf]) \n\t"
403 "sw %[r4], 8(%[outBuf]) \n\t"
404 "sw %[r6], 12(%[outBuf]) \n\t"
405 #endif
406 "addiu %[window], %[window], 16 \n\t"
407 "addiu %[anaBuf], %[anaBuf], 16 \n\t"
408 "addiu %[outBuf], %[outBuf], 16 \n\t"
409 "b 1b \n\t"
410 " addiu %[iters], %[iters], -1 \n\t"
411 "2: \n\t"
412 "andi %[after], %[anaLen], 7 \n\t"
413 "3: \n\t"
414 "blez %[after], 4f \n\t"
415 " nop \n\t"
416 "lh %[r0], 0(%[window]) \n\t"
417 "lh %[r1], 0(%[anaBuf]) \n\t"
418 "mul %[r0], %[r0], %[r1] \n\t"
419 "addiu %[window], %[window], 2 \n\t"
420 "addiu %[anaBuf], %[anaBuf], 2 \n\t"
421 "addiu %[outBuf], %[outBuf], 2 \n\t"
422 "shra_r.w %[r0], %[r0], 14 \n\t"
423 "sh %[r0], -2(%[outBuf]) \n\t"
424 "b 3b \n\t"
425 " addiu %[after], %[after], -1 \n\t"
426 "4: \n\t"
427 ".set pop \n\t"
428 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
429 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
430 [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8),
431 [iters] "=&r" (iters), [after] "=&r" (after),
432 [window] "+r" (window),[anaBuf] "+r" (anaBuf),
433 [outBuf] "+r" (outBuf)
434 : [anaLen] "r" (anaLen)
435 : "memory", "hi", "lo"
436 );
437 #else
438 __asm __volatile(
439 ".set push \n\t"
440 ".set noreorder \n\t"
441 "sra %[iters], %[anaLen], 2 \n\t"
442 "1: \n\t"
443 "blez %[iters], 2f \n\t"
444 " nop \n\t"
445 "lh %[r0], 0(%[window]) \n\t"
446 "lh %[r1], 0(%[anaBuf]) \n\t"
447 "lh %[r2], 2(%[window]) \n\t"
448 "lh %[r3], 2(%[anaBuf]) \n\t"
449 "lh %[r4], 4(%[window]) \n\t"
450 "lh %[r5], 4(%[anaBuf]) \n\t"
451 "lh %[r6], 6(%[window]) \n\t"
452 "lh %[r7], 6(%[anaBuf]) \n\t"
453 "mul %[r0], %[r0], %[r1] \n\t"
454 "mul %[r2], %[r2], %[r3] \n\t"
455 "mul %[r4], %[r4], %[r5] \n\t"
456 "mul %[r6], %[r6], %[r7] \n\t"
457 "addiu %[window], %[window], 8 \n\t"
458 "addiu %[anaBuf], %[anaBuf], 8 \n\t"
459 "addiu %[r0], %[r0], 0x2000 \n\t"
460 "addiu %[r2], %[r2], 0x2000 \n\t"
461 "addiu %[r4], %[r4], 0x2000 \n\t"
462 "addiu %[r6], %[r6], 0x2000 \n\t"
463 "sra %[r0], %[r0], 14 \n\t"
464 "sra %[r2], %[r2], 14 \n\t"
465 "sra %[r4], %[r4], 14 \n\t"
466 "sra %[r6], %[r6], 14 \n\t"
467 "sh %[r0], 0(%[outBuf]) \n\t"
468 "sh %[r2], 2(%[outBuf]) \n\t"
469 "sh %[r4], 4(%[outBuf]) \n\t"
470 "sh %[r6], 6(%[outBuf]) \n\t"
471 "addiu %[outBuf], %[outBuf], 8 \n\t"
472 "b 1b \n\t"
473 " addiu %[iters], %[iters], -1 \n\t"
474 "2: \n\t"
475 "andi %[after], %[anaLen], 3 \n\t"
476 "3: \n\t"
477 "blez %[after], 4f \n\t"
478 " nop \n\t"
479 "lh %[r0], 0(%[window]) \n\t"
480 "lh %[r1], 0(%[anaBuf]) \n\t"
481 "mul %[r0], %[r0], %[r1] \n\t"
482 "addiu %[window], %[window], 2 \n\t"
483 "addiu %[anaBuf], %[anaBuf], 2 \n\t"
484 "addiu %[outBuf], %[outBuf], 2 \n\t"
485 "addiu %[r0], %[r0], 0x2000 \n\t"
486 "sra %[r0], %[r0], 14 \n\t"
487 "sh %[r0], -2(%[outBuf]) \n\t"
488 "b 3b \n\t"
489 " addiu %[after], %[after], -1 \n\t"
490 "4: \n\t"
491 ".set pop \n\t"
492 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
493 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
494 [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters),
495 [after] "=&r" (after), [window] "+r" (window),
496 [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf)
497 : [anaLen] "r" (anaLen)
498 : "memory", "hi", "lo"
499 );
500 #endif
501 }
502
503 // For the noise supression process, synthesis, read out fully processed
504 // segment, and update synthesis buffer.
WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC * inst,int16_t * out_frame,int16_t gain_factor)505 void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
506 int16_t* out_frame,
507 int16_t gain_factor) {
508 int iters = (int)inst->blockLen10ms >> 2;
509 int after = inst->blockLen10ms & 3;
510 int r0, r1, r2, r3, r4, r5, r6, r7;
511 int16_t *window = (int16_t*)inst->window;
512 int16_t *real = inst->real;
513 int16_t *synthBuf = inst->synthesisBuffer;
514 int16_t *out = out_frame;
515 int sat_pos = 0x7fff;
516 int sat_neg = 0xffff8000;
517 int block10 = (int)inst->blockLen10ms;
518 int anaLen = (int)inst->anaLen;
519
520 __asm __volatile(
521 ".set push \n\t"
522 ".set noreorder \n\t"
523 "1: \n\t"
524 "blez %[iters], 2f \n\t"
525 " nop \n\t"
526 "lh %[r0], 0(%[window]) \n\t"
527 "lh %[r1], 0(%[real]) \n\t"
528 "lh %[r2], 2(%[window]) \n\t"
529 "lh %[r3], 2(%[real]) \n\t"
530 "lh %[r4], 4(%[window]) \n\t"
531 "lh %[r5], 4(%[real]) \n\t"
532 "lh %[r6], 6(%[window]) \n\t"
533 "lh %[r7], 6(%[real]) \n\t"
534 "mul %[r0], %[r0], %[r1] \n\t"
535 "mul %[r2], %[r2], %[r3] \n\t"
536 "mul %[r4], %[r4], %[r5] \n\t"
537 "mul %[r6], %[r6], %[r7] \n\t"
538 "addiu %[r0], %[r0], 0x2000 \n\t"
539 "addiu %[r2], %[r2], 0x2000 \n\t"
540 "addiu %[r4], %[r4], 0x2000 \n\t"
541 "addiu %[r6], %[r6], 0x2000 \n\t"
542 "sra %[r0], %[r0], 14 \n\t"
543 "sra %[r2], %[r2], 14 \n\t"
544 "sra %[r4], %[r4], 14 \n\t"
545 "sra %[r6], %[r6], 14 \n\t"
546 "mul %[r0], %[r0], %[gain_factor] \n\t"
547 "mul %[r2], %[r2], %[gain_factor] \n\t"
548 "mul %[r4], %[r4], %[gain_factor] \n\t"
549 "mul %[r6], %[r6], %[gain_factor] \n\t"
550 "addiu %[r0], %[r0], 0x1000 \n\t"
551 "addiu %[r2], %[r2], 0x1000 \n\t"
552 "addiu %[r4], %[r4], 0x1000 \n\t"
553 "addiu %[r6], %[r6], 0x1000 \n\t"
554 "sra %[r0], %[r0], 13 \n\t"
555 "sra %[r2], %[r2], 13 \n\t"
556 "sra %[r4], %[r4], 13 \n\t"
557 "sra %[r6], %[r6], 13 \n\t"
558 "slt %[r1], %[r0], %[sat_pos] \n\t"
559 "slt %[r3], %[r2], %[sat_pos] \n\t"
560 "slt %[r5], %[r4], %[sat_pos] \n\t"
561 "slt %[r7], %[r6], %[sat_pos] \n\t"
562 "movz %[r0], %[sat_pos], %[r1] \n\t"
563 "movz %[r2], %[sat_pos], %[r3] \n\t"
564 "movz %[r4], %[sat_pos], %[r5] \n\t"
565 "movz %[r6], %[sat_pos], %[r7] \n\t"
566 "lh %[r1], 0(%[synthBuf]) \n\t"
567 "lh %[r3], 2(%[synthBuf]) \n\t"
568 "lh %[r5], 4(%[synthBuf]) \n\t"
569 "lh %[r7], 6(%[synthBuf]) \n\t"
570 "addu %[r0], %[r0], %[r1] \n\t"
571 "addu %[r2], %[r2], %[r3] \n\t"
572 "addu %[r4], %[r4], %[r5] \n\t"
573 "addu %[r6], %[r6], %[r7] \n\t"
574 "slt %[r1], %[r0], %[sat_pos] \n\t"
575 "slt %[r3], %[r2], %[sat_pos] \n\t"
576 "slt %[r5], %[r4], %[sat_pos] \n\t"
577 "slt %[r7], %[r6], %[sat_pos] \n\t"
578 "movz %[r0], %[sat_pos], %[r1] \n\t"
579 "movz %[r2], %[sat_pos], %[r3] \n\t"
580 "movz %[r4], %[sat_pos], %[r5] \n\t"
581 "movz %[r6], %[sat_pos], %[r7] \n\t"
582 "slt %[r1], %[r0], %[sat_neg] \n\t"
583 "slt %[r3], %[r2], %[sat_neg] \n\t"
584 "slt %[r5], %[r4], %[sat_neg] \n\t"
585 "slt %[r7], %[r6], %[sat_neg] \n\t"
586 "movn %[r0], %[sat_neg], %[r1] \n\t"
587 "movn %[r2], %[sat_neg], %[r3] \n\t"
588 "movn %[r4], %[sat_neg], %[r5] \n\t"
589 "movn %[r6], %[sat_neg], %[r7] \n\t"
590 "sh %[r0], 0(%[synthBuf]) \n\t"
591 "sh %[r2], 2(%[synthBuf]) \n\t"
592 "sh %[r4], 4(%[synthBuf]) \n\t"
593 "sh %[r6], 6(%[synthBuf]) \n\t"
594 "sh %[r0], 0(%[out]) \n\t"
595 "sh %[r2], 2(%[out]) \n\t"
596 "sh %[r4], 4(%[out]) \n\t"
597 "sh %[r6], 6(%[out]) \n\t"
598 "addiu %[window], %[window], 8 \n\t"
599 "addiu %[real], %[real], 8 \n\t"
600 "addiu %[synthBuf],%[synthBuf], 8 \n\t"
601 "addiu %[out], %[out], 8 \n\t"
602 "b 1b \n\t"
603 " addiu %[iters], %[iters], -1 \n\t"
604 "2: \n\t"
605 "blez %[after], 3f \n\t"
606 " subu %[block10], %[anaLen], %[block10] \n\t"
607 "lh %[r0], 0(%[window]) \n\t"
608 "lh %[r1], 0(%[real]) \n\t"
609 "mul %[r0], %[r0], %[r1] \n\t"
610 "addiu %[window], %[window], 2 \n\t"
611 "addiu %[real], %[real], 2 \n\t"
612 "addiu %[r0], %[r0], 0x2000 \n\t"
613 "sra %[r0], %[r0], 14 \n\t"
614 "mul %[r0], %[r0], %[gain_factor] \n\t"
615 "addiu %[r0], %[r0], 0x1000 \n\t"
616 "sra %[r0], %[r0], 13 \n\t"
617 "slt %[r1], %[r0], %[sat_pos] \n\t"
618 "movz %[r0], %[sat_pos], %[r1] \n\t"
619 "lh %[r1], 0(%[synthBuf]) \n\t"
620 "addu %[r0], %[r0], %[r1] \n\t"
621 "slt %[r1], %[r0], %[sat_pos] \n\t"
622 "movz %[r0], %[sat_pos], %[r1] \n\t"
623 "slt %[r1], %[r0], %[sat_neg] \n\t"
624 "movn %[r0], %[sat_neg], %[r1] \n\t"
625 "sh %[r0], 0(%[synthBuf]) \n\t"
626 "sh %[r0], 0(%[out]) \n\t"
627 "addiu %[synthBuf],%[synthBuf], 2 \n\t"
628 "addiu %[out], %[out], 2 \n\t"
629 "b 2b \n\t"
630 " addiu %[after], %[after], -1 \n\t"
631 "3: \n\t"
632 "sra %[iters], %[block10], 2 \n\t"
633 "4: \n\t"
634 "blez %[iters], 5f \n\t"
635 " andi %[after], %[block10], 3 \n\t"
636 "lh %[r0], 0(%[window]) \n\t"
637 "lh %[r1], 0(%[real]) \n\t"
638 "lh %[r2], 2(%[window]) \n\t"
639 "lh %[r3], 2(%[real]) \n\t"
640 "lh %[r4], 4(%[window]) \n\t"
641 "lh %[r5], 4(%[real]) \n\t"
642 "lh %[r6], 6(%[window]) \n\t"
643 "lh %[r7], 6(%[real]) \n\t"
644 "mul %[r0], %[r0], %[r1] \n\t"
645 "mul %[r2], %[r2], %[r3] \n\t"
646 "mul %[r4], %[r4], %[r5] \n\t"
647 "mul %[r6], %[r6], %[r7] \n\t"
648 "addiu %[r0], %[r0], 0x2000 \n\t"
649 "addiu %[r2], %[r2], 0x2000 \n\t"
650 "addiu %[r4], %[r4], 0x2000 \n\t"
651 "addiu %[r6], %[r6], 0x2000 \n\t"
652 "sra %[r0], %[r0], 14 \n\t"
653 "sra %[r2], %[r2], 14 \n\t"
654 "sra %[r4], %[r4], 14 \n\t"
655 "sra %[r6], %[r6], 14 \n\t"
656 "mul %[r0], %[r0], %[gain_factor] \n\t"
657 "mul %[r2], %[r2], %[gain_factor] \n\t"
658 "mul %[r4], %[r4], %[gain_factor] \n\t"
659 "mul %[r6], %[r6], %[gain_factor] \n\t"
660 "addiu %[r0], %[r0], 0x1000 \n\t"
661 "addiu %[r2], %[r2], 0x1000 \n\t"
662 "addiu %[r4], %[r4], 0x1000 \n\t"
663 "addiu %[r6], %[r6], 0x1000 \n\t"
664 "sra %[r0], %[r0], 13 \n\t"
665 "sra %[r2], %[r2], 13 \n\t"
666 "sra %[r4], %[r4], 13 \n\t"
667 "sra %[r6], %[r6], 13 \n\t"
668 "slt %[r1], %[r0], %[sat_pos] \n\t"
669 "slt %[r3], %[r2], %[sat_pos] \n\t"
670 "slt %[r5], %[r4], %[sat_pos] \n\t"
671 "slt %[r7], %[r6], %[sat_pos] \n\t"
672 "movz %[r0], %[sat_pos], %[r1] \n\t"
673 "movz %[r2], %[sat_pos], %[r3] \n\t"
674 "movz %[r4], %[sat_pos], %[r5] \n\t"
675 "movz %[r6], %[sat_pos], %[r7] \n\t"
676 "lh %[r1], 0(%[synthBuf]) \n\t"
677 "lh %[r3], 2(%[synthBuf]) \n\t"
678 "lh %[r5], 4(%[synthBuf]) \n\t"
679 "lh %[r7], 6(%[synthBuf]) \n\t"
680 "addu %[r0], %[r0], %[r1] \n\t"
681 "addu %[r2], %[r2], %[r3] \n\t"
682 "addu %[r4], %[r4], %[r5] \n\t"
683 "addu %[r6], %[r6], %[r7] \n\t"
684 "slt %[r1], %[r0], %[sat_pos] \n\t"
685 "slt %[r3], %[r2], %[sat_pos] \n\t"
686 "slt %[r5], %[r4], %[sat_pos] \n\t"
687 "slt %[r7], %[r6], %[sat_pos] \n\t"
688 "movz %[r0], %[sat_pos], %[r1] \n\t"
689 "movz %[r2], %[sat_pos], %[r3] \n\t"
690 "movz %[r4], %[sat_pos], %[r5] \n\t"
691 "movz %[r6], %[sat_pos], %[r7] \n\t"
692 "slt %[r1], %[r0], %[sat_neg] \n\t"
693 "slt %[r3], %[r2], %[sat_neg] \n\t"
694 "slt %[r5], %[r4], %[sat_neg] \n\t"
695 "slt %[r7], %[r6], %[sat_neg] \n\t"
696 "movn %[r0], %[sat_neg], %[r1] \n\t"
697 "movn %[r2], %[sat_neg], %[r3] \n\t"
698 "movn %[r4], %[sat_neg], %[r5] \n\t"
699 "movn %[r6], %[sat_neg], %[r7] \n\t"
700 "sh %[r0], 0(%[synthBuf]) \n\t"
701 "sh %[r2], 2(%[synthBuf]) \n\t"
702 "sh %[r4], 4(%[synthBuf]) \n\t"
703 "sh %[r6], 6(%[synthBuf]) \n\t"
704 "addiu %[window], %[window], 8 \n\t"
705 "addiu %[real], %[real], 8 \n\t"
706 "addiu %[synthBuf],%[synthBuf], 8 \n\t"
707 "b 4b \n\t"
708 " addiu %[iters], %[iters], -1 \n\t"
709 "5: \n\t"
710 "blez %[after], 6f \n\t"
711 " nop \n\t"
712 "lh %[r0], 0(%[window]) \n\t"
713 "lh %[r1], 0(%[real]) \n\t"
714 "mul %[r0], %[r0], %[r1] \n\t"
715 "addiu %[window], %[window], 2 \n\t"
716 "addiu %[real], %[real], 2 \n\t"
717 "addiu %[r0], %[r0], 0x2000 \n\t"
718 "sra %[r0], %[r0], 14 \n\t"
719 "mul %[r0], %[r0], %[gain_factor] \n\t"
720 "addiu %[r0], %[r0], 0x1000 \n\t"
721 "sra %[r0], %[r0], 13 \n\t"
722 "slt %[r1], %[r0], %[sat_pos] \n\t"
723 "movz %[r0], %[sat_pos], %[r1] \n\t"
724 "lh %[r1], 0(%[synthBuf]) \n\t"
725 "addu %[r0], %[r0], %[r1] \n\t"
726 "slt %[r1], %[r0], %[sat_pos] \n\t"
727 "movz %[r0], %[sat_pos], %[r1] \n\t"
728 "slt %[r1], %[r0], %[sat_neg] \n\t"
729 "movn %[r0], %[sat_neg], %[r1] \n\t"
730 "sh %[r0], 0(%[synthBuf]) \n\t"
731 "addiu %[synthBuf],%[synthBuf], 2 \n\t"
732 "b 2b \n\t"
733 " addiu %[after], %[after], -1 \n\t"
734 "6: \n\t"
735 ".set pop \n\t"
736 : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
737 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
738 [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters),
739 [after] "+r" (after), [block10] "+r" (block10),
740 [window] "+r" (window), [real] "+r" (real),
741 [synthBuf] "+r" (synthBuf), [out] "+r" (out)
742 : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos),
743 [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen)
744 : "memory", "hi", "lo"
745 );
746
747 // update synthesis buffer
748 memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
749 (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
750 WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer
751 + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
752 }
753
// Filter the data in the frequency domain, and create spectrum.
//
// Parameters:
//   inst     - noise suppression instance. real[] and imag[] are scaled in
//              place by noiseSupFilter[] ((x * filter) >> 14) for indices
//              0..anaLen2.
//   freq_buf - output buffer. It is filled from the front with interleaved
//              (real, -imag) pairs and from the back, starting at element
//              2 * anaLen - 4, with (real, imag) pairs, so elements up to
//              index 2 * anaLen - 1 are written.
//
// The assembly handles sample 0 on its own, then two samples per loop
// iteration while loop_count < anaLen2 (the loop body always runs at least
// once), and finally the last two samples.
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
                                    int16_t* freq_buf) {
  uint16_t *noiseSupFilter = inst->noiseSupFilter;
  int16_t *real = inst->real;
  int16_t *imag = inst->imag;
  // Counts processed samples; starts at 2 and is advanced by 2 per iteration
  // before being compared with anaLen2.
  int32_t loop_count = 2;
  int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6;
  // Element offset of the backward write pointer into freq_buf.
  int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4;
  int16_t* freq_buf_f = freq_buf;          // forward write pointer
  int16_t* freq_buf_s = &freq_buf[tmp16];  // backward write pointer

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    //first sample
    "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
    "lh %[tmp_2], 0(%[real]) \n\t"
    "lh %[tmp_3], 0(%[imag]) \n\t"
    "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
    "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
    "sra %[tmp_2], %[tmp_2], 14 \n\t"
    "sra %[tmp_3], %[tmp_3], 14 \n\t"
    "sh %[tmp_2], 0(%[real]) \n\t"
    "sh %[tmp_3], 0(%[imag]) \n\t"
    "negu %[tmp_3], %[tmp_3] \n\t"
    "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
    "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
    "addiu %[real], %[real], 2 \n\t"
    "addiu %[imag], %[imag], 2 \n\t"
    "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t"
    "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t"
    // main loop: two samples per iteration
   "1: \n\t"
    "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
    "lh %[tmp_2], 0(%[real]) \n\t"
    "lh %[tmp_3], 0(%[imag]) \n\t"
    "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
    "lh %[tmp_5], 2(%[real]) \n\t"
    "lh %[tmp_6], 2(%[imag]) \n\t"
    "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
    "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
    "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
    "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
    "addiu %[loop_count], %[loop_count], 2 \n\t"
    "sra %[tmp_2], %[tmp_2], 14 \n\t"
    "sra %[tmp_3], %[tmp_3], 14 \n\t"
    "sra %[tmp_5], %[tmp_5], 14 \n\t"
    "sra %[tmp_6], %[tmp_6], 14 \n\t"
    "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t"
    "sh %[tmp_2], 0(%[real]) \n\t"
    "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
    "sh %[tmp_3], 0(%[imag]) \n\t"
    "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
    "negu %[tmp_3], %[tmp_3] \n\t"
    "sh %[tmp_5], 2(%[real]) \n\t"
    "sh %[tmp_5], 0(%[freq_buf_s]) \n\t"
    "sh %[tmp_6], 2(%[imag]) \n\t"
    "sh %[tmp_6], 2(%[freq_buf_s]) \n\t"
    "negu %[tmp_6], %[tmp_6] \n\t"
    "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t"
    "addiu %[real], %[real], 4 \n\t"
    "addiu %[imag], %[imag], 4 \n\t"
    "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
    "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
    "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
    "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
    "blt %[loop_count], %[loop_size], 1b \n\t"
    " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t"
    //last two samples:
    // Only the first of these two samples is mirrored through freq_buf_s.
    // NOTE(review): tmp_6 (the final imaginary value) is stored to
    // freq_buf_f without the negu applied to every earlier imaginary value;
    // confirm this is intended.
    "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t"
    "lh %[tmp_2], 0(%[real]) \n\t"
    "lh %[tmp_3], 0(%[imag]) \n\t"
    "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t"
    "lh %[tmp_5], 2(%[real]) \n\t"
    "lh %[tmp_6], 2(%[imag]) \n\t"
    "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t"
    "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t"
    "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t"
    "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t"
    "sra %[tmp_2], %[tmp_2], 14 \n\t"
    "sra %[tmp_3], %[tmp_3], 14 \n\t"
    "sra %[tmp_5], %[tmp_5], 14 \n\t"
    "sra %[tmp_6], %[tmp_6], 14 \n\t"
    "sh %[tmp_2], 0(%[real]) \n\t"
    "sh %[tmp_2], 4(%[freq_buf_s]) \n\t"
    "sh %[tmp_3], 0(%[imag]) \n\t"
    "sh %[tmp_3], 6(%[freq_buf_s]) \n\t"
    "negu %[tmp_3], %[tmp_3] \n\t"
    "sh %[tmp_2], 0(%[freq_buf_f]) \n\t"
    "sh %[tmp_3], 2(%[freq_buf_f]) \n\t"
    "sh %[tmp_5], 4(%[freq_buf_f]) \n\t"
    "sh %[tmp_6], 6(%[freq_buf_f]) \n\t"
    "sh %[tmp_5], 2(%[real]) \n\t"
    "sh %[tmp_6], 2(%[imag]) \n\t"
    ".set pop \n\t"
    : [real] "+r" (real), [imag] "+r" (imag),
      [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s),
      [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter),
      [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3),
      [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6)
    : [loop_size] "r" (inst->anaLen2)
    : "memory", "hi", "lo"
  );
}
858
859 #if defined(MIPS_DSP_R1_LE)
860 // Denormalize the real-valued signal |in|, the output from inverse FFT.
WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC * inst,int16_t * in,int factor)861 void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
862 int16_t* in,
863 int factor) {
864 int32_t r0, r1, r2, r3, t0;
865 int len = (int)inst->anaLen;
866 int16_t *out = &inst->real[0];
867 int shift = factor - inst->normData;
868
869 __asm __volatile (
870 ".set push \n\t"
871 ".set noreorder \n\t"
872 "beqz %[len], 8f \n\t"
873 " nop \n\t"
874 "bltz %[shift], 4f \n\t"
875 " sra %[t0], %[len], 2 \n\t"
876 "beqz %[t0], 2f \n\t"
877 " andi %[len], %[len], 3 \n\t"
878 "1: \n\t"
879 "lh %[r0], 0(%[in]) \n\t"
880 "lh %[r1], 2(%[in]) \n\t"
881 "lh %[r2], 4(%[in]) \n\t"
882 "lh %[r3], 6(%[in]) \n\t"
883 "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
884 "shllv_s.ph %[r1], %[r1], %[shift] \n\t"
885 "shllv_s.ph %[r2], %[r2], %[shift] \n\t"
886 "shllv_s.ph %[r3], %[r3], %[shift] \n\t"
887 "addiu %[in], %[in], 8 \n\t"
888 "addiu %[t0], %[t0], -1 \n\t"
889 "sh %[r0], 0(%[out]) \n\t"
890 "sh %[r1], 2(%[out]) \n\t"
891 "sh %[r2], 4(%[out]) \n\t"
892 "sh %[r3], 6(%[out]) \n\t"
893 "bgtz %[t0], 1b \n\t"
894 " addiu %[out], %[out], 8 \n\t"
895 "2: \n\t"
896 "beqz %[len], 8f \n\t"
897 " nop \n\t"
898 "3: \n\t"
899 "lh %[r0], 0(%[in]) \n\t"
900 "addiu %[in], %[in], 2 \n\t"
901 "addiu %[len], %[len], -1 \n\t"
902 "shllv_s.ph %[r0], %[r0], %[shift] \n\t"
903 "addiu %[out], %[out], 2 \n\t"
904 "bgtz %[len], 3b \n\t"
905 " sh %[r0], -2(%[out]) \n\t"
906 "b 8f \n\t"
907 "4: \n\t"
908 "negu %[shift], %[shift] \n\t"
909 "beqz %[t0], 6f \n\t"
910 " andi %[len], %[len], 3 \n\t"
911 "5: \n\t"
912 "lh %[r0], 0(%[in]) \n\t"
913 "lh %[r1], 2(%[in]) \n\t"
914 "lh %[r2], 4(%[in]) \n\t"
915 "lh %[r3], 6(%[in]) \n\t"
916 "srav %[r0], %[r0], %[shift] \n\t"
917 "srav %[r1], %[r1], %[shift] \n\t"
918 "srav %[r2], %[r2], %[shift] \n\t"
919 "srav %[r3], %[r3], %[shift] \n\t"
920 "addiu %[in], %[in], 8 \n\t"
921 "addiu %[t0], %[t0], -1 \n\t"
922 "sh %[r0], 0(%[out]) \n\t"
923 "sh %[r1], 2(%[out]) \n\t"
924 "sh %[r2], 4(%[out]) \n\t"
925 "sh %[r3], 6(%[out]) \n\t"
926 "bgtz %[t0], 5b \n\t"
927 " addiu %[out], %[out], 8 \n\t"
928 "6: \n\t"
929 "beqz %[len], 8f \n\t"
930 " nop \n\t"
931 "7: \n\t"
932 "lh %[r0], 0(%[in]) \n\t"
933 "addiu %[in], %[in], 2 \n\t"
934 "addiu %[len], %[len], -1 \n\t"
935 "srav %[r0], %[r0], %[shift] \n\t"
936 "addiu %[out], %[out], 2 \n\t"
937 "bgtz %[len], 7b \n\t"
938 " sh %[r0], -2(%[out]) \n\t"
939 "8: \n\t"
940 ".set pop \n\t"
941 : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
942 [r2] "=&r" (r2), [r3] "=&r" (r3)
943 : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
944 [out] "r" (out)
945 : "memory"
946 );
947 }
948 #endif
949
950 // Normalize the real-valued signal |in|, the input to forward FFT.
WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC * inst,const int16_t * in,int16_t * out)951 void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
952 const int16_t* in,
953 int16_t* out) {
954 int32_t r0, r1, r2, r3, t0;
955 int len = (int)inst->anaLen;
956 int shift = inst->normData;
957
958 __asm __volatile (
959 ".set push \n\t"
960 ".set noreorder \n\t"
961 "beqz %[len], 4f \n\t"
962 " sra %[t0], %[len], 2 \n\t"
963 "beqz %[t0], 2f \n\t"
964 " andi %[len], %[len], 3 \n\t"
965 "1: \n\t"
966 "lh %[r0], 0(%[in]) \n\t"
967 "lh %[r1], 2(%[in]) \n\t"
968 "lh %[r2], 4(%[in]) \n\t"
969 "lh %[r3], 6(%[in]) \n\t"
970 "sllv %[r0], %[r0], %[shift] \n\t"
971 "sllv %[r1], %[r1], %[shift] \n\t"
972 "sllv %[r2], %[r2], %[shift] \n\t"
973 "sllv %[r3], %[r3], %[shift] \n\t"
974 "addiu %[in], %[in], 8 \n\t"
975 "addiu %[t0], %[t0], -1 \n\t"
976 "sh %[r0], 0(%[out]) \n\t"
977 "sh %[r1], 2(%[out]) \n\t"
978 "sh %[r2], 4(%[out]) \n\t"
979 "sh %[r3], 6(%[out]) \n\t"
980 "bgtz %[t0], 1b \n\t"
981 " addiu %[out], %[out], 8 \n\t"
982 "2: \n\t"
983 "beqz %[len], 4f \n\t"
984 " nop \n\t"
985 "3: \n\t"
986 "lh %[r0], 0(%[in]) \n\t"
987 "addiu %[in], %[in], 2 \n\t"
988 "addiu %[len], %[len], -1 \n\t"
989 "sllv %[r0], %[r0], %[shift] \n\t"
990 "addiu %[out], %[out], 2 \n\t"
991 "bgtz %[len], 3b \n\t"
992 " sh %[r0], -2(%[out]) \n\t"
993 "4: \n\t"
994 ".set pop \n\t"
995 : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
996 [r2] "=&r" (r2), [r3] "=&r" (r3)
997 : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
998 [out] "r" (out)
999 : "memory"
1000 );
1001 }
1002
1003