1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
12
13 // MIPS optimized implementation of the Autocorrelation function in fixed point.
14 // NOTE! Different from SPLIB-version in how it scales the signal.
WebRtcIsacfix_AutocorrMIPS(int32_t * __restrict r,const int16_t * __restrict x,int16_t N,int16_t order,int16_t * __restrict scale)15 int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,
16 const int16_t* __restrict x,
17 int16_t N,
18 int16_t order,
19 int16_t* __restrict scale) {
20 int i = 0;
21 int16_t scaling = 0;
22 int16_t* in = (int16_t*)x;
23 int loop_size = (int)(N >> 3);
24 int count = (int)(N & 7);
25 // Declare temporary variables used as registry values.
26 int32_t r0, r1, r2, r3;
27 #if !defined(MIPS_DSP_R2_LE)
28 // For non-DSPR2 optimizations 4 more registers are used.
29 int32_t r4, r5, r6, r7;
30 #endif
31
32 // Calculate r[0] and scaling needed.
33 __asm __volatile (
34 ".set push \n\t"
35 ".set noreorder \n\t"
36 "mult $0, $0 \n\t"
37 // Loop is unrolled 8 times, set accumulator to zero in branch delay slot.
38 "beqz %[loop_size], 2f \n\t"
39 " mult $0, $0 \n\t"
40 "1: \n\t"
41 // Load 8 samples per loop iteration.
42 #if defined(MIPS_DSP_R2_LE)
43 "ulw %[r0], 0(%[in]) \n\t"
44 "ulw %[r1], 4(%[in]) \n\t"
45 "ulw %[r2], 8(%[in]) \n\t"
46 "ulw %[r3], 12(%[in]) \n\t"
47 #else
48 "lh %[r0], 0(%[in]) \n\t"
49 "lh %[r1], 2(%[in]) \n\t"
50 "lh %[r2], 4(%[in]) \n\t"
51 "lh %[r3], 6(%[in]) \n\t"
52 "lh %[r4], 8(%[in]) \n\t"
53 "lh %[r5], 10(%[in]) \n\t"
54 "lh %[r6], 12(%[in]) \n\t"
55 "lh %[r7], 14(%[in]) \n\t"
56 #endif
57 "addiu %[loop_size], %[loop_size], -1 \n\t"
58 // Multiply and accumulate.
59 #if defined(MIPS_DSP_R2_LE)
60 "dpa.w.ph $ac0, %[r0], %[r0] \n\t"
61 "dpa.w.ph $ac0, %[r1], %[r1] \n\t"
62 "dpa.w.ph $ac0, %[r2], %[r2] \n\t"
63 "dpa.w.ph $ac0, %[r3], %[r3] \n\t"
64 #else
65 "madd %[r0], %[r0] \n\t"
66 "madd %[r1], %[r1] \n\t"
67 "madd %[r2], %[r2] \n\t"
68 "madd %[r3], %[r3] \n\t"
69 "madd %[r4], %[r4] \n\t"
70 "madd %[r5], %[r5] \n\t"
71 "madd %[r6], %[r6] \n\t"
72 "madd %[r7], %[r7] \n\t"
73 #endif
74 "bnez %[loop_size], 1b \n\t"
75 " addiu %[in], %[in], 16 \n\t"
76 "2: \n\t"
77 "beqz %[count], 4f \n\t"
78 #if defined(MIPS_DSP_R1_LE)
79 " extr.w %[r0], $ac0, 31 \n\t"
80 #else
81 " mfhi %[r2] \n\t"
82 #endif
83 // Process remaining samples (if any).
84 "3: \n\t"
85 "lh %[r0], 0(%[in]) \n\t"
86 "addiu %[count], %[count], -1 \n\t"
87 "madd %[r0], %[r0] \n\t"
88 "bnez %[count], 3b \n\t"
89 " addiu %[in], %[in], 2 \n\t"
90 #if defined(MIPS_DSP_R1_LE)
91 "extr.w %[r0], $ac0, 31 \n\t"
92 #else
93 "mfhi %[r2] \n\t"
94 #endif
95 "4: \n\t"
96 #if !defined(MIPS_DSP_R1_LE)
97 "mflo %[r3] \n\t"
98 "sll %[r0], %[r2], 1 \n\t"
99 "srl %[r1], %[r3], 31 \n\t"
100 "addu %[r0], %[r0], %[r1] \n\t"
101 #endif
102 // Calculate scaling (the value of shifting).
103 "clz %[r1], %[r0] \n\t"
104 "addiu %[r1], %[r1], -32 \n\t"
105 "subu %[scaling], $0, %[r1] \n\t"
106 "slti %[r1], %[r0], 0x1 \n\t"
107 "movn %[scaling], $0, %[r1] \n\t"
108 #if defined(MIPS_DSP_R1_LE)
109 "extrv.w %[r0], $ac0, %[scaling] \n\t"
110 "mfhi %[r2], $ac0 \n\t"
111 #else
112 "addiu %[r1], %[scaling], -32 \n\t"
113 "subu %[r1], $0, %[r1] \n\t"
114 "sllv %[r1], %[r2], %[r1] \n\t"
115 "srlv %[r0], %[r3], %[scaling] \n\t"
116 "addu %[r0], %[r0], %[r1] \n\t"
117 #endif
118 "slti %[r1], %[scaling], 32 \n\t"
119 "movz %[r0], %[r2], %[r1] \n\t"
120 ".set pop \n\t"
121 : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0),
122 [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
123 #if !defined(MIPS_DSP_R2_LE)
124 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
125 #endif
126 [count] "+r" (count), [scaling] "=r" (scaling)
127 : [N] "r" (N)
128 : "memory", "hi", "lo"
129 );
130 r[0] = r0;
131
132 // Correlation calculation is divided in 3 cases depending on the scaling
133 // value (different accumulator manipulation needed). Three slightly different
134 // loops are written in order to avoid branches inside the loop.
135 if (scaling == 0) {
136 // In this case, the result will be in low part of the accumulator.
137 for (i = 1; i < order + 1; i++) {
138 in = (int16_t*)x;
139 int16_t* in1 = (int16_t*)x + i;
140 count = N - i;
141 loop_size = (count) >> 2;
142 __asm __volatile (
143 ".set push \n\t"
144 ".set noreorder \n\t"
145 "mult $0, $0 \n\t"
146 "beqz %[loop_size], 2f \n\t"
147 " andi %[count], %[count], 0x3 \n\t"
148 // Loop processing 4 pairs of samples per iteration.
149 "1: \n\t"
150 #if defined(MIPS_DSP_R2_LE)
151 "ulw %[r0], 0(%[in]) \n\t"
152 "ulw %[r1], 0(%[in1]) \n\t"
153 "ulw %[r2], 4(%[in]) \n\t"
154 "ulw %[r3], 4(%[in1]) \n\t"
155 #else
156 "lh %[r0], 0(%[in]) \n\t"
157 "lh %[r1], 0(%[in1]) \n\t"
158 "lh %[r2], 2(%[in]) \n\t"
159 "lh %[r3], 2(%[in1]) \n\t"
160 "lh %[r4], 4(%[in]) \n\t"
161 "lh %[r5], 4(%[in1]) \n\t"
162 "lh %[r6], 6(%[in]) \n\t"
163 "lh %[r7], 6(%[in1]) \n\t"
164 #endif
165 "addiu %[loop_size], %[loop_size], -1 \n\t"
166 #if defined(MIPS_DSP_R2_LE)
167 "dpa.w.ph $ac0, %[r0], %[r1] \n\t"
168 "dpa.w.ph $ac0, %[r2], %[r3] \n\t"
169 #else
170 "madd %[r0], %[r1] \n\t"
171 "madd %[r2], %[r3] \n\t"
172 "madd %[r4], %[r5] \n\t"
173 "madd %[r6], %[r7] \n\t"
174 #endif
175 "addiu %[in], %[in], 8 \n\t"
176 "bnez %[loop_size], 1b \n\t"
177 " addiu %[in1], %[in1], 8 \n\t"
178 "2: \n\t"
179 "beqz %[count], 4f \n\t"
180 " mflo %[r0] \n\t"
181 // Process remaining samples (if any).
182 "3: \n\t"
183 "lh %[r0], 0(%[in]) \n\t"
184 "lh %[r1], 0(%[in1]) \n\t"
185 "addiu %[count], %[count], -1 \n\t"
186 "addiu %[in], %[in], 2 \n\t"
187 "madd %[r0], %[r1] \n\t"
188 "bnez %[count], 3b \n\t"
189 " addiu %[in1], %[in1], 2 \n\t"
190 "mflo %[r0] \n\t"
191 "4: \n\t"
192 ".set pop \n\t"
193 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
194 #if !defined(MIPS_DSP_R2_LE)
195 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
196 #endif
197 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
198 [count] "+r" (count)
199 :
200 : "memory", "hi", "lo"
201 );
202 r[i] = r0;
203 }
204 } else if (scaling == 32) {
205 // In this case, the result will be high part of the accumulator.
206 for (i = 1; i < order + 1; i++) {
207 in = (int16_t*)x;
208 int16_t* in1 = (int16_t*)x + i;
209 count = N - i;
210 loop_size = (count) >> 2;
211 __asm __volatile (
212 ".set push \n\t"
213 ".set noreorder \n\t"
214 "mult $0, $0 \n\t"
215 "beqz %[loop_size], 2f \n\t"
216 " andi %[count], %[count], 0x3 \n\t"
217 // Loop processing 4 pairs of samples per iteration.
218 "1: \n\t"
219 #if defined(MIPS_DSP_R2_LE)
220 "ulw %[r0], 0(%[in]) \n\t"
221 "ulw %[r1], 0(%[in1]) \n\t"
222 "ulw %[r2], 4(%[in]) \n\t"
223 "ulw %[r3], 4(%[in1]) \n\t"
224 #else
225 "lh %[r0], 0(%[in]) \n\t"
226 "lh %[r1], 0(%[in1]) \n\t"
227 "lh %[r2], 2(%[in]) \n\t"
228 "lh %[r3], 2(%[in1]) \n\t"
229 "lh %[r4], 4(%[in]) \n\t"
230 "lh %[r5], 4(%[in1]) \n\t"
231 "lh %[r6], 6(%[in]) \n\t"
232 "lh %[r7], 6(%[in1]) \n\t"
233 #endif
234 "addiu %[loop_size], %[loop_size], -1 \n\t"
235 #if defined(MIPS_DSP_R2_LE)
236 "dpa.w.ph $ac0, %[r0], %[r1] \n\t"
237 "dpa.w.ph $ac0, %[r2], %[r3] \n\t"
238 #else
239 "madd %[r0], %[r1] \n\t"
240 "madd %[r2], %[r3] \n\t"
241 "madd %[r4], %[r5] \n\t"
242 "madd %[r6], %[r7] \n\t"
243 #endif
244 "addiu %[in], %[in], 8 \n\t"
245 "bnez %[loop_size], 1b \n\t"
246 " addiu %[in1], %[in1], 8 \n\t"
247 "2: \n\t"
248 "beqz %[count], 4f \n\t"
249 " mfhi %[r0] \n\t"
250 // Process remaining samples (if any).
251 "3: \n\t"
252 "lh %[r0], 0(%[in]) \n\t"
253 "lh %[r1], 0(%[in1]) \n\t"
254 "addiu %[count], %[count], -1 \n\t"
255 "addiu %[in], %[in], 2 \n\t"
256 "madd %[r0], %[r1] \n\t"
257 "bnez %[count], 3b \n\t"
258 " addiu %[in1], %[in1], 2 \n\t"
259 "mfhi %[r0] \n\t"
260 "4: \n\t"
261 ".set pop \n\t"
262 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
263 #if !defined(MIPS_DSP_R2_LE)
264 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
265 #endif
266 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
267 [count] "+r" (count)
268 :
269 : "memory", "hi", "lo"
270 );
271 r[i] = r0;
272 }
273 } else {
274 // In this case, the result is obtained by combining low and high parts
275 // of the accumulator.
276 #if !defined(MIPS_DSP_R1_LE)
277 int32_t tmp_shift = 32 - scaling;
278 #endif
279 for (i = 1; i < order + 1; i++) {
280 in = (int16_t*)x;
281 int16_t* in1 = (int16_t*)x + i;
282 count = N - i;
283 loop_size = (count) >> 2;
284 __asm __volatile (
285 ".set push \n\t"
286 ".set noreorder \n\t"
287 "mult $0, $0 \n\t"
288 "beqz %[loop_size], 2f \n\t"
289 " andi %[count], %[count], 0x3 \n\t"
290 "1: \n\t"
291 #if defined(MIPS_DSP_R2_LE)
292 "ulw %[r0], 0(%[in]) \n\t"
293 "ulw %[r1], 0(%[in1]) \n\t"
294 "ulw %[r2], 4(%[in]) \n\t"
295 "ulw %[r3], 4(%[in1]) \n\t"
296 #else
297 "lh %[r0], 0(%[in]) \n\t"
298 "lh %[r1], 0(%[in1]) \n\t"
299 "lh %[r2], 2(%[in]) \n\t"
300 "lh %[r3], 2(%[in1]) \n\t"
301 "lh %[r4], 4(%[in]) \n\t"
302 "lh %[r5], 4(%[in1]) \n\t"
303 "lh %[r6], 6(%[in]) \n\t"
304 "lh %[r7], 6(%[in1]) \n\t"
305 #endif
306 "addiu %[loop_size], %[loop_size], -1 \n\t"
307 #if defined(MIPS_DSP_R2_LE)
308 "dpa.w.ph $ac0, %[r0], %[r1] \n\t"
309 "dpa.w.ph $ac0, %[r2], %[r3] \n\t"
310 #else
311 "madd %[r0], %[r1] \n\t"
312 "madd %[r2], %[r3] \n\t"
313 "madd %[r4], %[r5] \n\t"
314 "madd %[r6], %[r7] \n\t"
315 #endif
316 "addiu %[in], %[in], 8 \n\t"
317 "bnez %[loop_size], 1b \n\t"
318 " addiu %[in1], %[in1], 8 \n\t"
319 "2: \n\t"
320 "beqz %[count], 4f \n\t"
321 #if defined(MIPS_DSP_R1_LE)
322 " extrv.w %[r0], $ac0, %[scaling] \n\t"
323 #else
324 " mfhi %[r0] \n\t"
325 #endif
326 "3: \n\t"
327 "lh %[r0], 0(%[in]) \n\t"
328 "lh %[r1], 0(%[in1]) \n\t"
329 "addiu %[count], %[count], -1 \n\t"
330 "addiu %[in], %[in], 2 \n\t"
331 "madd %[r0], %[r1] \n\t"
332 "bnez %[count], 3b \n\t"
333 " addiu %[in1], %[in1], 2 \n\t"
334 #if defined(MIPS_DSP_R1_LE)
335 "extrv.w %[r0], $ac0, %[scaling] \n\t"
336 #else
337 "mfhi %[r0] \n\t"
338 #endif
339 "4: \n\t"
340 #if !defined(MIPS_DSP_R1_LE)
341 "mflo %[r1] \n\t"
342 "sllv %[r0], %[r0], %[tmp_shift] \n\t"
343 "srlv %[r1], %[r1], %[scaling] \n\t"
344 "addu %[r0], %[r0], %[r1] \n\t"
345 #endif
346 ".set pop \n\t"
347 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
348 #if !defined(MIPS_DSP_R2_LE)
349 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
350 #endif
351 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
352 [count] "+r" (count)
353 : [scaling] "r" (scaling)
354 #if !defined(MIPS_DSP_R1_LE)
355 , [tmp_shift] "r" (tmp_shift)
356 #endif
357 : "memory", "hi", "lo"
358 );
359 r[i] = r0;
360 }
361 }
362 *scale = scaling;
363
364 return (order + 1);
365 }
366