• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
12 
13 // MIPS optimized implementation of the Autocorrelation function in fixed point.
14 // NOTE! Different from SPLIB-version in how it scales the signal.
//
// Writes order + 1 values to r[0] .. r[order]:
//   r[0] is the accumulated sum of x[n] * x[n] over N samples,
//   r[i] (1 <= i <= order) is the accumulated sum of x[n] * x[n + i] over the
//   N - i overlapping samples.
// All sums are accumulated in the 64-bit HI/LO accumulator and then shifted
// right by one common amount ("scaling") that is derived from the r[0]
// accumulator; that amount is stored in *scale.
//
// Parameters:
//   r     - output array; elements r[0] .. r[order] are written.
//   x     - input samples; the assembly only loads from it.
//   N     - number of samples in x.
//   order - highest lag to compute.
//   scale - receives the right-shift amount applied to every accumulator.
//
// Returns order + 1.
//
// Build variants visible in this function:
//   MIPS_DSP_R2_LE - packed 32-bit loads (ulw) + dpa.w.ph accumulate.
//   MIPS_DSP_R1_LE - extr.w / extrv.w are used to read the shifted accumulator.
//   otherwise      - lh / madd / mfhi / mflo with explicit shifts.
//
// All asm blocks use ".set noreorder", so the instruction written directly
// after each branch sits in its delay slot and runs whether or not the branch
// is taken (those lines are marked with a leading space in the template).
//
// NOTE(review): the loop counters are only tested for zero. A negative N, or a
// negative N - i in the lag loops, yields a non-zero loop_size and the loop
// would keep reading past the buffer. Presumably callers guarantee
// N > order >= 0 - confirm against the call sites.
WebRtcIsacfix_AutocorrMIPS(int32_t * __restrict r,const int16_t * __restrict x,int16_t N,int16_t order,int16_t * __restrict scale)15 int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,
16                                const int16_t* __restrict x,
17                                int16_t N,
18                                int16_t order,
19                                int16_t* __restrict scale) {
20   int i = 0;
21   int16_t scaling = 0;
     // The cast drops const: `in` is an in/out ("+r") asm operand that the
     // assembly advances, and it is only ever used as a load address.
22   int16_t* in = (int16_t*)x;
     // Number of full 8-sample groups for the unrolled r[0] loop.
23   int loop_size = (int)(N >> 3);
     // Leftover samples (0..7) handled one at a time after the unrolled loop.
24   int count = (int)(N & 7);
25   // Declare temporary variables used as registry values.
26   int32_t r0, r1, r2, r3;
27 #if !defined(MIPS_DSP_R2_LE)
28   // For non-DSPR2 optimizations 4 more registers are used.
29   int32_t r4, r5, r6, r7;
30 #endif
31 
32   // Calculate r[0] and scaling needed.
33   __asm __volatile (
34     ".set          push                                            \n\t"
35     ".set          noreorder                                       \n\t"
       // 0 * 0 clears the HI/LO accumulator.
36     "mult          $0,             $0                              \n\t"
37     // Loop is unrolled 8 times, set accumulator to zero in branch delay slot.
38     "beqz          %[loop_size],   2f                              \n\t"
39     " mult         $0,             $0                              \n\t"
40    "1:                                                             \n\t"
41     // Load 8 samples per loop iteration.
42 #if defined(MIPS_DSP_R2_LE)
       // Each ulw fetches two adjacent 16-bit samples into one register.
43     "ulw           %[r0],          0(%[in])                        \n\t"
44     "ulw           %[r1],          4(%[in])                        \n\t"
45     "ulw           %[r2],          8(%[in])                        \n\t"
46     "ulw           %[r3],          12(%[in])                       \n\t"
47 #else
48     "lh            %[r0],          0(%[in])                        \n\t"
49     "lh            %[r1],          2(%[in])                        \n\t"
50     "lh            %[r2],          4(%[in])                        \n\t"
51     "lh            %[r3],          6(%[in])                        \n\t"
52     "lh            %[r4],          8(%[in])                        \n\t"
53     "lh            %[r5],          10(%[in])                       \n\t"
54     "lh            %[r6],          12(%[in])                       \n\t"
55     "lh            %[r7],          14(%[in])                       \n\t"
56 #endif
57     "addiu         %[loop_size],   %[loop_size],   -1              \n\t"
58     // Multiply and accumulate.
59 #if defined(MIPS_DSP_R2_LE)
       // Each register is multiplied with itself, so the packed samples are
       // squared and added to the accumulator.
60     "dpa.w.ph      $ac0,           %[r0],          %[r0]           \n\t"
61     "dpa.w.ph      $ac0,           %[r1],          %[r1]           \n\t"
62     "dpa.w.ph      $ac0,           %[r2],          %[r2]           \n\t"
63     "dpa.w.ph      $ac0,           %[r3],          %[r3]           \n\t"
64 #else
65     "madd          %[r0],          %[r0]                           \n\t"
66     "madd          %[r1],          %[r1]                           \n\t"
67     "madd          %[r2],          %[r2]                           \n\t"
68     "madd          %[r3],          %[r3]                           \n\t"
69     "madd          %[r4],          %[r4]                           \n\t"
70     "madd          %[r5],          %[r5]                           \n\t"
71     "madd          %[r6],          %[r6]                           \n\t"
72     "madd          %[r7],          %[r7]                           \n\t"
73 #endif
74     "bnez          %[loop_size],   1b                              \n\t"
       // Delay slot: step past the 8 samples (16 bytes) just consumed.
75     " addiu        %[in],          %[in],          16              \n\t"
76    "2:                                                             \n\t"
       // Skip the remainder loop when no samples are left. The delay slot
       // reads the accumulator back so the value is ready at label 4 on the
       // taken path (the fall-through path re-reads it after the loop).
77     "beqz          %[count],       4f                              \n\t"
78 #if defined(MIPS_DSP_R1_LE)
79     " extr.w       %[r0],          $ac0,           31              \n\t"
80 #else
81     " mfhi         %[r2]                                           \n\t"
82 #endif
83     // Process remaining samples (if any).
84    "3:                                                             \n\t"
85     "lh            %[r0],          0(%[in])                        \n\t"
86     "addiu         %[count],       %[count],       -1              \n\t"
87     "madd          %[r0],          %[r0]                           \n\t"
88     "bnez          %[count],       3b                              \n\t"
89     " addiu        %[in],          %[in],          2               \n\t"
       // Re-read the accumulator now that the leftover samples are included.
90 #if defined(MIPS_DSP_R1_LE)
91     "extr.w        %[r0],          $ac0,           31              \n\t"
92 #else
93     "mfhi          %[r2]                                           \n\t"
94 #endif
95    "4:                                                             \n\t"
96 #if !defined(MIPS_DSP_R1_LE)
       // r0 = (hi << 1) + (lo >> 31): the accumulator shifted right by 31,
       // i.e. the same quantity the DSP build obtains with extr.w ..., 31.
97     "mflo          %[r3]                                           \n\t"
98     "sll           %[r0],          %[r2],          1               \n\t"
99     "srl           %[r1],          %[r3],          31              \n\t"
100     "addu          %[r0],          %[r0],          %[r1]           \n\t"
101 #endif
102     // Calculate scaling (the value of shifting).
        // scaling = 32 - clz(r0), forced to 0 when r0 < 1.
103     "clz           %[r1],          %[r0]                           \n\t"
104     "addiu         %[r1],          %[r1],          -32             \n\t"
105     "subu          %[scaling],     $0,             %[r1]           \n\t"
106     "slti          %[r1],          %[r0],          0x1             \n\t"
107     "movn          %[scaling],     $0,             %[r1]           \n\t"
        // r0 = accumulator shifted right by `scaling`; r2 keeps the high word.
108 #if defined(MIPS_DSP_R1_LE)
109     "extrv.w       %[r0],          $ac0,           %[scaling]      \n\t"
110     "mfhi          %[r2],          $ac0                            \n\t"
111 #else
        // r0 = (hi << (32 - scaling)) + (lo >> scaling), lo shifted logically.
112     "addiu         %[r1],          %[scaling],     -32             \n\t"
113     "subu          %[r1],          $0,             %[r1]           \n\t"
114     "sllv          %[r1],          %[r2],          %[r1]           \n\t"
115     "srlv          %[r0],          %[r3],          %[scaling]      \n\t"
116     "addu          %[r0],          %[r0],          %[r1]           \n\t"
117 #endif
        // If scaling is not below 32, take the high word (r2) as the result.
118     "slti          %[r1],          %[scaling],     32              \n\t"
119     "movz          %[r0],          %[r2],          %[r1]           \n\t"
120     ".set          pop                                             \n\t"
121     : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0),
122       [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
123 #if !defined(MIPS_DSP_R2_LE)
124       [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
125 #endif
126       [count] "+r" (count), [scaling] "=r" (scaling)
        // NOTE(review): [N] is supplied as an input but is never referenced in
        // the template above.
127     : [N] "r" (N)
128     : "memory", "hi", "lo"
129   );
130   r[0] = r0;
131 
132   // Correlation calculation is divided in 3 cases depending on the scaling
133   // value (different accumulator manipulation needed). Three slightly different
134   // loops are written in order to avoid branches inside the loop.
135   if (scaling == 0) {
136     // In this case, the result will be in low part of the accumulator.
137     for (i = 1; i < order + 1; i++) {
          // in walks x[n], in1 walks x[n + i]; N - i products are summed.
138       in = (int16_t*)x;
139       int16_t* in1 = (int16_t*)x + i;
140       count = N - i;
          // Full groups of 4 products; the leftover (count & 3) is computed
          // inside the asm.
141       loop_size = (count) >> 2;
142       __asm  __volatile (
143         ".set        push                                          \n\t"
144         ".set        noreorder                                     \n\t"
            // Clear the HI/LO accumulator.
145         "mult        $0,             $0                            \n\t"
146         "beqz        %[loop_size],   2f                            \n\t"
            // Delay slot: reduce count to the number of leftover products.
147         " andi       %[count],       %[count],       0x3           \n\t"
148         // Loop processing 4 pairs of samples per iteration.
149        "1:                                                         \n\t"
150 #if defined(MIPS_DSP_R2_LE)
151         "ulw         %[r0],          0(%[in])                      \n\t"
152         "ulw         %[r1],          0(%[in1])                     \n\t"
153         "ulw         %[r2],          4(%[in])                      \n\t"
154         "ulw         %[r3],          4(%[in1])                     \n\t"
155 #else
156         "lh          %[r0],          0(%[in])                      \n\t"
157         "lh          %[r1],          0(%[in1])                     \n\t"
158         "lh          %[r2],          2(%[in])                      \n\t"
159         "lh          %[r3],          2(%[in1])                     \n\t"
160         "lh          %[r4],          4(%[in])                      \n\t"
161         "lh          %[r5],          4(%[in1])                     \n\t"
162         "lh          %[r6],          6(%[in])                      \n\t"
163         "lh          %[r7],          6(%[in1])                     \n\t"
164 #endif
165         "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
166 #if defined(MIPS_DSP_R2_LE)
167         "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
168         "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
169 #else
170         "madd        %[r0],          %[r1]                         \n\t"
171         "madd        %[r2],          %[r3]                         \n\t"
172         "madd        %[r4],          %[r5]                         \n\t"
173         "madd        %[r6],          %[r7]                         \n\t"
174 #endif
            // Advance both pointers by 4 samples (8 bytes); the second
            // increment sits in the branch delay slot.
175         "addiu       %[in],          %[in],          8             \n\t"
176         "bnez        %[loop_size],   1b                            \n\t"
177         " addiu      %[in1],         %[in1],         8             \n\t"
178        "2:                                                         \n\t"
            // No leftovers: the delay slot already fetched the low word.
179         "beqz        %[count],       4f                            \n\t"
180         " mflo       %[r0]                                         \n\t"
181         // Process remaining samples (if any).
182        "3:                                                         \n\t"
183         "lh          %[r0],          0(%[in])                      \n\t"
184         "lh          %[r1],          0(%[in1])                     \n\t"
185         "addiu       %[count],       %[count],       -1            \n\t"
186         "addiu       %[in],          %[in],          2             \n\t"
187         "madd        %[r0],          %[r1]                         \n\t"
188         "bnez        %[count],       3b                            \n\t"
189         " addiu      %[in1],         %[in1],         2             \n\t"
            // Result for scaling == 0 is the low accumulator word.
190         "mflo        %[r0]                                         \n\t"
191        "4:                                                         \n\t"
192         ".set        pop                                           \n\t"
193         : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
194 #if !defined(MIPS_DSP_R2_LE)
195           [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
196 #endif
197           [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
198           [count] "+r" (count)
199         :
200         : "memory", "hi", "lo"
201       );
202       r[i] = r0;
203     }
204   } else if (scaling == 32) {
205     // In this case, the result will be high part of the accumulator.
        // Same loop as the scaling == 0 case, except that mfhi replaces mflo
        // when the result is read.
206     for (i = 1; i < order + 1; i++) {
207       in = (int16_t*)x;
208       int16_t* in1 = (int16_t*)x + i;
209       count = N - i;
210       loop_size = (count) >> 2;
211       __asm __volatile (
212         ".set        push                                          \n\t"
213         ".set        noreorder                                     \n\t"
            // Clear the HI/LO accumulator.
214         "mult        $0,             $0                            \n\t"
215         "beqz        %[loop_size],   2f                            \n\t"
            // Delay slot: reduce count to the number of leftover products.
216         " andi       %[count],       %[count],       0x3           \n\t"
217         // Loop processing 4 pairs of samples per iteration.
218        "1:                                                         \n\t"
219 #if defined(MIPS_DSP_R2_LE)
220         "ulw         %[r0],          0(%[in])                      \n\t"
221         "ulw         %[r1],          0(%[in1])                     \n\t"
222         "ulw         %[r2],          4(%[in])                      \n\t"
223         "ulw         %[r3],          4(%[in1])                     \n\t"
224 #else
225         "lh          %[r0],          0(%[in])                      \n\t"
226         "lh          %[r1],          0(%[in1])                     \n\t"
227         "lh          %[r2],          2(%[in])                      \n\t"
228         "lh          %[r3],          2(%[in1])                     \n\t"
229         "lh          %[r4],          4(%[in])                      \n\t"
230         "lh          %[r5],          4(%[in1])                     \n\t"
231         "lh          %[r6],          6(%[in])                      \n\t"
232         "lh          %[r7],          6(%[in1])                     \n\t"
233 #endif
234         "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
235 #if defined(MIPS_DSP_R2_LE)
236         "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
237         "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
238 #else
239         "madd        %[r0],          %[r1]                         \n\t"
240         "madd        %[r2],          %[r3]                         \n\t"
241         "madd        %[r4],          %[r5]                         \n\t"
242         "madd        %[r6],          %[r7]                         \n\t"
243 #endif
244         "addiu       %[in],          %[in],          8             \n\t"
245         "bnez        %[loop_size],   1b                            \n\t"
246         " addiu      %[in1],         %[in1],         8             \n\t"
247        "2:                                                         \n\t"
            // No leftovers: the delay slot already fetched the high word.
248         "beqz        %[count],       4f                            \n\t"
249         " mfhi       %[r0]                                         \n\t"
250         // Process remaining samples (if any).
251        "3:                                                         \n\t"
252         "lh          %[r0],          0(%[in])                      \n\t"
253         "lh          %[r1],          0(%[in1])                     \n\t"
254         "addiu       %[count],       %[count],       -1            \n\t"
255         "addiu       %[in],          %[in],          2             \n\t"
256         "madd        %[r0],          %[r1]                         \n\t"
257         "bnez        %[count],       3b                            \n\t"
258         " addiu      %[in1],         %[in1],         2             \n\t"
            // Result for scaling == 32 is the high accumulator word.
259         "mfhi        %[r0]                                         \n\t"
260        "4:                                                         \n\t"
261         ".set        pop                                           \n\t"
262         : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
263 #if !defined(MIPS_DSP_R2_LE)
264           [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
265 #endif
266           [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
267           [count] "+r" (count)
268         :
269         : "memory", "hi", "lo"
270       );
271       r[i] = r0;
272     }
273   } else {
274     // In this case, the result is obtained by combining low and high parts
275     // of the accumulator.
276 #if !defined(MIPS_DSP_R1_LE)
        // Left-shift applied to the high word so that its bits line up above
        // the low word shifted right by `scaling`.
277     int32_t tmp_shift = 32 - scaling;
278 #endif
279     for (i = 1; i < order + 1; i++) {
280       in = (int16_t*)x;
281       int16_t* in1 = (int16_t*)x + i;
282       count = N - i;
283       loop_size = (count) >> 2;
284       __asm __volatile (
285         ".set        push                                          \n\t"
286         ".set        noreorder                                     \n\t"
            // Clear the HI/LO accumulator.
287         "mult        $0,             $0                            \n\t"
288         "beqz        %[loop_size],   2f                            \n\t"
            // Delay slot: reduce count to the number of leftover products.
289         " andi       %[count],       %[count],       0x3           \n\t"
            // Loop processing 4 pairs of samples per iteration.
290        "1:                                                         \n\t"
291 #if defined(MIPS_DSP_R2_LE)
292         "ulw         %[r0],          0(%[in])                      \n\t"
293         "ulw         %[r1],          0(%[in1])                     \n\t"
294         "ulw         %[r2],          4(%[in])                      \n\t"
295         "ulw         %[r3],          4(%[in1])                     \n\t"
296 #else
297         "lh          %[r0],          0(%[in])                      \n\t"
298         "lh          %[r1],          0(%[in1])                     \n\t"
299         "lh          %[r2],          2(%[in])                      \n\t"
300         "lh          %[r3],          2(%[in1])                     \n\t"
301         "lh          %[r4],          4(%[in])                      \n\t"
302         "lh          %[r5],          4(%[in1])                     \n\t"
303         "lh          %[r6],          6(%[in])                      \n\t"
304         "lh          %[r7],          6(%[in1])                     \n\t"
305 #endif
306         "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
307 #if defined(MIPS_DSP_R2_LE)
308         "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
309         "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
310 #else
311         "madd        %[r0],          %[r1]                         \n\t"
312         "madd        %[r2],          %[r3]                         \n\t"
313         "madd        %[r4],          %[r5]                         \n\t"
314         "madd        %[r6],          %[r7]                         \n\t"
315 #endif
316         "addiu       %[in],          %[in],          8             \n\t"
317         "bnez        %[loop_size],   1b                            \n\t"
318         " addiu      %[in1],         %[in1],         8             \n\t"
319        "2:                                                         \n\t"
            // No leftovers: the delay slot reads the accumulator for the taken
            // path (shifted result in the DSP build, high word otherwise).
320         "beqz        %[count],       4f                            \n\t"
321 #if defined(MIPS_DSP_R1_LE)
322         " extrv.w    %[r0],          $ac0,           %[scaling]    \n\t"
323 #else
324         " mfhi       %[r0]                                         \n\t"
325 #endif
            // Process remaining samples (if any).
326        "3:                                                         \n\t"
327         "lh          %[r0],          0(%[in])                      \n\t"
328         "lh          %[r1],          0(%[in1])                     \n\t"
329         "addiu       %[count],       %[count],       -1            \n\t"
330         "addiu       %[in],          %[in],          2             \n\t"
331         "madd        %[r0],          %[r1]                         \n\t"
332         "bnez        %[count],       3b                            \n\t"
333         " addiu      %[in1],         %[in1],         2             \n\t"
            // Re-read the accumulator now that the leftovers are included.
334 #if defined(MIPS_DSP_R1_LE)
335         "extrv.w     %[r0],          $ac0,           %[scaling]    \n\t"
336 #else
337         "mfhi        %[r0]                                         \n\t"
338 #endif
339        "4:                                                         \n\t"
340 #if !defined(MIPS_DSP_R1_LE)
            // r0 = (hi << tmp_shift) + (lo >> scaling), lo shifted logically.
341         "mflo        %[r1]                                         \n\t"
342         "sllv        %[r0],          %[r0],          %[tmp_shift]  \n\t"
343         "srlv        %[r1],          %[r1],          %[scaling]    \n\t"
344         "addu        %[r0],          %[r0],          %[r1]         \n\t"
345 #endif
346         ".set        pop                                           \n\t"
347         : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
348 #if !defined(MIPS_DSP_R2_LE)
349           [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
350 #endif
351           [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
352           [count] "+r" (count)
353         : [scaling] "r" (scaling)
354 #if !defined(MIPS_DSP_R1_LE)
355         , [tmp_shift] "r" (tmp_shift)
356 #endif
357         : "memory", "hi", "lo"
358       );
359       r[i] = r0;
360     }
361   }
      // Report the shift that was applied to every r[] value.
362   *scale = scaling;
363 
      // Number of autocorrelation values written.
364   return (order + 1);
365 }
366