1 /*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "webrtc/modules/audio_coding/codecs/isac/fix/source/filterbank_internal.h"
12
13 // WebRtcIsacfix_AllpassFilter2FixDec16 function optimized for MIPSDSP platform.
14 // Bit-exact with WebRtcIsacfix_AllpassFilter2FixDec16C from filterbanks.c.
WebRtcIsacfix_AllpassFilter2FixDec16MIPS(int16_t * data_ch1,int16_t * data_ch2,const int16_t * factor_ch1,const int16_t * factor_ch2,const int length,int32_t * filter_state_ch1,int32_t * filter_state_ch2)15 void WebRtcIsacfix_AllpassFilter2FixDec16MIPS(
16 int16_t* data_ch1, // Input and output in channel 1, in Q0.
17 int16_t* data_ch2, // Input and output in channel 2, in Q0.
18 const int16_t* factor_ch1, // Scaling factor for channel 1, in Q15.
19 const int16_t* factor_ch2, // Scaling factor for channel 2, in Q15.
20 const int length, // Length of the data buffers.
21 int32_t* filter_state_ch1, // Filter state for channel 1, in Q16.
22 int32_t* filter_state_ch2) { // Filter state for channel 2, in Q16.
23
24 int32_t st0_ch1, st1_ch1; // channel1 state variables.
25 int32_t st0_ch2, st1_ch2; // channel2 state variables.
26 int32_t f_ch10, f_ch11, f_ch20, f_ch21; // factor variables.
27 int32_t r0, r1, r2, r3, r4, r5; // temporary register variables.
28
29 __asm __volatile (
30 ".set push \n\t"
31 ".set noreorder \n\t"
32 // Load all the state and factor variables.
33 "lh %[f_ch10], 0(%[factor_ch1]) \n\t"
34 "lh %[f_ch20], 0(%[factor_ch2]) \n\t"
35 "lh %[f_ch11], 2(%[factor_ch1]) \n\t"
36 "lh %[f_ch21], 2(%[factor_ch2]) \n\t"
37 "lw %[st0_ch1], 0(%[filter_state_ch1]) \n\t"
38 "lw %[st1_ch1], 4(%[filter_state_ch1]) \n\t"
39 "lw %[st0_ch2], 0(%[filter_state_ch2]) \n\t"
40 "lw %[st1_ch2], 4(%[filter_state_ch2]) \n\t"
41 // Allpass filtering loop.
42 "1: \n\t"
43 "lh %[r0], 0(%[data_ch1]) \n\t"
44 "lh %[r1], 0(%[data_ch2]) \n\t"
45 "addiu %[length], %[length], -1 \n\t"
46 "mul %[r2], %[r0], %[f_ch10] \n\t"
47 "mul %[r3], %[r1], %[f_ch20] \n\t"
48 "sll %[r0], %[r0], 16 \n\t"
49 "sll %[r1], %[r1], 16 \n\t"
50 "sll %[r2], %[r2], 1 \n\t"
51 "addq_s.w %[r2], %[r2], %[st0_ch1] \n\t"
52 "sll %[r3], %[r3], 1 \n\t"
53 "addq_s.w %[r3], %[r3], %[st0_ch2] \n\t"
54 "sra %[r2], %[r2], 16 \n\t"
55 "mul %[st0_ch1], %[f_ch10], %[r2] \n\t"
56 "sra %[r3], %[r3], 16 \n\t"
57 "mul %[st0_ch2], %[f_ch20], %[r3] \n\t"
58 "mul %[r4], %[r2], %[f_ch11] \n\t"
59 "mul %[r5], %[r3], %[f_ch21] \n\t"
60 "sll %[st0_ch1], %[st0_ch1], 1 \n\t"
61 "subq_s.w %[st0_ch1], %[r0], %[st0_ch1] \n\t"
62 "sll %[st0_ch2], %[st0_ch2], 1 \n\t"
63 "subq_s.w %[st0_ch2], %[r1], %[st0_ch2] \n\t"
64 "sll %[r4], %[r4], 1 \n\t"
65 "addq_s.w %[r4], %[r4], %[st1_ch1] \n\t"
66 "sll %[r5], %[r5], 1 \n\t"
67 "addq_s.w %[r5], %[r5], %[st1_ch2] \n\t"
68 "sra %[r4], %[r4], 16 \n\t"
69 "mul %[r0], %[r4], %[f_ch11] \n\t"
70 "sra %[r5], %[r5], 16 \n\t"
71 "mul %[r1], %[r5], %[f_ch21] \n\t"
72 "sh %[r4], 0(%[data_ch1]) \n\t"
73 "sh %[r5], 0(%[data_ch2]) \n\t"
74 "addiu %[data_ch1], %[data_ch1], 2 \n\t"
75 "sll %[r2], %[r2], 16 \n\t"
76 "sll %[r0], %[r0], 1 \n\t"
77 "subq_s.w %[st1_ch1], %[r2], %[r0] \n\t"
78 "sll %[r3], %[r3], 16 \n\t"
79 "sll %[r1], %[r1], 1 \n\t"
80 "subq_s.w %[st1_ch2], %[r3], %[r1] \n\t"
81 "bgtz %[length], 1b \n\t"
82 " addiu %[data_ch2], %[data_ch2], 2 \n\t"
83 // Store channel states.
84 "sw %[st0_ch1], 0(%[filter_state_ch1]) \n\t"
85 "sw %[st1_ch1], 4(%[filter_state_ch1]) \n\t"
86 "sw %[st0_ch2], 0(%[filter_state_ch2]) \n\t"
87 "sw %[st1_ch2], 4(%[filter_state_ch2]) \n\t"
88 ".set pop \n\t"
89 : [f_ch10] "=&r" (f_ch10), [f_ch20] "=&r" (f_ch20),
90 [f_ch11] "=&r" (f_ch11), [f_ch21] "=&r" (f_ch21),
91 [st0_ch1] "=&r" (st0_ch1), [st1_ch1] "=&r" (st1_ch1),
92 [st0_ch2] "=&r" (st0_ch2), [st1_ch2] "=&r" (st1_ch2),
93 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2),
94 [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5)
95 : [factor_ch1] "r" (factor_ch1), [factor_ch2] "r" (factor_ch2),
96 [filter_state_ch1] "r" (filter_state_ch1),
97 [filter_state_ch2] "r" (filter_state_ch2),
98 [data_ch1] "r" (data_ch1), [data_ch2] "r" (data_ch2),
99 [length] "r" (length)
100 : "memory", "hi", "lo"
101 );
102 }
103
104 // WebRtcIsacfix_HighpassFilterFixDec32 function optimized for MIPSDSP platform.
105 // Bit-exact with WebRtcIsacfix_HighpassFilterFixDec32C from filterbanks.c.
WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t * io,int16_t len,const int16_t * coefficient,int32_t * state)106 void WebRtcIsacfix_HighpassFilterFixDec32MIPS(int16_t* io,
107 int16_t len,
108 const int16_t* coefficient,
109 int32_t* state) {
110 int k;
111 int32_t a1, a2, b1, b2, in;
112 int32_t state0 = state[0];
113 int32_t state1 = state[1];
114
115 int32_t c0, c1, c2, c3;
116 int32_t c4, c5, c6, c7;
117 int32_t state0_lo, state0_hi;
118 int32_t state1_lo, state1_hi;
119 int32_t t0, t1, t2, t3, t4, t5;
120
121 __asm __volatile (
122 "lh %[c0], 0(%[coeff_ptr]) \n\t"
123 "lh %[c1], 2(%[coeff_ptr]) \n\t"
124 "lh %[c2], 4(%[coeff_ptr]) \n\t"
125 "lh %[c3], 6(%[coeff_ptr]) \n\t"
126 "sra %[state0_hi], %[state0], 16 \n\t"
127 "sra %[state1_hi], %[state1], 16 \n\t"
128 "andi %[state0_lo], %[state0], 0xFFFF \n\t"
129 "andi %[state1_lo], %[state1], 0xFFFF \n\t"
130 "lh %[c4], 8(%[coeff_ptr]) \n\t"
131 "lh %[c5], 10(%[coeff_ptr]) \n\t"
132 "lh %[c6], 12(%[coeff_ptr]) \n\t"
133 "lh %[c7], 14(%[coeff_ptr]) \n\t"
134 "sra %[state0_lo], %[state0_lo], 1 \n\t"
135 "sra %[state1_lo], %[state1_lo], 1 \n\t"
136 : [c0] "=&r" (c0), [c1] "=&r" (c1), [c2] "=&r" (c2), [c3] "=&r" (c3),
137 [c4] "=&r" (c4), [c5] "=&r" (c5), [c6] "=&r" (c6), [c7] "=&r" (c7),
138 [state0_hi] "=&r" (state0_hi), [state0_lo] "=&r" (state0_lo),
139 [state1_hi] "=&r" (state1_hi), [state1_lo] "=&r" (state1_lo)
140 : [coeff_ptr] "r" (coefficient), [state0] "r" (state0),
141 [state1] "r" (state1)
142 : "memory"
143 );
144
145 for (k = 0; k < len; k++) {
146 in = (int32_t)io[k];
147
148 __asm __volatile (
149 ".set push \n\t"
150 ".set noreorder \n\t"
151 "mul %[t2], %[c4], %[state0_lo] \n\t"
152 "mul %[t0], %[c5], %[state0_lo] \n\t"
153 "mul %[t1], %[c4], %[state0_hi] \n\t"
154 "mul %[a1], %[c5], %[state0_hi] \n\t"
155 "mul %[t5], %[c6], %[state1_lo] \n\t"
156 "mul %[t3], %[c7], %[state1_lo] \n\t"
157 "mul %[t4], %[c6], %[state1_hi] \n\t"
158 "mul %[b1], %[c7], %[state1_hi] \n\t"
159 "shra_r.w %[t2], %[t2], 15 \n\t"
160 "shra_r.w %[t0], %[t0], 15 \n\t"
161 "addu %[t1], %[t1], %[t2] \n\t"
162 "addu %[a1], %[a1], %[t0] \n\t"
163 "sra %[t1], %[t1], 16 \n\t"
164 "addu %[a1], %[a1], %[t1] \n\t"
165 "shra_r.w %[t5], %[t5], 15 \n\t"
166 "shra_r.w %[t3], %[t3], 15 \n\t"
167 "addu %[t4], %[t4], %[t5] \n\t"
168 "addu %[b1], %[b1], %[t3] \n\t"
169 "sra %[t4], %[t4], 16 \n\t"
170 "addu %[b1], %[b1], %[t4] \n\t"
171 "mul %[t2], %[c0], %[state0_lo] \n\t"
172 "mul %[t0], %[c1], %[state0_lo] \n\t"
173 "mul %[t1], %[c0], %[state0_hi] \n\t"
174 "mul %[a2], %[c1], %[state0_hi] \n\t"
175 "mul %[t5], %[c2], %[state1_lo] \n\t"
176 "mul %[t3], %[c3], %[state1_lo] \n\t"
177 "mul %[t4], %[c2], %[state1_hi] \n\t"
178 "mul %[b2], %[c3], %[state1_hi] \n\t"
179 "shra_r.w %[t2], %[t2], 15 \n\t"
180 "shra_r.w %[t0], %[t0], 15 \n\t"
181 "addu %[t1], %[t1], %[t2] \n\t"
182 "addu %[a2], %[a2], %[t0] \n\t"
183 "sra %[t1], %[t1], 16 \n\t"
184 "addu %[a2], %[a2], %[t1] \n\t"
185 "shra_r.w %[t5], %[t5], 15 \n\t"
186 "shra_r.w %[t3], %[t3], 15 \n\t"
187 "addu %[t4], %[t4], %[t5] \n\t"
188 "addu %[b2], %[b2], %[t3] \n\t"
189 "sra %[t4], %[t4], 16 \n\t"
190 "addu %[b2], %[b2], %[t4] \n\t"
191 "addu %[a1], %[a1], %[b1] \n\t"
192 "sra %[a1], %[a1], 7 \n\t"
193 "addu %[a1], %[a1], %[in] \n\t"
194 "sll %[t0], %[in], 2 \n\t"
195 "addu %[a2], %[a2], %[b2] \n\t"
196 "subu %[t0], %[t0], %[a2] \n\t"
197 "shll_s.w %[a1], %[a1], 16 \n\t"
198 "shll_s.w %[t0], %[t0], 2 \n\t"
199 "sra %[a1], %[a1], 16 \n\t"
200 "addu %[state1_hi], %[state0_hi], $0 \n\t"
201 "addu %[state1_lo], %[state0_lo], $0 \n\t"
202 "sra %[state0_hi], %[t0], 16 \n\t"
203 "andi %[state0_lo], %[t0], 0xFFFF \n\t"
204 "sra %[state0_lo], %[state0_lo], 1 \n\t"
205 ".set pop \n\t"
206 : [a1] "=&r" (a1), [b1] "=&r" (b1), [a2] "=&r" (a2), [b2] "=&r" (b2),
207 [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
208 [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo),
209 [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2),
210 [t3] "=&r" (t3), [t4] "=&r" (t4), [t5] "=&r" (t5)
211 : [c0] "r" (c0), [c1] "r" (c1), [c2] "r" (c2), [c3] "r" (c3),
212 [c4] "r" (c4), [c5] "r" (c5), [c6] "r" (c6), [c7] "r" (c7),
213 [in] "r" (in)
214 : "hi", "lo"
215 );
216 io[k] = (int16_t)a1;
217 }
218 __asm __volatile (
219 ".set push \n\t"
220 ".set noreorder \n\t"
221 #if !defined(MIPS_DSP_R2_LE)
222 "sll %[state0_hi], %[state0_hi], 16 \n\t"
223 "sll %[state0_lo], %[state0_lo], 1 \n\t"
224 "sll %[state1_hi], %[state1_hi], 16 \n\t"
225 "sll %[state1_lo], %[state1_lo], 1 \n\t"
226 "or %[state0_hi], %[state0_hi], %[state0_lo] \n\t"
227 "or %[state1_hi], %[state1_hi], %[state1_lo] \n\t"
228 #else
229 "sll %[state0_lo], %[state0_lo], 1 \n\t"
230 "sll %[state1_lo], %[state1_lo], 1 \n\t"
231 "precr_sra.ph.w %[state0_hi], %[state0_lo], 0 \n\t"
232 "precr_sra.ph.w %[state1_hi], %[state1_lo], 0 \n\t"
233 #endif
234 "sw %[state0_hi], 0(%[state]) \n\t"
235 "sw %[state1_hi], 4(%[state]) \n\t"
236 ".set pop \n\t"
237 : [state0_hi] "+r" (state0_hi), [state0_lo] "+r" (state0_lo),
238 [state1_hi] "+r" (state1_hi), [state1_lo] "+r" (state1_lo)
239 : [state] "r" (state)
240 : "memory"
241 );
242 }
243