1 /*
2 *
3 * Bluetooth low-complexity, subband codec (SBC) library
4 *
5 * Copyright (C) 2010 Keith Mok <ek9852@gmail.com>
6 * Copyright (C) 2008-2010 Nokia Corporation
7 * Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
8 * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
9 * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
10 *
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 *
26 */
27
28 #include <stdint.h>
29 #include <limits.h>
30 #include "sbc.h"
31 #include "sbc_math.h"
32 #include "sbc_tables.h"
33
34 #include "sbc_primitives_iwmmxt.h"
35
36 /*
37 * IWMMXT optimizations
38 */
39
40 #ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT
41
sbc_analyze_four_iwmmxt(const int16_t * in,int32_t * out,const FIXED_T * consts)42 static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out,
43 const FIXED_T *consts)
44 {
45 asm volatile (
46 "wldrd wr0, [%0]\n"
47 "tbcstw wr4, %2\n"
48 "wldrd wr2, [%1]\n"
49 "wldrd wr1, [%0, #8]\n"
50 "wldrd wr3, [%1, #8]\n"
51 "wmadds wr0, wr2, wr0\n"
52 " wldrd wr6, [%0, #16]\n"
53 "wmadds wr1, wr3, wr1\n"
54 " wldrd wr7, [%0, #24]\n"
55 "waddwss wr0, wr0, wr4\n"
56 " wldrd wr8, [%1, #16]\n"
57 "waddwss wr1, wr1, wr4\n"
58 " wldrd wr9, [%1, #24]\n"
59 " wmadds wr6, wr8, wr6\n"
60 " wldrd wr2, [%0, #32]\n"
61 " wmadds wr7, wr9, wr7\n"
62 " wldrd wr3, [%0, #40]\n"
63 " waddwss wr0, wr6, wr0\n"
64 " wldrd wr4, [%1, #32]\n"
65 " waddwss wr1, wr7, wr1\n"
66 " wldrd wr5, [%1, #40]\n"
67 " wmadds wr2, wr4, wr2\n"
68 "wldrd wr6, [%0, #48]\n"
69 " wmadds wr3, wr5, wr3\n"
70 "wldrd wr7, [%0, #56]\n"
71 " waddwss wr0, wr2, wr0\n"
72 "wldrd wr8, [%1, #48]\n"
73 " waddwss wr1, wr3, wr1\n"
74 "wldrd wr9, [%1, #56]\n"
75 "wmadds wr6, wr8, wr6\n"
76 " wldrd wr2, [%0, #64]\n"
77 "wmadds wr7, wr9, wr7\n"
78 " wldrd wr3, [%0, #72]\n"
79 "waddwss wr0, wr6, wr0\n"
80 " wldrd wr4, [%1, #64]\n"
81 "waddwss wr1, wr7, wr1\n"
82 " wldrd wr5, [%1, #72]\n"
83 " wmadds wr2, wr4, wr2\n"
84 "tmcr wcgr0, %4\n"
85 " wmadds wr3, wr5, wr3\n"
86 " waddwss wr0, wr2, wr0\n"
87 " waddwss wr1, wr3, wr1\n"
88 "\n"
89 "wsrawg wr0, wr0, wcgr0\n"
90 " wldrd wr4, [%1, #80]\n"
91 "wsrawg wr1, wr1, wcgr0\n"
92 " wldrd wr5, [%1, #88]\n"
93 "wpackwss wr0, wr0, wr0\n"
94 " wldrd wr6, [%1, #96]\n"
95 "wpackwss wr1, wr1, wr1\n"
96 "wmadds wr2, wr5, wr0\n"
97 " wldrd wr7, [%1, #104]\n"
98 "wmadds wr0, wr4, wr0\n"
99 "\n"
100 " wmadds wr3, wr7, wr1\n"
101 " wmadds wr1, wr6, wr1\n"
102 " waddwss wr2, wr3, wr2\n"
103 " waddwss wr0, wr1, wr0\n"
104 "\n"
105 "wstrd wr0, [%3]\n"
106 "wstrd wr2, [%3, #8]\n"
107 :
108 : "r" (in), "r" (consts),
109 "r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out),
110 "r" (SBC_PROTO_FIXED4_SCALE)
111 : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
112 "wr8", "wr9", "wcgr0", "memory");
113 }
114
sbc_analyze_eight_iwmmxt(const int16_t * in,int32_t * out,const FIXED_T * consts)115 static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out,
116 const FIXED_T *consts)
117 {
118 asm volatile (
119 "wldrd wr0, [%0]\n"
120 "tbcstw wr15, %2\n"
121 "wldrd wr1, [%0, #8]\n"
122 "wldrd wr2, [%0, #16]\n"
123 "wldrd wr3, [%0, #24]\n"
124 "wldrd wr4, [%1]\n"
125 "wldrd wr5, [%1, #8]\n"
126 "wldrd wr6, [%1, #16]\n"
127 "wldrd wr7, [%1, #24]\n"
128 "wmadds wr0, wr0, wr4\n"
129 " wldrd wr8, [%1, #32]\n"
130 "wmadds wr1, wr1, wr5\n"
131 " wldrd wr9, [%1, #40]\n"
132 "wmadds wr2, wr2, wr6\n"
133 " wldrd wr10, [%1, #48]\n"
134 "wmadds wr3, wr3, wr7\n"
135 " wldrd wr11, [%1, #56]\n"
136 "waddwss wr0, wr0, wr15\n"
137 " wldrd wr4, [%0, #32]\n"
138 "waddwss wr1, wr1, wr15\n"
139 " wldrd wr5, [%0, #40]\n"
140 "waddwss wr2, wr2, wr15\n"
141 " wldrd wr6, [%0, #48]\n"
142 "waddwss wr3, wr3, wr15\n"
143 " wldrd wr7, [%0, #56]\n"
144 " wmadds wr4, wr4, wr8\n"
145 " wldrd wr12, [%0, #64]\n"
146 " wmadds wr5, wr5, wr9\n"
147 " wldrd wr13, [%0, #72]\n"
148 " wmadds wr6, wr6, wr10\n"
149 " wldrd wr14, [%0, #80]\n"
150 " wmadds wr7, wr7, wr11\n"
151 " wldrd wr15, [%0, #88]\n"
152 " waddwss wr0, wr4, wr0\n"
153 " wldrd wr8, [%1, #64]\n"
154 " waddwss wr1, wr5, wr1\n"
155 " wldrd wr9, [%1, #72]\n"
156 " waddwss wr2, wr6, wr2\n"
157 " wldrd wr10, [%1, #80]\n"
158 " waddwss wr3, wr7, wr3\n"
159 " wldrd wr11, [%1, #88]\n"
160 " wmadds wr12, wr12, wr8\n"
161 "wldrd wr4, [%0, #96]\n"
162 " wmadds wr13, wr13, wr9\n"
163 "wldrd wr5, [%0, #104]\n"
164 " wmadds wr14, wr14, wr10\n"
165 "wldrd wr6, [%0, #112]\n"
166 " wmadds wr15, wr15, wr11\n"
167 "wldrd wr7, [%0, #120]\n"
168 " waddwss wr0, wr12, wr0\n"
169 "wldrd wr8, [%1, #96]\n"
170 " waddwss wr1, wr13, wr1\n"
171 "wldrd wr9, [%1, #104]\n"
172 " waddwss wr2, wr14, wr2\n"
173 "wldrd wr10, [%1, #112]\n"
174 " waddwss wr3, wr15, wr3\n"
175 "wldrd wr11, [%1, #120]\n"
176 "wmadds wr4, wr4, wr8\n"
177 " wldrd wr12, [%0, #128]\n"
178 "wmadds wr5, wr5, wr9\n"
179 " wldrd wr13, [%0, #136]\n"
180 "wmadds wr6, wr6, wr10\n"
181 " wldrd wr14, [%0, #144]\n"
182 "wmadds wr7, wr7, wr11\n"
183 " wldrd wr15, [%0, #152]\n"
184 "waddwss wr0, wr4, wr0\n"
185 " wldrd wr8, [%1, #128]\n"
186 "waddwss wr1, wr5, wr1\n"
187 " wldrd wr9, [%1, #136]\n"
188 "waddwss wr2, wr6, wr2\n"
189 " wldrd wr10, [%1, #144]\n"
190 " waddwss wr3, wr7, wr3\n"
191 " wldrd wr11, [%1, #152]\n"
192 " wmadds wr12, wr12, wr8\n"
193 "tmcr wcgr0, %4\n"
194 " wmadds wr13, wr13, wr9\n"
195 " wmadds wr14, wr14, wr10\n"
196 " wmadds wr15, wr15, wr11\n"
197 " waddwss wr0, wr12, wr0\n"
198 " waddwss wr1, wr13, wr1\n"
199 " waddwss wr2, wr14, wr2\n"
200 " waddwss wr3, wr15, wr3\n"
201 "\n"
202 "wsrawg wr0, wr0, wcgr0\n"
203 "wsrawg wr1, wr1, wcgr0\n"
204 "wsrawg wr2, wr2, wcgr0\n"
205 "wsrawg wr3, wr3, wcgr0\n"
206 "\n"
207 "wpackwss wr0, wr0, wr0\n"
208 "wpackwss wr1, wr1, wr1\n"
209 " wldrd wr4, [%1, #160]\n"
210 "wpackwss wr2, wr2, wr2\n"
211 " wldrd wr5, [%1, #168]\n"
212 "wpackwss wr3, wr3, wr3\n"
213 " wldrd wr6, [%1, #192]\n"
214 " wmadds wr4, wr4, wr0\n"
215 " wldrd wr7, [%1, #200]\n"
216 " wmadds wr5, wr5, wr0\n"
217 " wldrd wr8, [%1, #224]\n"
218 " wmadds wr6, wr6, wr1\n"
219 " wldrd wr9, [%1, #232]\n"
220 " wmadds wr7, wr7, wr1\n"
221 " waddwss wr4, wr6, wr4\n"
222 " waddwss wr5, wr7, wr5\n"
223 " wmadds wr8, wr8, wr2\n"
224 "wldrd wr6, [%1, #256]\n"
225 " wmadds wr9, wr9, wr2\n"
226 "wldrd wr7, [%1, #264]\n"
227 "waddwss wr4, wr8, wr4\n"
228 " waddwss wr5, wr9, wr5\n"
229 "wmadds wr6, wr6, wr3\n"
230 "wmadds wr7, wr7, wr3\n"
231 "waddwss wr4, wr6, wr4\n"
232 "waddwss wr5, wr7, wr5\n"
233 "\n"
234 "wstrd wr4, [%3]\n"
235 "wstrd wr5, [%3, #8]\n"
236 "\n"
237 "wldrd wr6, [%1, #176]\n"
238 "wldrd wr5, [%1, #184]\n"
239 "wmadds wr5, wr5, wr0\n"
240 "wldrd wr8, [%1, #208]\n"
241 "wmadds wr0, wr6, wr0\n"
242 "wldrd wr9, [%1, #216]\n"
243 "wmadds wr9, wr9, wr1\n"
244 "wldrd wr6, [%1, #240]\n"
245 "wmadds wr1, wr8, wr1\n"
246 "wldrd wr7, [%1, #248]\n"
247 "waddwss wr0, wr1, wr0\n"
248 "waddwss wr5, wr9, wr5\n"
249 "wmadds wr7, wr7, wr2\n"
250 "wldrd wr8, [%1, #272]\n"
251 "wmadds wr2, wr6, wr2\n"
252 "wldrd wr9, [%1, #280]\n"
253 "waddwss wr0, wr2, wr0\n"
254 "waddwss wr5, wr7, wr5\n"
255 "wmadds wr9, wr9, wr3\n"
256 "wmadds wr3, wr8, wr3\n"
257 "waddwss wr0, wr3, wr0\n"
258 "waddwss wr5, wr9, wr5\n"
259 "\n"
260 "wstrd wr0, [%3, #16]\n"
261 "wstrd wr5, [%3, #24]\n"
262 :
263 : "r" (in), "r" (consts),
264 "r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out),
265 "r" (SBC_PROTO_FIXED8_SCALE)
266 : "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
267 "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15",
268 "wcgr0", "memory");
269 }
270
sbc_analyze_4b_4s_iwmmxt(int16_t * x,int32_t * out,int out_stride)271 static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out,
272 int out_stride)
273 {
274 /* Analyze blocks */
275 sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd);
276 out += out_stride;
277 sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even);
278 out += out_stride;
279 sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd);
280 out += out_stride;
281 sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even);
282 }
283
sbc_analyze_4b_8s_iwmmxt(int16_t * x,int32_t * out,int out_stride)284 static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out,
285 int out_stride)
286 {
287 /* Analyze blocks */
288 sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd);
289 out += out_stride;
290 sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even);
291 out += out_stride;
292 sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd);
293 out += out_stride;
294 sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even);
295 }
296
sbc_init_primitives_iwmmxt(struct sbc_encoder_state * state)297 void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state)
298 {
299 state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt;
300 state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt;
301 state->implementation_info = "IWMMXT";
302 }
303
304 #endif
305