/*
 *
 *  Bluetooth low-complexity, subband codec (SBC) library
 *
 *  Copyright (C) 2010 Keith Mok <ek9852@gmail.com>
 *  Copyright (C) 2008-2010  Nokia Corporation
 *  Copyright (C) 2004-2010  Marcel Holtmann <marcel@holtmann.org>
 *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch>
 *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com>
 *
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
27 
28 #include <stdint.h>
29 #include <limits.h>
30 #include "sbc.h"
31 #include "sbc_math.h"
32 #include "sbc_tables.h"
33 
34 #include "sbc_primitives_iwmmxt.h"
35 
/*
 * IWMMXT optimizations
 */
39 
40 #ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT
41 
sbc_analyze_four_iwmmxt(const int16_t * in,int32_t * out,const FIXED_T * consts)42 static inline void sbc_analyze_four_iwmmxt(const int16_t *in, int32_t *out,
43 					const FIXED_T *consts)
44 {
45 	asm volatile (
46 		"wldrd        wr0, [%0]\n"
47 		"tbcstw       wr4, %2\n"
48 		"wldrd        wr2, [%1]\n"
49 		"wldrd        wr1, [%0, #8]\n"
50 		"wldrd        wr3, [%1, #8]\n"
51 		"wmadds       wr0, wr2, wr0\n"
52 		" wldrd       wr6, [%0, #16]\n"
53 		"wmadds       wr1, wr3, wr1\n"
54 		" wldrd       wr7, [%0, #24]\n"
55 		"waddwss      wr0, wr0, wr4\n"
56 		" wldrd       wr8, [%1, #16]\n"
57 		"waddwss      wr1, wr1, wr4\n"
58 		" wldrd       wr9, [%1, #24]\n"
59 		" wmadds      wr6, wr8, wr6\n"
60 		"  wldrd      wr2, [%0, #32]\n"
61 		" wmadds      wr7, wr9, wr7\n"
62 		"  wldrd      wr3, [%0, #40]\n"
63 		" waddwss     wr0, wr6, wr0\n"
64 		"  wldrd      wr4, [%1, #32]\n"
65 		" waddwss     wr1, wr7, wr1\n"
66 		"  wldrd      wr5, [%1, #40]\n"
67 		"  wmadds     wr2, wr4, wr2\n"
68 		"wldrd        wr6, [%0, #48]\n"
69 		"  wmadds     wr3, wr5, wr3\n"
70 		"wldrd        wr7, [%0, #56]\n"
71 		"  waddwss    wr0, wr2, wr0\n"
72 		"wldrd        wr8, [%1, #48]\n"
73 		"  waddwss    wr1, wr3, wr1\n"
74 		"wldrd        wr9, [%1, #56]\n"
75 		"wmadds       wr6, wr8, wr6\n"
76 		" wldrd       wr2, [%0, #64]\n"
77 		"wmadds       wr7, wr9, wr7\n"
78 		" wldrd       wr3, [%0, #72]\n"
79 		"waddwss      wr0, wr6, wr0\n"
80 		" wldrd       wr4, [%1, #64]\n"
81 		"waddwss      wr1, wr7, wr1\n"
82 		" wldrd       wr5, [%1, #72]\n"
83 		" wmadds      wr2, wr4, wr2\n"
84 		"tmcr       wcgr0, %4\n"
85 		" wmadds      wr3, wr5, wr3\n"
86 		" waddwss     wr0, wr2, wr0\n"
87 		" waddwss     wr1, wr3, wr1\n"
88 		"\n"
89 		"wsrawg       wr0, wr0, wcgr0\n"
90 		" wldrd       wr4, [%1, #80]\n"
91 		"wsrawg       wr1, wr1, wcgr0\n"
92 		" wldrd       wr5, [%1, #88]\n"
93 		"wpackwss     wr0, wr0, wr0\n"
94 		" wldrd       wr6, [%1, #96]\n"
95 		"wpackwss     wr1, wr1, wr1\n"
96 		"wmadds       wr2, wr5, wr0\n"
97 		" wldrd       wr7, [%1, #104]\n"
98 		"wmadds       wr0, wr4, wr0\n"
99 		"\n"
100 		" wmadds      wr3, wr7, wr1\n"
101 		" wmadds      wr1, wr6, wr1\n"
102 		" waddwss     wr2, wr3, wr2\n"
103 		" waddwss     wr0, wr1, wr0\n"
104 		"\n"
105 		"wstrd        wr0, [%3]\n"
106 		"wstrd        wr2, [%3, #8]\n"
107 		:
108 		: "r" (in), "r" (consts),
109 			"r" (1 << (SBC_PROTO_FIXED4_SCALE - 1)), "r" (out),
110 			"r" (SBC_PROTO_FIXED4_SCALE)
111 		: "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
112 		  "wr8", "wr9", "wcgr0", "memory");
113 }
114 
sbc_analyze_eight_iwmmxt(const int16_t * in,int32_t * out,const FIXED_T * consts)115 static inline void sbc_analyze_eight_iwmmxt(const int16_t *in, int32_t *out,
116 							const FIXED_T *consts)
117 {
118 	asm volatile (
119 		"wldrd        wr0, [%0]\n"
120 		"tbcstw       wr15, %2\n"
121 		"wldrd        wr1, [%0, #8]\n"
122 		"wldrd        wr2, [%0, #16]\n"
123 		"wldrd        wr3, [%0, #24]\n"
124 		"wldrd        wr4, [%1]\n"
125 		"wldrd        wr5, [%1, #8]\n"
126 		"wldrd        wr6, [%1, #16]\n"
127 		"wldrd        wr7, [%1, #24]\n"
128 		"wmadds       wr0, wr0, wr4\n"
129 		" wldrd       wr8, [%1, #32]\n"
130 		"wmadds       wr1, wr1, wr5\n"
131 		" wldrd       wr9, [%1, #40]\n"
132 		"wmadds       wr2, wr2, wr6\n"
133 		" wldrd      wr10, [%1, #48]\n"
134 		"wmadds       wr3, wr3, wr7\n"
135 		" wldrd      wr11, [%1, #56]\n"
136 		"waddwss      wr0, wr0, wr15\n"
137 		" wldrd       wr4, [%0, #32]\n"
138 		"waddwss      wr1, wr1, wr15\n"
139 		" wldrd       wr5, [%0, #40]\n"
140 		"waddwss      wr2, wr2, wr15\n"
141 		" wldrd       wr6, [%0, #48]\n"
142 		"waddwss      wr3, wr3, wr15\n"
143 		" wldrd       wr7, [%0, #56]\n"
144 		" wmadds      wr4, wr4, wr8\n"
145 		"  wldrd     wr12, [%0, #64]\n"
146 		" wmadds      wr5, wr5, wr9\n"
147 		"  wldrd     wr13, [%0, #72]\n"
148 		" wmadds      wr6, wr6, wr10\n"
149 		"  wldrd     wr14, [%0, #80]\n"
150 		" wmadds      wr7, wr7, wr11\n"
151 		"  wldrd     wr15, [%0, #88]\n"
152 		" waddwss     wr0, wr4, wr0\n"
153 		"  wldrd      wr8, [%1, #64]\n"
154 		" waddwss     wr1, wr5, wr1\n"
155 		"  wldrd      wr9, [%1, #72]\n"
156 		" waddwss     wr2, wr6, wr2\n"
157 		"  wldrd     wr10, [%1, #80]\n"
158 		" waddwss     wr3, wr7, wr3\n"
159 		"  wldrd     wr11, [%1, #88]\n"
160 		"  wmadds    wr12, wr12, wr8\n"
161 		"wldrd        wr4, [%0, #96]\n"
162 		"  wmadds    wr13, wr13, wr9\n"
163 		"wldrd        wr5, [%0, #104]\n"
164 		"  wmadds    wr14, wr14, wr10\n"
165 		"wldrd        wr6, [%0, #112]\n"
166 		"  wmadds    wr15, wr15, wr11\n"
167 		"wldrd        wr7, [%0, #120]\n"
168 		"  waddwss    wr0, wr12, wr0\n"
169 		"wldrd        wr8, [%1, #96]\n"
170 		"  waddwss    wr1, wr13, wr1\n"
171 		"wldrd        wr9, [%1, #104]\n"
172 		"  waddwss    wr2, wr14, wr2\n"
173 		"wldrd       wr10, [%1, #112]\n"
174 		"  waddwss    wr3, wr15, wr3\n"
175 		"wldrd       wr11, [%1, #120]\n"
176 		"wmadds       wr4, wr4, wr8\n"
177 		" wldrd      wr12, [%0, #128]\n"
178 		"wmadds       wr5, wr5, wr9\n"
179 		" wldrd      wr13, [%0, #136]\n"
180 		"wmadds       wr6, wr6, wr10\n"
181 		" wldrd      wr14, [%0, #144]\n"
182 		"wmadds       wr7, wr7, wr11\n"
183 		" wldrd      wr15, [%0, #152]\n"
184 		"waddwss      wr0, wr4, wr0\n"
185 		" wldrd       wr8, [%1, #128]\n"
186 		"waddwss      wr1, wr5, wr1\n"
187 		" wldrd       wr9, [%1, #136]\n"
188 		"waddwss      wr2, wr6, wr2\n"
189 		" wldrd      wr10, [%1, #144]\n"
190 		" waddwss     wr3, wr7, wr3\n"
191 		" wldrd     wr11, [%1, #152]\n"
192 		" wmadds     wr12, wr12, wr8\n"
193 		"tmcr       wcgr0, %4\n"
194 		" wmadds     wr13, wr13, wr9\n"
195 		" wmadds     wr14, wr14, wr10\n"
196 		" wmadds     wr15, wr15, wr11\n"
197 		" waddwss     wr0, wr12, wr0\n"
198 		" waddwss     wr1, wr13, wr1\n"
199 		" waddwss     wr2, wr14, wr2\n"
200 		" waddwss     wr3, wr15, wr3\n"
201 		"\n"
202 		"wsrawg       wr0, wr0, wcgr0\n"
203 		"wsrawg       wr1, wr1, wcgr0\n"
204 		"wsrawg       wr2, wr2, wcgr0\n"
205 		"wsrawg       wr3, wr3, wcgr0\n"
206 		"\n"
207 		"wpackwss     wr0, wr0, wr0\n"
208 		"wpackwss     wr1, wr1, wr1\n"
209 		" wldrd       wr4, [%1, #160]\n"
210 		"wpackwss     wr2, wr2, wr2\n"
211 		" wldrd       wr5, [%1, #168]\n"
212 		"wpackwss     wr3, wr3, wr3\n"
213 		"  wldrd      wr6, [%1, #192]\n"
214 		" wmadds      wr4, wr4, wr0\n"
215 		"  wldrd      wr7, [%1, #200]\n"
216 		" wmadds      wr5, wr5, wr0\n"
217 		"   wldrd     wr8, [%1, #224]\n"
218 		"  wmadds     wr6, wr6, wr1\n"
219 		"   wldrd     wr9, [%1, #232]\n"
220 		"  wmadds     wr7, wr7, wr1\n"
221 		"  waddwss    wr4, wr6, wr4\n"
222 		"  waddwss    wr5, wr7, wr5\n"
223 		"   wmadds    wr8, wr8, wr2\n"
224 		"wldrd        wr6, [%1, #256]\n"
225 		"   wmadds    wr9, wr9, wr2\n"
226 		"wldrd        wr7, [%1, #264]\n"
227 		"waddwss      wr4, wr8, wr4\n"
228 		"   waddwss   wr5, wr9, wr5\n"
229 		"wmadds       wr6, wr6, wr3\n"
230 		"wmadds       wr7, wr7, wr3\n"
231 		"waddwss      wr4, wr6, wr4\n"
232 		"waddwss      wr5, wr7, wr5\n"
233 		"\n"
234 		"wstrd        wr4, [%3]\n"
235 		"wstrd        wr5, [%3, #8]\n"
236 		"\n"
237 		"wldrd        wr6, [%1, #176]\n"
238 		"wldrd        wr5, [%1, #184]\n"
239 		"wmadds       wr5, wr5, wr0\n"
240 		"wldrd        wr8, [%1, #208]\n"
241 		"wmadds       wr0, wr6, wr0\n"
242 		"wldrd        wr9, [%1, #216]\n"
243 		"wmadds       wr9, wr9, wr1\n"
244 		"wldrd        wr6, [%1, #240]\n"
245 		"wmadds       wr1, wr8, wr1\n"
246 		"wldrd        wr7, [%1, #248]\n"
247 		"waddwss      wr0, wr1, wr0\n"
248 		"waddwss      wr5, wr9, wr5\n"
249 		"wmadds       wr7, wr7, wr2\n"
250 		"wldrd        wr8, [%1, #272]\n"
251 		"wmadds       wr2, wr6, wr2\n"
252 		"wldrd        wr9, [%1, #280]\n"
253 		"waddwss      wr0, wr2, wr0\n"
254 		"waddwss      wr5, wr7, wr5\n"
255 		"wmadds       wr9, wr9, wr3\n"
256 		"wmadds       wr3, wr8, wr3\n"
257 		"waddwss      wr0, wr3, wr0\n"
258 		"waddwss      wr5, wr9, wr5\n"
259 		"\n"
260 		"wstrd        wr0, [%3, #16]\n"
261 		"wstrd        wr5, [%3, #24]\n"
262 		:
263 		: "r" (in), "r" (consts),
264 			"r" (1 << (SBC_PROTO_FIXED8_SCALE - 1)), "r" (out),
265 			"r" (SBC_PROTO_FIXED8_SCALE)
266 		: "wr0", "wr1", "wr2", "wr3", "wr4", "wr5", "wr6", "wr7",
267 		  "wr8", "wr9", "wr10", "wr11", "wr12", "wr13", "wr14", "wr15",
268 		  "wcgr0", "memory");
269 }
270 
sbc_analyze_4b_4s_iwmmxt(int16_t * x,int32_t * out,int out_stride)271 static inline void sbc_analyze_4b_4s_iwmmxt(int16_t *x, int32_t *out,
272 						int out_stride)
273 {
274 	/* Analyze blocks */
275 	sbc_analyze_four_iwmmxt(x + 12, out, analysis_consts_fixed4_simd_odd);
276 	out += out_stride;
277 	sbc_analyze_four_iwmmxt(x + 8, out, analysis_consts_fixed4_simd_even);
278 	out += out_stride;
279 	sbc_analyze_four_iwmmxt(x + 4, out, analysis_consts_fixed4_simd_odd);
280 	out += out_stride;
281 	sbc_analyze_four_iwmmxt(x + 0, out, analysis_consts_fixed4_simd_even);
282 }
283 
sbc_analyze_4b_8s_iwmmxt(int16_t * x,int32_t * out,int out_stride)284 static inline void sbc_analyze_4b_8s_iwmmxt(int16_t *x, int32_t *out,
285 						int out_stride)
286 {
287 	/* Analyze blocks */
288 	sbc_analyze_eight_iwmmxt(x + 24, out, analysis_consts_fixed8_simd_odd);
289 	out += out_stride;
290 	sbc_analyze_eight_iwmmxt(x + 16, out, analysis_consts_fixed8_simd_even);
291 	out += out_stride;
292 	sbc_analyze_eight_iwmmxt(x + 8, out, analysis_consts_fixed8_simd_odd);
293 	out += out_stride;
294 	sbc_analyze_eight_iwmmxt(x + 0, out, analysis_consts_fixed8_simd_even);
295 }
296 
sbc_init_primitives_iwmmxt(struct sbc_encoder_state * state)297 void sbc_init_primitives_iwmmxt(struct sbc_encoder_state *state)
298 {
299 	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_iwmmxt;
300 	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_iwmmxt;
301 	state->implementation_info = "IWMMXT";
302 }
303 
304 #endif
305