1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "libyuv/row.h"
11
12 #include <string.h> // For memcpy and memset.
13
14 #include "libyuv/basic_types.h"
15
16 #ifdef __cplusplus
17 namespace libyuv {
18 extern "C" {
19 #endif
20
21 // This module is for Mips MMI.
22 #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
23
RGB24ToARGBRow_MMI(const uint8_t * src_rgb24,uint8_t * dst_argb,int width)24 void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
25 uint8_t* dst_argb,
26 int width) {
27 uint64_t src0, src1, dest;
28 const uint64_t mask = 0xff000000ULL;
29
30 __asm__ volatile(
31 "1: \n\t"
32 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
33 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
34 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
35 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
36
37 "or %[src0], %[src0], %[mask] \n\t"
38 "or %[src1], %[src1], %[mask] \n\t"
39 "punpcklwd %[dest], %[src0], %[src1] \n\t"
40 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
41 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
42
43 "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
44 "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
45 "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
46 "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
47
48 "or %[src0], %[src0], %[mask] \n\t"
49 "or %[src1], %[src1], %[mask] \n\t"
50 "punpcklwd %[dest], %[src0], %[src1] \n\t"
51 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
52 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
53
54 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
55 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
56 "daddi %[width], %[width], -0x04 \n\t"
57 "bnez %[width], 1b \n\t"
58 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
59 : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
60 [mask] "f"(mask)
61 : "memory");
62 }
63
RAWToARGBRow_MMI(const uint8_t * src_raw,uint8_t * dst_argb,int width)64 void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
65 uint64_t src0, src1, dest;
66 const uint64_t mask0 = 0x0;
67 const uint64_t mask1 = 0xff000000ULL;
68 const uint64_t mask2 = 0xc6;
69
70 __asm__ volatile(
71 "1: \n\t"
72 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
73 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
74 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
75 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
76
77 "or %[src0], %[src0], %[mask1] \n\t"
78 "punpcklbh %[src0], %[src0], %[mask0] \n\t"
79 "pshufh %[src0], %[src0], %[mask2] \n\t"
80 "or %[src1], %[src1], %[mask1] \n\t"
81 "punpcklbh %[src1], %[src1], %[mask0] \n\t"
82 "pshufh %[src1], %[src1], %[mask2] \n\t"
83 "packushb %[dest], %[src0], %[src1] \n\t"
84 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
85 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
86
87 "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
88 "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
89 "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
90 "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
91
92 "or %[src0], %[src0], %[mask1] \n\t"
93 "punpcklbh %[src0], %[src0], %[mask0] \n\t"
94 "pshufh %[src0], %[src0], %[mask2] \n\t"
95 "or %[src1], %[src1], %[mask1] \n\t"
96 "punpcklbh %[src1], %[src1], %[mask0] \n\t"
97 "pshufh %[src1], %[src1], %[mask2] \n\t"
98 "packushb %[dest], %[src0], %[src1] \n\t"
99 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
100 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
101
102 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
103 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
104 "daddi %[width], %[width], -0x04 \n\t"
105 "bnez %[width], 1b \n\t"
106 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
107 : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
108 [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
109 : "memory");
110 }
111
RAWToRGB24Row_MMI(const uint8_t * src_raw,uint8_t * dst_rgb24,int width)112 void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
113 uint64_t src0, src1;
114 uint64_t ftmp[4];
115 uint64_t mask0 = 0xc6;
116 uint64_t mask1 = 0x6c;
117
118 __asm__ volatile(
119 "1: \n\t"
120 "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
121 "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
122 "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
123 "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
124
125 "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
126 "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
127 "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
128 "punpcklbh %[src1], %[src1], %[zero] \n\t"
129 "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
130 "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
131 "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
132 "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
133 "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
134 "pshufh %[src1], %[src1], %[mask1] \n\t"
135 "pextrh %[ftmp2], %[src1], %[zero] \n\t"
136 "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
137 "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
138 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
139 "packushb %[src1], %[src1], %[zero] \n\t"
140
141 "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
142 "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
143 "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
144 "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
145
146 "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
147 "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
148 "daddiu %[width], %[width], -0x04 \n\t"
149 "bgtz %[width], 1b \n\t"
150 : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
151 [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
152 : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
153 [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
154 [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
155 : "memory");
156 }
157
RGB565ToARGBRow_MMI(const uint8_t * src_rgb565,uint8_t * dst_argb,int width)158 void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
159 uint8_t* dst_argb,
160 int width) {
161 uint64_t ftmp[5];
162 uint64_t c0 = 0x001f001f001f001f;
163 uint64_t c1 = 0x00ff00ff00ff00ff;
164 uint64_t c2 = 0x0007000700070007;
165 __asm__ volatile(
166 "1: \n\t"
167 "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
168 "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
169 "psrlh %[src1], %[src0], %[eight] \n\t"
170 "and %[b], %[src0], %[c0] \n\t"
171 "and %[src0], %[src0], %[c1] \n\t"
172 "psrlh %[src0], %[src0], %[five] \n\t"
173 "and %[g], %[src1], %[c2] \n\t"
174 "psllh %[g], %[g], %[three] \n\t"
175 "or %[g], %[src0], %[g] \n\t"
176 "psrlh %[r], %[src1], %[three] \n\t"
177 "psllh %[src0], %[b], %[three] \n\t"
178 "psrlh %[src1], %[b], %[two] \n\t"
179 "or %[b], %[src0], %[src1] \n\t"
180 "psllh %[src0], %[g], %[two] \n\t"
181 "psrlh %[src1], %[g], %[four] \n\t"
182 "or %[g], %[src0], %[src1] \n\t"
183 "psllh %[src0], %[r], %[three] \n\t"
184 "psrlh %[src1], %[r], %[two] \n\t"
185 "or %[r], %[src0], %[src1] \n\t"
186 "packushb %[b], %[b], %[r] \n\t"
187 "packushb %[g], %[g], %[c1] \n\t"
188 "punpcklbh %[src0], %[b], %[g] \n\t"
189 "punpckhbh %[src1], %[b], %[g] \n\t"
190 "punpcklhw %[r], %[src0], %[src1] \n\t"
191 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
192 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
193 "punpckhhw %[r], %[src0], %[src1] \n\t"
194 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
195 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
196 "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
197 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
198 "daddiu %[width], %[width], -0x04 \n\t"
199 "bgtz %[width], 1b \n\t"
200 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
201 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
202 : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
203 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
204 [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
205 [four] "f"(0x04)
206 : "memory");
207 }
208
ARGB1555ToARGBRow_MMI(const uint8_t * src_argb1555,uint8_t * dst_argb,int width)209 void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
210 uint8_t* dst_argb,
211 int width) {
212 uint64_t ftmp[6];
213 uint64_t c0 = 0x001f001f001f001f;
214 uint64_t c1 = 0x00ff00ff00ff00ff;
215 uint64_t c2 = 0x0003000300030003;
216 uint64_t c3 = 0x007c007c007c007c;
217 uint64_t c4 = 0x0001000100010001;
218 __asm__ volatile(
219 "1: \n\t"
220 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
221 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
222 "psrlh %[src1], %[src0], %[eight] \n\t"
223 "and %[b], %[src0], %[c0] \n\t"
224 "and %[src0], %[src0], %[c1] \n\t"
225 "psrlh %[src0], %[src0], %[five] \n\t"
226 "and %[g], %[src1], %[c2] \n\t"
227 "psllh %[g], %[g], %[three] \n\t"
228 "or %[g], %[src0], %[g] \n\t"
229 "and %[r], %[src1], %[c3] \n\t"
230 "psrlh %[r], %[r], %[two] \n\t"
231 "psrlh %[a], %[src1], %[seven] \n\t"
232 "psllh %[src0], %[b], %[three] \n\t"
233 "psrlh %[src1], %[b], %[two] \n\t"
234 "or %[b], %[src0], %[src1] \n\t"
235 "psllh %[src0], %[g], %[three] \n\t"
236 "psrlh %[src1], %[g], %[two] \n\t"
237 "or %[g], %[src0], %[src1] \n\t"
238 "psllh %[src0], %[r], %[three] \n\t"
239 "psrlh %[src1], %[r], %[two] \n\t"
240 "or %[r], %[src0], %[src1] \n\t"
241 "xor %[a], %[a], %[c1] \n\t"
242 "paddb %[a], %[a], %[c4] \n\t"
243 "packushb %[b], %[b], %[r] \n\t"
244 "packushb %[g], %[g], %[a] \n\t"
245 "punpcklbh %[src0], %[b], %[g] \n\t"
246 "punpckhbh %[src1], %[b], %[g] \n\t"
247 "punpcklhw %[r], %[src0], %[src1] \n\t"
248 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
249 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
250 "punpckhhw %[r], %[src0], %[src1] \n\t"
251 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
252 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
253 "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"
254 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
255 "daddiu %[width], %[width], -0x04 \n\t"
256 "bgtz %[width], 1b \n\t"
257 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
258 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
259 : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),
260 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
261 [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),
262 [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
263 : "memory");
264 }
265
ARGB4444ToARGBRow_MMI(const uint8_t * src_argb4444,uint8_t * dst_argb,int width)266 void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
267 uint8_t* dst_argb,
268 int width) {
269 uint64_t ftmp[6];
270 uint64_t c0 = 0x000f000f000f000f;
271 uint64_t c1 = 0x00ff00ff00ff00ff;
272 __asm__ volatile(
273 "1: \n\t"
274 "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
275 "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
276 "psrlh %[src1], %[src0], %[eight] \n\t"
277 "and %[b], %[src0], %[c0] \n\t"
278 "and %[src0], %[src0], %[c1] \n\t"
279 "psrlh %[g], %[src0], %[four] \n\t"
280 "and %[r], %[src1], %[c0] \n\t"
281 "psrlh %[a], %[src1], %[four] \n\t"
282 "psllh %[src0], %[b], %[four] \n\t"
283 "or %[b], %[src0], %[b] \n\t"
284 "psllh %[src0], %[g], %[four] \n\t"
285 "or %[g], %[src0], %[g] \n\t"
286 "psllh %[src0], %[r], %[four] \n\t"
287 "or %[r], %[src0], %[r] \n\t"
288 "psllh %[src0], %[a], %[four] \n\t"
289 "or %[a], %[src0], %[a] \n\t"
290 "packushb %[b], %[b], %[r] \n\t"
291 "packushb %[g], %[g], %[a] \n\t"
292 "punpcklbh %[src0], %[b], %[g] \n\t"
293 "punpckhbh %[src1], %[b], %[g] \n\t"
294 "punpcklhw %[r], %[src0], %[src1] \n\t"
295 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
296 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
297 "punpckhhw %[r], %[src0], %[src1] \n\t"
298 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
299 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
300 "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"
301 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
302 "daddiu %[width], %[width], -0x04 \n\t"
303 "bgtz %[width], 1b \n\t"
304 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
305 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
306 : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),
307 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),
308 [four] "f"(0x04)
309 : "memory");
310 }
311
ARGBToRGB24Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)312 void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
313 uint64_t src;
314
315 __asm__ volatile(
316 "1: \n\t"
317 "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
318 "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
319 "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"
320 "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
321
322 "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"
323 "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
324 "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"
325 "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
326
327 "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"
328 "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
329 "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"
330 "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
331
332 "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"
333 "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
334 "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"
335 "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
336
337 "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
338 "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
339 "daddi %[width], %[width], -0x04 \n\t"
340 "bnez %[width], 1b \n\t"
341 : [src] "=&f"(src)
342 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)
343 : "memory");
344 }
345
ARGBToRAWRow_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)346 void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
347 uint64_t src0, src1;
348 uint64_t ftmp[3];
349 uint64_t mask0 = 0xc6;
350 uint64_t mask1 = 0x18;
351
352 __asm__ volatile(
353 "1: \n\t"
354 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
355 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
356 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
357 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
358
359 "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
360 "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
361 "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
362 "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"
363 "punpckhbh %[src1], %[src1], %[zero] \n\t"
364
365 "pextrh %[src0], %[ftmp1], %[two] \n\t"
366 "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"
367 "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"
368
369 "pextrh %[src0], %[ftmp2], %[two] \n\t"
370 "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"
371 "pextrh %[src0], %[ftmp2], %[one] \n\t"
372 "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"
373 "pextrh %[src0], %[ftmp2], %[zero] \n\t"
374 "pshufh %[src1], %[src1], %[mask1] \n\t"
375 "pinsrh_0 %[src1], %[src1], %[src0] \n\t"
376 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
377 "packushb %[src1], %[src1], %[zero] \n\t"
378
379 "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"
380 "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
381 "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"
382 "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
383
384 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
385 "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"
386 "daddiu %[width], %[width], -0x04 \n\t"
387 "bgtz %[width], 1b \n\t"
388 : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
389 [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
390 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
391 [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
392 [one] "f"(0x01), [two] "f"(0x02)
393 : "memory");
394 }
395
ARGBToRGB565Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)396 void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
397 uint64_t src0, src1;
398 uint64_t ftmp[3];
399
400 __asm__ volatile(
401 "1: \n\t"
402 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
403 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
404 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
405 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
406
407 "punpcklbh %[b], %[src0], %[src1] \n\t"
408 "punpckhbh %[g], %[src0], %[src1] \n\t"
409 "punpcklbh %[src0], %[b], %[g] \n\t"
410 "punpckhbh %[src1], %[b], %[g] \n\t"
411 "punpcklbh %[b], %[src0], %[zero] \n\t"
412 "punpckhbh %[g], %[src0], %[zero] \n\t"
413 "punpcklbh %[r], %[src1], %[zero] \n\t"
414
415 "psrlh %[b], %[b], %[three] \n\t"
416 "psrlh %[g], %[g], %[two] \n\t"
417 "psrlh %[r], %[r], %[three] \n\t"
418
419 "psllh %[g], %[g], %[five] \n\t"
420 "psllh %[r], %[r], %[eleven] \n\t"
421 "or %[b], %[b], %[g] \n\t"
422 "or %[b], %[b], %[r] \n\t"
423
424 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
425 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
426
427 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
428 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
429 "daddiu %[width], %[width], -0x04 \n\t"
430 "bgtz %[width], 1b \n\t"
431 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
432 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
433 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
434 [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),
435 [eleven] "f"(0x0b)
436 : "memory");
437 }
438
439 // dither4 is a row of 4 values from 4x4 dither matrix.
440 // The 4x4 matrix contains values to increase RGB. When converting to
441 // fewer bits (565) this provides an ordered dither.
442 // The order in the 4x4 matrix in first byte is upper left.
443 // The 4 values are passed as an int, then referenced as an array, so
444 // endian will not affect order of the original matrix. But the dither4
445 // will containing the first pixel in the lower byte for little endian
446 // or the upper byte for big endian.
ARGBToRGB565DitherRow_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,const uint32_t dither4,int width)447 void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
448 uint8_t* dst_rgb,
449 const uint32_t dither4,
450 int width) {
451 uint64_t src0, src1;
452 uint64_t ftmp[3];
453 uint64_t c0 = 0x00ff00ff00ff00ff;
454
455 __asm__ volatile(
456 "punpcklbh %[dither], %[dither], %[zero] \n\t"
457 "1: \n\t"
458 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
459 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
460 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
461 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
462
463 "punpcklbh %[b], %[src0], %[src1] \n\t"
464 "punpckhbh %[g], %[src0], %[src1] \n\t"
465 "punpcklbh %[src0], %[b], %[g] \n\t"
466 "punpckhbh %[src1], %[b], %[g] \n\t"
467 "punpcklbh %[b], %[src0], %[zero] \n\t"
468 "punpckhbh %[g], %[src0], %[zero] \n\t"
469 "punpcklbh %[r], %[src1], %[zero] \n\t"
470
471 "paddh %[b], %[b], %[dither] \n\t"
472 "paddh %[g], %[g], %[dither] \n\t"
473 "paddh %[r], %[r], %[dither] \n\t"
474 "pcmpgth %[src0], %[b], %[c0] \n\t"
475 "or %[src0], %[src0], %[b] \n\t"
476 "and %[b], %[src0], %[c0] \n\t"
477 "pcmpgth %[src0], %[g], %[c0] \n\t"
478 "or %[src0], %[src0], %[g] \n\t"
479 "and %[g], %[src0], %[c0] \n\t"
480 "pcmpgth %[src0], %[r], %[c0] \n\t"
481 "or %[src0], %[src0], %[r] \n\t"
482 "and %[r], %[src0], %[c0] \n\t"
483
484 "psrlh %[b], %[b], %[three] \n\t"
485 "psrlh %[g], %[g], %[two] \n\t"
486 "psrlh %[r], %[r], %[three] \n\t"
487
488 "psllh %[g], %[g], %[five] \n\t"
489 "psllh %[r], %[r], %[eleven] \n\t"
490 "or %[b], %[b], %[g] \n\t"
491 "or %[b], %[b], %[r] \n\t"
492
493 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
494 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
495
496 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
497 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
498 "daddiu %[width], %[width], -0x04 \n\t"
499 "bgtz %[width], 1b \n\t"
500 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
501 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
502 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
503 [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
504 [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
505 : "memory");
506 }
507
ARGBToARGB1555Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)508 void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
509 uint8_t* dst_rgb,
510 int width) {
511 uint64_t src0, src1;
512 uint64_t ftmp[4];
513
514 __asm__ volatile(
515 "1: \n\t"
516 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
517 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
518 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
519 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
520
521 "punpcklbh %[b], %[src0], %[src1] \n\t"
522 "punpckhbh %[g], %[src0], %[src1] \n\t"
523 "punpcklbh %[src0], %[b], %[g] \n\t"
524 "punpckhbh %[src1], %[b], %[g] \n\t"
525 "punpcklbh %[b], %[src0], %[zero] \n\t"
526 "punpckhbh %[g], %[src0], %[zero] \n\t"
527 "punpcklbh %[r], %[src1], %[zero] \n\t"
528 "punpckhbh %[a], %[src1], %[zero] \n\t"
529
530 "psrlh %[b], %[b], %[three] \n\t"
531 "psrlh %[g], %[g], %[three] \n\t"
532 "psrlh %[r], %[r], %[three] \n\t"
533 "psrlh %[a], %[a], %[seven] \n\t"
534
535 "psllh %[g], %[g], %[five] \n\t"
536 "psllh %[r], %[r], %[ten] \n\t"
537 "psllh %[a], %[a], %[fifteen] \n\t"
538 "or %[b], %[b], %[g] \n\t"
539 "or %[b], %[b], %[r] \n\t"
540 "or %[b], %[b], %[a] \n\t"
541
542 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
543 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
544
545 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
546 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
547 "daddiu %[width], %[width], -0x04 \n\t"
548 "bgtz %[width], 1b \n\t"
549 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
550 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
551 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
552 [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),
553 [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
554 : "memory");
555 }
556
ARGBToARGB4444Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)557 void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
558 uint8_t* dst_rgb,
559 int width) {
560 uint64_t src0, src1;
561 uint64_t ftmp[4];
562
563 __asm__ volatile(
564 "1: \n\t"
565 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
566 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
567 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
568 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
569
570 "punpcklbh %[b], %[src0], %[src1] \n\t"
571 "punpckhbh %[g], %[src0], %[src1] \n\t"
572 "punpcklbh %[src0], %[b], %[g] \n\t"
573 "punpckhbh %[src1], %[b], %[g] \n\t"
574 "punpcklbh %[b], %[src0], %[zero] \n\t"
575 "punpckhbh %[g], %[src0], %[zero] \n\t"
576 "punpcklbh %[r], %[src1], %[zero] \n\t"
577 "punpckhbh %[a], %[src1], %[zero] \n\t"
578
579 "psrlh %[b], %[b], %[four] \n\t"
580 "psrlh %[g], %[g], %[four] \n\t"
581 "psrlh %[r], %[r], %[four] \n\t"
582 "psrlh %[a], %[a], %[four] \n\t"
583
584 "psllh %[g], %[g], %[four] \n\t"
585 "psllh %[r], %[r], %[eight] \n\t"
586 "psllh %[a], %[a], %[twelve] \n\t"
587 "or %[b], %[b], %[g] \n\t"
588 "or %[b], %[b], %[r] \n\t"
589 "or %[b], %[b], %[a] \n\t"
590
591 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
592 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
593
594 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
595 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
596 "daddiu %[width], %[width], -0x04 \n\t"
597 "bgtz %[width], 1b \n\t"
598 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
599 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
600 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
601 [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),
602 [twelve] "f"(0x0c)
603 : "memory");
604 }
605
ARGBToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)606 void ARGBToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
607 uint64_t src, src_hi, src_lo;
608 uint64_t dest0, dest1, dest2, dest3;
609 const uint64_t value = 0x1080;
610 const uint64_t mask = 0x0001004200810019;
611
612 __asm__ volatile(
613 "1: \n\t"
614 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
615 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
616 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
617 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
618 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
619 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
620 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
621 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
622 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
623 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
624 "paddw %[dest0], %[dest0], %[src] \n\t"
625 "psrlw %[dest0], %[dest0], %[eight] \n\t"
626
627 "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
628 "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
629 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
630 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
631 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
632 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
633 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
634 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
635 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
636 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
637 "paddw %[dest1], %[dest1], %[src] \n\t"
638 "psrlw %[dest1], %[dest1], %[eight] \n\t"
639
640 "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
641 "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
642 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
643 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
644 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
645 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
646 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
647 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
648 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
649 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
650 "paddw %[dest2], %[dest2], %[src] \n\t"
651 "psrlw %[dest2], %[dest2], %[eight] \n\t"
652
653 "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
654 "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
655 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
656 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
657 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
658 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
659 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
660 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
661 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
662 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
663 "paddw %[dest3], %[dest3], %[src] \n\t"
664 "psrlw %[dest3], %[dest3], %[eight] \n\t"
665
666 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
667 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
668 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
669 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
670 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
671
672 "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
673 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
674 "daddi %[width], %[width], -0x08 \n\t"
675 "bnez %[width], 1b \n\t"
676 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
677 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
678 [dest3] "=&f"(dest3)
679 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
680 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
681 [zero] "f"(0x00)
682 : "memory");
683 }
684
ARGBToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)685 void ARGBToUVRow_MMI(const uint8_t* src_rgb0,
686 int src_stride_rgb,
687 uint8_t* dst_u,
688 uint8_t* dst_v,
689 int width) {
690 uint64_t src_rgb1;
691 uint64_t ftmp[12];
692 const uint64_t value = 0x4040;
693 const uint64_t mask_u = 0x0026004a00700002;
694 const uint64_t mask_v = 0x00020070005e0012;
695
696 __asm__ volatile(
697 "1: \n\t"
698 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
699 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
700 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
701 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
702 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
703 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
704 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
705 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
706 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
707 "paddh %[src0], %[src0], %[src_lo] \n\t"
708 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
709 "paddh %[src0], %[src0], %[src_hi] \n\t"
710 "psrlh %[src0], %[src0], %[two] \n\t"
711 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
712 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
713 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
714 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
715 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
716
717 "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
718 "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
719 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
720 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
721 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
722 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
723 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
724 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
725 "paddh %[src0], %[src0], %[src_lo] \n\t"
726 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
727 "paddh %[src0], %[src0], %[src_hi] \n\t"
728 "psrlh %[src0], %[src0], %[two] \n\t"
729 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
730 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
731 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
732 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
733 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
734
735 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
736 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
737 "psubw %[dest0_u], %[src0], %[src1] \n\t"
738 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
739 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
740 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
741 "psubw %[dest0_v], %[src1], %[src0] \n\t"
742 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
743
744 "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
745 "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
746 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
747 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
748 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
749 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
750 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
751 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
752 "paddh %[src0], %[src0], %[src_lo] \n\t"
753 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
754 "paddh %[src0], %[src0], %[src_hi] \n\t"
755 "psrlh %[src0], %[src0], %[two] \n\t"
756 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
757 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
758 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
759 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
760 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
761
762 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
763 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
764 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
765 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
766 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
767 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
768 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
769 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
770 "paddh %[src0], %[src0], %[src_lo] \n\t"
771 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
772 "paddh %[src0], %[src0], %[src_hi] \n\t"
773 "psrlh %[src0], %[src0], %[two] \n\t"
774 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
775 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
776 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
777 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
778 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
779
780 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
781 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
782 "psubw %[dest1_u], %[src0], %[src1] \n\t"
783 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
784 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
785 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
786 "psubw %[dest1_v], %[src1], %[src0] \n\t"
787 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
788
789 "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
790 "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
791 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
792 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
793 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
794 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
795 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
796 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
797 "paddh %[src0], %[src0], %[src_lo] \n\t"
798 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
799 "paddh %[src0], %[src0], %[src_hi] \n\t"
800 "psrlh %[src0], %[src0], %[two] \n\t"
801 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
802 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
803 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
804 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
805 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
806
807 "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
808 "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
809 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
810 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
811 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
812 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
813 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
814 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
815 "paddh %[src0], %[src0], %[src_lo] \n\t"
816 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
817 "paddh %[src0], %[src0], %[src_hi] \n\t"
818 "psrlh %[src0], %[src0], %[two] \n\t"
819 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
820 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
821 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
822 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
823 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
824
825 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
826 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
827 "psubw %[dest2_u], %[src0], %[src1] \n\t"
828 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
829 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
830 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
831 "psubw %[dest2_v], %[src1], %[src0] \n\t"
832 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
833
834 "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
835 "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
836 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
837 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
838 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
839 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
840 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
841 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
842 "paddh %[src0], %[src0], %[src_lo] \n\t"
843 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
844 "paddh %[src0], %[src0], %[src_hi] \n\t"
845 "psrlh %[src0], %[src0], %[two] \n\t"
846 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
847 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
848 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
849 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
850 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
851
852 "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
853 "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
854 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
855 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
856 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
857 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
858 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
859 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
860 "paddh %[src0], %[src0], %[src_lo] \n\t"
861 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
862 "paddh %[src0], %[src0], %[src_hi] \n\t"
863 "psrlh %[src0], %[src0], %[two] \n\t"
864 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
865 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
866 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
867 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
868 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
869
870 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
871 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
872 "psubw %[dest3_u], %[src0], %[src1] \n\t"
873 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
874 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
875 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
876 "psubw %[dest3_v], %[src1], %[src0] \n\t"
877 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
878
879 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
880 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
881 "packushb %[dest0_u], %[src0], %[src1] \n\t"
882 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
883 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
884
885 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
886 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
887 "packushb %[dest0_v], %[src0], %[src1] \n\t"
888 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
889 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
890
891 "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
892 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
893 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
894 "daddi %[width], %[width], -0x10 \n\t"
895 "bgtz %[width], 1b \n\t"
896 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
897 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
898 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
899 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
900 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
901 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
902 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
903 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
904 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
905 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
906 [sixteen] "f"(0x10)
907 : "memory");
908 }
909
BGRAToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)910 void BGRAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
911 uint64_t src, src_hi, src_lo;
912 uint64_t dest0, dest1, dest2, dest3;
913 const uint64_t value = 0x1080;
914 const uint64_t mask = 0x0019008100420001;
915
916 __asm__ volatile(
917 "1: \n\t"
918 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
919 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
920 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
921 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
922 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
923 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
924 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
925 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
926 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
927 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
928 "paddw %[dest0], %[dest0], %[src] \n\t"
929 "psrlw %[dest0], %[dest0], %[eight] \n\t"
930
931 "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
932 "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
933 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
934 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
935 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
936 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
937 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
938 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
939 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
940 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
941 "paddw %[dest1], %[dest1], %[src] \n\t"
942 "psrlw %[dest1], %[dest1], %[eight] \n\t"
943
944 "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
945 "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
946 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
947 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
948 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
949 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
950 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
951 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
952 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
953 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
954 "paddw %[dest2], %[dest2], %[src] \n\t"
955 "psrlw %[dest2], %[dest2], %[eight] \n\t"
956
957 "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
958 "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
959 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
960 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
961 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
962 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
963 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
964 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
965 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
966 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
967 "paddw %[dest3], %[dest3], %[src] \n\t"
968 "psrlw %[dest3], %[dest3], %[eight] \n\t"
969
970 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
971 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
972 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
973 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
974 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
975
976 "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
977 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
978 "daddi %[width], %[width], -0x08 \n\t"
979 "bnez %[width], 1b \n\t"
980 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
981 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
982 [dest3] "=&f"(dest3)
983 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
984 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
985 [zero] "f"(0x00)
986 : "memory");
987 }
988
BGRAToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)989 void BGRAToUVRow_MMI(const uint8_t* src_rgb0,
990 int src_stride_rgb,
991 uint8_t* dst_u,
992 uint8_t* dst_v,
993 int width) {
994 uint64_t src_rgb1;
995 uint64_t ftmp[12];
996 const uint64_t value = 0x4040;
997 const uint64_t mask_u = 0x00020070004a0026;
998 const uint64_t mask_v = 0x0012005e00700002;
999
1000 __asm__ volatile(
1001 "1: \n\t"
1002 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
1003 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
1004 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
1005 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1006 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1007 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1008 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1009 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1010 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1011 "paddh %[src0], %[src0], %[src_lo] \n\t"
1012 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1013 "paddh %[src0], %[src0], %[src_hi] \n\t"
1014 "psrlh %[src0], %[src0], %[two] \n\t"
1015 "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
1016 "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
1017 "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
1018 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1019 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1020
1021 "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
1022 "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
1023 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1024 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1025 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1026 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1027 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1028 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1029 "paddh %[src0], %[src0], %[src_lo] \n\t"
1030 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1031 "paddh %[src0], %[src0], %[src_hi] \n\t"
1032 "psrlh %[src0], %[src0], %[two] \n\t"
1033 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1034 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1035 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1036 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1037 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1038
1039 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1040 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1041 "psubw %[dest0_u], %[src1], %[src0] \n\t"
1042 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1043 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1044 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1045 "psubw %[dest0_v], %[src0], %[src1] \n\t"
1046 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1047
1048 "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
1049 "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
1050 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1051 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1052 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1053 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1054 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1055 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1056 "paddh %[src0], %[src0], %[src_lo] \n\t"
1057 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1058 "paddh %[src0], %[src0], %[src_hi] \n\t"
1059 "psrlh %[src0], %[src0], %[two] \n\t"
1060 "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
1061 "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
1062 "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
1063 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1064 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1065
1066 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
1067 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
1068 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1069 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1070 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1071 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1072 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1073 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1074 "paddh %[src0], %[src0], %[src_lo] \n\t"
1075 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1076 "paddh %[src0], %[src0], %[src_hi] \n\t"
1077 "psrlh %[src0], %[src0], %[two] \n\t"
1078 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1079 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1080 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1081 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1082 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1083
1084 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1085 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1086 "psubw %[dest1_u], %[src1], %[src0] \n\t"
1087 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1088 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1089 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1090 "psubw %[dest1_v], %[src0], %[src1] \n\t"
1091 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1092
1093 "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
1094 "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
1095 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1096 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1097 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1098 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1099 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1100 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1101 "paddh %[src0], %[src0], %[src_lo] \n\t"
1102 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1103 "paddh %[src0], %[src0], %[src_hi] \n\t"
1104 "psrlh %[src0], %[src0], %[two] \n\t"
1105 "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
1106 "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
1107 "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
1108 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1109 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1110
1111 "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
1112 "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
1113 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1114 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1115 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1116 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1117 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1118 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1119 "paddh %[src0], %[src0], %[src_lo] \n\t"
1120 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1121 "paddh %[src0], %[src0], %[src_hi] \n\t"
1122 "psrlh %[src0], %[src0], %[two] \n\t"
1123 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1124 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1125 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1126 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1127 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1128
1129 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1130 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1131 "psubw %[dest2_u], %[src1], %[src0] \n\t"
1132 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1133 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1134 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1135 "psubw %[dest2_v], %[src0], %[src1] \n\t"
1136 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1137
1138 "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
1139 "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
1140 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1141 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1142 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1143 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1144 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1145 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1146 "paddh %[src0], %[src0], %[src_lo] \n\t"
1147 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1148 "paddh %[src0], %[src0], %[src_hi] \n\t"
1149 "psrlh %[src0], %[src0], %[two] \n\t"
1150 "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
1151 "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
1152 "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
1153 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1154 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1155
1156 "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
1157 "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
1158 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1159 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1160 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1161 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1162 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1163 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1164 "paddh %[src0], %[src0], %[src_lo] \n\t"
1165 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1166 "paddh %[src0], %[src0], %[src_hi] \n\t"
1167 "psrlh %[src0], %[src0], %[two] \n\t"
1168 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1169 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1170 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1171 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1172 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1173
1174 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1175 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1176 "psubw %[dest3_u], %[src1], %[src0] \n\t"
1177 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1178 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1179 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1180 "psubw %[dest3_v], %[src0], %[src1] \n\t"
1181 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1182
1183 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1184 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1185 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1186 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1187 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1188
1189 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1190 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1191 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1192 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1193 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1194
1195 "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
1196 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1197 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1198 "daddi %[width], %[width], -0x10 \n\t"
1199 "bgtz %[width], 1b \n\t"
1200 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1201 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1202 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1203 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1204 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1205 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
1206 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
1207 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1208 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1209 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
1210 [sixteen] "f"(0x10)
1211 : "memory");
1212 }
1213
ABGRToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)1214 void ABGRToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
1215 uint64_t src, src_hi, src_lo;
1216 uint64_t dest0, dest1, dest2, dest3;
1217 const uint64_t value = 0x1080;
1218 const uint64_t mask = 0x0001001900810042;
1219
1220 __asm__ volatile(
1221 "1: \n\t"
1222 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
1223 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
1224 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1225 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1226 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1227 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1228 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1229 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1230 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1231 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1232 "paddw %[dest0], %[dest0], %[src] \n\t"
1233 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1234
1235 "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
1236 "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
1237 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1238 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1239 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1240 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1241 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1242 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1243 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1244 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1245 "paddw %[dest1], %[dest1], %[src] \n\t"
1246 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1247
1248 "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
1249 "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
1250 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1251 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1252 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1253 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1254 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1255 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1256 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1257 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1258 "paddw %[dest2], %[dest2], %[src] \n\t"
1259 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1260
1261 "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
1262 "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
1263 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1264 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1265 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1266 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1267 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1268 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1269 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1270 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1271 "paddw %[dest3], %[dest3], %[src] \n\t"
1272 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1273
1274 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1275 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1276 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1277 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1278 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1279
1280 "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
1281 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1282 "daddi %[width], %[width], -0x08 \n\t"
1283 "bnez %[width], 1b \n\t"
1284 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1285 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1286 [dest3] "=&f"(dest3)
1287 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
1288 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1289 [zero] "f"(0x00)
1290 : "memory");
1291 }
1292
ABGRToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1293 void ABGRToUVRow_MMI(const uint8_t* src_rgb0,
1294 int src_stride_rgb,
1295 uint8_t* dst_u,
1296 uint8_t* dst_v,
1297 int width) {
1298 uint64_t src_rgb1;
1299 uint64_t ftmp[12];
1300 const uint64_t value = 0x4040;
1301 const uint64_t mask_u = 0x00020070004a0026;
1302 const uint64_t mask_v = 0x0012005e00700002;
1303
1304 __asm__ volatile(
1305 "1: \n\t"
1306 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
1307 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
1308 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
1309 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1310 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1311 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1312 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1313 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1314 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1315 "paddh %[src0], %[src0], %[src_lo] \n\t"
1316 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1317 "paddh %[src0], %[src0], %[src_hi] \n\t"
1318 "psrlh %[src0], %[src0], %[two] \n\t"
1319 "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
1320 "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
1321 "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
1322 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1323 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1324
1325 "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
1326 "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
1327 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1328 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1329 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1330 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1331 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1332 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1333 "paddh %[src0], %[src0], %[src_lo] \n\t"
1334 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1335 "paddh %[src0], %[src0], %[src_hi] \n\t"
1336 "psrlh %[src0], %[src0], %[two] \n\t"
1337 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1338 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1339 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1340 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1341 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1342
1343 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1344 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1345 "psubw %[dest0_u], %[src1], %[src0] \n\t"
1346 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1347 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1348 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1349 "psubw %[dest0_v], %[src0], %[src1] \n\t"
1350 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1351
1352 "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
1353 "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
1354 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1355 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1356 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1357 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1358 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1359 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1360 "paddh %[src0], %[src0], %[src_lo] \n\t"
1361 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1362 "paddh %[src0], %[src0], %[src_hi] \n\t"
1363 "psrlh %[src0], %[src0], %[two] \n\t"
1364 "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
1365 "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
1366 "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
1367 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1368 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1369
1370 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
1371 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
1372 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1373 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1374 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1375 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1376 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1377 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1378 "paddh %[src0], %[src0], %[src_lo] \n\t"
1379 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1380 "paddh %[src0], %[src0], %[src_hi] \n\t"
1381 "psrlh %[src0], %[src0], %[two] \n\t"
1382 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1383 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1384 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1385 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1386 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1387
1388 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1389 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1390 "psubw %[dest1_u], %[src1], %[src0] \n\t"
1391 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1392 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1393 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1394 "psubw %[dest1_v], %[src0], %[src1] \n\t"
1395 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1396
1397 "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
1398 "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
1399 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1400 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1401 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1402 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1403 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1404 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1405 "paddh %[src0], %[src0], %[src_lo] \n\t"
1406 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1407 "paddh %[src0], %[src0], %[src_hi] \n\t"
1408 "psrlh %[src0], %[src0], %[two] \n\t"
1409 "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
1410 "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
1411 "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
1412 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1413 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1414
1415 "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
1416 "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
1417 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1418 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1419 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1420 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1421 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1422 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1423 "paddh %[src0], %[src0], %[src_lo] \n\t"
1424 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1425 "paddh %[src0], %[src0], %[src_hi] \n\t"
1426 "psrlh %[src0], %[src0], %[two] \n\t"
1427 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1428 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1429 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1430 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1431 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1432
1433 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1434 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1435 "psubw %[dest2_u], %[src1], %[src0] \n\t"
1436 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1437 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1438 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1439 "psubw %[dest2_v], %[src0], %[src1] \n\t"
1440 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1441
1442 "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
1443 "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
1444 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1445 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1446 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1447 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1448 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1449 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1450 "paddh %[src0], %[src0], %[src_lo] \n\t"
1451 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1452 "paddh %[src0], %[src0], %[src_hi] \n\t"
1453 "psrlh %[src0], %[src0], %[two] \n\t"
1454 "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
1455 "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
1456 "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
1457 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1458 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1459
1460 "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
1461 "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
1462 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1463 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1464 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1465 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1466 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1467 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1468 "paddh %[src0], %[src0], %[src_lo] \n\t"
1469 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1470 "paddh %[src0], %[src0], %[src_hi] \n\t"
1471 "psrlh %[src0], %[src0], %[two] \n\t"
1472 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1473 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1474 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1475 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1476 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1477
1478 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1479 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1480 "psubw %[dest3_u], %[src1], %[src0] \n\t"
1481 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1482 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1483 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1484 "psubw %[dest3_v], %[src0], %[src1] \n\t"
1485 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1486
1487 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1488 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1489 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1490 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1491 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1492
1493 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1494 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1495 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1496 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1497 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1498
1499 "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
1500 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1501 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1502 "daddi %[width], %[width], -0x10 \n\t"
1503 "bgtz %[width], 1b \n\t"
1504 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1505 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1506 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1507 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1508 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1509 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
1510 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
1511 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1512 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1513 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
1514 [sixteen] "f"(0x10)
1515 : "memory");
1516 }
1517
RGBAToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)1518 void RGBAToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
1519 uint64_t src, src_hi, src_lo;
1520 uint64_t dest0, dest1, dest2, dest3;
1521 const uint64_t value = 0x1080;
1522 const uint64_t mask = 0x0042008100190001;
1523
1524 __asm__ volatile(
1525 "1: \n\t"
1526 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
1527 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
1528 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1529 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1530 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1531 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1532 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1533 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1534 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1535 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1536 "paddw %[dest0], %[dest0], %[src] \n\t"
1537 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1538
1539 "gsldlc1 %[src], 0x0f(%[src_argb0]) \n\t"
1540 "gsldrc1 %[src], 0x08(%[src_argb0]) \n\t"
1541 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1542 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1543 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1544 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1545 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1546 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1547 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1548 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1549 "paddw %[dest1], %[dest1], %[src] \n\t"
1550 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1551
1552 "gsldlc1 %[src], 0x17(%[src_argb0]) \n\t"
1553 "gsldrc1 %[src], 0x10(%[src_argb0]) \n\t"
1554 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1555 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1556 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1557 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1558 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1559 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1560 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1561 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1562 "paddw %[dest2], %[dest2], %[src] \n\t"
1563 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1564
1565 "gsldlc1 %[src], 0x1f(%[src_argb0]) \n\t"
1566 "gsldrc1 %[src], 0x18(%[src_argb0]) \n\t"
1567 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1568 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1569 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1570 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1571 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1572 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1573 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1574 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1575 "paddw %[dest3], %[dest3], %[src] \n\t"
1576 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1577
1578 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1579 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1580 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1581 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1582 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1583
1584 "daddiu %[src_argb0], %[src_argb0], 0x20 \n\t"
1585 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1586 "daddi %[width], %[width], -0x08 \n\t"
1587 "bnez %[width], 1b \n\t"
1588 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1589 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1590 [dest3] "=&f"(dest3)
1591 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
1592 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1593 [zero] "f"(0x00)
1594 : "memory");
1595 }
1596
RGBAToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1597 void RGBAToUVRow_MMI(const uint8_t* src_rgb0,
1598 int src_stride_rgb,
1599 uint8_t* dst_u,
1600 uint8_t* dst_v,
1601 int width) {
1602 uint64_t src_rgb1;
1603 uint64_t ftmp[12];
1604 const uint64_t value = 0x4040;
1605 const uint64_t mask_u = 0x0026004a00700002;
1606 const uint64_t mask_v = 0x00020070005e0012;
1607
1608 __asm__ volatile(
1609 "1: \n\t"
1610 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
1611 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
1612 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
1613 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1614 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1615 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1616 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1617 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1618 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1619 "paddh %[src0], %[src0], %[src_lo] \n\t"
1620 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1621 "paddh %[src0], %[src0], %[src_hi] \n\t"
1622 "psrlh %[src0], %[src0], %[two] \n\t"
1623 "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
1624 "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
1625 "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
1626 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1627 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1628
1629 "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
1630 "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
1631 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1632 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1633 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1634 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1635 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1636 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1637 "paddh %[src0], %[src0], %[src_lo] \n\t"
1638 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1639 "paddh %[src0], %[src0], %[src_hi] \n\t"
1640 "psrlh %[src0], %[src0], %[two] \n\t"
1641 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1642 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1643 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1644 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1645 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1646
1647 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1648 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1649 "psubw %[dest0_u], %[src0], %[src1] \n\t"
1650 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1651 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1652 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1653 "psubw %[dest0_v], %[src1], %[src0] \n\t"
1654 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1655
1656 "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
1657 "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
1658 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1659 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1660 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1661 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1662 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1663 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1664 "paddh %[src0], %[src0], %[src_lo] \n\t"
1665 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1666 "paddh %[src0], %[src0], %[src_hi] \n\t"
1667 "psrlh %[src0], %[src0], %[two] \n\t"
1668 "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
1669 "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
1670 "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
1671 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1672 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1673
1674 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
1675 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
1676 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1677 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1678 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1679 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1680 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1681 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1682 "paddh %[src0], %[src0], %[src_lo] \n\t"
1683 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1684 "paddh %[src0], %[src0], %[src_hi] \n\t"
1685 "psrlh %[src0], %[src0], %[two] \n\t"
1686 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1687 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1688 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1689 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1690 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1691
1692 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1693 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1694 "psubw %[dest1_u], %[src0], %[src1] \n\t"
1695 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1696 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1697 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1698 "psubw %[dest1_v], %[src1], %[src0] \n\t"
1699 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1700
1701 "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
1702 "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
1703 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1704 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1705 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1706 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1707 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1708 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1709 "paddh %[src0], %[src0], %[src_lo] \n\t"
1710 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1711 "paddh %[src0], %[src0], %[src_hi] \n\t"
1712 "psrlh %[src0], %[src0], %[two] \n\t"
1713 "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
1714 "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
1715 "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
1716 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1717 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1718
1719 "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
1720 "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
1721 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1722 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1723 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1724 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1725 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1726 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1727 "paddh %[src0], %[src0], %[src_lo] \n\t"
1728 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1729 "paddh %[src0], %[src0], %[src_hi] \n\t"
1730 "psrlh %[src0], %[src0], %[two] \n\t"
1731 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1732 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1733 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1734 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1735 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1736
1737 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1738 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1739 "psubw %[dest2_u], %[src0], %[src1] \n\t"
1740 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1741 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1742 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1743 "psubw %[dest2_v], %[src1], %[src0] \n\t"
1744 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1745
1746 "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
1747 "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
1748 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1749 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1750 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1751 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1752 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1753 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1754 "paddh %[src0], %[src0], %[src_lo] \n\t"
1755 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1756 "paddh %[src0], %[src0], %[src_hi] \n\t"
1757 "psrlh %[src0], %[src0], %[two] \n\t"
1758 "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
1759 "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
1760 "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
1761 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1762 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1763
1764 "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
1765 "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
1766 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1767 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1768 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1769 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1770 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1771 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1772 "paddh %[src0], %[src0], %[src_lo] \n\t"
1773 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1774 "paddh %[src0], %[src0], %[src_hi] \n\t"
1775 "psrlh %[src0], %[src0], %[two] \n\t"
1776 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1777 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1778 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1779 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1780 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1781
1782 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1783 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1784 "psubw %[dest3_u], %[src0], %[src1] \n\t"
1785 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1786 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1787 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1788 "psubw %[dest3_v], %[src1], %[src0] \n\t"
1789 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1790
1791 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1792 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1793 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1794 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1795 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1796
1797 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1798 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1799 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1800 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1801 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1802
1803 "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
1804 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1805 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1806 "daddi %[width], %[width], -0x10 \n\t"
1807 "bgtz %[width], 1b \n\t"
1808 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1809 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1810 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1811 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1812 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1813 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
1814 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
1815 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1816 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1817 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
1818 [sixteen] "f"(0x10)
1819 : "memory");
1820 }
1821
RGB24ToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)1822 void RGB24ToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
1823 uint64_t src, src_hi, src_lo;
1824 uint64_t dest0, dest1, dest2, dest3;
1825 const uint64_t value = 0x1080;
1826 const uint64_t mask = 0x0001004200810019;
1827
1828 __asm__ volatile(
1829 "1: \n\t"
1830 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
1831 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
1832 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1833 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1834 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1835 "dsll %[src], %[src], %[eight] \n\t"
1836 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1837 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1838 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1839 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1840 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1841 "paddw %[dest0], %[dest0], %[src] \n\t"
1842 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1843
1844 "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
1845 "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
1846 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1847 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1848 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1849 "dsll %[src], %[src], %[eight] \n\t"
1850 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1851 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1852 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1853 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1854 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1855 "paddw %[dest1], %[dest1], %[src] \n\t"
1856 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1857
1858 "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
1859 "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
1860 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1861 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1862 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1863 "dsll %[src], %[src], %[eight] \n\t"
1864 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1865 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1866 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1867 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1868 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1869 "paddw %[dest2], %[dest2], %[src] \n\t"
1870 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1871
1872 "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
1873 "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
1874 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1875 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1876 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1877 "dsll %[src], %[src], %[eight] \n\t"
1878 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1879 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1880 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1881 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1882 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1883 "paddw %[dest3], %[dest3], %[src] \n\t"
1884 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1885
1886 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1887 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1888 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1889 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1890 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1891
1892 "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
1893 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1894 "daddi %[width], %[width], -0x08 \n\t"
1895 "bnez %[width], 1b \n\t"
1896 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1897 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1898 [dest3] "=&f"(dest3)
1899 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
1900 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1901 [zero] "f"(0x00)
1902 : "memory");
1903 }
1904
RGB24ToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1905 void RGB24ToUVRow_MMI(const uint8_t* src_rgb0,
1906 int src_stride_rgb,
1907 uint8_t* dst_u,
1908 uint8_t* dst_v,
1909 int width) {
1910 uint64_t src_rgb1;
1911 uint64_t ftmp[12];
1912 const uint64_t value = 0x4040;
1913 const uint64_t mask_u = 0x0026004a00700002;
1914 const uint64_t mask_v = 0x00020070005e0012;
1915
1916 __asm__ volatile(
1917 "1: \n\t"
1918 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
1919 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
1920 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
1921 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1922 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1923 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1924 "dsll %[src0], %[src0], %[eight] \n\t"
1925 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1926 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1927 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1928 "paddh %[src0], %[src0], %[src_lo] \n\t"
1929 "dsll %[src1], %[src1], %[eight] \n\t"
1930 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1931 "paddh %[src0], %[src0], %[src_hi] \n\t"
1932 "psrlh %[src0], %[src0], %[two] \n\t"
1933 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
1934 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
1935 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
1936 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1937 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1938
1939 "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
1940 "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
1941 "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
1942 "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
1943 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1944 "dsll %[src0], %[src0], %[eight] \n\t"
1945 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1946 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1947 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1948 "paddh %[src0], %[src0], %[src_lo] \n\t"
1949 "dsll %[src1], %[src1], %[eight] \n\t"
1950 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1951 "paddh %[src0], %[src0], %[src_hi] \n\t"
1952 "psrlh %[src0], %[src0], %[two] \n\t"
1953 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
1954 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1955 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
1956 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1957 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1958
1959 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1960 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1961 "psubw %[dest0_u], %[src0], %[src1] \n\t"
1962 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1963 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1964 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1965 "psubw %[dest0_v], %[src1], %[src0] \n\t"
1966 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1967
1968 "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
1969 "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
1970 "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
1971 "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
1972 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1973 "dsll %[src0], %[src0], %[eight] \n\t"
1974 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1975 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1976 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1977 "paddh %[src0], %[src0], %[src_lo] \n\t"
1978 "dsll %[src1], %[src1], %[eight] \n\t"
1979 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1980 "paddh %[src0], %[src0], %[src_hi] \n\t"
1981 "psrlh %[src0], %[src0], %[two] \n\t"
1982 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
1983 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
1984 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
1985 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1986 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1987
1988 "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
1989 "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
1990 "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
1991 "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
1992 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1993 "dsll %[src0], %[src0], %[eight] \n\t"
1994 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1995 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1996 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1997 "paddh %[src0], %[src0], %[src_lo] \n\t"
1998 "dsll %[src1], %[src1], %[eight] \n\t"
1999 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2000 "paddh %[src0], %[src0], %[src_hi] \n\t"
2001 "psrlh %[src0], %[src0], %[two] \n\t"
2002 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2003 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2004 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2005 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2006 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2007
2008 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2009 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2010 "psubw %[dest1_u], %[src0], %[src1] \n\t"
2011 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2012 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2013 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2014 "psubw %[dest1_v], %[src1], %[src0] \n\t"
2015 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2016
2017 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
2018 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
2019 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2020 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2021 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2022 "dsll %[src0], %[src0], %[eight] \n\t"
2023 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2024 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2025 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2026 "paddh %[src0], %[src0], %[src_lo] \n\t"
2027 "dsll %[src1], %[src1], %[eight] \n\t"
2028 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2029 "paddh %[src0], %[src0], %[src_hi] \n\t"
2030 "psrlh %[src0], %[src0], %[two] \n\t"
2031 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
2032 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
2033 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
2034 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2035 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2036
2037 "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
2038 "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
2039 "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
2040 "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
2041 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2042 "dsll %[src0], %[src0], %[eight] \n\t"
2043 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2044 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2045 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2046 "paddh %[src0], %[src0], %[src_lo] \n\t"
2047 "dsll %[src1], %[src1], %[eight] \n\t"
2048 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2049 "paddh %[src0], %[src0], %[src_hi] \n\t"
2050 "psrlh %[src0], %[src0], %[two] \n\t"
2051 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2052 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2053 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2054 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2055 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2056
2057 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2058 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2059 "psubw %[dest2_u], %[src0], %[src1] \n\t"
2060 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2061 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2062 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2063 "psubw %[dest2_v], %[src1], %[src0] \n\t"
2064 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2065
2066 "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
2067 "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
2068 "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
2069 "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
2070 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2071 "dsll %[src0], %[src0], %[eight] \n\t"
2072 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2073 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2074 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2075 "paddh %[src0], %[src0], %[src_lo] \n\t"
2076 "dsll %[src1], %[src1], %[eight] \n\t"
2077 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2078 "paddh %[src0], %[src0], %[src_hi] \n\t"
2079 "psrlh %[src0], %[src0], %[two] \n\t"
2080 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
2081 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
2082 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
2083 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2084 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2085
2086 "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
2087 "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
2088 "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
2089 "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
2090 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2091 "dsll %[src0], %[src0], %[eight] \n\t"
2092 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2093 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2094 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2095 "paddh %[src0], %[src0], %[src_lo] \n\t"
2096 "dsll %[src1], %[src1], %[eight] \n\t"
2097 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2098 "paddh %[src0], %[src0], %[src_hi] \n\t"
2099 "psrlh %[src0], %[src0], %[two] \n\t"
2100 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2101 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2102 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2103 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2104 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2105
2106 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2107 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2108 "psubw %[dest3_u], %[src0], %[src1] \n\t"
2109 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2110 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2111 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2112 "psubw %[dest3_v], %[src1], %[src0] \n\t"
2113 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2114
2115 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2116 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2117 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2118 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2119 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2120
2121 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2122 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2123 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2124 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2125 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2126
2127 "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
2128 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2129 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2130 "daddi %[width], %[width], -0x10 \n\t"
2131 "bgtz %[width], 1b \n\t"
2132 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2133 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2134 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2135 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2136 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2137 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
2138 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
2139 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2140 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2141 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
2142 [sixteen] "f"(0x10)
2143 : "memory");
2144 }
2145
RAWToYRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)2146 void RAWToYRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
2147 uint64_t src, src_hi, src_lo;
2148 uint64_t dest0, dest1, dest2, dest3;
2149 const uint64_t value = 0x1080;
2150 const uint64_t mask = 0x0001001900810042;
2151
2152 __asm__ volatile(
2153 "1: \n\t"
2154 "gsldlc1 %[src], 0x07(%[src_argb0]) \n\t"
2155 "gsldrc1 %[src], 0x00(%[src_argb0]) \n\t"
2156 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2157 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2158 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2159 "dsll %[src], %[src], %[eight] \n\t"
2160 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2161 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2162 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2163 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2164 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
2165 "paddw %[dest0], %[dest0], %[src] \n\t"
2166 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2167
2168 "gsldlc1 %[src], 0x0d(%[src_argb0]) \n\t"
2169 "gsldrc1 %[src], 0x06(%[src_argb0]) \n\t"
2170 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2171 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2172 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2173 "dsll %[src], %[src], %[eight] \n\t"
2174 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2175 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2176 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2177 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2178 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
2179 "paddw %[dest1], %[dest1], %[src] \n\t"
2180 "psrlw %[dest1], %[dest1], %[eight] \n\t"
2181
2182 "gsldlc1 %[src], 0x13(%[src_argb0]) \n\t"
2183 "gsldrc1 %[src], 0x0c(%[src_argb0]) \n\t"
2184 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2185 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2186 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2187 "dsll %[src], %[src], %[eight] \n\t"
2188 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2189 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2190 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2191 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2192 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
2193 "paddw %[dest2], %[dest2], %[src] \n\t"
2194 "psrlw %[dest2], %[dest2], %[eight] \n\t"
2195
2196 "gsldlc1 %[src], 0x19(%[src_argb0]) \n\t"
2197 "gsldrc1 %[src], 0x12(%[src_argb0]) \n\t"
2198 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2199 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2200 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2201 "dsll %[src], %[src], %[eight] \n\t"
2202 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2203 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2204 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2205 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2206 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
2207 "paddw %[dest3], %[dest3], %[src] \n\t"
2208 "psrlw %[dest3], %[dest3], %[eight] \n\t"
2209
2210 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
2211 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
2212 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
2213 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
2214 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
2215
2216 "daddiu %[src_argb0], %[src_argb0], 0x18 \n\t"
2217 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
2218 "daddi %[width], %[width], -0x08 \n\t"
2219 "bnez %[width], 1b \n\t"
2220 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
2221 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
2222 [dest3] "=&f"(dest3)
2223 : [src_argb0] "r"(src_argb0), [dst_y] "r"(dst_y), [width] "r"(width),
2224 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
2225 [zero] "f"(0x00)
2226 : "memory");
2227 }
2228
RAWToUVRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)2229 void RAWToUVRow_MMI(const uint8_t* src_rgb0,
2230 int src_stride_rgb,
2231 uint8_t* dst_u,
2232 uint8_t* dst_v,
2233 int width) {
2234 uint64_t src_rgb1;
2235 uint64_t ftmp[12];
2236 const uint64_t value = 0x4040;
2237 const uint64_t mask_u = 0x00020070004a0026;
2238 const uint64_t mask_v = 0x0012005e00700002;
2239
2240 __asm__ volatile(
2241 "1: \n\t"
2242 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
2243 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
2244 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
2245 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
2246 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
2247 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2248 "dsll %[src0], %[src0], %[eight] \n\t"
2249 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2250 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2251 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2252 "paddh %[src0], %[src0], %[src_lo] \n\t"
2253 "dsll %[src1], %[src1], %[eight] \n\t"
2254 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2255 "paddh %[src0], %[src0], %[src_hi] \n\t"
2256 "psrlh %[src0], %[src0], %[two] \n\t"
2257 "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
2258 "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
2259 "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
2260 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
2261 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
2262
2263 "gsldrc1 %[src0], 0x06(%[src_rgb0]) \n\t"
2264 "gsldlc1 %[src0], 0x0d(%[src_rgb0]) \n\t"
2265 "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
2266 "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
2267 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2268 "dsll %[src0], %[src0], %[eight] \n\t"
2269 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2270 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2271 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2272 "paddh %[src0], %[src0], %[src_lo] \n\t"
2273 "dsll %[src1], %[src1], %[eight] \n\t"
2274 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2275 "paddh %[src0], %[src0], %[src_hi] \n\t"
2276 "psrlh %[src0], %[src0], %[two] \n\t"
2277 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2278 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2279 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2280 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2281 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2282
2283 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
2284 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
2285 "psubw %[dest0_u], %[src1], %[src0] \n\t"
2286 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
2287 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
2288 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
2289 "psubw %[dest0_v], %[src0], %[src1] \n\t"
2290 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
2291
2292 "gsldrc1 %[src0], 0x0c(%[src_rgb0]) \n\t"
2293 "gsldlc1 %[src0], 0x13(%[src_rgb0]) \n\t"
2294 "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
2295 "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
2296 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2297 "dsll %[src0], %[src0], %[eight] \n\t"
2298 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2299 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2300 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2301 "paddh %[src0], %[src0], %[src_lo] \n\t"
2302 "dsll %[src1], %[src1], %[eight] \n\t"
2303 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2304 "paddh %[src0], %[src0], %[src_hi] \n\t"
2305 "psrlh %[src0], %[src0], %[two] \n\t"
2306 "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
2307 "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
2308 "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
2309 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
2310 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
2311
2312 "gsldrc1 %[src0], 0x12(%[src_rgb0]) \n\t"
2313 "gsldlc1 %[src0], 0x19(%[src_rgb0]) \n\t"
2314 "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
2315 "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
2316 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2317 "dsll %[src0], %[src0], %[eight] \n\t"
2318 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2319 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2320 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2321 "paddh %[src0], %[src0], %[src_lo] \n\t"
2322 "dsll %[src1], %[src1], %[eight] \n\t"
2323 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2324 "paddh %[src0], %[src0], %[src_hi] \n\t"
2325 "psrlh %[src0], %[src0], %[two] \n\t"
2326 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2327 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2328 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2329 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2330 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2331
2332 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2333 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2334 "psubw %[dest1_u], %[src1], %[src0] \n\t"
2335 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2336 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2337 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2338 "psubw %[dest1_v], %[src0], %[src1] \n\t"
2339 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2340
2341 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
2342 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
2343 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2344 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2345 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2346 "dsll %[src0], %[src0], %[eight] \n\t"
2347 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2348 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2349 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2350 "paddh %[src0], %[src0], %[src_lo] \n\t"
2351 "dsll %[src1], %[src1], %[eight] \n\t"
2352 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2353 "paddh %[src0], %[src0], %[src_hi] \n\t"
2354 "psrlh %[src0], %[src0], %[two] \n\t"
2355 "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
2356 "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
2357 "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
2358 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2359 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2360
2361 "gsldrc1 %[src0], 0x1e(%[src_rgb0]) \n\t"
2362 "gsldlc1 %[src0], 0x25(%[src_rgb0]) \n\t"
2363 "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
2364 "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
2365 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2366 "dsll %[src0], %[src0], %[eight] \n\t"
2367 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2368 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2369 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2370 "paddh %[src0], %[src0], %[src_lo] \n\t"
2371 "dsll %[src1], %[src1], %[eight] \n\t"
2372 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2373 "paddh %[src0], %[src0], %[src_hi] \n\t"
2374 "psrlh %[src0], %[src0], %[two] \n\t"
2375 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2376 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2377 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2378 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2379 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2380
2381 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2382 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2383 "psubw %[dest2_u], %[src1], %[src0] \n\t"
2384 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2385 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2386 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2387 "psubw %[dest2_v], %[src0], %[src1] \n\t"
2388 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2389
2390 "gsldrc1 %[src0], 0x24(%[src_rgb0]) \n\t"
2391 "gsldlc1 %[src0], 0x2b(%[src_rgb0]) \n\t"
2392 "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
2393 "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
2394 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2395 "dsll %[src0], %[src0], %[eight] \n\t"
2396 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2397 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2398 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2399 "paddh %[src0], %[src0], %[src_lo] \n\t"
2400 "dsll %[src1], %[src1], %[eight] \n\t"
2401 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2402 "paddh %[src0], %[src0], %[src_hi] \n\t"
2403 "psrlh %[src0], %[src0], %[two] \n\t"
2404 "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
2405 "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
2406 "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
2407 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2408 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2409
2410 "gsldrc1 %[src0], 0x2a(%[src_rgb0]) \n\t"
2411 "gsldlc1 %[src0], 0x31(%[src_rgb0]) \n\t"
2412 "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
2413 "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
2414 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2415 "dsll %[src0], %[src0], %[eight] \n\t"
2416 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2417 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2418 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2419 "paddh %[src0], %[src0], %[src_lo] \n\t"
2420 "dsll %[src1], %[src1], %[eight] \n\t"
2421 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2422 "paddh %[src0], %[src0], %[src_hi] \n\t"
2423 "psrlh %[src0], %[src0], %[two] \n\t"
2424 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2425 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2426 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2427 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2428 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2429
2430 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2431 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2432 "psubw %[dest3_u], %[src1], %[src0] \n\t"
2433 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2434 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2435 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2436 "psubw %[dest3_v], %[src0], %[src1] \n\t"
2437 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2438
2439 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2440 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2441 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2442 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2443 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2444
2445 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2446 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2447 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2448 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2449 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2450
2451 "daddiu %[src_rgb0], %[src_rgb0], 0x30 \n\t"
2452 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2453 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2454 "daddi %[width], %[width], -0x10 \n\t"
2455 "bgtz %[width], 1b \n\t"
2456 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2457 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2458 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2459 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2460 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2461 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
2462 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
2463 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2464 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2465 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
2466 [sixteen] "f"(0x10)
2467 : "memory");
2468 }
2469
ARGBToYJRow_MMI(const uint8_t * src_argb0,uint8_t * dst_y,int width)2470 void ARGBToYJRow_MMI(const uint8_t* src_argb0, uint8_t* dst_y, int width) {
2471 uint64_t src, src_hi, src_lo;
2472 uint64_t dest, dest0, dest1, dest2, dest3;
2473 uint64_t tmp0, tmp1;
2474 const uint64_t shift = 0x07;
2475 const uint64_t value = 0x0040;
2476 const uint64_t mask0 = 0x0;
2477 const uint64_t mask1 = 0x00010026004B000FULL;
2478
2479 __asm__ volatile(
2480 "1: \n\t"
2481 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
2482 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
2483 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2484 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2485 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2486 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2487 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2488 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2489 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2490 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2491 "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
2492 "psrlw %[dest0], %[dest0], %[shift] \n\t"
2493
2494 "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
2495 "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
2496 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2497 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2498 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2499 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2500 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2501 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2502 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2503 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2504 "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
2505 "psrlw %[dest1], %[dest1], %[shift] \n\t"
2506
2507 "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
2508 "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
2509 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2510 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2511 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2512 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2513 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2514 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2515 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2516 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2517 "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
2518 "psrlw %[dest2], %[dest2], %[shift] \n\t"
2519
2520 "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
2521 "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
2522 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2523 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2524 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2525 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2526 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2527 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2528 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2529 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2530 "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
2531 "psrlw %[dest3], %[dest3], %[shift] \n\t"
2532
2533 "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
2534 "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
2535 "packushb %[dest], %[tmp0], %[tmp1] \n\t"
2536 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
2537 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
2538
2539 "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
2540 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
2541 "daddi %[width], %[width], -0x08 \n\t"
2542 "bnez %[width], 1b \n\t"
2543 : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
2544 [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
2545 [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
2546 [tmp1] "=&f"(tmp1)
2547 : [src_ptr] "r"(src_argb0), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
2548 [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
2549 [width] "r"(width)
2550 : "memory");
2551 }
2552
ARGBToUVJRow_MMI(const uint8_t * src_rgb0,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)2553 void ARGBToUVJRow_MMI(const uint8_t* src_rgb0,
2554 int src_stride_rgb,
2555 uint8_t* dst_u,
2556 uint8_t* dst_v,
2557 int width) {
2558 uint64_t src_rgb1;
2559 uint64_t ftmp[12];
2560 const uint64_t value = 0x4040;
2561 const uint64_t mask_u = 0x002b0054007f0002;
2562 const uint64_t mask_v = 0x0002007f006b0014;
2563
2564 __asm__ volatile(
2565 "1: \n\t"
2566 "daddu %[src_rgb1], %[src_rgb0], %[src_stride_rgb] \n\t"
2567 "gsldrc1 %[src0], 0x00(%[src_rgb0]) \n\t"
2568 "gsldlc1 %[src0], 0x07(%[src_rgb0]) \n\t"
2569 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
2570 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
2571 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2572 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2573 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2574 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2575 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2576 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2577 "pavgh %[src0], %[src0], %[src1] \n\t"
2578 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
2579 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
2580 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
2581 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
2582 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
2583
2584 "gsldrc1 %[src0], 0x08(%[src_rgb0]) \n\t"
2585 "gsldlc1 %[src0], 0x0f(%[src_rgb0]) \n\t"
2586 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
2587 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
2588 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2589 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2590 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2591 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2592 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2593 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2594 "pavgh %[src0], %[src0], %[src1] \n\t"
2595 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2596 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2597 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2598 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2599 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2600
2601 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
2602 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
2603 "psubw %[dest0_u], %[src0], %[src1] \n\t"
2604 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
2605 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
2606 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
2607 "psubw %[dest0_v], %[src1], %[src0] \n\t"
2608 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
2609
2610 "gsldrc1 %[src0], 0x10(%[src_rgb0]) \n\t"
2611 "gsldlc1 %[src0], 0x17(%[src_rgb0]) \n\t"
2612 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
2613 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
2614 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2615 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2616 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2617 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2618 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2619 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2620 "pavgh %[src0], %[src0], %[src1] \n\t"
2621 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
2622 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
2623 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
2624 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
2625 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
2626
2627 "gsldrc1 %[src0], 0x18(%[src_rgb0]) \n\t"
2628 "gsldlc1 %[src0], 0x1f(%[src_rgb0]) \n\t"
2629 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2630 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2631 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2632 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2633 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2634 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2635 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2636 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2637 "pavgh %[src0], %[src0], %[src1] \n\t"
2638 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2639 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2640 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2641 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2642 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2643
2644 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2645 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2646 "psubw %[dest1_u], %[src0], %[src1] \n\t"
2647 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2648 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2649 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2650 "psubw %[dest1_v], %[src1], %[src0] \n\t"
2651 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2652
2653 "gsldrc1 %[src0], 0x20(%[src_rgb0]) \n\t"
2654 "gsldlc1 %[src0], 0x27(%[src_rgb0]) \n\t"
2655 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
2656 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
2657 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2658 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2659 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2660 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2661 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2662 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2663 "pavgh %[src0], %[src0], %[src1] \n\t"
2664 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
2665 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
2666 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
2667 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2668 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2669
2670 "gsldrc1 %[src0], 0x28(%[src_rgb0]) \n\t"
2671 "gsldlc1 %[src0], 0x2f(%[src_rgb0]) \n\t"
2672 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
2673 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
2674 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2675 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2676 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2677 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2678 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2679 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2680 "pavgh %[src0], %[src0], %[src1] \n\t"
2681 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2682 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2683 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2684 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2685 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2686
2687 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2688 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2689 "psubw %[dest2_u], %[src0], %[src1] \n\t"
2690 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2691 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2692 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2693 "psubw %[dest2_v], %[src1], %[src0] \n\t"
2694 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2695
2696 "gsldrc1 %[src0], 0x30(%[src_rgb0]) \n\t"
2697 "gsldlc1 %[src0], 0x37(%[src_rgb0]) \n\t"
2698 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
2699 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
2700 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2701 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2702 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2703 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2704 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2705 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2706 "pavgh %[src0], %[src0], %[src1] \n\t"
2707 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
2708 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
2709 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
2710 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2711 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2712
2713 "gsldrc1 %[src0], 0x38(%[src_rgb0]) \n\t"
2714 "gsldlc1 %[src0], 0x3f(%[src_rgb0]) \n\t"
2715 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
2716 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
2717 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2718 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2719 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2720 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2721 "pavgh %[src0], %[src_lo], %[src0] \n\t"
2722 "pavgh %[src1], %[src_hi], %[src1] \n\t"
2723 "pavgh %[src0], %[src0], %[src1] \n\t"
2724 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2725 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2726 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2727 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2728 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2729
2730 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2731 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2732 "psubw %[dest3_u], %[src0], %[src1] \n\t"
2733 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2734 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2735 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2736 "psubw %[dest3_v], %[src1], %[src0] \n\t"
2737 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2738
2739 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2740 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2741 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2742 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2743 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2744
2745 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2746 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2747 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2748 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2749 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2750
2751 "daddiu %[src_rgb0], %[src_rgb0], 0x40 \n\t"
2752 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2753 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2754 "daddi %[width], %[width], -0x10 \n\t"
2755 "bgtz %[width], 1b \n\t"
2756 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2757 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2758 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2759 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2760 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2761 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
2762 : [src_rgb0] "r"(src_rgb0), [src_stride_rgb] "r"(src_stride_rgb),
2763 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2764 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2765 [zero] "f"(0x00), [eight] "f"(0x08), [two] "f"(0x02),
2766 [sixteen] "f"(0x10)
2767 : "memory");
2768 }
2769
RGB565ToYRow_MMI(const uint8_t * src_rgb565,uint8_t * dst_y,int width)2770 void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
2771 uint64_t ftmp[11];
2772 const uint64_t value = 0x1080108010801080;
2773 const uint64_t mask = 0x0001004200810019;
2774 uint64_t c0 = 0x001f001f001f001f;
2775 uint64_t c1 = 0x00ff00ff00ff00ff;
2776 uint64_t c2 = 0x0007000700070007;
2777 __asm__ volatile(
2778 "1: \n\t"
2779 "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
2780 "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
2781 "psrlh %[src1], %[src0], %[eight] \n\t"
2782 "and %[b], %[src0], %[c0] \n\t"
2783 "and %[src0], %[src0], %[c1] \n\t"
2784 "psrlh %[src0], %[src0], %[five] \n\t"
2785 "and %[g], %[src1], %[c2] \n\t"
2786 "psllh %[g], %[g], %[three] \n\t"
2787 "or %[g], %[src0], %[g] \n\t"
2788 "psrlh %[r], %[src1], %[three] \n\t"
2789 "psllh %[src0], %[b], %[three] \n\t"
2790 "psrlh %[src1], %[b], %[two] \n\t"
2791 "or %[b], %[src0], %[src1] \n\t"
2792 "psllh %[src0], %[g], %[two] \n\t"
2793 "psrlh %[src1], %[g], %[four] \n\t"
2794 "or %[g], %[src0], %[src1] \n\t"
2795 "psllh %[src0], %[r], %[three] \n\t"
2796 "psrlh %[src1], %[r], %[two] \n\t"
2797 "or %[r], %[src0], %[src1] \n\t"
2798 "punpcklhw %[src0], %[b], %[r] \n\t"
2799 "punpcklhw %[src1], %[g], %[value] \n\t"
2800 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2801 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2802 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2803 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2804 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2805 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2806 "paddw %[dest0], %[src0], %[src1] \n\t"
2807 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2808
2809 "punpckhhw %[src0], %[b], %[r] \n\t"
2810 "punpckhhw %[src1], %[g], %[value] \n\t"
2811 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2812 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2813 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2814 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2815 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2816 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2817 "paddw %[dest1], %[src0], %[src1] \n\t"
2818 "psrlw %[dest1], %[dest1], %[eight] \n\t"
2819
2820 "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
2821 "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
2822 "psrlh %[src1], %[src0], %[eight] \n\t"
2823 "and %[b], %[src0], %[c0] \n\t"
2824 "and %[src0], %[src0], %[c1] \n\t"
2825 "psrlh %[src0], %[src0], %[five] \n\t"
2826 "and %[g], %[src1], %[c2] \n\t"
2827 "psllh %[g], %[g], %[three] \n\t"
2828 "or %[g], %[src0], %[g] \n\t"
2829 "psrlh %[r], %[src1], %[three] \n\t"
2830 "psllh %[src0], %[b], %[three] \n\t"
2831 "psrlh %[src1], %[b], %[two] \n\t"
2832 "or %[b], %[src0], %[src1] \n\t"
2833 "psllh %[src0], %[g], %[two] \n\t"
2834 "psrlh %[src1], %[g], %[four] \n\t"
2835 "or %[g], %[src0], %[src1] \n\t"
2836 "psllh %[src0], %[r], %[three] \n\t"
2837 "psrlh %[src1], %[r], %[two] \n\t"
2838 "or %[r], %[src0], %[src1] \n\t"
2839 "punpcklhw %[src0], %[b], %[r] \n\t"
2840 "punpcklhw %[src1], %[g], %[value] \n\t"
2841 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2842 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2843 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2844 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2845 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2846 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2847 "paddw %[dest2], %[src0], %[src1] \n\t"
2848 "psrlw %[dest2], %[dest2], %[eight] \n\t"
2849
2850 "punpckhhw %[src0], %[b], %[r] \n\t"
2851 "punpckhhw %[src1], %[g], %[value] \n\t"
2852 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2853 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2854 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2855 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2856 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2857 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2858 "paddw %[dest3], %[src0], %[src1] \n\t"
2859 "psrlw %[dest3], %[dest3], %[eight] \n\t"
2860
2861 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
2862 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
2863 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
2864 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
2865 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
2866
2867 "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"
2868 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
2869 "daddiu %[width], %[width], -0x08 \n\t"
2870 "bgtz %[width], 1b \n\t"
2871 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
2872 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
2873 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
2874 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
2875 : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),
2876 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
2877 [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),
2878 [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
2879 : "memory");
2880 }
2881
ARGB1555ToYRow_MMI(const uint8_t * src_argb1555,uint8_t * dst_y,int width)2882 void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,
2883 uint8_t* dst_y,
2884 int width) {
2885 uint64_t ftmp[11];
2886 const uint64_t value = 0x1080108010801080;
2887 const uint64_t mask = 0x0001004200810019;
2888 uint64_t c0 = 0x001f001f001f001f;
2889 uint64_t c1 = 0x00ff00ff00ff00ff;
2890 uint64_t c2 = 0x0003000300030003;
2891 uint64_t c3 = 0x007c007c007c007c;
2892 __asm__ volatile(
2893 "1: \n\t"
2894 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
2895 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
2896 "psrlh %[src1], %[src0], %[eight] \n\t"
2897 "and %[b], %[src0], %[c0] \n\t"
2898 "and %[src0], %[src0], %[c1] \n\t"
2899 "psrlh %[src0], %[src0], %[five] \n\t"
2900 "and %[g], %[src1], %[c2] \n\t"
2901 "psllh %[g], %[g], %[three] \n\t"
2902 "or %[g], %[src0], %[g] \n\t"
2903 "and %[r], %[src1], %[c3] \n\t"
2904 "psrlh %[r], %[r], %[two] \n\t"
2905 "psllh %[src0], %[b], %[three] \n\t"
2906 "psrlh %[src1], %[b], %[two] \n\t"
2907 "or %[b], %[src0], %[src1] \n\t"
2908 "psllh %[src0], %[g], %[three] \n\t"
2909 "psrlh %[src1], %[g], %[two] \n\t"
2910 "or %[g], %[src0], %[src1] \n\t"
2911 "psllh %[src0], %[r], %[three] \n\t"
2912 "psrlh %[src1], %[r], %[two] \n\t"
2913 "or %[r], %[src0], %[src1] \n\t"
2914 "punpcklhw %[src0], %[b], %[r] \n\t"
2915 "punpcklhw %[src1], %[g], %[value] \n\t"
2916 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2917 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2918 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2919 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2920 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2921 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2922 "paddw %[dest0], %[src0], %[src1] \n\t"
2923 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2924
2925 "punpckhhw %[src0], %[b], %[r] \n\t"
2926 "punpckhhw %[src1], %[g], %[value] \n\t"
2927 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2928 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2929 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2930 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2931 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2932 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2933 "paddw %[dest1], %[src0], %[src1] \n\t"
2934 "psrlw %[dest1], %[dest1], %[eight] \n\t"
2935
2936 "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
2937 "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
2938 "psrlh %[src1], %[src0], %[eight] \n\t"
2939 "and %[b], %[src0], %[c0] \n\t"
2940 "and %[src0], %[src0], %[c1] \n\t"
2941 "psrlh %[src0], %[src0], %[five] \n\t"
2942 "and %[g], %[src1], %[c2] \n\t"
2943 "psllh %[g], %[g], %[three] \n\t"
2944 "or %[g], %[src0], %[g] \n\t"
2945 "and %[r], %[src1], %[c3] \n\t"
2946 "psrlh %[r], %[r], %[two] \n\t"
2947 "psllh %[src0], %[b], %[three] \n\t"
2948 "psrlh %[src1], %[b], %[two] \n\t"
2949 "or %[b], %[src0], %[src1] \n\t"
2950 "psllh %[src0], %[g], %[three] \n\t"
2951 "psrlh %[src1], %[g], %[two] \n\t"
2952 "or %[g], %[src0], %[src1] \n\t"
2953 "psllh %[src0], %[r], %[three] \n\t"
2954 "psrlh %[src1], %[r], %[two] \n\t"
2955 "or %[r], %[src0], %[src1] \n\t"
2956 "punpcklhw %[src0], %[b], %[r] \n\t"
2957 "punpcklhw %[src1], %[g], %[value] \n\t"
2958 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2959 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2960 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2961 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2962 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2963 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2964 "paddw %[dest2], %[src0], %[src1] \n\t"
2965 "psrlw %[dest2], %[dest2], %[eight] \n\t"
2966
2967 "punpckhhw %[src0], %[b], %[r] \n\t"
2968 "punpckhhw %[src1], %[g], %[value] \n\t"
2969 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2970 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2971 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2972 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2973 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2974 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2975 "paddw %[dest3], %[src0], %[src1] \n\t"
2976 "psrlw %[dest3], %[dest3], %[eight] \n\t"
2977
2978 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
2979 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
2980 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
2981 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
2982 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
2983
2984 "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"
2985 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
2986 "daddiu %[width], %[width], -0x08 \n\t"
2987 "bgtz %[width], 1b \n\t"
2988 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
2989 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
2990 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
2991 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
2992 : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),
2993 [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
2994 [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
2995 [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
2996 : "memory");
2997 }
2998
ARGB4444ToYRow_MMI(const uint8_t * src_argb4444,uint8_t * dst_y,int width)2999 void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
3000 uint8_t* dst_y,
3001 int width) {
3002 uint64_t ftmp[11];
3003 uint64_t value = 0x1080108010801080;
3004 uint64_t mask = 0x0001004200810019;
3005 uint64_t c0 = 0x000f000f000f000f;
3006 uint64_t c1 = 0x00ff00ff00ff00ff;
3007 __asm__ volatile(
3008 "1: \n\t"
3009 "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
3010 "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
3011 "psrlh %[src1], %[src0], %[eight] \n\t"
3012 "and %[b], %[src0], %[c0] \n\t"
3013 "and %[src0], %[src0], %[c1] \n\t"
3014 "psrlh %[g], %[src0], %[four] \n\t"
3015 "and %[r], %[src1], %[c0] \n\t"
3016 "psllh %[src0], %[b], %[four] \n\t"
3017 "or %[b], %[src0], %[b] \n\t"
3018 "psllh %[src0], %[g], %[four] \n\t"
3019 "or %[g], %[src0], %[g] \n\t"
3020 "psllh %[src0], %[r], %[four] \n\t"
3021 "or %[r], %[src0], %[r] \n\t"
3022 "punpcklhw %[src0], %[b], %[r] \n\t"
3023 "punpcklhw %[src1], %[g], %[value] \n\t"
3024 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3025 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3026 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3027 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3028 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3029 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3030 "paddw %[dest0], %[src0], %[src1] \n\t"
3031 "psrlw %[dest0], %[dest0], %[eight] \n\t"
3032
3033 "punpckhhw %[src0], %[b], %[r] \n\t"
3034 "punpckhhw %[src1], %[g], %[value] \n\t"
3035 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3036 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3037 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3038 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3039 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3040 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3041 "paddw %[dest1], %[src0], %[src1] \n\t"
3042 "psrlw %[dest1], %[dest1], %[eight] \n\t"
3043
3044 "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
3045 "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
3046 "psrlh %[src1], %[src0], %[eight] \n\t"
3047 "and %[b], %[src0], %[c0] \n\t"
3048 "and %[src0], %[src0], %[c1] \n\t"
3049 "psrlh %[g], %[src0], %[four] \n\t"
3050 "and %[r], %[src1], %[c0] \n\t"
3051 "psllh %[src0], %[b], %[four] \n\t"
3052 "or %[b], %[src0], %[b] \n\t"
3053 "psllh %[src0], %[g], %[four] \n\t"
3054 "or %[g], %[src0], %[g] \n\t"
3055 "psllh %[src0], %[r], %[four] \n\t"
3056 "or %[r], %[src0], %[r] \n\t"
3057 "punpcklhw %[src0], %[b], %[r] \n\t"
3058 "punpcklhw %[src1], %[g], %[value] \n\t"
3059 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3060 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3061 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3062 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3063 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3064 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3065 "paddw %[dest2], %[src0], %[src1] \n\t"
3066 "psrlw %[dest2], %[dest2], %[eight] \n\t"
3067
3068 "punpckhhw %[src0], %[b], %[r] \n\t"
3069 "punpckhhw %[src1], %[g], %[value] \n\t"
3070 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3071 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3072 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3073 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3074 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3075 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3076 "paddw %[dest3], %[src0], %[src1] \n\t"
3077 "psrlw %[dest3], %[dest3], %[eight] \n\t"
3078
3079 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
3080 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
3081 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
3082 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
3083 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
3084
3085 "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"
3086 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
3087 "daddiu %[width], %[width], -0x08 \n\t"
3088 "bgtz %[width], 1b \n\t"
3089 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
3090 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
3091 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
3092 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
3093 : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),
3094 [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
3095 [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
3096 : "memory");
3097 }
3098
RGB565ToUVRow_MMI(const uint8_t * src_rgb565,int src_stride_rgb565,uint8_t * dst_u,uint8_t * dst_v,int width)3099 void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
3100 int src_stride_rgb565,
3101 uint8_t* dst_u,
3102 uint8_t* dst_v,
3103 int width) {
3104 uint64_t ftmp[13];
3105 uint64_t value = 0x2020202020202020;
3106 uint64_t mask_u = 0x0026004a00700002;
3107 uint64_t mask_v = 0x00020070005e0012;
3108 uint64_t mask = 0x93;
3109 uint64_t c0 = 0x001f001f001f001f;
3110 uint64_t c1 = 0x00ff00ff00ff00ff;
3111 uint64_t c2 = 0x0007000700070007;
3112 __asm__ volatile(
3113 "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"
3114 "1: \n\t"
3115 "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
3116 "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
3117 "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"
3118 "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
3119 "psrlh %[dest0_u], %[src0], %[eight] \n\t"
3120 "and %[b0], %[src0], %[c0] \n\t"
3121 "and %[src0], %[src0], %[c1] \n\t"
3122 "psrlh %[src0], %[src0], %[five] \n\t"
3123 "and %[g0], %[dest0_u], %[c2] \n\t"
3124 "psllh %[g0], %[g0], %[three] \n\t"
3125 "or %[g0], %[src0], %[g0] \n\t"
3126 "psrlh %[r0], %[dest0_u], %[three] \n\t"
3127 "psrlh %[src0], %[src1], %[eight] \n\t"
3128 "and %[dest0_u], %[src1], %[c0] \n\t"
3129 "and %[src1], %[src1], %[c1] \n\t"
3130 "psrlh %[src1], %[src1], %[five] \n\t"
3131 "and %[dest0_v], %[src0], %[c2] \n\t"
3132 "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
3133 "or %[dest0_v], %[src1], %[dest0_v] \n\t"
3134 "psrlh %[src0], %[src0], %[three] \n\t"
3135 "paddh %[b0], %[b0], %[dest0_u] \n\t"
3136 "paddh %[g0], %[g0], %[dest0_v] \n\t"
3137 "paddh %[r0], %[r0], %[src0] \n\t"
3138 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3139 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3140 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3141 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3142 "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
3143 "psrlh %[b0], %[src0], %[six] \n\t"
3144 "psllh %[r0], %[src0], %[one] \n\t"
3145 "or %[b0], %[b0], %[r0] \n\t"
3146 "punpcklhw %[src0], %[g0], %[value] \n\t"
3147 "punpckhhw %[src1], %[g0], %[value] \n\t"
3148 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3149 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3150 "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
3151 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3152 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3153
3154 "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
3155 "pshufh %[dest0_u], %[src0], %[mask] \n\t"
3156 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
3157 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3158 "pshufh %[b0], %[src1], %[mask] \n\t"
3159 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3160
3161 "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
3162 "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
3163 "psubw %[dest0_u], %[src0], %[src1] \n\t"
3164 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
3165 "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
3166 "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
3167 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3168 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3169
3170 "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
3171 "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
3172 "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
3173 "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
3174 "psrlh %[dest1_u], %[src0], %[eight] \n\t"
3175 "and %[b0], %[src0], %[c0] \n\t"
3176 "and %[src0], %[src0], %[c1] \n\t"
3177 "psrlh %[src0], %[src0], %[five] \n\t"
3178 "and %[g0], %[dest1_u], %[c2] \n\t"
3179 "psllh %[g0], %[g0], %[three] \n\t"
3180 "or %[g0], %[src0], %[g0] \n\t"
3181 "psrlh %[r0], %[dest1_u], %[three] \n\t"
3182 "psrlh %[src0], %[src1], %[eight] \n\t"
3183 "and %[dest1_u], %[src1], %[c0] \n\t"
3184 "and %[src1], %[src1], %[c1] \n\t"
3185 "psrlh %[src1], %[src1], %[five] \n\t"
3186 "and %[dest1_v], %[src0], %[c2] \n\t"
3187 "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
3188 "or %[dest1_v], %[src1], %[dest1_v] \n\t"
3189 "psrlh %[src0], %[src0], %[three] \n\t"
3190 "paddh %[b0], %[b0], %[dest1_u] \n\t"
3191 "paddh %[g0], %[g0], %[dest1_v] \n\t"
3192 "paddh %[r0], %[r0], %[src0] \n\t"
3193 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3194 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3195 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3196 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3197 "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
3198 "psrlh %[b0], %[src0], %[six] \n\t"
3199 "psllh %[r0], %[src0], %[one] \n\t"
3200 "or %[b0], %[b0], %[r0] \n\t"
3201 "punpcklhw %[src0], %[g0], %[value] \n\t"
3202 "punpckhhw %[src1], %[g0], %[value] \n\t"
3203 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3204 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3205 "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
3206 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3207 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3208
3209 "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
3210 "pshufh %[dest1_u], %[src0], %[mask] \n\t"
3211 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
3212 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3213 "pshufh %[b0], %[src1], %[mask] \n\t"
3214 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3215
3216 "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
3217 "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
3218 "psubw %[dest1_u], %[src0], %[src1] \n\t"
3219 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
3220 "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
3221 "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
3222 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3223 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3224
3225 "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"
3226 "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
3227 "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
3228 "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
3229 "psrlh %[dest2_u], %[src0], %[eight] \n\t"
3230 "and %[b0], %[src0], %[c0] \n\t"
3231 "and %[src0], %[src0], %[c1] \n\t"
3232 "psrlh %[src0], %[src0], %[five] \n\t"
3233 "and %[g0], %[dest2_u], %[c2] \n\t"
3234 "psllh %[g0], %[g0], %[three] \n\t"
3235 "or %[g0], %[src0], %[g0] \n\t"
3236 "psrlh %[r0], %[dest2_u], %[three] \n\t"
3237 "psrlh %[src0], %[src1], %[eight] \n\t"
3238 "and %[dest2_u], %[src1], %[c0] \n\t"
3239 "and %[src1], %[src1], %[c1] \n\t"
3240 "psrlh %[src1], %[src1], %[five] \n\t"
3241 "and %[dest2_v], %[src0], %[c2] \n\t"
3242 "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
3243 "or %[dest2_v], %[src1], %[dest2_v] \n\t"
3244 "psrlh %[src0], %[src0], %[three] \n\t"
3245 "paddh %[b0], %[b0], %[dest2_u] \n\t"
3246 "paddh %[g0], %[g0], %[dest2_v] \n\t"
3247 "paddh %[r0], %[r0], %[src0] \n\t"
3248 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3249 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3250 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3251 "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
3252 "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
3253 "psrlh %[b0], %[src0], %[six] \n\t"
3254 "psllh %[r0], %[src0], %[one] \n\t"
3255 "or %[b0], %[b0], %[r0] \n\t"
3256 "punpcklhw %[src0], %[g0], %[value] \n\t"
3257 "punpckhhw %[src1], %[g0], %[value] \n\t"
3258 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3259 "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
3260 "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
3261 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3262 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3263
3264 "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
3265 "pshufh %[dest2_u], %[src0], %[mask] \n\t"
3266 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
3267 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3268 "pshufh %[b0], %[src1], %[mask] \n\t"
3269 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3270
3271 "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
3272 "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
3273 "psubw %[dest2_u], %[src0], %[src1] \n\t"
3274 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
3275 "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
3276 "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
3277 "psubw %[dest2_v], %[src1], %[src0] \n\t"
3278 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
3279
3280 "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"
3281 "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
3282 "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
3283 "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
3284 "psrlh %[dest3_u], %[src0], %[eight] \n\t"
3285 "and %[b0], %[src0], %[c0] \n\t"
3286 "and %[src0], %[src0], %[c1] \n\t"
3287 "psrlh %[src0], %[src0], %[five] \n\t"
3288 "and %[g0], %[dest3_u], %[c2] \n\t"
3289 "psllh %[g0], %[g0], %[three] \n\t"
3290 "or %[g0], %[src0], %[g0] \n\t"
3291 "psrlh %[r0], %[dest3_u], %[three] \n\t"
3292 "psrlh %[src0], %[src1], %[eight] \n\t"
3293 "and %[dest3_u], %[src1], %[c0] \n\t"
3294 "and %[src1], %[src1], %[c1] \n\t"
3295 "psrlh %[src1], %[src1], %[five] \n\t"
3296 "and %[dest3_v], %[src0], %[c2] \n\t"
3297 "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
3298 "or %[dest3_v], %[src1], %[dest3_v] \n\t"
3299 "psrlh %[src0], %[src0], %[three] \n\t"
3300 "paddh %[b0], %[b0], %[dest3_u] \n\t"
3301 "paddh %[g0], %[g0], %[dest3_v] \n\t"
3302 "paddh %[r0], %[r0], %[src0] \n\t"
3303 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3304 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3305 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3306 "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
3307 "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
3308 "psrlh %[b0], %[src0], %[six] \n\t"
3309 "psllh %[r0], %[src0], %[one] \n\t"
3310 "or %[b0], %[b0], %[r0] \n\t"
3311 "punpcklhw %[src0], %[g0], %[value] \n\t"
3312 "punpckhhw %[src1], %[g0], %[value] \n\t"
3313 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3314 "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
3315 "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
3316 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3317 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3318
3319 "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
3320 "pshufh %[dest3_u], %[src0], %[mask] \n\t"
3321 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
3322 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3323 "pshufh %[b0], %[src1], %[mask] \n\t"
3324 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3325
3326 "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
3327 "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
3328 "psubw %[dest3_u], %[src0], %[src1] \n\t"
3329 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
3330 "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
3331 "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
3332 "psubw %[dest3_v], %[src1], %[src0] \n\t"
3333 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
3334
3335 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
3336 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
3337 "packushb %[dest0_u], %[src0], %[src1] \n\t"
3338 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
3339 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
3340 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
3341 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
3342 "packushb %[dest0_v], %[src0], %[src1] \n\t"
3343 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
3344 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
3345
3346 "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"
3347 "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
3348 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
3349 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
3350 "daddiu %[width], %[width], -0x10 \n\t"
3351 "bgtz %[width], 1b \n\t"
3352 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
3353 [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
3354 [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
3355 [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
3356 [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
3357 [dest3_v] "=&f"(ftmp[12])
3358 : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),
3359 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
3360 [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
3361 [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
3362 [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
3363 [one] "f"(0x01)
3364 : "memory");
3365 }
3366
ARGB1555ToUVRow_MMI(const uint8_t * src_argb1555,int src_stride_argb1555,uint8_t * dst_u,uint8_t * dst_v,int width)3367 void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
3368 int src_stride_argb1555,
3369 uint8_t* dst_u,
3370 uint8_t* dst_v,
3371 int width) {
3372 uint64_t ftmp[11];
3373 uint64_t value = 0x2020202020202020;
3374 uint64_t mask_u = 0x0026004a00700002;
3375 uint64_t mask_v = 0x00020070005e0012;
3376 uint64_t mask = 0x93;
3377 uint64_t c0 = 0x001f001f001f001f;
3378 uint64_t c1 = 0x00ff00ff00ff00ff;
3379 uint64_t c2 = 0x0003000300030003;
3380 uint64_t c3 = 0x007c007c007c007c;
3381 __asm__ volatile(
3382 "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
3383 "1: \n\t"
3384 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
3385 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
3386 "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
3387 "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
3388 "psrlh %[dest0_u], %[src0], %[eight] \n\t"
3389 "and %[b0], %[src0], %[c0] \n\t"
3390 "and %[src0], %[src0], %[c1] \n\t"
3391 "psrlh %[src0], %[src0], %[five] \n\t"
3392 "and %[g0], %[dest0_u], %[c2] \n\t"
3393 "psllh %[g0], %[g0], %[three] \n\t"
3394 "or %[g0], %[src0], %[g0] \n\t"
3395 "and %[r0], %[dest0_u], %[c3] \n\t"
3396 "psrlh %[r0], %[r0], %[two] \n\t"
3397 "psrlh %[src0], %[src1], %[eight] \n\t"
3398 "and %[dest0_u], %[src1], %[c0] \n\t"
3399 "and %[src1], %[src1], %[c1] \n\t"
3400 "psrlh %[src1], %[src1], %[five] \n\t"
3401 "and %[dest0_v], %[src0], %[c2] \n\t"
3402 "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
3403 "or %[dest0_v], %[src1], %[dest0_v] \n\t"
3404 "and %[src0], %[src0], %[c3] \n\t"
3405 "psrlh %[src0], %[src0], %[two] \n\t"
3406 "paddh %[b0], %[b0], %[dest0_u] \n\t"
3407 "paddh %[g0], %[g0], %[dest0_v] \n\t"
3408 "paddh %[r0], %[r0], %[src0] \n\t"
3409 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3410 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3411 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3412 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3413 "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
3414 "psrlh %[b0], %[src0], %[six] \n\t"
3415 "psllh %[r0], %[src0], %[one] \n\t"
3416 "or %[b0], %[b0], %[r0] \n\t"
3417 "psrlh %[r0], %[g0], %[six] \n\t"
3418 "psllh %[g0], %[g0], %[one] \n\t"
3419 "or %[g0], %[g0], %[r0] \n\t"
3420 "punpcklhw %[src0], %[g0], %[value] \n\t"
3421 "punpckhhw %[src1], %[g0], %[value] \n\t"
3422 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3423 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3424 "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
3425 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3426 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3427
3428 "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
3429 "pshufh %[dest0_u], %[src0], %[mask] \n\t"
3430 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
3431 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3432 "pshufh %[b0], %[src1], %[mask] \n\t"
3433 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3434
3435 "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
3436 "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
3437 "psubw %[dest0_u], %[src0], %[src1] \n\t"
3438 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
3439 "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
3440 "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
3441 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3442 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3443
3444 "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
3445 "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
3446 "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
3447 "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
3448 "psrlh %[dest1_u], %[src0], %[eight] \n\t"
3449 "and %[b0], %[src0], %[c0] \n\t"
3450 "and %[src0], %[src0], %[c1] \n\t"
3451 "psrlh %[src0], %[src0], %[five] \n\t"
3452 "and %[g0], %[dest1_u], %[c2] \n\t"
3453 "psllh %[g0], %[g0], %[three] \n\t"
3454 "or %[g0], %[src0], %[g0] \n\t"
3455 "and %[r0], %[dest1_u], %[c3] \n\t"
3456 "psrlh %[r0], %[r0], %[two] \n\t"
3457 "psrlh %[src0], %[src1], %[eight] \n\t"
3458 "and %[dest1_u], %[src1], %[c0] \n\t"
3459 "and %[src1], %[src1], %[c1] \n\t"
3460 "psrlh %[src1], %[src1], %[five] \n\t"
3461 "and %[dest1_v], %[src0], %[c2] \n\t"
3462 "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
3463 "or %[dest1_v], %[src1], %[dest1_v] \n\t"
3464 "and %[src0], %[src0], %[c3] \n\t"
3465 "psrlh %[src0], %[src0], %[two] \n\t"
3466 "paddh %[b0], %[b0], %[dest1_u] \n\t"
3467 "paddh %[g0], %[g0], %[dest1_v] \n\t"
3468 "paddh %[r0], %[r0], %[src0] \n\t"
3469 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3470 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3471 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3472 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3473 "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
3474 "psrlh %[b0], %[src0], %[six] \n\t"
3475 "psllh %[r0], %[src0], %[one] \n\t"
3476 "or %[b0], %[b0], %[r0] \n\t"
3477 "psrlh %[r0], %[g0], %[six] \n\t"
3478 "psllh %[g0], %[g0], %[one] \n\t"
3479 "or %[g0], %[g0], %[r0] \n\t"
3480 "punpcklhw %[src0], %[g0], %[value] \n\t"
3481 "punpckhhw %[src1], %[g0], %[value] \n\t"
3482 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3483 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3484 "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
3485 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3486 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3487
3488 "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
3489 "pshufh %[dest1_u], %[src0], %[mask] \n\t"
3490 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
3491 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3492 "pshufh %[b0], %[src1], %[mask] \n\t"
3493 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3494
3495 "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
3496 "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
3497 "psubw %[dest1_u], %[src0], %[src1] \n\t"
3498 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
3499 "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
3500 "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
3501 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3502 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3503
3504 "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
3505 "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
3506
3507 "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
3508 "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
3509 "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
3510 "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
3511 "psrlh %[dest2_u], %[src0], %[eight] \n\t"
3512 "and %[b0], %[src0], %[c0] \n\t"
3513 "and %[src0], %[src0], %[c1] \n\t"
3514 "psrlh %[src0], %[src0], %[five] \n\t"
3515 "and %[g0], %[dest2_u], %[c2] \n\t"
3516 "psllh %[g0], %[g0], %[three] \n\t"
3517 "or %[g0], %[src0], %[g0] \n\t"
3518 "and %[r0], %[dest2_u], %[c3] \n\t"
3519 "psrlh %[r0], %[r0], %[two] \n\t"
3520 "psrlh %[src0], %[src1], %[eight] \n\t"
3521 "and %[dest2_u], %[src1], %[c0] \n\t"
3522 "and %[src1], %[src1], %[c1] \n\t"
3523 "psrlh %[src1], %[src1], %[five] \n\t"
3524 "and %[dest0_v], %[src0], %[c2] \n\t"
3525 "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
3526 "or %[dest0_v], %[src1], %[dest0_v] \n\t"
3527 "and %[src0], %[src0], %[c3] \n\t"
3528 "psrlh %[src0], %[src0], %[two] \n\t"
3529 "paddh %[b0], %[b0], %[dest2_u] \n\t"
3530 "paddh %[g0], %[g0], %[dest0_v] \n\t"
3531 "paddh %[r0], %[r0], %[src0] \n\t"
3532 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3533 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3534 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3535 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3536 "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
3537 "psrlh %[b0], %[src0], %[six] \n\t"
3538 "psllh %[r0], %[src0], %[one] \n\t"
3539 "or %[b0], %[b0], %[r0] \n\t"
3540 "psrlh %[r0], %[g0], %[six] \n\t"
3541 "psllh %[g0], %[g0], %[one] \n\t"
3542 "or %[g0], %[g0], %[r0] \n\t"
3543 "punpcklhw %[src0], %[g0], %[value] \n\t"
3544 "punpckhhw %[src1], %[g0], %[value] \n\t"
3545 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3546 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3547 "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
3548 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3549 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3550
3551 "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
3552 "pshufh %[dest2_u], %[src0], %[mask] \n\t"
3553 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
3554 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3555 "pshufh %[b0], %[src1], %[mask] \n\t"
3556 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3557
3558 "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
3559 "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
3560 "psubw %[dest2_u], %[src0], %[src1] \n\t"
3561 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
3562 "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
3563 "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
3564 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3565 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3566
3567 "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
3568 "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
3569 "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
3570 "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
3571 "psrlh %[dest3_u], %[src0], %[eight] \n\t"
3572 "and %[b0], %[src0], %[c0] \n\t"
3573 "and %[src0], %[src0], %[c1] \n\t"
3574 "psrlh %[src0], %[src0], %[five] \n\t"
3575 "and %[g0], %[dest3_u], %[c2] \n\t"
3576 "psllh %[g0], %[g0], %[three] \n\t"
3577 "or %[g0], %[src0], %[g0] \n\t"
3578 "and %[r0], %[dest3_u], %[c3] \n\t"
3579 "psrlh %[r0], %[r0], %[two] \n\t"
3580 "psrlh %[src0], %[src1], %[eight] \n\t"
3581 "and %[dest3_u], %[src1], %[c0] \n\t"
3582 "and %[src1], %[src1], %[c1] \n\t"
3583 "psrlh %[src1], %[src1], %[five] \n\t"
3584 "and %[dest1_v], %[src0], %[c2] \n\t"
3585 "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
3586 "or %[dest1_v], %[src1], %[dest1_v] \n\t"
3587 "and %[src0], %[src0], %[c3] \n\t"
3588 "psrlh %[src0], %[src0], %[two] \n\t"
3589 "paddh %[b0], %[b0], %[dest3_u] \n\t"
3590 "paddh %[g0], %[g0], %[dest1_v] \n\t"
3591 "paddh %[r0], %[r0], %[src0] \n\t"
3592 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3593 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3594 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3595 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3596 "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
3597 "psrlh %[b0], %[src0], %[six] \n\t"
3598 "psllh %[r0], %[src0], %[one] \n\t"
3599 "or %[b0], %[b0], %[r0] \n\t"
3600 "psrlh %[r0], %[g0], %[six] \n\t"
3601 "psllh %[g0], %[g0], %[one] \n\t"
3602 "or %[g0], %[g0], %[r0] \n\t"
3603 "punpcklhw %[src0], %[g0], %[value] \n\t"
3604 "punpckhhw %[src1], %[g0], %[value] \n\t"
3605 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3606 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3607 "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
3608 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3609 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3610
3611 "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
3612 "pshufh %[dest3_u], %[src0], %[mask] \n\t"
3613 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
3614 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3615 "pshufh %[b0], %[src1], %[mask] \n\t"
3616 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3617
3618 "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
3619 "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
3620 "psubw %[dest3_u], %[src0], %[src1] \n\t"
3621 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
3622 "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
3623 "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
3624 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3625 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3626
3627 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
3628 "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
3629 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
3630 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
3631 "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
3632 "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
3633 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
3634 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
3635
3636 "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
3637 "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
3638 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
3639 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
3640 "daddiu %[width], %[width], -0x10 \n\t"
3641 "bgtz %[width], 1b \n\t"
3642 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
3643 [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
3644 [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
3645 [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
3646 [dest1_v] "=&f"(ftmp[10])
3647 : [src_argb1555] "r"(src_argb1555),
3648 [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
3649 [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
3650 [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
3651 [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
3652 [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
3653 [two] "f"(0x02), [one] "f"(0x01)
3654 : "memory");
3655 }
3656
// Produces one row of U and one row of V from two adjacent rows of 16-bit
// ARGB4444 pixels: the row at src_argb4444 and the row src_stride_argb4444
// bytes after it.
//
// Each loop iteration reads 0x20 bytes from both rows, stores 8 bytes to dst_u
// and 8 bytes to dst_v, and lowers width by 0x10; the loop repeats while width
// is still greater than zero, so at least one iteration always runs.
//
// NOTE(review): the asm advances src_argb4444, next_argb4444, dst_u, dst_v and
// width although all five are declared as input-only "r" operands, which GCC
// expects to be left unmodified. Turning them into "+r" operands looks like it
// would exceed GCC's limit of 30 asm operands here (13 outputs + 14 inputs,
// with every "+" operand counted twice) - confirm before changing.
void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
                         int src_stride_argb4444,
                         uint8_t* dst_u,
                         uint8_t* dst_v,
                         int width) {
  uint64_t ftmp[13];
  // 0x2020 in every halfword; interleaved with green and pair-added below it
  // becomes the 0x4040 term that meets the weight 2 in mask_u / mask_v.
  uint64_t value = 0x2020202020202020;
  // Halfword weights, lowest lane first: 0x02, 0x70, 0x4a, 0x26.
  uint64_t mask_u = 0x0026004a00700002;
  // Halfword weights, lowest lane first: 0x12, 0x5e, 0x70, 0x02.
  uint64_t mask_v = 0x00020070005e0012;
  // pshufh selector; presumably rotates the halfwords so the constant lane
  // lines up with the 0x02 weight of mask_u - confirm against the MMI manual.
  uint64_t mask = 0x93;
  // Keeps the low nibble of every halfword.
  uint64_t c0 = 0x000f000f000f000f;
  // Keeps the low byte of every halfword.
  uint64_t c1 = 0x00ff00ff00ff00ff;
  __asm__ volatile(
      // next_argb4444 enters holding the stride; turn it into the address of
      // the second row.
      "daddu %[next_argb4444], %[src_argb4444], %[next_argb4444] \n\t"
      "1: \n\t"
      // Pixels 0-3: load 8 bytes from each row (unaligned load pairs).
      "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
      "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
      "gsldrc1 %[src1], 0x00(%[next_argb4444]) \n\t"
      "gsldlc1 %[src1], 0x07(%[next_argb4444]) \n\t"
      // Split row 0 per halfword: b0 = bits 0-3, g0 = bits 4-7, r0 = bits 8-11.
      "psrlh %[dest0_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[g0], %[src0], %[four] \n\t"
      "and %[r0], %[dest0_u], %[c0] \n\t"
      // Same split for row 1 (b in dest0_u, g in dest0_v, r in src0).
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest0_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[dest0_v], %[src1], %[four] \n\t"
      "and %[src0], %[src0], %[c0] \n\t"
      // Add the two rows channel by channel.
      "paddh %[b0], %[b0], %[dest0_u] \n\t"
      "paddh %[g0], %[g0], %[dest0_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      // Interleave b/r and add horizontally adjacent pixels, so that
      // src0 = {b01, r01, b23, r23}.
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
      // Widen the b/r sums: (x >> 4) | (x << 2).
      "psrlh %[b0], %[src0], %[four] \n\t"
      "psllh %[r0], %[src0], %[two] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      // Same widening for green, applied before its horizontal add.
      "psrlh %[r0], %[g0], %[four] \n\t"
      "psllh %[g0], %[g0], %[two] \n\t"
      "or %[g0], %[g0], %[r0] \n\t"
      // Interleave green with `value` and add adjacent pixels, so that
      // g0 = {g01, 0x4040, g23, 0x4040}.
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
      // src0 / src1 = {b, g, r, 0x4040} for the two 2x2 pixel groups.
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      // Pairwise multiply-add against mask_v, and against mask_u after the
      // pshufh reordering.
      "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest0_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      // U = (low word - high word) >> 8; V = (high word - low word) >> 8.
      "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
      "psubw %[dest0_u], %[src0], %[src1] \n\t"
      "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
      "psubw %[dest0_v], %[src1], %[src0] \n\t"
      "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"

      // Pixels 4-7: same sequence, results land in dest1_u / dest1_v.
      "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
      "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
      "gsldrc1 %[src1], 0x08(%[next_argb4444]) \n\t"
      "gsldlc1 %[src1], 0x0f(%[next_argb4444]) \n\t"
      "psrlh %[dest1_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[g0], %[src0], %[four] \n\t"
      "and %[r0], %[dest1_u], %[c0] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest1_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[dest1_v], %[src1], %[four] \n\t"
      "and %[src0], %[src0], %[c0] \n\t"
      "paddh %[b0], %[b0], %[dest1_u] \n\t"
      "paddh %[g0], %[g0], %[dest1_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
      "psrlh %[b0], %[src0], %[four] \n\t"
      "psllh %[r0], %[src0], %[two] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "psrlh %[r0], %[g0], %[four] \n\t"
      "psllh %[g0], %[g0], %[two] \n\t"
      "or %[g0], %[g0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest1_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
      "psubw %[dest1_u], %[src0], %[src1] \n\t"
      "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
      "psubw %[dest1_v], %[src1], %[src0] \n\t"
      "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"

      // Pixels 8-11: same sequence, results land in dest2_u / dest2_v.
      "gsldrc1 %[src0], 0x10(%[src_argb4444]) \n\t"
      "gsldlc1 %[src0], 0x17(%[src_argb4444]) \n\t"
      "gsldrc1 %[src1], 0x10(%[next_argb4444]) \n\t"
      "gsldlc1 %[src1], 0x17(%[next_argb4444]) \n\t"
      "psrlh %[dest2_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[g0], %[src0], %[four] \n\t"
      "and %[r0], %[dest2_u], %[c0] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest2_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[dest2_v], %[src1], %[four] \n\t"
      "and %[src0], %[src0], %[c0] \n\t"
      "paddh %[b0], %[b0], %[dest2_u] \n\t"
      "paddh %[g0], %[g0], %[dest2_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
      "psrlh %[b0], %[src0], %[four] \n\t"
      "psllh %[r0], %[src0], %[two] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "psrlh %[r0], %[g0], %[four] \n\t"
      "psllh %[g0], %[g0], %[two] \n\t"
      "or %[g0], %[g0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest2_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
      "psubw %[dest2_u], %[src0], %[src1] \n\t"
      "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
      "psubw %[dest2_v], %[src1], %[src0] \n\t"
      "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"

      // Pixels 12-15: same sequence, results land in dest3_u / dest3_v.
      "gsldrc1 %[src0], 0x18(%[src_argb4444]) \n\t"
      "gsldlc1 %[src0], 0x1f(%[src_argb4444]) \n\t"
      "gsldrc1 %[src1], 0x18(%[next_argb4444]) \n\t"
      "gsldlc1 %[src1], 0x1f(%[next_argb4444]) \n\t"
      "psrlh %[dest3_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[g0], %[src0], %[four] \n\t"
      "and %[r0], %[dest3_u], %[c0] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest3_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[dest3_v], %[src1], %[four] \n\t"
      "and %[src0], %[src0], %[c0] \n\t"
      "paddh %[b0], %[b0], %[dest3_u] \n\t"
      "paddh %[g0], %[g0], %[dest3_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
      "psrlh %[b0], %[src0], %[four] \n\t"
      "psllh %[r0], %[src0], %[two] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "psrlh %[r0], %[g0], %[four] \n\t"
      "psllh %[g0], %[g0], %[two] \n\t"
      "or %[g0], %[g0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest3_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
      "psubw %[dest3_u], %[src0], %[src1] \n\t"
      "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
      "psubw %[dest3_v], %[src1], %[src0] \n\t"
      "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"

      // Narrow the eight 32-bit U results to bytes and store them, then the
      // same for V.
      "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
      "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
      "packushb %[dest0_u], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
      "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
      "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
      "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
      "packushb %[dest0_v], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
      "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"

      // Advance both source rows by 0x20 bytes, both outputs by 8 bytes, and
      // loop while width stays positive.
      "daddiu %[src_argb4444], %[src_argb4444], 0x20 \n\t"
      "daddiu %[next_argb4444], %[next_argb4444], 0x20 \n\t"
      "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
      "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
      "daddiu %[width], %[width], -0x10 \n\t"
      "bgtz %[width], 1b \n\t"
      : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
        [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
        [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
        [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
        [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
        [dest3_v] "=&f"(ftmp[12])
      : [src_argb4444] "r"(src_argb4444),
        [next_argb4444] "r"(src_stride_argb4444), [dst_u] "r"(dst_u),
        [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
        [c0] "f"(c0), [c1] "f"(c1), [mask] "f"(mask), [mask_u] "f"(mask_u),
        [mask_v] "f"(mask_v), [eight] "f"(0x08), [four] "f"(0x04),
        [two] "f"(0x02)
      : "memory");
}
3911
ARGBToUV444Row_MMI(const uint8_t * src_argb,uint8_t * dst_u,uint8_t * dst_v,int width)3912 void ARGBToUV444Row_MMI(const uint8_t* src_argb,
3913 uint8_t* dst_u,
3914 uint8_t* dst_v,
3915 int width) {
3916 uint64_t ftmp[12];
3917 const uint64_t value = 0x4040;
3918 const uint64_t mask_u = 0x0026004a00700002;
3919 const uint64_t mask_v = 0x00020070005e0012;
3920
3921 __asm__ volatile(
3922 "1: \n\t"
3923 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
3924 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
3925 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
3926 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
3927 "dsll %[dest0_u], %[src_lo], %[sixteen] \n\t"
3928 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
3929 "pinsrh_3 %[dest0_v], %[src_lo], %[value] \n\t"
3930 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
3931 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
3932
3933 "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
3934 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
3935 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
3936 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
3937 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
3938
3939 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
3940 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
3941 "psubw %[dest0_u], %[src0], %[src1] \n\t"
3942 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
3943 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
3944 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
3945 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3946 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3947
3948 "gsldrc1 %[src0], 0x08(%[src_argb]) \n\t"
3949 "gsldlc1 %[src0], 0x0f(%[src_argb]) \n\t"
3950 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
3951 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
3952 "dsll %[dest1_u], %[src_lo], %[sixteen] \n\t"
3953 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
3954 "pinsrh_3 %[dest1_v], %[src_lo], %[value] \n\t"
3955 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
3956 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
3957 "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
3958 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
3959 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
3960 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
3961 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
3962
3963 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
3964 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
3965 "psubw %[dest1_u], %[src0], %[src1] \n\t"
3966 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
3967 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
3968 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
3969 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3970 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3971
3972 "gsldrc1 %[src0], 0x10(%[src_argb]) \n\t"
3973 "gsldlc1 %[src0], 0x17(%[src_argb]) \n\t"
3974 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
3975 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
3976 "dsll %[dest2_u], %[src_lo], %[sixteen] \n\t"
3977 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
3978 "pinsrh_3 %[dest2_v], %[src_lo], %[value] \n\t"
3979 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
3980 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
3981 "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
3982 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
3983 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
3984 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
3985 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
3986
3987 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
3988 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
3989 "psubw %[dest2_u], %[src0], %[src1] \n\t"
3990 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
3991 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
3992 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
3993 "psubw %[dest2_v], %[src1], %[src0] \n\t"
3994 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
3995
3996 "gsldrc1 %[src0], 0x18(%[src_argb]) \n\t"
3997 "gsldlc1 %[src0], 0x1f(%[src_argb]) \n\t"
3998 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
3999 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
4000 "dsll %[dest3_u], %[src_lo], %[sixteen] \n\t"
4001 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
4002 "pinsrh_3 %[dest3_v], %[src_lo], %[value] \n\t"
4003 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
4004 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
4005 "dsll %[src_lo], %[src_hi], %[sixteen] \n\t"
4006 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
4007 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
4008 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
4009 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
4010
4011 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
4012 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
4013 "psubw %[dest3_u], %[src0], %[src1] \n\t"
4014 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
4015 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
4016 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
4017 "psubw %[dest3_v], %[src1], %[src0] \n\t"
4018 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
4019
4020 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
4021 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
4022 "packushb %[dest0_u], %[src0], %[src1] \n\t"
4023 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
4024 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
4025
4026 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
4027 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
4028 "packushb %[dest0_v], %[src0], %[src1] \n\t"
4029 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
4030 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
4031
4032 "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
4033 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
4034 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
4035 "daddi %[width], %[width], -0x08 \n\t"
4036 "bgtz %[width], 1b \n\t"
4037 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
4038 [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
4039 [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
4040 [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
4041 [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
4042 [dest3_v] "=&f"(ftmp[11])
4043 : [src_argb] "r"(src_argb), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
4044 [width] "r"(width), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
4045 [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),
4046 [eight] "f"(0x08)
4047 : "memory");
4048 }
4049
ARGBGrayRow_MMI(const uint8_t * src_argb,uint8_t * dst_argb,int width)4050 void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
4051 uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
4052 uint64_t tmp0, tmp1;
4053 const uint64_t mask0 = 0x0;
4054 const uint64_t mask1 = 0x01;
4055 const uint64_t mask2 = 0x00400026004B000FULL;
4056 const uint64_t mask3 = 0xFF000000FF000000ULL;
4057 const uint64_t mask4 = ~mask3;
4058 const uint64_t shift = 0x07;
4059
4060 __asm__ volatile(
4061 "1: \n\t"
4062 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
4063 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
4064
4065 "and %[src37], %[src], %[mask3] \n\t"
4066
4067 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
4068 "pinsrh_3 %[src_lo], %[src_lo], %[mask1] \n\t"
4069 "pmaddhw %[dest_lo], %[src_lo], %[mask2] \n\t"
4070 "punpcklwd %[tmp0], %[dest_lo], %[dest_lo] \n\t"
4071 "punpckhwd %[tmp1], %[dest_lo], %[dest_lo] \n\t"
4072 "paddw %[dest_lo], %[tmp0], %[tmp1] \n\t"
4073 "psrlw %[dest_lo], %[dest_lo], %[shift] \n\t"
4074 "packsswh %[dest_lo], %[dest_lo], %[dest_lo] \n\t"
4075
4076 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
4077 "pinsrh_3 %[src_hi], %[src_hi], %[mask1] \n\t"
4078 "pmaddhw %[dest_hi], %[src_hi], %[mask2] \n\t"
4079 "punpcklwd %[tmp0], %[dest_hi], %[dest_hi] \n\t"
4080 "punpckhwd %[tmp1], %[dest_hi], %[dest_hi] \n\t"
4081 "paddw %[dest_hi], %[tmp0], %[tmp1] \n\t"
4082 "psrlw %[dest_hi], %[dest_hi], %[shift] \n\t"
4083 "packsswh %[dest_hi], %[dest_hi], %[dest_hi] \n\t"
4084
4085 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4086 "and %[dest], %[dest], %[mask4] \n\t"
4087 "or %[dest], %[dest], %[src37] \n\t"
4088
4089 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4090 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4091
4092 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
4093 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4094 "daddi %[width], %[width], -0x02 \n\t"
4095 "bnez %[width], 1b \n\t"
4096 : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
4097 [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
4098 [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
4099 [src37] "=&f"(src37)
4100 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
4101 [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
4102 [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
4103 : "memory");
4104 }
4105
4106 // Convert a row of image to Sepia tone.
ARGBSepiaRow_MMI(uint8_t * dst_argb,int width)4107 void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
4108 uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
4109 uint64_t tmp0, tmp1;
4110 const uint64_t mask0 = 0x0;
4111 const uint64_t mask1 = 0x002300440011ULL;
4112 const uint64_t mask2 = 0x002D00580016ULL;
4113 const uint64_t mask3 = 0x003200620018ULL;
4114 const uint64_t mask4 = 0xFF000000FF000000ULL;
4115 const uint64_t shift = 0x07;
4116
4117 __asm__ volatile(
4118 "1: \n\t"
4119 "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4120 "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4121
4122 "and %[dest37], %[dest], %[mask4] \n\t"
4123
4124 "punpcklbh %[dest_lo], %[dest], %[mask0] \n\t"
4125 "pmaddhw %[dest0], %[dest_lo], %[mask1] \n\t"
4126 "pmaddhw %[dest1], %[dest_lo], %[mask2] \n\t"
4127 "pmaddhw %[dest2], %[dest_lo], %[mask3] \n\t"
4128 "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
4129 "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
4130 "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
4131 "psrlw %[dest0], %[dest0], %[shift] \n\t"
4132 "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
4133 "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
4134 "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
4135 "psrlw %[dest1], %[dest1], %[shift] \n\t"
4136 "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
4137
4138 "punpckhbh %[dest_hi], %[dest], %[mask0] \n\t"
4139 "pmaddhw %[dest0], %[dest_hi], %[mask1] \n\t"
4140 "pmaddhw %[dest1], %[dest_hi], %[mask2] \n\t"
4141 "pmaddhw %[dest2], %[dest_hi], %[mask3] \n\t"
4142 "punpcklwd %[tmp0], %[dest0], %[dest1] \n\t"
4143 "punpckhwd %[tmp1], %[dest0], %[dest1] \n\t"
4144 "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
4145 "psrlw %[dest0], %[dest0], %[shift] \n\t"
4146 "punpcklwd %[tmp0], %[dest2], %[mask0] \n\t"
4147 "punpckhwd %[tmp1], %[dest2], %[mask0] \n\t"
4148 "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
4149 "psrlw %[dest1], %[dest1], %[shift] \n\t"
4150 "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
4151
4152 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4153 "or %[dest], %[dest], %[dest37] \n\t"
4154
4155 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4156 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4157
4158 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4159 "daddi %[width], %[width], -0x02 \n\t"
4160 "bnez %[width], 1b \n\t"
4161 : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
4162 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
4163 [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
4164 [dest] "=&f"(dest)
4165 : [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask0] "f"(mask0),
4166 [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
4167 [mask4] "f"(mask4), [shift] "f"(shift)
4168 : "memory");
4169 }
4170
4171 // Apply color matrix to a row of image. Matrix is signed.
4172 // TODO(fbarchard): Consider adding rounding (+32).
ARGBColorMatrixRow_MMI(const uint8_t * src_argb,uint8_t * dst_argb,const int8_t * matrix_argb,int width)4173 void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
4174 uint8_t* dst_argb,
4175 const int8_t* matrix_argb,
4176 int width) {
4177 uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
4178 dest3;
4179 uint64_t matrix, matrix_hi, matrix_lo;
4180 uint64_t tmp0, tmp1;
4181 const uint64_t shift0 = 0x06;
4182 const uint64_t shift1 = 0x08;
4183 const uint64_t mask0 = 0x0;
4184 const uint64_t mask1 = 0x08;
4185
4186 __asm__ volatile(
4187 "1: \n\t"
4188 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
4189 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
4190
4191 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
4192
4193 "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
4194 "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
4195 "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
4196 "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4197 "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4198 "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
4199 "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4200 "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4201 "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
4202 "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
4203 "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
4204 "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
4205 "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
4206 "psraw %[dest0], %[dest0], %[shift0] \n\t"
4207
4208 "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
4209 "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
4210 "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
4211 "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4212 "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4213 "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
4214 "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4215 "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4216 "pmaddhw %[dest_lo], %[src_lo], %[matrix_lo] \n\t"
4217 "pmaddhw %[dest_hi], %[src_lo], %[matrix_hi] \n\t"
4218 "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
4219 "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
4220 "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
4221 "psraw %[dest1], %[dest1], %[shift0] \n\t"
4222
4223 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
4224
4225 "gsldlc1 %[matrix], 0x07(%[matrix_ptr]) \n\t"
4226 "gsldrc1 %[matrix], 0x00(%[matrix_ptr]) \n\t"
4227 "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
4228 "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4229 "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4230 "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
4231 "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4232 "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4233 "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
4234 "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
4235 "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
4236 "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
4237 "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
4238 "psraw %[dest2], %[dest2], %[shift0] \n\t"
4239
4240 "gsldlc1 %[matrix], 0x0f(%[matrix_ptr]) \n\t"
4241 "gsldrc1 %[matrix], 0x08(%[matrix_ptr]) \n\t"
4242 "punpcklbh %[matrix_lo], %[matrix], %[mask0] \n\t"
4243 "psllh %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4244 "psrah %[matrix_lo], %[matrix_lo], %[shift1] \n\t"
4245 "punpckhbh %[matrix_hi], %[matrix], %[mask0] \n\t"
4246 "psllh %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4247 "psrah %[matrix_hi], %[matrix_hi], %[shift1] \n\t"
4248 "pmaddhw %[dest_lo], %[src_hi], %[matrix_lo] \n\t"
4249 "pmaddhw %[dest_hi], %[src_hi], %[matrix_hi] \n\t"
4250 "punpcklwd %[tmp0], %[dest_lo], %[dest_hi] \n\t"
4251 "punpckhwd %[tmp1], %[dest_lo], %[dest_hi] \n\t"
4252 "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
4253 "psraw %[dest3], %[dest3], %[shift0] \n\t"
4254
4255 "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
4256 "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
4257 "packushb %[dest], %[tmp0], %[tmp1] \n\t"
4258
4259 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4260 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4261
4262 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
4263 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4264 "daddi %[width], %[width], -0x02 \n\t"
4265 "bnez %[width], 1b \n\t"
4266 : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
4267 [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
4268 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
4269 [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
4270 [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
4271 [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix)
4272 : [src_ptr] "r"(src_argb), [matrix_ptr] "r"(matrix_argb),
4273 [dst_ptr] "r"(dst_argb), [width] "r"(width), [shift0] "f"(shift0),
4274 [shift1] "f"(shift1), [mask0] "f"(mask0), [mask1] "f"(mask1)
4275 : "memory");
4276 }
4277
ARGBShadeRow_MMI(const uint8_t * src_argb,uint8_t * dst_argb,int width,uint32_t value)4278 void ARGBShadeRow_MMI(const uint8_t* src_argb,
4279 uint8_t* dst_argb,
4280 int width,
4281 uint32_t value) {
4282 uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
4283 const uint64_t shift = 0x08;
4284
4285 __asm__ volatile(
4286 "1: \n\t"
4287 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
4288 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
4289 "punpcklbh %[src_lo], %[src], %[src] \n\t"
4290 "punpckhbh %[src_hi], %[src], %[src] \n\t"
4291
4292 "punpcklbh %[value], %[value], %[value] \n\t"
4293
4294 "pmulhuh %[dest_lo], %[src_lo], %[value] \n\t"
4295 "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
4296 "pmulhuh %[dest_hi], %[src_hi], %[value] \n\t"
4297 "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
4298 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4299
4300 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4301 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4302
4303 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
4304 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4305 "daddi %[width], %[width], -0x02 \n\t"
4306 "bnez %[width], 1b \n\t"
4307 : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
4308 [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
4309 [dest] "=&f"(dest)
4310 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [width] "r"(width),
4311 [value] "f"(value), [shift] "f"(shift)
4312 : "memory");
4313 }
4314
ARGBMultiplyRow_MMI(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)4315 void ARGBMultiplyRow_MMI(const uint8_t* src_argb0,
4316 const uint8_t* src_argb1,
4317 uint8_t* dst_argb,
4318 int width) {
4319 uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
4320 uint64_t dest, dest_lo, dest_hi;
4321 const uint64_t mask = 0x0;
4322
4323 __asm__ volatile(
4324 "1: \n\t"
4325 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
4326 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
4327 "punpcklbh %[src0_lo], %[src0], %[src0] \n\t"
4328 "punpckhbh %[src0_hi], %[src0], %[src0] \n\t"
4329
4330 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
4331 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
4332 "punpcklbh %[src1_lo], %[src1], %[mask] \n\t"
4333 "punpckhbh %[src1_hi], %[src1], %[mask] \n\t"
4334
4335 "pmulhuh %[dest_lo], %[src0_lo], %[src1_lo] \n\t"
4336 "pmulhuh %[dest_hi], %[src0_hi], %[src1_hi] \n\t"
4337 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4338
4339 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4340 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4341
4342 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
4343 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
4344 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4345 "daddi %[width], %[width], -0x02 \n\t"
4346 "bnez %[width], 1b \n\t"
4347 : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
4348 [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
4349 [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
4350 [src1] "=&f"(src1), [dest] "=&f"(dest)
4351 : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
4352 [dst_ptr] "r"(dst_argb), [width] "r"(width), [mask] "f"(mask)
4353 : "memory");
4354 }
4355
ARGBAddRow_MMI(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)4356 void ARGBAddRow_MMI(const uint8_t* src_argb0,
4357 const uint8_t* src_argb1,
4358 uint8_t* dst_argb,
4359 int width) {
4360 uint64_t src0, src1, dest;
4361
4362 __asm__ volatile(
4363 "1: \n\t"
4364 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
4365 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
4366 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
4367 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
4368 "paddusb %[dest], %[src0], %[src1] \n\t"
4369 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4370 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4371
4372 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
4373 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
4374 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4375 "daddi %[width], %[width], -0x02 \n\t"
4376 "bnez %[width], 1b \n\t"
4377 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
4378 : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
4379 [dst_ptr] "r"(dst_argb), [width] "r"(width)
4380 : "memory");
4381 }
4382
ARGBSubtractRow_MMI(const uint8_t * src_argb0,const uint8_t * src_argb1,uint8_t * dst_argb,int width)4383 void ARGBSubtractRow_MMI(const uint8_t* src_argb0,
4384 const uint8_t* src_argb1,
4385 uint8_t* dst_argb,
4386 int width) {
4387 uint64_t src0, src1, dest;
4388
4389 __asm__ volatile(
4390 "1: \n\t"
4391 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
4392 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
4393 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
4394 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
4395 "psubusb %[dest], %[src0], %[src1] \n\t"
4396 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4397 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4398
4399 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
4400 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
4401 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4402 "daddi %[width], %[width], -0x02 \n\t"
4403 "bnez %[width], 1b \n\t"
4404 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
4405 : [src0_ptr] "r"(src_argb0), [src1_ptr] "r"(src_argb1),
4406 [dst_ptr] "r"(dst_argb), [width] "r"(width)
4407 : "memory");
4408 }
4409
4410 // Sobel functions which mimics SSSE3.
SobelXRow_MMI(const uint8_t * src_y0,const uint8_t * src_y1,const uint8_t * src_y2,uint8_t * dst_sobelx,int width)4411 void SobelXRow_MMI(const uint8_t* src_y0,
4412 const uint8_t* src_y1,
4413 const uint8_t* src_y2,
4414 uint8_t* dst_sobelx,
4415 int width) {
4416 uint64_t y00 = 0, y10 = 0, y20 = 0;
4417 uint64_t y02 = 0, y12 = 0, y22 = 0;
4418 uint64_t zero = 0x0;
4419 uint64_t sobel = 0x0;
4420 __asm__ volatile(
4421 "1: \n\t"
4422 "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
4423 "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
4424 "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2]
4425 "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
4426
4427 "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i]
4428 "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
4429 "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2]
4430 "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
4431
4432 "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i]
4433 "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
4434 "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2]
4435 "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
4436
4437 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4438 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4439 "punpcklbh %[y20], %[y20], %[zero] \n\t"
4440
4441 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4442 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4443 "punpcklbh %[y22], %[y22], %[zero] \n\t"
4444
4445 "paddh %[y00], %[y00], %[y10] \n\t" // a+b
4446 "paddh %[y20], %[y20], %[y10] \n\t" // c+b
4447 "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c
4448
4449 "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub
4450 "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub
4451 "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub
4452
4453 "pmaxsh %[y10], %[y00], %[y02] \n\t"
4454 "pminsh %[y20], %[y00], %[y02] \n\t"
4455 "psubh %[sobel], %[y10], %[y20] \n\t" // Abs
4456
4457 "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
4458 "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
4459 "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
4460 "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
4461
4462 "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
4463 "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
4464 "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
4465 "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
4466
4467 "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
4468 "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
4469 "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
4470 "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
4471
4472 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4473 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4474 "punpcklbh %[y20], %[y20], %[zero] \n\t"
4475
4476 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4477 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4478 "punpcklbh %[y22], %[y22], %[zero] \n\t"
4479
4480 "paddh %[y00], %[y00], %[y10] \n\t"
4481 "paddh %[y20], %[y20], %[y10] \n\t"
4482 "paddh %[y00], %[y00], %[y20] \n\t"
4483
4484 "paddh %[y02], %[y02], %[y12] \n\t"
4485 "paddh %[y22], %[y22], %[y12] \n\t"
4486 "paddh %[y02], %[y02], %[y22] \n\t"
4487
4488 "pmaxsh %[y10], %[y00], %[y02] \n\t"
4489 "pminsh %[y20], %[y00], %[y02] \n\t"
4490 "psubh %[y00], %[y10], %[y20] \n\t"
4491
4492 "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
4493 "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
4494 "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
4495
4496 "daddiu %[src_y0], %[src_y0], 8 \n\t"
4497 "daddiu %[src_y1], %[src_y1], 8 \n\t"
4498 "daddiu %[src_y2], %[src_y2], 8 \n\t"
4499 "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
4500 "daddiu %[width], %[width], -8 \n\t"
4501 "bgtz %[width], 1b \n\t"
4502 "nop \n\t"
4503 : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
4504 [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
4505 : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
4506 [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
4507 : "memory");
4508 }
4509
SobelYRow_MMI(const uint8_t * src_y0,const uint8_t * src_y1,uint8_t * dst_sobely,int width)4510 void SobelYRow_MMI(const uint8_t* src_y0,
4511 const uint8_t* src_y1,
4512 uint8_t* dst_sobely,
4513 int width) {
4514 uint64_t y00 = 0, y01 = 0, y02 = 0;
4515 uint64_t y10 = 0, y11 = 0, y12 = 0;
4516 uint64_t zero = 0x0;
4517 uint64_t sobel = 0x0;
4518 __asm__ volatile(
4519 "1: \n\t"
4520 "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
4521 "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
4522 "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1]
4523 "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
4524 "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2]
4525 "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
4526
4527 "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i]
4528 "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
4529 "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1]
4530 "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
4531 "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2]
4532 "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
4533
4534 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4535 "punpcklbh %[y01], %[y01], %[zero] \n\t"
4536 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4537
4538 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4539 "punpcklbh %[y11], %[y11], %[zero] \n\t"
4540 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4541
4542 "paddh %[y00], %[y00], %[y01] \n\t" // a+b
4543 "paddh %[y02], %[y02], %[y01] \n\t" // c+b
4544 "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c
4545
4546 "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub
4547 "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub
4548 "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub
4549
4550 "pmaxsh %[y02], %[y00], %[y10] \n\t"
4551 "pminsh %[y12], %[y00], %[y10] \n\t"
4552 "psubh %[sobel], %[y02], %[y12] \n\t" // Abs
4553
4554 "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
4555 "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
4556 "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
4557 "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
4558 "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
4559 "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
4560
4561 "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
4562 "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
4563 "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
4564 "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
4565 "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
4566 "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
4567
4568 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4569 "punpcklbh %[y01], %[y01], %[zero] \n\t"
4570 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4571
4572 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4573 "punpcklbh %[y11], %[y11], %[zero] \n\t"
4574 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4575
4576 "paddh %[y00], %[y00], %[y01] \n\t"
4577 "paddh %[y02], %[y02], %[y01] \n\t"
4578 "paddh %[y00], %[y00], %[y02] \n\t"
4579
4580 "paddh %[y10], %[y10], %[y11] \n\t"
4581 "paddh %[y12], %[y12], %[y11] \n\t"
4582 "paddh %[y10], %[y10], %[y12] \n\t"
4583
4584 "pmaxsh %[y02], %[y00], %[y10] \n\t"
4585 "pminsh %[y12], %[y00], %[y10] \n\t"
4586 "psubh %[y00], %[y02], %[y12] \n\t"
4587
4588 "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
4589 "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
4590 "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
4591
4592 "daddiu %[src_y0], %[src_y0], 8 \n\t"
4593 "daddiu %[src_y1], %[src_y1], 8 \n\t"
4594 "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
4595 "daddiu %[width], %[width], -8 \n\t"
4596 "bgtz %[width], 1b \n\t"
4597 "nop \n\t"
4598 : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
4599 [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
4600 : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
4601 [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
4602 : "memory");
4603 }
4604
SobelRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)4605 void SobelRow_MMI(const uint8_t* src_sobelx,
4606 const uint8_t* src_sobely,
4607 uint8_t* dst_argb,
4608 int width) {
4609 double temp[3];
4610 uint64_t c1 = 0xff000000ff000000;
4611 __asm__ volatile(
4612 "1: \n\t"
4613 "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i]
4614 "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
4615 "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
4616 "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
4617 // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
4618 "paddusb %[t2] , %[t0], %[t1] \n\t"
4619
4620 // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
4621 "punpcklbh %[t0], %[t2], %[t2] \n\t"
4622
4623 // s1 s1 s0 s0->s1 s2 s1 s1 s0 s0 s0 s0
4624 "punpcklbh %[t1], %[t0], %[t0] \n\t"
4625 "or %[t1], %[t1], %[c1] \n\t"
4626 // 255 s1 s1 s1 s55 s0 s0 s0
4627 "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
4628 "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
4629
4630 // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
4631 "punpckhbh %[t1], %[t0], %[t0] \n\t"
4632 "or %[t1], %[t1], %[c1] \n\t"
4633 // 255 s3 s3 s3 255 s2 s2 s2
4634 "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
4635 "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
4636
4637 // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
4638 "punpckhbh %[t0], %[t2], %[t2] \n\t"
4639
4640 // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
4641 "punpcklbh %[t1], %[t0], %[t0] \n\t"
4642 "or %[t1], %[t1], %[c1] \n\t"
4643 "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
4644 "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
4645
4646 // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
4647 "punpckhbh %[t1], %[t0], %[t0] \n\t"
4648 "or %[t1], %[t1], %[c1] \n\t"
4649 "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
4650 "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
4651
4652 "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
4653 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4654 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4655 "daddiu %[width], %[width], -8 \n\t"
4656 "bgtz %[width], 1b \n\t"
4657 "nop \n\t"
4658 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
4659 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4660 [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
4661 : "memory");
4662 }
4663
SobelToPlaneRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_y,int width)4664 void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
4665 const uint8_t* src_sobely,
4666 uint8_t* dst_y,
4667 int width) {
4668 uint64_t tr = 0;
4669 uint64_t tb = 0;
4670 __asm__ volatile(
4671 "1: \n\t"
4672 "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
4673 "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
4674 "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
4675 "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
4676 "paddusb %[tr], %[tr], %[tb] \n\t" // g
4677 "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
4678 "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
4679
4680 "daddiu %[dst_y], %[dst_y], 8 \n\t"
4681 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4682 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4683 "daddiu %[width], %[width], -8 \n\t"
4684 "bgtz %[width], 1b \n\t"
4685 "nop \n\t"
4686 : [tr] "=&f"(tr), [tb] "=&f"(tb)
4687 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4688 [dst_y] "r"(dst_y), [width] "r"(width)
4689 : "memory");
4690 }
4691
SobelXYRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)4692 void SobelXYRow_MMI(const uint8_t* src_sobelx,
4693 const uint8_t* src_sobely,
4694 uint8_t* dst_argb,
4695 int width) {
4696 uint64_t temp[3];
4697 uint64_t result = 0;
4698 uint64_t gb = 0;
4699 uint64_t cr = 0;
4700 uint64_t c1 = 0xffffffffffffffff;
4701 __asm__ volatile(
4702 "1: \n\t"
4703 "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i]
4704 "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
4705 "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
4706 "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
4707 "paddusb %[tg] , %[tr], %[tb] \n\t" // g
4708
4709 // g3 b3 g2 b2 g1 b1 g0 b0
4710 "punpcklbh %[gb], %[tb], %[tg] \n\t"
4711 // c3 r3 r2 r2 c1 r1 c0 r0
4712 "punpcklbh %[cr], %[tr], %[c1] \n\t"
4713 // c1 r1 g1 b1 c0 r0 g0 b0
4714 "punpcklhw %[result], %[gb], %[cr] \n\t"
4715 "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
4716 "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
4717 // c3 r3 g3 b3 c2 r2 g2 b2
4718 "punpckhhw %[result], %[gb], %[cr] \n\t"
4719 "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
4720 "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
4721
4722 // g7 b7 g6 b6 g5 b5 g4 b4
4723 "punpckhbh %[gb], %[tb], %[tg] \n\t"
4724 // c7 r7 c6 r6 c5 r5 c4 r4
4725 "punpckhbh %[cr], %[tr], %[c1] \n\t"
4726 // c5 r5 g5 b5 c4 r4 g4 b4
4727 "punpcklhw %[result], %[gb], %[cr] \n\t"
4728 "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
4729 "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
4730 // c7 r7 g7 b7 c6 r6 g6 b6
4731 "punpckhhw %[result], %[gb], %[cr] \n\t"
4732 "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
4733 "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
4734
4735 "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
4736 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4737 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4738 "daddiu %[width], %[width], -8 \n\t"
4739 "bgtz %[width], 1b \n\t"
4740 "nop \n\t"
4741 : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]),
4742 [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result)
4743 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4744 [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
4745 : "memory");
4746 }
4747
J400ToARGBRow_MMI(const uint8_t * src_y,uint8_t * dst_argb,int width)4748 void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
4749 // Copy a Y to RGB.
4750 uint64_t src, dest;
4751 const uint64_t mask0 = 0x00ffffff00ffffffULL;
4752 const uint64_t mask1 = ~mask0;
4753
4754 __asm__ volatile(
4755 "1: \n\t"
4756 "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
4757 "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
4758 "punpcklbh %[src], %[src], %[src] \n\t"
4759 "punpcklhw %[dest], %[src], %[src] \n\t"
4760 "and %[dest], %[dest], %[mask0] \n\t"
4761 "or %[dest], %[dest], %[mask1] \n\t"
4762 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4763 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4764
4765 "punpckhhw %[dest], %[src], %[src] \n\t"
4766 "and %[dest], %[dest], %[mask0] \n\t"
4767 "or %[dest], %[dest], %[mask1] \n\t"
4768 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
4769 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
4770
4771 "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
4772 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
4773 "daddi %[width], %[width], -0x04 \n\t"
4774 "bnez %[width], 1b \n\t"
4775 : [src] "=&f"(src), [dest] "=&f"(dest)
4776 : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
4777 [mask1] "f"(mask1), [width] "r"(width)
4778 : "memory");
4779 }
4780
I400ToARGBRow_MMI(const uint8_t * src_y,uint8_t * rgb_buf,int width)4781 void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf, int width) {
4782 uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
4783 const uint64_t mask0 = 0x0;
4784 const uint64_t mask1 = 0x55;
4785 const uint64_t mask2 = 0xAA;
4786 const uint64_t mask3 = 0xFF;
4787 const uint64_t mask4 = 0x4A354A354A354A35ULL;
4788 const uint64_t mask5 = 0x0488048804880488ULL;
4789 const uint64_t shift0 = 0x08;
4790 const uint64_t shift1 = 0x06;
4791
4792 __asm__ volatile(
4793 "1: \n\t"
4794 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
4795 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
4796 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
4797 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
4798
4799 "pshufh %[src], %[src_lo], %[mask0] \n\t"
4800 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4801 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4802 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4803 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4804 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4805 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4806 "pshufh %[src], %[src_lo], %[mask1] \n\t"
4807 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4808 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4809 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4810 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4811 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4812 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4813 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4814 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4815 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4816
4817 "pshufh %[src], %[src_lo], %[mask2] \n\t"
4818 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4819 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4820 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4821 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4822 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4823 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4824 "pshufh %[src], %[src_lo], %[mask3] \n\t"
4825 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4826 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4827 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4828 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4829 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4830 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4831 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4832 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
4833 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
4834
4835 "pshufh %[src], %[src_hi], %[mask0] \n\t"
4836 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4837 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4838 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4839 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4840 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4841 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4842 "pshufh %[src], %[src_hi], %[mask1] \n\t"
4843 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4844 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4845 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4846 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4847 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4848 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4849 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4850 "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
4851 "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
4852
4853 "pshufh %[src], %[src_hi], %[mask2] \n\t"
4854 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4855 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4856 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4857 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4858 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4859 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4860 "pshufh %[src], %[src_hi], %[mask3] \n\t"
4861 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4862 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4863 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4864 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4865 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4866 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4867 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4868 "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
4869 "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
4870
4871 "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
4872 "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
4873 "daddi %[width], %[width], -0x08 \n\t"
4874 "bnez %[width], 1b \n\t"
4875 : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
4876 [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
4877 [dest_lo] "=&f"(dest_lo)
4878 : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0),
4879 [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
4880 [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0),
4881 [shift1] "f"(shift1), [width] "r"(width)
4882 : "memory");
4883 }
4884
MirrorRow_MMI(const uint8_t * src,uint8_t * dst,int width)4885 void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
4886 uint64_t source, src0, src1, dest;
4887 const uint64_t mask0 = 0x0;
4888 const uint64_t mask1 = 0x1b;
4889
4890 src += width - 1;
4891 __asm__ volatile(
4892 "1: \n\t"
4893 "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
4894 "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
4895 "punpcklbh %[src0], %[source], %[mask0] \n\t"
4896 "pshufh %[src0], %[src0], %[mask1] \n\t"
4897 "punpckhbh %[src1], %[source], %[mask0] \n\t"
4898 "pshufh %[src1], %[src1], %[mask1] \n\t"
4899 "packushb %[dest], %[src1], %[src0] \n\t"
4900
4901 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4902 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4903
4904 "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
4905 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4906 "daddi %[width], %[width], -0x08 \n\t"
4907 "bnez %[width], 1b \n\t"
4908 : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
4909 [src1] "=&f"(src1)
4910 : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
4911 [mask1] "f"(mask1), [width] "r"(width)
4912 : "memory");
4913 }
4914
MirrorUVRow_MMI(const uint8_t * src_uv,uint8_t * dst_u,uint8_t * dst_v,int width)4915 void MirrorUVRow_MMI(const uint8_t* src_uv,
4916 uint8_t* dst_u,
4917 uint8_t* dst_v,
4918 int width) {
4919 uint64_t src0, src1, dest0, dest1;
4920 const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
4921 const uint64_t mask1 = 0x1b;
4922 const uint64_t shift = 0x08;
4923
4924 src_uv += (width - 1) << 1;
4925
4926 __asm__ volatile(
4927 "1: \n\t"
4928 "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
4929 "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
4930 "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
4931 "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
4932
4933 "and %[dest0], %[src0], %[mask0] \n\t"
4934 "pshufh %[dest0], %[dest0], %[mask1] \n\t"
4935 "and %[dest1], %[src1], %[mask0] \n\t"
4936 "pshufh %[dest1], %[dest1], %[mask1] \n\t"
4937 "packushb %[dest0], %[dest0], %[dest1] \n\t"
4938 "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
4939 "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
4940
4941 "psrlh %[dest0], %[src0], %[shift] \n\t"
4942 "pshufh %[dest0], %[dest0], %[mask1] \n\t"
4943 "psrlh %[dest1], %[src1], %[shift] \n\t"
4944 "pshufh %[dest1], %[dest1], %[mask1] \n\t"
4945 "packushb %[dest0], %[dest0], %[dest1] \n\t"
4946 "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
4947 "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
4948
4949 "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
4950 "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
4951 "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
4952 "daddi %[width], %[width], -0x08 \n\t"
4953 "bnez %[width], 1b \n\t"
4954 : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
4955 [src1] "=&f"(src1)
4956 : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v),
4957 [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1),
4958 [shift] "f"(shift)
4959 : "memory");
4960 }
4961
ARGBMirrorRow_MMI(const uint8_t * src,uint8_t * dst,int width)4962 void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
4963 src += (width - 1) * 4;
4964 uint64_t temp = 0x0;
4965 uint64_t shuff = 0x4e; // 01 00 11 10
4966 __asm__ volatile(
4967 "1: \n\t"
4968 "gsldlc1 %[temp], 3(%[src]) \n\t"
4969 "gsldrc1 %[temp], -4(%[src]) \n\t"
4970 "pshufh %[temp], %[temp], %[shuff] \n\t"
4971 "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
4972 "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
4973
4974 "daddiu %[src], %[src], -0x08 \n\t"
4975 "daddiu %[dst], %[dst], 0x08 \n\t"
4976 "daddiu %[width], %[width], -0x02 \n\t"
4977 "bnez %[width], 1b \n\t"
4978 : [temp] "=&f"(temp)
4979 : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff)
4980 : "memory");
4981 }
4982
SplitUVRow_MMI(const uint8_t * src_uv,uint8_t * dst_u,uint8_t * dst_v,int width)4983 void SplitUVRow_MMI(const uint8_t* src_uv,
4984 uint8_t* dst_u,
4985 uint8_t* dst_v,
4986 int width) {
4987 uint64_t c0 = 0x00ff00ff00ff00ff;
4988 uint64_t temp[4];
4989 uint64_t shift = 0x08;
4990 __asm__ volatile(
4991 "1: \n\t"
4992 "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
4993 "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
4994 "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
4995 "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
4996
4997 "and %[t2], %[t0], %[c0] \n\t"
4998 "and %[t3], %[t1], %[c0] \n\t"
4999 "packushb %[t2], %[t2], %[t3] \n\t"
5000 "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
5001 "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
5002
5003 "psrlh %[t2], %[t0], %[shift] \n\t"
5004 "psrlh %[t3], %[t1], %[shift] \n\t"
5005 "packushb %[t2], %[t2], %[t3] \n\t"
5006 "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
5007 "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
5008
5009 "daddiu %[src_uv], %[src_uv], 16 \n\t"
5010 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5011 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5012 "daddiu %[width], %[width], -8 \n\t"
5013 "bgtz %[width], 1b \n\t"
5014 "nop \n\t"
5015 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
5016 [t3] "=&f"(temp[3])
5017 : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
5018 [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
5019 : "memory");
5020 }
5021
MergeUVRow_MMI(const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_uv,int width)5022 void MergeUVRow_MMI(const uint8_t* src_u,
5023 const uint8_t* src_v,
5024 uint8_t* dst_uv,
5025 int width) {
5026 uint64_t temp[3];
5027 __asm__ volatile(
5028 "1: \n\t"
5029 "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
5030 "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
5031 "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
5032 "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
5033 "punpcklbh %[t2], %[t0], %[t1] \n\t"
5034 "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
5035 "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
5036 "punpckhbh %[t2], %[t0], %[t1] \n\t"
5037 "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
5038 "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
5039
5040 "daddiu %[src_u], %[src_u], 8 \n\t"
5041 "daddiu %[src_v], %[src_v], 8 \n\t"
5042 "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
5043 "daddiu %[width], %[width], -8 \n\t"
5044 "bgtz %[width], 1b \n\t"
5045 "nop \n\t"
5046 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
5047 : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v),
5048 [width] "r"(width)
5049 : "memory");
5050 }
5051
SplitRGBRow_MMI(const uint8_t * src_rgb,uint8_t * dst_r,uint8_t * dst_g,uint8_t * dst_b,int width)5052 void SplitRGBRow_MMI(const uint8_t* src_rgb,
5053 uint8_t* dst_r,
5054 uint8_t* dst_g,
5055 uint8_t* dst_b,
5056 int width) {
5057 uint64_t src[4];
5058 uint64_t dest_hi, dest_lo, dest;
5059
5060 __asm__ volatile(
5061 "1: \n\t"
5062 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
5063 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
5064 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
5065 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
5066 "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
5067 "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
5068 "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
5069 "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
5070 "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
5071 "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
5072
5073 "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
5074 "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
5075 "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
5076 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5077 "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
5078 "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
5079 "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
5080 "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
5081 "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
5082
5083 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
5084 "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
5085 "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
5086 "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
5087 "daddi %[width], %[width], -0x04 \n\t"
5088 "bnez %[width], 1b \n\t"
5089 : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
5090 [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
5091 [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
5092 : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g),
5093 [dstb_ptr] "r"(dst_b), [width] "r"(width)
5094 : "memory");
5095 }
5096
MergeRGBRow_MMI(const uint8_t * src_r,const uint8_t * src_g,const uint8_t * src_b,uint8_t * dst_rgb,int width)5097 void MergeRGBRow_MMI(const uint8_t* src_r,
5098 const uint8_t* src_g,
5099 const uint8_t* src_b,
5100 uint8_t* dst_rgb,
5101 int width) {
5102 uint64_t srcr, srcg, srcb, dest;
5103 uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
5104 const uint64_t temp = 0x0;
5105
5106 __asm__ volatile(
5107 "1: \n\t"
5108 "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
5109 "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
5110 "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
5111 "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
5112 "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
5113 "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
5114
5115 "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
5116 "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
5117 "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
5118 "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
5119
5120 "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
5121 "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
5122 "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5123 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5124 "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
5125 "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
5126 "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
5127 "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
5128 "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
5129 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5130 "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
5131 "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
5132 "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
5133 "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
5134 "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
5135 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5136 "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
5137 "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
5138 "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
5139 "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
5140 "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
5141 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5142 "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
5143 "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
5144
5145 "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
5146 "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
5147 "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
5148 "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
5149 "daddi %[width], %[width], -0x08 \n\t"
5150 "bnez %[width], 1b \n\t"
5151 : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
5152 [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
5153 [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
5154 [srcbz_lo] "=&f"(srcbz_lo)
5155 : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b),
5156 [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp)
5157 : "memory");
5158 }
5159
5160 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
// Averages two vertically adjacent YUY2 rows (src_yuy2 and
// src_yuy2 + src_stride_yuy2) byte-wise and splits the averaged chroma bytes
// into separate U and V rows.
// Each pass of the loop reads 32 bytes (16 pixels) from both rows and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs at least once and
// repeats while the remaining width is positive, so callers are expected to
// pass a positive multiple of 16 (not verified here).
void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
                     int src_stride_yuy2,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  const uint64_t c0 = 0xff00ff00ff00ff00ULL;  // Odd bytes: U/V in YUY2.
  const uint64_t c1 = 0x00ff00ff00ff00ffULL;  // Low byte of each halfword.
  const uint64_t shift = 0x08;
  uint64_t temp[3];
  uint64_t data[4];
  uint64_t src_stride;  // Scratch: address of the second source row.
  __asm__ volatile(
      "1:                                                          \n\t"
      // Bytes 0..15: average the two rows.
      "gsldrc1    %[t0],         0x00(%[src_yuy2])                 \n\t"
      "gsldlc1    %[t0],         0x07(%[src_yuy2])                 \n\t"
      "daddu      %[src_stride], %[src_yuy2], %[src_stride_yuy2]   \n\t"
      "gsldrc1    %[t1],         0x00(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x07(%[src_stride])               \n\t"
      "pavgb      %[t0],         %[t0],       %[t1]                \n\t"

      "gsldrc1    %[t2],         0x08(%[src_yuy2])                 \n\t"
      "gsldlc1    %[t2],         0x0f(%[src_yuy2])                 \n\t"
      "gsldrc1    %[t1],         0x08(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x0f(%[src_stride])               \n\t"
      "pavgb      %[t1],         %[t2],       %[t1]                \n\t"

      // Keep the chroma bytes and pack them to U0 V0 U1 V1 ...
      "and        %[t0],         %[t0],       %[c0]                \n\t"
      "and        %[t1],         %[t1],       %[c0]                \n\t"
      "psrlh      %[t0],         %[t0],       %[shift]             \n\t"
      "psrlh      %[t1],         %[t1],       %[shift]             \n\t"
      "packushb   %[t0],         %[t0],       %[t1]                \n\t"
      // d0 = U as halfwords, d1 = V as halfwords (both read t0 directly).
      "and        %[d0],         %[t0],       %[c1]                \n\t"
      "psrlh      %[d1],         %[t0],       %[shift]             \n\t"

      // Bytes 16..31: same steps, results in d2 (U) and d3 (V).
      "gsldrc1    %[t0],         0x10(%[src_yuy2])                 \n\t"
      "gsldlc1    %[t0],         0x17(%[src_yuy2])                 \n\t"
      "gsldrc1    %[t1],         0x10(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x17(%[src_stride])               \n\t"
      "pavgb      %[t0],         %[t0],       %[t1]                \n\t"

      "gsldrc1    %[t2],         0x18(%[src_yuy2])                 \n\t"
      "gsldlc1    %[t2],         0x1f(%[src_yuy2])                 \n\t"
      "gsldrc1    %[t1],         0x18(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x1f(%[src_stride])               \n\t"
      "pavgb      %[t1],         %[t2],       %[t1]                \n\t"

      "and        %[t0],         %[t0],       %[c0]                \n\t"
      "and        %[t1],         %[t1],       %[c0]                \n\t"
      "psrlh      %[t0],         %[t0],       %[shift]             \n\t"
      "psrlh      %[t1],         %[t1],       %[shift]             \n\t"
      "packushb   %[t0],         %[t0],       %[t1]                \n\t"
      "and        %[d2],         %[t0],       %[c1]                \n\t"
      "psrlh      %[d3],         %[t0],       %[shift]             \n\t"

      // Pack to 8 U bytes and 8 V bytes and store them.
      "packushb   %[d0],         %[d0],       %[d2]                \n\t"
      "packushb   %[d1],         %[d1],       %[d3]                \n\t"
      "gssdrc1    %[d0],         0x0(%[dst_u])                     \n\t"
      "gssdlc1    %[d0],         0x7(%[dst_u])                     \n\t"
      "gssdrc1    %[d1],         0x0(%[dst_v])                     \n\t"
      "gssdlc1    %[d1],         0x7(%[dst_v])                     \n\t"
      "daddiu     %[src_yuy2],   %[src_yuy2], 32                   \n\t"
      "daddiu     %[dst_u],      %[dst_u],    8                    \n\t"
      "daddiu     %[dst_v],      %[dst_v],    8                    \n\t"
      "daddiu     %[width],      %[width],    -16                  \n\t"
      "bgtz       %[width],      1b                                \n\t"
      "nop                                                         \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
        [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
        [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride),
        [src_yuy2] "+&r"(src_yuy2), [dst_u] "+&r"(dst_u),
        [dst_v] "+&r"(dst_v), [width] "+&r"(width)
      : [src_stride_yuy2] "r"(src_stride_yuy2), [c0] "f"(c0), [c1] "f"(c1),
        [shift] "f"(shift)
      : "memory");
}

// Copy row of YUY2 UV's (422) into U and V (422).
// Each pass of the loop reads 32 bytes (16 pixels) of src_yuy2 and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs at least once and
// repeats while the remaining width is positive, so callers are expected to
// pass a positive multiple of 16 (not verified here).
void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  const uint64_t c0 = 0xff00ff00ff00ff00ULL;  // Odd bytes: U/V in YUY2.
  const uint64_t c1 = 0x00ff00ff00ff00ffULL;  // Low byte of each halfword.
  const uint64_t shift = 0x08;
  uint64_t temp[2];
  uint64_t data[4];
  __asm__ volatile(
      "1:                                                  \n\t"
      // Bytes 0..15: keep the chroma bytes, pack to U0 V0 U1 V1 ...
      "gsldrc1    %[t0],        0x00(%[src_yuy2])          \n\t"
      "gsldlc1    %[t0],        0x07(%[src_yuy2])          \n\t"
      "gsldrc1    %[t1],        0x08(%[src_yuy2])          \n\t"
      "gsldlc1    %[t1],        0x0f(%[src_yuy2])          \n\t"
      "and        %[t0],        %[t0],       %[c0]         \n\t"
      "and        %[t1],        %[t1],       %[c0]         \n\t"
      "psrlh      %[t0],        %[t0],       %[shift]      \n\t"
      "psrlh      %[t1],        %[t1],       %[shift]      \n\t"
      "packushb   %[t0],        %[t0],       %[t1]         \n\t"
      // d0 = U as halfwords, d1 = V as halfwords (both read t0 directly).
      "and        %[d0],        %[t0],       %[c1]         \n\t"
      "psrlh      %[d1],        %[t0],       %[shift]      \n\t"

      // Bytes 16..31: same steps, results in d2 (U) and d3 (V).
      "gsldrc1    %[t0],        0x10(%[src_yuy2])          \n\t"
      "gsldlc1    %[t0],        0x17(%[src_yuy2])          \n\t"
      "gsldrc1    %[t1],        0x18(%[src_yuy2])          \n\t"
      "gsldlc1    %[t1],        0x1f(%[src_yuy2])          \n\t"
      "and        %[t0],        %[t0],       %[c0]         \n\t"
      "and        %[t1],        %[t1],       %[c0]         \n\t"
      "psrlh      %[t0],        %[t0],       %[shift]      \n\t"
      "psrlh      %[t1],        %[t1],       %[shift]      \n\t"
      "packushb   %[t0],        %[t0],       %[t1]         \n\t"
      "and        %[d2],        %[t0],       %[c1]         \n\t"
      "psrlh      %[d3],        %[t0],       %[shift]      \n\t"

      // Pack to 8 U bytes and 8 V bytes and store them.
      "packushb   %[d0],        %[d0],       %[d2]         \n\t"
      "packushb   %[d1],        %[d1],       %[d3]         \n\t"
      "gssdrc1    %[d0],        0x0(%[dst_u])              \n\t"
      "gssdlc1    %[d0],        0x7(%[dst_u])              \n\t"
      "gssdrc1    %[d1],        0x0(%[dst_v])              \n\t"
      "gssdlc1    %[d1],        0x7(%[dst_v])              \n\t"
      "daddiu     %[src_yuy2],  %[src_yuy2], 32            \n\t"
      "daddiu     %[dst_u],     %[dst_u],    8             \n\t"
      "daddiu     %[dst_v],     %[dst_v],    8             \n\t"
      "daddiu     %[width],     %[width],    -16           \n\t"
      "bgtz       %[width],     1b                         \n\t"
      "nop                                                 \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
        [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]),
        [src_yuy2] "+&r"(src_yuy2), [dst_u] "+&r"(dst_u),
        [dst_v] "+&r"(dst_v), [width] "+&r"(width)
      : [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
      : "memory");
}

// Copy row of YUY2 Y's (422) into Y (420/422).
// Each pass of the loop reads 16 bytes (8 pixels) of src_yuy2, keeps the even
// bytes and writes the resulting 8 bytes to dst_y. The loop body runs at least
// once and repeats while the remaining width is positive, so callers are
// expected to pass a positive multiple of 8 (not verified here).
void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
  const uint64_t c0 = 0x00ff00ff00ff00ffULL;  // Even bytes: Y in YUY2.
  uint64_t temp[2];
  __asm__ volatile(
      "1:                                                 \n\t"
      "gsldrc1    %[t0],        0x00(%[src_yuy2])         \n\t"
      "gsldlc1    %[t0],        0x07(%[src_yuy2])         \n\t"
      "gsldrc1    %[t1],        0x08(%[src_yuy2])         \n\t"
      "gsldlc1    %[t1],        0x0f(%[src_yuy2])         \n\t"
      "and        %[t0],        %[t0],       %[c0]        \n\t"
      "and        %[t1],        %[t1],       %[c0]        \n\t"
      "packushb   %[t0],        %[t0],       %[t1]        \n\t"
      "gssdrc1    %[t0],        0x0(%[dst_y])             \n\t"
      "gssdlc1    %[t0],        0x7(%[dst_y])             \n\t"
      "daddiu     %[src_yuy2],  %[src_yuy2], 16           \n\t"
      "daddiu     %[dst_y],     %[dst_y],    8            \n\t"
      "daddiu     %[width],     %[width],    -8           \n\t"
      "bgtz       %[width],     1b                        \n\t"
      "nop                                                \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]),
        [src_yuy2] "+&r"(src_yuy2), [dst_y] "+&r"(dst_y),
        [width] "+&r"(width)
      : [c0] "f"(c0)
      : "memory");
}

// Filter 2 rows of UYVY UV's (422) into U and V (420).
// The two rows (src_uyvy and src_uyvy + src_stride_uyvy) are averaged
// byte-wise, then the even (chroma) bytes are split into U and V.
// Each pass of the loop reads 32 bytes (16 pixels) from both rows and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs at least once and
// repeats while the remaining width is positive, so callers are expected to
// pass a positive multiple of 16 (not verified here).
void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
                     int src_stride_uyvy,
                     uint8_t* dst_u,
                     uint8_t* dst_v,
                     int width) {
  // Even bytes: U/V in UYVY. Also the low byte of each halfword.
  const uint64_t c0 = 0x00ff00ff00ff00ffULL;
  const uint64_t shift = 0x08;
  uint64_t temp[3];
  uint64_t data[4];
  uint64_t src_stride;  // Scratch: address of the second source row.
  __asm__ volatile(
      "1:                                                          \n\t"
      // Bytes 0..15: average the two rows.
      "gsldrc1    %[t0],         0x00(%[src_uyvy])                 \n\t"
      "gsldlc1    %[t0],         0x07(%[src_uyvy])                 \n\t"
      "daddu      %[src_stride], %[src_uyvy], %[src_stride_uyvy]   \n\t"
      "gsldrc1    %[t1],         0x00(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x07(%[src_stride])               \n\t"
      "pavgb      %[t0],         %[t0],       %[t1]                \n\t"

      "gsldrc1    %[t2],         0x08(%[src_uyvy])                 \n\t"
      "gsldlc1    %[t2],         0x0f(%[src_uyvy])                 \n\t"
      "gsldrc1    %[t1],         0x08(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x0f(%[src_stride])               \n\t"
      "pavgb      %[t1],         %[t2],       %[t1]                \n\t"

      // Keep the chroma bytes and pack them to U0 V0 U1 V1 ...
      "and        %[t0],         %[t0],       %[c0]                \n\t"
      "and        %[t1],         %[t1],       %[c0]                \n\t"
      "packushb   %[t0],         %[t0],       %[t1]                \n\t"
      // d0 = U as halfwords, d1 = V as halfwords (both read t0 directly).
      "and        %[d0],         %[t0],       %[c0]                \n\t"
      "psrlh      %[d1],         %[t0],       %[shift]             \n\t"

      // Bytes 16..31: same steps, results in d2 (U) and d3 (V).
      "gsldrc1    %[t0],         0x10(%[src_uyvy])                 \n\t"
      "gsldlc1    %[t0],         0x17(%[src_uyvy])                 \n\t"
      "gsldrc1    %[t1],         0x10(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x17(%[src_stride])               \n\t"
      "pavgb      %[t0],         %[t0],       %[t1]                \n\t"

      "gsldrc1    %[t2],         0x18(%[src_uyvy])                 \n\t"
      "gsldlc1    %[t2],         0x1f(%[src_uyvy])                 \n\t"
      "gsldrc1    %[t1],         0x18(%[src_stride])               \n\t"
      "gsldlc1    %[t1],         0x1f(%[src_stride])               \n\t"
      "pavgb      %[t1],         %[t2],       %[t1]                \n\t"

      "and        %[t0],         %[t0],       %[c0]                \n\t"
      "and        %[t1],         %[t1],       %[c0]                \n\t"
      "packushb   %[t0],         %[t0],       %[t1]                \n\t"
      "and        %[d2],         %[t0],       %[c0]                \n\t"
      "psrlh      %[d3],         %[t0],       %[shift]             \n\t"

      // Pack to 8 U bytes and 8 V bytes and store them.
      "packushb   %[d0],         %[d0],       %[d2]                \n\t"
      "packushb   %[d1],         %[d1],       %[d3]                \n\t"
      "gssdrc1    %[d0],         0x0(%[dst_u])                     \n\t"
      "gssdlc1    %[d0],         0x7(%[dst_u])                     \n\t"
      "gssdrc1    %[d1],         0x0(%[dst_v])                     \n\t"
      "gssdlc1    %[d1],         0x7(%[dst_v])                     \n\t"
      "daddiu     %[src_uyvy],   %[src_uyvy], 32                   \n\t"
      "daddiu     %[dst_u],      %[dst_u],    8                    \n\t"
      "daddiu     %[dst_v],      %[dst_v],    8                    \n\t"
      "daddiu     %[width],      %[width],    -16                  \n\t"
      "bgtz       %[width],      1b                                \n\t"
      "nop                                                         \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
        [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
        [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride),
        [src_uyvy] "+&r"(src_uyvy), [dst_u] "+&r"(dst_u),
        [dst_v] "+&r"(dst_v), [width] "+&r"(width)
      : [src_stride_uyvy] "r"(src_stride_uyvy), [c0] "f"(c0),
        [shift] "f"(shift)
      : "memory");
}

// Copy row of UYVY UV's (422) into U and V (422).
// Each pass of the loop reads 32 bytes (16 pixels) of src_uyvy and writes
// 8 bytes to dst_u and 8 bytes to dst_v. The loop body runs at least once and
// repeats while the remaining width is positive, so callers are expected to
// pass a positive multiple of 16 (not verified here).
void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  // Even bytes: U/V in UYVY. Also the low byte of each halfword.
  const uint64_t c0 = 0x00ff00ff00ff00ffULL;
  const uint64_t shift = 0x08;
  uint64_t temp[2];
  uint64_t data[4];
  __asm__ volatile(
      "1:                                                  \n\t"
      // Bytes 0..15: keep the chroma bytes, pack to U0 V0 U1 V1 ...
      "gsldrc1    %[t0],        0x00(%[src_uyvy])          \n\t"
      "gsldlc1    %[t0],        0x07(%[src_uyvy])          \n\t"
      "gsldrc1    %[t1],        0x08(%[src_uyvy])          \n\t"
      "gsldlc1    %[t1],        0x0f(%[src_uyvy])          \n\t"
      "and        %[t0],        %[t0],       %[c0]         \n\t"
      "and        %[t1],        %[t1],       %[c0]         \n\t"
      "packushb   %[t0],        %[t0],       %[t1]         \n\t"
      // d0 = U as halfwords, d1 = V as halfwords (both read t0 directly).
      "and        %[d0],        %[t0],       %[c0]         \n\t"
      "psrlh      %[d1],        %[t0],       %[shift]      \n\t"

      // Bytes 16..31: same steps, results in d2 (U) and d3 (V).
      "gsldrc1    %[t0],        0x10(%[src_uyvy])          \n\t"
      "gsldlc1    %[t0],        0x17(%[src_uyvy])          \n\t"
      "gsldrc1    %[t1],        0x18(%[src_uyvy])          \n\t"
      "gsldlc1    %[t1],        0x1f(%[src_uyvy])          \n\t"
      "and        %[t0],        %[t0],       %[c0]         \n\t"
      "and        %[t1],        %[t1],       %[c0]         \n\t"
      "packushb   %[t0],        %[t0],       %[t1]         \n\t"
      "and        %[d2],        %[t0],       %[c0]         \n\t"
      "psrlh      %[d3],        %[t0],       %[shift]      \n\t"

      // Pack to 8 U bytes and 8 V bytes and store them.
      "packushb   %[d0],        %[d0],       %[d2]         \n\t"
      "packushb   %[d1],        %[d1],       %[d3]         \n\t"
      "gssdrc1    %[d0],        0x0(%[dst_u])              \n\t"
      "gssdlc1    %[d0],        0x7(%[dst_u])              \n\t"
      "gssdrc1    %[d1],        0x0(%[dst_v])              \n\t"
      "gssdlc1    %[d1],        0x7(%[dst_v])              \n\t"
      "daddiu     %[src_uyvy],  %[src_uyvy], 32            \n\t"
      "daddiu     %[dst_u],     %[dst_u],    8             \n\t"
      "daddiu     %[dst_v],     %[dst_v],    8             \n\t"
      "daddiu     %[width],     %[width],    -16           \n\t"
      "bgtz       %[width],     1b                         \n\t"
      "nop                                                 \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
        [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3]),
        [src_uyvy] "+&r"(src_uyvy), [dst_u] "+&r"(dst_u),
        [dst_v] "+&r"(dst_v), [width] "+&r"(width)
      : [c0] "f"(c0), [shift] "f"(shift)
      : "memory");
}

// Copy row of UYVY Y's (422) into Y (420/422).
// Each pass of the loop reads 16 bytes (8 pixels) of src_uyvy, keeps the odd
// bytes and writes the resulting 8 bytes to dst_y. The loop body runs at least
// once and repeats while the remaining width is positive, so callers are
// expected to pass a positive multiple of 8 (not verified here).
void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
  const uint64_t c0 = 0x00ff00ff00ff00ffULL;  // Low byte of each halfword.
  const uint64_t shift = 0x08;
  uint64_t temp[2];
  __asm__ volatile(
      "1:                                                 \n\t"
      "gsldrc1    %[t0],        0x00(%[src_uyvy])         \n\t"
      "gsldlc1    %[t0],        0x07(%[src_uyvy])         \n\t"
      "gsldrc1    %[t1],        0x08(%[src_uyvy])         \n\t"
      "gsldlc1    %[t1],        0x0f(%[src_uyvy])         \n\t"
      // Shift the odd (Y) bytes down into the even positions, then mask.
      "dsrl       %[t0],        %[t0],       %[shift]     \n\t"
      "dsrl       %[t1],        %[t1],       %[shift]     \n\t"
      "and        %[t0],        %[t0],       %[c0]        \n\t"
      "and        %[t1],        %[t1],       %[c0]        \n\t"
      "packushb   %[t0],        %[t0],       %[t1]        \n\t"
      "gssdrc1    %[t0],        0x0(%[dst_y])             \n\t"
      "gssdlc1    %[t0],        0x7(%[dst_y])             \n\t"
      "daddiu     %[src_uyvy],  %[src_uyvy], 16           \n\t"
      "daddiu     %[dst_y],     %[dst_y],    8            \n\t"
      "daddiu     %[width],     %[width],    -8           \n\t"
      "bgtz       %[width],     1b                        \n\t"
      "nop                                                \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]),
        [src_uyvy] "+&r"(src_uyvy), [dst_y] "+&r"(dst_y),
        [width] "+&r"(width)
      : [c0] "f"(c0), [shift] "f"(shift)
      : "memory");
}

// Blend src_argb0 over src_argb1 and store to dst_argb.
// dst_argb may be src_argb0 or src_argb1.
// This code mimics the SSSE3 version for better testability.
// Per colour channel the asm computes
//   dst = saturate(src0 + ((src1 * (255 - alpha0)) >> 8))
// where alpha0 is byte 3 of the src_argb0 pixel; the stored alpha byte is
// forced to 0xFF. Each pass handles 8 bytes (2 pixels). The loop ends only
// when the remaining width reaches exactly zero, so width is expected to be a
// positive multiple of 2 (not verified here).
void ARGBBlendRow_MMI(const uint8_t* src_argb0,
                      const uint8_t* src_argb1,
                      uint8_t* dst_argb,
                      int width) {
  uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
      dest_lo;
  const uint64_t mask0 = 0x0;                    // Zero for byte unpacking.
  const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;  // Colour bytes of 2 pixels.
  const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // 255 in every halfword.
  const uint64_t mask3 = 0xFF;       // pshufh control: broadcast halfword 3.
  const uint64_t mask4 = ~mask1;     // Alpha bytes of 2 pixels.
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1:                                                          \n\t"
      // First pixel: widen both sources to halfwords.
      "gsldlc1    %[src0],     0x07(%[src0_ptr])                   \n\t"
      "gsldrc1    %[src0],     0x00(%[src0_ptr])                   \n\t"
      "punpcklbh  %[src0_lo],  %[src0],      %[mask0]              \n\t"

      "gsldlc1    %[src1],     0x07(%[src1_ptr])                   \n\t"
      "gsldrc1    %[src1],     0x00(%[src1_ptr])                   \n\t"
      "punpcklbh  %[src1_lo],  %[src1],      %[mask0]              \n\t"

      // alpha = 255 - a0 in all four lanes.
      "psubush    %[alpha],    %[mask2],     %[src0_lo]            \n\t"
      "pshufh     %[alpha],    %[alpha],     %[mask3]              \n\t"
      "pmullh     %[dest_lo],  %[src1_lo],   %[alpha]              \n\t"
      "psrlh      %[dest_lo],  %[dest_lo],   %[shift]              \n\t"
      "paddush    %[dest_lo],  %[dest_lo],   %[src0_lo]            \n\t"

      // Second pixel: same computation on the high halves.
      "punpckhbh  %[src0_hi],  %[src0],      %[mask0]              \n\t"
      "punpckhbh  %[src1_hi],  %[src1],      %[mask0]              \n\t"

      "psubush    %[alpha],    %[mask2],     %[src0_hi]            \n\t"
      "pshufh     %[alpha],    %[alpha],     %[mask3]              \n\t"
      "pmullh     %[dest_hi],  %[src1_hi],   %[alpha]              \n\t"
      "psrlh      %[dest_hi],  %[dest_hi],   %[shift]              \n\t"
      "paddush    %[dest_hi],  %[dest_hi],   %[src0_hi]            \n\t"

      // Pack back to bytes and force the alpha bytes to 0xFF.
      "packushb   %[dest],     %[dest_lo],   %[dest_hi]            \n\t"
      "and        %[dest],     %[dest],      %[mask1]              \n\t"
      "or         %[dest],     %[dest],      %[mask4]              \n\t"
      "gssdlc1    %[dest],     0x07(%[dst_ptr])                    \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])                    \n\t"

      "daddiu     %[src0_ptr], %[src0_ptr],  0x08                  \n\t"
      "daddiu     %[src1_ptr], %[src1_ptr],  0x08                  \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x08                  \n\t"
      "daddiu     %[width],    %[width],     -0x02                 \n\t"
      "bnez       %[width],    1b                                  \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
        [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
        [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [src0_ptr] "+&r"(src_argb0), [src1_ptr] "+&r"(src_argb1),
        [dst_ptr] "+&r"(dst_argb), [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [mask3] "f"(mask3), [mask4] "f"(mask4), [shift] "f"(shift)
      : "memory");
}

// Blends two planes with a per-pixel alpha plane. For every byte the asm
// computes
//   dst = (src0 * alpha + src1 * (255 - alpha) + 255) >> 8
// Each pass handles 8 bytes of every plane. The loop ends only when the
// remaining width reaches exactly zero, so width is expected to be a positive
// multiple of 8 (not verified here).
void BlendPlaneRow_MMI(const uint8_t* src0,
                       const uint8_t* src1,
                       const uint8_t* alpha,
                       uint8_t* dst,
                       int width) {
  uint64_t source0, source1, dest, alph;
  uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
      dest_lo;
  uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
  const uint64_t mask0 = 0x0;                    // Zero for byte unpacking.
  const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;  // 255 in every byte.
  const uint64_t mask2 = 0x00FF00FF00FF00FFULL;  // 255 in every halfword.
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1:                                                            \n\t"
      // Load 8 bytes of each source and widen them to halfwords.
      "gsldlc1    %[src0],      0x07(%[src0_ptr])                    \n\t"
      "gsldrc1    %[src0],      0x00(%[src0_ptr])                    \n\t"
      "punpcklbh  %[src0_lo],   %[src0],       %[mask0]              \n\t"
      "punpckhbh  %[src0_hi],   %[src0],       %[mask0]              \n\t"

      "gsldlc1    %[src1],      0x07(%[src1_ptr])                    \n\t"
      "gsldrc1    %[src1],      0x00(%[src1_ptr])                    \n\t"
      "punpcklbh  %[src1_lo],   %[src1],       %[mask0]              \n\t"
      "punpckhbh  %[src1_hi],   %[src1],       %[mask0]              \n\t"

      // alpha and its complement (255 - alpha), both widened.
      "gsldlc1    %[alpha],     0x07(%[alpha_ptr])                   \n\t"
      "gsldrc1    %[alpha],     0x00(%[alpha_ptr])                   \n\t"
      "psubusb    %[alpha_r],   %[mask1],      %[alpha]              \n\t"
      "punpcklbh  %[alpha_lo],  %[alpha],      %[mask0]              \n\t"
      "punpckhbh  %[alpha_hi],  %[alpha],      %[mask0]              \n\t"
      "punpcklbh  %[alpha_rlo], %[alpha_r],    %[mask0]              \n\t"
      "punpckhbh  %[alpha_rhi], %[alpha_r],    %[mask0]              \n\t"

      // Low four bytes.
      "pmullh     %[dest_lo],   %[src0_lo],    %[alpha_lo]           \n\t"
      "pmullh     %[dest],      %[src1_lo],    %[alpha_rlo]          \n\t"
      "paddush    %[dest_lo],   %[dest_lo],    %[dest]               \n\t"
      "paddush    %[dest_lo],   %[dest_lo],    %[mask2]              \n\t"
      "psrlh      %[dest_lo],   %[dest_lo],    %[shift]              \n\t"

      // High four bytes.
      "pmullh     %[dest_hi],   %[src0_hi],    %[alpha_hi]           \n\t"
      "pmullh     %[dest],      %[src1_hi],    %[alpha_rhi]          \n\t"
      "paddush    %[dest_hi],   %[dest_hi],    %[dest]               \n\t"
      "paddush    %[dest_hi],   %[dest_hi],    %[mask2]              \n\t"
      "psrlh      %[dest_hi],   %[dest_hi],    %[shift]              \n\t"

      "packushb   %[dest],      %[dest_lo],    %[dest_hi]            \n\t"
      "gssdlc1    %[dest],      0x07(%[dst_ptr])                     \n\t"
      "gssdrc1    %[dest],      0x00(%[dst_ptr])                     \n\t"

      "daddiu     %[src0_ptr],  %[src0_ptr],   0x08                  \n\t"
      "daddiu     %[src1_ptr],  %[src1_ptr],   0x08                  \n\t"
      "daddiu     %[alpha_ptr], %[alpha_ptr],  0x08                  \n\t"
      "daddiu     %[dst_ptr],   %[dst_ptr],    0x08                  \n\t"
      "daddiu     %[width],     %[width],      -0x08                 \n\t"
      "bnez       %[width],     1b                                   \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
        [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
        [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
        [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
        [alpha_r] "=&f"(alpha_rev), [src0_ptr] "+&r"(src0),
        [src1_ptr] "+&r"(src1), [alpha_ptr] "+&r"(alpha),
        [dst_ptr] "+&r"(dst), [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [shift] "f"(shift)
      : "memory");
}

// Multiply source RGB by alpha and store to destination.
// This code mimics the SSSE3 version for better testability.
// Every byte v is widened to the halfword v * 0x0101; each colour channel then
// becomes ((a * 0x0101) * (c * 0x0101)) >> 24, and the alpha byte is copied
// from the source unchanged. Each pass handles 8 bytes (2 pixels). The loop
// ends only when the remaining width reaches exactly zero, so width is
// expected to be a positive multiple of 2 (not verified here).
void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
                          uint8_t* dst_argb,
                          int width) {
  uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
  const uint64_t mask0 = 0xFF;  // pshufh control: broadcast halfword 3.
  const uint64_t mask1 = 0xFF000000FF000000ULL;  // Alpha bytes of 2 pixels.
  const uint64_t mask2 = ~mask1;                 // Colour bytes of 2 pixels.
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1:                                                        \n\t"
      "gsldlc1    %[src],      0x07(%[src_ptr])                  \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])                  \n\t"
      // Duplicate every byte into both halves of a halfword.
      "punpcklbh  %[src_lo],   %[src],       %[src]              \n\t"
      "punpckhbh  %[src_hi],   %[src],       %[src]              \n\t"

      // Multiply each pixel by its own (broadcast) alpha.
      "pshufh     %[alpha],    %[src_lo],    %[mask0]            \n\t"
      "pmulhuh    %[dest_lo],  %[alpha],     %[src_lo]           \n\t"
      "psrlh      %[dest_lo],  %[dest_lo],   %[shift]            \n\t"
      "pshufh     %[alpha],    %[src_hi],    %[mask0]            \n\t"
      "pmulhuh    %[dest_hi],  %[alpha],     %[src_hi]           \n\t"
      "psrlh      %[dest_hi],  %[dest_hi],   %[shift]            \n\t"

      // Pack to bytes and restore the original alpha bytes.
      "packushb   %[dest],     %[dest_lo],   %[dest_hi]          \n\t"
      "and        %[dest],     %[dest],      %[mask2]            \n\t"
      "and        %[src],      %[src],       %[mask1]            \n\t"
      "or         %[dest],     %[dest],      %[src]              \n\t"
      "gssdlc1    %[dest],     0x07(%[dst_ptr])                  \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])                  \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x08                \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x08                \n\t"
      "daddiu     %[width],    %[width],     -0x02               \n\t"
      "bnez       %[width],    1b                                \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
        [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
        [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha),
        [src_ptr] "+&r"(src_argb), [dst_ptr] "+&r"(dst_argb),
        [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [shift] "f"(shift)
      : "memory");
}

// Builds one row of a 4-channel cumulative sum. For each 4-byte pixel of row,
// the four bytes are added to a running per-channel 32-bit sum, and
//   cumsum[4 * x + c] = running_sum[c] + previous_cumsum[4 * x + c]
// is stored. Each pass handles one pixel (4 source bytes, 16 bytes of
// previous_cumsum and of cumsum). The loop ends only when the remaining width
// reaches exactly zero, so width is expected to be positive (not verified
// here).
void ComputeCumulativeSumRow_MMI(const uint8_t* row,
                                 int32_t* cumsum,
                                 const int32_t* previous_cumsum,
                                 int width) {
  int64_t row_sum[2] = {0, 0};  // Two 32-bit running sums per element.
  uint64_t src, dest0, dest1, presrc0, presrc1;
  const uint64_t mask = 0x0;  // Zero for unpacking.

  __asm__ volatile(
      "xor        %[row_sum0], %[row_sum0],  %[row_sum0]         \n\t"
      "xor        %[row_sum1], %[row_sum1],  %[row_sum1]         \n\t"

      "1:                                                        \n\t"
      "gslwlc1    %[src],      0x03(%[row_ptr])                  \n\t"
      "gslwrc1    %[src],      0x00(%[row_ptr])                  \n\t"

      // Widen the four bytes to four 32-bit values (two per register).
      "punpcklbh  %[src],      %[src],       %[mask]             \n\t"
      "punpcklhw  %[dest0],    %[src],       %[mask]             \n\t"
      "punpckhhw  %[dest1],    %[src],       %[mask]             \n\t"

      "paddw      %[row_sum0], %[row_sum0],  %[dest0]            \n\t"
      "paddw      %[row_sum1], %[row_sum1],  %[dest1]            \n\t"

      "gsldlc1    %[presrc0],  0x07(%[pre_ptr])                  \n\t"
      "gsldrc1    %[presrc0],  0x00(%[pre_ptr])                  \n\t"
      "gsldlc1    %[presrc1],  0x0f(%[pre_ptr])                  \n\t"
      "gsldrc1    %[presrc1],  0x08(%[pre_ptr])                  \n\t"

      "paddw      %[dest0],    %[row_sum0],  %[presrc0]          \n\t"
      "paddw      %[dest1],    %[row_sum1],  %[presrc1]          \n\t"

      "gssdlc1    %[dest0],    0x07(%[dst_ptr])                  \n\t"
      "gssdrc1    %[dest0],    0x00(%[dst_ptr])                  \n\t"
      "gssdlc1    %[dest1],    0x0f(%[dst_ptr])                  \n\t"
      "gssdrc1    %[dest1],    0x08(%[dst_ptr])                  \n\t"

      "daddiu     %[row_ptr],  %[row_ptr],   0x04                \n\t"
      "daddiu     %[pre_ptr],  %[pre_ptr],   0x10                \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x10                \n\t"
      "daddiu     %[width],    %[width],     -0x01               \n\t"
      "bnez       %[width],    1b                                \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(src), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
        [row_sum0] "+&f"(row_sum[0]), [row_sum1] "+&f"(row_sum[1]),
        [presrc0] "=&f"(presrc0), [presrc1] "=&f"(presrc1),
        [row_ptr] "+&r"(row), [pre_ptr] "+&r"(previous_cumsum),
        [dst_ptr] "+&r"(cumsum), [width] "+&r"(width)
      : [mask] "f"(mask)
      : "memory");
}

// Blends the row at src_ptr with the row at src_ptr + src_stride and writes
// the result to dst_ptr:
//   source_y_fraction == 0   : copies the first row,
//   source_y_fraction == 128 : byte-wise average (pavgb) of the two rows,
//   otherwise                : (row0 * (256 - f) + row1 * f + 128) >> 8
//                              with f = source_y_fraction.
// Every path handles 8 bytes per pass; the loop body runs at least once and
// repeats while the remaining width is positive, so callers are expected to
// pass a positive multiple of 8 (not verified here).
void InterpolateRow_MMI(uint8_t* dst_ptr,
                        const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        int width,
                        int source_y_fraction) {
  if (source_y_fraction == 0) {
    // Copy path. Uses the same unaligned load/store pairs as the other paths
    // and a compiler-allocated scratch register instead of a hard-coded one.
    uint64_t pixels;
    __asm__ volatile(
        "1:                                                   \n\t"
        "gsldrc1    %[pixels],   0x0(%[src_ptr])              \n\t"
        "gsldlc1    %[pixels],   0x7(%[src_ptr])              \n\t"
        "gssdrc1    %[pixels],   0x0(%[dst_ptr])              \n\t"
        "gssdlc1    %[pixels],   0x7(%[dst_ptr])              \n\t"
        "daddiu     %[src_ptr],  %[src_ptr],   8              \n\t"
        "daddiu     %[dst_ptr],  %[dst_ptr],   8              \n\t"
        "daddiu     %[width],    %[width],     -8             \n\t"
        "bgtz       %[width],    1b                           \n\t"
        "nop                                                  \n\t"
        : [pixels] "=&f"(pixels), [src_ptr] "+&r"(src_ptr),
          [dst_ptr] "+&r"(dst_ptr), [width] "+&r"(width)
        :
        : "memory");
    return;
  }
  if (source_y_fraction == 128) {
    uint64_t uv;
    uint64_t uv_stride;
    uint64_t row1;  // Scratch: address of the second row.
    __asm__ volatile(
        "1:                                                   \n\t"
        "gsldrc1    %[uv],        0x0(%[src_ptr])             \n\t"
        "gsldlc1    %[uv],        0x7(%[src_ptr])             \n\t"
        "daddu      %[row1],      %[src_ptr],   %[stride]     \n\t"
        "gsldrc1    %[uv_stride], 0x0(%[row1])                \n\t"
        "gsldlc1    %[uv_stride], 0x7(%[row1])                \n\t"

        "pavgb      %[uv],        %[uv],        %[uv_stride]  \n\t"
        "gssdrc1    %[uv],        0x0(%[dst_ptr])             \n\t"
        "gssdlc1    %[uv],        0x7(%[dst_ptr])             \n\t"

        "daddiu     %[src_ptr],   %[src_ptr],   8             \n\t"
        "daddiu     %[dst_ptr],   %[dst_ptr],   8             \n\t"
        "daddiu     %[width],     %[width],     -8            \n\t"
        "bgtz       %[width],     1b                          \n\t"
        "nop                                                  \n\t"
        : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride), [row1] "=&r"(row1),
          [src_ptr] "+&r"(src_ptr), [dst_ptr] "+&r"(dst_ptr),
          [width] "+&r"(width)
        : [stride] "r"((int64_t)src_stride)
        : "memory");
    return;
  }
  const uint8_t* src_ptr1 = src_ptr + src_stride;
  uint64_t temp;
  uint64_t data[4];
  const uint64_t zero = 0x0;
  const uint64_t c0 = 0x0080008000800080ULL;  // Rounding term: 128 per lane.
  uint64_t fy0 = 0x0100010001000100ULL;       // 256 per lane, becomes 256 - f.
  uint64_t fy1 = (uint64_t)source_y_fraction;  // f, broadcast by the asm.
  const uint64_t shift = 0x8;
  __asm__ volatile(
      // Broadcast f to all four halfwords and derive 256 - f.
      "pshufh     %[fy1],      %[fy1],       %[zero]           \n\t"
      "psubh      %[fy0],      %[fy0],       %[fy1]            \n\t"
      "1:                                                      \n\t"
      "gsldrc1    %[t0],       0x0(%[src_ptr])                 \n\t"
      "gsldlc1    %[t0],       0x7(%[src_ptr])                 \n\t"
      "punpcklbh  %[d0],       %[t0],        %[zero]           \n\t"
      "punpckhbh  %[d1],       %[t0],        %[zero]           \n\t"
      "gsldrc1    %[t0],       0x0(%[src_ptr1])                \n\t"
      "gsldlc1    %[t0],       0x7(%[src_ptr1])                \n\t"
      "punpcklbh  %[d2],       %[t0],        %[zero]           \n\t"
      "punpckhbh  %[d3],       %[t0],        %[zero]           \n\t"

      // Low four bytes.
      "pmullh     %[d0],       %[d0],        %[fy0]            \n\t"
      "pmullh     %[d2],       %[d2],        %[fy1]            \n\t"
      "paddh      %[d0],       %[d0],        %[d2]             \n\t"
      "paddh      %[d0],       %[d0],        %[c0]             \n\t"
      "psrlh      %[d0],       %[d0],        %[shift]          \n\t"

      // High four bytes.
      "pmullh     %[d1],       %[d1],        %[fy0]            \n\t"
      "pmullh     %[d3],       %[d3],        %[fy1]            \n\t"
      "paddh      %[d1],       %[d1],        %[d3]             \n\t"
      "paddh      %[d1],       %[d1],        %[c0]             \n\t"
      "psrlh      %[d1],       %[d1],        %[shift]          \n\t"

      "packushb   %[d0],       %[d0],        %[d1]             \n\t"
      "gssdrc1    %[d0],       0x0(%[dst_ptr])                 \n\t"
      "gssdlc1    %[d0],       0x7(%[dst_ptr])                 \n\t"
      "daddiu     %[src_ptr],  %[src_ptr],   8                 \n\t"
      "daddiu     %[src_ptr1], %[src_ptr1],  8                 \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   8                 \n\t"
      "daddiu     %[width],    %[width],     -8                \n\t"
      "bgtz       %[width],    1b                              \n\t"
      "nop                                                     \n\t"
      // fy0/fy1, the pointers and width are all written by the asm, so they
      // must be read-write operands rather than inputs.
      : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
        [d2] "=&f"(data[2]), [d3] "=&f"(data[3]), [fy0] "+&f"(fy0),
        [fy1] "+&f"(fy1), [src_ptr] "+&r"(src_ptr),
        [src_ptr1] "+&r"(src_ptr1), [dst_ptr] "+&r"(dst_ptr),
        [width] "+&r"(width)
      : [c0] "f"(c0), [shift] "f"(shift), [zero] "f"(zero)
      : "memory");
}

// Use first 4 shuffler values to reorder ARGB channels.
// Only the low two bits of shuffler[0..3] are used; they are combined into a
// pshufh control word that is applied to each pixel. Each pass handles 8 bytes
// (2 pixels). The loop ends only when the remaining width reaches exactly
// zero, so width is expected to be a positive multiple of 2 (not verified
// here).
void ARGBShuffleRow_MMI(const uint8_t* src_argb,
                        uint8_t* dst_argb,
                        const uint8_t* shuffler,
                        int width) {
  uint64_t source, dest0, dest1, dest;
  const uint64_t mask0 = 0x0;  // Zero for byte unpacking.
  // Two selector bits per output channel.
  const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
                         ((shuffler[2] & 0x03) << 4) |
                         ((shuffler[3] & 0x03) << 6);

  __asm__ volatile(
      "1:                                                      \n\t"
      "gsldlc1    %[src],      0x07(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])                \n\t"

      // Widen each pixel to halfwords, permute, then pack back to bytes.
      "punpcklbh  %[dest0],    %[src],       %[mask0]          \n\t"
      "pshufh     %[dest0],    %[dest0],     %[mask1]          \n\t"
      "punpckhbh  %[dest1],    %[src],       %[mask0]          \n\t"
      "pshufh     %[dest1],    %[dest1],     %[mask1]          \n\t"
      "packushb   %[dest],     %[dest0],     %[dest1]          \n\t"

      "gssdlc1    %[dest],     0x07(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])                \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x08              \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x08              \n\t"
      "daddiu     %[width],    %[width],     -0x02             \n\t"
      "bnez       %[width],    1b                              \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
        [dest1] "=&f"(dest1), [src_ptr] "+&r"(src_argb),
        [dst_ptr] "+&r"(dst_argb), [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

// Interleaves planar Y, U and V into packed output in the byte order
// Y0 U0 Y1 V0 Y2 U1 Y3 V1 ...
// Each pass of the loop reads 8 bytes of src_y and 4 bytes each of src_u and
// src_v, and writes 16 bytes to dst_frame. The loop body runs at least once
// and repeats while the remaining width is positive, so callers are expected
// to pass a positive multiple of 8 (not verified here).
void I422ToYUY2Row_MMI(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_frame,
                       int width) {
  uint64_t temp[3];
  uint64_t vu;
  __asm__ volatile(
      "1:                                                      \n\t"
      "gsldlc1    %[ty],        0x7(%[src_y])                  \n\t"  // 8 x Y
      "gsldrc1    %[ty],        0x0(%[src_y])                  \n\t"
      "gslwlc1    %[tu],        0x3(%[src_u])                  \n\t"  // 4 x U
      "gslwrc1    %[tu],        0x0(%[src_u])                  \n\t"
      "gslwlc1    %[tv],        0x3(%[src_v])                  \n\t"  // 4 x V
      "gslwrc1    %[tv],        0x0(%[src_v])                  \n\t"
      "punpcklbh  %[vu],        %[tu],        %[tv]            \n\t"  // U0 V0 ..
      "punpcklbh  %[tu],        %[ty],        %[vu]            \n\t"  // pixels 0-3
      "gssdlc1    %[tu],        0x7(%[dst_frame])              \n\t"
      "gssdrc1    %[tu],        0x0(%[dst_frame])              \n\t"
      "punpckhbh  %[tu],        %[ty],        %[vu]            \n\t"  // pixels 4-7
      "gssdlc1    %[tu],        0x0F(%[dst_frame])             \n\t"
      "gssdrc1    %[tu],        0x08(%[dst_frame])             \n\t"
      "daddiu     %[src_y],     %[src_y],     8                \n\t"
      "daddiu     %[src_u],     %[src_u],     4                \n\t"
      "daddiu     %[src_v],     %[src_v],     4                \n\t"
      "daddiu     %[dst_frame], %[dst_frame], 16               \n\t"
      "daddiu     %[width],     %[width],     -8               \n\t"
      "bgtz       %[width],     1b                             \n\t"
      "nop                                                     \n\t"
      // Each scratch output gets its own storage. The pointers and width are
      // advanced by the asm, so they must be read-write operands.
      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
        [vu] "=&f"(vu), [src_y] "+&r"(src_y), [src_u] "+&r"(src_u),
        [src_v] "+&r"(src_v), [dst_frame] "+&r"(dst_frame),
        [width] "+&r"(width)
      :
      : "memory");
}

// Interleaves planar Y, U and V into packed output in the byte order
// U0 Y0 V0 Y1 U1 Y2 V1 Y3 ...
// Each pass of the loop reads 8 bytes of src_y and 4 bytes each of src_u and
// src_v, and writes 16 bytes to dst_frame. The loop body runs at least once
// and repeats while the remaining width is positive, so callers are expected
// to pass a positive multiple of 8 (not verified here).
void I422ToUYVYRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* dst_frame,
                       int width) {
  uint64_t temp[3];
  uint64_t vu;
  __asm__ volatile(
      "1:                                                      \n\t"
      "gsldlc1    %[ty],        0x7(%[src_y])                  \n\t"  // 8 x Y
      "gsldrc1    %[ty],        0x0(%[src_y])                  \n\t"
      "gslwlc1    %[tu],        0x3(%[src_u])                  \n\t"  // 4 x U
      "gslwrc1    %[tu],        0x0(%[src_u])                  \n\t"
      "gslwlc1    %[tv],        0x3(%[src_v])                  \n\t"  // 4 x V
      "gslwrc1    %[tv],        0x0(%[src_v])                  \n\t"
      "punpcklbh  %[vu],        %[tu],        %[tv]            \n\t"  // U0 V0 ..
      "punpcklbh  %[tu],        %[vu],        %[ty]            \n\t"  // pixels 0-3
      "gssdlc1    %[tu],        0x7(%[dst_frame])              \n\t"
      "gssdrc1    %[tu],        0x0(%[dst_frame])              \n\t"
      "punpckhbh  %[tu],        %[vu],        %[ty]            \n\t"  // pixels 4-7
      "gssdlc1    %[tu],        0x0F(%[dst_frame])             \n\t"
      "gssdrc1    %[tu],        0x08(%[dst_frame])             \n\t"
      "daddiu     %[src_y],     %[src_y],     8                \n\t"
      "daddiu     %[src_u],     %[src_u],     4                \n\t"
      "daddiu     %[src_v],     %[src_v],     4                \n\t"
      "daddiu     %[dst_frame], %[dst_frame], 16               \n\t"
      "daddiu     %[width],     %[width],     -8               \n\t"
      "bgtz       %[width],     1b                             \n\t"
      "nop                                                     \n\t"
      // Each scratch output gets its own storage. The pointers and width are
      // advanced by the asm, so they must be read-write operands.
      : [ty] "=&f"(temp[0]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[2]),
        [vu] "=&f"(vu), [src_y] "+&r"(src_y), [src_u] "+&r"(src_u),
        [src_v] "+&r"(src_v), [dst_frame] "+&r"(dst_frame),
        [width] "+&r"(width)
      :
      : "memory");
}

// Copies byte 3 (alpha) of every 4-byte pixel from src into dst and leaves the
// other three bytes of each dst pixel unchanged (dst is read, merged and
// written back). Each pass handles 8 bytes (2 pixels). The loop ends only when
// the remaining width reaches exactly zero, so width is expected to be a
// positive multiple of 2 (not verified here).
void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  uint64_t source, dest;
  const uint64_t mask0 = 0xff000000ff000000ULL;  // Alpha bytes of 2 pixels.
  const uint64_t mask1 = ~mask0;                 // Colour bytes of 2 pixels.

  __asm__ volatile(
      "1:                                                    \n\t"
      "gsldlc1    %[src],      0x07(%[src_ptr])              \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])              \n\t"
      "gsldlc1    %[dest],     0x07(%[dst_ptr])              \n\t"
      "gsldrc1    %[dest],     0x00(%[dst_ptr])              \n\t"

      "and        %[src],      %[src],       %[mask0]        \n\t"
      "and        %[dest],     %[dest],      %[mask1]        \n\t"
      "or         %[dest],     %[src],       %[dest]         \n\t"
      "gssdlc1    %[dest],     0x07(%[dst_ptr])              \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])              \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x08            \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x08            \n\t"
      "daddiu     %[width],    %[width],     -0x02           \n\t"
      "bnez       %[width],    1b                            \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(source), [dest] "=&f"(dest), [src_ptr] "+&r"(src),
        [dst_ptr] "+&r"(dst), [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

// Extracts byte 3 (alpha) of every 4-byte pixel of src_argb into the byte row
// dst_a. Each pass reads 32 bytes (8 pixels) and writes 8 bytes. The loop ends
// only when the remaining width reaches exactly zero, so width is expected to
// be a positive multiple of 8 (not verified here).
void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
                             uint8_t* dst_a,
                             int width) {
  uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
  const uint64_t mask = 0xff000000ff000000ULL;  // Alpha bytes of 2 pixels.
  const uint64_t shift = 0x18;  // Moves the alpha byte to bits 0..7.

  __asm__ volatile(
      "1:                                                      \n\t"
      // Pixels 0..3: isolate alpha per 32-bit word, narrow to halfwords.
      "gsldlc1    %[src],      0x07(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])                \n\t"
      "and        %[dest0],    %[src],       %[mask]           \n\t"
      "psrlw      %[dest0],    %[dest0],     %[shift]          \n\t"
      "gsldlc1    %[src],      0x0f(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x08(%[src_ptr])                \n\t"
      "and        %[dest1],    %[src],       %[mask]           \n\t"
      "psrlw      %[dest1],    %[dest1],     %[shift]          \n\t"
      "packsswh   %[dest_lo],  %[dest0],     %[dest1]          \n\t"

      // Pixels 4..7.
      "gsldlc1    %[src],      0x17(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x10(%[src_ptr])                \n\t"
      "and        %[dest0],    %[src],       %[mask]           \n\t"
      "psrlw      %[dest0],    %[dest0],     %[shift]          \n\t"
      "gsldlc1    %[src],      0x1f(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x18(%[src_ptr])                \n\t"
      "and        %[dest1],    %[src],       %[mask]           \n\t"
      "psrlw      %[dest1],    %[dest1],     %[shift]          \n\t"
      "packsswh   %[dest_hi],  %[dest0],     %[dest1]          \n\t"

      // Narrow the eight halfwords to eight bytes.
      "packushb   %[dest],     %[dest_lo],   %[dest_hi]        \n\t"

      "gssdlc1    %[dest],     0x07(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])                \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x20              \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x08              \n\t"
      "daddiu     %[width],    %[width],     -0x08             \n\t"
      "bnez       %[width],    1b                              \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
        [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi),
        [src_ptr] "+&r"(src_argb), [dst_ptr] "+&r"(dst_a),
        [width] "+&r"(width)
      : [mask] "f"(mask), [shift] "f"(shift)
      : "memory");
}

// Writes each byte of src into byte 3 (alpha) of the corresponding 4-byte
// pixel of dst and leaves the other three bytes of each dst pixel unchanged
// (dst is read, merged and written back). Each pass reads 8 source bytes and
// updates 32 bytes (8 pixels) of dst. The loop ends only when the remaining
// width reaches exactly zero, so width is expected to be a positive multiple
// of 8 (not verified here).
void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
  uint64_t source, dest0, dest1, dest;
  const uint64_t mask0 = 0x0;                    // Zero for unpacking.
  const uint64_t mask1 = 0x00ffffff00ffffffULL;  // Colour bytes of 2 pixels.

  __asm__ volatile(
      "1:                                                      \n\t"
      "gsldlc1    %[src],      0x07(%[src_ptr])                \n\t"
      "gsldrc1    %[src],      0x00(%[src_ptr])                \n\t"

      // Source bytes 0..3: move each byte to the top byte of a 32-bit word.
      "punpcklbh  %[dest0],    %[mask0],     %[src]            \n\t"
      "punpcklhw  %[dest1],    %[mask0],     %[dest0]          \n\t"
      "gsldlc1    %[dest],     0x07(%[dst_ptr])                \n\t"
      "gsldrc1    %[dest],     0x00(%[dst_ptr])                \n\t"
      "and        %[dest],     %[dest],      %[mask1]          \n\t"
      "or         %[dest],     %[dest],      %[dest1]          \n\t"
      "gssdlc1    %[dest],     0x07(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x00(%[dst_ptr])                \n\t"
      "punpckhhw  %[dest1],    %[mask0],     %[dest0]          \n\t"
      "gsldlc1    %[dest],     0x0f(%[dst_ptr])                \n\t"
      "gsldrc1    %[dest],     0x08(%[dst_ptr])                \n\t"
      "and        %[dest],     %[dest],      %[mask1]          \n\t"
      "or         %[dest],     %[dest],      %[dest1]          \n\t"
      "gssdlc1    %[dest],     0x0f(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x08(%[dst_ptr])                \n\t"

      // Source bytes 4..7.
      "punpckhbh  %[dest0],    %[mask0],     %[src]            \n\t"
      "punpcklhw  %[dest1],    %[mask0],     %[dest0]          \n\t"
      "gsldlc1    %[dest],     0x17(%[dst_ptr])                \n\t"
      "gsldrc1    %[dest],     0x10(%[dst_ptr])                \n\t"
      "and        %[dest],     %[dest],      %[mask1]          \n\t"
      "or         %[dest],     %[dest],      %[dest1]          \n\t"
      "gssdlc1    %[dest],     0x17(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x10(%[dst_ptr])                \n\t"
      "punpckhhw  %[dest1],    %[mask0],     %[dest0]          \n\t"
      "gsldlc1    %[dest],     0x1f(%[dst_ptr])                \n\t"
      "gsldrc1    %[dest],     0x18(%[dst_ptr])                \n\t"
      "and        %[dest],     %[dest],      %[mask1]          \n\t"
      "or         %[dest],     %[dest],      %[dest1]          \n\t"
      "gssdlc1    %[dest],     0x1f(%[dst_ptr])                \n\t"
      "gssdrc1    %[dest],     0x18(%[dst_ptr])                \n\t"

      "daddiu     %[src_ptr],  %[src_ptr],   0x08              \n\t"
      "daddiu     %[dst_ptr],  %[dst_ptr],   0x20              \n\t"
      "daddiu     %[width],    %[width],     -0x08             \n\t"
      "bnez       %[width],    1b                              \n\t"
      // The pointers and width are advanced by the asm, so they must be
      // read-write operands rather than inputs.
      : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
        [dest1] "=&f"(dest1), [src_ptr] "+&r"(src), [dst_ptr] "+&r"(dst),
        [width] "+&r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1)
      : "memory");
}

6037 #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
6038
6039 #ifdef __cplusplus
6040 } // extern "C"
6041 } // namespace libyuv
6042 #endif
6043