1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "libyuv/row.h"
11
12 #include <string.h> // For memcpy and memset.
13
14 #include "libyuv/basic_types.h"
15
16 #ifdef __cplusplus
17 namespace libyuv {
18 extern "C" {
19 #endif
20
21 // This module is for Mips MMI.
22 #if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
23
24 // clang-format off
25
RGB24ToARGBRow_MMI(const uint8_t * src_rgb24,uint8_t * dst_argb,int width)26 void RGB24ToARGBRow_MMI(const uint8_t* src_rgb24,
27 uint8_t* dst_argb,
28 int width) {
29 uint64_t src0, src1, dest;
30 const uint64_t mask = 0xff000000ULL;
31
32 __asm__ volatile(
33 "1: \n\t"
34 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
35 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
36 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
37 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
38
39 "or %[src0], %[src0], %[mask] \n\t"
40 "or %[src1], %[src1], %[mask] \n\t"
41 "punpcklwd %[dest], %[src0], %[src1] \n\t"
42 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
43 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
44
45 "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
46 "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
47 "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
48 "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
49
50 "or %[src0], %[src0], %[mask] \n\t"
51 "or %[src1], %[src1], %[mask] \n\t"
52 "punpcklwd %[dest], %[src0], %[src1] \n\t"
53 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
54 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
55
56 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
57 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
58 "daddi %[width], %[width], -0x04 \n\t"
59 "bnez %[width], 1b \n\t"
60 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
61 : [src_ptr] "r"(src_rgb24), [dst_ptr] "r"(dst_argb), [width] "r"(width),
62 [mask] "f"(mask)
63 : "memory");
64 }
65
RAWToARGBRow_MMI(const uint8_t * src_raw,uint8_t * dst_argb,int width)66 void RAWToARGBRow_MMI(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
67 uint64_t src0, src1, dest;
68 const uint64_t mask0 = 0x0;
69 const uint64_t mask1 = 0xff000000ULL;
70 const uint64_t mask2 = 0xc6;
71
72 __asm__ volatile(
73 "1: \n\t"
74 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
75 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
76 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
77 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
78
79 "or %[src0], %[src0], %[mask1] \n\t"
80 "punpcklbh %[src0], %[src0], %[mask0] \n\t"
81 "pshufh %[src0], %[src0], %[mask2] \n\t"
82 "or %[src1], %[src1], %[mask1] \n\t"
83 "punpcklbh %[src1], %[src1], %[mask0] \n\t"
84 "pshufh %[src1], %[src1], %[mask2] \n\t"
85 "packushb %[dest], %[src0], %[src1] \n\t"
86 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
87 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
88
89 "gslwlc1 %[src0], 0x09(%[src_ptr]) \n\t"
90 "gslwrc1 %[src0], 0x06(%[src_ptr]) \n\t"
91 "gslwlc1 %[src1], 0x0c(%[src_ptr]) \n\t"
92 "gslwrc1 %[src1], 0x09(%[src_ptr]) \n\t"
93
94 "or %[src0], %[src0], %[mask1] \n\t"
95 "punpcklbh %[src0], %[src0], %[mask0] \n\t"
96 "pshufh %[src0], %[src0], %[mask2] \n\t"
97 "or %[src1], %[src1], %[mask1] \n\t"
98 "punpcklbh %[src1], %[src1], %[mask0] \n\t"
99 "pshufh %[src1], %[src1], %[mask2] \n\t"
100 "packushb %[dest], %[src0], %[src1] \n\t"
101 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
102 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
103
104 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
105 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
106 "daddi %[width], %[width], -0x04 \n\t"
107 "bnez %[width], 1b \n\t"
108 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
109 : [src_ptr] "r"(src_raw), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
110 [mask1] "f"(mask1), [mask2] "f"(mask2), [width] "r"(width)
111 : "memory");
112 }
113
RAWToRGB24Row_MMI(const uint8_t * src_raw,uint8_t * dst_rgb24,int width)114 void RAWToRGB24Row_MMI(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
115 uint64_t src0, src1;
116 uint64_t ftmp[4];
117 uint64_t mask0 = 0xc6;
118 uint64_t mask1 = 0x6c;
119
120 __asm__ volatile(
121 "1: \n\t"
122 "gsldrc1 %[src0], 0x00(%[src_raw]) \n\t"
123 "gsldlc1 %[src0], 0x07(%[src_raw]) \n\t"
124 "gslwrc1 %[src1], 0x08(%[src_raw]) \n\t"
125 "gslwlc1 %[src1], 0x0b(%[src_raw]) \n\t"
126
127 "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
128 "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
129 "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
130 "punpcklbh %[src1], %[src1], %[zero] \n\t"
131 "pextrh %[ftmp2], %[ftmp0], %[three] \n\t"
132 "pextrh %[ftmp3], %[ftmp1], %[one] \n\t"
133 "pinsrh_3 %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
134 "pextrh %[ftmp3], %[ftmp1], %[two] \n\t"
135 "pinsrh_1 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
136 "pshufh %[src1], %[src1], %[mask1] \n\t"
137 "pextrh %[ftmp2], %[src1], %[zero] \n\t"
138 "pinsrh_2 %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
139 "pinsrh_0 %[src1], %[src1], %[ftmp3] \n\t"
140 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
141 "packushb %[src1], %[src1], %[zero] \n\t"
142
143 "gssdrc1 %[ftmp0], 0x00(%[dst_rgb24]) \n\t"
144 "gssdlc1 %[ftmp0], 0x07(%[dst_rgb24]) \n\t"
145 "gsswrc1 %[src1], 0x08(%[dst_rgb24]) \n\t"
146 "gsswlc1 %[src1], 0x0b(%[dst_rgb24]) \n\t"
147
148 "daddiu %[src_raw], %[src_raw], 0x0c \n\t"
149 "daddiu %[dst_rgb24], %[dst_rgb24], 0x0c \n\t"
150 "daddiu %[width], %[width], -0x04 \n\t"
151 "bgtz %[width], 1b \n\t"
152 : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
153 [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3])
154 : [src_raw] "r"(src_raw), [dst_rgb24] "r"(dst_rgb24), [width] "r"(width),
155 [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
156 [one] "f"(0x01), [two] "f"(0x02), [three] "f"(0x03)
157 : "memory");
158 }
159
RGB565ToARGBRow_MMI(const uint8_t * src_rgb565,uint8_t * dst_argb,int width)160 void RGB565ToARGBRow_MMI(const uint8_t* src_rgb565,
161 uint8_t* dst_argb,
162 int width) {
163 uint64_t ftmp[5];
164 uint64_t c0 = 0x001f001f001f001f;
165 uint64_t c1 = 0x00ff00ff00ff00ff;
166 uint64_t c2 = 0x0007000700070007;
167 __asm__ volatile(
168 "1: \n\t"
169 "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
170 "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
171 "psrlh %[src1], %[src0], %[eight] \n\t"
172 "and %[b], %[src0], %[c0] \n\t"
173 "and %[src0], %[src0], %[c1] \n\t"
174 "psrlh %[src0], %[src0], %[five] \n\t"
175 "and %[g], %[src1], %[c2] \n\t"
176 "psllh %[g], %[g], %[three] \n\t"
177 "or %[g], %[src0], %[g] \n\t"
178 "psrlh %[r], %[src1], %[three] \n\t"
179 "psllh %[src0], %[b], %[three] \n\t"
180 "psrlh %[src1], %[b], %[two] \n\t"
181 "or %[b], %[src0], %[src1] \n\t"
182 "psllh %[src0], %[g], %[two] \n\t"
183 "psrlh %[src1], %[g], %[four] \n\t"
184 "or %[g], %[src0], %[src1] \n\t"
185 "psllh %[src0], %[r], %[three] \n\t"
186 "psrlh %[src1], %[r], %[two] \n\t"
187 "or %[r], %[src0], %[src1] \n\t"
188 "packushb %[b], %[b], %[r] \n\t"
189 "packushb %[g], %[g], %[c1] \n\t"
190 "punpcklbh %[src0], %[b], %[g] \n\t"
191 "punpckhbh %[src1], %[b], %[g] \n\t"
192 "punpcklhw %[r], %[src0], %[src1] \n\t"
193 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
194 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
195 "punpckhhw %[r], %[src0], %[src1] \n\t"
196 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
197 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
198 "daddiu %[src_rgb565], %[src_rgb565], 0x08 \n\t"
199 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
200 "daddiu %[width], %[width], -0x04 \n\t"
201 "bgtz %[width], 1b \n\t"
202 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
203 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4])
204 : [src_rgb565] "r"(src_rgb565), [dst_argb] "r"(dst_argb),
205 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
206 [eight] "f"(0x08), [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02),
207 [four] "f"(0x04)
208 : "memory");
209 }
210
ARGB1555ToARGBRow_MMI(const uint8_t * src_argb1555,uint8_t * dst_argb,int width)211 void ARGB1555ToARGBRow_MMI(const uint8_t* src_argb1555,
212 uint8_t* dst_argb,
213 int width) {
214 uint64_t ftmp[6];
215 uint64_t c0 = 0x001f001f001f001f;
216 uint64_t c1 = 0x00ff00ff00ff00ff;
217 uint64_t c2 = 0x0003000300030003;
218 uint64_t c3 = 0x007c007c007c007c;
219 uint64_t c4 = 0x0001000100010001;
220 __asm__ volatile(
221 "1: \n\t"
222 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
223 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
224 "psrlh %[src1], %[src0], %[eight] \n\t"
225 "and %[b], %[src0], %[c0] \n\t"
226 "and %[src0], %[src0], %[c1] \n\t"
227 "psrlh %[src0], %[src0], %[five] \n\t"
228 "and %[g], %[src1], %[c2] \n\t"
229 "psllh %[g], %[g], %[three] \n\t"
230 "or %[g], %[src0], %[g] \n\t"
231 "and %[r], %[src1], %[c3] \n\t"
232 "psrlh %[r], %[r], %[two] \n\t"
233 "psrlh %[a], %[src1], %[seven] \n\t"
234 "psllh %[src0], %[b], %[three] \n\t"
235 "psrlh %[src1], %[b], %[two] \n\t"
236 "or %[b], %[src0], %[src1] \n\t"
237 "psllh %[src0], %[g], %[three] \n\t"
238 "psrlh %[src1], %[g], %[two] \n\t"
239 "or %[g], %[src0], %[src1] \n\t"
240 "psllh %[src0], %[r], %[three] \n\t"
241 "psrlh %[src1], %[r], %[two] \n\t"
242 "or %[r], %[src0], %[src1] \n\t"
243 "xor %[a], %[a], %[c1] \n\t"
244 "paddb %[a], %[a], %[c4] \n\t"
245 "packushb %[b], %[b], %[r] \n\t"
246 "packushb %[g], %[g], %[a] \n\t"
247 "punpcklbh %[src0], %[b], %[g] \n\t"
248 "punpckhbh %[src1], %[b], %[g] \n\t"
249 "punpcklhw %[r], %[src0], %[src1] \n\t"
250 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
251 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
252 "punpckhhw %[r], %[src0], %[src1] \n\t"
253 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
254 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
255 "daddiu %[src_argb1555], %[src_argb1555], 0x08 \n\t"
256 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
257 "daddiu %[width], %[width], -0x04 \n\t"
258 "bgtz %[width], 1b \n\t"
259 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
260 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
261 : [src_argb1555] "r"(src_argb1555), [dst_argb] "r"(dst_argb),
262 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
263 [c3] "f"(c3), [c4] "f"(c4), [eight] "f"(0x08), [five] "f"(0x05),
264 [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
265 : "memory");
266 }
267
ARGB4444ToARGBRow_MMI(const uint8_t * src_argb4444,uint8_t * dst_argb,int width)268 void ARGB4444ToARGBRow_MMI(const uint8_t* src_argb4444,
269 uint8_t* dst_argb,
270 int width) {
271 uint64_t ftmp[6];
272 uint64_t c0 = 0x000f000f000f000f;
273 uint64_t c1 = 0x00ff00ff00ff00ff;
274 __asm__ volatile(
275 "1: \n\t"
276 "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
277 "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
278 "psrlh %[src1], %[src0], %[eight] \n\t"
279 "and %[b], %[src0], %[c0] \n\t"
280 "and %[src0], %[src0], %[c1] \n\t"
281 "psrlh %[g], %[src0], %[four] \n\t"
282 "and %[r], %[src1], %[c0] \n\t"
283 "psrlh %[a], %[src1], %[four] \n\t"
284 "psllh %[src0], %[b], %[four] \n\t"
285 "or %[b], %[src0], %[b] \n\t"
286 "psllh %[src0], %[g], %[four] \n\t"
287 "or %[g], %[src0], %[g] \n\t"
288 "psllh %[src0], %[r], %[four] \n\t"
289 "or %[r], %[src0], %[r] \n\t"
290 "psllh %[src0], %[a], %[four] \n\t"
291 "or %[a], %[src0], %[a] \n\t"
292 "packushb %[b], %[b], %[r] \n\t"
293 "packushb %[g], %[g], %[a] \n\t"
294 "punpcklbh %[src0], %[b], %[g] \n\t"
295 "punpckhbh %[src1], %[b], %[g] \n\t"
296 "punpcklhw %[r], %[src0], %[src1] \n\t"
297 "gssdrc1 %[r], 0x00(%[dst_argb]) \n\t"
298 "gssdlc1 %[r], 0x07(%[dst_argb]) \n\t"
299 "punpckhhw %[r], %[src0], %[src1] \n\t"
300 "gssdrc1 %[r], 0x08(%[dst_argb]) \n\t"
301 "gssdlc1 %[r], 0x0f(%[dst_argb]) \n\t"
302 "daddiu %[src_argb4444], %[src_argb4444], 0x08 \n\t"
303 "daddiu %[dst_argb], %[dst_argb], 0x10 \n\t"
304 "daddiu %[width], %[width], -0x04 \n\t"
305 "bgtz %[width], 1b \n\t"
306 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b] "=&f"(ftmp[2]),
307 [g] "=&f"(ftmp[3]), [r] "=&f"(ftmp[4]), [a] "=&f"(ftmp[5])
308 : [src_argb4444] "r"(src_argb4444), [dst_argb] "r"(dst_argb),
309 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [eight] "f"(0x08),
310 [four] "f"(0x04)
311 : "memory");
312 }
313
ARGBToRGB24Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)314 void ARGBToRGB24Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
315 uint64_t src;
316
317 __asm__ volatile(
318 "1: \n\t"
319 "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
320 "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
321 "gsswlc1 %[src], 0x03(%[dst_ptr]) \n\t"
322 "gsswrc1 %[src], 0x00(%[dst_ptr]) \n\t"
323
324 "gslwlc1 %[src], 0x07(%[src_ptr]) \n\t"
325 "gslwrc1 %[src], 0x04(%[src_ptr]) \n\t"
326 "gsswlc1 %[src], 0x06(%[dst_ptr]) \n\t"
327 "gsswrc1 %[src], 0x03(%[dst_ptr]) \n\t"
328
329 "gslwlc1 %[src], 0x0b(%[src_ptr]) \n\t"
330 "gslwrc1 %[src], 0x08(%[src_ptr]) \n\t"
331 "gsswlc1 %[src], 0x09(%[dst_ptr]) \n\t"
332 "gsswrc1 %[src], 0x06(%[dst_ptr]) \n\t"
333
334 "gslwlc1 %[src], 0x0f(%[src_ptr]) \n\t"
335 "gslwrc1 %[src], 0x0c(%[src_ptr]) \n\t"
336 "gsswlc1 %[src], 0x0c(%[dst_ptr]) \n\t"
337 "gsswrc1 %[src], 0x09(%[dst_ptr]) \n\t"
338
339 "daddiu %[src_ptr], %[src_ptr], 0x10 \n\t"
340 "daddiu %[dst_ptr], %[dst_ptr], 0x0c \n\t"
341 "daddi %[width], %[width], -0x04 \n\t"
342 "bnez %[width], 1b \n\t"
343 : [src] "=&f"(src)
344 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_rgb), [width] "r"(width)
345 : "memory");
346 }
347
ARGBToRAWRow_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)348 void ARGBToRAWRow_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
349 uint64_t src0, src1;
350 uint64_t ftmp[3];
351 uint64_t mask0 = 0xc6;
352 uint64_t mask1 = 0x18;
353
354 __asm__ volatile(
355 "1: \n\t"
356 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
357 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
358 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
359 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
360
361 "punpcklbh %[ftmp0], %[src0], %[zero] \n\t"
362 "pshufh %[ftmp0], %[ftmp0], %[mask0] \n\t"
363 "punpckhbh %[ftmp1], %[src0], %[zero] \n\t"
364 "punpcklbh %[ftmp2], %[src1], %[zero] \n\t"
365 "punpckhbh %[src1], %[src1], %[zero] \n\t"
366
367 "pextrh %[src0], %[ftmp1], %[two] \n\t"
368 "pinsrh_3 %[ftmp0], %[ftmp0], %[src0] \n\t"
369 "pshufh %[ftmp1], %[ftmp1], %[one] \n\t"
370
371 "pextrh %[src0], %[ftmp2], %[two] \n\t"
372 "pinsrh_2 %[ftmp1], %[ftmp1], %[src0] \n\t"
373 "pextrh %[src0], %[ftmp2], %[one] \n\t"
374 "pinsrh_3 %[ftmp1], %[ftmp1], %[src0] \n\t"
375 "pextrh %[src0], %[ftmp2], %[zero] \n\t"
376 "pshufh %[src1], %[src1], %[mask1] \n\t"
377 "pinsrh_0 %[src1], %[src1], %[src0] \n\t"
378 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
379 "packushb %[src1], %[src1], %[zero] \n\t"
380
381 "gssdrc1 %[ftmp0], 0x00(%[dst_rgb]) \n\t"
382 "gssdlc1 %[ftmp0], 0x07(%[dst_rgb]) \n\t"
383 "gsswrc1 %[src1], 0x08(%[dst_rgb]) \n\t"
384 "gsswlc1 %[src1], 0x0b(%[dst_rgb]) \n\t"
385
386 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
387 "daddiu %[dst_rgb], %[dst_rgb], 0x0c \n\t"
388 "daddiu %[width], %[width], -0x04 \n\t"
389 "bgtz %[width], 1b \n\t"
390 : [src0] "=&f"(src0), [src1] "=&f"(src1), [ftmp0] "=&f"(ftmp[0]),
391 [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2])
392 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
393 [mask0] "f"(mask0), [mask1] "f"(mask1), [zero] "f"(0x00),
394 [one] "f"(0x01), [two] "f"(0x02)
395 : "memory");
396 }
397
ARGBToRGB565Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)398 void ARGBToRGB565Row_MMI(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
399 uint64_t src0, src1;
400 uint64_t ftmp[3];
401
402 __asm__ volatile(
403 "1: \n\t"
404 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
405 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
406 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
407 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
408
409 "punpcklbh %[b], %[src0], %[src1] \n\t"
410 "punpckhbh %[g], %[src0], %[src1] \n\t"
411 "punpcklbh %[src0], %[b], %[g] \n\t"
412 "punpckhbh %[src1], %[b], %[g] \n\t"
413 "punpcklbh %[b], %[src0], %[zero] \n\t"
414 "punpckhbh %[g], %[src0], %[zero] \n\t"
415 "punpcklbh %[r], %[src1], %[zero] \n\t"
416
417 "psrlh %[b], %[b], %[three] \n\t"
418 "psrlh %[g], %[g], %[two] \n\t"
419 "psrlh %[r], %[r], %[three] \n\t"
420
421 "psllh %[g], %[g], %[five] \n\t"
422 "psllh %[r], %[r], %[eleven] \n\t"
423 "or %[b], %[b], %[g] \n\t"
424 "or %[b], %[b], %[r] \n\t"
425
426 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
427 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
428
429 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
430 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
431 "daddiu %[width], %[width], -0x04 \n\t"
432 "bgtz %[width], 1b \n\t"
433 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
434 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
435 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
436 [zero] "f"(0x00), [two] "f"(0x02), [three] "f"(0x03), [five] "f"(0x05),
437 [eleven] "f"(0x0b)
438 : "memory");
439 }
440
// dither4 is a row of 4 values from a 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of the 4x4 matrix is the upper-left entry.
// The 4 values are passed as an int, then referenced as an array, so
// endianness does not affect the order of the original matrix. However,
// dither4 will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
ARGBToRGB565DitherRow_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,const uint32_t dither4,int width)449 void ARGBToRGB565DitherRow_MMI(const uint8_t* src_argb,
450 uint8_t* dst_rgb,
451 const uint32_t dither4,
452 int width) {
453 uint64_t src0, src1;
454 uint64_t ftmp[3];
455 uint64_t c0 = 0x00ff00ff00ff00ff;
456
457 __asm__ volatile(
458 "punpcklbh %[dither], %[dither], %[zero] \n\t"
459 "1: \n\t"
460 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
461 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
462 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
463 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
464
465 "punpcklbh %[b], %[src0], %[src1] \n\t"
466 "punpckhbh %[g], %[src0], %[src1] \n\t"
467 "punpcklbh %[src0], %[b], %[g] \n\t"
468 "punpckhbh %[src1], %[b], %[g] \n\t"
469 "punpcklbh %[b], %[src0], %[zero] \n\t"
470 "punpckhbh %[g], %[src0], %[zero] \n\t"
471 "punpcklbh %[r], %[src1], %[zero] \n\t"
472
473 "paddh %[b], %[b], %[dither] \n\t"
474 "paddh %[g], %[g], %[dither] \n\t"
475 "paddh %[r], %[r], %[dither] \n\t"
476 "pcmpgth %[src0], %[b], %[c0] \n\t"
477 "or %[src0], %[src0], %[b] \n\t"
478 "and %[b], %[src0], %[c0] \n\t"
479 "pcmpgth %[src0], %[g], %[c0] \n\t"
480 "or %[src0], %[src0], %[g] \n\t"
481 "and %[g], %[src0], %[c0] \n\t"
482 "pcmpgth %[src0], %[r], %[c0] \n\t"
483 "or %[src0], %[src0], %[r] \n\t"
484 "and %[r], %[src0], %[c0] \n\t"
485
486 "psrlh %[b], %[b], %[three] \n\t"
487 "psrlh %[g], %[g], %[two] \n\t"
488 "psrlh %[r], %[r], %[three] \n\t"
489
490 "psllh %[g], %[g], %[five] \n\t"
491 "psllh %[r], %[r], %[eleven] \n\t"
492 "or %[b], %[b], %[g] \n\t"
493 "or %[b], %[b], %[r] \n\t"
494
495 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
496 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
497
498 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
499 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
500 "daddiu %[width], %[width], -0x04 \n\t"
501 "bgtz %[width], 1b \n\t"
502 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
503 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2])
504 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
505 [dither] "f"(dither4), [c0] "f"(c0), [zero] "f"(0x00), [two] "f"(0x02),
506 [three] "f"(0x03), [five] "f"(0x05), [eleven] "f"(0x0b)
507 : "memory");
508 }
509
ARGBToARGB1555Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)510 void ARGBToARGB1555Row_MMI(const uint8_t* src_argb,
511 uint8_t* dst_rgb,
512 int width) {
513 uint64_t src0, src1;
514 uint64_t ftmp[4];
515
516 __asm__ volatile(
517 "1: \n\t"
518 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
519 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
520 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
521 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
522
523 "punpcklbh %[b], %[src0], %[src1] \n\t"
524 "punpckhbh %[g], %[src0], %[src1] \n\t"
525 "punpcklbh %[src0], %[b], %[g] \n\t"
526 "punpckhbh %[src1], %[b], %[g] \n\t"
527 "punpcklbh %[b], %[src0], %[zero] \n\t"
528 "punpckhbh %[g], %[src0], %[zero] \n\t"
529 "punpcklbh %[r], %[src1], %[zero] \n\t"
530 "punpckhbh %[a], %[src1], %[zero] \n\t"
531
532 "psrlh %[b], %[b], %[three] \n\t"
533 "psrlh %[g], %[g], %[three] \n\t"
534 "psrlh %[r], %[r], %[three] \n\t"
535 "psrlh %[a], %[a], %[seven] \n\t"
536
537 "psllh %[g], %[g], %[five] \n\t"
538 "psllh %[r], %[r], %[ten] \n\t"
539 "psllh %[a], %[a], %[fifteen] \n\t"
540 "or %[b], %[b], %[g] \n\t"
541 "or %[b], %[b], %[r] \n\t"
542 "or %[b], %[b], %[a] \n\t"
543
544 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
545 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
546
547 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
548 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
549 "daddiu %[width], %[width], -0x04 \n\t"
550 "bgtz %[width], 1b \n\t"
551 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
552 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
553 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
554 [zero] "f"(0x00), [three] "f"(0x03), [five] "f"(0x05),
555 [seven] "f"(0x07), [ten] "f"(0x0a), [fifteen] "f"(0x0f)
556 : "memory");
557 }
558
ARGBToARGB4444Row_MMI(const uint8_t * src_argb,uint8_t * dst_rgb,int width)559 void ARGBToARGB4444Row_MMI(const uint8_t* src_argb,
560 uint8_t* dst_rgb,
561 int width) {
562 uint64_t src0, src1;
563 uint64_t ftmp[4];
564
565 __asm__ volatile(
566 "1: \n\t"
567 "gsldrc1 %[src0], 0x00(%[src_argb]) \n\t"
568 "gsldlc1 %[src0], 0x07(%[src_argb]) \n\t"
569 "gsldrc1 %[src1], 0x08(%[src_argb]) \n\t"
570 "gsldlc1 %[src1], 0x0f(%[src_argb]) \n\t"
571
572 "punpcklbh %[b], %[src0], %[src1] \n\t"
573 "punpckhbh %[g], %[src0], %[src1] \n\t"
574 "punpcklbh %[src0], %[b], %[g] \n\t"
575 "punpckhbh %[src1], %[b], %[g] \n\t"
576 "punpcklbh %[b], %[src0], %[zero] \n\t"
577 "punpckhbh %[g], %[src0], %[zero] \n\t"
578 "punpcklbh %[r], %[src1], %[zero] \n\t"
579 "punpckhbh %[a], %[src1], %[zero] \n\t"
580
581 "psrlh %[b], %[b], %[four] \n\t"
582 "psrlh %[g], %[g], %[four] \n\t"
583 "psrlh %[r], %[r], %[four] \n\t"
584 "psrlh %[a], %[a], %[four] \n\t"
585
586 "psllh %[g], %[g], %[four] \n\t"
587 "psllh %[r], %[r], %[eight] \n\t"
588 "psllh %[a], %[a], %[twelve] \n\t"
589 "or %[b], %[b], %[g] \n\t"
590 "or %[b], %[b], %[r] \n\t"
591 "or %[b], %[b], %[a] \n\t"
592
593 "gssdrc1 %[b], 0x00(%[dst_rgb]) \n\t"
594 "gssdlc1 %[b], 0x07(%[dst_rgb]) \n\t"
595
596 "daddiu %[src_argb], %[src_argb], 0x10 \n\t"
597 "daddiu %[dst_rgb], %[dst_rgb], 0x08 \n\t"
598 "daddiu %[width], %[width], -0x04 \n\t"
599 "bgtz %[width], 1b \n\t"
600 : [src0] "=&f"(src0), [src1] "=&f"(src1), [b] "=&f"(ftmp[0]),
601 [g] "=&f"(ftmp[1]), [r] "=&f"(ftmp[2]), [a] "=&f"(ftmp[3])
602 : [src_argb] "r"(src_argb), [dst_rgb] "r"(dst_rgb), [width] "r"(width),
603 [zero] "f"(0x00), [four] "f"(0x04), [eight] "f"(0x08),
604 [twelve] "f"(0x0c)
605 : "memory");
606 }
607
ARGBToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)608 void ARGBToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
609 uint64_t src, src_hi, src_lo;
610 uint64_t dest0, dest1, dest2, dest3;
611 const uint64_t value = 0x1080;
612 const uint64_t mask = 0x0001004200810019;
613
614 __asm__ volatile(
615 "1: \n\t"
616 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
617 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
618 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
619 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
620 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
621 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
622 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
623 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
624 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
625 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
626 "paddw %[dest0], %[dest0], %[src] \n\t"
627 "psrlw %[dest0], %[dest0], %[eight] \n\t"
628
629 "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
630 "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
631 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
632 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
633 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
634 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
635 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
636 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
637 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
638 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
639 "paddw %[dest1], %[dest1], %[src] \n\t"
640 "psrlw %[dest1], %[dest1], %[eight] \n\t"
641
642 "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
643 "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
644 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
645 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
646 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
647 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
648 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
649 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
650 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
651 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
652 "paddw %[dest2], %[dest2], %[src] \n\t"
653 "psrlw %[dest2], %[dest2], %[eight] \n\t"
654
655 "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
656 "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
657 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
658 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
659 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
660 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
661 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
662 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
663 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
664 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
665 "paddw %[dest3], %[dest3], %[src] \n\t"
666 "psrlw %[dest3], %[dest3], %[eight] \n\t"
667
668 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
669 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
670 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
671 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
672 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
673
674 "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
675 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
676 "daddi %[width], %[width], -0x08 \n\t"
677 "bnez %[width], 1b \n\t"
678 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
679 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
680 [dest3] "=&f"(dest3)
681 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
682 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
683 [zero] "f"(0x00)
684 : "memory");
685 }
686
ARGBToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)687 void ARGBToUVRow_MMI(const uint8_t* src_rgb,
688 int src_stride_rgb,
689 uint8_t* dst_u,
690 uint8_t* dst_v,
691 int width) {
692 uint64_t src_rgb1;
693 uint64_t ftmp[13];
694 uint64_t tmp[1];
695 const uint64_t value = 0x4040;
696 const uint64_t mask_u = 0x0013002500380002;
697 const uint64_t mask_v = 0x00020038002f0009;
698
699 __asm__ volatile(
700 "dli %[tmp0], 0x0001000100010001 \n\t"
701 "dmtc1 %[tmp0], %[ftmp12] \n\t"
702 "1: \n\t"
703 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
704 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
705 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
706 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
707 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
708 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
709 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
710 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
711 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
712 "paddh %[src0], %[src0], %[src_lo] \n\t"
713 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
714 "paddh %[src0], %[src0], %[src_hi] \n\t"
715 "paddh %[src0], %[src0], %[ftmp12] \n\t"
716 "psrlh %[src0], %[src0], %[one] \n\t"
717 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
718 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
719 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
720 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
721 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
722
723 "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
724 "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
725 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
726 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
727 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
728 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
729 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
730 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
731 "paddh %[src0], %[src0], %[src_lo] \n\t"
732 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
733 "paddh %[src0], %[src0], %[src_hi] \n\t"
734 "paddh %[src0], %[src0], %[ftmp12] \n\t"
735 "psrlh %[src0], %[src0], %[one] \n\t"
736 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
737 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
738 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
739 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
740 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
741
742 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
743 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
744 "psubw %[dest0_u], %[src0], %[src1] \n\t"
745 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
746 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
747 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
748 "psubw %[dest0_v], %[src1], %[src0] \n\t"
749 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
750
751 "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
752 "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
753 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
754 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
755 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
756 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
757 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
758 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
759 "paddh %[src0], %[src0], %[src_lo] \n\t"
760 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
761 "paddh %[src0], %[src0], %[src_hi] \n\t"
762 "paddh %[src0], %[src0], %[ftmp12] \n\t"
763 "psrlh %[src0], %[src0], %[one] \n\t"
764 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
765 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
766 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
767 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
768 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
769
770 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
771 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
772 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
773 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
774 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
775 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
776 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
777 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
778 "paddh %[src0], %[src0], %[src_lo] \n\t"
779 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
780 "paddh %[src0], %[src0], %[src_hi] \n\t"
781 "paddh %[src0], %[src0], %[ftmp12] \n\t"
782 "psrlh %[src0], %[src0], %[one] \n\t"
783 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
784 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
785 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
786 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
787 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
788
789 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
790 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
791 "psubw %[dest1_u], %[src0], %[src1] \n\t"
792 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
793 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
794 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
795 "psubw %[dest1_v], %[src1], %[src0] \n\t"
796 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
797
798 "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
799 "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
800 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
801 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
802 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
803 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
804 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
805 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
806 "paddh %[src0], %[src0], %[src_lo] \n\t"
807 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
808 "paddh %[src0], %[src0], %[src_hi] \n\t"
809 "paddh %[src0], %[src0], %[ftmp12] \n\t"
810 "psrlh %[src0], %[src0], %[one] \n\t"
811 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
812 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
813 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
814 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
815 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
816
817 "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
818 "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
819 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
820 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
821 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
822 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
823 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
824 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
825 "paddh %[src0], %[src0], %[src_lo] \n\t"
826 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
827 "paddh %[src0], %[src0], %[src_hi] \n\t"
828 "paddh %[src0], %[src0], %[ftmp12] \n\t"
829 "psrlh %[src0], %[src0], %[one] \n\t"
830 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
831 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
832 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
833 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
834 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
835
836 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
837 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
838 "psubw %[dest2_u], %[src0], %[src1] \n\t"
839 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
840 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
841 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
842 "psubw %[dest2_v], %[src1], %[src0] \n\t"
843 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
844
845 "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
846 "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
847 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
848 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
849 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
850 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
851 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
852 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
853 "paddh %[src0], %[src0], %[src_lo] \n\t"
854 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
855 "paddh %[src0], %[src0], %[src_hi] \n\t"
856 "paddh %[src0], %[src0], %[ftmp12] \n\t"
857 "psrlh %[src0], %[src0], %[one] \n\t"
858 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
859 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
860 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
861 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
862 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
863
864 "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
865 "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
866 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
867 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
868 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
869 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
870 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
871 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
872 "paddh %[src0], %[src0], %[src_lo] \n\t"
873 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
874 "paddh %[src0], %[src0], %[src_hi] \n\t"
875 "paddh %[src0], %[src0], %[ftmp12] \n\t"
876 "psrlh %[src0], %[src0], %[one] \n\t"
877 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
878 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
879 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
880 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
881 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
882
883 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
884 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
885 "psubw %[dest3_u], %[src0], %[src1] \n\t"
886 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
887 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
888 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
889 "psubw %[dest3_v], %[src1], %[src0] \n\t"
890 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
891
892 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
893 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
894 "packushb %[dest0_u], %[src0], %[src1] \n\t"
895 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
896 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
897
898 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
899 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
900 "packushb %[dest0_v], %[src0], %[src1] \n\t"
901 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
902 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
903
904 "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
905 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
906 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
907 "daddi %[width], %[width], -0x10 \n\t"
908 "bgtz %[width], 1b \n\t"
909 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
910 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
911 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
912 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
913 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
914 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
915 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
916 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
917 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
918 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
919 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
920 [sixteen] "f"(0x10)
921 : "memory");
922 }
923
BGRAToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)924 void BGRAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
925 uint64_t src, src_hi, src_lo;
926 uint64_t dest0, dest1, dest2, dest3;
927 const uint64_t value = 0x1080;
928 const uint64_t mask = 0x0019008100420001;
929
930 __asm__ volatile(
931 "1: \n\t"
932 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
933 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
934 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
935 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
936 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
937 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
938 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
939 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
940 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
941 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
942 "paddw %[dest0], %[dest0], %[src] \n\t"
943 "psrlw %[dest0], %[dest0], %[eight] \n\t"
944
945 "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
946 "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
947 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
948 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
949 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
950 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
951 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
952 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
953 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
954 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
955 "paddw %[dest1], %[dest1], %[src] \n\t"
956 "psrlw %[dest1], %[dest1], %[eight] \n\t"
957
958 "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
959 "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
960 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
961 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
962 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
963 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
964 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
965 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
966 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
967 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
968 "paddw %[dest2], %[dest2], %[src] \n\t"
969 "psrlw %[dest2], %[dest2], %[eight] \n\t"
970
971 "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
972 "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
973 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
974 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
975 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
976 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
977 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
978 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
979 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
980 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
981 "paddw %[dest3], %[dest3], %[src] \n\t"
982 "psrlw %[dest3], %[dest3], %[eight] \n\t"
983
984 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
985 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
986 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
987 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
988 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
989
990 "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
991 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
992 "daddi %[width], %[width], -0x08 \n\t"
993 "bnez %[width], 1b \n\t"
994 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
995 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
996 [dest3] "=&f"(dest3)
997 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
998 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
999 [zero] "f"(0x00)
1000 : "memory");
1001 }
1002
BGRAToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1003 void BGRAToUVRow_MMI(const uint8_t* src_rgb,
1004 int src_stride_rgb,
1005 uint8_t* dst_u,
1006 uint8_t* dst_v,
1007 int width) {
1008 uint64_t src_rgb1;
1009 uint64_t ftmp[13];
1010 uint64_t tmp[1];
1011 const uint64_t value = 0x4040;
1012 const uint64_t mask_u = 0x0002003800250013;
1013 const uint64_t mask_v = 0x0009002f00380002;
1014
1015 __asm__ volatile(
1016 "dli %[tmp0], 0x0001000100010001 \n\t"
1017 "dmtc1 %[tmp0], %[ftmp12] \n\t"
1018 "1: \n\t"
1019 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
1020 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
1021 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
1022 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1023 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1024 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1025 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1026 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1027 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1028 "paddh %[src0], %[src0], %[src_lo] \n\t"
1029 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1030 "paddh %[src0], %[src0], %[src_hi] \n\t"
1031 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1032 "psrlh %[src0], %[src0], %[one] \n\t"
1033 "dsrl %[dest0_u], %[src0], %[sixteen] \n\t"
1034 "pinsrh_3 %[dest0_u], %[dest0_u], %[value] \n\t"
1035 "pinsrh_0 %[dest0_v], %[src0], %[value] \n\t"
1036 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1037 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1038
1039 "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
1040 "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
1041 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1042 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1043 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1044 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1045 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1046 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1047 "paddh %[src0], %[src0], %[src_lo] \n\t"
1048 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1049 "paddh %[src0], %[src0], %[src_hi] \n\t"
1050 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1051 "psrlh %[src0], %[src0], %[one] \n\t"
1052 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1053 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1054 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1055 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1056 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1057
1058 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1059 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1060 "psubw %[dest0_u], %[src1], %[src0] \n\t"
1061 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1062 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1063 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1064 "psubw %[dest0_v], %[src0], %[src1] \n\t"
1065 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1066
1067 "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
1068 "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
1069 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1070 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1071 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1072 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1073 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1074 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1075 "paddh %[src0], %[src0], %[src_lo] \n\t"
1076 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1077 "paddh %[src0], %[src0], %[src_hi] \n\t"
1078 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1079 "psrlh %[src0], %[src0], %[one] \n\t"
1080 "dsrl %[dest1_u], %[src0], %[sixteen] \n\t"
1081 "pinsrh_3 %[dest1_u], %[dest1_u], %[value] \n\t"
1082 "pinsrh_0 %[dest1_v], %[src0], %[value] \n\t"
1083 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1084 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1085
1086 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
1087 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
1088 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1089 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1090 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1091 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1092 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1093 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1094 "paddh %[src0], %[src0], %[src_lo] \n\t"
1095 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1096 "paddh %[src0], %[src0], %[src_hi] \n\t"
1097 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1098 "psrlh %[src0], %[src0], %[one] \n\t"
1099 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1100 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1101 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1102 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1103 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1104
1105 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1106 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1107 "psubw %[dest1_u], %[src1], %[src0] \n\t"
1108 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1109 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1110 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1111 "psubw %[dest1_v], %[src0], %[src1] \n\t"
1112 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1113
1114 "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
1115 "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
1116 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1117 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1118 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1119 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1120 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1121 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1122 "paddh %[src0], %[src0], %[src_lo] \n\t"
1123 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1124 "paddh %[src0], %[src0], %[src_hi] \n\t"
1125 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1126 "psrlh %[src0], %[src0], %[one] \n\t"
1127 "dsrl %[dest2_u], %[src0], %[sixteen] \n\t"
1128 "pinsrh_3 %[dest2_u], %[dest2_u], %[value] \n\t"
1129 "pinsrh_0 %[dest2_v], %[src0], %[value] \n\t"
1130 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1131 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1132
1133 "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
1134 "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
1135 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1136 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1137 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1138 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1139 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1140 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1141 "paddh %[src0], %[src0], %[src_lo] \n\t"
1142 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1143 "paddh %[src0], %[src0], %[src_hi] \n\t"
1144 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1145 "psrlh %[src0], %[src0], %[one] \n\t"
1146 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1147 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1148 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1149 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1150 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1151
1152 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1153 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1154 "psubw %[dest2_u], %[src1], %[src0] \n\t"
1155 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1156 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1157 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1158 "psubw %[dest2_v], %[src0], %[src1] \n\t"
1159 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1160
1161 "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
1162 "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
1163 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1164 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1165 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1166 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1167 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1168 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1169 "paddh %[src0], %[src0], %[src_lo] \n\t"
1170 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1171 "paddh %[src0], %[src0], %[src_hi] \n\t"
1172 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1173 "psrlh %[src0], %[src0], %[one] \n\t"
1174 "dsrl %[dest3_u], %[src0], %[sixteen] \n\t"
1175 "pinsrh_3 %[dest3_u], %[dest3_u], %[value] \n\t"
1176 "pinsrh_0 %[dest3_v], %[src0], %[value] \n\t"
1177 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1178 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1179
1180 "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
1181 "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
1182 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1183 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1184 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1185 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1186 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1187 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1188 "paddh %[src0], %[src0], %[src_lo] \n\t"
1189 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1190 "paddh %[src0], %[src0], %[src_hi] \n\t"
1191 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1192 "psrlh %[src0], %[src0], %[one] \n\t"
1193 "dsrl %[src_lo], %[src0], %[sixteen] \n\t"
1194 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1195 "pinsrh_0 %[src_hi], %[src0], %[value] \n\t"
1196 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1197 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1198
1199 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1200 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1201 "psubw %[dest3_u], %[src1], %[src0] \n\t"
1202 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1203 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1204 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1205 "psubw %[dest3_v], %[src0], %[src1] \n\t"
1206 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1207
1208 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1209 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1210 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1211 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1212 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1213
1214 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1215 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1216 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1217 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1218 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1219
1220 "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
1221 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1222 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1223 "daddi %[width], %[width], -0x10 \n\t"
1224 "bgtz %[width], 1b \n\t"
1225 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1226 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1227 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1228 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1229 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1230 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
1231 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
1232 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
1233 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1234 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1235 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
1236 [sixteen] "f"(0x10)
1237 : "memory");
1238 }
1239
ABGRToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)1240 void ABGRToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
1241 uint64_t src, src_hi, src_lo;
1242 uint64_t dest0, dest1, dest2, dest3;
1243 const uint64_t value = 0x1080;
1244 const uint64_t mask = 0x0001001900810042;
1245
1246 __asm__ volatile(
1247 "1: \n\t"
1248 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
1249 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
1250 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1251 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1252 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1253 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1254 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1255 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1256 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1257 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1258 "paddw %[dest0], %[dest0], %[src] \n\t"
1259 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1260
1261 "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
1262 "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
1263 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1264 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1265 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1266 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1267 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1268 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1269 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1270 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1271 "paddw %[dest1], %[dest1], %[src] \n\t"
1272 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1273
1274 "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
1275 "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
1276 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1277 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1278 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1279 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1280 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1281 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1282 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1283 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1284 "paddw %[dest2], %[dest2], %[src] \n\t"
1285 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1286
1287 "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
1288 "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
1289 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1290 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1291 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1292 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1293 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1294 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1295 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1296 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1297 "paddw %[dest3], %[dest3], %[src] \n\t"
1298 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1299
1300 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1301 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1302 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1303 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1304 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1305
1306 "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
1307 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1308 "daddi %[width], %[width], -0x08 \n\t"
1309 "bnez %[width], 1b \n\t"
1310 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1311 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1312 [dest3] "=&f"(dest3)
1313 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
1314 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1315 [zero] "f"(0x00)
1316 : "memory");
1317 }
1318
ABGRToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1319 void ABGRToUVRow_MMI(const uint8_t* src_rgb,
1320 int src_stride_rgb,
1321 uint8_t* dst_u,
1322 uint8_t* dst_v,
1323 int width) {
1324 uint64_t src_rgb1;
1325 uint64_t ftmp[13];
1326 uint64_t tmp[1];
1327 const uint64_t value = 0x4040;
1328 const uint64_t mask_u = 0x0002003800250013;
1329 const uint64_t mask_v = 0x0009002F00380002;
1330
1331 __asm__ volatile(
1332 "dli %[tmp0], 0x0001000100010001 \n\t"
1333 "dmtc1 %[tmp0], %[ftmp12] \n\t"
1334 "1: \n\t"
1335 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
1336 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
1337 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
1338 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1339 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1340 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1341 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1342 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1343 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1344 "paddh %[src0], %[src0], %[src_lo] \n\t"
1345 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1346 "paddh %[src0], %[src0], %[src_hi] \n\t"
1347 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1348 "psrlh %[src0], %[src0], %[one] \n\t"
1349 "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
1350 "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
1351 "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
1352 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1353 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1354
1355 "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
1356 "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
1357 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1358 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1359 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1360 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1361 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1362 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1363 "paddh %[src0], %[src0], %[src_lo] \n\t"
1364 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1365 "paddh %[src0], %[src0], %[src_hi] \n\t"
1366 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1367 "psrlh %[src0], %[src0], %[one] \n\t"
1368 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1369 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1370 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1371 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1372 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1373
1374 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1375 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1376 "psubw %[dest0_u], %[src1], %[src0] \n\t"
1377 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1378 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1379 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1380 "psubw %[dest0_v], %[src0], %[src1] \n\t"
1381 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1382
1383 "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
1384 "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
1385 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1386 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1387 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1388 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1389 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1390 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1391 "paddh %[src0], %[src0], %[src_lo] \n\t"
1392 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1393 "paddh %[src0], %[src0], %[src_hi] \n\t"
1394 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1395 "psrlh %[src0], %[src0], %[one] \n\t"
1396 "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
1397 "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
1398 "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
1399 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1400 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1401
1402 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
1403 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
1404 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1405 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1406 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1407 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1408 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1409 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1410 "paddh %[src0], %[src0], %[src_lo] \n\t"
1411 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1412 "paddh %[src0], %[src0], %[src_hi] \n\t"
1413 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1414 "psrlh %[src0], %[src0], %[one] \n\t"
1415 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1416 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1417 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1418 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1419 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1420
1421 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1422 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1423 "psubw %[dest1_u], %[src1], %[src0] \n\t"
1424 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1425 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1426 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1427 "psubw %[dest1_v], %[src0], %[src1] \n\t"
1428 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1429
1430 "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
1431 "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
1432 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1433 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1434 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1435 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1436 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1437 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1438 "paddh %[src0], %[src0], %[src_lo] \n\t"
1439 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1440 "paddh %[src0], %[src0], %[src_hi] \n\t"
1441 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1442 "psrlh %[src0], %[src0], %[one] \n\t"
1443 "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
1444 "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
1445 "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
1446 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1447 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1448
1449 "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
1450 "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
1451 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1452 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1453 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1454 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1455 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1456 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1457 "paddh %[src0], %[src0], %[src_lo] \n\t"
1458 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1459 "paddh %[src0], %[src0], %[src_hi] \n\t"
1460 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1461 "psrlh %[src0], %[src0], %[one] \n\t"
1462 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1463 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1464 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1465 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1466 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1467
1468 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1469 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1470 "psubw %[dest2_u], %[src1], %[src0] \n\t"
1471 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1472 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1473 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1474 "psubw %[dest2_v], %[src0], %[src1] \n\t"
1475 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1476
1477 "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
1478 "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
1479 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1480 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1481 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1482 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1483 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1484 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1485 "paddh %[src0], %[src0], %[src_lo] \n\t"
1486 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1487 "paddh %[src0], %[src0], %[src_hi] \n\t"
1488 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1489 "psrlh %[src0], %[src0], %[one] \n\t"
1490 "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
1491 "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
1492 "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
1493 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1494 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1495
1496 "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
1497 "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
1498 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1499 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1500 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1501 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1502 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1503 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1504 "paddh %[src0], %[src0], %[src_lo] \n\t"
1505 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1506 "paddh %[src0], %[src0], %[src_hi] \n\t"
1507 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1508 "psrlh %[src0], %[src0], %[one] \n\t"
1509 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
1510 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
1511 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1512 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1513 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1514
1515 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1516 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1517 "psubw %[dest3_u], %[src1], %[src0] \n\t"
1518 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1519 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1520 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1521 "psubw %[dest3_v], %[src0], %[src1] \n\t"
1522 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1523
1524 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1525 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1526 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1527 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1528 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1529
1530 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1531 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1532 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1533 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1534 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1535
1536 "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
1537 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1538 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1539 "daddi %[width], %[width], -0x10 \n\t"
1540 "bgtz %[width], 1b \n\t"
1541 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1542 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1543 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1544 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1545 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1546 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
1547 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
1548 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
1549 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1550 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1551 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
1552 [sixteen] "f"(0x10)
1553 : "memory");
1554 }
1555
RGBAToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)1556 void RGBAToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
1557 uint64_t src, src_hi, src_lo;
1558 uint64_t dest0, dest1, dest2, dest3;
1559 const uint64_t value = 0x1080;
1560 const uint64_t mask = 0x0042008100190001;
1561
1562 __asm__ volatile(
1563 "1: \n\t"
1564 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
1565 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
1566 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1567 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1568 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1569 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1570 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1571 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1572 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1573 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1574 "paddw %[dest0], %[dest0], %[src] \n\t"
1575 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1576
1577 "gsldlc1 %[src], 0x0f(%[src_argb]) \n\t"
1578 "gsldrc1 %[src], 0x08(%[src_argb]) \n\t"
1579 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1580 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1581 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1582 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1583 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1584 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1585 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1586 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1587 "paddw %[dest1], %[dest1], %[src] \n\t"
1588 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1589
1590 "gsldlc1 %[src], 0x17(%[src_argb]) \n\t"
1591 "gsldrc1 %[src], 0x10(%[src_argb]) \n\t"
1592 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1593 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1594 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1595 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1596 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1597 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1598 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1599 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1600 "paddw %[dest2], %[dest2], %[src] \n\t"
1601 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1602
1603 "gsldlc1 %[src], 0x1f(%[src_argb]) \n\t"
1604 "gsldrc1 %[src], 0x18(%[src_argb]) \n\t"
1605 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1606 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
1607 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1608 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1609 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
1610 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1611 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1612 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1613 "paddw %[dest3], %[dest3], %[src] \n\t"
1614 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1615
1616 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1617 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1618 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1619 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1620 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1621
1622 "daddiu %[src_argb], %[src_argb], 0x20 \n\t"
1623 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1624 "daddi %[width], %[width], -0x08 \n\t"
1625 "bnez %[width], 1b \n\t"
1626 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1627 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1628 [dest3] "=&f"(dest3)
1629 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
1630 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1631 [zero] "f"(0x00)
1632 : "memory");
1633 }
1634
RGBAToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1635 void RGBAToUVRow_MMI(const uint8_t* src_rgb,
1636 int src_stride_rgb,
1637 uint8_t* dst_u,
1638 uint8_t* dst_v,
1639 int width) {
1640 uint64_t src_rgb1;
1641 uint64_t ftmp[13];
1642 uint64_t tmp[1];
1643 const uint64_t value = 0x4040;
1644 const uint64_t mask_u = 0x0013002500380002;
1645 const uint64_t mask_v = 0x00020038002f0009;
1646
1647 __asm__ volatile(
1648 "dli %[tmp0], 0x0001000100010001 \n\t"
1649 "dmtc1 %[tmp0], %[ftmp12] \n\t"
1650 "1: \n\t"
1651 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
1652 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
1653 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
1654 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1655 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1656 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1657 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1658 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1659 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1660 "paddh %[src0], %[src0], %[src_lo] \n\t"
1661 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1662 "paddh %[src0], %[src0], %[src_hi] \n\t"
1663 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1664 "psrlh %[src0], %[src0], %[one] \n\t"
1665 "pinsrh_0 %[dest0_u], %[src0], %[value] \n\t"
1666 "dsrl %[dest0_v], %[src0], %[sixteen] \n\t"
1667 "pinsrh_3 %[dest0_v], %[dest0_v], %[value] \n\t"
1668 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1669 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1670
1671 "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
1672 "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
1673 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
1674 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
1675 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1676 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1677 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1678 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1679 "paddh %[src0], %[src0], %[src_lo] \n\t"
1680 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1681 "paddh %[src0], %[src0], %[src_hi] \n\t"
1682 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1683 "psrlh %[src0], %[src0], %[one] \n\t"
1684 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1685 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1686 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1687 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1688 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1689
1690 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
1691 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
1692 "psubw %[dest0_u], %[src0], %[src1] \n\t"
1693 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
1694 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
1695 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
1696 "psubw %[dest0_v], %[src1], %[src0] \n\t"
1697 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
1698
1699 "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
1700 "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
1701 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
1702 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
1703 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1704 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1705 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1706 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1707 "paddh %[src0], %[src0], %[src_lo] \n\t"
1708 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1709 "paddh %[src0], %[src0], %[src_hi] \n\t"
1710 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1711 "psrlh %[src0], %[src0], %[one] \n\t"
1712 "pinsrh_0 %[dest1_u], %[src0], %[value] \n\t"
1713 "dsrl %[dest1_v], %[src0], %[sixteen] \n\t"
1714 "pinsrh_3 %[dest1_v], %[dest1_v], %[value] \n\t"
1715 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
1716 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
1717
1718 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
1719 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
1720 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
1721 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
1722 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1723 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1724 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1725 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1726 "paddh %[src0], %[src0], %[src_lo] \n\t"
1727 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1728 "paddh %[src0], %[src0], %[src_hi] \n\t"
1729 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1730 "psrlh %[src0], %[src0], %[one] \n\t"
1731 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1732 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1733 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1734 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1735 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1736
1737 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
1738 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
1739 "psubw %[dest1_u], %[src0], %[src1] \n\t"
1740 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
1741 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
1742 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
1743 "psubw %[dest1_v], %[src1], %[src0] \n\t"
1744 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
1745
1746 "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
1747 "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
1748 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
1749 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
1750 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1751 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1752 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1753 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1754 "paddh %[src0], %[src0], %[src_lo] \n\t"
1755 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1756 "paddh %[src0], %[src0], %[src_hi] \n\t"
1757 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1758 "psrlh %[src0], %[src0], %[one] \n\t"
1759 "pinsrh_0 %[dest2_u], %[src0], %[value] \n\t"
1760 "dsrl %[dest2_v], %[src0], %[sixteen] \n\t"
1761 "pinsrh_3 %[dest2_v], %[dest2_v], %[value] \n\t"
1762 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
1763 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
1764
1765 "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
1766 "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
1767 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
1768 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
1769 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1770 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1771 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1772 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1773 "paddh %[src0], %[src0], %[src_lo] \n\t"
1774 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1775 "paddh %[src0], %[src0], %[src_hi] \n\t"
1776 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1777 "psrlh %[src0], %[src0], %[one] \n\t"
1778 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1779 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1780 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1781 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1782 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1783
1784 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
1785 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
1786 "psubw %[dest2_u], %[src0], %[src1] \n\t"
1787 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
1788 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
1789 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
1790 "psubw %[dest2_v], %[src1], %[src0] \n\t"
1791 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
1792
1793 "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
1794 "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
1795 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
1796 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
1797 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1798 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1799 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1800 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1801 "paddh %[src0], %[src0], %[src_lo] \n\t"
1802 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1803 "paddh %[src0], %[src0], %[src_hi] \n\t"
1804 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1805 "psrlh %[src0], %[src0], %[one] \n\t"
1806 "pinsrh_0 %[dest3_u], %[src0], %[value] \n\t"
1807 "dsrl %[dest3_v], %[src0], %[sixteen] \n\t"
1808 "pinsrh_3 %[dest3_v], %[dest3_v], %[value] \n\t"
1809 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
1810 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
1811
1812 "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
1813 "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
1814 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
1815 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
1816 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1817 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1818 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1819 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1820 "paddh %[src0], %[src0], %[src_lo] \n\t"
1821 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1822 "paddh %[src0], %[src0], %[src_hi] \n\t"
1823 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1824 "psrlh %[src0], %[src0], %[one] \n\t"
1825 "pinsrh_0 %[src_lo], %[src0], %[value] \n\t"
1826 "dsrl %[src_hi], %[src0], %[sixteen] \n\t"
1827 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1828 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
1829 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
1830
1831 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
1832 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
1833 "psubw %[dest3_u], %[src0], %[src1] \n\t"
1834 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
1835 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
1836 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
1837 "psubw %[dest3_v], %[src1], %[src0] \n\t"
1838 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
1839
1840 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
1841 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
1842 "packushb %[dest0_u], %[src0], %[src1] \n\t"
1843 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
1844 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
1845
1846 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
1847 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
1848 "packushb %[dest0_v], %[src0], %[src1] \n\t"
1849 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
1850 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
1851
1852 "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
1853 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
1854 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
1855 "daddi %[width], %[width], -0x10 \n\t"
1856 "bgtz %[width], 1b \n\t"
1857 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
1858 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
1859 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
1860 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
1861 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
1862 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
1863 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
1864 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
1865 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
1866 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
1867 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
1868 [sixteen] "f"(0x10)
1869 : "memory");
1870 }
1871
RGB24ToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)1872 void RGB24ToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
1873 uint64_t src, src_hi, src_lo;
1874 uint64_t dest0, dest1, dest2, dest3;
1875 const uint64_t value = 0x1080;
1876 const uint64_t mask = 0x0001004200810019;
1877
1878 __asm__ volatile(
1879 "1: \n\t"
1880 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
1881 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
1882 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1883 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1884 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1885 "dsll %[src], %[src], %[eight] \n\t"
1886 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1887 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1888 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1889 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1890 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
1891 "paddw %[dest0], %[dest0], %[src] \n\t"
1892 "psrlw %[dest0], %[dest0], %[eight] \n\t"
1893
1894 "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
1895 "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
1896 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1897 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1898 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1899 "dsll %[src], %[src], %[eight] \n\t"
1900 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1901 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1902 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1903 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1904 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
1905 "paddw %[dest1], %[dest1], %[src] \n\t"
1906 "psrlw %[dest1], %[dest1], %[eight] \n\t"
1907
1908 "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
1909 "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
1910 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1911 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1912 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1913 "dsll %[src], %[src], %[eight] \n\t"
1914 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1915 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1916 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1917 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1918 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
1919 "paddw %[dest2], %[dest2], %[src] \n\t"
1920 "psrlw %[dest2], %[dest2], %[eight] \n\t"
1921
1922 "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
1923 "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
1924 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
1925 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
1926 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
1927 "dsll %[src], %[src], %[eight] \n\t"
1928 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
1929 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
1930 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
1931 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
1932 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
1933 "paddw %[dest3], %[dest3], %[src] \n\t"
1934 "psrlw %[dest3], %[dest3], %[eight] \n\t"
1935
1936 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
1937 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
1938 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
1939 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
1940 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
1941
1942 "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
1943 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
1944 "daddi %[width], %[width], -0x08 \n\t"
1945 "bnez %[width], 1b \n\t"
1946 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
1947 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
1948 [dest3] "=&f"(dest3)
1949 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
1950 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
1951 [zero] "f"(0x00)
1952 : "memory");
1953 }
1954
RGB24ToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)1955 void RGB24ToUVRow_MMI(const uint8_t* src_rgb,
1956 int src_stride_rgb,
1957 uint8_t* dst_u,
1958 uint8_t* dst_v,
1959 int width) {
1960 uint64_t src_rgb1;
1961 uint64_t ftmp[13];
1962 uint64_t tmp[1];
1963 const uint64_t value = 0x4040;
1964 const uint64_t mask_u = 0x0013002500380002;
1965 const uint64_t mask_v = 0x00020038002f0009;
1966
1967 __asm__ volatile(
1968 "dli %[tmp0], 0x0001000100010001 \n\t"
1969 "dmtc1 %[tmp0], %[ftmp12] \n\t"
1970 "1: \n\t"
1971 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
1972 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
1973 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
1974 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
1975 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
1976 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1977 "dsll %[src0], %[src0], %[eight] \n\t"
1978 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
1979 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
1980 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
1981 "paddh %[src0], %[src0], %[src_lo] \n\t"
1982 "dsll %[src1], %[src1], %[eight] \n\t"
1983 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
1984 "paddh %[src0], %[src0], %[src_hi] \n\t"
1985 "paddh %[src0], %[src0], %[ftmp12] \n\t"
1986 "psrlh %[src0], %[src0], %[one] \n\t"
1987 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
1988 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
1989 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
1990 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
1991 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
1992
1993 "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
1994 "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
1995 "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
1996 "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
1997 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
1998 "dsll %[src0], %[src0], %[eight] \n\t"
1999 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2000 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2001 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2002 "paddh %[src0], %[src0], %[src_lo] \n\t"
2003 "dsll %[src1], %[src1], %[eight] \n\t"
2004 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2005 "paddh %[src0], %[src0], %[src_hi] \n\t"
2006 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2007 "psrlh %[src0], %[src0], %[one] \n\t"
2008 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2009 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2010 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2011 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2012 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2013
2014 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
2015 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
2016 "psubw %[dest0_u], %[src0], %[src1] \n\t"
2017 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
2018 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
2019 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
2020 "psubw %[dest0_v], %[src1], %[src0] \n\t"
2021 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
2022
2023 "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
2024 "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
2025 "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
2026 "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
2027 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2028 "dsll %[src0], %[src0], %[eight] \n\t"
2029 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2030 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2031 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2032 "paddh %[src0], %[src0], %[src_lo] \n\t"
2033 "dsll %[src1], %[src1], %[eight] \n\t"
2034 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2035 "paddh %[src0], %[src0], %[src_hi] \n\t"
2036 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2037 "psrlh %[src0], %[src0], %[one] \n\t"
2038 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
2039 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
2040 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
2041 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
2042 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
2043
2044 "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
2045 "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
2046 "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
2047 "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
2048 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2049 "dsll %[src0], %[src0], %[eight] \n\t"
2050 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2051 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2052 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2053 "paddh %[src0], %[src0], %[src_lo] \n\t"
2054 "dsll %[src1], %[src1], %[eight] \n\t"
2055 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2056 "paddh %[src0], %[src0], %[src_hi] \n\t"
2057 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2058 "psrlh %[src0], %[src0], %[one] \n\t"
2059 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2060 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2061 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2062 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2063 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2064
2065 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2066 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2067 "psubw %[dest1_u], %[src0], %[src1] \n\t"
2068 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2069 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2070 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2071 "psubw %[dest1_v], %[src1], %[src0] \n\t"
2072 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2073
2074 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
2075 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
2076 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2077 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2078 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2079 "dsll %[src0], %[src0], %[eight] \n\t"
2080 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2081 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2082 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2083 "paddh %[src0], %[src0], %[src_lo] \n\t"
2084 "dsll %[src1], %[src1], %[eight] \n\t"
2085 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2086 "paddh %[src0], %[src0], %[src_hi] \n\t"
2087 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2088 "psrlh %[src0], %[src0], %[one] \n\t"
2089 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
2090 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
2091 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
2092 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2093 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2094
2095 "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
2096 "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
2097 "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
2098 "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
2099 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2100 "dsll %[src0], %[src0], %[eight] \n\t"
2101 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2102 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2103 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2104 "paddh %[src0], %[src0], %[src_lo] \n\t"
2105 "dsll %[src1], %[src1], %[eight] \n\t"
2106 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2107 "paddh %[src0], %[src0], %[src_hi] \n\t"
2108 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2109 "psrlh %[src0], %[src0], %[one] \n\t"
2110 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2111 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2112 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2113 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2114 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2115
2116 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2117 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2118 "psubw %[dest2_u], %[src0], %[src1] \n\t"
2119 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2120 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2121 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2122 "psubw %[dest2_v], %[src1], %[src0] \n\t"
2123 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2124
2125 "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
2126 "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
2127 "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
2128 "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
2129 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2130 "dsll %[src0], %[src0], %[eight] \n\t"
2131 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2132 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2133 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2134 "paddh %[src0], %[src0], %[src_lo] \n\t"
2135 "dsll %[src1], %[src1], %[eight] \n\t"
2136 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2137 "paddh %[src0], %[src0], %[src_hi] \n\t"
2138 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2139 "psrlh %[src0], %[src0], %[one] \n\t"
2140 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
2141 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
2142 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
2143 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2144 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2145
2146 "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
2147 "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
2148 "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
2149 "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
2150 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2151 "dsll %[src0], %[src0], %[eight] \n\t"
2152 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2153 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2154 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2155 "paddh %[src0], %[src0], %[src_lo] \n\t"
2156 "dsll %[src1], %[src1], %[eight] \n\t"
2157 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2158 "paddh %[src0], %[src0], %[src_hi] \n\t"
2159 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2160 "psrlh %[src0], %[src0], %[one] \n\t"
2161 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2162 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2163 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2164 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2165 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2166
2167 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2168 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2169 "psubw %[dest3_u], %[src0], %[src1] \n\t"
2170 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2171 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2172 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2173 "psubw %[dest3_v], %[src1], %[src0] \n\t"
2174 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2175
2176 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2177 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2178 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2179 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2180 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2181
2182 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2183 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2184 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2185 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2186 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2187
2188 "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
2189 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2190 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2191 "daddi %[width], %[width], -0x10 \n\t"
2192 "bgtz %[width], 1b \n\t"
2193 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2194 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2195 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2196 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2197 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2198 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
2199 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
2200 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
2201 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2202 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2203 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
2204 [sixteen] "f"(0x10)
2205 : "memory");
2206 }
2207
RAWToYRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)2208 void RAWToYRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
2209 uint64_t src, src_hi, src_lo;
2210 uint64_t dest0, dest1, dest2, dest3;
2211 const uint64_t value = 0x1080;
2212 const uint64_t mask = 0x0001001900810042;
2213
2214 __asm__ volatile(
2215 "1: \n\t"
2216 "gsldlc1 %[src], 0x07(%[src_argb]) \n\t"
2217 "gsldrc1 %[src], 0x00(%[src_argb]) \n\t"
2218 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2219 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2220 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2221 "dsll %[src], %[src], %[eight] \n\t"
2222 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2223 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2224 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2225 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2226 "punpckhwd %[dest0], %[src_lo], %[src_hi] \n\t"
2227 "paddw %[dest0], %[dest0], %[src] \n\t"
2228 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2229
2230 "gsldlc1 %[src], 0x0d(%[src_argb]) \n\t"
2231 "gsldrc1 %[src], 0x06(%[src_argb]) \n\t"
2232 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2233 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2234 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2235 "dsll %[src], %[src], %[eight] \n\t"
2236 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2237 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2238 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2239 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2240 "punpckhwd %[dest1], %[src_lo], %[src_hi] \n\t"
2241 "paddw %[dest1], %[dest1], %[src] \n\t"
2242 "psrlw %[dest1], %[dest1], %[eight] \n\t"
2243
2244 "gsldlc1 %[src], 0x13(%[src_argb]) \n\t"
2245 "gsldrc1 %[src], 0x0c(%[src_argb]) \n\t"
2246 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2247 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2248 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2249 "dsll %[src], %[src], %[eight] \n\t"
2250 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2251 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2252 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2253 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2254 "punpckhwd %[dest2], %[src_lo], %[src_hi] \n\t"
2255 "paddw %[dest2], %[dest2], %[src] \n\t"
2256 "psrlw %[dest2], %[dest2], %[eight] \n\t"
2257
2258 "gsldlc1 %[src], 0x19(%[src_argb]) \n\t"
2259 "gsldrc1 %[src], 0x12(%[src_argb]) \n\t"
2260 "punpcklbh %[src_lo], %[src], %[zero] \n\t"
2261 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2262 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2263 "dsll %[src], %[src], %[eight] \n\t"
2264 "punpckhbh %[src_hi], %[src], %[zero] \n\t"
2265 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2266 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2267 "punpcklwd %[src], %[src_lo], %[src_hi] \n\t"
2268 "punpckhwd %[dest3], %[src_lo], %[src_hi] \n\t"
2269 "paddw %[dest3], %[dest3], %[src] \n\t"
2270 "psrlw %[dest3], %[dest3], %[eight] \n\t"
2271
2272 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
2273 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
2274 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
2275 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
2276 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
2277
2278 "daddiu %[src_argb], %[src_argb], 0x18 \n\t"
2279 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
2280 "daddi %[width], %[width], -0x08 \n\t"
2281 "bnez %[width], 1b \n\t"
2282 : [src] "=&f"(src), [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
2283 [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
2284 [dest3] "=&f"(dest3)
2285 : [src_argb] "r"(src_argb), [dst_y] "r"(dst_y), [width] "r"(width),
2286 [mask] "f"(mask), [value] "f"(value), [eight] "f"(0x08),
2287 [zero] "f"(0x00)
2288 : "memory");
2289 }
2290
RAWToUVRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)2291 void RAWToUVRow_MMI(const uint8_t* src_rgb,
2292 int src_stride_rgb,
2293 uint8_t* dst_u,
2294 uint8_t* dst_v,
2295 int width) {
2296 uint64_t src_rgb1;
2297 uint64_t ftmp[13];
2298 uint64_t tmp[1];
2299 const uint64_t value = 0x4040;
2300 const uint64_t mask_u = 0x0002003800250013;
2301 const uint64_t mask_v = 0x0009002f00380002;
2302
2303 __asm__ volatile(
2304 "dli %[tmp0], 0x0001000100010001 \n\t"
2305 "dmtc1 %[tmp0], %[ftmp12] \n\t"
2306 "1: \n\t"
2307 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
2308 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
2309 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
2310 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
2311 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
2312 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2313 "dsll %[src0], %[src0], %[eight] \n\t"
2314 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2315 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2316 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2317 "paddh %[src0], %[src0], %[src_lo] \n\t"
2318 "dsll %[src1], %[src1], %[eight] \n\t"
2319 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2320 "paddh %[src0], %[src0], %[src_hi] \n\t"
2321 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2322 "psrlh %[src0], %[src0], %[one] \n\t"
2323 "pinsrh_3 %[dest0_u], %[src0], %[value] \n\t"
2324 "dsll %[dest0_v], %[src0], %[sixteen] \n\t"
2325 "pinsrh_0 %[dest0_v], %[dest0_v], %[value] \n\t"
2326 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
2327 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
2328
2329 "gsldrc1 %[src0], 0x06(%[src_rgb]) \n\t"
2330 "gsldlc1 %[src0], 0x0d(%[src_rgb]) \n\t"
2331 "gsldrc1 %[src1], 0x06(%[src_rgb1]) \n\t"
2332 "gsldlc1 %[src1], 0x0d(%[src_rgb1]) \n\t"
2333 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2334 "dsll %[src0], %[src0], %[eight] \n\t"
2335 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2336 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2337 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2338 "paddh %[src0], %[src0], %[src_lo] \n\t"
2339 "dsll %[src1], %[src1], %[eight] \n\t"
2340 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2341 "paddh %[src0], %[src0], %[src_hi] \n\t"
2342 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2343 "psrlh %[src0], %[src0], %[one] \n\t"
2344 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2345 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2346 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2347 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2348 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2349
2350 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
2351 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
2352 "psubw %[dest0_u], %[src1], %[src0] \n\t"
2353 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
2354 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
2355 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
2356 "psubw %[dest0_v], %[src0], %[src1] \n\t"
2357 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
2358
2359 "gsldrc1 %[src0], 0x0c(%[src_rgb]) \n\t"
2360 "gsldlc1 %[src0], 0x13(%[src_rgb]) \n\t"
2361 "gsldrc1 %[src1], 0x0c(%[src_rgb1]) \n\t"
2362 "gsldlc1 %[src1], 0x13(%[src_rgb1]) \n\t"
2363 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2364 "dsll %[src0], %[src0], %[eight] \n\t"
2365 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2366 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2367 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2368 "paddh %[src0], %[src0], %[src_lo] \n\t"
2369 "dsll %[src1], %[src1], %[eight] \n\t"
2370 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2371 "paddh %[src0], %[src0], %[src_hi] \n\t"
2372 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2373 "psrlh %[src0], %[src0], %[one] \n\t"
2374 "pinsrh_3 %[dest1_u], %[src0], %[value] \n\t"
2375 "dsll %[dest1_v], %[src0], %[sixteen] \n\t"
2376 "pinsrh_0 %[dest1_v], %[dest1_v], %[value] \n\t"
2377 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
2378 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
2379
2380 "gsldrc1 %[src0], 0x12(%[src_rgb]) \n\t"
2381 "gsldlc1 %[src0], 0x19(%[src_rgb]) \n\t"
2382 "gsldrc1 %[src1], 0x12(%[src_rgb1]) \n\t"
2383 "gsldlc1 %[src1], 0x19(%[src_rgb1]) \n\t"
2384 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2385 "dsll %[src0], %[src0], %[eight] \n\t"
2386 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2387 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2388 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2389 "paddh %[src0], %[src0], %[src_lo] \n\t"
2390 "dsll %[src1], %[src1], %[eight] \n\t"
2391 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2392 "paddh %[src0], %[src0], %[src_hi] \n\t"
2393 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2394 "psrlh %[src0], %[src0], %[one] \n\t"
2395 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2396 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2397 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2398 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2399 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2400
2401 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2402 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2403 "psubw %[dest1_u], %[src1], %[src0] \n\t"
2404 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2405 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2406 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2407 "psubw %[dest1_v], %[src0], %[src1] \n\t"
2408 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2409
2410 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
2411 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
2412 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2413 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2414 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2415 "dsll %[src0], %[src0], %[eight] \n\t"
2416 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2417 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2418 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2419 "paddh %[src0], %[src0], %[src_lo] \n\t"
2420 "dsll %[src1], %[src1], %[eight] \n\t"
2421 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2422 "paddh %[src0], %[src0], %[src_hi] \n\t"
2423 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2424 "psrlh %[src0], %[src0], %[one] \n\t"
2425 "pinsrh_3 %[dest2_u], %[src0], %[value] \n\t"
2426 "dsll %[dest2_v], %[src0], %[sixteen] \n\t"
2427 "pinsrh_0 %[dest2_v], %[dest2_v], %[value] \n\t"
2428 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2429 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2430
2431 "gsldrc1 %[src0], 0x1e(%[src_rgb]) \n\t"
2432 "gsldlc1 %[src0], 0x25(%[src_rgb]) \n\t"
2433 "gsldrc1 %[src1], 0x1e(%[src_rgb1]) \n\t"
2434 "gsldlc1 %[src1], 0x25(%[src_rgb1]) \n\t"
2435 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2436 "dsll %[src0], %[src0], %[eight] \n\t"
2437 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2438 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2439 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2440 "paddh %[src0], %[src0], %[src_lo] \n\t"
2441 "dsll %[src1], %[src1], %[eight] \n\t"
2442 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2443 "paddh %[src0], %[src0], %[src_hi] \n\t"
2444 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2445 "psrlh %[src0], %[src0], %[one] \n\t"
2446 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2447 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2448 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2449 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2450 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2451
2452 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2453 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2454 "psubw %[dest2_u], %[src1], %[src0] \n\t"
2455 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2456 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2457 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2458 "psubw %[dest2_v], %[src0], %[src1] \n\t"
2459 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2460
2461 "gsldrc1 %[src0], 0x24(%[src_rgb]) \n\t"
2462 "gsldlc1 %[src0], 0x2b(%[src_rgb]) \n\t"
2463 "gsldrc1 %[src1], 0x24(%[src_rgb1]) \n\t"
2464 "gsldlc1 %[src1], 0x2b(%[src_rgb1]) \n\t"
2465 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2466 "dsll %[src0], %[src0], %[eight] \n\t"
2467 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2468 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2469 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2470 "paddh %[src0], %[src0], %[src_lo] \n\t"
2471 "dsll %[src1], %[src1], %[eight] \n\t"
2472 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2473 "paddh %[src0], %[src0], %[src_hi] \n\t"
2474 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2475 "psrlh %[src0], %[src0], %[one] \n\t"
2476 "pinsrh_3 %[dest3_u], %[src0], %[value] \n\t"
2477 "dsll %[dest3_v], %[src0], %[sixteen] \n\t"
2478 "pinsrh_0 %[dest3_v], %[dest3_v], %[value] \n\t"
2479 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2480 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2481
2482 "gsldrc1 %[src0], 0x2a(%[src_rgb]) \n\t"
2483 "gsldlc1 %[src0], 0x31(%[src_rgb]) \n\t"
2484 "gsldrc1 %[src1], 0x2a(%[src_rgb1]) \n\t"
2485 "gsldlc1 %[src1], 0x31(%[src_rgb1]) \n\t"
2486 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2487 "dsll %[src0], %[src0], %[eight] \n\t"
2488 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2489 "paddh %[src0], %[src_lo], %[src_hi] \n\t"
2490 "punpcklbh %[src_lo], %[src1], %[zero] \n\t"
2491 "paddh %[src0], %[src0], %[src_lo] \n\t"
2492 "dsll %[src1], %[src1], %[eight] \n\t"
2493 "punpckhbh %[src_hi], %[src1], %[zero] \n\t"
2494 "paddh %[src0], %[src0], %[src_hi] \n\t"
2495 "paddh %[src0], %[src0], %[ftmp12] \n\t"
2496 "psrlh %[src0], %[src0], %[one] \n\t"
2497 "pinsrh_3 %[src_lo], %[src0], %[value] \n\t"
2498 "dsll %[src_hi], %[src0], %[sixteen] \n\t"
2499 "pinsrh_0 %[src_hi], %[src_hi], %[value] \n\t"
2500 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2501 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2502
2503 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2504 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2505 "psubw %[dest3_u], %[src1], %[src0] \n\t"
2506 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2507 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2508 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2509 "psubw %[dest3_v], %[src0], %[src1] \n\t"
2510 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2511
2512 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2513 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2514 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2515 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2516 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2517
2518 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2519 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2520 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2521 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2522 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2523
2524 "daddiu %[src_rgb], %[src_rgb], 0x30 \n\t"
2525 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2526 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2527 "daddi %[width], %[width], -0x10 \n\t"
2528 "bgtz %[width], 1b \n\t"
2529 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2530 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2531 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2532 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2533 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2534 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11]),
2535 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
2536 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
2537 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2538 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2539 [zero] "f"(0x00), [eight] "f"(0x08), [one] "f"(0x01),
2540 [sixteen] "f"(0x10)
2541 : "memory");
2542 }
2543
ARGBToYJRow_MMI(const uint8_t * src_argb,uint8_t * dst_y,int width)2544 void ARGBToYJRow_MMI(const uint8_t* src_argb, uint8_t* dst_y, int width) {
2545 uint64_t src, src_hi, src_lo;
2546 uint64_t dest, dest0, dest1, dest2, dest3;
2547 uint64_t tmp0, tmp1;
2548 const uint64_t shift = 0x08;
2549 const uint64_t value = 0x80;
2550 const uint64_t mask0 = 0x0;
2551 const uint64_t mask1 = 0x0001004D0096001DULL;
2552
2553 __asm__ volatile(
2554 "1: \n\t"
2555 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
2556 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
2557 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2558 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2559 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2560 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2561 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2562 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2563 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2564 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2565 "paddw %[dest0], %[tmp0], %[tmp1] \n\t"
2566 "psrlw %[dest0], %[dest0], %[shift] \n\t"
2567
2568 "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
2569 "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
2570 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2571 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2572 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2573 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2574 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2575 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2576 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2577 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2578 "paddw %[dest1], %[tmp0], %[tmp1] \n\t"
2579 "psrlw %[dest1], %[dest1], %[shift] \n\t"
2580
2581 "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
2582 "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
2583 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2584 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2585 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2586 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2587 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2588 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2589 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2590 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2591 "paddw %[dest2], %[tmp0], %[tmp1] \n\t"
2592 "psrlw %[dest2], %[dest2], %[shift] \n\t"
2593
2594 "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
2595 "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
2596 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
2597 "pinsrh_3 %[src_lo], %[src_lo], %[value] \n\t"
2598 "pmaddhw %[src_lo], %[src_lo], %[mask1] \n\t"
2599 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
2600 "pinsrh_3 %[src_hi], %[src_hi], %[value] \n\t"
2601 "pmaddhw %[src_hi], %[src_hi], %[mask1] \n\t"
2602 "punpcklwd %[tmp0], %[src_lo], %[src_hi] \n\t"
2603 "punpckhwd %[tmp1], %[src_lo], %[src_hi] \n\t"
2604 "paddw %[dest3], %[tmp0], %[tmp1] \n\t"
2605 "psrlw %[dest3], %[dest3], %[shift] \n\t"
2606
2607 "packsswh %[tmp0], %[dest0], %[dest1] \n\t"
2608 "packsswh %[tmp1], %[dest2], %[dest3] \n\t"
2609 "packushb %[dest], %[tmp0], %[tmp1] \n\t"
2610 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
2611 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
2612
2613 "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
2614 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
2615 "daddi %[width], %[width], -0x08 \n\t"
2616 "bnez %[width], 1b \n\t"
2617 : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
2618 [src_lo] "=&f"(src_lo), [dest0] "=&f"(dest0), [dest1] "=&f"(dest1),
2619 [dest2] "=&f"(dest2), [dest3] "=&f"(dest3), [tmp0] "=&f"(tmp0),
2620 [tmp1] "=&f"(tmp1)
2621 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_y), [mask0] "f"(mask0),
2622 [mask1] "f"(mask1), [shift] "f"(shift), [value] "f"(value),
2623 [width] "r"(width)
2624 : "memory");
2625 }
2626
ARGBToUVJRow_MMI(const uint8_t * src_rgb,int src_stride_rgb,uint8_t * dst_u,uint8_t * dst_v,int width)2627 void ARGBToUVJRow_MMI(const uint8_t* src_rgb,
2628 int src_stride_rgb,
2629 uint8_t* dst_u,
2630 uint8_t* dst_v,
2631 int width) {
2632 uint64_t src_rgb1;
2633 uint64_t ftmp[12];
2634 const uint64_t value = 0x4040;
2635 const uint64_t mask_u = 0x0015002a003f0002;
2636 const uint64_t mask_v = 0x0002003f0035000a;
2637
2638 __asm__ volatile(
2639 "1: \n\t"
2640 "daddu %[src_rgb1], %[src_rgb], %[src_stride_rgb] \n\t"
2641 "gsldrc1 %[src0], 0x00(%[src_rgb]) \n\t"
2642 "gsldlc1 %[src0], 0x07(%[src_rgb]) \n\t"
2643 "gsldrc1 %[src1], 0x00(%[src_rgb1]) \n\t"
2644 "gsldlc1 %[src1], 0x07(%[src_rgb1]) \n\t"
2645 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2646 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2647 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2648 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2649 "paddh %[src0], %[src_lo], %[src0] \n\t"
2650 "paddh %[src1], %[src_hi], %[src1] \n\t"
2651 "pavgh %[src0], %[src0], %[src1] \n\t"
2652 "dsll %[dest0_u], %[src0], %[sixteen] \n\t"
2653 "pinsrh_0 %[dest0_u], %[dest0_u], %[value] \n\t"
2654 "pinsrh_3 %[dest0_v], %[src0], %[value] \n\t"
2655 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
2656 "pmaddhw %[dest0_v], %[dest0_v], %[mask_v] \n\t"
2657
2658 "gsldrc1 %[src0], 0x08(%[src_rgb]) \n\t"
2659 "gsldlc1 %[src0], 0x0f(%[src_rgb]) \n\t"
2660 "gsldrc1 %[src1], 0x08(%[src_rgb1]) \n\t"
2661 "gsldlc1 %[src1], 0x0f(%[src_rgb1]) \n\t"
2662 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2663 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2664 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2665 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2666 "paddh %[src0], %[src_lo], %[src0] \n\t"
2667 "paddh %[src1], %[src_hi], %[src1] \n\t"
2668 "pavgh %[src0], %[src0], %[src1] \n\t"
2669 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2670 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2671 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2672 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2673 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2674
2675 "punpcklwd %[src0], %[dest0_u], %[src_lo] \n\t"
2676 "punpckhwd %[src1], %[dest0_u], %[src_lo] \n\t"
2677 "psubw %[dest0_u], %[src0], %[src1] \n\t"
2678 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
2679 "punpcklwd %[src0], %[dest0_v], %[src_hi] \n\t"
2680 "punpckhwd %[src1], %[dest0_v], %[src_hi] \n\t"
2681 "psubw %[dest0_v], %[src1], %[src0] \n\t"
2682 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
2683
2684 "gsldrc1 %[src0], 0x10(%[src_rgb]) \n\t"
2685 "gsldlc1 %[src0], 0x17(%[src_rgb]) \n\t"
2686 "gsldrc1 %[src1], 0x10(%[src_rgb1]) \n\t"
2687 "gsldlc1 %[src1], 0x17(%[src_rgb1]) \n\t"
2688 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2689 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2690 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2691 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2692 "paddh %[src0], %[src_lo], %[src0] \n\t"
2693 "paddh %[src1], %[src_hi], %[src1] \n\t"
2694 "pavgh %[src0], %[src0], %[src1] \n\t"
2695 "dsll %[dest1_u], %[src0], %[sixteen] \n\t"
2696 "pinsrh_0 %[dest1_u], %[dest1_u], %[value] \n\t"
2697 "pinsrh_3 %[dest1_v], %[src0], %[value] \n\t"
2698 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
2699 "pmaddhw %[dest1_v], %[dest1_v], %[mask_v] \n\t"
2700
2701 "gsldrc1 %[src0], 0x18(%[src_rgb]) \n\t"
2702 "gsldlc1 %[src0], 0x1f(%[src_rgb]) \n\t"
2703 "gsldrc1 %[src1], 0x18(%[src_rgb1]) \n\t"
2704 "gsldlc1 %[src1], 0x1f(%[src_rgb1]) \n\t"
2705 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2706 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2707 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2708 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2709 "paddh %[src0], %[src_lo], %[src0] \n\t"
2710 "paddh %[src1], %[src_hi], %[src1] \n\t"
2711 "pavgh %[src0], %[src0], %[src1] \n\t"
2712 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2713 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2714 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2715 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2716 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2717
2718 "punpcklwd %[src0], %[dest1_u], %[src_lo] \n\t"
2719 "punpckhwd %[src1], %[dest1_u], %[src_lo] \n\t"
2720 "psubw %[dest1_u], %[src0], %[src1] \n\t"
2721 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
2722 "punpcklwd %[src0], %[dest1_v], %[src_hi] \n\t"
2723 "punpckhwd %[src1], %[dest1_v], %[src_hi] \n\t"
2724 "psubw %[dest1_v], %[src1], %[src0] \n\t"
2725 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
2726
2727 "gsldrc1 %[src0], 0x20(%[src_rgb]) \n\t"
2728 "gsldlc1 %[src0], 0x27(%[src_rgb]) \n\t"
2729 "gsldrc1 %[src1], 0x20(%[src_rgb1]) \n\t"
2730 "gsldlc1 %[src1], 0x27(%[src_rgb1]) \n\t"
2731 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2732 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2733 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2734 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2735 "paddh %[src0], %[src_lo], %[src0] \n\t"
2736 "paddh %[src1], %[src_hi], %[src1] \n\t"
2737 "pavgh %[src0], %[src0], %[src1] \n\t"
2738 "dsll %[dest2_u], %[src0], %[sixteen] \n\t"
2739 "pinsrh_0 %[dest2_u], %[dest2_u], %[value] \n\t"
2740 "pinsrh_3 %[dest2_v], %[src0], %[value] \n\t"
2741 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
2742 "pmaddhw %[dest2_v], %[dest2_v], %[mask_v] \n\t"
2743
2744 "gsldrc1 %[src0], 0x28(%[src_rgb]) \n\t"
2745 "gsldlc1 %[src0], 0x2f(%[src_rgb]) \n\t"
2746 "gsldrc1 %[src1], 0x28(%[src_rgb1]) \n\t"
2747 "gsldlc1 %[src1], 0x2f(%[src_rgb1]) \n\t"
2748 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2749 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2750 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2751 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2752 "paddh %[src0], %[src_lo], %[src0] \n\t"
2753 "paddh %[src1], %[src_hi], %[src1] \n\t"
2754 "pavgh %[src0], %[src0], %[src1] \n\t"
2755 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2756 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2757 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2758 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2759 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2760
2761 "punpcklwd %[src0], %[dest2_u], %[src_lo] \n\t"
2762 "punpckhwd %[src1], %[dest2_u], %[src_lo] \n\t"
2763 "psubw %[dest2_u], %[src0], %[src1] \n\t"
2764 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
2765 "punpcklwd %[src0], %[dest2_v], %[src_hi] \n\t"
2766 "punpckhwd %[src1], %[dest2_v], %[src_hi] \n\t"
2767 "psubw %[dest2_v], %[src1], %[src0] \n\t"
2768 "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"
2769
2770 "gsldrc1 %[src0], 0x30(%[src_rgb]) \n\t"
2771 "gsldlc1 %[src0], 0x37(%[src_rgb]) \n\t"
2772 "gsldrc1 %[src1], 0x30(%[src_rgb1]) \n\t"
2773 "gsldlc1 %[src1], 0x37(%[src_rgb1]) \n\t"
2774 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2775 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2776 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2777 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2778 "paddh %[src0], %[src_lo], %[src0] \n\t"
2779 "paddh %[src1], %[src_hi], %[src1] \n\t"
2780 "pavgh %[src0], %[src0], %[src1] \n\t"
2781 "dsll %[dest3_u], %[src0], %[sixteen] \n\t"
2782 "pinsrh_0 %[dest3_u], %[dest3_u], %[value] \n\t"
2783 "pinsrh_3 %[dest3_v], %[src0], %[value] \n\t"
2784 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
2785 "pmaddhw %[dest3_v], %[dest3_v], %[mask_v] \n\t"
2786
2787 "gsldrc1 %[src0], 0x38(%[src_rgb]) \n\t"
2788 "gsldlc1 %[src0], 0x3f(%[src_rgb]) \n\t"
2789 "gsldrc1 %[src1], 0x38(%[src_rgb1]) \n\t"
2790 "gsldlc1 %[src1], 0x3f(%[src_rgb1]) \n\t"
2791 "punpcklbh %[src_lo], %[src0], %[zero] \n\t"
2792 "punpckhbh %[src_hi], %[src0], %[zero] \n\t"
2793 "punpcklbh %[src0], %[src1], %[zero] \n\t"
2794 "punpckhbh %[src1], %[src1], %[zero] \n\t"
2795 "paddh %[src0], %[src_lo], %[src0] \n\t"
2796 "paddh %[src1], %[src_hi], %[src1] \n\t"
2797 "pavgh %[src0], %[src0], %[src1] \n\t"
2798 "dsll %[src_lo], %[src0], %[sixteen] \n\t"
2799 "pinsrh_0 %[src_lo], %[src_lo], %[value] \n\t"
2800 "pinsrh_3 %[src_hi], %[src0], %[value] \n\t"
2801 "pmaddhw %[src_lo], %[src_lo], %[mask_u] \n\t"
2802 "pmaddhw %[src_hi], %[src_hi], %[mask_v] \n\t"
2803
2804 "punpcklwd %[src0], %[dest3_u], %[src_lo] \n\t"
2805 "punpckhwd %[src1], %[dest3_u], %[src_lo] \n\t"
2806 "psubw %[dest3_u], %[src0], %[src1] \n\t"
2807 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
2808 "punpcklwd %[src0], %[dest3_v], %[src_hi] \n\t"
2809 "punpckhwd %[src1], %[dest3_v], %[src_hi] \n\t"
2810 "psubw %[dest3_v], %[src1], %[src0] \n\t"
2811 "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"
2812
2813 "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
2814 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
2815 "packushb %[dest0_u], %[src0], %[src1] \n\t"
2816 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
2817 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
2818
2819 "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
2820 "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
2821 "packushb %[dest0_v], %[src0], %[src1] \n\t"
2822 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
2823 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
2824
2825 "daddiu %[src_rgb], %[src_rgb], 0x40 \n\t"
2826 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
2827 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
2828 "daddi %[width], %[width], -0x10 \n\t"
2829 "bgtz %[width], 1b \n\t"
2830 : [src_rgb1] "=&r"(src_rgb1), [src0] "=&f"(ftmp[0]),
2831 [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]), [src_hi] "=&f"(ftmp[3]),
2832 [dest0_u] "=&f"(ftmp[4]), [dest0_v] "=&f"(ftmp[5]),
2833 [dest1_u] "=&f"(ftmp[6]), [dest1_v] "=&f"(ftmp[7]),
2834 [dest2_u] "=&f"(ftmp[8]), [dest2_v] "=&f"(ftmp[9]),
2835 [dest3_u] "=&f"(ftmp[10]), [dest3_v] "=&f"(ftmp[11])
2836 : [src_rgb] "r"(src_rgb), [src_stride_rgb] "r"(src_stride_rgb),
2837 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
2838 [mask_u] "f"(mask_u), [mask_v] "f"(mask_v), [value] "f"(value),
2839 [zero] "f"(0x00), [eight] "f"(0x08),
2840 [sixteen] "f"(0x10)
2841 : "memory");
2842 }
2843
RGB565ToYRow_MMI(const uint8_t * src_rgb565,uint8_t * dst_y,int width)2844 void RGB565ToYRow_MMI(const uint8_t* src_rgb565, uint8_t* dst_y, int width) {
2845 uint64_t ftmp[11];
2846 const uint64_t value = 0x1080108010801080;
2847 const uint64_t mask = 0x0001004200810019;
2848 uint64_t c0 = 0x001f001f001f001f;
2849 uint64_t c1 = 0x00ff00ff00ff00ff;
2850 uint64_t c2 = 0x0007000700070007;
2851 __asm__ volatile(
2852 "1: \n\t"
2853 "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
2854 "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
2855 "psrlh %[src1], %[src0], %[eight] \n\t"
2856 "and %[b], %[src0], %[c0] \n\t"
2857 "and %[src0], %[src0], %[c1] \n\t"
2858 "psrlh %[src0], %[src0], %[five] \n\t"
2859 "and %[g], %[src1], %[c2] \n\t"
2860 "psllh %[g], %[g], %[three] \n\t"
2861 "or %[g], %[src0], %[g] \n\t"
2862 "psrlh %[r], %[src1], %[three] \n\t"
2863 "psllh %[src0], %[b], %[three] \n\t"
2864 "psrlh %[src1], %[b], %[two] \n\t"
2865 "or %[b], %[src0], %[src1] \n\t"
2866 "psllh %[src0], %[g], %[two] \n\t"
2867 "psrlh %[src1], %[g], %[four] \n\t"
2868 "or %[g], %[src0], %[src1] \n\t"
2869 "psllh %[src0], %[r], %[three] \n\t"
2870 "psrlh %[src1], %[r], %[two] \n\t"
2871 "or %[r], %[src0], %[src1] \n\t"
2872 "punpcklhw %[src0], %[b], %[r] \n\t"
2873 "punpcklhw %[src1], %[g], %[value] \n\t"
2874 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2875 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2876 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2877 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2878 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2879 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2880 "paddw %[dest0], %[src0], %[src1] \n\t"
2881 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2882
2883 "punpckhhw %[src0], %[b], %[r] \n\t"
2884 "punpckhhw %[src1], %[g], %[value] \n\t"
2885 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2886 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2887 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2888 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2889 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2890 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2891 "paddw %[dest1], %[src0], %[src1] \n\t"
2892 "psrlw %[dest1], %[dest1], %[eight] \n\t"
2893
2894 "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
2895 "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
2896 "psrlh %[src1], %[src0], %[eight] \n\t"
2897 "and %[b], %[src0], %[c0] \n\t"
2898 "and %[src0], %[src0], %[c1] \n\t"
2899 "psrlh %[src0], %[src0], %[five] \n\t"
2900 "and %[g], %[src1], %[c2] \n\t"
2901 "psllh %[g], %[g], %[three] \n\t"
2902 "or %[g], %[src0], %[g] \n\t"
2903 "psrlh %[r], %[src1], %[three] \n\t"
2904 "psllh %[src0], %[b], %[three] \n\t"
2905 "psrlh %[src1], %[b], %[two] \n\t"
2906 "or %[b], %[src0], %[src1] \n\t"
2907 "psllh %[src0], %[g], %[two] \n\t"
2908 "psrlh %[src1], %[g], %[four] \n\t"
2909 "or %[g], %[src0], %[src1] \n\t"
2910 "psllh %[src0], %[r], %[three] \n\t"
2911 "psrlh %[src1], %[r], %[two] \n\t"
2912 "or %[r], %[src0], %[src1] \n\t"
2913 "punpcklhw %[src0], %[b], %[r] \n\t"
2914 "punpcklhw %[src1], %[g], %[value] \n\t"
2915 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2916 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2917 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2918 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2919 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2920 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2921 "paddw %[dest2], %[src0], %[src1] \n\t"
2922 "psrlw %[dest2], %[dest2], %[eight] \n\t"
2923
2924 "punpckhhw %[src0], %[b], %[r] \n\t"
2925 "punpckhhw %[src1], %[g], %[value] \n\t"
2926 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2927 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2928 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2929 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2930 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2931 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2932 "paddw %[dest3], %[src0], %[src1] \n\t"
2933 "psrlw %[dest3], %[dest3], %[eight] \n\t"
2934
2935 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
2936 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
2937 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
2938 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
2939 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
2940
2941 "daddiu %[src_rgb565], %[src_rgb565], 0x10 \n\t"
2942 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
2943 "daddiu %[width], %[width], -0x08 \n\t"
2944 "bgtz %[width], 1b \n\t"
2945 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
2946 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
2947 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
2948 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
2949 : [src_rgb565] "r"(src_rgb565), [dst_y] "r"(dst_y), [value] "f"(value),
2950 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
2951 [mask] "f"(mask), [eight] "f"(0x08), [five] "f"(0x05),
2952 [three] "f"(0x03), [two] "f"(0x02), [four] "f"(0x04)
2953 : "memory");
2954 }
2955
ARGB1555ToYRow_MMI(const uint8_t * src_argb1555,uint8_t * dst_y,int width)2956 void ARGB1555ToYRow_MMI(const uint8_t* src_argb1555,
2957 uint8_t* dst_y,
2958 int width) {
2959 uint64_t ftmp[11];
2960 const uint64_t value = 0x1080108010801080;
2961 const uint64_t mask = 0x0001004200810019;
2962 uint64_t c0 = 0x001f001f001f001f;
2963 uint64_t c1 = 0x00ff00ff00ff00ff;
2964 uint64_t c2 = 0x0003000300030003;
2965 uint64_t c3 = 0x007c007c007c007c;
2966 __asm__ volatile(
2967 "1: \n\t"
2968 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
2969 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
2970 "psrlh %[src1], %[src0], %[eight] \n\t"
2971 "and %[b], %[src0], %[c0] \n\t"
2972 "and %[src0], %[src0], %[c1] \n\t"
2973 "psrlh %[src0], %[src0], %[five] \n\t"
2974 "and %[g], %[src1], %[c2] \n\t"
2975 "psllh %[g], %[g], %[three] \n\t"
2976 "or %[g], %[src0], %[g] \n\t"
2977 "and %[r], %[src1], %[c3] \n\t"
2978 "psrlh %[r], %[r], %[two] \n\t"
2979 "psllh %[src0], %[b], %[three] \n\t"
2980 "psrlh %[src1], %[b], %[two] \n\t"
2981 "or %[b], %[src0], %[src1] \n\t"
2982 "psllh %[src0], %[g], %[three] \n\t"
2983 "psrlh %[src1], %[g], %[two] \n\t"
2984 "or %[g], %[src0], %[src1] \n\t"
2985 "psllh %[src0], %[r], %[three] \n\t"
2986 "psrlh %[src1], %[r], %[two] \n\t"
2987 "or %[r], %[src0], %[src1] \n\t"
2988 "punpcklhw %[src0], %[b], %[r] \n\t"
2989 "punpcklhw %[src1], %[g], %[value] \n\t"
2990 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
2991 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
2992 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
2993 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
2994 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
2995 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
2996 "paddw %[dest0], %[src0], %[src1] \n\t"
2997 "psrlw %[dest0], %[dest0], %[eight] \n\t"
2998
2999 "punpckhhw %[src0], %[b], %[r] \n\t"
3000 "punpckhhw %[src1], %[g], %[value] \n\t"
3001 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3002 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3003 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3004 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3005 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3006 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3007 "paddw %[dest1], %[src0], %[src1] \n\t"
3008 "psrlw %[dest1], %[dest1], %[eight] \n\t"
3009
3010 "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
3011 "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
3012 "psrlh %[src1], %[src0], %[eight] \n\t"
3013 "and %[b], %[src0], %[c0] \n\t"
3014 "and %[src0], %[src0], %[c1] \n\t"
3015 "psrlh %[src0], %[src0], %[five] \n\t"
3016 "and %[g], %[src1], %[c2] \n\t"
3017 "psllh %[g], %[g], %[three] \n\t"
3018 "or %[g], %[src0], %[g] \n\t"
3019 "and %[r], %[src1], %[c3] \n\t"
3020 "psrlh %[r], %[r], %[two] \n\t"
3021 "psllh %[src0], %[b], %[three] \n\t"
3022 "psrlh %[src1], %[b], %[two] \n\t"
3023 "or %[b], %[src0], %[src1] \n\t"
3024 "psllh %[src0], %[g], %[three] \n\t"
3025 "psrlh %[src1], %[g], %[two] \n\t"
3026 "or %[g], %[src0], %[src1] \n\t"
3027 "psllh %[src0], %[r], %[three] \n\t"
3028 "psrlh %[src1], %[r], %[two] \n\t"
3029 "or %[r], %[src0], %[src1] \n\t"
3030 "punpcklhw %[src0], %[b], %[r] \n\t"
3031 "punpcklhw %[src1], %[g], %[value] \n\t"
3032 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3033 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3034 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3035 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3036 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3037 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3038 "paddw %[dest2], %[src0], %[src1] \n\t"
3039 "psrlw %[dest2], %[dest2], %[eight] \n\t"
3040
3041 "punpckhhw %[src0], %[b], %[r] \n\t"
3042 "punpckhhw %[src1], %[g], %[value] \n\t"
3043 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3044 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3045 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3046 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3047 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3048 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3049 "paddw %[dest3], %[src0], %[src1] \n\t"
3050 "psrlw %[dest3], %[dest3], %[eight] \n\t"
3051
3052 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
3053 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
3054 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
3055 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
3056 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
3057
3058 "daddiu %[src_argb1555], %[src_argb1555], 0x10 \n\t"
3059 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
3060 "daddiu %[width], %[width], -0x08 \n\t"
3061 "bgtz %[width], 1b \n\t"
3062 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
3063 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
3064 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
3065 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
3066 : [src_argb1555] "r"(src_argb1555), [dst_y] "r"(dst_y),
3067 [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
3068 [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [eight] "f"(0x08),
3069 [five] "f"(0x05), [three] "f"(0x03), [two] "f"(0x02), [seven] "f"(0x07)
3070 : "memory");
3071 }
3072
ARGB4444ToYRow_MMI(const uint8_t * src_argb4444,uint8_t * dst_y,int width)3073 void ARGB4444ToYRow_MMI(const uint8_t* src_argb4444,
3074 uint8_t* dst_y,
3075 int width) {
3076 uint64_t ftmp[11];
3077 uint64_t value = 0x1080108010801080;
3078 uint64_t mask = 0x0001004200810019;
3079 uint64_t c0 = 0x000f000f000f000f;
3080 uint64_t c1 = 0x00ff00ff00ff00ff;
3081 __asm__ volatile(
3082 "1: \n\t"
3083 "gsldrc1 %[src0], 0x00(%[src_argb4444]) \n\t"
3084 "gsldlc1 %[src0], 0x07(%[src_argb4444]) \n\t"
3085 "psrlh %[src1], %[src0], %[eight] \n\t"
3086 "and %[b], %[src0], %[c0] \n\t"
3087 "and %[src0], %[src0], %[c1] \n\t"
3088 "psrlh %[g], %[src0], %[four] \n\t"
3089 "and %[r], %[src1], %[c0] \n\t"
3090 "psllh %[src0], %[b], %[four] \n\t"
3091 "or %[b], %[src0], %[b] \n\t"
3092 "psllh %[src0], %[g], %[four] \n\t"
3093 "or %[g], %[src0], %[g] \n\t"
3094 "psllh %[src0], %[r], %[four] \n\t"
3095 "or %[r], %[src0], %[r] \n\t"
3096 "punpcklhw %[src0], %[b], %[r] \n\t"
3097 "punpcklhw %[src1], %[g], %[value] \n\t"
3098 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3099 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3100 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3101 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3102 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3103 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3104 "paddw %[dest0], %[src0], %[src1] \n\t"
3105 "psrlw %[dest0], %[dest0], %[eight] \n\t"
3106
3107 "punpckhhw %[src0], %[b], %[r] \n\t"
3108 "punpckhhw %[src1], %[g], %[value] \n\t"
3109 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3110 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3111 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3112 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3113 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3114 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3115 "paddw %[dest1], %[src0], %[src1] \n\t"
3116 "psrlw %[dest1], %[dest1], %[eight] \n\t"
3117
3118 "gsldrc1 %[src0], 0x08(%[src_argb4444]) \n\t"
3119 "gsldlc1 %[src0], 0x0f(%[src_argb4444]) \n\t"
3120 "psrlh %[src1], %[src0], %[eight] \n\t"
3121 "and %[b], %[src0], %[c0] \n\t"
3122 "and %[src0], %[src0], %[c1] \n\t"
3123 "psrlh %[g], %[src0], %[four] \n\t"
3124 "and %[r], %[src1], %[c0] \n\t"
3125 "psllh %[src0], %[b], %[four] \n\t"
3126 "or %[b], %[src0], %[b] \n\t"
3127 "psllh %[src0], %[g], %[four] \n\t"
3128 "or %[g], %[src0], %[g] \n\t"
3129 "psllh %[src0], %[r], %[four] \n\t"
3130 "or %[r], %[src0], %[r] \n\t"
3131 "punpcklhw %[src0], %[b], %[r] \n\t"
3132 "punpcklhw %[src1], %[g], %[value] \n\t"
3133 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3134 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3135 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3136 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3137 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3138 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3139 "paddw %[dest2], %[src0], %[src1] \n\t"
3140 "psrlw %[dest2], %[dest2], %[eight] \n\t"
3141
3142 "punpckhhw %[src0], %[b], %[r] \n\t"
3143 "punpckhhw %[src1], %[g], %[value] \n\t"
3144 "punpcklhw %[src_lo], %[src0], %[src1] \n\t"
3145 "punpckhhw %[src_hi], %[src0], %[src1] \n\t"
3146 "pmaddhw %[src_lo], %[src_lo], %[mask] \n\t"
3147 "pmaddhw %[src_hi], %[src_hi], %[mask] \n\t"
3148 "punpcklwd %[src0], %[src_lo], %[src_hi] \n\t"
3149 "punpckhwd %[src1], %[src_lo], %[src_hi] \n\t"
3150 "paddw %[dest3], %[src0], %[src1] \n\t"
3151 "psrlw %[dest3], %[dest3], %[eight] \n\t"
3152
3153 "packsswh %[src_lo], %[dest0], %[dest1] \n\t"
3154 "packsswh %[src_hi], %[dest2], %[dest3] \n\t"
3155 "packushb %[dest0], %[src_lo], %[src_hi] \n\t"
3156 "gssdlc1 %[dest0], 0x07(%[dst_y]) \n\t"
3157 "gssdrc1 %[dest0], 0x00(%[dst_y]) \n\t"
3158
3159 "daddiu %[src_argb4444], %[src_argb4444], 0x10 \n\t"
3160 "daddiu %[dst_y], %[dst_y], 0x08 \n\t"
3161 "daddiu %[width], %[width], -0x08 \n\t"
3162 "bgtz %[width], 1b \n\t"
3163 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
3164 [src_hi] "=&f"(ftmp[3]), [b] "=&f"(ftmp[4]), [g] "=&f"(ftmp[5]),
3165 [r] "=&f"(ftmp[6]), [dest0] "=&f"(ftmp[7]), [dest1] "=&f"(ftmp[8]),
3166 [dest2] "=&f"(ftmp[9]), [dest3] "=&f"(ftmp[10])
3167 : [src_argb4444] "r"(src_argb4444), [dst_y] "r"(dst_y),
3168 [width] "r"(width), [value] "f"(value), [mask] "f"(mask), [c0] "f"(c0),
3169 [c1] "f"(c1), [eight] "f"(0x08), [four] "f"(0x04)
3170 : "memory");
3171 }
3172
// Computes one row of U and one row of V from two rows of RGB565 pixels,
// combining each 2x2 block of pixels into one U byte and one V byte.
//
// Every loop iteration loads 32 bytes (16 pixels) from the row at src_rgb565
// and 32 bytes from the row src_stride_rgb565 bytes after it, and stores 8
// bytes to dst_u and 8 bytes to dst_v. For each 2x2 block the B, G and R
// fields are summed; the B and R sums (four 5-bit values) are scaled with
// (s << 1) | (s >> 6), the G sum (four 6-bit values) is used as is, and then
//   U = (112 * B - 74 * G - 38 * R + 0x8080) >> 8
//   V = (112 * R - 94 * G - 18 * B + 0x8080) >> 8
//
// src_rgb565:        first source row, read with unaligned 64-bit loads.
// src_stride_rgb565: byte offset from the first source row to the second.
// dst_u, dst_v:      destination rows, written with unaligned 64-bit stores.
// width:             source pixel count. width is only tested at the end of
//                    an iteration, so the loop body always runs at least once
//                    and 16 source pixels are consumed per iteration.
//
// NOTE(review): the asm modifies src_rgb565, next_rgb565, dst_u, dst_v and
// width although all five are declared as input-only operands, and
// next_rgb565 is bound to the int stride before daddu turns it into a
// pointer. GCC documents that input-only operands must not be modified. The
// statement already has 30 operands (13 outputs + 17 inputs), which appears
// to be GCC's maximum, so switching to "+r" operands would first require
// reducing the operand count - confirm before changing.
void RGB565ToUVRow_MMI(const uint8_t* src_rgb565,
                       int src_stride_rgb565,
                       uint8_t* dst_u,
                       uint8_t* dst_v,
                       int width) {
  uint64_t ftmp[13];
  // 0x2020 per halfword; two of these are added and then multiplied by the
  // coefficient 2 in mask_u/mask_v, giving the 0x8080 offset.
  uint64_t value = 0x2020202020202020;
  // Halfword coefficients, low to high: 2, 112, 74, 38.
  uint64_t mask_u = 0x0026004a00700002;
  // Halfword coefficients, low to high: 18, 94, 112, 2.
  uint64_t mask_v = 0x00020070005e0012;
  // pshufh selector used to reorder the halfwords before applying mask_u.
  uint64_t mask = 0x93;
  uint64_t c0 = 0x001f001f001f001f;  // Low 5 bits of a halfword.
  uint64_t c1 = 0x00ff00ff00ff00ff;  // Low byte of a halfword.
  uint64_t c2 = 0x0007000700070007;  // Low 3 bits of a halfword.
  __asm__ volatile(
      // next_rgb565 = src_rgb565 + stride: pointer to the second row.
      "daddu %[next_rgb565], %[src_rgb565], %[next_rgb565] \n\t"
      "1: \n\t"
      // Group 0: pixels 0..3 of both rows -> dest0_u / dest0_v.
      "gsldrc1 %[src0], 0x00(%[src_rgb565]) \n\t"
      "gsldlc1 %[src0], 0x07(%[src_rgb565]) \n\t"
      "gsldrc1 %[src1], 0x00(%[next_rgb565]) \n\t"
      "gsldlc1 %[src1], 0x07(%[next_rgb565]) \n\t"
      // First row: b0 = 5-bit blue, g0 = 6-bit green, r0 = 5-bit red.
      "psrlh %[dest0_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[src0], %[src0], %[five] \n\t"
      "and %[g0], %[dest0_u], %[c2] \n\t"
      "psllh %[g0], %[g0], %[three] \n\t"
      "or %[g0], %[src0], %[g0] \n\t"
      "psrlh %[r0], %[dest0_u], %[three] \n\t"
      // Second row: dest0_u = blue, dest0_v = green, src0 = red.
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest0_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[src1], %[src1], %[five] \n\t"
      "and %[dest0_v], %[src0], %[c2] \n\t"
      "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
      "or %[dest0_v], %[src1], %[dest0_v] \n\t"
      "psrlh %[src0], %[src0], %[three] \n\t"
      // Add the two rows channel by channel.
      "paddh %[b0], %[b0], %[dest0_u] \n\t"
      "paddh %[g0], %[g0], %[dest0_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      // Add horizontally adjacent blue/red sums, then scale the result with
      // (s << 1) | (s >> 6).
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
      "psrlh %[b0], %[src0], %[six] \n\t"
      "psllh %[r0], %[src0], %[one] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      // Same horizontal add for green, paired with the offset halfwords.
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
      // One register per 2x2 block, holding blue, green, red and offset.
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      // Multiply-accumulate with the V coefficients, and (after the shuffle)
      // with the U coefficients.
      "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest0_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      // Subtract the two partial sums and shift right by 8 (arithmetic).
      "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
      "psubw %[dest0_u], %[src0], %[src1] \n\t"
      "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
      "psubw %[dest0_v], %[src1], %[src0] \n\t"
      "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"

      // Group 1: pixels 4..7 of both rows -> dest1_u / dest1_v.
      "gsldrc1 %[src0], 0x08(%[src_rgb565]) \n\t"
      "gsldlc1 %[src0], 0x0f(%[src_rgb565]) \n\t"
      "gsldrc1 %[src1], 0x08(%[next_rgb565]) \n\t"
      "gsldlc1 %[src1], 0x0f(%[next_rgb565]) \n\t"
      "psrlh %[dest1_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[src0], %[src0], %[five] \n\t"
      "and %[g0], %[dest1_u], %[c2] \n\t"
      "psllh %[g0], %[g0], %[three] \n\t"
      "or %[g0], %[src0], %[g0] \n\t"
      "psrlh %[r0], %[dest1_u], %[three] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest1_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[src1], %[src1], %[five] \n\t"
      "and %[dest1_v], %[src0], %[c2] \n\t"
      "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
      "or %[dest1_v], %[src1], %[dest1_v] \n\t"
      "psrlh %[src0], %[src0], %[three] \n\t"
      "paddh %[b0], %[b0], %[dest1_u] \n\t"
      "paddh %[g0], %[g0], %[dest1_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
      "psrlh %[b0], %[src0], %[six] \n\t"
      "psllh %[r0], %[src0], %[one] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest1_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
      "psubw %[dest1_u], %[src0], %[src1] \n\t"
      "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
      "psubw %[dest1_v], %[src1], %[src0] \n\t"
      "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"

      // Group 2: pixels 8..11 of both rows -> dest2_u / dest2_v.
      "gsldrc1 %[src0], 0x10(%[src_rgb565]) \n\t"
      "gsldlc1 %[src0], 0x17(%[src_rgb565]) \n\t"
      "gsldrc1 %[src1], 0x10(%[next_rgb565]) \n\t"
      "gsldlc1 %[src1], 0x17(%[next_rgb565]) \n\t"
      "psrlh %[dest2_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[src0], %[src0], %[five] \n\t"
      "and %[g0], %[dest2_u], %[c2] \n\t"
      "psllh %[g0], %[g0], %[three] \n\t"
      "or %[g0], %[src0], %[g0] \n\t"
      "psrlh %[r0], %[dest2_u], %[three] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest2_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[src1], %[src1], %[five] \n\t"
      "and %[dest2_v], %[src0], %[c2] \n\t"
      "psllh %[dest2_v], %[dest2_v], %[three] \n\t"
      "or %[dest2_v], %[src1], %[dest2_v] \n\t"
      "psrlh %[src0], %[src0], %[three] \n\t"
      "paddh %[b0], %[b0], %[dest2_u] \n\t"
      "paddh %[g0], %[g0], %[dest2_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest2_u], %[dest2_v] \n\t"
      "psrlh %[b0], %[src0], %[six] \n\t"
      "psllh %[r0], %[src0], %[one] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest2_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest2_u], %[dest2_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest2_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest2_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
      "psubw %[dest2_u], %[src0], %[src1] \n\t"
      "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest2_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest2_v], %[g0] \n\t"
      "psubw %[dest2_v], %[src1], %[src0] \n\t"
      "psraw %[dest2_v], %[dest2_v], %[eight] \n\t"

      // Group 3: pixels 12..15 of both rows -> dest3_u / dest3_v.
      "gsldrc1 %[src0], 0x18(%[src_rgb565]) \n\t"
      "gsldlc1 %[src0], 0x1f(%[src_rgb565]) \n\t"
      "gsldrc1 %[src1], 0x18(%[next_rgb565]) \n\t"
      "gsldlc1 %[src1], 0x1f(%[next_rgb565]) \n\t"
      "psrlh %[dest3_u], %[src0], %[eight] \n\t"
      "and %[b0], %[src0], %[c0] \n\t"
      "and %[src0], %[src0], %[c1] \n\t"
      "psrlh %[src0], %[src0], %[five] \n\t"
      "and %[g0], %[dest3_u], %[c2] \n\t"
      "psllh %[g0], %[g0], %[three] \n\t"
      "or %[g0], %[src0], %[g0] \n\t"
      "psrlh %[r0], %[dest3_u], %[three] \n\t"
      "psrlh %[src0], %[src1], %[eight] \n\t"
      "and %[dest3_u], %[src1], %[c0] \n\t"
      "and %[src1], %[src1], %[c1] \n\t"
      "psrlh %[src1], %[src1], %[five] \n\t"
      "and %[dest3_v], %[src0], %[c2] \n\t"
      "psllh %[dest3_v], %[dest3_v], %[three] \n\t"
      "or %[dest3_v], %[src1], %[dest3_v] \n\t"
      "psrlh %[src0], %[src0], %[three] \n\t"
      "paddh %[b0], %[b0], %[dest3_u] \n\t"
      "paddh %[g0], %[g0], %[dest3_v] \n\t"
      "paddh %[r0], %[r0], %[src0] \n\t"
      "punpcklhw %[src0], %[b0], %[r0] \n\t"
      "punpckhhw %[src1], %[b0], %[r0] \n\t"
      "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
      "paddh %[src0], %[dest3_u], %[dest3_v] \n\t"
      "psrlh %[b0], %[src0], %[six] \n\t"
      "psllh %[r0], %[src0], %[one] \n\t"
      "or %[b0], %[b0], %[r0] \n\t"
      "punpcklhw %[src0], %[g0], %[value] \n\t"
      "punpckhhw %[src1], %[g0], %[value] \n\t"
      "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
      "punpckhwd %[dest3_v], %[src0], %[src1] \n\t"
      "paddh %[g0], %[dest3_u], %[dest3_v] \n\t"
      "punpcklhw %[src0], %[b0], %[g0] \n\t"
      "punpckhhw %[src1], %[b0], %[g0] \n\t"

      "pmaddhw %[dest3_v], %[src0], %[mask_v] \n\t"
      "pshufh %[dest3_u], %[src0], %[mask] \n\t"
      "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
      "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
      "pshufh %[b0], %[src1], %[mask] \n\t"
      "pmaddhw %[b0], %[b0], %[mask_u] \n\t"

      "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
      "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
      "psubw %[dest3_u], %[src0], %[src1] \n\t"
      "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
      "punpcklwd %[src0], %[dest3_v], %[g0] \n\t"
      "punpckhwd %[src1], %[dest3_v], %[g0] \n\t"
      "psubw %[dest3_v], %[src1], %[src0] \n\t"
      "psraw %[dest3_v], %[dest3_v], %[eight] \n\t"

      // Narrow the eight 32-bit U results to bytes and store them, then do
      // the same for V.
      "packsswh %[src0], %[dest0_u], %[dest1_u] \n\t"
      "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
      "packushb %[dest0_u], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
      "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
      "packsswh %[src0], %[dest0_v], %[dest1_v] \n\t"
      "packsswh %[src1], %[dest2_v], %[dest3_v] \n\t"
      "packushb %[dest0_v], %[src0], %[src1] \n\t"
      "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
      "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"

      // Advance: 32 source bytes per row, 8 bytes per output, 16 pixels.
      "daddiu %[src_rgb565], %[src_rgb565], 0x20 \n\t"
      "daddiu %[next_rgb565], %[next_rgb565], 0x20 \n\t"
      "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
      "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
      "daddiu %[width], %[width], -0x10 \n\t"
      "bgtz %[width], 1b \n\t"
      : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
        [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
        [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
        [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
        [dest1_v] "=&f"(ftmp[10]), [dest2_v] "=&f"(ftmp[11]),
        [dest3_v] "=&f"(ftmp[12])
      : [src_rgb565] "r"(src_rgb565), [next_rgb565] "r"(src_stride_rgb565),
        [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
        [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2),
        [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
        [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
        [one] "f"(0x01)
      : "memory");
}
3440
ARGB1555ToUVRow_MMI(const uint8_t * src_argb1555,int src_stride_argb1555,uint8_t * dst_u,uint8_t * dst_v,int width)3441 void ARGB1555ToUVRow_MMI(const uint8_t* src_argb1555,
3442 int src_stride_argb1555,
3443 uint8_t* dst_u,
3444 uint8_t* dst_v,
3445 int width) {
3446 uint64_t ftmp[11];
3447 uint64_t value = 0x2020202020202020;
3448 uint64_t mask_u = 0x0026004a00700002;
3449 uint64_t mask_v = 0x00020070005e0012;
3450 uint64_t mask = 0x93;
3451 uint64_t c0 = 0x001f001f001f001f;
3452 uint64_t c1 = 0x00ff00ff00ff00ff;
3453 uint64_t c2 = 0x0003000300030003;
3454 uint64_t c3 = 0x007c007c007c007c;
3455 __asm__ volatile(
3456 "daddu %[next_argb1555], %[src_argb1555], %[next_argb1555] \n\t"
3457 "1: \n\t"
3458 "gsldrc1 %[src0], 0x00(%[src_argb1555]) \n\t"
3459 "gsldlc1 %[src0], 0x07(%[src_argb1555]) \n\t"
3460 "gsldrc1 %[src1], 0x00(%[next_argb1555]) \n\t"
3461 "gsldlc1 %[src1], 0x07(%[next_argb1555]) \n\t"
3462 "psrlh %[dest0_u], %[src0], %[eight] \n\t"
3463 "and %[b0], %[src0], %[c0] \n\t"
3464 "and %[src0], %[src0], %[c1] \n\t"
3465 "psrlh %[src0], %[src0], %[five] \n\t"
3466 "and %[g0], %[dest0_u], %[c2] \n\t"
3467 "psllh %[g0], %[g0], %[three] \n\t"
3468 "or %[g0], %[src0], %[g0] \n\t"
3469 "and %[r0], %[dest0_u], %[c3] \n\t"
3470 "psrlh %[r0], %[r0], %[two] \n\t"
3471 "psrlh %[src0], %[src1], %[eight] \n\t"
3472 "and %[dest0_u], %[src1], %[c0] \n\t"
3473 "and %[src1], %[src1], %[c1] \n\t"
3474 "psrlh %[src1], %[src1], %[five] \n\t"
3475 "and %[dest0_v], %[src0], %[c2] \n\t"
3476 "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
3477 "or %[dest0_v], %[src1], %[dest0_v] \n\t"
3478 "and %[src0], %[src0], %[c3] \n\t"
3479 "psrlh %[src0], %[src0], %[two] \n\t"
3480 "paddh %[b0], %[b0], %[dest0_u] \n\t"
3481 "paddh %[g0], %[g0], %[dest0_v] \n\t"
3482 "paddh %[r0], %[r0], %[src0] \n\t"
3483 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3484 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3485 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3486 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3487 "paddh %[src0], %[dest0_u], %[dest0_v] \n\t"
3488 "psrlh %[b0], %[src0], %[six] \n\t"
3489 "psllh %[r0], %[src0], %[one] \n\t"
3490 "or %[b0], %[b0], %[r0] \n\t"
3491 "psrlh %[r0], %[g0], %[six] \n\t"
3492 "psllh %[g0], %[g0], %[one] \n\t"
3493 "or %[g0], %[g0], %[r0] \n\t"
3494 "punpcklhw %[src0], %[g0], %[value] \n\t"
3495 "punpckhhw %[src1], %[g0], %[value] \n\t"
3496 "punpcklwd %[dest0_u], %[src0], %[src1] \n\t"
3497 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3498 "paddh %[g0], %[dest0_u], %[dest0_v] \n\t"
3499 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3500 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3501
3502 "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
3503 "pshufh %[dest0_u], %[src0], %[mask] \n\t"
3504 "pmaddhw %[dest0_u], %[dest0_u], %[mask_u] \n\t"
3505 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3506 "pshufh %[b0], %[src1], %[mask] \n\t"
3507 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3508
3509 "punpcklwd %[src0], %[dest0_u], %[b0] \n\t"
3510 "punpckhwd %[src1], %[dest0_u], %[b0] \n\t"
3511 "psubw %[dest0_u], %[src0], %[src1] \n\t"
3512 "psraw %[dest0_u], %[dest0_u], %[eight] \n\t"
3513 "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
3514 "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
3515 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3516 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3517
3518 "gsldrc1 %[src0], 0x08(%[src_argb1555]) \n\t"
3519 "gsldlc1 %[src0], 0x0f(%[src_argb1555]) \n\t"
3520 "gsldrc1 %[src1], 0x08(%[next_argb1555]) \n\t"
3521 "gsldlc1 %[src1], 0x0f(%[next_argb1555]) \n\t"
3522 "psrlh %[dest1_u], %[src0], %[eight] \n\t"
3523 "and %[b0], %[src0], %[c0] \n\t"
3524 "and %[src0], %[src0], %[c1] \n\t"
3525 "psrlh %[src0], %[src0], %[five] \n\t"
3526 "and %[g0], %[dest1_u], %[c2] \n\t"
3527 "psllh %[g0], %[g0], %[three] \n\t"
3528 "or %[g0], %[src0], %[g0] \n\t"
3529 "and %[r0], %[dest1_u], %[c3] \n\t"
3530 "psrlh %[r0], %[r0], %[two] \n\t"
3531 "psrlh %[src0], %[src1], %[eight] \n\t"
3532 "and %[dest1_u], %[src1], %[c0] \n\t"
3533 "and %[src1], %[src1], %[c1] \n\t"
3534 "psrlh %[src1], %[src1], %[five] \n\t"
3535 "and %[dest1_v], %[src0], %[c2] \n\t"
3536 "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
3537 "or %[dest1_v], %[src1], %[dest1_v] \n\t"
3538 "and %[src0], %[src0], %[c3] \n\t"
3539 "psrlh %[src0], %[src0], %[two] \n\t"
3540 "paddh %[b0], %[b0], %[dest1_u] \n\t"
3541 "paddh %[g0], %[g0], %[dest1_v] \n\t"
3542 "paddh %[r0], %[r0], %[src0] \n\t"
3543 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3544 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3545 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3546 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3547 "paddh %[src0], %[dest1_u], %[dest1_v] \n\t"
3548 "psrlh %[b0], %[src0], %[six] \n\t"
3549 "psllh %[r0], %[src0], %[one] \n\t"
3550 "or %[b0], %[b0], %[r0] \n\t"
3551 "psrlh %[r0], %[g0], %[six] \n\t"
3552 "psllh %[g0], %[g0], %[one] \n\t"
3553 "or %[g0], %[g0], %[r0] \n\t"
3554 "punpcklhw %[src0], %[g0], %[value] \n\t"
3555 "punpckhhw %[src1], %[g0], %[value] \n\t"
3556 "punpcklwd %[dest1_u], %[src0], %[src1] \n\t"
3557 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3558 "paddh %[g0], %[dest1_u], %[dest1_v] \n\t"
3559 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3560 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3561
3562 "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
3563 "pshufh %[dest1_u], %[src0], %[mask] \n\t"
3564 "pmaddhw %[dest1_u], %[dest1_u], %[mask_u] \n\t"
3565 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3566 "pshufh %[b0], %[src1], %[mask] \n\t"
3567 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3568
3569 "punpcklwd %[src0], %[dest1_u], %[b0] \n\t"
3570 "punpckhwd %[src1], %[dest1_u], %[b0] \n\t"
3571 "psubw %[dest1_u], %[src0], %[src1] \n\t"
3572 "psraw %[dest1_u], %[dest1_u], %[eight] \n\t"
3573 "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
3574 "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
3575 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3576 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3577
3578 "packsswh %[dest0_u], %[dest0_u], %[dest1_u] \n\t"
3579 "packsswh %[dest1_u], %[dest0_v], %[dest1_v] \n\t"
3580
3581 "gsldrc1 %[src0], 0x10(%[src_argb1555]) \n\t"
3582 "gsldlc1 %[src0], 0x17(%[src_argb1555]) \n\t"
3583 "gsldrc1 %[src1], 0x10(%[next_argb1555]) \n\t"
3584 "gsldlc1 %[src1], 0x17(%[next_argb1555]) \n\t"
3585 "psrlh %[dest2_u], %[src0], %[eight] \n\t"
3586 "and %[b0], %[src0], %[c0] \n\t"
3587 "and %[src0], %[src0], %[c1] \n\t"
3588 "psrlh %[src0], %[src0], %[five] \n\t"
3589 "and %[g0], %[dest2_u], %[c2] \n\t"
3590 "psllh %[g0], %[g0], %[three] \n\t"
3591 "or %[g0], %[src0], %[g0] \n\t"
3592 "and %[r0], %[dest2_u], %[c3] \n\t"
3593 "psrlh %[r0], %[r0], %[two] \n\t"
3594 "psrlh %[src0], %[src1], %[eight] \n\t"
3595 "and %[dest2_u], %[src1], %[c0] \n\t"
3596 "and %[src1], %[src1], %[c1] \n\t"
3597 "psrlh %[src1], %[src1], %[five] \n\t"
3598 "and %[dest0_v], %[src0], %[c2] \n\t"
3599 "psllh %[dest0_v], %[dest0_v], %[three] \n\t"
3600 "or %[dest0_v], %[src1], %[dest0_v] \n\t"
3601 "and %[src0], %[src0], %[c3] \n\t"
3602 "psrlh %[src0], %[src0], %[two] \n\t"
3603 "paddh %[b0], %[b0], %[dest2_u] \n\t"
3604 "paddh %[g0], %[g0], %[dest0_v] \n\t"
3605 "paddh %[r0], %[r0], %[src0] \n\t"
3606 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3607 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3608 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3609 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3610 "paddh %[src0], %[dest2_u], %[dest0_v] \n\t"
3611 "psrlh %[b0], %[src0], %[six] \n\t"
3612 "psllh %[r0], %[src0], %[one] \n\t"
3613 "or %[b0], %[b0], %[r0] \n\t"
3614 "psrlh %[r0], %[g0], %[six] \n\t"
3615 "psllh %[g0], %[g0], %[one] \n\t"
3616 "or %[g0], %[g0], %[r0] \n\t"
3617 "punpcklhw %[src0], %[g0], %[value] \n\t"
3618 "punpckhhw %[src1], %[g0], %[value] \n\t"
3619 "punpcklwd %[dest2_u], %[src0], %[src1] \n\t"
3620 "punpckhwd %[dest0_v], %[src0], %[src1] \n\t"
3621 "paddh %[g0], %[dest2_u], %[dest0_v] \n\t"
3622 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3623 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3624
3625 "pmaddhw %[dest0_v], %[src0], %[mask_v] \n\t"
3626 "pshufh %[dest2_u], %[src0], %[mask] \n\t"
3627 "pmaddhw %[dest2_u], %[dest2_u], %[mask_u] \n\t"
3628 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3629 "pshufh %[b0], %[src1], %[mask] \n\t"
3630 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3631
3632 "punpcklwd %[src0], %[dest2_u], %[b0] \n\t"
3633 "punpckhwd %[src1], %[dest2_u], %[b0] \n\t"
3634 "psubw %[dest2_u], %[src0], %[src1] \n\t"
3635 "psraw %[dest2_u], %[dest2_u], %[eight] \n\t"
3636 "punpcklwd %[src0], %[dest0_v], %[g0] \n\t"
3637 "punpckhwd %[src1], %[dest0_v], %[g0] \n\t"
3638 "psubw %[dest0_v], %[src1], %[src0] \n\t"
3639 "psraw %[dest0_v], %[dest0_v], %[eight] \n\t"
3640
3641 "gsldrc1 %[src0], 0x18(%[src_argb1555]) \n\t"
3642 "gsldlc1 %[src0], 0x1f(%[src_argb1555]) \n\t"
3643 "gsldrc1 %[src1], 0x18(%[next_argb1555]) \n\t"
3644 "gsldlc1 %[src1], 0x1f(%[next_argb1555]) \n\t"
3645 "psrlh %[dest3_u], %[src0], %[eight] \n\t"
3646 "and %[b0], %[src0], %[c0] \n\t"
3647 "and %[src0], %[src0], %[c1] \n\t"
3648 "psrlh %[src0], %[src0], %[five] \n\t"
3649 "and %[g0], %[dest3_u], %[c2] \n\t"
3650 "psllh %[g0], %[g0], %[three] \n\t"
3651 "or %[g0], %[src0], %[g0] \n\t"
3652 "and %[r0], %[dest3_u], %[c3] \n\t"
3653 "psrlh %[r0], %[r0], %[two] \n\t"
3654 "psrlh %[src0], %[src1], %[eight] \n\t"
3655 "and %[dest3_u], %[src1], %[c0] \n\t"
3656 "and %[src1], %[src1], %[c1] \n\t"
3657 "psrlh %[src1], %[src1], %[five] \n\t"
3658 "and %[dest1_v], %[src0], %[c2] \n\t"
3659 "psllh %[dest1_v], %[dest1_v], %[three] \n\t"
3660 "or %[dest1_v], %[src1], %[dest1_v] \n\t"
3661 "and %[src0], %[src0], %[c3] \n\t"
3662 "psrlh %[src0], %[src0], %[two] \n\t"
3663 "paddh %[b0], %[b0], %[dest3_u] \n\t"
3664 "paddh %[g0], %[g0], %[dest1_v] \n\t"
3665 "paddh %[r0], %[r0], %[src0] \n\t"
3666 "punpcklhw %[src0], %[b0], %[r0] \n\t"
3667 "punpckhhw %[src1], %[b0], %[r0] \n\t"
3668 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3669 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3670 "paddh %[src0], %[dest3_u], %[dest1_v] \n\t"
3671 "psrlh %[b0], %[src0], %[six] \n\t"
3672 "psllh %[r0], %[src0], %[one] \n\t"
3673 "or %[b0], %[b0], %[r0] \n\t"
3674 "psrlh %[r0], %[g0], %[six] \n\t"
3675 "psllh %[g0], %[g0], %[one] \n\t"
3676 "or %[g0], %[g0], %[r0] \n\t"
3677 "punpcklhw %[src0], %[g0], %[value] \n\t"
3678 "punpckhhw %[src1], %[g0], %[value] \n\t"
3679 "punpcklwd %[dest3_u], %[src0], %[src1] \n\t"
3680 "punpckhwd %[dest1_v], %[src0], %[src1] \n\t"
3681 "paddh %[g0], %[dest3_u], %[dest1_v] \n\t"
3682 "punpcklhw %[src0], %[b0], %[g0] \n\t"
3683 "punpckhhw %[src1], %[b0], %[g0] \n\t"
3684
3685 "pmaddhw %[dest1_v], %[src0], %[mask_v] \n\t"
3686 "pshufh %[dest3_u], %[src0], %[mask] \n\t"
3687 "pmaddhw %[dest3_u], %[dest3_u], %[mask_u] \n\t"
3688 "pmaddhw %[g0], %[src1], %[mask_v] \n\t"
3689 "pshufh %[b0], %[src1], %[mask] \n\t"
3690 "pmaddhw %[b0], %[b0], %[mask_u] \n\t"
3691
3692 "punpcklwd %[src0], %[dest3_u], %[b0] \n\t"
3693 "punpckhwd %[src1], %[dest3_u], %[b0] \n\t"
3694 "psubw %[dest3_u], %[src0], %[src1] \n\t"
3695 "psraw %[dest3_u], %[dest3_u], %[eight] \n\t"
3696 "punpcklwd %[src0], %[dest1_v], %[g0] \n\t"
3697 "punpckhwd %[src1], %[dest1_v], %[g0] \n\t"
3698 "psubw %[dest1_v], %[src1], %[src0] \n\t"
3699 "psraw %[dest1_v], %[dest1_v], %[eight] \n\t"
3700
3701 "packsswh %[src1], %[dest2_u], %[dest3_u] \n\t"
3702 "packushb %[dest0_u], %[dest0_u], %[src1] \n\t"
3703 "gssdlc1 %[dest0_u], 0x07(%[dst_u]) \n\t"
3704 "gssdrc1 %[dest0_u], 0x00(%[dst_u]) \n\t"
3705 "packsswh %[src1], %[dest0_v], %[dest1_v] \n\t"
3706 "packushb %[dest0_v], %[dest1_u], %[src1] \n\t"
3707 "gssdlc1 %[dest0_v], 0x07(%[dst_v]) \n\t"
3708 "gssdrc1 %[dest0_v], 0x00(%[dst_v]) \n\t"
3709
3710 "daddiu %[src_argb1555], %[src_argb1555], 0x20 \n\t"
3711 "daddiu %[next_argb1555], %[next_argb1555], 0x20 \n\t"
3712 "daddiu %[dst_u], %[dst_u], 0x08 \n\t"
3713 "daddiu %[dst_v], %[dst_v], 0x08 \n\t"
3714 "daddiu %[width], %[width], -0x10 \n\t"
3715 "bgtz %[width], 1b \n\t"
3716 : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [b0] "=&f"(ftmp[2]),
3717 [g0] "=&f"(ftmp[3]), [r0] "=&f"(ftmp[4]), [dest0_u] "=&f"(ftmp[5]),
3718 [dest1_u] "=&f"(ftmp[6]), [dest2_u] "=&f"(ftmp[7]),
3719 [dest3_u] "=&f"(ftmp[8]), [dest0_v] "=&f"(ftmp[9]),
3720 [dest1_v] "=&f"(ftmp[10])
3721 : [src_argb1555] "r"(src_argb1555),
3722 [next_argb1555] "r"(src_stride_argb1555), [dst_u] "r"(dst_u),
3723 [dst_v] "r"(dst_v), [width] "r"(width), [value] "f"(value),
3724 [c0] "f"(c0), [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3),
3725 [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
3726 [eight] "f"(0x08), [six] "f"(0x06), [five] "f"(0x05), [three] "f"(0x03),
3727 [two] "f"(0x02), [one] "f"(0x01)
3728 : "memory");
3729 }
3730
// One unrolled step of ARGB4444ToUVRow_MMI: consumes 4 pixels (8 bytes) from
// each of the two source rows and leaves two 32-bit U values in DU and two
// 32-bit V values in DV.
//   LO / HI : byte offsets used by the gsldrc1 / gsldlc1 unaligned load pair.
//   DU / DV : operand references ("%[destN_u]" / "%[destN_v]") for the results.
#define ARGB4444_UV_STEP(LO, HI, DU, DV)                         \
  "gsldrc1    %[src0], " LO "(%[src_argb4444])            \n\t" \
  "gsldlc1    %[src0], " HI "(%[src_argb4444])            \n\t" \
  "gsldrc1    %[src1], " LO "(%[next_argb4444])           \n\t" \
  "gsldlc1    %[src1], " HI "(%[next_argb4444])           \n\t" \
  "psrlh      " DU ", %[src0], %[eight]                   \n\t" \
  "and        %[b0], %[src0], %[c0]                       \n\t" \
  "and        %[src0], %[src0], %[c1]                     \n\t" \
  "psrlh      %[g0], %[src0], %[four]                     \n\t" \
  "and        %[r0], " DU ", %[c0]                        \n\t" \
  "psrlh      %[src0], %[src1], %[eight]                  \n\t" \
  "and        " DU ", %[src1], %[c0]                      \n\t" \
  "and        %[src1], %[src1], %[c1]                     \n\t" \
  "psrlh      " DV ", %[src1], %[four]                    \n\t" \
  "and        %[src0], %[src0], %[c0]                     \n\t" \
  "paddh      %[b0], %[b0], " DU "                        \n\t" \
  "paddh      %[g0], %[g0], " DV "                        \n\t" \
  "paddh      %[r0], %[r0], %[src0]                       \n\t" \
  "punpcklhw  %[src0], %[b0], %[r0]                       \n\t" \
  "punpckhhw  %[src1], %[b0], %[r0]                       \n\t" \
  "punpcklwd  " DU ", %[src0], %[src1]                    \n\t" \
  "punpckhwd  " DV ", %[src0], %[src1]                    \n\t" \
  "paddh      %[src0], " DU ", " DV "                     \n\t" \
  "psrlh      %[b0], %[src0], %[four]                     \n\t" \
  "psllh      %[r0], %[src0], %[two]                      \n\t" \
  "or         %[b0], %[b0], %[r0]                         \n\t" \
  "psrlh      %[r0], %[g0], %[four]                       \n\t" \
  "psllh      %[g0], %[g0], %[two]                        \n\t" \
  "or         %[g0], %[g0], %[r0]                         \n\t" \
  "punpcklhw  %[src0], %[g0], %[value]                    \n\t" \
  "punpckhhw  %[src1], %[g0], %[value]                    \n\t" \
  "punpcklwd  " DU ", %[src0], %[src1]                    \n\t" \
  "punpckhwd  " DV ", %[src0], %[src1]                    \n\t" \
  "paddh      %[g0], " DU ", " DV "                       \n\t" \
  "punpcklhw  %[src0], %[b0], %[g0]                       \n\t" \
  "punpckhhw  %[src1], %[b0], %[g0]                       \n\t" \
  "pmaddhw    " DV ", %[src0], %[mask_v]                  \n\t" \
  "pshufh     " DU ", %[src0], %[mask]                    \n\t" \
  "pmaddhw    " DU ", " DU ", %[mask_u]                   \n\t" \
  "pmaddhw    %[g0], %[src1], %[mask_v]                   \n\t" \
  "pshufh     %[b0], %[src1], %[mask]                     \n\t" \
  "pmaddhw    %[b0], %[b0], %[mask_u]                     \n\t" \
  "punpcklwd  %[src0], " DU ", %[b0]                      \n\t" \
  "punpckhwd  %[src1], " DU ", %[b0]                      \n\t" \
  "psubw      " DU ", %[src0], %[src1]                    \n\t" \
  "psraw      " DU ", " DU ", %[eight]                    \n\t" \
  "punpcklwd  %[src0], " DV ", %[g0]                      \n\t" \
  "punpckhwd  %[src1], " DV ", %[g0]                      \n\t" \
  "psubw      " DV ", %[src1], %[src0]                    \n\t" \
  "psraw      " DV ", " DV ", %[eight]                    \n\t"

// Produces one row of 2x2-subsampled U and V from two rows of ARGB4444.
//
// src_argb4444 is the first row; the second row is src_stride_argb4444 bytes
// after it. Each 16-bit pixel is split into bits 0-3 (b0), bits 4-7 (g0) and
// bits 8-11 (r0); bits 12-15 are not used. For every 2x2 group the channel
// values are summed, rescaled with (s << 2) | (s >> 4), and combined as
//   U = (112 * b - 74 * g - 38 * r + 0x8080) >> 8
//   V = (112 * r - 94 * g - 18 * b + 0x8080) >> 8
// then saturated to unsigned bytes.
//
// Each loop iteration reads 32 bytes (16 pixels) from both rows and stores 8
// bytes to dst_u and 8 bytes to dst_v. The loop repeats while the remaining
// width is > 0 after subtracting 16, so a width that is not a multiple of 16
// is effectively rounded up.
//
// NOTE(review): the pointer and width operands are declared as inputs but are
// advanced by the asm. Declaring all five as "+r" would need 32 asm operands,
// which exceeds GCC's limit of 30, so the original operand classes are kept.
void ARGB4444ToUVRow_MMI(const uint8_t* src_argb4444,
                         int src_stride_argb4444,
                         uint8_t* dst_u,
                         uint8_t* dst_v,
                         int width) {
  // Scratch registers written by the asm.
  uint64_t src0, src1, b0, g0, r0;
  uint64_t dest0_u, dest1_u, dest2_u, dest3_u;
  uint64_t dest0_v, dest1_v, dest2_v, dest3_v;
  // Second source row.
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  const uint64_t value = 0x2020202020202020ULL;   // two of these sum to 0x4040
  const uint64_t mask_u = 0x0026004a00700002ULL;  // halfwords 2, 112, 74, 38
  const uint64_t mask_v = 0x00020070005e0012ULL;  // halfwords 18, 94, 112, 2
  const uint64_t mask = 0x93;                     // pshufh halfword selector
  const uint64_t c0 = 0x000f000f000f000fULL;      // low nibble of each halfword
  const uint64_t c1 = 0x00ff00ff00ff00ffULL;      // low byte of each halfword

  __asm__ volatile(
      "1:                                                  \n\t"
      ARGB4444_UV_STEP("0x00", "0x07", "%[dest0_u]", "%[dest0_v]")
      ARGB4444_UV_STEP("0x08", "0x0f", "%[dest1_u]", "%[dest1_v]")
      ARGB4444_UV_STEP("0x10", "0x17", "%[dest2_u]", "%[dest2_v]")
      ARGB4444_UV_STEP("0x18", "0x1f", "%[dest3_u]", "%[dest3_v]")

      // Narrow the eight 32-bit U results to bytes and store them.
      "packsswh   %[src0], %[dest0_u], %[dest1_u]          \n\t"
      "packsswh   %[src1], %[dest2_u], %[dest3_u]          \n\t"
      "packushb   %[dest0_u], %[src0], %[src1]             \n\t"
      "gssdlc1    %[dest0_u], 0x07(%[dst_u])               \n\t"
      "gssdrc1    %[dest0_u], 0x00(%[dst_u])               \n\t"
      // Same for V.
      "packsswh   %[src0], %[dest0_v], %[dest1_v]          \n\t"
      "packsswh   %[src1], %[dest2_v], %[dest3_v]          \n\t"
      "packushb   %[dest0_v], %[src0], %[src1]             \n\t"
      "gssdlc1    %[dest0_v], 0x07(%[dst_v])               \n\t"
      "gssdrc1    %[dest0_v], 0x00(%[dst_v])               \n\t"

      "daddiu     %[src_argb4444], %[src_argb4444], 0x20   \n\t"
      "daddiu     %[next_argb4444], %[next_argb4444], 0x20 \n\t"
      "daddiu     %[dst_u], %[dst_u], 0x08                 \n\t"
      "daddiu     %[dst_v], %[dst_v], 0x08                 \n\t"
      "daddiu     %[width], %[width], -0x10                \n\t"
      "bgtz       %[width], 1b                             \n\t"
      : [src0] "=&f"(src0), [src1] "=&f"(src1), [b0] "=&f"(b0),
        [g0] "=&f"(g0), [r0] "=&f"(r0),
        [dest0_u] "=&f"(dest0_u), [dest1_u] "=&f"(dest1_u),
        [dest2_u] "=&f"(dest2_u), [dest3_u] "=&f"(dest3_u),
        [dest0_v] "=&f"(dest0_v), [dest1_v] "=&f"(dest1_v),
        [dest2_v] "=&f"(dest2_v), [dest3_v] "=&f"(dest3_v)
      : [src_argb4444] "r"(src_argb4444),
        [next_argb4444] "r"(next_argb4444),
        [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
        [value] "f"(value), [c0] "f"(c0), [c1] "f"(c1),
        [mask] "f"(mask), [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
        [eight] "f"(0x08), [four] "f"(0x04), [two] "f"(0x02)
      : "memory");
}
#undef ARGB4444_UV_STEP
3985
// Converts a row of ARGB pixels to full-resolution (4:4:4) U and V bytes.
//
// With b, g, r being source bytes 0, 1, 2 of a pixel (byte 3 is ignored):
//   U = (112 * b - 74 * g - 38 * r + 0x8080) >> 8
//   V = (112 * r - 94 * g - 18 * b + 0x8080) >> 8
// and both results are saturated to unsigned bytes.
//
// Each loop iteration reads 32 bytes (8 pixels) and stores 8 bytes to each of
// dst_u and dst_v. The loop repeats while the remaining width is > 0 after
// subtracting 8, so a width that is not a multiple of 8 is rounded up.
//
// The pointers and the width are advanced inside the asm, so they are
// declared as read-write ("+r") operands; GCC requires that input-only
// operands are left unmodified.
void ARGBToUV444Row_MMI(const uint8_t* src_argb,
                        uint8_t* dst_u,
                        uint8_t* dst_v,
                        int width) {
  uint64_t ftmp[12];
  // 0x4040 is multiplied by the coefficient 2 below, giving the 0x8080 bias.
  const uint64_t value = 0x4040;
  const uint64_t mask_u = 0x0026004a00700002;  // halfwords 2, 112, 74, 38
  const uint64_t mask_v = 0x00020070005e0012;  // halfwords 18, 94, 112, 2

  __asm__ volatile(
      "1:                                                   \n\t"
      // Pixels 0-1.
      "gsldrc1    %[src0], 0x00(%[src_argb])                \n\t"
      "gsldlc1    %[src0], 0x07(%[src_argb])                \n\t"
      "punpcklbh  %[src_lo], %[src0], %[zero]               \n\t"
      "punpckhbh  %[src_hi], %[src0], %[zero]               \n\t"
      "dsll       %[dest0_u], %[src_lo], %[sixteen]         \n\t"
      "pinsrh_0   %[dest0_u], %[dest0_u], %[value]          \n\t"
      "pinsrh_3   %[dest0_v], %[src_lo], %[value]           \n\t"
      "pmaddhw    %[dest0_u], %[dest0_u], %[mask_u]         \n\t"
      "pmaddhw    %[dest0_v], %[dest0_v], %[mask_v]         \n\t"

      "dsll       %[src_lo], %[src_hi], %[sixteen]          \n\t"
      "pinsrh_0   %[src_lo], %[src_lo], %[value]            \n\t"
      "pinsrh_3   %[src_hi], %[src_hi], %[value]            \n\t"
      "pmaddhw    %[src_lo], %[src_lo], %[mask_u]           \n\t"
      "pmaddhw    %[src_hi], %[src_hi], %[mask_v]           \n\t"

      "punpcklwd  %[src0], %[dest0_u], %[src_lo]            \n\t"
      "punpckhwd  %[src1], %[dest0_u], %[src_lo]            \n\t"
      "psubw      %[dest0_u], %[src0], %[src1]              \n\t"
      "psraw      %[dest0_u], %[dest0_u], %[eight]          \n\t"
      "punpcklwd  %[src0], %[dest0_v], %[src_hi]            \n\t"
      "punpckhwd  %[src1], %[dest0_v], %[src_hi]            \n\t"
      "psubw      %[dest0_v], %[src1], %[src0]              \n\t"
      "psraw      %[dest0_v], %[dest0_v], %[eight]          \n\t"

      // Pixels 2-3.
      "gsldrc1    %[src0], 0x08(%[src_argb])                \n\t"
      "gsldlc1    %[src0], 0x0f(%[src_argb])                \n\t"
      "punpcklbh  %[src_lo], %[src0], %[zero]               \n\t"
      "punpckhbh  %[src_hi], %[src0], %[zero]               \n\t"
      "dsll       %[dest1_u], %[src_lo], %[sixteen]         \n\t"
      "pinsrh_0   %[dest1_u], %[dest1_u], %[value]          \n\t"
      "pinsrh_3   %[dest1_v], %[src_lo], %[value]           \n\t"
      "pmaddhw    %[dest1_u], %[dest1_u], %[mask_u]         \n\t"
      "pmaddhw    %[dest1_v], %[dest1_v], %[mask_v]         \n\t"
      "dsll       %[src_lo], %[src_hi], %[sixteen]          \n\t"
      "pinsrh_0   %[src_lo], %[src_lo], %[value]            \n\t"
      "pinsrh_3   %[src_hi], %[src_hi], %[value]            \n\t"
      "pmaddhw    %[src_lo], %[src_lo], %[mask_u]           \n\t"
      "pmaddhw    %[src_hi], %[src_hi], %[mask_v]           \n\t"

      "punpcklwd  %[src0], %[dest1_u], %[src_lo]            \n\t"
      "punpckhwd  %[src1], %[dest1_u], %[src_lo]            \n\t"
      "psubw      %[dest1_u], %[src0], %[src1]              \n\t"
      "psraw      %[dest1_u], %[dest1_u], %[eight]          \n\t"
      "punpcklwd  %[src0], %[dest1_v], %[src_hi]            \n\t"
      "punpckhwd  %[src1], %[dest1_v], %[src_hi]            \n\t"
      "psubw      %[dest1_v], %[src1], %[src0]              \n\t"
      "psraw      %[dest1_v], %[dest1_v], %[eight]          \n\t"

      // Pixels 4-5.
      "gsldrc1    %[src0], 0x10(%[src_argb])                \n\t"
      "gsldlc1    %[src0], 0x17(%[src_argb])                \n\t"
      "punpcklbh  %[src_lo], %[src0], %[zero]               \n\t"
      "punpckhbh  %[src_hi], %[src0], %[zero]               \n\t"
      "dsll       %[dest2_u], %[src_lo], %[sixteen]         \n\t"
      "pinsrh_0   %[dest2_u], %[dest2_u], %[value]          \n\t"
      "pinsrh_3   %[dest2_v], %[src_lo], %[value]           \n\t"
      "pmaddhw    %[dest2_u], %[dest2_u], %[mask_u]         \n\t"
      "pmaddhw    %[dest2_v], %[dest2_v], %[mask_v]         \n\t"
      "dsll       %[src_lo], %[src_hi], %[sixteen]          \n\t"
      "pinsrh_0   %[src_lo], %[src_lo], %[value]            \n\t"
      "pinsrh_3   %[src_hi], %[src_hi], %[value]            \n\t"
      "pmaddhw    %[src_lo], %[src_lo], %[mask_u]           \n\t"
      "pmaddhw    %[src_hi], %[src_hi], %[mask_v]           \n\t"

      "punpcklwd  %[src0], %[dest2_u], %[src_lo]            \n\t"
      "punpckhwd  %[src1], %[dest2_u], %[src_lo]            \n\t"
      "psubw      %[dest2_u], %[src0], %[src1]              \n\t"
      "psraw      %[dest2_u], %[dest2_u], %[eight]          \n\t"
      "punpcklwd  %[src0], %[dest2_v], %[src_hi]            \n\t"
      "punpckhwd  %[src1], %[dest2_v], %[src_hi]            \n\t"
      "psubw      %[dest2_v], %[src1], %[src0]              \n\t"
      "psraw      %[dest2_v], %[dest2_v], %[eight]          \n\t"

      // Pixels 6-7.
      "gsldrc1    %[src0], 0x18(%[src_argb])                \n\t"
      "gsldlc1    %[src0], 0x1f(%[src_argb])                \n\t"
      "punpcklbh  %[src_lo], %[src0], %[zero]               \n\t"
      "punpckhbh  %[src_hi], %[src0], %[zero]               \n\t"
      "dsll       %[dest3_u], %[src_lo], %[sixteen]         \n\t"
      "pinsrh_0   %[dest3_u], %[dest3_u], %[value]          \n\t"
      "pinsrh_3   %[dest3_v], %[src_lo], %[value]           \n\t"
      "pmaddhw    %[dest3_u], %[dest3_u], %[mask_u]         \n\t"
      "pmaddhw    %[dest3_v], %[dest3_v], %[mask_v]         \n\t"
      "dsll       %[src_lo], %[src_hi], %[sixteen]          \n\t"
      "pinsrh_0   %[src_lo], %[src_lo], %[value]            \n\t"
      "pinsrh_3   %[src_hi], %[src_hi], %[value]            \n\t"
      "pmaddhw    %[src_lo], %[src_lo], %[mask_u]           \n\t"
      "pmaddhw    %[src_hi], %[src_hi], %[mask_v]           \n\t"

      "punpcklwd  %[src0], %[dest3_u], %[src_lo]            \n\t"
      "punpckhwd  %[src1], %[dest3_u], %[src_lo]            \n\t"
      "psubw      %[dest3_u], %[src0], %[src1]              \n\t"
      "psraw      %[dest3_u], %[dest3_u], %[eight]          \n\t"
      "punpcklwd  %[src0], %[dest3_v], %[src_hi]            \n\t"
      "punpckhwd  %[src1], %[dest3_v], %[src_hi]            \n\t"
      "psubw      %[dest3_v], %[src1], %[src0]              \n\t"
      "psraw      %[dest3_v], %[dest3_v], %[eight]          \n\t"

      // Narrow the eight 32-bit U results to bytes and store them.
      "packsswh   %[src0], %[dest0_u], %[dest1_u]           \n\t"
      "packsswh   %[src1], %[dest2_u], %[dest3_u]           \n\t"
      "packushb   %[dest0_u], %[src0], %[src1]              \n\t"
      "gssdlc1    %[dest0_u], 0x07(%[dst_u])                \n\t"
      "gssdrc1    %[dest0_u], 0x00(%[dst_u])                \n\t"

      // Same for V.
      "packsswh   %[src0], %[dest0_v], %[dest1_v]           \n\t"
      "packsswh   %[src1], %[dest2_v], %[dest3_v]           \n\t"
      "packushb   %[dest0_v], %[src0], %[src1]              \n\t"
      "gssdlc1    %[dest0_v], 0x07(%[dst_v])                \n\t"
      "gssdrc1    %[dest0_v], 0x00(%[dst_v])                \n\t"

      "daddiu     %[src_argb], %[src_argb], 0x20            \n\t"
      "daddiu     %[dst_u], %[dst_u], 0x08                  \n\t"
      "daddiu     %[dst_v], %[dst_v], 0x08                  \n\t"
      "daddi      %[width], %[width], -0x08                 \n\t"
      "bgtz       %[width], 1b                              \n\t"
      : [src0] "=&f"(ftmp[0]), [src1] "=&f"(ftmp[1]), [src_lo] "=&f"(ftmp[2]),
        [src_hi] "=&f"(ftmp[3]), [dest0_u] "=&f"(ftmp[4]),
        [dest0_v] "=&f"(ftmp[5]), [dest1_u] "=&f"(ftmp[6]),
        [dest1_v] "=&f"(ftmp[7]), [dest2_u] "=&f"(ftmp[8]),
        [dest2_v] "=&f"(ftmp[9]), [dest3_u] "=&f"(ftmp[10]),
        [dest3_v] "=&f"(ftmp[11]),
        [src_argb] "+r"(src_argb), [dst_u] "+r"(dst_u), [dst_v] "+r"(dst_v),
        [width] "+r"(width)
      : [mask_u] "f"(mask_u), [mask_v] "f"(mask_v),
        [value] "f"(value), [zero] "f"(0x00), [sixteen] "f"(0x10),
        [eight] "f"(0x08)
      : "memory");
}
4123
// Converts a row of ARGB pixels to gray, keeping the original alpha.
//
// With b, g, r being source bytes 0, 1, 2 of a pixel:
//   y = (29 * b + 150 * g + 77 * r + 128) >> 8
// Bytes 0-2 of the destination pixel are set to y; byte 3 is copied from the
// source.
//
// Two pixels (8 bytes) are processed per iteration and the loop ends only
// when the remaining width reaches exactly zero, so width must be a positive
// multiple of 2.
//
// The pointers and the width are advanced inside the asm, so they are
// declared as read-write ("+r") operands; GCC requires that input-only
// operands are left unmodified.
void ARGBGrayRow_MMI(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  uint64_t src, src_lo, src_hi, src37, dest, dest_lo, dest_hi;
  uint64_t tmp0, tmp1;
  const uint64_t mask0 = 0x0;
  // Inserted in the alpha lane so that the 0x0080 coefficient adds 128.
  const uint64_t mask1 = 0x01;
  const uint64_t mask2 = 0x0080004D0096001DULL;  // halfwords 29, 150, 77, 128
  const uint64_t mask3 = 0xFF000000FF000000ULL;  // byte 3 of each pixel
  const uint64_t mask4 = ~mask3;                 // bytes 0-2 of each pixel
  const uint64_t shift = 0x08;

  __asm__ volatile(
      "1:                                                   \n\t"
      "gsldlc1    %[src], 0x07(%[src_ptr])                  \n\t"
      "gsldrc1    %[src], 0x00(%[src_ptr])                  \n\t"

      // Remember byte 3 of both pixels.
      "and        %[src37], %[src], %[mask3]                \n\t"

      // First pixel.
      "punpcklbh  %[src_lo], %[src], %[mask0]               \n\t"
      "pinsrh_3   %[src_lo], %[src_lo], %[mask1]            \n\t"
      "pmaddhw    %[dest_lo], %[src_lo], %[mask2]           \n\t"
      "punpcklwd  %[tmp0], %[dest_lo], %[dest_lo]           \n\t"
      "punpckhwd  %[tmp1], %[dest_lo], %[dest_lo]           \n\t"
      "paddw      %[dest_lo], %[tmp0], %[tmp1]              \n\t"
      "psrlw      %[dest_lo], %[dest_lo], %[shift]          \n\t"
      "packsswh   %[dest_lo], %[dest_lo], %[dest_lo]        \n\t"

      // Second pixel.
      "punpckhbh  %[src_hi], %[src], %[mask0]               \n\t"
      "pinsrh_3   %[src_hi], %[src_hi], %[mask1]            \n\t"
      "pmaddhw    %[dest_hi], %[src_hi], %[mask2]           \n\t"
      "punpcklwd  %[tmp0], %[dest_hi], %[dest_hi]           \n\t"
      "punpckhwd  %[tmp1], %[dest_hi], %[dest_hi]           \n\t"
      "paddw      %[dest_hi], %[tmp0], %[tmp1]              \n\t"
      "psrlw      %[dest_hi], %[dest_hi], %[shift]          \n\t"
      "packsswh   %[dest_hi], %[dest_hi], %[dest_hi]        \n\t"

      // Replicated gray bytes, with byte 3 replaced by the saved one.
      "packushb   %[dest], %[dest_lo], %[dest_hi]           \n\t"
      "and        %[dest], %[dest], %[mask4]                \n\t"
      "or         %[dest], %[dest], %[src37]                \n\t"

      "gssdlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gssdrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      "daddiu     %[src_ptr], %[src_ptr], 0x08              \n\t"
      "daddiu     %[dst_ptr], %[dst_ptr], 0x08              \n\t"
      "daddi      %[width], %[width], -0x02                 \n\t"
      "bnez       %[width], 1b                              \n\t"
      : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo), [tmp0] "=&f"(tmp0),
        [tmp1] "=&f"(tmp1), [src] "=&f"(src), [dest] "=&f"(dest),
        [src37] "=&f"(src37),
        [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
        [width] "+r"(width)
      : [shift] "f"(shift), [mask0] "f"(mask0), [mask1] "f"(mask1),
        [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4)
      : "memory");
}
4179
// Convert a row of image to Sepia tone, in place.
//
// With b, g, r being bytes 0, 1, 2 of a pixel, the new values are
//   byte 0 = (17 * b + 68 * g + 35 * r) >> 7
//   byte 1 = (22 * b + 88 * g + 45 * r) >> 7
//   byte 2 = (24 * b + 98 * g + 50 * r) >> 7
// each saturated to an unsigned byte; byte 3 is left unchanged.
//
// Two pixels (8 bytes) are processed per iteration and the loop ends only
// when the remaining width reaches exactly zero, so width must be a positive
// multiple of 2.
//
// The pointer and the width are advanced inside the asm, so they are declared
// as read-write ("+r") operands; GCC requires that input-only operands are
// left unmodified.
void ARGBSepiaRow_MMI(uint8_t* dst_argb, int width) {
  uint64_t dest, dest_lo, dest_hi, dest37, dest0, dest1, dest2;
  uint64_t tmp0, tmp1;
  const uint64_t mask0 = 0x0;
  const uint64_t mask1 = 0x002300440011ULL;  // halfwords 17, 68, 35, 0
  const uint64_t mask2 = 0x002D00580016ULL;  // halfwords 22, 88, 45, 0
  const uint64_t mask3 = 0x003200620018ULL;  // halfwords 24, 98, 50, 0
  const uint64_t mask4 = 0xFF000000FF000000ULL;  // byte 3 of each pixel
  const uint64_t shift = 0x07;

  __asm__ volatile(
      "1:                                                   \n\t"
      "gsldlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gsldrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      // Remember byte 3 of both pixels.
      "and        %[dest37], %[dest], %[mask4]              \n\t"

      // First pixel.
      "punpcklbh  %[dest_lo], %[dest], %[mask0]             \n\t"
      "pmaddhw    %[dest0], %[dest_lo], %[mask1]            \n\t"
      "pmaddhw    %[dest1], %[dest_lo], %[mask2]            \n\t"
      "pmaddhw    %[dest2], %[dest_lo], %[mask3]            \n\t"
      "punpcklwd  %[tmp0], %[dest0], %[dest1]               \n\t"
      "punpckhwd  %[tmp1], %[dest0], %[dest1]               \n\t"
      "paddw      %[dest0], %[tmp0], %[tmp1]                \n\t"
      "psrlw      %[dest0], %[dest0], %[shift]              \n\t"
      "punpcklwd  %[tmp0], %[dest2], %[mask0]               \n\t"
      "punpckhwd  %[tmp1], %[dest2], %[mask0]               \n\t"
      "paddw      %[dest1], %[tmp0], %[tmp1]                \n\t"
      "psrlw      %[dest1], %[dest1], %[shift]              \n\t"
      "packsswh   %[dest_lo], %[dest0], %[dest1]            \n\t"

      // Second pixel.
      "punpckhbh  %[dest_hi], %[dest], %[mask0]             \n\t"
      "pmaddhw    %[dest0], %[dest_hi], %[mask1]            \n\t"
      "pmaddhw    %[dest1], %[dest_hi], %[mask2]            \n\t"
      "pmaddhw    %[dest2], %[dest_hi], %[mask3]            \n\t"
      "punpcklwd  %[tmp0], %[dest0], %[dest1]               \n\t"
      "punpckhwd  %[tmp1], %[dest0], %[dest1]               \n\t"
      "paddw      %[dest0], %[tmp0], %[tmp1]                \n\t"
      "psrlw      %[dest0], %[dest0], %[shift]              \n\t"
      "punpcklwd  %[tmp0], %[dest2], %[mask0]               \n\t"
      "punpckhwd  %[tmp1], %[dest2], %[mask0]               \n\t"
      "paddw      %[dest1], %[tmp0], %[tmp1]                \n\t"
      "psrlw      %[dest1], %[dest1], %[shift]              \n\t"
      "packsswh   %[dest_hi], %[dest0], %[dest1]            \n\t"

      // Saturate to bytes (byte 3 comes out as 0) and restore byte 3.
      "packushb   %[dest], %[dest_lo], %[dest_hi]           \n\t"
      "or         %[dest], %[dest], %[dest37]               \n\t"

      "gssdlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gssdrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      "daddiu     %[dst_ptr], %[dst_ptr], 0x08              \n\t"
      "daddi      %[width], %[width], -0x02                 \n\t"
      "bnez       %[width], 1b                              \n\t"
      : [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest37] "=&f"(dest37), [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1),
        [dest] "=&f"(dest),
        [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
      : [mask0] "f"(mask0), [mask1] "f"(mask1), [mask2] "f"(mask2),
        [mask3] "f"(mask3), [mask4] "f"(mask4), [shift] "f"(shift)
      : "memory");
}
4244
// Apply color matrix to a row of image. Matrix is signed.
// TODO(fbarchard): Consider adding rounding (+32).
//
// matrix_argb holds 16 signed bytes, read as four rows of four coefficients.
// For each pixel with source bytes s[0..3], destination byte k is
//   (s[0] * m[4k] + s[1] * m[4k+1] + s[2] * m[4k+2] + s[3] * m[4k+3]) >> 6
// (arithmetic shift), saturated to an unsigned byte.
//
// Two pixels (8 bytes) are processed per iteration and the loop ends only
// when the remaining width reaches exactly zero, so width must be a positive
// multiple of 2. The matrix is reloaded and sign-extended for every pixel.
//
// src_ptr, dst_ptr and width are advanced inside the asm, so they are declared
// as early-clobber read-write ("+&r") operands: GCC requires that input-only
// operands are left unmodified, and the early-clobber keeps them out of the
// register that holds the still-needed matrix_ptr input.
void ARGBColorMatrixRow_MMI(const uint8_t* src_argb,
                            uint8_t* dst_argb,
                            const int8_t* matrix_argb,
                            int width) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi, dest0, dest1, dest2,
      dest3;
  uint64_t matrix, matrix_hi, matrix_lo;
  uint64_t tmp0, tmp1;
  const uint64_t shift0 = 0x06;  // final >> 6 of each dot product
  const uint64_t shift1 = 0x08;  // psllh/psrah pair sign-extends int8 to int16
  const uint64_t mask0 = 0x0;

  __asm__ volatile(
      "1:                                                   \n\t"
      "gsldlc1    %[src], 0x07(%[src_ptr])                  \n\t"
      "gsldrc1    %[src], 0x00(%[src_ptr])                  \n\t"

      // First pixel, matrix rows 0 and 1.
      "punpcklbh  %[src_lo], %[src], %[mask0]               \n\t"

      "gsldlc1    %[matrix], 0x07(%[matrix_ptr])            \n\t"
      "gsldrc1    %[matrix], 0x00(%[matrix_ptr])            \n\t"
      "punpcklbh  %[matrix_lo], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "psrah      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "punpckhbh  %[matrix_hi], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "psrah      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "pmaddhw    %[dest_lo], %[src_lo], %[matrix_lo]       \n\t"
      "pmaddhw    %[dest_hi], %[src_lo], %[matrix_hi]       \n\t"
      "punpcklwd  %[tmp0], %[dest_lo], %[dest_hi]           \n\t"
      "punpckhwd  %[tmp1], %[dest_lo], %[dest_hi]           \n\t"
      "paddw      %[dest0], %[tmp0], %[tmp1]                \n\t"
      "psraw      %[dest0], %[dest0], %[shift0]             \n\t"

      // First pixel, matrix rows 2 and 3.
      "gsldlc1    %[matrix], 0x0f(%[matrix_ptr])            \n\t"
      "gsldrc1    %[matrix], 0x08(%[matrix_ptr])            \n\t"
      "punpcklbh  %[matrix_lo], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "psrah      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "punpckhbh  %[matrix_hi], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "psrah      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "pmaddhw    %[dest_lo], %[src_lo], %[matrix_lo]       \n\t"
      "pmaddhw    %[dest_hi], %[src_lo], %[matrix_hi]       \n\t"
      "punpcklwd  %[tmp0], %[dest_lo], %[dest_hi]           \n\t"
      "punpckhwd  %[tmp1], %[dest_lo], %[dest_hi]           \n\t"
      "paddw      %[dest1], %[tmp0], %[tmp1]                \n\t"
      "psraw      %[dest1], %[dest1], %[shift0]             \n\t"

      // Second pixel, matrix rows 0 and 1.
      "punpckhbh  %[src_hi], %[src], %[mask0]               \n\t"

      "gsldlc1    %[matrix], 0x07(%[matrix_ptr])            \n\t"
      "gsldrc1    %[matrix], 0x00(%[matrix_ptr])            \n\t"
      "punpcklbh  %[matrix_lo], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "psrah      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "punpckhbh  %[matrix_hi], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "psrah      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "pmaddhw    %[dest_lo], %[src_hi], %[matrix_lo]       \n\t"
      "pmaddhw    %[dest_hi], %[src_hi], %[matrix_hi]       \n\t"
      "punpcklwd  %[tmp0], %[dest_lo], %[dest_hi]           \n\t"
      "punpckhwd  %[tmp1], %[dest_lo], %[dest_hi]           \n\t"
      "paddw      %[dest2], %[tmp0], %[tmp1]                \n\t"
      "psraw      %[dest2], %[dest2], %[shift0]             \n\t"

      // Second pixel, matrix rows 2 and 3.
      "gsldlc1    %[matrix], 0x0f(%[matrix_ptr])            \n\t"
      "gsldrc1    %[matrix], 0x08(%[matrix_ptr])            \n\t"
      "punpcklbh  %[matrix_lo], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "psrah      %[matrix_lo], %[matrix_lo], %[shift1]     \n\t"
      "punpckhbh  %[matrix_hi], %[matrix], %[mask0]         \n\t"
      "psllh      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "psrah      %[matrix_hi], %[matrix_hi], %[shift1]     \n\t"
      "pmaddhw    %[dest_lo], %[src_hi], %[matrix_lo]       \n\t"
      "pmaddhw    %[dest_hi], %[src_hi], %[matrix_hi]       \n\t"
      "punpcklwd  %[tmp0], %[dest_lo], %[dest_hi]           \n\t"
      "punpckhwd  %[tmp1], %[dest_lo], %[dest_hi]           \n\t"
      "paddw      %[dest3], %[tmp0], %[tmp1]                \n\t"
      "psraw      %[dest3], %[dest3], %[shift0]             \n\t"

      // Saturate the eight 32-bit results to unsigned bytes.
      "packsswh   %[tmp0], %[dest0], %[dest1]               \n\t"
      "packsswh   %[tmp1], %[dest2], %[dest3]               \n\t"
      "packushb   %[dest], %[tmp0], %[tmp1]                 \n\t"

      "gssdlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gssdrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      "daddiu     %[src_ptr], %[src_ptr], 0x08              \n\t"
      "daddiu     %[dst_ptr], %[dst_ptr], 0x08              \n\t"
      "daddi      %[width], %[width], -0x02                 \n\t"
      "bnez       %[width], 1b                              \n\t"
      : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
        [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [dest2] "=&f"(dest2),
        [dest3] "=&f"(dest3), [src] "=&f"(src), [dest] "=&f"(dest),
        [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [matrix_hi] "=&f"(matrix_hi),
        [matrix_lo] "=&f"(matrix_lo), [matrix] "=&f"(matrix),
        [src_ptr] "+&r"(src_argb), [dst_ptr] "+&r"(dst_argb),
        [width] "+&r"(width)
      : [matrix_ptr] "r"(matrix_argb), [shift0] "f"(shift0),
        [shift1] "f"(shift1), [mask0] "f"(mask0)
      : "memory");
}
4351
// Scales every byte of each ARGB pixel by the matching byte of `value`.
//
// With c a source byte and v the byte of `value` at the same position within
// the 4-byte pixel, the output byte is
//   ((c * 0x0101) * (v * 0x0101)) >> 24
//
// Two pixels (8 bytes) are processed per iteration and the loop ends only
// when the remaining width reaches exactly zero, so width must be a positive
// multiple of 2.
//
// The byte-to-halfword expansion of `value` is done once, before the loop,
// into a separate scratch register. Doing it inside the loop on the `value`
// operand itself re-interleaves the already expanded register on every
// iteration, so from the second iteration on the wrong channel's scale is
// applied. The pointers and the width are advanced inside the asm and are
// therefore declared as read-write ("+r") operands.
void ARGBShadeRow_MMI(const uint8_t* src_argb,
                      uint8_t* dst_argb,
                      int width,
                      uint32_t value) {
  uint64_t src, src_hi, src_lo, dest, dest_lo, dest_hi;
  uint64_t scale;  // `value` with each byte duplicated into a halfword
  const uint64_t shift = 0x08;

  __asm__ volatile(
      // Loop-invariant: scale halfword k = byte k of value * 0x0101.
      "punpcklbh  %[scale], %[value], %[value]              \n\t"

      "1:                                                   \n\t"
      "gsldlc1    %[src], 0x07(%[src_ptr])                  \n\t"
      "gsldrc1    %[src], 0x00(%[src_ptr])                  \n\t"
      // Duplicate every source byte into a halfword (c * 0x0101).
      "punpcklbh  %[src_lo], %[src], %[src]                 \n\t"
      "punpckhbh  %[src_hi], %[src], %[src]                 \n\t"

      // High 16 bits of the product, then a further >> 8.
      "pmulhuh    %[dest_lo], %[src_lo], %[scale]           \n\t"
      "psrlh      %[dest_lo], %[dest_lo], %[shift]          \n\t"
      "pmulhuh    %[dest_hi], %[src_hi], %[scale]           \n\t"
      "psrlh      %[dest_hi], %[dest_hi], %[shift]          \n\t"
      "packushb   %[dest], %[dest_lo], %[dest_hi]           \n\t"

      "gssdlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gssdrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      "daddiu     %[src_ptr], %[src_ptr], 0x08              \n\t"
      "daddiu     %[dst_ptr], %[dst_ptr], 0x08              \n\t"
      "daddi      %[width], %[width], -0x02                 \n\t"
      "bnez       %[width], 1b                              \n\t"
      : [src_hi] "=&f"(src_hi), [src_lo] "=&f"(src_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src] "=&f"(src),
        [dest] "=&f"(dest), [scale] "=&f"(scale),
        [src_ptr] "+r"(src_argb), [dst_ptr] "+r"(dst_argb),
        [width] "+r"(width)
      : [value] "f"(value), [shift] "f"(shift)
      : "memory");
}
4388
// Multiplies two rows of ARGB pixels byte by byte.
//
// With a a byte of src_argb and b the byte of src_argb1 at the same position,
// the output byte is
//   ((a * 0x0101) * b) >> 16
//
// Two pixels (8 bytes) are processed per iteration and the loop ends only
// when the remaining width reaches exactly zero, so width must be a positive
// multiple of 2.
//
// The pointers and the width are advanced inside the asm, so they are
// declared as read-write ("+r") operands; GCC requires that input-only
// operands are left unmodified.
void ARGBMultiplyRow_MMI(const uint8_t* src_argb,
                         const uint8_t* src_argb1,
                         uint8_t* dst_argb,
                         int width) {
  uint64_t src0, src0_hi, src0_lo, src1, src1_hi, src1_lo;
  uint64_t dest, dest_lo, dest_hi;
  const uint64_t mask = 0x0;

  __asm__ volatile(
      "1:                                                   \n\t"
      // First source: duplicate every byte into a halfword (a * 0x0101).
      "gsldlc1    %[src0], 0x07(%[src0_ptr])                \n\t"
      "gsldrc1    %[src0], 0x00(%[src0_ptr])                \n\t"
      "punpcklbh  %[src0_lo], %[src0], %[src0]              \n\t"
      "punpckhbh  %[src0_hi], %[src0], %[src0]              \n\t"

      // Second source: zero-extend every byte to a halfword.
      "gsldlc1    %[src1], 0x07(%[src1_ptr])                \n\t"
      "gsldrc1    %[src1], 0x00(%[src1_ptr])                \n\t"
      "punpcklbh  %[src1_lo], %[src1], %[mask]              \n\t"
      "punpckhbh  %[src1_hi], %[src1], %[mask]              \n\t"

      // Keep the high 16 bits of each unsigned product.
      "pmulhuh    %[dest_lo], %[src0_lo], %[src1_lo]        \n\t"
      "pmulhuh    %[dest_hi], %[src0_hi], %[src1_hi]        \n\t"
      "packushb   %[dest], %[dest_lo], %[dest_hi]           \n\t"

      "gssdlc1    %[dest], 0x07(%[dst_ptr])                 \n\t"
      "gssdrc1    %[dest], 0x00(%[dst_ptr])                 \n\t"

      "daddiu     %[src0_ptr], %[src0_ptr], 0x08            \n\t"
      "daddiu     %[src1_ptr], %[src1_ptr], 0x08            \n\t"
      "daddiu     %[dst_ptr], %[dst_ptr], 0x08              \n\t"
      "daddi      %[width], %[width], -0x02                 \n\t"
      "bnez       %[width], 1b                              \n\t"
      : [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
        [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
        [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo), [src0] "=&f"(src0),
        [src1] "=&f"(src1), [dest] "=&f"(dest),
        [src0_ptr] "+r"(src_argb), [src1_ptr] "+r"(src_argb1),
        [dst_ptr] "+r"(dst_argb), [width] "+r"(width)
      : [mask] "f"(mask)
      : "memory");
}
4429
ARGBAddRow_MMI(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)4430 void ARGBAddRow_MMI(const uint8_t* src_argb,
4431 const uint8_t* src_argb1,
4432 uint8_t* dst_argb,
4433 int width) {
4434 uint64_t src0, src1, dest;
4435
4436 __asm__ volatile(
4437 "1: \n\t"
4438 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
4439 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
4440 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
4441 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
4442 "paddusb %[dest], %[src0], %[src1] \n\t"
4443 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4444 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4445
4446 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
4447 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
4448 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4449 "daddi %[width], %[width], -0x02 \n\t"
4450 "bnez %[width], 1b \n\t"
4451 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
4452 : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
4453 [dst_ptr] "r"(dst_argb), [width] "r"(width)
4454 : "memory");
4455 }
4456
ARGBSubtractRow_MMI(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)4457 void ARGBSubtractRow_MMI(const uint8_t* src_argb,
4458 const uint8_t* src_argb1,
4459 uint8_t* dst_argb,
4460 int width) {
4461 uint64_t src0, src1, dest;
4462
4463 __asm__ volatile(
4464 "1: \n\t"
4465 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
4466 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
4467 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
4468 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
4469 "psubusb %[dest], %[src0], %[src1] \n\t"
4470 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4471 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4472
4473 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
4474 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
4475 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4476 "daddi %[width], %[width], -0x02 \n\t"
4477 "bnez %[width], 1b \n\t"
4478 : [src0] "=&f"(src0), [src1] "=&f"(src1), [dest] "=&f"(dest)
4479 : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
4480 [dst_ptr] "r"(dst_argb), [width] "r"(width)
4481 : "memory");
4482 }
4483
// Sobel functions which mimic SSSE3.
SobelXRow_MMI(const uint8_t * src_y0,const uint8_t * src_y1,const uint8_t * src_y2,uint8_t * dst_sobelx,int width)4485 void SobelXRow_MMI(const uint8_t* src_y0,
4486 const uint8_t* src_y1,
4487 const uint8_t* src_y2,
4488 uint8_t* dst_sobelx,
4489 int width) {
4490 uint64_t y00 = 0, y10 = 0, y20 = 0;
4491 uint64_t y02 = 0, y12 = 0, y22 = 0;
4492 uint64_t zero = 0x0;
4493 uint64_t sobel = 0x0;
4494 __asm__ volatile(
4495 "1: \n\t"
4496 "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
4497 "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
4498 "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // a_sub=src_y0[i+2]
4499 "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
4500
4501 "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // b=src_y1[i]
4502 "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
4503 "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // b_sub=src_y1[i+2]
4504 "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
4505
4506 "gsldlc1 %[y20], 0x07(%[src_y2]) \n\t" // c=src_y2[i]
4507 "gsldrc1 %[y20], 0x00(%[src_y2]) \n\t"
4508 "gsldlc1 %[y22], 0x09(%[src_y2]) \n\t" // c_sub=src_y2[i+2]
4509 "gsldrc1 %[y22], 0x02(%[src_y2]) \n\t"
4510
4511 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4512 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4513 "punpcklbh %[y20], %[y20], %[zero] \n\t"
4514
4515 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4516 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4517 "punpcklbh %[y22], %[y22], %[zero] \n\t"
4518
4519 "paddh %[y00], %[y00], %[y10] \n\t" // a+b
4520 "paddh %[y20], %[y20], %[y10] \n\t" // c+b
4521 "paddh %[y00], %[y00], %[y20] \n\t" // a+2b+c
4522
4523 "paddh %[y02], %[y02], %[y12] \n\t" // a_sub+b_sub
4524 "paddh %[y22], %[y22], %[y12] \n\t" // c_sub+b_sub
4525 "paddh %[y02], %[y02], %[y22] \n\t" // a_sub+2b_sub+c_sub
4526
4527 "pmaxsh %[y10], %[y00], %[y02] \n\t"
4528 "pminsh %[y20], %[y00], %[y02] \n\t"
4529 "psubh %[sobel], %[y10], %[y20] \n\t" // Abs
4530
4531 "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
4532 "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
4533 "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
4534 "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
4535
4536 "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
4537 "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
4538 "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
4539 "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
4540
4541 "gsldlc1 %[y20], 0x0B(%[src_y2]) \n\t"
4542 "gsldrc1 %[y20], 0x04(%[src_y2]) \n\t"
4543 "gsldlc1 %[y22], 0x0D(%[src_y2]) \n\t"
4544 "gsldrc1 %[y22], 0x06(%[src_y2]) \n\t"
4545
4546 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4547 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4548 "punpcklbh %[y20], %[y20], %[zero] \n\t"
4549
4550 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4551 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4552 "punpcklbh %[y22], %[y22], %[zero] \n\t"
4553
4554 "paddh %[y00], %[y00], %[y10] \n\t"
4555 "paddh %[y20], %[y20], %[y10] \n\t"
4556 "paddh %[y00], %[y00], %[y20] \n\t"
4557
4558 "paddh %[y02], %[y02], %[y12] \n\t"
4559 "paddh %[y22], %[y22], %[y12] \n\t"
4560 "paddh %[y02], %[y02], %[y22] \n\t"
4561
4562 "pmaxsh %[y10], %[y00], %[y02] \n\t"
4563 "pminsh %[y20], %[y00], %[y02] \n\t"
4564 "psubh %[y00], %[y10], %[y20] \n\t"
4565
4566 "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
4567 "gssdrc1 %[sobel], 0(%[dst_sobelx]) \n\t"
4568 "gssdlc1 %[sobel], 7(%[dst_sobelx]) \n\t"
4569
4570 "daddiu %[src_y0], %[src_y0], 8 \n\t"
4571 "daddiu %[src_y1], %[src_y1], 8 \n\t"
4572 "daddiu %[src_y2], %[src_y2], 8 \n\t"
4573 "daddiu %[dst_sobelx], %[dst_sobelx], 8 \n\t"
4574 "daddiu %[width], %[width], -8 \n\t"
4575 "bgtz %[width], 1b \n\t"
4576 "nop \n\t"
4577 : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y10] "=&f"(y10),
4578 [y20] "=&f"(y20), [y02] "=&f"(y02), [y12] "=&f"(y12), [y22] "=&f"(y22)
4579 : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1), [src_y2] "r"(src_y2),
4580 [dst_sobelx] "r"(dst_sobelx), [width] "r"(width), [zero] "f"(zero)
4581 : "memory");
4582 }
4583
SobelYRow_MMI(const uint8_t * src_y0,const uint8_t * src_y1,uint8_t * dst_sobely,int width)4584 void SobelYRow_MMI(const uint8_t* src_y0,
4585 const uint8_t* src_y1,
4586 uint8_t* dst_sobely,
4587 int width) {
4588 uint64_t y00 = 0, y01 = 0, y02 = 0;
4589 uint64_t y10 = 0, y11 = 0, y12 = 0;
4590 uint64_t zero = 0x0;
4591 uint64_t sobel = 0x0;
4592 __asm__ volatile(
4593 "1: \n\t"
4594 "gsldlc1 %[y00], 0x07(%[src_y0]) \n\t" // a=src_y0[i]
4595 "gsldrc1 %[y00], 0x00(%[src_y0]) \n\t"
4596 "gsldlc1 %[y01], 0x08(%[src_y0]) \n\t" // b=src_y0[i+1]
4597 "gsldrc1 %[y01], 0x01(%[src_y0]) \n\t"
4598 "gsldlc1 %[y02], 0x09(%[src_y0]) \n\t" // c=src_y0[i+2]
4599 "gsldrc1 %[y02], 0x02(%[src_y0]) \n\t"
4600
4601 "gsldlc1 %[y10], 0x07(%[src_y1]) \n\t" // a_sub=src_y1[i]
4602 "gsldrc1 %[y10], 0x00(%[src_y1]) \n\t"
4603 "gsldlc1 %[y11], 0x08(%[src_y1]) \n\t" // b_sub=src_y1[i+1]
4604 "gsldrc1 %[y11], 0x01(%[src_y1]) \n\t"
4605 "gsldlc1 %[y12], 0x09(%[src_y1]) \n\t" // c_sub=src_y1[i+2]
4606 "gsldrc1 %[y12], 0x02(%[src_y1]) \n\t"
4607
4608 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4609 "punpcklbh %[y01], %[y01], %[zero] \n\t"
4610 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4611
4612 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4613 "punpcklbh %[y11], %[y11], %[zero] \n\t"
4614 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4615
4616 "paddh %[y00], %[y00], %[y01] \n\t" // a+b
4617 "paddh %[y02], %[y02], %[y01] \n\t" // c+b
4618 "paddh %[y00], %[y00], %[y02] \n\t" // a+2b+c
4619
4620 "paddh %[y10], %[y10], %[y11] \n\t" // a_sub+b_sub
4621 "paddh %[y12], %[y12], %[y11] \n\t" // c_sub+b_sub
4622 "paddh %[y10], %[y10], %[y12] \n\t" // a_sub+2b_sub+c_sub
4623
4624 "pmaxsh %[y02], %[y00], %[y10] \n\t"
4625 "pminsh %[y12], %[y00], %[y10] \n\t"
4626 "psubh %[sobel], %[y02], %[y12] \n\t" // Abs
4627
4628 "gsldlc1 %[y00], 0x0B(%[src_y0]) \n\t"
4629 "gsldrc1 %[y00], 0x04(%[src_y0]) \n\t"
4630 "gsldlc1 %[y01], 0x0C(%[src_y0]) \n\t"
4631 "gsldrc1 %[y01], 0x05(%[src_y0]) \n\t"
4632 "gsldlc1 %[y02], 0x0D(%[src_y0]) \n\t"
4633 "gsldrc1 %[y02], 0x06(%[src_y0]) \n\t"
4634
4635 "gsldlc1 %[y10], 0x0B(%[src_y1]) \n\t"
4636 "gsldrc1 %[y10], 0x04(%[src_y1]) \n\t"
4637 "gsldlc1 %[y11], 0x0C(%[src_y1]) \n\t"
4638 "gsldrc1 %[y11], 0x05(%[src_y1]) \n\t"
4639 "gsldlc1 %[y12], 0x0D(%[src_y1]) \n\t"
4640 "gsldrc1 %[y12], 0x06(%[src_y1]) \n\t"
4641
4642 "punpcklbh %[y00], %[y00], %[zero] \n\t"
4643 "punpcklbh %[y01], %[y01], %[zero] \n\t"
4644 "punpcklbh %[y02], %[y02], %[zero] \n\t"
4645
4646 "punpcklbh %[y10], %[y10], %[zero] \n\t"
4647 "punpcklbh %[y11], %[y11], %[zero] \n\t"
4648 "punpcklbh %[y12], %[y12], %[zero] \n\t"
4649
4650 "paddh %[y00], %[y00], %[y01] \n\t"
4651 "paddh %[y02], %[y02], %[y01] \n\t"
4652 "paddh %[y00], %[y00], %[y02] \n\t"
4653
4654 "paddh %[y10], %[y10], %[y11] \n\t"
4655 "paddh %[y12], %[y12], %[y11] \n\t"
4656 "paddh %[y10], %[y10], %[y12] \n\t"
4657
4658 "pmaxsh %[y02], %[y00], %[y10] \n\t"
4659 "pminsh %[y12], %[y00], %[y10] \n\t"
4660 "psubh %[y00], %[y02], %[y12] \n\t"
4661
4662 "packushb %[sobel], %[sobel], %[y00] \n\t" // clamp255
4663 "gssdrc1 %[sobel], 0(%[dst_sobely]) \n\t"
4664 "gssdlc1 %[sobel], 7(%[dst_sobely]) \n\t"
4665
4666 "daddiu %[src_y0], %[src_y0], 8 \n\t"
4667 "daddiu %[src_y1], %[src_y1], 8 \n\t"
4668 "daddiu %[dst_sobely], %[dst_sobely], 8 \n\t"
4669 "daddiu %[width], %[width], -8 \n\t"
4670 "bgtz %[width], 1b \n\t"
4671 "nop \n\t"
4672 : [sobel] "=&f"(sobel), [y00] "=&f"(y00), [y01] "=&f"(y01),
4673 [y02] "=&f"(y02), [y10] "=&f"(y10), [y11] "=&f"(y11), [y12] "=&f"(y12)
4674 : [src_y0] "r"(src_y0), [src_y1] "r"(src_y1),
4675 [dst_sobely] "r"(dst_sobely), [width] "r"(width), [zero] "f"(zero)
4676 : "memory");
4677 }
4678
SobelRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)4679 void SobelRow_MMI(const uint8_t* src_sobelx,
4680 const uint8_t* src_sobely,
4681 uint8_t* dst_argb,
4682 int width) {
4683 double temp[3];
4684 uint64_t c1 = 0xff000000ff000000;
4685 __asm__ volatile(
4686 "1: \n\t"
4687 "gsldlc1 %[t0], 0x07(%[src_sobelx]) \n\t" // a=src_sobelx[i]
4688 "gsldrc1 %[t0], 0x00(%[src_sobelx]) \n\t"
4689 "gsldlc1 %[t1], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
4690 "gsldrc1 %[t1], 0x00(%[src_sobely]) \n\t"
4691 // s7 s6 s5 s4 s3 s2 s1 s0 = a+b
4692 "paddusb %[t2] , %[t0], %[t1] \n\t"
4693
4694 // s3 s2 s1 s0->s3 s3 s2 s2 s1 s1 s0 s0
4695 "punpcklbh %[t0], %[t2], %[t2] \n\t"
4696
4697 // s1 s1 s0 s0->s1 s2 s1 s1 s0 s0 s0 s0
4698 "punpcklbh %[t1], %[t0], %[t0] \n\t"
4699 "or %[t1], %[t1], %[c1] \n\t"
4700 // 255 s1 s1 s1 s55 s0 s0 s0
4701 "gssdrc1 %[t1], 0x00(%[dst_argb]) \n\t"
4702 "gssdlc1 %[t1], 0x07(%[dst_argb]) \n\t"
4703
4704 // s3 s3 s2 s2->s3 s3 s3 s3 s2 s2 s2 s2
4705 "punpckhbh %[t1], %[t0], %[t0] \n\t"
4706 "or %[t1], %[t1], %[c1] \n\t"
4707 // 255 s3 s3 s3 255 s2 s2 s2
4708 "gssdrc1 %[t1], 0x08(%[dst_argb]) \n\t"
4709 "gssdlc1 %[t1], 0x0f(%[dst_argb]) \n\t"
4710
4711 // s7 s6 s5 s4->s7 s7 s6 s6 s5 s5 s4 s4
4712 "punpckhbh %[t0], %[t2], %[t2] \n\t"
4713
4714 // s5 s5 s4 s4->s5 s5 s5 s5 s4 s4 s4 s4
4715 "punpcklbh %[t1], %[t0], %[t0] \n\t"
4716 "or %[t1], %[t1], %[c1] \n\t"
4717 "gssdrc1 %[t1], 0x10(%[dst_argb]) \n\t"
4718 "gssdlc1 %[t1], 0x17(%[dst_argb]) \n\t"
4719
4720 // s7 s7 s6 s6->s7 s7 s7 s7 s6 s6 s6 s6
4721 "punpckhbh %[t1], %[t0], %[t0] \n\t"
4722 "or %[t1], %[t1], %[c1] \n\t"
4723 "gssdrc1 %[t1], 0x18(%[dst_argb]) \n\t"
4724 "gssdlc1 %[t1], 0x1f(%[dst_argb]) \n\t"
4725
4726 "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
4727 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4728 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4729 "daddiu %[width], %[width], -8 \n\t"
4730 "bgtz %[width], 1b \n\t"
4731 "nop \n\t"
4732 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
4733 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4734 [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
4735 : "memory");
4736 }
4737
SobelToPlaneRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_y,int width)4738 void SobelToPlaneRow_MMI(const uint8_t* src_sobelx,
4739 const uint8_t* src_sobely,
4740 uint8_t* dst_y,
4741 int width) {
4742 uint64_t tr = 0;
4743 uint64_t tb = 0;
4744 __asm__ volatile(
4745 "1: \n\t"
4746 "gsldrc1 %[tr], 0x0(%[src_sobelx]) \n\t"
4747 "gsldlc1 %[tr], 0x7(%[src_sobelx]) \n\t" // r=src_sobelx[i]
4748 "gsldrc1 %[tb], 0x0(%[src_sobely]) \n\t"
4749 "gsldlc1 %[tb], 0x7(%[src_sobely]) \n\t" // b=src_sobely[i]
4750 "paddusb %[tr], %[tr], %[tb] \n\t" // g
4751 "gssdrc1 %[tr], 0x0(%[dst_y]) \n\t"
4752 "gssdlc1 %[tr], 0x7(%[dst_y]) \n\t"
4753
4754 "daddiu %[dst_y], %[dst_y], 8 \n\t"
4755 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4756 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4757 "daddiu %[width], %[width], -8 \n\t"
4758 "bgtz %[width], 1b \n\t"
4759 "nop \n\t"
4760 : [tr] "=&f"(tr), [tb] "=&f"(tb)
4761 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4762 [dst_y] "r"(dst_y), [width] "r"(width)
4763 : "memory");
4764 }
4765
SobelXYRow_MMI(const uint8_t * src_sobelx,const uint8_t * src_sobely,uint8_t * dst_argb,int width)4766 void SobelXYRow_MMI(const uint8_t* src_sobelx,
4767 const uint8_t* src_sobely,
4768 uint8_t* dst_argb,
4769 int width) {
4770 uint64_t temp[3];
4771 uint64_t result = 0;
4772 uint64_t gb = 0;
4773 uint64_t cr = 0;
4774 uint64_t c1 = 0xffffffffffffffff;
4775 __asm__ volatile(
4776 "1: \n\t"
4777 "gsldlc1 %[tr], 0x07(%[src_sobelx]) \n\t" // r=src_sobelx[i]
4778 "gsldrc1 %[tr], 0x00(%[src_sobelx]) \n\t"
4779 "gsldlc1 %[tb], 0x07(%[src_sobely]) \n\t" // b=src_sobely[i]
4780 "gsldrc1 %[tb], 0x00(%[src_sobely]) \n\t"
4781 "paddusb %[tg] , %[tr], %[tb] \n\t" // g
4782
4783 // g3 b3 g2 b2 g1 b1 g0 b0
4784 "punpcklbh %[gb], %[tb], %[tg] \n\t"
4785 // c3 r3 r2 r2 c1 r1 c0 r0
4786 "punpcklbh %[cr], %[tr], %[c1] \n\t"
4787 // c1 r1 g1 b1 c0 r0 g0 b0
4788 "punpcklhw %[result], %[gb], %[cr] \n\t"
4789 "gssdrc1 %[result], 0x00(%[dst_argb]) \n\t"
4790 "gssdlc1 %[result], 0x07(%[dst_argb]) \n\t"
4791 // c3 r3 g3 b3 c2 r2 g2 b2
4792 "punpckhhw %[result], %[gb], %[cr] \n\t"
4793 "gssdrc1 %[result], 0x08(%[dst_argb]) \n\t"
4794 "gssdlc1 %[result], 0x0f(%[dst_argb]) \n\t"
4795
4796 // g7 b7 g6 b6 g5 b5 g4 b4
4797 "punpckhbh %[gb], %[tb], %[tg] \n\t"
4798 // c7 r7 c6 r6 c5 r5 c4 r4
4799 "punpckhbh %[cr], %[tr], %[c1] \n\t"
4800 // c5 r5 g5 b5 c4 r4 g4 b4
4801 "punpcklhw %[result], %[gb], %[cr] \n\t"
4802 "gssdrc1 %[result], 0x10(%[dst_argb]) \n\t"
4803 "gssdlc1 %[result], 0x17(%[dst_argb]) \n\t"
4804 // c7 r7 g7 b7 c6 r6 g6 b6
4805 "punpckhhw %[result], %[gb], %[cr] \n\t"
4806 "gssdrc1 %[result], 0x18(%[dst_argb]) \n\t"
4807 "gssdlc1 %[result], 0x1f(%[dst_argb]) \n\t"
4808
4809 "daddiu %[dst_argb], %[dst_argb], 32 \n\t"
4810 "daddiu %[src_sobelx], %[src_sobelx], 8 \n\t"
4811 "daddiu %[src_sobely], %[src_sobely], 8 \n\t"
4812 "daddiu %[width], %[width], -8 \n\t"
4813 "bgtz %[width], 1b \n\t"
4814 "nop \n\t"
4815 : [tr] "=&f"(temp[0]), [tb] "=&f"(temp[1]), [tg] "=&f"(temp[2]),
4816 [gb] "=&f"(gb), [cr] "=&f"(cr), [result] "=&f"(result)
4817 : [src_sobelx] "r"(src_sobelx), [src_sobely] "r"(src_sobely),
4818 [dst_argb] "r"(dst_argb), [width] "r"(width), [c1] "f"(c1)
4819 : "memory");
4820 }
4821
J400ToARGBRow_MMI(const uint8_t * src_y,uint8_t * dst_argb,int width)4822 void J400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* dst_argb, int width) {
4823 // Copy a Y to RGB.
4824 uint64_t src, dest;
4825 const uint64_t mask0 = 0x00ffffff00ffffffULL;
4826 const uint64_t mask1 = ~mask0;
4827
4828 __asm__ volatile(
4829 "1: \n\t"
4830 "gslwlc1 %[src], 0x03(%[src_ptr]) \n\t"
4831 "gslwrc1 %[src], 0x00(%[src_ptr]) \n\t"
4832 "punpcklbh %[src], %[src], %[src] \n\t"
4833 "punpcklhw %[dest], %[src], %[src] \n\t"
4834 "and %[dest], %[dest], %[mask0] \n\t"
4835 "or %[dest], %[dest], %[mask1] \n\t"
4836 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4837 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4838
4839 "punpckhhw %[dest], %[src], %[src] \n\t"
4840 "and %[dest], %[dest], %[mask0] \n\t"
4841 "or %[dest], %[dest], %[mask1] \n\t"
4842 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
4843 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
4844
4845 "daddiu %[src_ptr], %[src_ptr], 0x04 \n\t"
4846 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
4847 "daddi %[width], %[width], -0x04 \n\t"
4848 "bnez %[width], 1b \n\t"
4849 : [src] "=&f"(src), [dest] "=&f"(dest)
4850 : [src_ptr] "r"(src_y), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
4851 [mask1] "f"(mask1), [width] "r"(width)
4852 : "memory");
4853 }
4854
4855 // TODO - respect YuvConstants
I400ToARGBRow_MMI(const uint8_t * src_y,uint8_t * rgb_buf,const struct YuvConstants *,int width)4856 void I400ToARGBRow_MMI(const uint8_t* src_y, uint8_t* rgb_buf,
4857 const struct YuvConstants*, int width) {
4858 uint64_t src, src_lo, src_hi, dest, dest_lo, dest_hi;
4859 const uint64_t mask0 = 0x0;
4860 const uint64_t mask1 = 0x55;
4861 const uint64_t mask2 = 0xAA;
4862 const uint64_t mask3 = 0xFF;
4863 const uint64_t mask4 = 0x4A354A354A354A35ULL;
4864 const uint64_t mask5 = 0x0488048804880488ULL;
4865 const uint64_t shift0 = 0x08;
4866 const uint64_t shift1 = 0x06;
4867
4868 __asm__ volatile(
4869 "1: \n\t"
4870 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
4871 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
4872 "punpcklbh %[src_lo], %[src], %[mask0] \n\t"
4873 "punpckhbh %[src_hi], %[src], %[mask0] \n\t"
4874
4875 "pshufh %[src], %[src_lo], %[mask0] \n\t"
4876 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4877 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4878 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4879 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4880 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4881 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4882 "pshufh %[src], %[src_lo], %[mask1] \n\t"
4883 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4884 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4885 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4886 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4887 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4888 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4889 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4890 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4891 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4892
4893 "pshufh %[src], %[src_lo], %[mask2] \n\t"
4894 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4895 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4896 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4897 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4898 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4899 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4900 "pshufh %[src], %[src_lo], %[mask3] \n\t"
4901 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4902 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4903 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4904 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4905 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4906 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4907 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4908 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
4909 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
4910
4911 "pshufh %[src], %[src_hi], %[mask0] \n\t"
4912 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4913 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4914 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4915 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4916 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4917 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4918 "pshufh %[src], %[src_hi], %[mask1] \n\t"
4919 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4920 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4921 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4922 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4923 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4924 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4925 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4926 "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
4927 "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
4928
4929 "pshufh %[src], %[src_hi], %[mask2] \n\t"
4930 "psllh %[dest_lo], %[src], %[shift0] \n\t"
4931 "paddush %[dest_lo], %[dest_lo], %[src] \n\t"
4932 "pmulhuh %[dest_lo], %[dest_lo], %[mask4] \n\t"
4933 "psubh %[dest_lo], %[dest_lo], %[mask5] \n\t"
4934 "psrah %[dest_lo], %[dest_lo], %[shift1] \n\t"
4935 "pinsrh_3 %[dest_lo], %[dest_lo], %[mask3] \n\t"
4936 "pshufh %[src], %[src_hi], %[mask3] \n\t"
4937 "psllh %[dest_hi], %[src], %[shift0] \n\t"
4938 "paddush %[dest_hi], %[dest_hi], %[src] \n\t"
4939 "pmulhuh %[dest_hi], %[dest_hi], %[mask4] \n\t"
4940 "psubh %[dest_hi], %[dest_hi], %[mask5] \n\t"
4941 "psrah %[dest_hi], %[dest_hi], %[shift1] \n\t"
4942 "pinsrh_3 %[dest_hi], %[dest_hi], %[mask3] \n\t"
4943 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
4944 "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
4945 "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
4946
4947 "daddi %[src_ptr], %[src_ptr], 0x08 \n\t"
4948 "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
4949 "daddi %[width], %[width], -0x08 \n\t"
4950 "bnez %[width], 1b \n\t"
4951 : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
4952 [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
4953 [dest_lo] "=&f"(dest_lo)
4954 : [src_ptr] "r"(src_y), [dst_ptr] "r"(rgb_buf), [mask0] "f"(mask0),
4955 [mask1] "f"(mask1), [mask2] "f"(mask2), [mask3] "f"(mask3),
4956 [mask4] "f"(mask4), [mask5] "f"(mask5), [shift0] "f"(shift0),
4957 [shift1] "f"(shift1), [width] "r"(width)
4958 : "memory");
4959 }
4960
MirrorRow_MMI(const uint8_t * src,uint8_t * dst,int width)4961 void MirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
4962 uint64_t source, src0, src1, dest;
4963 const uint64_t mask0 = 0x0;
4964 const uint64_t mask1 = 0x1b;
4965
4966 src += width - 1;
4967 __asm__ volatile(
4968 "1: \n\t"
4969 "gsldlc1 %[source], 0(%[src_ptr]) \n\t"
4970 "gsldrc1 %[source], -7(%[src_ptr]) \n\t"
4971 "punpcklbh %[src0], %[source], %[mask0] \n\t"
4972 "pshufh %[src0], %[src0], %[mask1] \n\t"
4973 "punpckhbh %[src1], %[source], %[mask0] \n\t"
4974 "pshufh %[src1], %[src1], %[mask1] \n\t"
4975 "packushb %[dest], %[src1], %[src0] \n\t"
4976
4977 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
4978 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
4979
4980 "daddi %[src_ptr], %[src_ptr], -0x08 \n\t"
4981 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
4982 "daddi %[width], %[width], -0x08 \n\t"
4983 "bnez %[width], 1b \n\t"
4984 : [source] "=&f"(source), [dest] "=&f"(dest), [src0] "=&f"(src0),
4985 [src1] "=&f"(src1)
4986 : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
4987 [mask1] "f"(mask1), [width] "r"(width)
4988 : "memory");
4989 }
4990
MirrorSplitUVRow_MMI(const uint8_t * src_uv,uint8_t * dst_u,uint8_t * dst_v,int width)4991 void MirrorSplitUVRow_MMI(const uint8_t* src_uv,
4992 uint8_t* dst_u,
4993 uint8_t* dst_v,
4994 int width) {
4995 uint64_t src0, src1, dest0, dest1;
4996 const uint64_t mask0 = 0x00ff00ff00ff00ffULL;
4997 const uint64_t mask1 = 0x1b;
4998 const uint64_t shift = 0x08;
4999
5000 src_uv += (width - 1) << 1;
5001
5002 __asm__ volatile(
5003 "1: \n\t"
5004 "gsldlc1 %[src0], 1(%[src_ptr]) \n\t"
5005 "gsldrc1 %[src0], -6(%[src_ptr]) \n\t"
5006 "gsldlc1 %[src1], -7(%[src_ptr]) \n\t"
5007 "gsldrc1 %[src1], -14(%[src_ptr]) \n\t"
5008
5009 "and %[dest0], %[src0], %[mask0] \n\t"
5010 "pshufh %[dest0], %[dest0], %[mask1] \n\t"
5011 "and %[dest1], %[src1], %[mask0] \n\t"
5012 "pshufh %[dest1], %[dest1], %[mask1] \n\t"
5013 "packushb %[dest0], %[dest0], %[dest1] \n\t"
5014 "gssdlc1 %[dest0], 0x07(%[dstu_ptr]) \n\t"
5015 "gssdrc1 %[dest0], 0x00(%[dstu_ptr]) \n\t"
5016
5017 "psrlh %[dest0], %[src0], %[shift] \n\t"
5018 "pshufh %[dest0], %[dest0], %[mask1] \n\t"
5019 "psrlh %[dest1], %[src1], %[shift] \n\t"
5020 "pshufh %[dest1], %[dest1], %[mask1] \n\t"
5021 "packushb %[dest0], %[dest0], %[dest1] \n\t"
5022 "gssdlc1 %[dest0], 0x07(%[dstv_ptr]) \n\t"
5023 "gssdrc1 %[dest0], 0x00(%[dstv_ptr]) \n\t"
5024
5025 "daddi %[src_ptr], %[src_ptr], -0x10 \n\t"
5026 "daddiu %[dstu_ptr], %[dstu_ptr], 0x08 \n\t"
5027 "daddiu %[dstv_ptr], %[dstv_ptr], 0x08 \n\t"
5028 "daddi %[width], %[width], -0x08 \n\t"
5029 "bnez %[width], 1b \n\t"
5030 : [dest0] "=&f"(dest0), [dest1] "=&f"(dest1), [src0] "=&f"(src0),
5031 [src1] "=&f"(src1)
5032 : [src_ptr] "r"(src_uv), [dstu_ptr] "r"(dst_u), [dstv_ptr] "r"(dst_v),
5033 [width] "r"(width), [mask0] "f"(mask0), [mask1] "f"(mask1),
5034 [shift] "f"(shift)
5035 : "memory");
5036 }
5037
ARGBMirrorRow_MMI(const uint8_t * src,uint8_t * dst,int width)5038 void ARGBMirrorRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
5039 src += (width - 1) * 4;
5040 uint64_t temp = 0x0;
5041 uint64_t shuff = 0x4e; // 01 00 11 10
5042 __asm__ volatile(
5043 "1: \n\t"
5044 "gsldlc1 %[temp], 3(%[src]) \n\t"
5045 "gsldrc1 %[temp], -4(%[src]) \n\t"
5046 "pshufh %[temp], %[temp], %[shuff] \n\t"
5047 "gssdrc1 %[temp], 0x0(%[dst]) \n\t"
5048 "gssdlc1 %[temp], 0x7(%[dst]) \n\t"
5049
5050 "daddiu %[src], %[src], -0x08 \n\t"
5051 "daddiu %[dst], %[dst], 0x08 \n\t"
5052 "daddiu %[width], %[width], -0x02 \n\t"
5053 "bnez %[width], 1b \n\t"
5054 : [temp] "=&f"(temp)
5055 : [src] "r"(src), [dst] "r"(dst), [width] "r"(width), [shuff] "f"(shuff)
5056 : "memory");
5057 }
5058
SplitUVRow_MMI(const uint8_t * src_uv,uint8_t * dst_u,uint8_t * dst_v,int width)5059 void SplitUVRow_MMI(const uint8_t* src_uv,
5060 uint8_t* dst_u,
5061 uint8_t* dst_v,
5062 int width) {
5063 uint64_t c0 = 0x00ff00ff00ff00ff;
5064 uint64_t temp[4];
5065 uint64_t shift = 0x08;
5066 __asm__ volatile(
5067 "1: \n\t"
5068 "gsldrc1 %[t0], 0x00(%[src_uv]) \n\t"
5069 "gsldlc1 %[t0], 0x07(%[src_uv]) \n\t"
5070 "gsldrc1 %[t1], 0x08(%[src_uv]) \n\t"
5071 "gsldlc1 %[t1], 0x0f(%[src_uv]) \n\t"
5072
5073 "and %[t2], %[t0], %[c0] \n\t"
5074 "and %[t3], %[t1], %[c0] \n\t"
5075 "packushb %[t2], %[t2], %[t3] \n\t"
5076 "gssdrc1 %[t2], 0x0(%[dst_u]) \n\t"
5077 "gssdlc1 %[t2], 0x7(%[dst_u]) \n\t"
5078
5079 "psrlh %[t2], %[t0], %[shift] \n\t"
5080 "psrlh %[t3], %[t1], %[shift] \n\t"
5081 "packushb %[t2], %[t2], %[t3] \n\t"
5082 "gssdrc1 %[t2], 0x0(%[dst_v]) \n\t"
5083 "gssdlc1 %[t2], 0x7(%[dst_v]) \n\t"
5084
5085 "daddiu %[src_uv], %[src_uv], 16 \n\t"
5086 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5087 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5088 "daddiu %[width], %[width], -8 \n\t"
5089 "bgtz %[width], 1b \n\t"
5090 "nop \n\t"
5091 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
5092 [t3] "=&f"(temp[3])
5093 : [src_uv] "r"(src_uv), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
5094 [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
5095 : "memory");
5096 }
5097
MergeUVRow_MMI(const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_uv,int width)5098 void MergeUVRow_MMI(const uint8_t* src_u,
5099 const uint8_t* src_v,
5100 uint8_t* dst_uv,
5101 int width) {
5102 uint64_t temp[3];
5103 __asm__ volatile(
5104 "1: \n\t"
5105 "gsldrc1 %[t0], 0x0(%[src_u]) \n\t"
5106 "gsldlc1 %[t0], 0x7(%[src_u]) \n\t"
5107 "gsldrc1 %[t1], 0x0(%[src_v]) \n\t"
5108 "gsldlc1 %[t1], 0x7(%[src_v]) \n\t"
5109 "punpcklbh %[t2], %[t0], %[t1] \n\t"
5110 "gssdrc1 %[t2], 0x0(%[dst_uv]) \n\t"
5111 "gssdlc1 %[t2], 0x7(%[dst_uv]) \n\t"
5112 "punpckhbh %[t2], %[t0], %[t1] \n\t"
5113 "gssdrc1 %[t2], 0x8(%[dst_uv]) \n\t"
5114 "gssdlc1 %[t2], 0xf(%[dst_uv]) \n\t"
5115
5116 "daddiu %[src_u], %[src_u], 8 \n\t"
5117 "daddiu %[src_v], %[src_v], 8 \n\t"
5118 "daddiu %[dst_uv], %[dst_uv], 16 \n\t"
5119 "daddiu %[width], %[width], -8 \n\t"
5120 "bgtz %[width], 1b \n\t"
5121 "nop \n\t"
5122 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2])
5123 : [dst_uv] "r"(dst_uv), [src_u] "r"(src_u), [src_v] "r"(src_v),
5124 [width] "r"(width)
5125 : "memory");
5126 }
5127
SplitRGBRow_MMI(const uint8_t * src_rgb,uint8_t * dst_r,uint8_t * dst_g,uint8_t * dst_b,int width)5128 void SplitRGBRow_MMI(const uint8_t* src_rgb,
5129 uint8_t* dst_r,
5130 uint8_t* dst_g,
5131 uint8_t* dst_b,
5132 int width) {
5133 uint64_t src[4];
5134 uint64_t dest_hi, dest_lo, dest;
5135
5136 __asm__ volatile(
5137 "1: \n\t"
5138 "gslwlc1 %[src0], 0x03(%[src_ptr]) \n\t"
5139 "gslwrc1 %[src0], 0x00(%[src_ptr]) \n\t"
5140 "gslwlc1 %[src1], 0x06(%[src_ptr]) \n\t"
5141 "gslwrc1 %[src1], 0x03(%[src_ptr]) \n\t"
5142 "punpcklbh %[dest_lo], %[src0], %[src1] \n\t"
5143 "gslwlc1 %[src2], 0x09(%[src_ptr]) \n\t"
5144 "gslwrc1 %[src2], 0x06(%[src_ptr]) \n\t"
5145 "gslwlc1 %[src3], 0x0c(%[src_ptr]) \n\t"
5146 "gslwrc1 %[src3], 0x09(%[src_ptr]) \n\t"
5147 "punpcklbh %[dest_hi], %[src2], %[src3] \n\t"
5148
5149 "punpcklhw %[dest], %[dest_lo], %[dest_hi] \n\t"
5150 "gsswlc1 %[dest], 0x03(%[dstr_ptr]) \n\t"
5151 "gsswrc1 %[dest], 0x00(%[dstr_ptr]) \n\t"
5152 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5153 "gsswlc1 %[dest], 0x03(%[dstg_ptr]) \n\t"
5154 "gsswrc1 %[dest], 0x00(%[dstg_ptr]) \n\t"
5155 "punpckhhw %[dest], %[dest_lo], %[dest_hi] \n\t"
5156 "gsswlc1 %[dest], 0x03(%[dstb_ptr]) \n\t"
5157 "gsswrc1 %[dest], 0x00(%[dstb_ptr]) \n\t"
5158
5159 "daddiu %[src_ptr], %[src_ptr], 0x0c \n\t"
5160 "daddiu %[dstr_ptr], %[dstr_ptr], 0x04 \n\t"
5161 "daddiu %[dstg_ptr], %[dstg_ptr], 0x04 \n\t"
5162 "daddiu %[dstb_ptr], %[dstb_ptr], 0x04 \n\t"
5163 "daddi %[width], %[width], -0x04 \n\t"
5164 "bnez %[width], 1b \n\t"
5165 : [src0] "=&f"(src[0]), [src1] "=&f"(src[1]), [src2] "=&f"(src[2]),
5166 [src3] "=&f"(src[3]), [dest_hi] "=&f"(dest_hi),
5167 [dest_lo] "=&f"(dest_lo), [dest] "=&f"(dest)
5168 : [src_ptr] "r"(src_rgb), [dstr_ptr] "r"(dst_r), [dstg_ptr] "r"(dst_g),
5169 [dstb_ptr] "r"(dst_b), [width] "r"(width)
5170 : "memory");
5171 }
5172
MergeRGBRow_MMI(const uint8_t * src_r,const uint8_t * src_g,const uint8_t * src_b,uint8_t * dst_rgb,int width)5173 void MergeRGBRow_MMI(const uint8_t* src_r,
5174 const uint8_t* src_g,
5175 const uint8_t* src_b,
5176 uint8_t* dst_rgb,
5177 int width) {
5178 uint64_t srcr, srcg, srcb, dest;
5179 uint64_t srcrg_hi, srcrg_lo, srcbz_hi, srcbz_lo;
5180 const uint64_t temp = 0x0;
5181
5182 __asm__ volatile(
5183 "1: \n\t"
5184 "gsldlc1 %[srcr], 0x07(%[srcr_ptr]) \n\t"
5185 "gsldrc1 %[srcr], 0x00(%[srcr_ptr]) \n\t"
5186 "gsldlc1 %[srcg], 0x07(%[srcg_ptr]) \n\t"
5187 "gsldrc1 %[srcg], 0x00(%[srcg_ptr]) \n\t"
5188 "punpcklbh %[srcrg_lo], %[srcr], %[srcg] \n\t"
5189 "punpckhbh %[srcrg_hi], %[srcr], %[srcg] \n\t"
5190
5191 "gsldlc1 %[srcb], 0x07(%[srcb_ptr]) \n\t"
5192 "gsldrc1 %[srcb], 0x00(%[srcb_ptr]) \n\t"
5193 "punpcklbh %[srcbz_lo], %[srcb], %[temp] \n\t"
5194 "punpckhbh %[srcbz_hi], %[srcb], %[temp] \n\t"
5195
5196 "punpcklhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
5197 "gsswlc1 %[dest], 0x03(%[dst_ptr]) \n\t"
5198 "gsswrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5199 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5200 "gsswlc1 %[dest], 0x06(%[dst_ptr]) \n\t"
5201 "gsswrc1 %[dest], 0x03(%[dst_ptr]) \n\t"
5202 "punpckhhw %[dest], %[srcrg_lo], %[srcbz_lo] \n\t"
5203 "gsswlc1 %[dest], 0x09(%[dst_ptr]) \n\t"
5204 "gsswrc1 %[dest], 0x06(%[dst_ptr]) \n\t"
5205 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5206 "gsswlc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
5207 "gsswrc1 %[dest], 0x09(%[dst_ptr]) \n\t"
5208 "punpcklhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
5209 "gsswlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
5210 "gsswrc1 %[dest], 0x0c(%[dst_ptr]) \n\t"
5211 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5212 "gsswlc1 %[dest], 0x12(%[dst_ptr]) \n\t"
5213 "gsswrc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
5214 "punpckhhw %[dest], %[srcrg_hi], %[srcbz_hi] \n\t"
5215 "gsswlc1 %[dest], 0x15(%[dst_ptr]) \n\t"
5216 "gsswrc1 %[dest], 0x12(%[dst_ptr]) \n\t"
5217 "punpckhwd %[dest], %[dest], %[dest] \n\t"
5218 "gsswlc1 %[dest], 0x18(%[dst_ptr]) \n\t"
5219 "gsswrc1 %[dest], 0x15(%[dst_ptr]) \n\t"
5220
5221 "daddiu %[srcr_ptr], %[srcr_ptr], 0x08 \n\t"
5222 "daddiu %[srcg_ptr], %[srcg_ptr], 0x08 \n\t"
5223 "daddiu %[srcb_ptr], %[srcb_ptr], 0x08 \n\t"
5224 "daddiu %[dst_ptr], %[dst_ptr], 0x18 \n\t"
5225 "daddi %[width], %[width], -0x08 \n\t"
5226 "bnez %[width], 1b \n\t"
5227 : [srcr] "=&f"(srcr), [srcg] "=&f"(srcg), [srcb] "=&f"(srcb),
5228 [dest] "=&f"(dest), [srcrg_hi] "=&f"(srcrg_hi),
5229 [srcrg_lo] "=&f"(srcrg_lo), [srcbz_hi] "=&f"(srcbz_hi),
5230 [srcbz_lo] "=&f"(srcbz_lo)
5231 : [srcr_ptr] "r"(src_r), [srcg_ptr] "r"(src_g), [srcb_ptr] "r"(src_b),
5232 [dst_ptr] "r"(dst_rgb), [width] "r"(width), [temp] "f"(temp)
5233 : "memory");
5234 }
5235
5236 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
YUY2ToUVRow_MMI(const uint8_t * src_yuy2,int src_stride_yuy2,uint8_t * dst_u,uint8_t * dst_v,int width)5237 void YUY2ToUVRow_MMI(const uint8_t* src_yuy2,
5238 int src_stride_yuy2,
5239 uint8_t* dst_u,
5240 uint8_t* dst_v,
5241 int width) {
5242 uint64_t c0 = 0xff00ff00ff00ff00;
5243 uint64_t c1 = 0x00ff00ff00ff00ff;
5244 uint64_t temp[3];
5245 uint64_t data[4];
5246 uint64_t shift = 0x08;
5247 uint64_t src_stride = 0x0;
5248 __asm__ volatile(
5249 "1: \n\t"
5250 "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
5251 "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
5252 "daddu %[src_stride], %[src_yuy2], %[src_stride_yuy2] \n\t"
5253 "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
5254 "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
5255 "pavgb %[t0], %[t0], %[t1] \n\t"
5256
5257 "gsldrc1 %[t2], 0x08(%[src_yuy2]) \n\t"
5258 "gsldlc1 %[t2], 0x0f(%[src_yuy2]) \n\t"
5259 "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
5260 "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
5261 "pavgb %[t1], %[t2], %[t1] \n\t"
5262
5263 "and %[t0], %[t0], %[c0] \n\t"
5264 "and %[t1], %[t1], %[c0] \n\t"
5265 "psrlh %[t0], %[t0], %[shift] \n\t"
5266 "psrlh %[t1], %[t1], %[shift] \n\t"
5267 "packushb %[t0], %[t0], %[t1] \n\t"
5268 "mov.s %[t1], %[t0] \n\t"
5269 "and %[d0], %[t0], %[c1] \n\t"
5270 "psrlh %[d1], %[t1], %[shift] \n\t"
5271
5272 "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
5273 "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
5274 "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
5275 "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
5276 "pavgb %[t0], %[t0], %[t1] \n\t"
5277
5278 "gsldrc1 %[t2], 0x18(%[src_yuy2]) \n\t"
5279 "gsldlc1 %[t2], 0x1f(%[src_yuy2]) \n\t"
5280 "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
5281 "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
5282 "pavgb %[t1], %[t2], %[t1] \n\t"
5283
5284 "and %[t0], %[t0], %[c0] \n\t"
5285 "and %[t1], %[t1], %[c0] \n\t"
5286 "psrlh %[t0], %[t0], %[shift] \n\t"
5287 "psrlh %[t1], %[t1], %[shift] \n\t"
5288 "packushb %[t0], %[t0], %[t1] \n\t"
5289 "mov.s %[t1], %[t0] \n\t"
5290 "and %[d2], %[t0], %[c1] \n\t"
5291 "psrlh %[d3], %[t1], %[shift] \n\t"
5292
5293 "packushb %[d0], %[d0], %[d2] \n\t"
5294 "packushb %[d1], %[d1], %[d3] \n\t"
5295 "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
5296 "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
5297 "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
5298 "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
5299 "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
5300 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5301 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5302 "daddiu %[width], %[width], -16 \n\t"
5303 "bgtz %[width], 1b \n\t"
5304 "nop \n\t"
5305 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
5306 [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
5307 [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
5308 : [src_yuy2] "r"(src_yuy2), [src_stride_yuy2] "r"(src_stride_yuy2),
5309 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
5310 [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
5311 : "memory");
5312 }
5313
5314 // Copy row of YUY2 UV's (422) into U and V (422).
YUY2ToUV422Row_MMI(const uint8_t * src_yuy2,uint8_t * dst_u,uint8_t * dst_v,int width)5315 void YUY2ToUV422Row_MMI(const uint8_t* src_yuy2,
5316 uint8_t* dst_u,
5317 uint8_t* dst_v,
5318 int width) {
5319 uint64_t c0 = 0xff00ff00ff00ff00;
5320 uint64_t c1 = 0x00ff00ff00ff00ff;
5321 uint64_t temp[2];
5322 uint64_t data[4];
5323 uint64_t shift = 0x08;
5324 __asm__ volatile(
5325 "1: \n\t"
5326 "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
5327 "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
5328 "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
5329 "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
5330 "and %[t0], %[t0], %[c0] \n\t"
5331 "and %[t1], %[t1], %[c0] \n\t"
5332 "psrlh %[t0], %[t0], %[shift] \n\t"
5333 "psrlh %[t1], %[t1], %[shift] \n\t"
5334 "packushb %[t0], %[t0], %[t1] \n\t"
5335 "mov.s %[t1], %[t0] \n\t"
5336 "and %[d0], %[t0], %[c1] \n\t"
5337 "psrlh %[d1], %[t1], %[shift] \n\t"
5338
5339 "gsldrc1 %[t0], 0x10(%[src_yuy2]) \n\t"
5340 "gsldlc1 %[t0], 0x17(%[src_yuy2]) \n\t"
5341 "gsldrc1 %[t1], 0x18(%[src_yuy2]) \n\t"
5342 "gsldlc1 %[t1], 0x1f(%[src_yuy2]) \n\t"
5343 "and %[t0], %[t0], %[c0] \n\t"
5344 "and %[t1], %[t1], %[c0] \n\t"
5345 "psrlh %[t0], %[t0], %[shift] \n\t"
5346 "psrlh %[t1], %[t1], %[shift] \n\t"
5347 "packushb %[t0], %[t0], %[t1] \n\t"
5348 "mov.s %[t1], %[t0] \n\t"
5349 "and %[d2], %[t0], %[c1] \n\t"
5350 "psrlh %[d3], %[t1], %[shift] \n\t"
5351
5352 "packushb %[d0], %[d0], %[d2] \n\t"
5353 "packushb %[d1], %[d1], %[d3] \n\t"
5354 "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
5355 "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
5356 "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
5357 "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
5358 "daddiu %[src_yuy2], %[src_yuy2], 32 \n\t"
5359 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5360 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5361 "daddiu %[width], %[width], -16 \n\t"
5362 "bgtz %[width], 1b \n\t"
5363 "nop \n\t"
5364 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
5365 [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
5366 : [src_yuy2] "r"(src_yuy2), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
5367 [width] "r"(width), [c0] "f"(c0), [c1] "f"(c1), [shift] "f"(shift)
5368 : "memory");
5369 }
5370
5371 // Copy row of YUY2 Y's (422) into Y (420/422).
YUY2ToYRow_MMI(const uint8_t * src_yuy2,uint8_t * dst_y,int width)5372 void YUY2ToYRow_MMI(const uint8_t* src_yuy2, uint8_t* dst_y, int width) {
5373 uint64_t c0 = 0x00ff00ff00ff00ff;
5374 uint64_t temp[2];
5375 __asm__ volatile(
5376 "1: \n\t"
5377 "gsldrc1 %[t0], 0x00(%[src_yuy2]) \n\t"
5378 "gsldlc1 %[t0], 0x07(%[src_yuy2]) \n\t"
5379 "gsldrc1 %[t1], 0x08(%[src_yuy2]) \n\t"
5380 "gsldlc1 %[t1], 0x0f(%[src_yuy2]) \n\t"
5381 "and %[t0], %[t0], %[c0] \n\t"
5382 "and %[t1], %[t1], %[c0] \n\t"
5383 "packushb %[t0], %[t0], %[t1] \n\t"
5384 "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
5385 "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
5386 "daddiu %[src_yuy2], %[src_yuy2], 16 \n\t"
5387 "daddiu %[dst_y], %[dst_y], 8 \n\t"
5388 "daddiu %[width], %[width], -8 \n\t"
5389 "bgtz %[width], 1b \n\t"
5390 "nop \n\t"
5391 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
5392 : [src_yuy2] "r"(src_yuy2), [dst_y] "r"(dst_y), [width] "r"(width),
5393 [c0] "f"(c0)
5394 : "memory");
5395 }
5396
5397 // Filter 2 rows of UYVY UV's (422) into U and V (420).
UYVYToUVRow_MMI(const uint8_t * src_uyvy,int src_stride_uyvy,uint8_t * dst_u,uint8_t * dst_v,int width)5398 void UYVYToUVRow_MMI(const uint8_t* src_uyvy,
5399 int src_stride_uyvy,
5400 uint8_t* dst_u,
5401 uint8_t* dst_v,
5402 int width) {
5403 // Output a row of UV values.
5404 uint64_t c0 = 0x00ff00ff00ff00ff;
5405 uint64_t temp[3];
5406 uint64_t data[4];
5407 uint64_t shift = 0x08;
5408 uint64_t src_stride = 0x0;
5409 __asm__ volatile(
5410 "1: \n\t"
5411 "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
5412 "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
5413 "daddu %[src_stride], %[src_uyvy], %[src_stride_uyvy] \n\t"
5414 "gsldrc1 %[t1], 0x00(%[src_stride]) \n\t"
5415 "gsldlc1 %[t1], 0x07(%[src_stride]) \n\t"
5416 "pavgb %[t0], %[t0], %[t1] \n\t"
5417
5418 "gsldrc1 %[t2], 0x08(%[src_uyvy]) \n\t"
5419 "gsldlc1 %[t2], 0x0f(%[src_uyvy]) \n\t"
5420 "gsldrc1 %[t1], 0x08(%[src_stride]) \n\t"
5421 "gsldlc1 %[t1], 0x0f(%[src_stride]) \n\t"
5422 "pavgb %[t1], %[t2], %[t1] \n\t"
5423
5424 "and %[t0], %[t0], %[c0] \n\t"
5425 "and %[t1], %[t1], %[c0] \n\t"
5426 "packushb %[t0], %[t0], %[t1] \n\t"
5427 "mov.s %[t1], %[t0] \n\t"
5428 "and %[d0], %[t0], %[c0] \n\t"
5429 "psrlh %[d1], %[t1], %[shift] \n\t"
5430
5431 "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
5432 "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
5433 "gsldrc1 %[t1], 0x10(%[src_stride]) \n\t"
5434 "gsldlc1 %[t1], 0x17(%[src_stride]) \n\t"
5435 "pavgb %[t0], %[t0], %[t1] \n\t"
5436
5437 "gsldrc1 %[t2], 0x18(%[src_uyvy]) \n\t"
5438 "gsldlc1 %[t2], 0x1f(%[src_uyvy]) \n\t"
5439 "gsldrc1 %[t1], 0x18(%[src_stride]) \n\t"
5440 "gsldlc1 %[t1], 0x1f(%[src_stride]) \n\t"
5441 "pavgb %[t1], %[t2], %[t1] \n\t"
5442
5443 "and %[t0], %[t0], %[c0] \n\t"
5444 "and %[t1], %[t1], %[c0] \n\t"
5445 "packushb %[t0], %[t0], %[t1] \n\t"
5446 "mov.s %[t1], %[t0] \n\t"
5447 "and %[d2], %[t0], %[c0] \n\t"
5448 "psrlh %[d3], %[t1], %[shift] \n\t"
5449
5450 "packushb %[d0], %[d0], %[d2] \n\t"
5451 "packushb %[d1], %[d1], %[d3] \n\t"
5452 "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
5453 "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
5454 "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
5455 "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
5456 "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
5457 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5458 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5459 "daddiu %[width], %[width], -16 \n\t"
5460 "bgtz %[width], 1b \n\t"
5461 "nop \n\t"
5462 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [t2] "=&f"(temp[2]),
5463 [d0] "=&f"(data[0]), [d1] "=&f"(data[1]), [d2] "=&f"(data[2]),
5464 [d3] "=&f"(data[3]), [src_stride] "=&r"(src_stride)
5465 : [src_uyvy] "r"(src_uyvy), [src_stride_uyvy] "r"(src_stride_uyvy),
5466 [dst_u] "r"(dst_u), [dst_v] "r"(dst_v), [width] "r"(width),
5467 [c0] "f"(c0), [shift] "f"(shift)
5468 : "memory");
5469 }
5470
5471 // Copy row of UYVY UV's (422) into U and V (422).
UYVYToUV422Row_MMI(const uint8_t * src_uyvy,uint8_t * dst_u,uint8_t * dst_v,int width)5472 void UYVYToUV422Row_MMI(const uint8_t* src_uyvy,
5473 uint8_t* dst_u,
5474 uint8_t* dst_v,
5475 int width) {
5476 // Output a row of UV values.
5477 uint64_t c0 = 0x00ff00ff00ff00ff;
5478 uint64_t temp[2];
5479 uint64_t data[4];
5480 uint64_t shift = 0x08;
5481 __asm__ volatile(
5482 "1: \n\t"
5483 "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
5484 "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
5485 "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
5486 "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
5487 "and %[t0], %[t0], %[c0] \n\t"
5488 "and %[t1], %[t1], %[c0] \n\t"
5489 "packushb %[t0], %[t0], %[t1] \n\t"
5490 "mov.s %[t1], %[t0] \n\t"
5491 "and %[d0], %[t0], %[c0] \n\t"
5492 "psrlh %[d1], %[t1], %[shift] \n\t"
5493
5494 "gsldrc1 %[t0], 0x10(%[src_uyvy]) \n\t"
5495 "gsldlc1 %[t0], 0x17(%[src_uyvy]) \n\t"
5496 "gsldrc1 %[t1], 0x18(%[src_uyvy]) \n\t"
5497 "gsldlc1 %[t1], 0x1f(%[src_uyvy]) \n\t"
5498 "and %[t0], %[t0], %[c0] \n\t"
5499 "and %[t1], %[t1], %[c0] \n\t"
5500 "packushb %[t0], %[t0], %[t1] \n\t"
5501 "mov.s %[t1], %[t0] \n\t"
5502 "and %[d2], %[t0], %[c0] \n\t"
5503 "psrlh %[d3], %[t1], %[shift] \n\t"
5504
5505 "packushb %[d0], %[d0], %[d2] \n\t"
5506 "packushb %[d1], %[d1], %[d3] \n\t"
5507 "gssdrc1 %[d0], 0x0(%[dst_u]) \n\t"
5508 "gssdlc1 %[d0], 0x7(%[dst_u]) \n\t"
5509 "gssdrc1 %[d1], 0x0(%[dst_v]) \n\t"
5510 "gssdlc1 %[d1], 0x7(%[dst_v]) \n\t"
5511 "daddiu %[src_uyvy], %[src_uyvy], 32 \n\t"
5512 "daddiu %[dst_u], %[dst_u], 8 \n\t"
5513 "daddiu %[dst_v], %[dst_v], 8 \n\t"
5514 "daddiu %[width], %[width], -16 \n\t"
5515 "bgtz %[width], 1b \n\t"
5516 "nop \n\t"
5517 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1]), [d0] "=&f"(data[0]),
5518 [d1] "=&f"(data[1]), [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
5519 : [src_uyvy] "r"(src_uyvy), [dst_u] "r"(dst_u), [dst_v] "r"(dst_v),
5520 [width] "r"(width), [c0] "f"(c0), [shift] "f"(shift)
5521 : "memory");
5522 }
5523
5524 // Copy row of UYVY Y's (422) into Y (420/422).
UYVYToYRow_MMI(const uint8_t * src_uyvy,uint8_t * dst_y,int width)5525 void UYVYToYRow_MMI(const uint8_t* src_uyvy, uint8_t* dst_y, int width) {
5526 // Output a row of Y values.
5527 uint64_t c0 = 0x00ff00ff00ff00ff;
5528 uint64_t shift = 0x08;
5529 uint64_t temp[2];
5530 __asm__ volatile(
5531 "1: \n\t"
5532 "gsldrc1 %[t0], 0x00(%[src_uyvy]) \n\t"
5533 "gsldlc1 %[t0], 0x07(%[src_uyvy]) \n\t"
5534 "gsldrc1 %[t1], 0x08(%[src_uyvy]) \n\t"
5535 "gsldlc1 %[t1], 0x0f(%[src_uyvy]) \n\t"
5536 "dsrl %[t0], %[t0], %[shift] \n\t"
5537 "dsrl %[t1], %[t1], %[shift] \n\t"
5538 "and %[t0], %[t0], %[c0] \n\t"
5539 "and %[t1], %[t1], %[c0] \n\t"
5540 "and %[t1], %[t1], %[c0] \n\t"
5541 "packushb %[t0], %[t0], %[t1] \n\t"
5542 "gssdrc1 %[t0], 0x0(%[dst_y]) \n\t"
5543 "gssdlc1 %[t0], 0x7(%[dst_y]) \n\t"
5544 "daddiu %[src_uyvy], %[src_uyvy], 16 \n\t"
5545 "daddiu %[dst_y], %[dst_y], 8 \n\t"
5546 "daddiu %[width], %[width], -8 \n\t"
5547 "bgtz %[width], 1b \n\t"
5548 "nop \n\t"
5549 : [t0] "=&f"(temp[0]), [t1] "=&f"(temp[1])
5550 : [src_uyvy] "r"(src_uyvy), [dst_y] "r"(dst_y), [width] "r"(width),
5551 [c0] "f"(c0), [shift] "f"(shift)
5552 : "memory");
5553 }
5554
5555 // Blend src_argb over src_argb1 and store to dst_argb.
5556 // dst_argb may be src_argb or src_argb1.
5557 // This code mimics the SSSE3 version for better testability.
ARGBBlendRow_MMI(const uint8_t * src_argb,const uint8_t * src_argb1,uint8_t * dst_argb,int width)5558 void ARGBBlendRow_MMI(const uint8_t* src_argb,
5559 const uint8_t* src_argb1,
5560 uint8_t* dst_argb,
5561 int width) {
5562 uint64_t src0, src1, dest, alpha, src0_hi, src0_lo, src1_hi, src1_lo, dest_hi,
5563 dest_lo;
5564 const uint64_t mask0 = 0x0;
5565 const uint64_t mask1 = 0x00FFFFFF00FFFFFFULL;
5566 const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
5567 const uint64_t mask3 = 0xFF;
5568 const uint64_t mask4 = ~mask1;
5569 const uint64_t shift = 0x08;
5570
5571 __asm__ volatile(
5572 "1: \n\t"
5573 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
5574 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
5575 "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
5576
5577 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
5578 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
5579 "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
5580
5581 "psubush %[alpha], %[mask2], %[src0_lo] \n\t"
5582 "pshufh %[alpha], %[alpha], %[mask3] \n\t"
5583 "pmullh %[dest_lo], %[src1_lo], %[alpha] \n\t"
5584 "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
5585 "paddush %[dest_lo], %[dest_lo], %[src0_lo] \n\t"
5586
5587 "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
5588 "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
5589
5590 "psubush %[alpha], %[mask2], %[src0_hi] \n\t"
5591 "pshufh %[alpha], %[alpha], %[mask3] \n\t"
5592 "pmullh %[dest_hi], %[src1_hi], %[alpha] \n\t"
5593 "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
5594 "paddush %[dest_hi], %[dest_hi], %[src0_hi] \n\t"
5595
5596 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
5597 "and %[dest], %[dest], %[mask1] \n\t"
5598 "or %[dest], %[dest], %[mask4] \n\t"
5599 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
5600 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5601
5602 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
5603 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
5604 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
5605 "daddi %[width], %[width], -0x02 \n\t"
5606 "bnez %[width], 1b \n\t"
5607 : [src0] "=&f"(src0), [src1] "=&f"(src1), [alpha] "=&f"(alpha),
5608 [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
5609 [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
5610 [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo)
5611 : [src0_ptr] "r"(src_argb), [src1_ptr] "r"(src_argb1),
5612 [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0), [mask1] "f"(mask1),
5613 [mask2] "f"(mask2), [mask3] "f"(mask3), [mask4] "f"(mask4),
5614 [shift] "f"(shift), [width] "r"(width)
5615 : "memory");
5616 }
5617
BlendPlaneRow_MMI(const uint8_t * src0,const uint8_t * src1,const uint8_t * alpha,uint8_t * dst,int width)5618 void BlendPlaneRow_MMI(const uint8_t* src0,
5619 const uint8_t* src1,
5620 const uint8_t* alpha,
5621 uint8_t* dst,
5622 int width) {
5623 uint64_t source0, source1, dest, alph;
5624 uint64_t src0_hi, src0_lo, src1_hi, src1_lo, alpha_hi, alpha_lo, dest_hi,
5625 dest_lo;
5626 uint64_t alpha_rev, alpha_rev_lo, alpha_rev_hi;
5627 const uint64_t mask0 = 0x0;
5628 const uint64_t mask1 = 0xFFFFFFFFFFFFFFFFULL;
5629 const uint64_t mask2 = 0x00FF00FF00FF00FFULL;
5630 const uint64_t shift = 0x08;
5631
5632 __asm__ volatile(
5633 "1: \n\t"
5634 "gsldlc1 %[src0], 0x07(%[src0_ptr]) \n\t"
5635 "gsldrc1 %[src0], 0x00(%[src0_ptr]) \n\t"
5636 "punpcklbh %[src0_lo], %[src0], %[mask0] \n\t"
5637 "punpckhbh %[src0_hi], %[src0], %[mask0] \n\t"
5638
5639 "gsldlc1 %[src1], 0x07(%[src1_ptr]) \n\t"
5640 "gsldrc1 %[src1], 0x00(%[src1_ptr]) \n\t"
5641 "punpcklbh %[src1_lo], %[src1], %[mask0] \n\t"
5642 "punpckhbh %[src1_hi], %[src1], %[mask0] \n\t"
5643
5644 "gsldlc1 %[alpha], 0x07(%[alpha_ptr]) \n\t"
5645 "gsldrc1 %[alpha], 0x00(%[alpha_ptr]) \n\t"
5646 "psubusb %[alpha_r], %[mask1], %[alpha] \n\t"
5647 "punpcklbh %[alpha_lo], %[alpha], %[mask0] \n\t"
5648 "punpckhbh %[alpha_hi], %[alpha], %[mask0] \n\t"
5649 "punpcklbh %[alpha_rlo], %[alpha_r], %[mask0] \n\t"
5650 "punpckhbh %[alpha_rhi], %[alpha_r], %[mask0] \n\t"
5651
5652 "pmullh %[dest_lo], %[src0_lo], %[alpha_lo] \n\t"
5653 "pmullh %[dest], %[src1_lo], %[alpha_rlo] \n\t"
5654 "paddush %[dest_lo], %[dest_lo], %[dest] \n\t"
5655 "paddush %[dest_lo], %[dest_lo], %[mask2] \n\t"
5656 "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
5657
5658 "pmullh %[dest_hi], %[src0_hi], %[alpha_hi] \n\t"
5659 "pmullh %[dest], %[src1_hi], %[alpha_rhi] \n\t"
5660 "paddush %[dest_hi], %[dest_hi], %[dest] \n\t"
5661 "paddush %[dest_hi], %[dest_hi], %[mask2] \n\t"
5662 "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
5663
5664 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
5665 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
5666 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5667
5668 "daddiu %[src0_ptr], %[src0_ptr], 0x08 \n\t"
5669 "daddiu %[src1_ptr], %[src1_ptr], 0x08 \n\t"
5670 "daddiu %[alpha_ptr], %[alpha_ptr], 0x08 \n\t"
5671 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
5672 "daddi %[width], %[width], -0x08 \n\t"
5673 "bnez %[width], 1b \n\t"
5674 : [src0] "=&f"(source0), [src1] "=&f"(source1), [alpha] "=&f"(alph),
5675 [dest] "=&f"(dest), [src0_hi] "=&f"(src0_hi), [src0_lo] "=&f"(src0_lo),
5676 [src1_hi] "=&f"(src1_hi), [src1_lo] "=&f"(src1_lo),
5677 [alpha_hi] "=&f"(alpha_hi), [alpha_lo] "=&f"(alpha_lo),
5678 [dest_hi] "=&f"(dest_hi), [dest_lo] "=&f"(dest_lo),
5679 [alpha_rlo] "=&f"(alpha_rev_lo), [alpha_rhi] "=&f"(alpha_rev_hi),
5680 [alpha_r] "=&f"(alpha_rev)
5681 : [src0_ptr] "r"(src0), [src1_ptr] "r"(src1), [alpha_ptr] "r"(alpha),
5682 [dst_ptr] "r"(dst), [mask0] "f"(mask0), [mask1] "f"(mask1),
5683 [mask2] "f"(mask2), [shift] "f"(shift), [width] "r"(width)
5684 : "memory");
5685 }
5686
5687 // Multiply source RGB by alpha and store to destination.
5688 // This code mimics the SSSE3 version for better testability.
ARGBAttenuateRow_MMI(const uint8_t * src_argb,uint8_t * dst_argb,int width)5689 void ARGBAttenuateRow_MMI(const uint8_t* src_argb,
5690 uint8_t* dst_argb,
5691 int width) {
5692 uint64_t src, src_hi, src_lo, dest, dest_hi, dest_lo, alpha;
5693 const uint64_t mask0 = 0xFF;
5694 const uint64_t mask1 = 0xFF000000FF000000ULL;
5695 const uint64_t mask2 = ~mask1;
5696 const uint64_t shift = 0x08;
5697
5698 __asm__ volatile(
5699 "1: \n\t"
5700 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
5701 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
5702 "punpcklbh %[src_lo], %[src], %[src] \n\t"
5703 "punpckhbh %[src_hi], %[src], %[src] \n\t"
5704
5705 "pshufh %[alpha], %[src_lo], %[mask0] \n\t"
5706 "pmulhuh %[dest_lo], %[alpha], %[src_lo] \n\t"
5707 "psrlh %[dest_lo], %[dest_lo], %[shift] \n\t"
5708 "pshufh %[alpha], %[src_hi], %[mask0] \n\t"
5709 "pmulhuh %[dest_hi], %[alpha], %[src_hi] \n\t"
5710 "psrlh %[dest_hi], %[dest_hi], %[shift] \n\t"
5711
5712 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
5713 "and %[dest], %[dest], %[mask2] \n\t"
5714 "and %[src], %[src], %[mask1] \n\t"
5715 "or %[dest], %[dest], %[src] \n\t"
5716 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
5717 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5718
5719 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
5720 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
5721 "daddi %[width], %[width], -0x02 \n\t"
5722 "bnez %[width], 1b \n\t"
5723 : [src] "=&f"(src), [dest] "=&f"(dest), [src_hi] "=&f"(src_hi),
5724 [src_lo] "=&f"(src_lo), [dest_hi] "=&f"(dest_hi),
5725 [dest_lo] "=&f"(dest_lo), [alpha] "=&f"(alpha)
5726 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
5727 [mask1] "f"(mask1), [mask2] "f"(mask2), [shift] "f"(shift),
5728 [width] "r"(width)
5729 : "memory");
5730 }
5731
ComputeCumulativeSumRow_MMI(const uint8_t * row,int32_t * cumsum,const int32_t * previous_cumsum,int width)5732 void ComputeCumulativeSumRow_MMI(const uint8_t* row,
5733 int32_t* cumsum,
5734 const int32_t* previous_cumsum,
5735 int width) {
5736 int64_t row_sum[2] = {0, 0};
5737 uint64_t src, dest0, dest1, presrc0, presrc1, dest;
5738 const uint64_t mask = 0x0;
5739
5740 __asm__ volatile(
5741 "xor %[row_sum0], %[row_sum0], %[row_sum0] \n\t"
5742 "xor %[row_sum1], %[row_sum1], %[row_sum1] \n\t"
5743
5744 "1: \n\t"
5745 "gslwlc1 %[src], 0x03(%[row_ptr]) \n\t"
5746 "gslwrc1 %[src], 0x00(%[row_ptr]) \n\t"
5747
5748 "punpcklbh %[src], %[src], %[mask] \n\t"
5749 "punpcklhw %[dest0], %[src], %[mask] \n\t"
5750 "punpckhhw %[dest1], %[src], %[mask] \n\t"
5751
5752 "paddw %[row_sum0], %[row_sum0], %[dest0] \n\t"
5753 "paddw %[row_sum1], %[row_sum1], %[dest1] \n\t"
5754
5755 "gsldlc1 %[presrc0], 0x07(%[pre_ptr]) \n\t"
5756 "gsldrc1 %[presrc0], 0x00(%[pre_ptr]) \n\t"
5757 "gsldlc1 %[presrc1], 0x0f(%[pre_ptr]) \n\t"
5758 "gsldrc1 %[presrc1], 0x08(%[pre_ptr]) \n\t"
5759
5760 "paddw %[dest0], %[row_sum0], %[presrc0] \n\t"
5761 "paddw %[dest1], %[row_sum1], %[presrc1] \n\t"
5762
5763 "gssdlc1 %[dest0], 0x07(%[dst_ptr]) \n\t"
5764 "gssdrc1 %[dest0], 0x00(%[dst_ptr]) \n\t"
5765 "gssdlc1 %[dest1], 0x0f(%[dst_ptr]) \n\t"
5766 "gssdrc1 %[dest1], 0x08(%[dst_ptr]) \n\t"
5767
5768 "daddiu %[row_ptr], %[row_ptr], 0x04 \n\t"
5769 "daddiu %[pre_ptr], %[pre_ptr], 0x10 \n\t"
5770 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
5771 "daddi %[width], %[width], -0x01 \n\t"
5772 "bnez %[width], 1b \n\t"
5773 : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
5774 [dest1] "=&f"(dest1), [row_sum0] "+&f"(row_sum[0]),
5775 [row_sum1] "+&f"(row_sum[1]), [presrc0] "=&f"(presrc0),
5776 [presrc1] "=&f"(presrc1)
5777 : [row_ptr] "r"(row), [pre_ptr] "r"(previous_cumsum),
5778 [dst_ptr] "r"(cumsum), [width] "r"(width), [mask] "f"(mask)
5779 : "memory");
5780 }
5781
5782 // C version 2x2 -> 2x1.
InterpolateRow_MMI(uint8_t * dst_ptr,const uint8_t * src_ptr,ptrdiff_t src_stride,int width,int source_y_fraction)5783 void InterpolateRow_MMI(uint8_t* dst_ptr,
5784 const uint8_t* src_ptr,
5785 ptrdiff_t src_stride,
5786 int width,
5787 int source_y_fraction) {
5788 if (source_y_fraction == 0) {
5789 __asm__ volatile(
5790 "1: \n\t"
5791 "ld $t0, 0x0(%[src_ptr]) \n\t"
5792 "sd $t0, 0x0(%[dst_ptr]) \n\t"
5793 "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
5794 "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
5795 "daddiu %[width], %[width], -8 \n\t"
5796 "bgtz %[width], 1b \n\t"
5797 "nop \n\t"
5798 :
5799 : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [width] "r"(width)
5800 : "memory");
5801 return;
5802 }
5803 if (source_y_fraction == 128) {
5804 uint64_t uv = 0x0;
5805 uint64_t uv_stride = 0x0;
5806 __asm__ volatile(
5807 "1: \n\t"
5808 "gsldrc1 %[uv], 0x0(%[src_ptr]) \n\t"
5809 "gsldlc1 %[uv], 0x7(%[src_ptr]) \n\t"
5810 "daddu $t0, %[src_ptr], %[stride] \n\t"
5811 "gsldrc1 %[uv_stride], 0x0($t0) \n\t"
5812 "gsldlc1 %[uv_stride], 0x7($t0) \n\t"
5813
5814 "pavgb %[uv], %[uv], %[uv_stride] \n\t"
5815 "gssdrc1 %[uv], 0x0(%[dst_ptr]) \n\t"
5816 "gssdlc1 %[uv], 0x7(%[dst_ptr]) \n\t"
5817
5818 "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
5819 "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
5820 "daddiu %[width], %[width], -8 \n\t"
5821 "bgtz %[width], 1b \n\t"
5822 "nop \n\t"
5823 : [uv] "=&f"(uv), [uv_stride] "=&f"(uv_stride)
5824 : [src_ptr] "r"(src_ptr), [dst_ptr] "r"(dst_ptr), [width] "r"(width),
5825 [stride] "r"((int64_t)src_stride)
5826 : "memory");
5827 return;
5828 }
5829 const uint8_t* src_ptr1 = src_ptr + src_stride;
5830 uint64_t temp;
5831 uint64_t data[4];
5832 uint64_t zero = 0x0;
5833 uint64_t c0 = 0x0080008000800080;
5834 uint64_t fy0 = 0x0100010001000100;
5835 uint64_t shift = 0x8;
5836 __asm__ volatile(
5837 "pshufh %[fy1], %[fy1], %[zero] \n\t"
5838 "psubh %[fy0], %[fy0], %[fy1] \n\t"
5839 "1: \n\t"
5840 "gsldrc1 %[t0], 0x0(%[src_ptr]) \n\t"
5841 "gsldlc1 %[t0], 0x7(%[src_ptr]) \n\t"
5842 "punpcklbh %[d0], %[t0], %[zero] \n\t"
5843 "punpckhbh %[d1], %[t0], %[zero] \n\t"
5844 "gsldrc1 %[t0], 0x0(%[src_ptr1]) \n\t"
5845 "gsldlc1 %[t0], 0x7(%[src_ptr1]) \n\t"
5846 "punpcklbh %[d2], %[t0], %[zero] \n\t"
5847 "punpckhbh %[d3], %[t0], %[zero] \n\t"
5848
5849 "pmullh %[d0], %[d0], %[fy0] \n\t"
5850 "pmullh %[d2], %[d2], %[fy1] \n\t"
5851 "paddh %[d0], %[d0], %[d2] \n\t"
5852 "paddh %[d0], %[d0], %[c0] \n\t"
5853 "psrlh %[d0], %[d0], %[shift] \n\t"
5854
5855 "pmullh %[d1], %[d1], %[fy0] \n\t"
5856 "pmullh %[d3], %[d3], %[fy1] \n\t"
5857 "paddh %[d1], %[d1], %[d3] \n\t"
5858 "paddh %[d1], %[d1], %[c0] \n\t"
5859 "psrlh %[d1], %[d1], %[shift] \n\t"
5860
5861 "packushb %[d0], %[d0], %[d1] \n\t"
5862 "gssdrc1 %[d0], 0x0(%[dst_ptr]) \n\t"
5863 "gssdlc1 %[d0], 0x7(%[dst_ptr]) \n\t"
5864 "daddiu %[src_ptr], %[src_ptr], 8 \n\t"
5865 "daddiu %[src_ptr1], %[src_ptr1], 8 \n\t"
5866 "daddiu %[dst_ptr], %[dst_ptr], 8 \n\t"
5867 "daddiu %[width], %[width], -8 \n\t"
5868 "bgtz %[width], 1b \n\t"
5869 "nop \n\t"
5870 : [t0] "=&f"(temp), [d0] "=&f"(data[0]), [d1] "=&f"(data[1]),
5871 [d2] "=&f"(data[2]), [d3] "=&f"(data[3])
5872 : [src_ptr] "r"(src_ptr), [src_ptr1] "r"(src_ptr1),
5873 [dst_ptr] "r"(dst_ptr), [width] "r"(width),
5874 [fy1] "f"(source_y_fraction), [fy0] "f"(fy0), [c0] "f"(c0),
5875 [shift] "f"(shift), [zero] "f"(zero)
5876 : "memory");
5877 }
5878
5879 // Use first 4 shuffler values to reorder ARGB channels.
ARGBShuffleRow_MMI(const uint8_t * src_argb,uint8_t * dst_argb,const uint8_t * shuffler,int width)5880 void ARGBShuffleRow_MMI(const uint8_t* src_argb,
5881 uint8_t* dst_argb,
5882 const uint8_t* shuffler,
5883 int width) {
5884 uint64_t source, dest0, dest1, dest;
5885 const uint64_t mask0 = 0x0;
5886 const uint64_t mask1 = (shuffler[0] & 0x03) | ((shuffler[1] & 0x03) << 2) |
5887 ((shuffler[2] & 0x03) << 4) |
5888 ((shuffler[3] & 0x03) << 6);
5889
5890 __asm__ volatile(
5891 "1: \n\t"
5892 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
5893 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
5894
5895 "punpcklbh %[dest0], %[src], %[mask0] \n\t"
5896 "pshufh %[dest0], %[dest0], %[mask1] \n\t"
5897 "punpckhbh %[dest1], %[src], %[mask0] \n\t"
5898 "pshufh %[dest1], %[dest1], %[mask1] \n\t"
5899 "packushb %[dest], %[dest0], %[dest1] \n\t"
5900
5901 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
5902 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5903
5904 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
5905 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
5906 "daddi %[width], %[width], -0x02 \n\t"
5907 "bnez %[width], 1b \n\t"
5908 : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
5909 [dest1] "=&f"(dest1)
5910 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_argb), [mask0] "f"(mask0),
5911 [mask1] "f"(mask1), [width] "r"(width)
5912 : "memory");
5913 }
5914
I422ToYUY2Row_MMI(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_frame,int width)5915 void I422ToYUY2Row_MMI(const uint8_t* src_y,
5916 const uint8_t* src_u,
5917 const uint8_t* src_v,
5918 uint8_t* dst_frame,
5919 int width) {
5920 uint64_t temp[3];
5921 uint64_t vu = 0x0;
5922 __asm__ volatile(
5923 "1: \n\t"
5924 "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i]
5925 "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i]
5926 "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i]
5927 "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i]
5928 "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i]
5929 "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i]
5930 "punpcklbh %[vu], %[tu], %[tv] \n\t" // g
5931 "punpcklbh %[tu], %[ty], %[vu] \n\t" // g
5932 "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
5933 "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
5934 "punpckhbh %[tu], %[ty], %[vu] \n\t" // g
5935 "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
5936 "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
5937 "daddiu %[src_y], %[src_y], 8 \n\t"
5938 "daddiu %[src_u], %[src_u], 4 \n\t"
5939 "daddiu %[src_v], %[src_v], 4 \n\t"
5940 "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
5941 "daddiu %[width], %[width], -8 \n\t"
5942 "bgtz %[width], 1b \n\t"
5943 "nop \n\t"
5944 : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]),
5945 [vu] "=&f"(vu)
5946 : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
5947 [dst_frame] "r"(dst_frame), [width] "r"(width)
5948 : "memory");
5949 }
5950
I422ToUYVYRow_MMI(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * dst_frame,int width)5951 void I422ToUYVYRow_MMI(const uint8_t* src_y,
5952 const uint8_t* src_u,
5953 const uint8_t* src_v,
5954 uint8_t* dst_frame,
5955 int width) {
5956 uint64_t temp[3];
5957 uint64_t vu = 0x0;
5958 __asm__ volatile(
5959 "1: \n\t"
5960 "gsldlc1 %[ty], 0x7(%[src_y]) \n\t" // r=src_sobelx[i]
5961 "gsldrc1 %[ty], 0x0(%[src_y]) \n\t" // r=src_sobelx[i]
5962 "gslwlc1 %[tu], 0x3(%[src_u]) \n\t" // b=src_sobely[i]
5963 "gslwrc1 %[tu], 0x0(%[src_u]) \n\t" // b=src_sobely[i]
5964 "gslwlc1 %[tv], 0x3(%[src_v]) \n\t" // b=src_sobely[i]
5965 "gslwrc1 %[tv], 0x0(%[src_v]) \n\t" // b=src_sobely[i]
5966 "punpcklbh %[vu], %[tu], %[tv] \n\t" // g
5967 "punpcklbh %[tu], %[vu], %[ty] \n\t" // g
5968 "gssdlc1 %[tu], 0x7(%[dst_frame]) \n\t"
5969 "gssdrc1 %[tu], 0x0(%[dst_frame]) \n\t"
5970 "punpckhbh %[tu], %[vu], %[ty] \n\t" // g
5971 "gssdlc1 %[tu], 0x0F(%[dst_frame]) \n\t"
5972 "gssdrc1 %[tu], 0x08(%[dst_frame]) \n\t"
5973 "daddiu %[src_y], %[src_y], 8 \n\t"
5974 "daddiu %[src_u], %[src_u], 4 \n\t"
5975 "daddiu %[src_v], %[src_v], 4 \n\t"
5976 "daddiu %[dst_frame], %[dst_frame], 16 \n\t"
5977 "daddiu %[width], %[width], -8 \n\t"
5978 "bgtz %[width], 1b \n\t"
5979 "nop \n\t"
5980 : [ty] "=&f"(temp[1]), [tu] "=&f"(temp[1]), [tv] "=&f"(temp[1]),
5981 [vu] "=&f"(vu)
5982 : [src_y] "r"(src_y), [src_u] "r"(src_u), [src_v] "r"(src_v),
5983 [dst_frame] "r"(dst_frame), [width] "r"(width)
5984 : "memory");
5985 }
5986
ARGBCopyAlphaRow_MMI(const uint8_t * src,uint8_t * dst,int width)5987 void ARGBCopyAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
5988 uint64_t source, dest;
5989 const uint64_t mask0 = 0xff000000ff000000ULL;
5990 const uint64_t mask1 = ~mask0;
5991
5992 __asm__ volatile(
5993 "1: \n\t"
5994 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
5995 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
5996 "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
5997 "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
5998
5999 "and %[src], %[src], %[mask0] \n\t"
6000 "and %[dest], %[dest], %[mask1] \n\t"
6001 "or %[dest], %[src], %[dest] \n\t"
6002 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
6003 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
6004
6005 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
6006 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
6007 "daddi %[width], %[width], -0x02 \n\t"
6008 "bnez %[width], 1b \n\t"
6009 : [src] "=&f"(source), [dest] "=&f"(dest)
6010 : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
6011 [mask1] "f"(mask1), [width] "r"(width)
6012 : "memory");
6013 }
6014
ARGBExtractAlphaRow_MMI(const uint8_t * src_argb,uint8_t * dst_a,int width)6015 void ARGBExtractAlphaRow_MMI(const uint8_t* src_argb,
6016 uint8_t* dst_a,
6017 int width) {
6018 uint64_t src, dest0, dest1, dest_lo, dest_hi, dest;
6019 const uint64_t mask = 0xff000000ff000000ULL;
6020 const uint64_t shift = 0x18;
6021
6022 __asm__ volatile(
6023 "1: \n\t"
6024 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
6025 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
6026 "and %[dest0], %[src], %[mask] \n\t"
6027 "psrlw %[dest0], %[dest0], %[shift] \n\t"
6028 "gsldlc1 %[src], 0x0f(%[src_ptr]) \n\t"
6029 "gsldrc1 %[src], 0x08(%[src_ptr]) \n\t"
6030 "and %[dest1], %[src], %[mask] \n\t"
6031 "psrlw %[dest1], %[dest1], %[shift] \n\t"
6032 "packsswh %[dest_lo], %[dest0], %[dest1] \n\t"
6033
6034 "gsldlc1 %[src], 0x17(%[src_ptr]) \n\t"
6035 "gsldrc1 %[src], 0x10(%[src_ptr]) \n\t"
6036 "and %[dest0], %[src], %[mask] \n\t"
6037 "psrlw %[dest0], %[dest0], %[shift] \n\t"
6038 "gsldlc1 %[src], 0x1f(%[src_ptr]) \n\t"
6039 "gsldrc1 %[src], 0x18(%[src_ptr]) \n\t"
6040 "and %[dest1], %[src], %[mask] \n\t"
6041 "psrlw %[dest1], %[dest1], %[shift] \n\t"
6042 "packsswh %[dest_hi], %[dest0], %[dest1] \n\t"
6043
6044 "packushb %[dest], %[dest_lo], %[dest_hi] \n\t"
6045
6046 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
6047 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
6048
6049 "daddiu %[src_ptr], %[src_ptr], 0x20 \n\t"
6050 "daddiu %[dst_ptr], %[dst_ptr], 0x08 \n\t"
6051 "daddi %[width], %[width], -0x08 \n\t"
6052 "bnez %[width], 1b \n\t"
6053 : [src] "=&f"(src), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
6054 [dest1] "=&f"(dest1), [dest_lo] "=&f"(dest_lo), [dest_hi] "=&f"(dest_hi)
6055 : [src_ptr] "r"(src_argb), [dst_ptr] "r"(dst_a), [mask] "f"(mask),
6056 [shift] "f"(shift), [width] "r"(width)
6057 : "memory");
6058 }
6059
ARGBCopyYToAlphaRow_MMI(const uint8_t * src,uint8_t * dst,int width)6060 void ARGBCopyYToAlphaRow_MMI(const uint8_t* src, uint8_t* dst, int width) {
6061 uint64_t source, dest0, dest1, dest;
6062 const uint64_t mask0 = 0x0;
6063 const uint64_t mask1 = 0x00ffffff00ffffffULL;
6064
6065 __asm__ volatile(
6066 "1: \n\t"
6067 "gsldlc1 %[src], 0x07(%[src_ptr]) \n\t"
6068 "gsldrc1 %[src], 0x00(%[src_ptr]) \n\t"
6069
6070 "punpcklbh %[dest0], %[mask0], %[src] \n\t"
6071 "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
6072 "gsldlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
6073 "gsldrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
6074 "and %[dest], %[dest], %[mask1] \n\t"
6075 "or %[dest], %[dest], %[dest1] \n\t"
6076 "gssdlc1 %[dest], 0x07(%[dst_ptr]) \n\t"
6077 "gssdrc1 %[dest], 0x00(%[dst_ptr]) \n\t"
6078 "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
6079 "gsldlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
6080 "gsldrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
6081 "and %[dest], %[dest], %[mask1] \n\t"
6082 "or %[dest], %[dest], %[dest1] \n\t"
6083 "gssdlc1 %[dest], 0x0f(%[dst_ptr]) \n\t"
6084 "gssdrc1 %[dest], 0x08(%[dst_ptr]) \n\t"
6085
6086 "punpckhbh %[dest0], %[mask0], %[src] \n\t"
6087 "punpcklhw %[dest1], %[mask0], %[dest0] \n\t"
6088 "gsldlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
6089 "gsldrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
6090 "and %[dest], %[dest], %[mask1] \n\t"
6091 "or %[dest], %[dest], %[dest1] \n\t"
6092 "gssdlc1 %[dest], 0x17(%[dst_ptr]) \n\t"
6093 "gssdrc1 %[dest], 0x10(%[dst_ptr]) \n\t"
6094 "punpckhhw %[dest1], %[mask0], %[dest0] \n\t"
6095 "gsldlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
6096 "gsldrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
6097 "and %[dest], %[dest], %[mask1] \n\t"
6098 "or %[dest], %[dest], %[dest1] \n\t"
6099 "gssdlc1 %[dest], 0x1f(%[dst_ptr]) \n\t"
6100 "gssdrc1 %[dest], 0x18(%[dst_ptr]) \n\t"
6101
6102 "daddiu %[src_ptr], %[src_ptr], 0x08 \n\t"
6103 "daddiu %[dst_ptr], %[dst_ptr], 0x20 \n\t"
6104 "daddi %[width], %[width], -0x08 \n\t"
6105 "bnez %[width], 1b \n\t"
6106 : [src] "=&f"(source), [dest] "=&f"(dest), [dest0] "=&f"(dest0),
6107 [dest1] "=&f"(dest1)
6108 : [src_ptr] "r"(src), [dst_ptr] "r"(dst), [mask0] "f"(mask0),
6109 [mask1] "f"(mask1), [width] "r"(width)
6110 : "memory");
6111 }
6112
// Converts one row of planar YUV with full-resolution chroma (one U and one V
// byte per Y byte) to 32-bit pixels, four pixels per loop iteration.
//
// src_y, src_u, src_v: source planes; 4 bytes are read from each per
//   iteration.
// rgb_buf: destination; 16 bytes are written per iteration.  Going by the
//   register-layout comments below ("frgb", most significant byte first),
//   each pixel is stored as B, G, R, 0xff -- assumes little-endian stores.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr], %[rgbbuf_ptr]
// and %[width], which are declared as input-only operands.  GCC's extended-asm
// rules say input-only operands must not be modified; converting them to "+r"
// would need care because of GCC's 30-operand limit -- confirm and fix.
void I444ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y,u,v;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // High byte of every 16-bit lane.
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;
  __asm__ volatile (
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"  // yg
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"  // bb
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"  // ub
    // Force the high byte of each 16-bit lane to 0xff (sign extension).
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"  // bg
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"  // ug
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"  // vg
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"  // br
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"  // vr
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"  // sign extension

    "1: \n\t"
    // Unaligned 32-bit loads: 4 Y, 4 U and 4 V samples.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    "punpcklbh %[y], %[y], %[y] \n\t"  // y*0x0101
    "pmulhuh %[y], %[y], %[yg] \n\t"   // y1 = high 16 bits of (y*0x0101)*yg

    // Blue: (y1 + bb - u*ub) >> 6
    "punpcklbh %[u], %[u], %[zero] \n\t"  // u widened to 16 bits
    "paddsh %[b_vec0], %[y], %[bb] \n\t"
    "pmullh %[b_vec1], %[u], %[ub] \n\t"
    "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
    "psrah %[b_vec0], %[b_vec0], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    "punpcklbh %[v], %[v], %[zero] \n\t"  // v widened to 16 bits
    "paddsh %[g_vec0], %[y], %[bg] \n\t"
    "pmullh %[g_vec1], %[u], %[ug] \n\t"  // u*ug
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "pmullh %[g_vec1], %[v], %[vg] \n\t"  // v*vg
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "psrah %[g_vec0], %[g_vec0], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec0], %[y], %[br] \n\t"
    "pmullh %[r_vec1], %[v], %[vr] \n\t"  // v*vr
    "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
    "psrah %[r_vec0], %[r_vec0], %[six] \n\t"

    // Saturate to bytes and interleave the channels into four 32-bit pixels.
    "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"    // rrrrbbbb
    "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"     // ffffgggg
    "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
    "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"   // gbgbgbgb
    "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"   // frfrfrfr
    "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"   // frgbfrgb
    "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"   // frgbfrgb
    // Unaligned 64-bit stores of pixels 0..1 and 2..3.
    "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
    "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
    "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"
    : [y]"=&f"(y),
      [u]"=&f"(u), [v]"=&f"(v),
      [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
      [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
      [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [alpha]"f"(-1),
      [six]"f"(0x6), [five]"f"(0x55),
      [mask]"f"(mask)
    : "memory"
  );
}
6207
// Also used for 420
//
// Converts one row of planar YUV with horizontally subsampled chroma (one U
// and one V byte per two Y bytes) to 32-bit pixels, four pixels per loop
// iteration.
//
// src_y: luma plane; 4 bytes are consumed per iteration.
// src_u, src_v: chroma planes; the pointers advance 2 bytes per iteration.
//   NOTE(review): each iteration loads 4 bytes from src_u/src_v but only the
//   low 2 are used, so the last iteration reads 2 bytes beyond the chroma
//   samples it consumes -- confirm the buffers allow this.
// rgb_buf: destination; 16 bytes are written per iteration.  Going by the
//   register-layout comments below ("frgb", most significant byte first),
//   each pixel is stored as B, G, R, 0xff -- assumes little-endian stores.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr], %[rgbbuf_ptr]
// and %[width], which are declared as input-only operands.  GCC's extended-asm
// rules say input-only operands must not be modified; converting them to "+r"
// would need care because of GCC's 30-operand limit -- confirm and fix.
void I422ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_u,
                       const uint8_t* src_v,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y,u,v;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // High byte of every 16-bit lane.
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"  // yg
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"  // bb
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"  // ub
    // Force the high byte of each 16-bit lane to 0xff (sign extension).
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"  // bg
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"  // ug
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"  // vg
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"  // br
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"  // vr
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"  // sign extension

    "1: \n\t"
    // Unaligned 32-bit loads from each plane.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    "punpcklbh %[y], %[y], %[y] \n\t"  // y*0x0101
    "pmulhuh %[y], %[y], %[yg] \n\t"   // y1 = high 16 bits of (y*0x0101)*yg

    // Blue: (y1 + bb - u*ub) >> 6
    //u3|u2|u1|u0 --> u1|u1|u0|u0
    "punpcklbh %[u], %[u], %[u] \n\t"     // duplicate each u byte
    "punpcklbh %[u], %[u], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[b_vec0], %[y], %[bb] \n\t"
    "pmullh %[b_vec1], %[u], %[ub] \n\t"
    "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
    "psrah %[b_vec0], %[b_vec0], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    //v3|v2|v1|v0 --> v1|v1|v0|v0
    "punpcklbh %[v], %[v], %[v] \n\t"     // duplicate each v byte
    "punpcklbh %[v], %[v], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[g_vec0], %[y], %[bg] \n\t"
    "pmullh %[g_vec1], %[u], %[ug] \n\t"  // u*ug
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "pmullh %[g_vec1], %[v], %[vg] \n\t"  // v*vg
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "psrah %[g_vec0], %[g_vec0], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec0], %[y], %[br] \n\t"
    "pmullh %[r_vec1], %[v], %[vr] \n\t"  // v*vr
    "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
    "psrah %[r_vec0], %[r_vec0], %[six] \n\t"

    // Saturate to bytes and interleave the channels into four 32-bit pixels.
    "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"    // rrrrbbbb
    "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"     // ffffgggg
    "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
    "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"   // gbgbgbgb
    "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"   // frfrfrfr
    "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"   // frgbfrgb
    "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"   // frgbfrgb
    // Unaligned 64-bit stores of pixels 0..1 and 2..3.
    "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
    "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
    "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y),
      [u]"=&f"(u), [v]"=&f"(v),
      [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
      [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
      [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [alpha]"f"(-1),
      [six]"f"(0x6), [five]"f"(0x55),
      [mask]"f"(mask)
    : "memory"
  );
}
6309
// 10 bit YUV to ARGB
//
// Converts one row of planar YUV stored as 16-bit samples, with one U and one
// V sample per two Y samples, to 32-bit pixels, four pixels per loop
// iteration.
//
// src_y: luma plane; 8 bytes (four 16-bit samples) are read per iteration.
// src_u, src_v: chroma planes; 4 bytes (two 16-bit samples) are read from
//   each per iteration.
// rgb_buf: destination; 16 bytes are written per iteration.  The packing
//   sequence matches the sibling 8-bit converters, whose comments describe the
//   register layout as "frgb" -- presumably B, G, R, 0xff in memory; confirm.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr], %[rgbbuf_ptr]
// and %[width], which are declared as input-only operands.  GCC's extended-asm
// rules say input-only operands must not be modified; this statement already
// has 30 operands, so converting them to "+r" is not a drop-in fix -- confirm.
void I210ToARGBRow_MMI(const uint16_t* src_y,
                       const uint16_t* src_u,
                       const uint16_t* src_v,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y,u,v;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // High byte of every 16-bit lane.
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
    // Force the high byte of each 16-bit lane of ub to 0xff.
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"

    "1: \n\t"
    // Unaligned loads: four 16-bit Y samples, two 16-bit U and V samples.
    "gsldlc1 %[y], 0x07(%[y_ptr]) \n\t"
    "gsldrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    // Scale Y up by 6 bits, then take the high 16 bits of y*yg.
    "psllh %[y], %[y], %[six] \n\t"
    "pmulhuh %[y], %[y], %[yg] \n\t"

    // Duplicate each chroma sample for its two pixels, drop the low 2 bits
    // and clamp to at most 0x00ff.
    "punpcklhw %[u], %[u], %[u] \n\t"
    "psrah %[u], %[u], %[two] \n\t"
    "punpcklhw %[v], %[v], %[v] \n\t"
    "psrah %[v], %[v], %[two] \n\t"
    "pminsh %[u], %[u], %[mask1] \n\t"
    "pminsh %[v], %[v], %[mask1] \n\t"

    // Blue: y1 + bb - u*ub
    "paddsh %[b_vec0], %[y], %[bb] \n\t"
    "pmullh %[b_vec1], %[u], %[ub] \n\t"
    "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"

    // Green: y1 + bg - u*ug - v*vg
    "paddsh %[g_vec0], %[y], %[bg] \n\t"
    "pmullh %[g_vec1], %[u], %[ug] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "pmullh %[g_vec1], %[v], %[vg] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"

    // Red: y1 + br - v*vr
    "paddsh %[r_vec0], %[y], %[br] \n\t"
    "pmullh %[r_vec1], %[v], %[vr] \n\t"
    "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"

    // Arithmetic shift right by 6 on all three channels.
    "psrah %[b_vec0], %[b_vec0], %[six] \n\t"
    "psrah %[g_vec0], %[g_vec0], %[six] \n\t"
    "psrah %[r_vec0], %[r_vec0], %[six] \n\t"

    // Saturate to bytes and interleave the channels into four 32-bit pixels.
    "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
    "packushb %[g_vec0], %[g_vec0], %[alpha] \n\t"
    "punpcklwd %[g_vec0], %[g_vec0], %[alpha] \n\t"
    "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
    "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
    // Unaligned 64-bit stores of pixels 0..1 and 2..3.
    "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
    "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x08 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x04 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x04 \n\t"
    "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y),
      [u]"=&f"(u), [v]"=&f"(v),
      [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
      [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
      [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [alpha]"f"(-1),
      [six]"f"(0x6), [five]"f"(0x55),
      [mask]"f"(mask), [two]"f"(0x02),
      [mask1]"f"(0x00ff00ff00ff00ff)
    : "memory"
  );
}
6414
// Converts one row of planar YUV with horizontally subsampled chroma, plus a
// separate alpha plane, to 32-bit pixels, four pixels per loop iteration.
// The fourth byte of each output pixel is taken from src_a instead of being a
// constant.
//
// src_y: luma plane; 4 bytes are consumed per iteration.
// src_u, src_v: chroma planes; the pointers advance 2 bytes per iteration.
//   NOTE(review): each iteration loads 4 bytes from src_u/src_v but only the
//   low 2 are used, so the last iteration reads 2 bytes beyond the chroma
//   samples it consumes -- confirm the buffers allow this.
// src_a: alpha plane; 4 bytes are consumed per iteration.
// rgb_buf: destination; 16 bytes are written per iteration.  Presumably
//   B, G, R, A in memory (same packing sequence as the sibling converters);
//   confirm.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[a_ptr], %[u_ptr], %[v_ptr],
// %[rgbbuf_ptr] and %[width], which are declared as input-only operands.
// GCC's extended-asm rules say input-only operands must not be modified;
// converting them to "+r" would need care because of GCC's 30-operand limit
// -- confirm and fix.
void I422AlphaToARGBRow_MMI(const uint8_t* src_y,
                            const uint8_t* src_u,
                            const uint8_t* src_v,
                            const uint8_t* src_a,
                            uint8_t* rgb_buf,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  uint64_t y,u,v,a;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // High byte of every 16-bit lane.
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
    // Force the high byte of each 16-bit lane of ub to 0xff.
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"

    "1: \n\t"
    // Unaligned 32-bit loads from each plane, including alpha.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
    "gslwlc1 %[a], 0x03(%[a_ptr]) \n\t"
    "gslwrc1 %[a], 0x00(%[a_ptr]) \n\t"

    "punpcklbh %[y], %[y], %[y] \n\t"  // y*0x0101
    "pmulhuh %[y], %[y], %[yg] \n\t"   // y1 = high 16 bits of (y*0x0101)*yg

    // Blue: (y1 + bb - u*ub) >> 6
    //u3|u2|u1|u0 --> u1|u1|u0|u0
    "punpcklbh %[u], %[u], %[u] \n\t"     // duplicate each u byte
    "punpcklbh %[u], %[u], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[b_vec0], %[y], %[bb] \n\t"
    "pmullh %[b_vec1], %[u], %[ub] \n\t"
    "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
    "psrah %[b_vec0], %[b_vec0], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    //v3|v2|v1|v0 --> v1|v1|v0|v0
    "punpcklbh %[v], %[v], %[v] \n\t"     // duplicate each v byte
    "punpcklbh %[v], %[v], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[g_vec0], %[y], %[bg] \n\t"
    "pmullh %[g_vec1], %[u], %[ug] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "pmullh %[g_vec1], %[v], %[vg] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "psrah %[g_vec0], %[g_vec0], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec0], %[y], %[br] \n\t"
    "pmullh %[r_vec1], %[v], %[vr] \n\t"
    "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
    "psrah %[r_vec0], %[r_vec0], %[six] \n\t"

    // Saturate to bytes and interleave the channels into four 32-bit pixels.
    // The following punpcklwd replaces the upper half produced by this
    // packushb with the four alpha bytes loaded above.
    "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"  // rrrrbbbb
    "packushb %[g_vec0], %[g_vec0], %[a] \n\t"
    "punpcklwd %[g_vec0], %[g_vec0], %[a] \n\t"      // aaaagggg
    "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
    "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"
    // Unaligned 64-bit stores of pixels 0..1 and 2..3.
    "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
    "gssdlc1 %[g_vec1], 0x0f(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[a_ptr], %[a_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
    "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y), [u]"=&f"(u),
      [v]"=&f"(v), [a]"=&f"(a),
      [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
      [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
      [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [a_ptr]"r"(src_a), [zero]"f"(0x00),
      [six]"f"(0x6), [five]"f"(0x55),
      [mask]"f"(mask)
    : "memory"
  );
}
6519
// Converts one row of planar YUV with horizontally subsampled chroma to
// 3-byte-per-pixel output, four pixels (12 output bytes) per loop iteration.
//
// src_y: luma plane; 4 bytes are consumed per iteration.
// src_u, src_v: chroma planes; the pointers advance 2 bytes per iteration.
//   NOTE(review): each iteration loads 4 bytes from src_u/src_v but only the
//   low 2 are used, so the last iteration reads 2 bytes beyond the chroma
//   samples it consumes -- confirm the buffers allow this.
// rgb_buf: destination; an 8-byte store at offset 0 and a 4-byte store at
//   offset 8 are issued per iteration, then the pointer advances by 12.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr], %[rgbbuf_ptr]
// and %[width], which are declared as input-only operands.  GCC's extended-asm
// rules say input-only operands must not be modified; this statement already
// has 30 operands, so converting them to "+r" is not a drop-in fix -- confirm.
void I422ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_u,
                        const uint8_t* src_v,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y,u,v;
  uint64_t b_vec[2],g_vec[2],r_vec[2];
  uint64_t mask = 0xff00ff00ff00ff00ULL;  // High byte of every 16-bit lane.
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
    // Force the high byte of each 16-bit lane of ub to 0xff.
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"

    "1: \n\t"
    // Unaligned 32-bit loads from each plane.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    "punpcklbh %[y], %[y], %[y] \n\t"  // y*0x0101
    "pmulhuh %[y], %[y], %[yg] \n\t"   // y1 = high 16 bits of (y*0x0101)*yg

    // Blue: (y1 + bb - u*ub) >> 6
    //u3|u2|u1|u0 --> u1|u1|u0|u0
    "punpcklbh %[u], %[u], %[u] \n\t"     // duplicate each u byte
    "punpcklbh %[u], %[u], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[b_vec0], %[y], %[bb] \n\t"
    "pmullh %[b_vec1], %[u], %[ub] \n\t"
    "psubsh %[b_vec0], %[b_vec0], %[b_vec1] \n\t"
    "psrah %[b_vec0], %[b_vec0], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    //v3|v2|v1|v0 --> v1|v1|v0|v0
    "punpcklbh %[v], %[v], %[v] \n\t"     // duplicate each v byte
    "punpcklbh %[v], %[v], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[g_vec0], %[y], %[bg] \n\t"
    "pmullh %[g_vec1], %[u], %[ug] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "pmullh %[g_vec1], %[v], %[vg] \n\t"
    "psubsh %[g_vec0], %[g_vec0], %[g_vec1] \n\t"
    "psrah %[g_vec0], %[g_vec0], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec0], %[y], %[br] \n\t"
    "pmullh %[r_vec1], %[v], %[vr] \n\t"
    "psubsh %[r_vec0], %[r_vec0], %[r_vec1] \n\t"
    "psrah %[r_vec0], %[r_vec0], %[six] \n\t"

    // Saturate to bytes and interleave into four 4-byte groups whose fourth
    // byte is zero (green is packed against %[zero] here).
    "packushb %[r_vec0], %[b_vec0], %[r_vec0] \n\t"
    "packushb %[g_vec0], %[g_vec0], %[zero] \n\t"
    "punpcklbh %[b_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpckhbh %[r_vec0], %[r_vec0], %[g_vec0] \n\t"
    "punpcklhw %[g_vec0], %[b_vec0], %[r_vec0] \n\t"
    "punpckhhw %[g_vec1], %[b_vec0], %[r_vec0] \n\t"

    // Squeeze the four 4-byte groups (g_vec0 = pixels 0..1, g_vec1 = pixels
    // 2..3) into 12 contiguous bytes: g_vec0 ends up holding output bytes
    // 0..7 and the low word of g_vec1 output bytes 8..11.
    "punpckhwd %[r_vec0], %[g_vec0], %[g_vec0] \n\t"
    "psllw %[r_vec1], %[r_vec0], %[lmove1] \n\t"
    "or %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
    "psrlw %[r_vec1], %[r_vec0], %[rmove1] \n\t"
    "pextrh %[r_vec1], %[r_vec1], %[zero] \n\t"
    "pinsrh_2 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
    "pextrh %[r_vec1], %[g_vec1], %[zero] \n\t"
    "pinsrh_3 %[g_vec0], %[g_vec0], %[r_vec1] \n\t"
    "pextrh %[r_vec1], %[g_vec1], %[one] \n\t"
    "punpckhwd %[g_vec1], %[g_vec1], %[g_vec1] \n\t"
    "psllw %[g_vec1], %[g_vec1], %[rmove1] \n\t"
    "or %[g_vec1], %[g_vec1], %[r_vec1] \n\t"
    // Unaligned stores: 8 bytes, then 4 bytes.
    "gssdlc1 %[g_vec0], 0x07(%[rgbbuf_ptr]) \n\t"
    "gssdrc1 %[g_vec0], 0x00(%[rgbbuf_ptr]) \n\t"
    "gsswlc1 %[g_vec1], 0x0b(%[rgbbuf_ptr]) \n\t"
    "gsswrc1 %[g_vec1], 0x08(%[rgbbuf_ptr]) \n\t"


    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
    "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0c \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y), [u]"=&f"(u),
      [v]"=&f"(v),
      [b_vec0]"=&f"(b_vec[0]), [b_vec1]"=&f"(b_vec[1]),
      [g_vec0]"=&f"(g_vec[0]), [g_vec1]"=&f"(g_vec[1]),
      [r_vec0]"=&f"(r_vec[0]), [r_vec1]"=&f"(r_vec[1]),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [five]"f"(0x55),
      [six]"f"(0x6), [mask]"f"(mask),
      [lmove1]"f"(0x18), [rmove1]"f"(0x8),
      [one]"f"(0x1)
    : "memory"
  );
}
6634
// Converts one row of planar YUV with horizontally subsampled chroma to
// 16-bit pixels with 4 bits per channel, four pixels (8 output bytes) per
// loop iteration.  The alpha nibble comes from the constant %[alpha] (-1).
//
// src_y: luma plane; 4 bytes are consumed per iteration.
// src_u, src_v: chroma planes; the pointers advance 2 bytes per iteration.
//   NOTE(review): each iteration loads 4 bytes from src_u/src_v but only the
//   low 2 are used, so the last iteration reads 2 bytes beyond the chroma
//   samples it consumes -- confirm the buffers allow this.
// dst_argb4444: destination; 8 bytes are written per iteration.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr],
// %[dst_argb4444] and %[width], which are declared as input-only operands.
// GCC's extended-asm rules say input-only operands must not be modified;
// converting them to "+r" would need care because of GCC's 30-operand limit
// -- confirm and fix.
void I422ToARGB4444Row_MMI(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_argb4444,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
    // Force the high byte of each 16-bit lane of ub to 0xff.
    "or %[ub], %[ub], %[mask] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask] \n\t"

    "1: \n\t"
    // Unaligned 32-bit loads from each plane.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    "punpcklbh %[y], %[y], %[y] \n\t"  // y*0x0101
    "pmulhuh %[y], %[y], %[yg] \n\t"   // y1 = high 16 bits of (y*0x0101)*yg

    // Blue: (y1 + bb - u*ub) >> 6
    //u3|u2|u1|u0 --> u1|u1|u0|u0
    "punpcklbh %[u], %[u], %[u] \n\t"     // duplicate each u byte
    "punpcklbh %[u], %[u], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[b_vec], %[y], %[bb] \n\t"
    "pmullh %[temp], %[u], %[ub] \n\t"
    "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
    "psrah %[b_vec], %[b_vec], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    //v3|v2|v1|v0 --> v1|v1|v0|v0
    "punpcklbh %[v], %[v], %[v] \n\t"     // duplicate each v byte
    "punpcklbh %[v], %[v], %[zero] \n\t"  // widen to 16 bits
    "paddsh %[g_vec], %[y], %[bg] \n\t"
    "pmullh %[temp], %[u], %[ug] \n\t"
    "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
    "pmullh %[temp], %[v], %[vg] \n\t"
    "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
    "psrah %[g_vec], %[g_vec], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec], %[y], %[br] \n\t"
    "pmullh %[temp], %[v], %[vr] \n\t"
    "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
    "psrah %[r_vec], %[r_vec], %[six] \n\t"

    // Saturate to bytes and interleave into four 32-bit pixels with the top
    // byte taken from %[alpha]: g_vec = pixels 0..1, b_vec = pixels 2..3.
    "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
    "packushb %[g_vec], %[g_vec], %[zero] \n\t"
    "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
    "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
    "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
    "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
    "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

    // Pixels 0..1: keep the high nibble of every byte (mask1 = 0xf0...), pair
    // up neighbouring nibbles with two 4-bit shifts and an OR, then keep only
    // the low byte of each 16-bit lane (mask 0x00ff built from alpha/zero).
    "and %[g_vec], %[g_vec], %[mask1] \n\t"
    "psrlw %[g_vec], %[g_vec], %[four] \n\t"
    "psrlw %[r_vec], %[g_vec], %[four] \n\t"
    "or %[g_vec], %[g_vec], %[r_vec] \n\t"
    "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
    "and %[g_vec], %[g_vec], %[r_vec] \n\t"

    // Pixels 2..3: same reduction.
    "and %[b_vec], %[b_vec], %[mask1] \n\t"
    "psrlw %[b_vec], %[b_vec], %[four] \n\t"
    "psrlw %[r_vec], %[b_vec], %[four] \n\t"
    "or %[b_vec], %[b_vec], %[r_vec] \n\t"
    "punpcklbh %[r_vec], %[alpha], %[zero] \n\t"
    "and %[b_vec], %[b_vec], %[r_vec] \n\t"
    // Narrow the 16-bit lanes to bytes: 2 bytes per pixel, 8 bytes total.
    "packushb %[g_vec], %[g_vec], %[b_vec] \n\t"

    "gssdlc1 %[g_vec], 0x07(%[dst_argb4444]) \n\t"
    "gssdrc1 %[g_vec], 0x00(%[dst_argb4444]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
    "daddiu %[dst_argb4444], %[dst_argb4444], 0x08 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y), [u]"=&f"(u),
      [v]"=&f"(v),
      [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
      [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [dst_argb4444]"r"(dst_argb4444),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [five]"f"(0x55),
      [six]"f"(0x6), [mask]"f"(0xff00ff00ff00ff00),
      [four]"f"(0x4), [mask1]"f"(0xf0f0f0f0f0f0f0f0),
      [alpha]"f"(-1)
    : "memory"
  );
}
6748
// Converts one row of planar YUV with horizontally subsampled chroma to
// 16-bit pixels with 5 bits per colour channel and bit 15 always set, four
// pixels (8 output bytes) per loop iteration.
//
// src_y: luma plane; 4 bytes are consumed per iteration.
// src_u, src_v: chroma planes; the pointers advance 2 bytes per iteration.
//   NOTE(review): each iteration loads 4 bytes from src_u/src_v but only the
//   low 2 are used, so the last iteration reads 2 bytes beyond the chroma
//   samples it consumes -- confirm the buffers allow this.
// dst_argb1555: destination; 8 bytes are written per iteration.
// yuvconstants: conversion constants, read as 8-byte vectors at byte offsets
//   0x00, 0x20, 0x40, 0x60, 0x80, 0xa0 and 0xc0 of the struct.
// width: pixel count.  The loop subtracts 4 and exits only on exactly zero,
//   so width must be a positive multiple of 4.
//
// NOTE(review): the loop writes %[y_ptr], %[u_ptr], %[v_ptr],
// %[dst_argb1555] and %[width], which are declared as input-only operands.
// GCC's extended-asm rules say input-only operands must not be modified; this
// statement already has 30 operands, so converting them to "+r" is not a
// drop-in fix -- confirm.
void I422ToARGB1555Row_MMI(const uint8_t* src_y,
                           const uint8_t* src_u,
                           const uint8_t* src_v,
                           uint8_t* dst_argb1555,
                           const struct YuvConstants* yuvconstants,
                           int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
    // One-time setup: load the conversion constants into FP registers.
    "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
    "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
    "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
    // Force the high byte of each 16-bit lane of ub to 0xff.
    "or %[ub], %[ub], %[mask1] \n\t"
    "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
    // ug: widen bytes to 16 bits, then replicate lane 0 (selector 0x00).
    "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[ug], %[ug], %[zero] \n\t"
    "pshufh %[ug], %[ug], %[zero] \n\t"
    // vg: same vector, but replicate lane 1 (selector 0x55).
    "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vg], %[vg], %[zero] \n\t"
    "pshufh %[vg], %[vg], %[five] \n\t"
    "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
    // vr: replicate lane 1, then force the high bytes to 0xff.
    "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
    "punpcklbh %[vr], %[vr], %[zero] \n\t"
    "pshufh %[vr], %[vr], %[five] \n\t"
    "or %[vr], %[vr], %[mask1] \n\t"

    "1: \n\t"
    // Unaligned 32-bit loads from each plane.
    "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
    "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
    "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
    "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
    "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
    "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

    // y*0x0101, then y1 = high 16 bits of the product with yg.
    "punpcklbh %[y], %[y], %[y] \n\t"
    "pmulhuh %[y], %[y], %[yg] \n\t"

    // Blue: (y1 + bb - u*ub) >> 6
    //u3|u2|u1|u0 --> u1|u1|u0|u0
    "punpcklbh %[u], %[u], %[u] \n\t"
    "punpcklbh %[u], %[u], %[zero] \n\t"
    "paddsh %[b_vec], %[y], %[bb] \n\t"
    "pmullh %[temp], %[u], %[ub] \n\t"
    "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
    "psrah %[b_vec], %[b_vec], %[six] \n\t"

    // Green: (y1 + bg - u*ug - v*vg) >> 6
    //v3|v2|v1|v0 --> v1|v1|v0|v0
    "punpcklbh %[v], %[v], %[v] \n\t"
    "punpcklbh %[v], %[v], %[zero] \n\t"
    "paddsh %[g_vec], %[y], %[bg] \n\t"
    "pmullh %[temp], %[u], %[ug] \n\t"
    "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
    "pmullh %[temp], %[v], %[vg] \n\t"
    "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
    "psrah %[g_vec], %[g_vec], %[six] \n\t"

    // Red: (y1 + br - v*vr) >> 6
    "paddsh %[r_vec], %[y], %[br] \n\t"
    "pmullh %[temp], %[v], %[vr] \n\t"
    "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
    "psrah %[r_vec], %[r_vec], %[six] \n\t"

    // Saturate to bytes and interleave into four 32-bit groups (fourth byte
    // zero): g_vec = pixels 0..1, b_vec = pixels 2..3.
    "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
    "packushb %[g_vec], %[g_vec], %[zero] \n\t"
    "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
    "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
    "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
    "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

    // Pixels 0..1: take the top 5 bits of each 8-bit channel (>> 3, & 0x1f),
    // place them at bit offsets 0, 5 and 10 of each 32-bit lane, then set
    // bit 15 (mask3 = 0x8000 per lane).
    "psrlw %[temp], %[g_vec], %[three] \n\t"
    "and %[g_vec], %[temp], %[mask2] \n\t"
    "psrlw %[temp], %[temp], %[eight] \n\t"
    "and %[r_vec], %[temp], %[mask2] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "or %[g_vec], %[g_vec], %[r_vec] \n\t"
    "psrlw %[temp], %[temp], %[eight] \n\t"
    "and %[r_vec], %[temp], %[mask2] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "or %[g_vec], %[g_vec], %[r_vec] \n\t"
    "or %[g_vec], %[g_vec], %[mask3] \n\t"

    // Pixels 2..3: same reduction.
    "psrlw %[temp], %[b_vec], %[three] \n\t"
    "and %[b_vec], %[temp], %[mask2] \n\t"
    "psrlw %[temp], %[temp], %[eight] \n\t"
    "and %[r_vec], %[temp], %[mask2] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "or %[b_vec], %[b_vec], %[r_vec] \n\t"
    "psrlw %[temp], %[temp], %[eight] \n\t"
    "and %[r_vec], %[temp], %[mask2] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
    "or %[b_vec], %[b_vec], %[r_vec] \n\t"
    "or %[b_vec], %[b_vec], %[mask3] \n\t"

    // Gather the low 16 bits of each of the four 32-bit lanes into one
    // 64-bit register, in pixel order.
    "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
    "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
    "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"

    "gssdlc1 %[g_vec], 0x07(%[dst_argb1555]) \n\t"
    "gssdrc1 %[g_vec], 0x00(%[dst_argb1555]) \n\t"

    "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
    "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
    "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
    "daddiu %[dst_argb1555], %[dst_argb1555], 0x08 \n\t"
    "daddi %[width], %[width], -0x04 \n\t"
    "bnez %[width], 1b \n\t"

    : [y]"=&f"(y), [u]"=&f"(u),
      [v]"=&f"(v),
      [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
      [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
      [ub]"=&f"(ub), [ug]"=&f"(ug),
      [vg]"=&f"(vg), [vr]"=&f"(vr),
      [bb]"=&f"(bb), [bg]"=&f"(bg),
      [br]"=&f"(br), [yg]"=&f"(yg)
    : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
      [v_ptr]"r"(src_v), [dst_argb1555]"r"(dst_argb1555),
      [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
      [zero]"f"(0x00), [five]"f"(0x55),
      [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
      [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
      [eight]"f"(0x8), [mask3]"f"(0x800000008000),
      [lmove5]"f"(0x5)
    : "memory"
  );
}
6877
// Converts one row of planar I422 (separate Y, U and V planes) to 16-bit
// RGB565 using Loongson MMI instructions.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_u/src_v  - chroma samples, pointer advanced by 2 bytes per iteration
//                  (each chroma sample is shared by two adjacent pixels).
//   dst_rgb565   - output, 8 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// NOTE(review): every iteration loads 4 bytes from u_ptr and v_ptr but only
// advances them by 2, so the final iteration reads 2 bytes beyond the last
// chroma sample it actually uses - confirm the callers' buffers allow this.
// NOTE(review): y_ptr, u_ptr, v_ptr, dst_rgb565 and width are declared as
// input-only operands yet are modified by the asm. GCC's extended-asm rules
// do not allow that; the statement already uses 15 outputs + 15 inputs (the
// 30-operand maximum), so switching them to "+r" needs operands freed first.
void I422ToRGB565Row_MMI(const uint8_t* src_y,
                         const uint8_t* src_u,
                         const uint8_t* src_v,
                         uint8_t* dst_rgb565,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Unaligned 32-bit loads of Y, U and V.
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
      "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
      "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // Expand U bytes u3|u2|u1|u0 into halfwords u1|u1|u0|u0, then
      // B = (y + bb - u * ub) >> 6.
      "punpcklbh %[u], %[u], %[u] \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // Expand V bytes v3|v2|v1|v0 into halfwords v1|v1|v0|v0, then
      // G = (y + bg - u * ug - v * vg) >> 6.
      "punpcklbh %[v], %[v], %[v] \n\t"
      "punpcklbh %[v], %[v], %[zero] \n\t"
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes and interleave into 32-bit B,G,R,0
      // pixels: g_vec holds pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Pixels 0-1: reduce to 5/6/5 bits. The low 5 bits of each word take
      // B >> 3, bits 5..10 take G >> 2, bits 11..15 take R >> 3. The shift
      // counts 9 (3 + 6) and 11 (3 + 8) are built with paddb from existing
      // constant operands.
      "psrlh %[temp], %[g_vec], %[three] \n\t"
      "and %[g_vec], %[temp], %[mask2] \n\t"
      "psrlw %[temp], %[temp], %[seven] \n\t"
      "psrlw %[r_vec], %[mask1], %[eight] \n\t"
      "and %[r_vec], %[temp], %[r_vec] \n\t"
      "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
      "or %[g_vec], %[g_vec], %[r_vec] \n\t"
      "paddb %[r_vec], %[three], %[six] \n\t"
      "psrlw %[temp], %[temp], %[r_vec] \n\t"
      "and %[r_vec], %[temp], %[mask2] \n\t"
      "paddb %[temp], %[three], %[eight] \n\t"
      "psllw %[r_vec], %[r_vec], %[temp] \n\t"
      "or %[g_vec], %[g_vec], %[r_vec] \n\t"

      // Pixels 2-3: same 5/6/5 reduction applied to b_vec.
      "psrlh %[temp], %[b_vec], %[three] \n\t"
      "and %[b_vec], %[temp], %[mask2] \n\t"
      "psrlw %[temp], %[temp], %[seven] \n\t"
      "psrlw %[r_vec], %[mask1], %[eight] \n\t"
      "and %[r_vec], %[temp], %[r_vec] \n\t"
      "psllw %[r_vec], %[r_vec], %[lmove5] \n\t"
      "or %[b_vec], %[b_vec], %[r_vec] \n\t"
      "paddb %[r_vec], %[three], %[six] \n\t"
      "psrlw %[temp], %[temp], %[r_vec] \n\t"
      "and %[r_vec], %[temp], %[mask2] \n\t"
      "paddb %[temp], %[three], %[eight] \n\t"
      "psllw %[r_vec], %[r_vec], %[temp] \n\t"
      "or %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Gather the low halfword of each 32-bit lane so the four 16-bit
      // pixels end up contiguous in g_vec in order 0,1,2,3.
      "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
      "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
      "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"

      // Unaligned 64-bit store of the four pixels.
      "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
      "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
      "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
        [v_ptr]"r"(src_v), [dst_rgb565]"r"(dst_rgb565),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
        [eight]"f"(0x8), [seven]"f"(0x7),
        [lmove5]"f"(0x5)
      : "memory"
  );
}
7008
// Converts one row of NV12 (Y plane plus interleaved U,V plane) to 32-bit
// pixels stored as B,G,R,A bytes with A = 0xff, using Loongson MMI.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_uv       - interleaved chroma bytes U0 V0 U1 V1 ..., 4 bytes consumed
//                  per iteration (one U,V pair per two pixels).
//   rgb_buf      - output, 16 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// NOTE(review): y_ptr, uv_ptr, rgbbuf_ptr and width are declared as
// input-only operands yet are modified by the asm, which GCC's extended-asm
// rules do not allow. Making all four "+r" would take this statement from 27
// to 31 operands (the limit is 30), so one operand has to be freed first.
void NV12ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_uv,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Load 4 Y bytes and 4 UV bytes, widen
      // the UV bytes to halfwords U0|V0|U1|V1, then shuffle them apart:
      // vshu (0xf5) picks halfwords 1,1,3,3 -> V0|V0|V1|V1 and
      // ushu (0xA0) picks halfwords 0,0,2,2 -> U0|U0|U1|U1.
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "pshufh %[v], %[u], %[vshu] \n\t"
      "pshufh %[u], %[u], %[ushu] \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes, pair the four G bytes with four 0xff
      // alpha bytes, and interleave into B,G,R,A pixels: g_vec holds
      // pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Two unaligned 64-bit stores: 16 output bytes.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [alpha]"f"(-1)
      : "memory"
  );
}
7102
// Converts one row of NV21 (Y plane plus interleaved V,U plane) to 32-bit
// pixels stored as B,G,R,A bytes with A = 0xff, using Loongson MMI.
// The arithmetic matches the NV12 variant; only the chroma shuffle selectors
// are swapped because V precedes U in the source.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_vu       - interleaved chroma bytes V0 U0 V1 U1 ..., 4 bytes consumed
//                  per iteration (one V,U pair per two pixels).
//   rgb_buf      - output, 16 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// NOTE(review): y_ptr, vu_ptr, rgbbuf_ptr and width are declared as
// input-only operands yet are modified by the asm, which GCC's extended-asm
// rules do not allow. Making all four "+r" would take this statement from 27
// to 31 operands (the limit is 30), so one operand has to be freed first.
void NV21ToARGBRow_MMI(const uint8_t* src_y,
                       const uint8_t* src_vu,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Load 4 Y bytes and 4 VU bytes, widen
      // the VU bytes to halfwords V0|U0|V1|U1, then shuffle them apart:
      // ushu (0xA0) picks halfwords 0,0,2,2 -> V0|V0|V1|V1 (into v) and
      // vshu (0xf5) picks halfwords 1,1,3,3 -> U0|U0|U1|U1 (into u).
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "pshufh %[v], %[u], %[ushu] \n\t"
      "pshufh %[u], %[u], %[vshu] \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes, pair the four G bytes with four 0xff
      // alpha bytes, and interleave into B,G,R,A pixels: g_vec holds
      // pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Two unaligned 64-bit stores: 16 output bytes.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [alpha]"f"(-1)
      : "memory"
  );
}
7196
// Converts one row of NV12 (Y plane plus interleaved U,V plane) to packed
// 24-bit pixels stored as B,G,R bytes, using Loongson MMI.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_uv       - interleaved chroma bytes U0 V0 U1 V1 ..., 4 bytes consumed
//                  per iteration (one U,V pair per two pixels).
//   rgb_buf      - output, 12 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// The unused [alpha] input operand has been dropped: the template never
// references it, so it only tied up an FP register. The operand list now
// matches NV21ToRGB24Row_MMI.
//
// NOTE(review): y_ptr, uv_ptr, rgbbuf_ptr and width are declared as
// input-only operands yet are modified by the asm, which GCC's extended-asm
// rules do not allow. Making all four "+r" would need 33 operands (the limit
// is 30), so the constant operands would have to be reduced first.
void NV12ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_uv,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub, ug, vg, vr, bb, bg, br, yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Load 4 Y bytes and 4 UV bytes, widen
      // the UV bytes to halfwords U0|V0|U1|V1, then shuffle them apart:
      // vshu (0xf5) picks halfwords 1,1,3,3 -> V0|V0|V1|V1 and
      // ushu (0xA0) picks halfwords 0,0,2,2 -> U0|U0|U1|U1.
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "pshufh %[v], %[u], %[vshu] \n\t"
      "pshufh %[u], %[u], %[ushu] \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes and interleave into 32-bit B,G,R,0
      // pixels: g_vec holds pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Squeeze out the empty fourth byte of every pixel. After this
      // section g_vec = B0 G0 R0 B1 G1 R1 B2 G2 and the low word of
      // b_vec = R2 B3 G3 R3.
      "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
      "psllw %[temp], %[r_vec], %[lmove1] \n\t"
      "or %[g_vec], %[g_vec], %[temp] \n\t"
      "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
      "pextrh %[temp], %[temp], %[zero] \n\t"
      "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
      "pextrh %[temp], %[b_vec], %[zero] \n\t"
      "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
      "pextrh %[temp], %[b_vec], %[one] \n\t"
      "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
      "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
      "or %[b_vec], %[b_vec], %[temp] \n\t"
      // Unaligned 8-byte store followed by an unaligned 4-byte store.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
      "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [lmove1]"f"(0x18), [rmove1]"f"(0x8),
        [one]"f"(0x1)
      : "memory"
  );
}
7302
// Converts one row of NV21 (Y plane plus interleaved V,U plane) to packed
// 24-bit pixels stored as B,G,R bytes, using Loongson MMI.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_vu       - interleaved chroma bytes V0 U0 V1 U1 ..., 4 bytes consumed
//                  per iteration (one V,U pair per two pixels).
//   rgb_buf      - output, 12 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// NOTE(review): y_ptr, vu_ptr, rgbbuf_ptr and width are declared as
// input-only operands yet are modified by the asm, which GCC's extended-asm
// rules do not allow. Making all four "+r" would need 33 operands (the limit
// is 30), so the constant operands would have to be reduced first.
void NV21ToRGB24Row_MMI(const uint8_t* src_y,
                        const uint8_t* src_vu,
                        uint8_t* rgb_buf,
                        const struct YuvConstants* yuvconstants,
                        int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Load 4 Y bytes and 4 VU bytes, widen
      // the VU bytes to halfwords V0|U0|V1|U1, then shuffle them apart:
      // ushu (0xA0) picks halfwords 0,0,2,2 -> V0|V0|V1|V1 (into v) and
      // vshu (0xf5) picks halfwords 1,1,3,3 -> U0|U0|U1|U1 (into u).
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[vu_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[vu_ptr]) \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "pshufh %[v], %[u], %[ushu] \n\t"
      "pshufh %[u], %[u], %[vshu] \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes and interleave into 32-bit B,G,R,0
      // pixels: g_vec holds pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Squeeze out the empty fourth byte of every pixel. After this
      // section g_vec = B0 G0 R0 B1 G1 R1 B2 G2 and the low word of
      // b_vec = R2 B3 G3 R3.
      "punpckhwd %[r_vec], %[g_vec], %[g_vec] \n\t"
      "psllw %[temp], %[r_vec], %[lmove1] \n\t"
      "or %[g_vec], %[g_vec], %[temp] \n\t"
      "psrlw %[temp], %[r_vec], %[rmove1] \n\t"
      "pextrh %[temp], %[temp], %[zero] \n\t"
      "pinsrh_2 %[g_vec], %[g_vec], %[temp] \n\t"
      "pextrh %[temp], %[b_vec], %[zero] \n\t"
      "pinsrh_3 %[g_vec], %[g_vec], %[temp] \n\t"
      "pextrh %[temp], %[b_vec], %[one] \n\t"
      "punpckhwd %[b_vec], %[b_vec], %[b_vec] \n\t"
      "psllw %[b_vec], %[b_vec], %[rmove1] \n\t"
      "or %[b_vec], %[b_vec], %[temp] \n\t"
      // Unaligned 8-byte store followed by an unaligned 4-byte store.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gsswlc1 %[b_vec], 0x0b(%[rgbbuf_ptr]) \n\t"
      "gsswrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[vu_ptr], %[vu_ptr], 0x04 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x0C \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [vu_ptr]"r"(src_vu),
        [rgbbuf_ptr]"r"(rgb_buf),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [lmove1]"f"(0x18), [rmove1]"f"(0x8),
        [one]"f"(0x1)
      : "memory"
  );
}
7408
// Converts one row of NV12 (Y plane plus interleaved U,V plane) to 16-bit
// RGB565 using Loongson MMI instructions.
//
//   src_y        - luma samples, 4 bytes consumed per loop iteration.
//   src_uv       - interleaved chroma bytes U0 V0 U1 V1 ..., 4 bytes consumed
//                  per iteration (one U,V pair per two pixels).
//   dst_rgb565   - output, 8 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// The statement already uses 15 outputs + 15 inputs (the 30-operand
// maximum), so the shift count 5 is computed into the dead y register with
// psubb instead of being passed as another constant operand.
//
// NOTE(review): y_ptr, uv_ptr, dst_rgb565 and width are declared as
// input-only operands yet are modified by the asm, which GCC's extended-asm
// rules do not allow; fixing that with "+r" needs operands freed first.
void NV12ToRGB565Row_MMI(const uint8_t* src_y,
                         const uint8_t* src_uv,
                         uint8_t* dst_rgb565,
                         const struct YuvConstants* yuvconstants,
                         int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub,ug,vg,vr,bb,bg,br,yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Load 4 Y bytes and 4 UV bytes, widen
      // the UV bytes to halfwords U0|V0|U1|V1, then shuffle them apart:
      // vshu (0xf5) picks halfwords 1,1,3,3 -> V0|V0|V1|V1 and
      // ushu (0xA0) picks halfwords 0,0,2,2 -> U0|U0|U1|U1.
      "1: \n\t"
      "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
      "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
      "gslwlc1 %[u], 0x03(%[uv_ptr]) \n\t"
      "gslwrc1 %[u], 0x00(%[uv_ptr]) \n\t"
      "punpcklbh %[u], %[u], %[zero] \n\t"
      "pshufh %[v], %[u], %[vshu] \n\t"
      "pshufh %[u], %[u], %[ushu] \n\t"

      // Duplicate each Y byte into both halves of a halfword (Y * 0x0101)
      // and keep the high 16 bits of the unsigned product with yg.
      "punpcklbh %[y], %[y], %[y] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes and interleave into 32-bit B,G,R,0
      // pixels: g_vec holds pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Pixels 0-1: reduce to 5/6/5 bits. The low 5 bits of each word take
      // B >> 3, bits 5..10 take G >> 2, bits 11..15 take R >> 3. The y
      // register is free here (it is reloaded at the loop top) and is
      // reused to hold the shift count 5.
      "psrlh %[temp], %[g_vec], %[three] \n\t"
      "and %[g_vec], %[temp], %[mask2] \n\t"
      "psrlw %[temp], %[temp], %[seven] \n\t"
      "psrlw %[r_vec], %[mask1], %[eight] \n\t"
      "and %[r_vec], %[temp], %[r_vec] \n\t"
      "psubb %[y], %[eight], %[three] \n\t"  // y = 8 - 3 = 5
      "psllw %[r_vec], %[r_vec], %[y] \n\t"
      "or %[g_vec], %[g_vec], %[r_vec] \n\t"
      "paddb %[r_vec], %[three], %[six] \n\t"
      "psrlw %[temp], %[temp], %[r_vec] \n\t"
      "and %[r_vec], %[temp], %[mask2] \n\t"
      "paddb %[temp], %[three], %[eight] \n\t"
      "psllw %[r_vec], %[r_vec], %[temp] \n\t"
      "or %[g_vec], %[g_vec], %[r_vec] \n\t"

      // Pixels 2-3: same 5/6/5 reduction applied to b_vec.
      "psrlh %[temp], %[b_vec], %[three] \n\t"
      "and %[b_vec], %[temp], %[mask2] \n\t"
      "psrlw %[temp], %[temp], %[seven] \n\t"
      "psrlw %[r_vec], %[mask1], %[eight] \n\t"
      "and %[r_vec], %[temp], %[r_vec] \n\t"
      "psubb %[y], %[eight], %[three] \n\t"  // y = 8 - 3 = 5
      "psllw %[r_vec], %[r_vec], %[y] \n\t"
      "or %[b_vec], %[b_vec], %[r_vec] \n\t"
      "paddb %[r_vec], %[three], %[six] \n\t"
      "psrlw %[temp], %[temp], %[r_vec] \n\t"
      "and %[r_vec], %[temp], %[mask2] \n\t"
      "paddb %[temp], %[three], %[eight] \n\t"
      "psllw %[r_vec], %[r_vec], %[temp] \n\t"
      "or %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Gather the low halfword of each 32-bit lane so the four 16-bit
      // pixels end up contiguous in g_vec in order 0,1,2,3.
      "punpcklhw %[r_vec], %[g_vec], %[b_vec] \n\t"
      "punpckhhw %[b_vec], %[g_vec], %[b_vec] \n\t"
      "punpcklhw %[g_vec], %[r_vec], %[b_vec] \n\t"

      // Unaligned 64-bit store of the four pixels.
      "gssdlc1 %[g_vec], 0x07(%[dst_rgb565]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[dst_rgb565]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
      "daddiu %[uv_ptr], %[uv_ptr], 0x04 \n\t"
      "daddiu %[dst_rgb565], %[dst_rgb565], 0x08 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers only; none is read after the asm.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg)
      // Inputs: pointers and counter in GPRs, constants in FP registers.
      : [y_ptr]"r"(src_y), [uv_ptr]"r"(src_uv),
        [dst_rgb565]"r"(dst_rgb565),
        [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [three]"f"(0x3), [mask2]"f"(0x1f0000001f),
        [eight]"f"(0x8), [seven]"f"(0x7)
      : "memory"
  );
}
7534
// Converts one row of packed YUY2 (bytes Y0 U0 Y1 V0 ...) to 32-bit pixels
// stored as B,G,R,A bytes with A = 0xff, using Loongson MMI.
//
//   src_yuy2     - packed input, 8 bytes (4 pixels) consumed per iteration.
//   rgb_buf      - output, 16 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// The source pointer, destination pointer and pixel counter are updated
// inside the asm, so they are declared as read-write ("+&r") operands.
// Modifying input-only operands is not permitted by GCC's extended-asm
// rules. With the three read-write operands counted twice, the statement
// uses exactly 30 operands, the maximum GCC accepts.
void YUY2ToARGBRow_MMI(const uint8_t* src_yuy2,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub, ug, vg, vr, bb, bg, br, yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Unaligned 64-bit load; each halfword
      // holds a Y sample in its low byte and a chroma sample in its high
      // byte. Shifting right by 8 leaves U0|V0|U1|V1, which is shuffled to
      // U0|U0|U1|U1 (ushu = 0xA0) and V0|V0|V1|V1 (vshu = 0xf5).
      "1: \n\t"
      "gsldlc1 %[y], 0x07(%[yuy2_ptr]) \n\t"
      "gsldrc1 %[y], 0x00(%[yuy2_ptr]) \n\t"
      "psrlh %[temp], %[y], %[eight] \n\t"
      "pshufh %[u], %[temp], %[ushu] \n\t"
      "pshufh %[v], %[temp], %[vshu] \n\t"

      // Keep the low (Y) byte of each halfword, replicate it into the high
      // byte (Y * 0x0101) and keep the high 16 bits of the unsigned product
      // with yg.
      "psrlh %[temp], %[mask1], %[eight] \n\t"
      "and %[y], %[y], %[temp] \n\t"
      "psllh %[temp], %[y], %[eight] \n\t"
      "or %[y], %[y], %[temp] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes, pair the four G bytes with four 0xff
      // alpha bytes, and interleave into B,G,R,A pixels: g_vec holds
      // pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Two unaligned 64-bit stores: 16 output bytes.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[yuy2_ptr], %[yuy2_ptr], 0x08 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers plus the read-write pointers/counter.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg),
        [yuy2_ptr]"+&r"(src_yuy2), [rgbbuf_ptr]"+&r"(rgb_buf),
        [width]"+&r"(width)
      // Inputs: the constants table pointer and FP-register constants.
      : [yuvcons_ptr]"r"(yuvconstants),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [alpha]"f"(-1), [eight]"f"(0x8)
      : "memory"
  );
}
7626
// Converts one row of packed UYVY (bytes U0 Y0 V0 Y1 ...) to 32-bit pixels
// stored as B,G,R,A bytes with A = 0xff, using Loongson MMI.
//
//   src_uyvy     - packed input, 8 bytes (4 pixels) consumed per iteration.
//   rgb_buf      - output, 16 bytes (4 pixels) written per iteration.
//   yuvconstants - conversion coefficients, read at byte offsets 0x00, 0x20,
//                  0x40, 0x60, 0x80, 0xa0 and 0xc0.
//   width        - pixel count. The loop subtracts 4 and only exits when the
//                  counter is exactly zero, so it must be a positive multiple
//                  of 4.
//
// The source pointer, destination pointer and pixel counter are updated
// inside the asm, so they are declared as read-write ("+&r") operands.
// Modifying input-only operands is not permitted by GCC's extended-asm
// rules. With the three read-write operands counted twice, the statement
// uses exactly 30 operands, the maximum GCC accepts.
void UYVYToARGBRow_MMI(const uint8_t* src_uyvy,
                       uint8_t* rgb_buf,
                       const struct YuvConstants* yuvconstants,
                       int width) {
  uint64_t y, u, v;
  uint64_t b_vec, g_vec, r_vec, temp;
  uint64_t ub, ug, vg, vr, bb, bg, br, yg;

  __asm__ volatile(
      // One-time setup: load the coefficient vectors. ug/vg/vr are widened
      // from bytes to halfwords and one halfword is broadcast (pshufh with
      // 0x00 selects halfword 0, with 0x55 halfword 1). OR-ing mask1 forces
      // the upper byte of each halfword of ub and vr to 0xff.
      // NOTE(review): presumably this sign-extends negative 8-bit
      // coefficients - confirm against the YuvConstants layout.
      "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
      "or %[ub], %[ub], %[mask1] \n\t"
      "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
      "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[ug], %[ug], %[zero] \n\t"
      "pshufh %[ug], %[ug], %[zero] \n\t"
      "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vg], %[vg], %[zero] \n\t"
      "pshufh %[vg], %[vg], %[five] \n\t"
      "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
      "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
      "punpcklbh %[vr], %[vr], %[zero] \n\t"
      "pshufh %[vr], %[vr], %[five] \n\t"
      "or %[vr], %[vr], %[mask1] \n\t"

      // Main loop: 4 pixels per pass. Unaligned 64-bit load; each halfword
      // holds a chroma sample in its low byte and a Y sample in its high
      // byte. Masking with 0x00ff leaves U0|V0|U1|V1, which is shuffled to
      // U0|U0|U1|U1 (ushu = 0xA0) and V0|V0|V1|V1 (vshu = 0xf5).
      "1: \n\t"
      "gsldlc1 %[y], 0x07(%[uyvy_ptr]) \n\t"
      "gsldrc1 %[y], 0x00(%[uyvy_ptr]) \n\t"
      "psrlh %[temp], %[mask1], %[eight] \n\t"
      "and %[temp], %[y], %[temp] \n\t"
      "pshufh %[u], %[temp], %[ushu] \n\t"
      "pshufh %[v], %[temp], %[vshu] \n\t"

      // Move the high (Y) byte of each halfword down, replicate it into
      // the high byte (Y * 0x0101) and keep the high 16 bits of the
      // unsigned product with yg.
      "psrlh %[y], %[y], %[eight] \n\t"
      "psllh %[temp], %[y], %[eight] \n\t"
      "or %[y], %[y], %[temp] \n\t"
      "pmulhuh %[y], %[y], %[yg] \n\t"

      // B = (y + bb - u * ub) >> 6.
      "paddsh %[b_vec], %[y], %[bb] \n\t"
      "pmullh %[temp], %[u], %[ub] \n\t"
      "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
      "psrah %[b_vec], %[b_vec], %[six] \n\t"

      // G = (y + bg - u * ug - v * vg) >> 6.
      "paddsh %[g_vec], %[y], %[bg] \n\t"
      "pmullh %[temp], %[u], %[ug] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "pmullh %[temp], %[v], %[vg] \n\t"
      "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
      "psrah %[g_vec], %[g_vec], %[six] \n\t"

      // R = (y + br - v * vr) >> 6.
      "paddsh %[r_vec], %[y], %[br] \n\t"
      "pmullh %[temp], %[v], %[vr] \n\t"
      "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
      "psrah %[r_vec], %[r_vec], %[six] \n\t"

      // Saturate to unsigned bytes, pair the four G bytes with four 0xff
      // alpha bytes, and interleave into B,G,R,A pixels: g_vec holds
      // pixels 0-1, b_vec holds pixels 2-3.
      "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
      "packushb %[g_vec], %[g_vec], %[zero] \n\t"
      "punpcklwd %[g_vec], %[g_vec], %[alpha] \n\t"
      "punpcklbh %[b_vec], %[r_vec], %[g_vec] \n\t"
      "punpckhbh %[r_vec], %[r_vec], %[g_vec] \n\t"
      "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
      "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"

      // Two unaligned 64-bit stores: 16 output bytes.
      "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
      "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
      "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"

      // Advance the pointers and loop until the pixel counter reaches zero.
      "daddiu %[uyvy_ptr], %[uyvy_ptr], 0x08 \n\t"
      "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      // Outputs: scratch FP registers plus the read-write pointers/counter.
      : [y]"=&f"(y), [u]"=&f"(u),
        [v]"=&f"(v),
        [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
        [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
        [ub]"=&f"(ub), [ug]"=&f"(ug),
        [vg]"=&f"(vg), [vr]"=&f"(vr),
        [bb]"=&f"(bb), [bg]"=&f"(bg),
        [br]"=&f"(br), [yg]"=&f"(yg),
        [uyvy_ptr]"+&r"(src_uyvy), [rgbbuf_ptr]"+&r"(rgb_buf),
        [width]"+&r"(width)
      // Inputs: the constants table pointer and FP-register constants.
      : [yuvcons_ptr]"r"(yuvconstants),
        [zero]"f"(0x00), [five]"f"(0x55),
        [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
        [ushu]"f"(0xA0), [vshu]"f"(0xf5),
        [alpha]"f"(-1), [eight]"f"(0x8)
      : "memory"
  );
}
7718
I422ToRGBARow_MMI(const uint8_t * src_y,const uint8_t * src_u,const uint8_t * src_v,uint8_t * rgb_buf,const struct YuvConstants * yuvconstants,int width)7719 void I422ToRGBARow_MMI(const uint8_t* src_y,
7720 const uint8_t* src_u,
7721 const uint8_t* src_v,
7722 uint8_t* rgb_buf,
7723 const struct YuvConstants* yuvconstants,
7724 int width) {
7725 uint64_t y, u, v;
7726 uint64_t b_vec, g_vec, r_vec, temp;
7727 uint64_t ub,ug,vg,vr,bb,bg,br,yg;
7728
7729 __asm__ volatile(
7730 "ldc1 %[yg], 0xc0(%[yuvcons_ptr]) \n\t"
7731 "ldc1 %[bb], 0x60(%[yuvcons_ptr]) \n\t"
7732 "ldc1 %[ub], 0x00(%[yuvcons_ptr]) \n\t"
7733 "or %[ub], %[ub], %[mask1] \n\t"
7734 "ldc1 %[bg], 0x80(%[yuvcons_ptr]) \n\t"
7735 "ldc1 %[ug], 0x20(%[yuvcons_ptr]) \n\t"
7736 "punpcklbh %[ug], %[ug], %[zero] \n\t"
7737 "pshufh %[ug], %[ug], %[zero] \n\t"
7738 "ldc1 %[vg], 0x20(%[yuvcons_ptr]) \n\t"
7739 "punpcklbh %[vg], %[vg], %[zero] \n\t"
7740 "pshufh %[vg], %[vg], %[five] \n\t"
7741 "ldc1 %[br], 0xa0(%[yuvcons_ptr]) \n\t"
7742 "ldc1 %[vr], 0x40(%[yuvcons_ptr]) \n\t"
7743 "punpcklbh %[vr], %[vr], %[zero] \n\t"
7744 "pshufh %[vr], %[vr], %[five] \n\t"
7745 "or %[vr], %[vr], %[mask1] \n\t"
7746
7747 "1: \n\t"
7748 "gslwlc1 %[y], 0x03(%[y_ptr]) \n\t"
7749 "gslwrc1 %[y], 0x00(%[y_ptr]) \n\t"
7750 "gslwlc1 %[u], 0x03(%[u_ptr]) \n\t"
7751 "gslwrc1 %[u], 0x00(%[u_ptr]) \n\t"
7752 "gslwlc1 %[v], 0x03(%[v_ptr]) \n\t"
7753 "gslwrc1 %[v], 0x00(%[v_ptr]) \n\t"
7754
7755 "punpcklbh %[y], %[y], %[y] \n\t"
7756 "pmulhuh %[y], %[y], %[yg] \n\t"
7757
7758 "punpcklbh %[u], %[u], %[u] \n\t"
7759 "punpcklbh %[u], %[u], %[zero] \n\t"
7760 "paddsh %[b_vec], %[y], %[bb] \n\t"
7761 "pmullh %[temp], %[u], %[ub] \n\t"
7762 "psubsh %[b_vec], %[b_vec], %[temp] \n\t"
7763 "psrah %[b_vec], %[b_vec], %[six] \n\t"
7764
7765 "punpcklbh %[v], %[v], %[v] \n\t"
7766 "punpcklbh %[v], %[v], %[zero] \n\t"
7767 "paddsh %[g_vec], %[y], %[bg] \n\t"
7768 "pmullh %[temp], %[u], %[ug] \n\t"
7769 "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
7770 "pmullh %[temp], %[v], %[vg] \n\t"
7771 "psubsh %[g_vec], %[g_vec], %[temp] \n\t"
7772 "psrah %[g_vec], %[g_vec], %[six] \n\t"
7773
7774 "paddsh %[r_vec], %[y], %[br] \n\t"
7775 "pmullh %[temp], %[v], %[vr] \n\t"
7776 "psubsh %[r_vec], %[r_vec], %[temp] \n\t"
7777 "psrah %[r_vec], %[r_vec], %[six] \n\t"
7778
7779 "packushb %[r_vec], %[b_vec], %[r_vec] \n\t"
7780 "packushb %[g_vec], %[g_vec], %[zero] \n\t"
7781 "punpcklwd %[g_vec], %[alpha], %[g_vec] \n\t"
7782 "punpcklbh %[b_vec], %[g_vec], %[r_vec] \n\t"
7783 "punpckhbh %[r_vec], %[g_vec], %[r_vec] \n\t"
7784 "punpcklhw %[g_vec], %[b_vec], %[r_vec] \n\t"
7785 "punpckhhw %[b_vec], %[b_vec], %[r_vec] \n\t"
7786
7787 "gssdlc1 %[g_vec], 0x07(%[rgbbuf_ptr]) \n\t"
7788 "gssdrc1 %[g_vec], 0x00(%[rgbbuf_ptr]) \n\t"
7789 "gssdlc1 %[b_vec], 0x0f(%[rgbbuf_ptr]) \n\t"
7790 "gssdrc1 %[b_vec], 0x08(%[rgbbuf_ptr]) \n\t"
7791
7792 "daddiu %[y_ptr], %[y_ptr], 0x04 \n\t"
7793 "daddiu %[u_ptr], %[u_ptr], 0x02 \n\t"
7794 "daddiu %[v_ptr], %[v_ptr], 0x02 \n\t"
7795 "daddiu %[rgbbuf_ptr], %[rgbbuf_ptr], 0x10 \n\t"
7796 "daddi %[width], %[width], -0x04 \n\t"
7797 "bnez %[width], 1b \n\t"
7798
7799 : [y]"=&f"(y), [u]"=&f"(u),
7800 [v]"=&f"(v),
7801 [b_vec]"=&f"(b_vec), [g_vec]"=&f"(g_vec),
7802 [r_vec]"=&f"(r_vec), [temp]"=&f"(temp),
7803 [ub]"=&f"(ub), [ug]"=&f"(ug),
7804 [vg]"=&f"(vg), [vr]"=&f"(vr),
7805 [bb]"=&f"(bb), [bg]"=&f"(bg),
7806 [br]"=&f"(br), [yg]"=&f"(yg)
7807 : [y_ptr]"r"(src_y), [u_ptr]"r"(src_u),
7808 [v_ptr]"r"(src_v), [rgbbuf_ptr]"r"(rgb_buf),
7809 [yuvcons_ptr]"r"(yuvconstants), [width]"r"(width),
7810 [zero]"f"(0x00), [five]"f"(0x55),
7811 [six]"f"(0x6), [mask1]"f"(0xff00ff00ff00ff00),
7812 [alpha]"f"(-1)
7813 : "memory"
7814 );
7815 }
7816
ARGBSetRow_MMI(uint8_t * dst_argb,uint32_t v32,int width)7817 void ARGBSetRow_MMI(uint8_t* dst_argb, uint32_t v32, int width) {
7818 __asm__ volatile (
7819 "punpcklwd %[v32], %[v32], %[v32] \n\t"
7820 "1: \n\t"
7821 "gssdlc1 %[v32], 0x07(%[dst_ptr]) \n\t"
7822 "gssdrc1 %[v32], 0x00(%[dst_ptr]) \n\t"
7823 "gssdlc1 %[v32], 0x0f(%[dst_ptr]) \n\t"
7824 "gssdrc1 %[v32], 0x08(%[dst_ptr]) \n\t"
7825
7826 "daddi %[width], %[width], -0x04 \n\t"
7827 "daddiu %[dst_ptr], %[dst_ptr], 0x10 \n\t"
7828 "bnez %[width], 1b \n\t"
7829 : [v32]"+&f"(v32)
7830 : [dst_ptr]"r"(dst_argb), [width]"r"(width)
7831 : "memory"
7832 );
7833 }
7834 // clang-format on
7835
7836 // 10 bit YUV to ARGB
7837 #endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
7838
7839 #ifdef __cplusplus
7840 } // extern "C"
7841 } // namespace libyuv
7842 #endif
7843