1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
13 // Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14
15 #include "src/dsp/dsp.h"
16
17 #if defined(WEBP_USE_MIPS_DSP_R2)
18
19 #include "src/dsp/lossless.h"
20 #include "src/dsp/lossless_common.h"
21
// MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) defines
//
//   static void FUNC_NAME(const TYPE* src, const uint32_t* const color_map,
//                         TYPE* dst, int y_start, int y_end, int width);
//
// For every row y in [y_start, y_end) the generated function maps 'width'
// source elements through 'color_map' and writes the results to 'dst'.
// 'src' and 'dst' are simply advanced element by element; no per-row stride
// is applied between rows.
//
// Each row is handled in two parts:
//  * (width >> 2) iterations of inline assembly, four elements at a time.
//    The assembler-level '.ifc' conditionals compare the stringified TYPE
//    against 'uint8_t' / 'uint32_t' and keep only the matching load/store
//    sequence:
//      - uint8_t : the source byte is the index; bits 8..15 of the looked-up
//                  32-bit entry are stored as the output byte.
//      - uint32_t: bits 8..15 of the source word are the index; the whole
//                  looked-up 32-bit entry is stored.
//    In both cases the index is scaled by 4 ('sll ..., 2') and the entry is
//    fetched with the indexed load 'lwx'.
//  * (width & 3) left-over elements using
//    GET_VALUE(color_map[GET_INDEX(*src++)]).
//
// NOTE(review): GET_INDEX / GET_VALUE are only used by the scalar tail; the
// assembly hard-codes the bit positions above and presumably matches those
// helpers, which are defined outside this file -- confirm when changing
// either side.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
static void FUNC_NAME(const TYPE* src,                                         \
                      const uint32_t* const color_map,                         \
                      TYPE* dst, int y_start, int y_end,                       \
                      int width) {                                             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    for (x = 0; x < (width >> 2); ++x) {                                       \
      int tmp1, tmp2, tmp3, tmp4;                                              \
      __asm__ volatile (                                                       \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "lbu %[tmp1], 0(%[src]) \n\t"                                          \
        "lbu %[tmp2], 1(%[src]) \n\t"                                          \
        "lbu %[tmp3], 2(%[src]) \n\t"                                          \
        "lbu %[tmp4], 3(%[src]) \n\t"                                          \
        "addiu %[src], %[src], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "lw %[tmp1], 0(%[src]) \n\t"                                           \
        "lw %[tmp2], 4(%[src]) \n\t"                                           \
        "lw %[tmp3], 8(%[src]) \n\t"                                           \
        "lw %[tmp4], 12(%[src]) \n\t"                                          \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "addiu %[src], %[src], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        "sll %[tmp1], %[tmp1], 2 \n\t"                                         \
        "sll %[tmp2], %[tmp2], 2 \n\t"                                         \
        "sll %[tmp3], %[tmp3], 2 \n\t"                                         \
        "sll %[tmp4], %[tmp4], 2 \n\t"                                         \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t"                              \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t"                              \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t"                              \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t"                              \
      ".ifc " #TYPE ", uint8_t \n\t"                                           \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t"                                      \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t"                                      \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t"                                      \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t"                                      \
        "sb %[tmp1], 0(%[dst]) \n\t"                                           \
        "sb %[tmp2], 1(%[dst]) \n\t"                                           \
        "sb %[tmp3], 2(%[dst]) \n\t"                                           \
        "sb %[tmp4], 3(%[dst]) \n\t"                                           \
        "addiu %[dst], %[dst], 4 \n\t"                                         \
      ".endif \n\t"                                                            \
      ".ifc " #TYPE ", uint32_t \n\t"                                          \
        "sw %[tmp1], 0(%[dst]) \n\t"                                           \
        "sw %[tmp2], 4(%[dst]) \n\t"                                           \
        "sw %[tmp3], 8(%[dst]) \n\t"                                           \
        "sw %[tmp4], 12(%[dst]) \n\t"                                          \
        "addiu %[dst], %[dst], 16 \n\t"                                        \
      ".endif \n\t"                                                            \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
        : [color_map]"r"(color_map)                                            \
        : "memory"                                                             \
      );                                                                       \
    }                                                                          \
    for (x = 0; x < (width & 3); ++x) {                                        \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}

// 32-bit variant: index taken from bits 8..15 of each source word.
MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
// 8-bit variant: each source byte is the index.
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93
// Computes c0 + c1 - c2 independently on each of the four 8-bit channels
// packed in the arguments and clamps every channel to [0, 255].
// Returns the four clamped channels repacked into one 32-bit word.
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // Zero-extend the bytes of each input into 16-bit lanes:
    // 'qbr' takes the two low bytes, 'qbl' the two high bytes.
    "preceu.ph.qbr %[temp1], %[c0] \n\t"
    "preceu.ph.qbl %[temp2], %[c0] \n\t"
    "preceu.ph.qbr %[temp3], %[c1] \n\t"
    "preceu.ph.qbl %[temp4], %[c1] \n\t"
    "preceu.ph.qbr %[temp5], %[c2] \n\t"
    "preceu.ph.qbl %[temp0], %[c2] \n\t"
    // Per lane: c1 - c2, then c0 + (c1 - c2).
    "subq.ph %[temp3], %[temp3], %[temp5] \n\t"
    "subq.ph %[temp4], %[temp4], %[temp0] \n\t"
    "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
    "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
    // Saturating shift left by 7 followed by the saturating reduction to
    // unsigned bytes clamps each lane to [0, 255] and repacks four bytes.
    "shll_s.ph %[temp1], %[temp1], 7 \n\t"
    "shll_s.ph %[temp2], %[temp2], 7 \n\t"
    "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
    // temp0 is first written by the instruction that performs the last read
    // of an input operand, which is why it alone is not early-clobber.
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp2;
}
118
// For each of the four 8-bit channels computes
//   ave = (c0 + c1) >> 1,  result = clamp(ave + (ave - c2) / 2, 0, 255)
// where the division by two rounds toward zero.
// Returns the four clamped channels repacked into one 32-bit word.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // Per-byte truncating average of c0 and c1.
    "adduh.qb %[temp5], %[c0], %[c1] \n\t"
    // Zero-extend the bytes of the average (temp1/temp2) and of c2
    // (temp3/temp4) into 16-bit lanes (low byte pair / high byte pair).
    "preceu.ph.qbr %[temp3], %[c2] \n\t"
    "preceu.ph.qbr %[temp1], %[temp5] \n\t"
    "preceu.ph.qbl %[temp2], %[temp5] \n\t"
    "preceu.ph.qbl %[temp4], %[c2] \n\t"
    // diff = ave - c2 per lane.
    "subq.ph %[temp3], %[temp1], %[temp3] \n\t"
    "subq.ph %[temp4], %[temp2], %[temp4] \n\t"
    // Add the sign bit of each lane (logical shift by 15) before the
    // arithmetic shift by 1, so that diff / 2 rounds toward zero.
    "shrl.ph %[temp5], %[temp3], 15 \n\t"
    "shrl.ph %[temp0], %[temp4], 15 \n\t"
    "addq.ph %[temp3], %[temp3], %[temp5] \n\t"
    "addq.ph %[temp4], %[temp0], %[temp4] \n\t"
    "shra.ph %[temp3], %[temp3], 1 \n\t"
    "shra.ph %[temp4], %[temp4], 1 \n\t"
    // ave + diff / 2 per lane.
    "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
    "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
    // Saturating shift left by 7 followed by the saturating reduction to
    // unsigned bytes clamps each lane to [0, 255] and repacks four bytes.
    "shll_s.ph %[temp1], %[temp1], 7 \n\t"
    "shll_s.ph %[temp2], %[temp2], 7 \n\t"
    "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
    // temp0 and temp4 are first written no earlier than the last read of any
    // input operand, so they are the only outputs without early-clobber.
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp1;
}
148
// Chooses between 'a' and 'b' by comparing their per-byte distance to 'c':
// with Sa = sum over the four bytes of |a - c| and Sb = sum of |b - c|,
// returns 'a' when Sb - Sa <= 0 and 'b' otherwise.
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // The per-byte compare sets the condition bits consumed by the two
    // 'pick.qb' that follow: temp1 = max(b, c), temp2 = min(b, c) per byte.
    "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
    "pick.qb %[temp1], %[b], %[c] \n\t"
    "pick.qb %[temp2], %[c], %[b] \n\t"
    // Same for a and c: temp4 = max(a, c), temp5 = min(a, c) per byte.
    "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
    "pick.qb %[temp4], %[a], %[c] \n\t"
    "pick.qb %[temp5], %[c], %[a] \n\t"
    // max - min gives the per-byte absolute differences.
    "subu.qb %[temp3], %[temp1], %[temp2] \n\t"
    "subu.qb %[temp0], %[temp4], %[temp5] \n\t"
    // Sum the four bytes of each difference word.
    "raddu.w.qb %[temp3], %[temp3] \n\t"
    "raddu.w.qb %[temp0], %[temp0] \n\t"
    // temp3 = Sb - Sa; temp0 = (temp3 < 1), i.e. (Sb - Sa <= 0).
    "subu %[temp3], %[temp3], %[temp0] \n\t"
    "slti %[temp0], %[temp3], 0x1 \n\t"
    // Replace a by b only when temp0 == 0 (Sb - Sa > 0).
    "movz %[a], %[b], %[temp0] \n\t"
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
      [a]"+&r"(a)
    : [b]"r"(b), [c]"r"(c)
  );
  return a;
}
172
// Returns the per-byte truncating average of a0 and a1, i.e. (x + y) >> 1 on
// each of the four unsigned bytes, computed by a single 'adduh.qb'.
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  __asm__ volatile (
    "adduh.qb %[a0], %[a0], %[a1] \n\t"
    : [a0]"+r"(a0)
    : [a1]"r"(a1)
  );
  return a0;
}
181
Average3(uint32_t a0,uint32_t a1,uint32_t a2)182 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
183 return Average2(Average2(a0, a2), a1);
184 }
185
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)186 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
187 uint32_t a2, uint32_t a3) {
188 return Average2(Average2(a0, a1), Average2(a2, a3));
189 }
190
// Predictor 5: Average3 of the left pixel, top[0] and top[1].
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average3(l, t, tr);
}
195
// Predictor 6: Average2 of the left pixel and top[-1].
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t tl = top[-1];
  return Average2(l, tl);
}
200
// Predictor 7: Average2 of the left pixel and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t t = top[0];
  return Average2(l, t);
}
205
// Predictor 8: Average2 of top[-1] and top[0]; 'left' is not used.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  (void)left;
  return Average2(tl, t);
}
211
// Predictor 9: Average2 of top[0] and top[1]; 'left' is not used.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  (void)left;
  return Average2(t, tr);
}
217
// Predictor 10: Average4 of the left pixel, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t tl = top[-1];
  const uint32_t t = top[0];
  const uint32_t tr = top[1];
  return Average4(l, tl, t, tr);
}
222
// Predictor 11: Select() between top[0] and the left pixel, using top[-1] as
// the reference value.
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t t = top[0];
  const uint32_t l = left[0];
  const uint32_t tl = top[-1];
  return Select(t, l, tl);
}
227
// Predictor 12: ClampedAddSubtractFull(left, top[0], top[-1]).
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractFull(l, t, tl);
}
232
// Predictor 13: ClampedAddSubtractHalf(left, top[0], top[-1]).
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  const uint32_t l = left[0];
  const uint32_t t = top[0];
  const uint32_t tl = top[-1];
  return ClampedAddSubtractHalf(l, t, tl);
}
237
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
//
// Reads 'num_pixels' 32-bit pixels from 'src' and writes the transformed
// pixels to 'dst'. For every pixel, bits 8..15 (green) are added, modulo 256,
// to byte 0 and byte 2 of the word; bytes 1 and 3 are left unchanged.
//
// The assembly has two loops: the first handles four pixels per iteration up
// to src + (num_pixels & ~3), the second handles the remaining pixels one at
// a time up to src + num_pixels. The code runs under '.set noreorder', so the
// space-indented instruction after each branch is its delay slot and executes
// whether or not the branch is taken.
static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
                                           uint32_t* dst) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // Skip the 4-pixel loop when there are fewer than four pixels.
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // Extract green (bits 8..15) of each pixel.
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "ext %[temp5], %[temp1], 8, 8 \n\t"
    "ext %[temp6], %[temp2], 8, 8 \n\t"
    "ext %[temp7], %[temp3], 8, 8 \n\t"
    // Pointers are advanced early; the stores below use negative offsets.
    "addiu %[src], %[src], 16 \n\t"
    "addiu %[dst], %[dst], 16 \n\t"
    // Replicate green into both halfwords: 0x00GG00GG.
    "replv.ph %[temp4], %[temp4] \n\t"
    "replv.ph %[temp5], %[temp5] \n\t"
    "replv.ph %[temp6], %[temp6] \n\t"
    "replv.ph %[temp7], %[temp7] \n\t"
    // Per-byte wrapping add: only bytes 0 and 2 receive a non-zero addend.
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
    "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
    "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
    "sw %[temp0], -16(%[dst]) \n\t"
    "sw %[temp1], -12(%[dst]) \n\t"
    "sw %[temp2], -8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " sw %[temp3], -4(%[dst]) \n\t"
  "3: \n\t"
    // Skip the single-pixel loop when no pixels remain.
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "addiu %[dst], %[dst], 4 \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sw %[temp0], -4(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
      [temp7]"=&r"(temp7)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
296
// Applies the inverse color transform described by 'm' to 'num_pixels' pixels
// read from 'src', writing the result to 'dst'.
//
// The assembly processes pixels two at a time up to src + (num_pixels & ~1).
// Each multiplier and each green/red value is sign-extended from 8 bits
// (shll.ph 8 / shra.ph 8) before the 16-bit lane multiply, and every product
// is arithmetically shifted right by 5:
//   new_red  = red  + ((green_to_red  * green)   >> 5)
//   new_blue = blue + ((green_to_blue * green)   >> 5)
//                   + ((red_to_blue   * new_red) >> 5)
// Only the low 8 bits of new_red / new_blue are kept. Both source words are
// first copied to 'dst' unchanged, then the red and blue bytes are
// overwritten with 'sb'.
// NOTE(review): the 'sb' offsets (-6/-2 for red, -8/-4 for blue) address
// bytes 2 and 0 of each stored word, which presumably assumes a little-endian
// target -- confirm.
//
// An odd trailing pixel is delegated to VP8LTransformColorInverse_C; 'src'
// and 'dst' have already been advanced past the pairs by the assembly.
static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
                                            const uint32_t* src, int num_pixels,
                                            uint32_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  uint32_t argb, argb1, new_red;
  const uint32_t G_to_R = m->green_to_red_;
  const uint32_t G_to_B = m->green_to_blue_;
  const uint32_t R_to_B = m->red_to_blue_;
  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // Nothing to do in assembly when there is no complete pixel pair.
    "beq %[src], %[p_loop_end], 1f \n\t"
    " nop \n\t"
    // Replicate each multiplier into both halfwords and sign-extend its low
    // 8 bits within each 16-bit lane.
    "replv.ph %[temp0], %[G_to_R] \n\t"
    "replv.ph %[temp1], %[G_to_B] \n\t"
    "replv.ph %[temp2], %[R_to_B] \n\t"
    "shll.ph %[temp0], %[temp0], 8 \n\t"
    "shll.ph %[temp1], %[temp1], 8 \n\t"
    "shll.ph %[temp2], %[temp2], 8 \n\t"
    "shra.ph %[temp0], %[temp0], 8 \n\t"
    "shra.ph %[temp1], %[temp1], 8 \n\t"
    "shra.ph %[temp2], %[temp2], 8 \n\t"
  "0: \n\t"
    // Load two pixels and copy them to dst; red/blue are patched below.
    "lw %[argb], 0(%[src]) \n\t"
    "lw %[argb1], 4(%[src]) \n\t"
    "sw %[argb], 0(%[dst]) \n\t"
    "sw %[argb1], 4(%[dst]) \n\t"
    "addiu %[src], %[src], 8 \n\t"
    "addiu %[dst], %[dst], 8 \n\t"
    // temp3 = sign-extended green of both pixels (first pixel in the high
    // halfword, second pixel in the low halfword).
    "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
    "preceu.ph.qbra %[temp3], %[temp3] \n\t"
    "shll.ph %[temp3], %[temp3], 8 \n\t"
    "shra.ph %[temp3], %[temp3], 8 \n\t"
    // temp5 = green * G_to_R, temp3 = green * G_to_B (per lane).
    "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
    "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
    // new_red = high halfwords (bits 16..31) of both pixels;
    // argb1   = low halfwords (bits 0..15) of both pixels.
    "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
    "ins %[argb1], %[argb], 16, 16 \n\t"
    "shra.ph %[temp5], %[temp5], 5 \n\t"
    "shra.ph %[temp3], %[temp3], 5 \n\t"
    "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
    "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
    // temp5 = low byte of each new_red lane; temp4 = its sign-extension,
    // multiplied by R_to_B.
    "preceu.ph.qbra %[temp5], %[new_red] \n\t"
    "shll.ph %[temp4], %[temp5], 8 \n\t"
    "shra.ph %[temp4], %[temp4], 8 \n\t"
    "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
    // Store red of the second pixel, then shift down red of the first pixel.
    "sb %[temp5], -2(%[dst]) \n\t"
    "sra %[temp5], %[temp5], 16 \n\t"
    "shra.ph %[temp4], %[temp4], 5 \n\t"
    "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
    // temp3 = low byte of each updated low-halfword lane (new blue).
    "preceu.ph.qbra %[temp3], %[argb1] \n\t"
    "sb %[temp5], -6(%[dst]) \n\t"
    "sb %[temp3], -4(%[dst]) \n\t"
    "sra %[temp3], %[temp3], 16 \n\t"
    "bne %[src], %[p_loop_end], 0b \n\t"
    // Delay slot: store blue of the first pixel.
    " sb %[temp3], -8(%[dst]) \n\t"
  "1: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
      [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
    : "memory", "hi", "lo"
  );

  // Fall-back to C-version for left-overs.
  if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
}
367
// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel at
// 'dst'. For each pixel the output bytes are, in order: bits 16..23,
// bits 8..15 and bits 0..7 of the source word; bits 24..31 are dropped.
// NOTE(review): the byte order described for the 'usw'/'ush' stores assumes
// a little-endian target -- confirm.
//
// Loop 0 converts four pixels (16 input bytes) into 12 output bytes per
// iteration, up to src + (num_pixels & ~3); loop 1 converts the remaining
// pixels one at a time. 'dst' may be unaligned, hence the unaligned store
// macros 'usw'/'ush'. Under '.set noreorder' the space-indented instruction
// after each branch is its delay slot.
static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
                                       int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    // Shuffle the 12 wanted bytes of the four pixels into three words:
    // temp0 -> output bytes 0..3, temp2 -> 4..7, temp3 -> 8..11.
    "ins %[temp3], %[temp2], 24, 8 \n\t"
    "sll %[temp2], %[temp2], 8 \n\t"
    "rotr %[temp3], %[temp3], 16 \n\t"
    "ins %[temp2], %[temp1], 0, 16 \n\t"
    "sll %[temp1], %[temp1], 8 \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "balign %[temp0], %[temp1], 1 \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "rotr %[temp0], %[temp0], 16 \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    // Swap the bytes within each halfword so that the low halfword holds
    // (bits 0..7, bits 8..15) ready for the two trailing output bytes.
    "wsbh %[temp1], %[temp0] \n\t"
    "addiu %[dst], %[dst], 3 \n\t"
    "ush %[temp1], -2(%[dst]) \n\t"
    // Bits 16..23 become the first output byte (stored in the delay slot).
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -3(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
419
// Converts 'num_pixels' 32-bit pixels from 'src' into 4 bytes per pixel at
// 'dst'. Each word has the bytes within its halfwords swapped ('wsbh') and is
// then rotated left by one byte ('balign x, x, 1'), which exchanges byte 0
// with byte 2 and keeps bytes 1 and 3 in place.
//
// Loop 0 handles four pixels per iteration up to src + (num_pixels & ~3);
// loop 1 handles the remaining pixels one at a time. 'dst' may be unaligned,
// hence the unaligned store macro 'usw'. Under '.set noreorder' the
// space-indented instruction after each branch is its delay slot.
static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
                                        int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "balign %[temp1], %[temp1], 1 \n\t"
    "balign %[temp2], %[temp2], 1 \n\t"
    "balign %[temp3], %[temp3], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp2], 8(%[dst]) \n\t"
    "usw %[temp3], 12(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 16 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 4 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
469
// Converts 'num_pixels' 32-bit pixels from 'src' into 2 bytes per pixel at
// 'dst', keeping the top 4 bits of each of the four 8-bit channels.
//
// Per pixel, the high nibble of bits 24..31 is inserted into the low nibble
// of byte 0 and the high nibble of bits 8..15 into the low nibble of byte 2;
// 'precr.qb.ph' then gathers byte 2 and byte 0 into one 16-bit value
// (byte 2 in the upper 8 bits). When WEBP_SWAP_16BIT_CSP is not 1, the two
// bytes of each 16-bit value are swapped ('wsbh') before being stored.
//
// Loop 0 handles four pixels (8 output bytes) per iteration up to
// src + (num_pixels & ~3); loop 1 handles the remaining pixels one at a time.
// Under '.set noreorder' the space-indented instruction after each branch is
// its delay slot.
static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
                                            int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // For each pixel: temp4 = bits 28..31, temp5 = bits 12..15, inserted at
    // bit 0 and bit 16 respectively (interleaved across the four pixels).
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp1], 28, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp1], 12, 4 \n\t"
    "ins %[temp1], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp2], 28, 4 \n\t"
    "ins %[temp1], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp2], 12, 4 \n\t"
    "ins %[temp2], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp3], 28, 4 \n\t"
    "ins %[temp2], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp3], 12, 4 \n\t"
    "ins %[temp3], %[temp4], 0, 4 \n\t"
    // Pack pixels 0/1 into temp1 and pixels 2/3 into temp3.
    "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
    "ins %[temp3], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#else
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush %[temp0], 0(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "ush %[temp0], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
543
// Converts 'num_pixels' 32-bit pixels from 'src' into 2 bytes per pixel at
// 'dst'. Each 16-bit output value is assembled as
//   bits 11..15 = source bits 19..23 (top 5 bits of byte 2)
//   bits  5..10 = source bits 10..15 (top 6 bits of byte 1)
//   bits  0..4  = source bits  3..7  (top 5 bits of byte 0)
// Bits 24..31 of the source are dropped. When WEBP_SWAP_16BIT_CSP is not 1,
// the two bytes of each 16-bit value are swapped ('wsbh') before the store.
//
// Loop 0 handles four pixels (8 output bytes) per iteration up to
// src + (num_pixels & ~3), pairing two 16-bit results per word with 'append';
// loop 1 handles the remaining pixels one at a time. Under '.set noreorder'
// the space-indented instruction after each branch is its delay slot.
static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
                                          int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // Pixel 0 -> temp4, pixel 1 -> temp0, pixel 2 -> temp1, pixel 3 -> temp2.
    // For each: take bits 8..23, overwrite its low 11 bits with source bits
    // 5..15, then its low 5 bits with source bits 3..7.
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp1], 5, 11 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
    "ext %[temp0], %[temp1], 8, 16 \n\t"
    "ext %[temp1], %[temp1], 3, 5 \n\t"
    "ins %[temp0], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp2], 5, 11 \n\t"
    "ins %[temp0], %[temp1], 0, 5 \n\t"
    "ext %[temp1], %[temp2], 8, 16 \n\t"
    "ext %[temp2], %[temp2], 3, 5 \n\t"
    "ins %[temp1], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp3], 5, 11 \n\t"
    "ins %[temp1], %[temp2], 0, 5 \n\t"
    "ext %[temp2], %[temp3], 8, 16 \n\t"
    "ext %[temp3], %[temp3], 3, 5 \n\t"
    "ins %[temp2], %[temp5], 0, 11 \n\t"
    // temp0 = (pixel 1 << 16) | pixel 0.
    "append %[temp0], %[temp4], 16 \n\t"
    "ins %[temp2], %[temp3], 0, 5 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    // temp2 = (pixel 3 << 16) | pixel 2.
    "append %[temp2], %[temp1], 16 \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush %[temp4], 0(%[dst]) \n\t"
#else
    "wsbh %[temp4], %[temp4] \n\t"
    "ush %[temp4], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
621
// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel at
// 'dst' by dropping bits 24..31 of every source word and packing the three
// remaining bytes of consecutive pixels back to back.
// NOTE(review): the resulting byte order in memory (byte 0, byte 1, byte 2 of
// each word) assumes a little-endian target -- confirm.
//
// Loop 0 packs four pixels (16 input bytes) into three words (12 output
// bytes) per iteration up to src + (num_pixels & ~3); loop 1 handles the
// remaining pixels one at a time. 'dst' may be unaligned, hence the unaligned
// store macros 'usw'/'ush'. Under '.set noreorder' the space-indented
// instruction after each branch is its delay slot.
static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
                                       int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    // Word 0: low 3 bytes of pixel 0 + byte 0 of pixel 1 in the top byte.
    "ins %[temp0], %[temp1], 24, 8 \n\t"
    // Word 1: bytes 1..2 of pixel 1 + bytes 0..1 of pixel 2 in the top half.
    "sra %[temp1], %[temp1], 8 \n\t"
    "ins %[temp1], %[temp2], 16, 16 \n\t"
    // Word 2: byte 2 of pixel 2 in the low byte + low 3 bytes of pixel 3.
    "sll %[temp2], %[temp2], 8 \n\t"
    "balign %[temp3], %[temp2], 1 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "addiu %[dst], %[dst], 3 \n\t"
    // Low halfword -> first two output bytes; bits 16..23 -> third byte
    // (stored in the branch delay slot).
    "ush %[temp0], -3(%[dst]) \n\t"
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -1(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
667
668 //------------------------------------------------------------------------------
669 // Entry point
670
671 extern void VP8LDspInitMIPSdspR2(void);
672
VP8LDspInitMIPSdspR2(void)673 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
674 VP8LMapColor32b = MapARGB_MIPSdspR2;
675 VP8LMapColor8b = MapAlpha_MIPSdspR2;
676
677 VP8LPredictors[5] = Predictor5_MIPSdspR2;
678 VP8LPredictors[6] = Predictor6_MIPSdspR2;
679 VP8LPredictors[7] = Predictor7_MIPSdspR2;
680 VP8LPredictors[8] = Predictor8_MIPSdspR2;
681 VP8LPredictors[9] = Predictor9_MIPSdspR2;
682 VP8LPredictors[10] = Predictor10_MIPSdspR2;
683 VP8LPredictors[11] = Predictor11_MIPSdspR2;
684 VP8LPredictors[12] = Predictor12_MIPSdspR2;
685 VP8LPredictors[13] = Predictor13_MIPSdspR2;
686
687 VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
688 VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
689
690 VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
691 VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
692 VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
693 VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
694 VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
695 }
696
697 #else // !WEBP_USE_MIPS_DSP_R2
698
// Built without WEBP_USE_MIPS_DSP_R2: none of the routines above are
// compiled. NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file;
// it presumably supplies a do-nothing VP8LDspInitMIPSdspR2 so the symbol
// still exists -- confirm in src/dsp/dsp.h.
WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
700
701 #endif // WEBP_USE_MIPS_DSP_R2
702