• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
13 //             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
14 
15 #include "./dsp.h"
16 
17 #if defined(WEBP_USE_MIPS_DSP_R2)
18 
19 #include "./lossless.h"
20 #include "./lossless_common.h"
21 
// Generates 'static void FUNC_NAME(src, color_map, dst, y_start, y_end, width)'
// which, for every row in [y_start, y_end), replaces 'width' source pixels by
// entries looked up in 'color_map'.
// Pixels are handled four at a time in assembly:
//  - TYPE == uint8_t : the source byte is the index and bits 8..15 of the
//    looked-up entry are stored as the result byte.
//  - TYPE == uint32_t: bits 8..15 of the source word are the index and the
//    whole looked-up entry is stored.
// The remaining (width & 3) pixels of each row go through GET_INDEX/GET_VALUE.
// Only /* */ comments may be used inside the macro body (line continuations).
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
static void FUNC_NAME(const TYPE* src,                                         \
                      const uint32_t* const color_map,                         \
                      TYPE* dst, int y_start, int y_end,                       \
                      int width) {                                             \
  const int num_quads = width >> 2;                                            \
  const int num_left = width & 3;                                              \
  int row;                                                                     \
  for (row = y_start; row < y_end; ++row) {                                    \
    int i;                                                                     \
    for (i = 0; i < num_quads; ++i) {                                          \
      int pix0, pix1, pix2, pix3;                                              \
      __asm__ volatile (                                                       \
      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
        "lbu       %[pix0],  0(%[src])                  \n\t"                  \
        "lbu       %[pix1],  1(%[src])                  \n\t"                  \
        "lbu       %[pix2],  2(%[src])                  \n\t"                  \
        "lbu       %[pix3],  3(%[src])                  \n\t"                  \
        "addiu     %[src],   %[src],      4             \n\t"                  \
      ".endif                                           \n\t"                  \
      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
        "lw        %[pix0],  0(%[src])                  \n\t"                  \
        "lw        %[pix1],  4(%[src])                  \n\t"                  \
        "lw        %[pix2],  8(%[src])                  \n\t"                  \
        "lw        %[pix3],  12(%[src])                 \n\t"                  \
        "ext       %[pix0],  %[pix0],     8,        8   \n\t"                  \
        "ext       %[pix1],  %[pix1],     8,        8   \n\t"                  \
        "ext       %[pix2],  %[pix2],     8,        8   \n\t"                  \
        "ext       %[pix3],  %[pix3],     8,        8   \n\t"                  \
        "addiu     %[src],   %[src],      16            \n\t"                  \
      ".endif                                           \n\t"                  \
        "sll       %[pix0],  %[pix0],     2             \n\t"                  \
        "sll       %[pix1],  %[pix1],     2             \n\t"                  \
        "sll       %[pix2],  %[pix2],     2             \n\t"                  \
        "sll       %[pix3],  %[pix3],     2             \n\t"                  \
        "lwx       %[pix0],  %[pix0](%[color_map])      \n\t"                  \
        "lwx       %[pix1],  %[pix1](%[color_map])      \n\t"                  \
        "lwx       %[pix2],  %[pix2](%[color_map])      \n\t"                  \
        "lwx       %[pix3],  %[pix3](%[color_map])      \n\t"                  \
      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
        "ext       %[pix0],  %[pix0],     8,        8   \n\t"                  \
        "ext       %[pix1],  %[pix1],     8,        8   \n\t"                  \
        "ext       %[pix2],  %[pix2],     8,        8   \n\t"                  \
        "ext       %[pix3],  %[pix3],     8,        8   \n\t"                  \
        "sb        %[pix0],  0(%[dst])                  \n\t"                  \
        "sb        %[pix1],  1(%[dst])                  \n\t"                  \
        "sb        %[pix2],  2(%[dst])                  \n\t"                  \
        "sb        %[pix3],  3(%[dst])                  \n\t"                  \
        "addiu     %[dst],   %[dst],      4             \n\t"                  \
      ".endif                                           \n\t"                  \
      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
        "sw        %[pix0],  0(%[dst])                  \n\t"                  \
        "sw        %[pix1],  4(%[dst])                  \n\t"                  \
        "sw        %[pix2],  8(%[dst])                  \n\t"                  \
        "sw        %[pix3],  12(%[dst])                 \n\t"                  \
        "addiu     %[dst],   %[dst],      16            \n\t"                  \
      ".endif                                           \n\t"                  \
        : [pix0]"=&r"(pix0), [pix1]"=&r"(pix1), [pix2]"=&r"(pix2),             \
          [pix3]"=&r"(pix3), [src]"+&r"(src), [dst]"+r"(dst)                   \
        : [color_map]"r"(color_map)                                            \
        : "memory"                                                             \
      );                                                                       \
    }                                                                          \
    for (i = 0; i < num_left; ++i) {                                           \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}

MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93 
94 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
95                                                    uint32_t c2) {
96   int temp0, temp1, temp2, temp3, temp4, temp5;
97   __asm__ volatile (
98     "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"
99     "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"
100     "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"
101     "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
102     "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"
103     "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
104     "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"
105     "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"
106     "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"
107     "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"
108     "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"
109     "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
110     "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"
111     : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
112       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
113     : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
114     : "memory"
115   );
116   return temp2;
117 }
118 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)119 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
120                                                    uint32_t c2) {
121   int temp0, temp1, temp2, temp3, temp4, temp5;
122   __asm__ volatile (
123     "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"
124     "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"
125     "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"
126     "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"
127     "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"
128     "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"
129     "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"
130     "shrl.ph          %[temp5],   %[temp3],   15          \n\t"
131     "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
132     "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"
133     "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
134     "shra.ph          %[temp3],   %[temp3],   1           \n\t"
135     "shra.ph          %[temp4],   %[temp4],   1           \n\t"
136     "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"
137     "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"
138     "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"
139     "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
140     "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"
141     : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
142       [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
143     : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
144     : "memory"
145   );
146   return temp1;
147 }
148 
Select(uint32_t a,uint32_t b,uint32_t c)149 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
150   int temp0, temp1, temp2, temp3, temp4, temp5;
151   __asm__ volatile (
152     "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"
153     "pick.qb      %[temp1], %[b],     %[c]             \n\t"
154     "pick.qb      %[temp2], %[c],     %[b]             \n\t"
155     "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"
156     "pick.qb      %[temp4], %[a],     %[c]             \n\t"
157     "pick.qb      %[temp5], %[c],     %[a]             \n\t"
158     "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"
159     "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"
160     "raddu.w.qb   %[temp3], %[temp3]                   \n\t"
161     "raddu.w.qb   %[temp0], %[temp0]                   \n\t"
162     "subu         %[temp3], %[temp3], %[temp0]         \n\t"
163     "slti         %[temp0], %[temp3], 0x1              \n\t"
164     "movz         %[a],     %[b],     %[temp0]         \n\t"
165     : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
166       [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
167       [a]"+&r"(a)
168     : [b]"r"(b), [c]"r"(c)
169   );
170   return a;
171 }
172 
Average2(uint32_t a0,uint32_t a1)173 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
174   __asm__ volatile (
175     "adduh.qb    %[a0], %[a0], %[a1]       \n\t"
176     : [a0]"+r"(a0)
177     : [a1]"r"(a1)
178   );
179   return a0;
180 }
181 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)182 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
183   return Average2(Average2(a0, a2), a1);
184 }
185 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)186 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
187                                      uint32_t a2, uint32_t a3) {
188   return Average2(Average2(a0, a1), Average2(a2, a3));
189 }
190 
// Prediction: Average3 of 'left', top[0] and top[1].
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  const uint32_t t1 = top[1];
  return Average3(left, t0, t1);
}
194 
// Prediction: Average2 of 'left' and top[-1].
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  const uint32_t t_prev = top[-1];
  return Average2(left, t_prev);
}
198 
// Prediction: Average2 of 'left' and top[0].
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  return Average2(left, t0);
}
202 
// Prediction: Average2 of top[-1] and top[0]. 'left' is not used.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  const uint32_t t_prev = top[-1];
  const uint32_t t0 = top[0];
  (void)left;
  return Average2(t_prev, t0);
}
207 
// Prediction: Average2 of top[0] and top[1]. 'left' is not used.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  const uint32_t t1 = top[1];
  (void)left;
  return Average2(t0, t1);
}
212 
// Prediction: Average4 of 'left', top[-1], top[0] and top[1].
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  const uint32_t t_prev = top[-1];
  const uint32_t t0 = top[0];
  const uint32_t t1 = top[1];
  return Average4(left, t_prev, t0, t1);
}
216 
// Prediction: Select() between top[0] and 'left', using top[-1] as the
// reference value.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  const uint32_t t_prev = top[-1];
  return Select(t0, left, t_prev);
}
220 
// Prediction: ClampedAddSubtractFull of 'left', top[0] and top[-1].
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  const uint32_t t_prev = top[-1];
  return ClampedAddSubtractFull(left, t0, t_prev);
}
224 
// Prediction: ClampedAddSubtractHalf of 'left', top[0] and top[-1].
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t t0 = top[0];
  const uint32_t t_prev = top[-1];
  return ClampedAddSubtractHalf(left, t0, t_prev);
}
228 
229 // Add green to blue and red channels (i.e. perform the inverse transform of
230 // 'subtract green').
AddGreenToBlueAndRed(const uint32_t * src,int num_pixels,uint32_t * dst)231 static void AddGreenToBlueAndRed(const uint32_t* src, int num_pixels,
232                                  uint32_t* dst) {
233   uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
234   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
235   const uint32_t* const p_loop2_end = src + num_pixels;
236   __asm__ volatile (
237     ".set       push                                          \n\t"
238     ".set       noreorder                                     \n\t"
239     "beq        %[src],          %[p_loop1_end],     3f       \n\t"
240     " nop                                                     \n\t"
241   "0:                                                         \n\t"
242     "lw         %[temp0],        0(%[src])                    \n\t"
243     "lw         %[temp1],        4(%[src])                    \n\t"
244     "lw         %[temp2],        8(%[src])                    \n\t"
245     "lw         %[temp3],        12(%[src])                   \n\t"
246     "ext        %[temp4],        %[temp0],           8,    8  \n\t"
247     "ext        %[temp5],        %[temp1],           8,    8  \n\t"
248     "ext        %[temp6],        %[temp2],           8,    8  \n\t"
249     "ext        %[temp7],        %[temp3],           8,    8  \n\t"
250     "addiu      %[src],          %[src],             16       \n\t"
251     "addiu      %[dst],          %[dst],             16       \n\t"
252     "replv.ph   %[temp4],        %[temp4]                     \n\t"
253     "replv.ph   %[temp5],        %[temp5]                     \n\t"
254     "replv.ph   %[temp6],        %[temp6]                     \n\t"
255     "replv.ph   %[temp7],        %[temp7]                     \n\t"
256     "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
257     "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
258     "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
259     "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
260     "sw         %[temp0],        -16(%[dst])                  \n\t"
261     "sw         %[temp1],        -12(%[dst])                  \n\t"
262     "sw         %[temp2],        -8(%[dst])                   \n\t"
263     "bne        %[src],          %[p_loop1_end],     0b       \n\t"
264     " sw        %[temp3],        -4(%[dst])                   \n\t"
265   "3:                                                         \n\t"
266     "beq        %[src],          %[p_loop2_end],     2f       \n\t"
267     " nop                                                     \n\t"
268   "1:                                                         \n\t"
269     "lw         %[temp0],        0(%[src])                    \n\t"
270     "addiu      %[src],          %[src],             4        \n\t"
271     "addiu      %[dst],          %[dst],             4        \n\t"
272     "ext        %[temp4],        %[temp0],           8,    8  \n\t"
273     "replv.ph   %[temp4],        %[temp4]                     \n\t"
274     "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
275     "bne        %[src],          %[p_loop2_end],     1b       \n\t"
276     " sw        %[temp0],        -4(%[dst])                   \n\t"
277   "2:                                                         \n\t"
278     ".set       pop                                           \n\t"
279     : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
280       [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
281       [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
282       [temp7]"=&r"(temp7)
283     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
284     : "memory"
285   );
286 }
287 
TransformColorInverse(const VP8LMultipliers * const m,const uint32_t * src,int num_pixels,uint32_t * dst)288 static void TransformColorInverse(const VP8LMultipliers* const m,
289                                   const uint32_t* src, int num_pixels,
290                                   uint32_t* dst) {
291   int temp0, temp1, temp2, temp3, temp4, temp5;
292   uint32_t argb, argb1, new_red;
293   const uint32_t G_to_R = m->green_to_red_;
294   const uint32_t G_to_B = m->green_to_blue_;
295   const uint32_t R_to_B = m->red_to_blue_;
296   const uint32_t* const p_loop_end = src + (num_pixels & ~1);
297   __asm__ volatile (
298     ".set            push                                    \n\t"
299     ".set            noreorder                               \n\t"
300     "beq             %[src],       %[p_loop_end],  1f        \n\t"
301     " nop                                                    \n\t"
302     "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
303     "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
304     "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
305     "shll.ph         %[temp0],     %[temp0],       8         \n\t"
306     "shll.ph         %[temp1],     %[temp1],       8         \n\t"
307     "shll.ph         %[temp2],     %[temp2],       8         \n\t"
308     "shra.ph         %[temp0],     %[temp0],       8         \n\t"
309     "shra.ph         %[temp1],     %[temp1],       8         \n\t"
310     "shra.ph         %[temp2],     %[temp2],       8         \n\t"
311   "0:                                                        \n\t"
312     "lw              %[argb],      0(%[src])                 \n\t"
313     "lw              %[argb1],     4(%[src])                 \n\t"
314     "sw              %[argb],      0(%[dst])                 \n\t"
315     "sw              %[argb1],     4(%[dst])                 \n\t"
316     "addiu           %[src],       %[src],         8         \n\t"
317     "addiu           %[dst],       %[dst],         8         \n\t"
318     "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
319     "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
320     "shll.ph         %[temp3],     %[temp3],       8         \n\t"
321     "shra.ph         %[temp3],     %[temp3],       8         \n\t"
322     "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"
323     "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"
324     "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"
325     "ins             %[argb1],     %[argb],        16,   16  \n\t"
326     "shra.ph         %[temp5],     %[temp5],       5         \n\t"
327     "shra.ph         %[temp3],     %[temp3],       5         \n\t"
328     "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"
329     "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
330     "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"
331     "shll.ph         %[temp4],     %[temp5],       8         \n\t"
332     "shra.ph         %[temp4],     %[temp4],       8         \n\t"
333     "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
334     "sb              %[temp5],     -2(%[dst])                \n\t"
335     "sra             %[temp5],     %[temp5],       16        \n\t"
336     "shra.ph         %[temp4],     %[temp4],       5         \n\t"
337     "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
338     "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
339     "sb              %[temp5],     -6(%[dst])                \n\t"
340     "sb              %[temp3],     -4(%[dst])                \n\t"
341     "sra             %[temp3],     %[temp3],       16        \n\t"
342     "bne             %[src],       %[p_loop_end],  0b        \n\t"
343     " sb             %[temp3],     -8(%[dst])                \n\t"
344   "1:                                                        \n\t"
345     ".set            pop                                     \n\t"
346     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
347       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
348       [new_red]"=&r"(new_red), [argb]"=&r"(argb),
349       [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
350     : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
351       [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
352     : "memory", "hi", "lo"
353   );
354 
355   // Fall-back to C-version for left-overs.
356   if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
357 }
358 
ConvertBGRAToRGB(const uint32_t * src,int num_pixels,uint8_t * dst)359 static void ConvertBGRAToRGB(const uint32_t* src,
360                              int num_pixels, uint8_t* dst) {
361   int temp0, temp1, temp2, temp3;
362   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
363   const uint32_t* const p_loop2_end = src + num_pixels;
364   __asm__ volatile (
365     ".set       push                                       \n\t"
366     ".set       noreorder                                  \n\t"
367     "beq        %[src],      %[p_loop1_end],    3f         \n\t"
368     " nop                                                  \n\t"
369   "0:                                                      \n\t"
370     "lw         %[temp3],    12(%[src])                    \n\t"
371     "lw         %[temp2],    8(%[src])                     \n\t"
372     "lw         %[temp1],    4(%[src])                     \n\t"
373     "lw         %[temp0],    0(%[src])                     \n\t"
374     "ins        %[temp3],    %[temp2],          24,   8    \n\t"
375     "sll        %[temp2],    %[temp2],          8          \n\t"
376     "rotr       %[temp3],    %[temp3],          16         \n\t"
377     "ins        %[temp2],    %[temp1],          0,    16   \n\t"
378     "sll        %[temp1],    %[temp1],          8          \n\t"
379     "wsbh       %[temp3],    %[temp3]                      \n\t"
380     "balign     %[temp0],    %[temp1],          1          \n\t"
381     "wsbh       %[temp2],    %[temp2]                      \n\t"
382     "wsbh       %[temp0],    %[temp0]                      \n\t"
383     "usw        %[temp3],    8(%[dst])                     \n\t"
384     "rotr       %[temp0],    %[temp0],          16         \n\t"
385     "usw        %[temp2],    4(%[dst])                     \n\t"
386     "addiu      %[src],      %[src],            16         \n\t"
387     "usw        %[temp0],    0(%[dst])                     \n\t"
388     "bne        %[src],      %[p_loop1_end],    0b         \n\t"
389     " addiu     %[dst],      %[dst],            12         \n\t"
390   "3:                                                      \n\t"
391     "beq        %[src],      %[p_loop2_end],    2f         \n\t"
392     " nop                                                  \n\t"
393   "1:                                                      \n\t"
394     "lw         %[temp0],    0(%[src])                     \n\t"
395     "addiu      %[src],      %[src],            4          \n\t"
396     "wsbh       %[temp1],    %[temp0]                      \n\t"
397     "addiu      %[dst],      %[dst],            3          \n\t"
398     "ush        %[temp1],    -2(%[dst])                    \n\t"
399     "sra        %[temp0],    %[temp0],          16         \n\t"
400     "bne        %[src],      %[p_loop2_end],    1b         \n\t"
401     " sb        %[temp0],    -3(%[dst])                    \n\t"
402   "2:                                                      \n\t"
403     ".set       pop                                        \n\t"
404     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
405       [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
406     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
407     : "memory"
408   );
409 }
410 
ConvertBGRAToRGBA(const uint32_t * src,int num_pixels,uint8_t * dst)411 static void ConvertBGRAToRGBA(const uint32_t* src,
412                               int num_pixels, uint8_t* dst) {
413   int temp0, temp1, temp2, temp3;
414   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
415   const uint32_t* const p_loop2_end = src + num_pixels;
416   __asm__ volatile (
417     ".set       push                                       \n\t"
418     ".set       noreorder                                  \n\t"
419     "beq        %[src],      %[p_loop1_end],    3f         \n\t"
420     " nop                                                  \n\t"
421   "0:                                                      \n\t"
422     "lw         %[temp0],    0(%[src])                     \n\t"
423     "lw         %[temp1],    4(%[src])                     \n\t"
424     "lw         %[temp2],    8(%[src])                     \n\t"
425     "lw         %[temp3],    12(%[src])                    \n\t"
426     "wsbh       %[temp0],    %[temp0]                      \n\t"
427     "wsbh       %[temp1],    %[temp1]                      \n\t"
428     "wsbh       %[temp2],    %[temp2]                      \n\t"
429     "wsbh       %[temp3],    %[temp3]                      \n\t"
430     "addiu      %[src],      %[src],            16         \n\t"
431     "balign     %[temp0],    %[temp0],          1          \n\t"
432     "balign     %[temp1],    %[temp1],          1          \n\t"
433     "balign     %[temp2],    %[temp2],          1          \n\t"
434     "balign     %[temp3],    %[temp3],          1          \n\t"
435     "usw        %[temp0],    0(%[dst])                     \n\t"
436     "usw        %[temp1],    4(%[dst])                     \n\t"
437     "usw        %[temp2],    8(%[dst])                     \n\t"
438     "usw        %[temp3],    12(%[dst])                    \n\t"
439     "bne        %[src],      %[p_loop1_end],    0b         \n\t"
440     " addiu     %[dst],      %[dst],            16         \n\t"
441   "3:                                                      \n\t"
442     "beq        %[src],      %[p_loop2_end],    2f         \n\t"
443     " nop                                                  \n\t"
444   "1:                                                      \n\t"
445     "lw         %[temp0],    0(%[src])                     \n\t"
446     "wsbh       %[temp0],    %[temp0]                      \n\t"
447     "addiu      %[src],      %[src],            4          \n\t"
448     "balign     %[temp0],    %[temp0],          1          \n\t"
449     "usw        %[temp0],    0(%[dst])                     \n\t"
450     "bne        %[src],      %[p_loop2_end],    1b         \n\t"
451     " addiu     %[dst],      %[dst],            4          \n\t"
452   "2:                                                      \n\t"
453     ".set       pop                                        \n\t"
454     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
455       [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
456     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
457     : "memory"
458   );
459 }
460 
ConvertBGRAToRGBA4444(const uint32_t * src,int num_pixels,uint8_t * dst)461 static void ConvertBGRAToRGBA4444(const uint32_t* src,
462                                   int num_pixels, uint8_t* dst) {
463   int temp0, temp1, temp2, temp3, temp4, temp5;
464   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
465   const uint32_t* const p_loop2_end = src + num_pixels;
466   __asm__ volatile (
467     ".set           push                                       \n\t"
468     ".set           noreorder                                  \n\t"
469     "beq            %[src],      %[p_loop1_end],    3f         \n\t"
470     " nop                                                      \n\t"
471   "0:                                                          \n\t"
472     "lw             %[temp0],    0(%[src])                     \n\t"
473     "lw             %[temp1],    4(%[src])                     \n\t"
474     "lw             %[temp2],    8(%[src])                     \n\t"
475     "lw             %[temp3],    12(%[src])                    \n\t"
476     "ext            %[temp4],    %[temp0],          28,   4    \n\t"
477     "ext            %[temp5],    %[temp0],          12,   4    \n\t"
478     "ins            %[temp0],    %[temp4],          0,    4    \n\t"
479     "ext            %[temp4],    %[temp1],          28,   4    \n\t"
480     "ins            %[temp0],    %[temp5],          16,   4    \n\t"
481     "ext            %[temp5],    %[temp1],          12,   4    \n\t"
482     "ins            %[temp1],    %[temp4],          0,    4    \n\t"
483     "ext            %[temp4],    %[temp2],          28,   4    \n\t"
484     "ins            %[temp1],    %[temp5],          16,   4    \n\t"
485     "ext            %[temp5],    %[temp2],          12,   4    \n\t"
486     "ins            %[temp2],    %[temp4],          0,    4    \n\t"
487     "ext            %[temp4],    %[temp3],          28,   4    \n\t"
488     "ins            %[temp2],    %[temp5],          16,   4    \n\t"
489     "ext            %[temp5],    %[temp3],          12,   4    \n\t"
490     "ins            %[temp3],    %[temp4],          0,    4    \n\t"
491     "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"
492     "ins            %[temp3],    %[temp5],          16,   4    \n\t"
493     "addiu          %[src],      %[src],            16         \n\t"
494     "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
495 #ifdef WEBP_SWAP_16BIT_CSP
496     "usw            %[temp1],    0(%[dst])                     \n\t"
497     "usw            %[temp3],    4(%[dst])                     \n\t"
498 #else
499     "wsbh           %[temp1],    %[temp1]                      \n\t"
500     "wsbh           %[temp3],    %[temp3]                      \n\t"
501     "usw            %[temp1],    0(%[dst])                     \n\t"
502     "usw            %[temp3],    4(%[dst])                     \n\t"
503 #endif
504     "bne            %[src],      %[p_loop1_end],    0b         \n\t"
505     " addiu         %[dst],      %[dst],            8          \n\t"
506   "3:                                                          \n\t"
507     "beq            %[src],      %[p_loop2_end],    2f         \n\t"
508     " nop                                                      \n\t"
509   "1:                                                          \n\t"
510     "lw             %[temp0],    0(%[src])                     \n\t"
511     "ext            %[temp4],    %[temp0],          28,   4    \n\t"
512     "ext            %[temp5],    %[temp0],          12,   4    \n\t"
513     "ins            %[temp0],    %[temp4],          0,    4    \n\t"
514     "ins            %[temp0],    %[temp5],          16,   4    \n\t"
515     "addiu          %[src],      %[src],            4          \n\t"
516     "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
517 #ifdef WEBP_SWAP_16BIT_CSP
518     "ush            %[temp0],    0(%[dst])                     \n\t"
519 #else
520     "wsbh           %[temp0],    %[temp0]                      \n\t"
521     "ush            %[temp0],    0(%[dst])                     \n\t"
522 #endif
523     "bne            %[src],      %[p_loop2_end],    1b         \n\t"
524     " addiu         %[dst],      %[dst],            2          \n\t"
525   "2:                                                          \n\t"
526     ".set           pop                                        \n\t"
527     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
528       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
529       [dst]"+&r"(dst), [src]"+&r"(src)
530     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
531     : "memory"
532   );
533 }
534 
// Converts 'num_pixels' 32-bit pixels read from 'src' into 16-bit 5:6:5 values
// written to 'dst' (2 bytes per pixel), using MIPS DSP R2 inline assembly.
// For an input word w, the 16-bit result v is assembled as:
//   v[15:11] = w[23:19],  v[10:5] = w[15:10],  v[4:0] = w[7:3]
// Bits 31..24 of w are not used (presumably the alpha byte of an 0xAARRGGBB
// word, going by the function name -- confirm against the callers).
// When WEBP_SWAP_16BIT_CSP is defined, v is stored as is; otherwise the two
// bytes of v are swapped (wsbh) before the store.
// The first loop handles 4 pixels per iteration, the second loop handles the
// remaining (num_pixels & 3) pixels one at a time. The code runs under
// '.set noreorder', so the instruction written right after each branch is that
// branch's delay slot.
ConvertBGRAToRGB565(const uint32_t * src,int num_pixels,uint8_t * dst)535 static void ConvertBGRAToRGB565(const uint32_t* src,
536                                 int num_pixels, uint8_t* dst) {
537   int temp0, temp1, temp2, temp3, temp4, temp5;
        // End of the part that can be processed 4 pixels at a time.
538   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
        // End of the whole input.
539   const uint32_t* const p_loop2_end = src + num_pixels;
540   __asm__ volatile (
541     ".set           push                                       \n\t"
542     ".set           noreorder                                  \n\t"
        // Skip the 4-pixel loop entirely when there are fewer than 4 pixels.
543     "beq            %[src],      %[p_loop1_end],    3f         \n\t"
544     " nop                                                      \n\t"
        // Main loop: 4 input words -> 4 packed 16-bit values (8 output bytes).
545   "0:                                                          \n\t"
546     "lw             %[temp0],    0(%[src])                     \n\t"
547     "lw             %[temp1],    4(%[src])                     \n\t"
548     "lw             %[temp2],    8(%[src])                     \n\t"
549     "lw             %[temp3],    12(%[src])                    \n\t"
        // Per pixel: take w[23:8], then overwrite its low 11 bits with w[15:5]
        // and its low 5 bits with w[7:3]. The extractions for the next pixel
        // are interleaved with the insertions for the current one.
        // Results end up in: temp4 (pixel 0), temp0 (pixel 1),
        // temp1 (pixel 2), temp2 (pixel 3).
550     "ext            %[temp4],    %[temp0],          8,    16   \n\t"
551     "ext            %[temp5],    %[temp0],          5,    11   \n\t"
552     "ext            %[temp0],    %[temp0],          3,    5    \n\t"
553     "ins            %[temp4],    %[temp5],          0,    11   \n\t"
554     "ext            %[temp5],    %[temp1],          5,    11   \n\t"
555     "ins            %[temp4],    %[temp0],          0,    5    \n\t"
556     "ext            %[temp0],    %[temp1],          8,    16   \n\t"
557     "ext            %[temp1],    %[temp1],          3,    5    \n\t"
558     "ins            %[temp0],    %[temp5],          0,    11   \n\t"
559     "ext            %[temp5],    %[temp2],          5,    11   \n\t"
560     "ins            %[temp0],    %[temp1],          0,    5    \n\t"
561     "ext            %[temp1],    %[temp2],          8,    16   \n\t"
562     "ext            %[temp2],    %[temp2],          3,    5    \n\t"
563     "ins            %[temp1],    %[temp5],          0,    11   \n\t"
564     "ext            %[temp5],    %[temp3],          5,    11   \n\t"
565     "ins            %[temp1],    %[temp2],          0,    5    \n\t"
566     "ext            %[temp2],    %[temp3],          8,    16   \n\t"
567     "ext            %[temp3],    %[temp3],          3,    5    \n\t"
568     "ins            %[temp2],    %[temp5],          0,    11   \n\t"
        // Pack two 16-bit results per register: temp0 = pixel1:pixel0
        // (upper:lower half), temp2 = pixel3:pixel2.
569     "append         %[temp0],    %[temp4],          16         \n\t"
570     "ins            %[temp2],    %[temp3],          0,    5    \n\t"
571     "addiu          %[src],      %[src],            16         \n\t"
572     "append         %[temp2],    %[temp1],          16         \n\t"
        // 'dst' is a plain byte pointer, so the unaligned store forms are used.
573 #ifdef WEBP_SWAP_16BIT_CSP
574     "usw            %[temp0],    0(%[dst])                     \n\t"
575     "usw            %[temp2],    4(%[dst])                     \n\t"
576 #else
        // Swap the two bytes inside each halfword before storing.
577     "wsbh           %[temp0],    %[temp0]                      \n\t"
578     "wsbh           %[temp2],    %[temp2]                      \n\t"
579     "usw            %[temp0],    0(%[dst])                     \n\t"
580     "usw            %[temp2],    4(%[dst])                     \n\t"
581 #endif
582     "bne            %[src],      %[p_loop1_end],    0b         \n\t"
        // Delay slot: advance dst by the 8 bytes just written.
583     " addiu         %[dst],      %[dst],            8          \n\t"
584   "3:                                                          \n\t"
        // Nothing left over: skip the single-pixel loop.
585     "beq            %[src],      %[p_loop2_end],    2f         \n\t"
586     " nop                                                      \n\t"
        // Remainder loop: same bit packing, one pixel per iteration.
587   "1:                                                          \n\t"
588     "lw             %[temp0],    0(%[src])                     \n\t"
589     "ext            %[temp4],    %[temp0],          8,    16   \n\t"
590     "ext            %[temp5],    %[temp0],          5,    11   \n\t"
591     "ext            %[temp0],    %[temp0],          3,    5    \n\t"
592     "ins            %[temp4],    %[temp5],          0,    11   \n\t"
593     "addiu          %[src],      %[src],            4          \n\t"
594     "ins            %[temp4],    %[temp0],          0,    5    \n\t"
595 #ifdef WEBP_SWAP_16BIT_CSP
596     "ush            %[temp4],    0(%[dst])                     \n\t"
597 #else
598     "wsbh           %[temp4],    %[temp4]                      \n\t"
599     "ush            %[temp4],    0(%[dst])                     \n\t"
600 #endif
601     "bne            %[src],      %[p_loop2_end],    1b         \n\t"
        // Delay slot: advance dst by the 2 bytes just written.
602     " addiu         %[dst],      %[dst],            2          \n\t"
603   "2:                                                          \n\t"
604     ".set           pop                                        \n\t"
        // All temporaries are early-clobber outputs; src and dst are updated
        // in place by the assembly.
605     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
606       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
607       [dst]"+&r"(dst), [src]"+&r"(src)
608     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
609     : "memory"
610   );
611 }
612 
// Converts 'num_pixels' 32-bit pixels read from 'src' into 3 bytes per pixel
// written to 'dst', using MIPS DSP R2 inline assembly. For every input word
// the low 24 bits are kept and bits 31..24 are dropped (presumably the alpha
// byte of an 0xAARRGGBB word, going by the function name -- confirm against
// the callers).
// The first loop packs 4 input words (16 bytes) into 3 output words
// (12 bytes) per iteration; the second loop handles the remaining
// (num_pixels & 3) pixels one at a time. The code runs under '.set noreorder',
// so the instruction written right after each branch is that branch's delay
// slot.
ConvertBGRAToBGR(const uint32_t * src,int num_pixels,uint8_t * dst)613 static void ConvertBGRAToBGR(const uint32_t* src,
614                              int num_pixels, uint8_t* dst) {
615   int temp0, temp1, temp2, temp3;
        // End of the part that can be processed 4 pixels at a time.
616   const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
        // End of the whole input.
617   const uint32_t* const p_loop2_end = src + num_pixels;
618   __asm__ volatile (
619     ".set       push                                         \n\t"
620     ".set       noreorder                                    \n\t"
        // Skip the 4-pixel loop entirely when there are fewer than 4 pixels.
621     "beq        %[src],      %[p_loop1_end],    3f           \n\t"
622     " nop                                                    \n\t"
623   "0:                                                        \n\t"
624     "lw         %[temp0],    0(%[src])                       \n\t"
625     "lw         %[temp1],    4(%[src])                       \n\t"
626     "lw         %[temp2],    8(%[src])                       \n\t"
627     "lw         %[temp3],    12(%[src])                      \n\t"
        // Output word 0: w0[23:0] with its top byte replaced by w1[7:0].
628     "ins        %[temp0],    %[temp1],          24,    8     \n\t"
        // Output word 1: w1[23:8] in the low half, w2[15:0] in the high half.
629     "sra        %[temp1],    %[temp1],          8            \n\t"
630     "ins        %[temp1],    %[temp2],          16,    16    \n\t"
        // Output word 2: w3[23:0] shifted up one byte, with w2[23:16] (moved
        // to the top byte of temp2 by the shift) brought in as the low byte.
631     "sll        %[temp2],    %[temp2],          8            \n\t"
632     "balign     %[temp3],    %[temp2],          1            \n\t"
633     "addiu      %[src],      %[src],            16           \n\t"
        // 'dst' is a plain byte pointer, so the unaligned store forms are used.
634     "usw        %[temp0],    0(%[dst])                       \n\t"
635     "usw        %[temp1],    4(%[dst])                       \n\t"
636     "usw        %[temp3],    8(%[dst])                       \n\t"
637     "bne        %[src],      %[p_loop1_end],    0b           \n\t"
        // Delay slot: advance dst by the 12 bytes just written.
638     " addiu     %[dst],      %[dst],            12           \n\t"
639   "3:                                                        \n\t"
        // Nothing left over: skip the single-pixel loop.
640     "beq        %[src],      %[p_loop2_end],    2f           \n\t"
641     " nop                                                    \n\t"
        // Remainder loop: one pixel per iteration. dst is advanced first, so
        // the stores below use negative offsets.
642   "1:                                                        \n\t"
643     "lw         %[temp0],    0(%[src])                       \n\t"
644     "addiu      %[src],      %[src],            4            \n\t"
645     "addiu      %[dst],      %[dst],            3            \n\t"
        // Low 16 bits go to the first two output bytes ...
646     "ush        %[temp0],    -3(%[dst])                      \n\t"
647     "sra        %[temp0],    %[temp0],          16           \n\t"
648     "bne        %[src],      %[p_loop2_end],    1b           \n\t"
        // ... and bits 23..16 to the third one (store sits in the delay slot).
649     " sb        %[temp0],    -1(%[dst])                      \n\t"
650   "2:                                                        \n\t"
651     ".set       pop                                          \n\t"
        // All temporaries are early-clobber outputs; src and dst are updated
        // in place by the assembly.
652     : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
653       [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
654     : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
655     : "memory"
656   );
657 }
658 
659 //------------------------------------------------------------------------------
660 // Entry point
661 
662 extern void VP8LDspInitMIPSdspR2(void);
663 
VP8LDspInitMIPSdspR2(void)664 WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
665   VP8LMapColor32b = MapARGB;
666   VP8LMapColor8b = MapAlpha;
667   VP8LPredictors[5] = Predictor5;
668   VP8LPredictors[6] = Predictor6;
669   VP8LPredictors[7] = Predictor7;
670   VP8LPredictors[8] = Predictor8;
671   VP8LPredictors[9] = Predictor9;
672   VP8LPredictors[10] = Predictor10;
673   VP8LPredictors[11] = Predictor11;
674   VP8LPredictors[12] = Predictor12;
675   VP8LPredictors[13] = Predictor13;
676   VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
677   VP8LTransformColorInverse = TransformColorInverse;
678   VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
679   VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
680   VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
681   VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
682   VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
683 }
684 
685 #else  // !WEBP_USE_MIPS_DSP_R2
686 
// Build without WEBP_USE_MIPS_DSP_R2: the init entry point is supplied through
// WEBP_DSP_INIT_STUB instead of the implementation above. The macro is defined
// outside this file; presumably it expands to an empty VP8LDspInitMIPSdspR2()
// so the symbol still exists -- TODO confirm against its definition.
687 WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
688 
689 #endif  // WEBP_USE_MIPS_DSP_R2
690