/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
11 #include "./vp8_rtcd.h"
12 #include "vpx_ports/mem.h"
13 #include "vpx_ports/asmdefs_mmi.h"
14
/*
 * TRANSPOSE_4H: in-place transpose of the 4x4 matrix of 16-bit halfwords
 * held in %[ftmp1]..%[ftmp4] (one row per register).
 *
 * This is an asm-template fragment: it must be pasted into an extended-asm
 * statement that declares every named operand used below.
 *   in/out : ftmp1, ftmp2, ftmp3, ftmp4
 *   scratch: ftmp5..ftmp9 (work registers),
 *            ftmp10 (holds the pshufh selector 0x93),
 *            tmp0 (integer register used to load that selector)
 *   ftmp0  : cleared with pxor on entry and left holding zero
 *
 * Method: rows are interleaved pairwise at halfword granularity (each row
 * is widened against zero, the second row of the pair is moved into the
 * odd lanes with pshufh, and the two are OR-ed together); the resulting
 * 32-bit pairs are then interleaved with punpcklwd/punpckhwd to form the
 * transposed rows.
 */
#define TRANSPOSE_4H \
  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
  MMI_LI(%[tmp0], 0x93) \
  "mtc1 %[tmp0], %[ftmp10] \n\t" \
  "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \
  "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
  "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
  "por %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \
  "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \
  "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \
  "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
  "por %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \
  "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \
  "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
  "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
  "por %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \
  "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \
  "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \
  "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \
  "por %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \
  "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \
  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \
  "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \
  "punpckhwd %[ftmp4], %[ftmp6], %[ftmp8] \n\t"
39
vp8_short_idct4x4llm_mmi(int16_t * input,unsigned char * pred_ptr,int pred_stride,unsigned char * dst_ptr,int dst_stride)40 void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr,
41 int pred_stride, unsigned char *dst_ptr,
42 int dst_stride) {
43 double ftmp[12];
44 uint64_t tmp[1];
45 double ff_ph_04, ff_ph_4e7b, ff_ph_22a3;
46
47 __asm__ volatile (
48 "dli %[tmp0], 0x0004000400040004 \n\t"
49 "dmtc1 %[tmp0], %[ff_ph_04] \n\t"
50 "dli %[tmp0], 0x4e7b4e7b4e7b4e7b \n\t"
51 "dmtc1 %[tmp0], %[ff_ph_4e7b] \n\t"
52 "dli %[tmp0], 0x22a322a322a322a3 \n\t"
53 "dmtc1 %[tmp0], %[ff_ph_22a3] \n\t"
54 MMI_LI(%[tmp0], 0x02)
55 "dmtc1 %[tmp0], %[ftmp11] \n\t"
56 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
57
58 "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
59 "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
60 "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
61 "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t"
62 "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t"
63 "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t"
64 "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t"
65 "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t"
66
67 // ip[0...3] + ip[8...11]
68 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
69 // ip[0...3] - ip[8...11]
70 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
71 // (ip[12...15] * sinpi8sqrt2) >> 16
72 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
73 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
74 // (ip[ 4... 7] * sinpi8sqrt2) >> 16
75 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
76 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
77 // ip[ 4... 7] + ((ip[ 4... 7] * cospi8sqrt2minus1) >> 16)
78 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
79 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
80 // ip[12...15] + ((ip[12...15] * cospi8sqrt2minus1) >> 16)
81 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
82 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
83
84 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
85 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
86 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
87 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
88 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
89 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
90 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
91 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
92
93 TRANSPOSE_4H
94 // a
95 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
96 // b
97 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
98 // c
99 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
100 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
101 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
102 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
103 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
104 // d
105 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
106 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
107 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
108 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
109 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
110
111 MMI_LI(%[tmp0], 0x03)
112 "mtc1 %[tmp0], %[ftmp11] \n\t"
113 // a + d
114 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
115 "paddh %[ftmp1], %[ftmp1], %[ff_ph_04] \n\t"
116 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
117 // b + c
118 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
119 "paddh %[ftmp2], %[ftmp2], %[ff_ph_04] \n\t"
120 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
121 // b - c
122 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
123 "paddh %[ftmp3], %[ftmp3], %[ff_ph_04] \n\t"
124 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
125 // a - d
126 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
127 "paddh %[ftmp4], %[ftmp4], %[ff_ph_04] \n\t"
128 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
129
130 TRANSPOSE_4H
131 #if _MIPS_SIM == _ABIO32
132 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
133 "mtc1 %[tmp0], %[ftmp5] \n\t"
134 #else
135 "gslwlc1 %[ftmp5], 0x03(%[pred_ptr]) \n\t"
136 "gslwrc1 %[ftmp5], 0x00(%[pred_ptr]) \n\t"
137 #endif
138 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
139 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
140 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
141 "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
142 "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
143 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
144 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
145
146 #if _MIPS_SIM == _ABIO32
147 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
148 "mtc1 %[tmp0], %[ftmp6] \n\t"
149 #else
150 "gslwlc1 %[ftmp6], 0x03(%[pred_ptr]) \n\t"
151 "gslwrc1 %[ftmp6], 0x00(%[pred_ptr]) \n\t"
152 #endif
153 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
154 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
155 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
156 "gsswlc1 %[ftmp2], 0x03(%[dst_ptr]) \n\t"
157 "gsswrc1 %[ftmp2], 0x00(%[dst_ptr]) \n\t"
158 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
159 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
160
161 #if _MIPS_SIM == _ABIO32
162 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
163 "mtc1 %[tmp0], %[ftmp7] \n\t"
164 #else
165 "gslwlc1 %[ftmp7], 0x03(%[pred_ptr]) \n\t"
166 "gslwrc1 %[ftmp7], 0x00(%[pred_ptr]) \n\t"
167 #endif
168 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
169 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
170 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
171 "gsswlc1 %[ftmp3], 0x03(%[dst_ptr]) \n\t"
172 "gsswrc1 %[ftmp3], 0x00(%[dst_ptr]) \n\t"
173 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
174 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
175
176 #if _MIPS_SIM == _ABIO32
177 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t"
178 "mtc1 %[tmp0], %[ftmp8] \n\t"
179 #else
180 "gslwlc1 %[ftmp8], 0x03(%[pred_ptr]) \n\t"
181 "gslwrc1 %[ftmp8], 0x00(%[pred_ptr]) \n\t"
182 #endif
183 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
184 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
185 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
186 "gsswlc1 %[ftmp4], 0x03(%[dst_ptr]) \n\t"
187 "gsswrc1 %[ftmp4], 0x00(%[dst_ptr]) \n\t"
188 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
189 [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
190 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
191 [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
192 [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]),
193 [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr),
194 [ff_ph_4e7b]"=&f"(ff_ph_4e7b), [ff_ph_04]"=&f"(ff_ph_04),
195 [ff_ph_22a3]"=&f"(ff_ph_22a3)
196 : [ip]"r"(input),
197 [pred_stride]"r"((mips_reg)pred_stride),
198 [dst_stride]"r"((mips_reg)dst_stride)
199 : "memory"
200 );
201 }
202
vp8_dc_only_idct_add_mmi(int16_t input_dc,unsigned char * pred_ptr,int pred_stride,unsigned char * dst_ptr,int dst_stride)203 void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr,
204 int pred_stride, unsigned char *dst_ptr,
205 int dst_stride) {
206 int a0 = ((input_dc + 4) >> 3);
207 double a1, ftmp[5];
208 int low32;
209
210 __asm__ volatile (
211 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
212 "dmtc1 %[a0], %[a1] \n\t"
213 "pshufh %[a1], %[a1], %[ftmp0] \n\t"
214 "ulw %[low32], 0x00(%[pred_ptr]) \n\t"
215 "mtc1 %[low32], %[ftmp1] \n\t"
216 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
217 "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
218 "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
219 "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
220 "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
221
222 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
223 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
224 "ulw %[low32], 0x00(%[pred_ptr]) \n\t"
225 "mtc1 %[low32], %[ftmp1] \n\t"
226 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
227 "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
228 "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
229 "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
230 "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
231
232 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
233 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
234 "ulw %[low32], 0x00(%[pred_ptr]) \n\t"
235 "mtc1 %[low32], %[ftmp1] \n\t"
236 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
237 "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
238 "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
239 "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
240 "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
241
242 MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride])
243 MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride])
244 "ulw %[low32], 0x00(%[pred_ptr]) \n\t"
245 "mtc1 %[low32], %[ftmp1] \n\t"
246 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
247 "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t"
248 "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t"
249 "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t"
250 "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t"
251 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
252 [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32),
253 [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr), [a1]"=&f"(a1)
254 : [dst_stride]"r"((mips_reg)dst_stride),
255 [pred_stride]"r"((mips_reg)pred_stride), [a0]"r"(a0)
256 : "memory"
257 );
258 }
259
vp8_short_inv_walsh4x4_mmi(int16_t * input,int16_t * mb_dqcoeff)260 void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) {
261 int i;
262 int16_t output[16];
263 double ff_ph_03, ftmp[12];
264 uint64_t tmp[1];
265
266 __asm__ volatile (
267 "dli %[tmp0], 0x0003000300030003 \n\t"
268 "dmtc1 %[tmp0], %[ff_ph_03] \n\t"
269 MMI_LI(%[tmp0], 0x03)
270 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
271 "dmtc1 %[tmp0], %[ftmp11] \n\t"
272 "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t"
273 "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t"
274 "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t"
275 "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t"
276 "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t"
277 "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t"
278 "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t"
279 "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t"
280 "paddh %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
281 "psubh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
282 "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
283 "psubh %[ftmp8], %[ftmp3], %[ftmp4] \n\t"
284
285 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
286 "psubh %[ftmp2], %[ftmp5], %[ftmp7] \n\t"
287 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
288 "paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t"
289
290 TRANSPOSE_4H
291 // a
292 "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t"
293 // d
294 "psubh %[ftmp6], %[ftmp1], %[ftmp4] \n\t"
295 // b
296 "paddh %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
297 // c
298 "psubh %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
299
300 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
301 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
302 "psubh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
303 "psubh %[ftmp4], %[ftmp6], %[ftmp8] \n\t"
304
305 "paddh %[ftmp1], %[ftmp1], %[ff_ph_03] \n\t"
306 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
307 "paddh %[ftmp2], %[ftmp2], %[ff_ph_03] \n\t"
308 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
309 "paddh %[ftmp3], %[ftmp3], %[ff_ph_03] \n\t"
310 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
311 "paddh %[ftmp4], %[ftmp4], %[ff_ph_03] \n\t"
312 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
313
314 TRANSPOSE_4H
315 "gssdlc1 %[ftmp1], 0x07(%[op]) \n\t"
316 "gssdrc1 %[ftmp1], 0x00(%[op]) \n\t"
317 "gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t"
318 "gssdrc1 %[ftmp2], 0x08(%[op]) \n\t"
319 "gssdlc1 %[ftmp3], 0x17(%[op]) \n\t"
320 "gssdrc1 %[ftmp3], 0x10(%[op]) \n\t"
321 "gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t"
322 "gssdrc1 %[ftmp4], 0x18(%[op]) \n\t"
323 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]),
324 [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
325 [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]),
326 [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]),
327 [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [ff_ph_03]"=&f"(ff_ph_03)
328 : [ip]"r"(input), [op]"r"(output)
329 : "memory"
330 );
331
332 for (i = 0; i < 16; i++) {
333 mb_dqcoeff[i * 16] = output[i];
334 }
335 }
336