1 /*
2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vpx_dsp_rtcd.h"
12 #include "vpx/vpx_integer.h"
13 #include "vpx_ports/mem.h"
14 #include "vpx_ports/asmdefs_mmi.h"
15
vpx_subtract_block_mmi(int rows,int cols,int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)16 void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
17 ptrdiff_t diff_stride, const uint8_t *src,
18 ptrdiff_t src_stride, const uint8_t *pred,
19 ptrdiff_t pred_stride) {
20 double ftmp[13];
21 uint32_t tmp[1];
22
23 if (rows == cols) {
24 switch (rows) {
25 case 4:
26 __asm__ volatile(
27 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
28 #if _MIPS_SIM == _ABIO32
29 "ulw %[tmp0], 0x00(%[src]) \n\t"
30 "mtc1 %[tmp0], %[ftmp1] \n\t"
31 "ulw %[tmp0], 0x00(%[pred]) \n\t"
32 "mtc1 %[tmp0], %[ftmp2] \n\t"
33 #else
34 "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t"
35 "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t"
36 "gslwlc1 %[ftmp2], 0x03(%[pred]) \n\t"
37 "gslwrc1 %[ftmp2], 0x00(%[pred]) \n\t"
38 #endif
39 MMI_ADDU(%[src], %[src], %[src_stride])
40 MMI_ADDU(%[pred], %[pred], %[pred_stride])
41
42 #if _MIPS_SIM == _ABIO32
43 "ulw %[tmp0], 0x00(%[src]) \n\t"
44 "mtc1 %[tmp0], %[ftmp3] \n\t"
45 "ulw %[tmp0], 0x00(%[pred]) \n\t"
46 "mtc1 %[tmp0], %[ftmp4] \n\t"
47 #else
48 "gslwlc1 %[ftmp3], 0x03(%[src]) \n\t"
49 "gslwrc1 %[ftmp3], 0x00(%[src]) \n\t"
50 "gslwlc1 %[ftmp4], 0x03(%[pred]) \n\t"
51 "gslwrc1 %[ftmp4], 0x00(%[pred]) \n\t"
52 #endif
53 MMI_ADDU(%[src], %[src], %[src_stride])
54 MMI_ADDU(%[pred], %[pred], %[pred_stride])
55
56 #if _MIPS_SIM == _ABIO32
57 "ulw %[tmp0], 0x00(%[src]) \n\t"
58 "mtc1 %[tmp0], %[ftmp5] \n\t"
59 "ulw %[tmp0], 0x00(%[pred]) \n\t"
60 "mtc1 %[tmp0], %[ftmp6] \n\t"
61 #else
62 "gslwlc1 %[ftmp5], 0x03(%[src]) \n\t"
63 "gslwrc1 %[ftmp5], 0x00(%[src]) \n\t"
64 "gslwlc1 %[ftmp6], 0x03(%[pred]) \n\t"
65 "gslwrc1 %[ftmp6], 0x00(%[pred]) \n\t"
66 #endif
67 MMI_ADDU(%[src], %[src], %[src_stride])
68 MMI_ADDU(%[pred], %[pred], %[pred_stride])
69
70 #if _MIPS_SIM == _ABIO32
71 "ulw %[tmp0], 0x00(%[src]) \n\t"
72 "mtc1 %[tmp0], %[ftmp7] \n\t"
73 "ulw %[tmp0], 0x00(%[pred]) \n\t"
74 "mtc1 %[tmp0], %[ftmp8] \n\t"
75 #else
76 "gslwlc1 %[ftmp7], 0x03(%[src]) \n\t"
77 "gslwrc1 %[ftmp7], 0x00(%[src]) \n\t"
78 "gslwlc1 %[ftmp8], 0x03(%[pred]) \n\t"
79 "gslwrc1 %[ftmp8], 0x00(%[pred]) \n\t"
80 #endif
81 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
82 "punpcklbh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
83 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
84 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
85 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
86 MMI_ADDU(%[diff], %[diff], %[diff_stride])
87 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
88 "punpcklbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
89 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
90 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
91 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
92 MMI_ADDU(%[diff], %[diff], %[diff_stride])
93 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
94 "punpcklbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
95 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
96 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
97 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
98 MMI_ADDU(%[diff], %[diff], %[diff_stride])
99 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
100 "punpcklbh %[ftmp10], %[ftmp8], %[ftmp0] \n\t"
101 "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t"
102 "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t"
103 "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t"
104 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
105 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
106 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
107 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
108 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
109 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
110 #if _MIPS_SIM == _ABIO32
111 [tmp0] "=&r"(tmp[0]),
112 #endif
113 [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff)
114 : [src_stride] "r"((mips_reg)src_stride),
115 [pred_stride] "r"((mips_reg)pred_stride),
116 [diff_stride] "r"((mips_reg)(diff_stride * 2))
117 : "memory");
118 break;
119 case 8:
120 __asm__ volatile(
121 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
122 "li %[tmp0], 0x02 \n\t"
123 "1: \n\t"
124 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
125 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
126 "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t"
127 "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t"
128 MMI_ADDU(%[src], %[src], %[src_stride])
129 MMI_ADDU(%[pred], %[pred], %[pred_stride])
130 "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t"
131 "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t"
132 "gsldlc1 %[ftmp4], 0x07(%[pred]) \n\t"
133 "gsldrc1 %[ftmp4], 0x00(%[pred]) \n\t"
134 MMI_ADDU(%[src], %[src], %[src_stride])
135 MMI_ADDU(%[pred], %[pred], %[pred_stride])
136 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
137 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
138 "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t"
139 "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t"
140 MMI_ADDU(%[src], %[src], %[src_stride])
141 MMI_ADDU(%[pred], %[pred], %[pred_stride])
142 "gsldlc1 %[ftmp7], 0x07(%[src]) \n\t"
143 "gsldrc1 %[ftmp7], 0x00(%[src]) \n\t"
144 "gsldlc1 %[ftmp8], 0x07(%[pred]) \n\t"
145 "gsldrc1 %[ftmp8], 0x00(%[pred]) \n\t"
146 MMI_ADDU(%[src], %[src], %[src_stride])
147 MMI_ADDU(%[pred], %[pred], %[pred_stride])
148 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
149 "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t"
150 "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t"
151 "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t"
152 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
153 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
154 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
155 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
156 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
157 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
158 MMI_ADDU(%[diff], %[diff], %[diff_stride])
159 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
160 "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t"
161 "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t"
162 "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t"
163 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
164 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
165 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
166 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
167 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
168 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
169 MMI_ADDU(%[diff], %[diff], %[diff_stride])
170 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
171 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
172 "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
173 "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
174 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
175 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
176 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
177 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
178 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
179 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
180 MMI_ADDU(%[diff], %[diff], %[diff_stride])
181 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
182 "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t"
183 "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t"
184 "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t"
185 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
186 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
187 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
188 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
189 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
190 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
191 MMI_ADDU(%[diff], %[diff], %[diff_stride])
192 "addiu %[tmp0], %[tmp0], -0x01 \n\t"
193 "bnez %[tmp0], 1b \n\t"
194 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
195 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
196 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
197 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
198 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
199 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
200 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
201 [pred] "+&r"(pred), [diff] "+&r"(diff)
202 : [pred_stride] "r"((mips_reg)pred_stride),
203 [src_stride] "r"((mips_reg)src_stride),
204 [diff_stride] "r"((mips_reg)(diff_stride * 2))
205 : "memory");
206 break;
207 case 16:
208 __asm__ volatile(
209 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
210 "li %[tmp0], 0x08 \n\t"
211 "1: \n\t"
212 "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
213 "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
214 "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t"
215 "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t"
216 "gsldlc1 %[ftmp3], 0x0f(%[src]) \n\t"
217 "gsldrc1 %[ftmp3], 0x08(%[src]) \n\t"
218 "gsldlc1 %[ftmp4], 0x0f(%[pred]) \n\t"
219 "gsldrc1 %[ftmp4], 0x08(%[pred]) \n\t"
220 MMI_ADDU(%[src], %[src], %[src_stride])
221 MMI_ADDU(%[pred], %[pred], %[pred_stride])
222 "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
223 "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
224 "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t"
225 "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t"
226 "gsldlc1 %[ftmp7], 0x0f(%[src]) \n\t"
227 "gsldrc1 %[ftmp7], 0x08(%[src]) \n\t"
228 "gsldlc1 %[ftmp8], 0x0f(%[pred]) \n\t"
229 "gsldrc1 %[ftmp8], 0x08(%[pred]) \n\t"
230 MMI_ADDU(%[src], %[src], %[src_stride])
231 MMI_ADDU(%[pred], %[pred], %[pred_stride])
232 "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t"
233 "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t"
234 "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t"
235 "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t"
236 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
237 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
238 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
239 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
240 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
241 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
242 "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t"
243 "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t"
244 "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t"
245 "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t"
246 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
247 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
248 "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t"
249 "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t"
250 "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t"
251 "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t"
252 MMI_ADDU(%[diff], %[diff], %[diff_stride])
253 "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
254 "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
255 "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t"
256 "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t"
257 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
258 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
259 "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t"
260 "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t"
261 "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t"
262 "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t"
263 "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t"
264 "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t"
265 "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t"
266 "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t"
267 "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t"
268 "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t"
269 "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t"
270 "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t"
271 "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t"
272 "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t"
273 MMI_ADDU(%[diff], %[diff], %[diff_stride])
274 "addiu %[tmp0], %[tmp0], -0x01 \n\t"
275 "bnez %[tmp0], 1b \n\t"
276 : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
277 [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
278 [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
279 [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
280 [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
281 [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
282 [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
283 [pred] "+&r"(pred), [diff] "+&r"(diff)
284 : [pred_stride] "r"((mips_reg)pred_stride),
285 [src_stride] "r"((mips_reg)src_stride),
286 [diff_stride] "r"((mips_reg)(diff_stride * 2))
287 : "memory");
288 break;
289 case 32:
290 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
291 pred, pred_stride);
292 break;
293 case 64:
294 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
295 pred, pred_stride);
296 break;
297 default:
298 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
299 pred, pred_stride);
300 break;
301 }
302 } else {
303 vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred,
304 pred_stride);
305 }
306 }
307