• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "./vpx_dsp_rtcd.h"
12 #include "vpx/vpx_integer.h"
13 #include "vpx_ports/mem.h"
14 #include "vpx_ports/asmdefs_mmi.h"
15 
vpx_subtract_block_mmi(int rows,int cols,int16_t * diff,ptrdiff_t diff_stride,const uint8_t * src,ptrdiff_t src_stride,const uint8_t * pred,ptrdiff_t pred_stride)16 void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff,
17                             ptrdiff_t diff_stride, const uint8_t *src,
18                             ptrdiff_t src_stride, const uint8_t *pred,
19                             ptrdiff_t pred_stride) {
20   double ftmp[13];
21   uint32_t tmp[1];
22 
23   if (rows == cols) {
24     switch (rows) {
25       case 4:
26         __asm__ volatile(
27             "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
28 #if _MIPS_SIM == _ABIO32
29             "ulw        %[tmp0],    0x00(%[src])                        \n\t"
30             "mtc1       %[tmp0],    %[ftmp1]                            \n\t"
31             "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
32             "mtc1       %[tmp0],    %[ftmp2]                            \n\t"
33 #else
34             "gslwlc1    %[ftmp1],   0x03(%[src])                        \n\t"
35             "gslwrc1    %[ftmp1],   0x00(%[src])                        \n\t"
36             "gslwlc1    %[ftmp2],   0x03(%[pred])                       \n\t"
37             "gslwrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
38 #endif
39             MMI_ADDU(%[src], %[src], %[src_stride])
40             MMI_ADDU(%[pred], %[pred], %[pred_stride])
41 
42 #if _MIPS_SIM == _ABIO32
43             "ulw        %[tmp0],    0x00(%[src])                        \n\t"
44             "mtc1       %[tmp0],    %[ftmp3]                            \n\t"
45             "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
46             "mtc1       %[tmp0],    %[ftmp4]                            \n\t"
47 #else
48             "gslwlc1    %[ftmp3],   0x03(%[src])                        \n\t"
49             "gslwrc1    %[ftmp3],   0x00(%[src])                        \n\t"
50             "gslwlc1    %[ftmp4],   0x03(%[pred])                       \n\t"
51             "gslwrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
52 #endif
53             MMI_ADDU(%[src], %[src], %[src_stride])
54             MMI_ADDU(%[pred], %[pred], %[pred_stride])
55 
56 #if _MIPS_SIM == _ABIO32
57             "ulw        %[tmp0],    0x00(%[src])                        \n\t"
58             "mtc1       %[tmp0],    %[ftmp5]                            \n\t"
59             "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
60             "mtc1       %[tmp0],    %[ftmp6]                            \n\t"
61 #else
62             "gslwlc1    %[ftmp5],   0x03(%[src])                        \n\t"
63             "gslwrc1    %[ftmp5],   0x00(%[src])                        \n\t"
64             "gslwlc1    %[ftmp6],   0x03(%[pred])                       \n\t"
65             "gslwrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
66 #endif
67             MMI_ADDU(%[src], %[src], %[src_stride])
68             MMI_ADDU(%[pred], %[pred], %[pred_stride])
69 
70 #if _MIPS_SIM == _ABIO32
71             "ulw        %[tmp0],    0x00(%[src])                        \n\t"
72             "mtc1       %[tmp0],    %[ftmp7]                            \n\t"
73             "ulw        %[tmp0],    0x00(%[pred])                       \n\t"
74             "mtc1       %[tmp0],    %[ftmp8]                            \n\t"
75 #else
76             "gslwlc1    %[ftmp7],   0x03(%[src])                        \n\t"
77             "gslwrc1    %[ftmp7],   0x00(%[src])                        \n\t"
78             "gslwlc1    %[ftmp8],   0x03(%[pred])                       \n\t"
79             "gslwrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
80 #endif
81             "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
82             "punpcklbh  %[ftmp10],  %[ftmp2],           %[ftmp0]        \n\t"
83             "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
84             "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
85             "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
86             MMI_ADDU(%[diff], %[diff], %[diff_stride])
87             "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
88             "punpcklbh  %[ftmp10],  %[ftmp4],           %[ftmp0]        \n\t"
89             "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
90             "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
91             "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
92             MMI_ADDU(%[diff], %[diff], %[diff_stride])
93             "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
94             "punpcklbh  %[ftmp10],  %[ftmp6],           %[ftmp0]        \n\t"
95             "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
96             "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
97             "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
98             MMI_ADDU(%[diff], %[diff], %[diff_stride])
99             "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
100             "punpcklbh  %[ftmp10],  %[ftmp8],           %[ftmp0]        \n\t"
101             "psubh      %[ftmp11],  %[ftmp9],           %[ftmp10]       \n\t"
102             "gssdlc1    %[ftmp11],  0x07(%[diff])                       \n\t"
103             "gssdrc1    %[ftmp11],  0x00(%[diff])                       \n\t"
104             : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
105               [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
106               [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
107               [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
108               [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
109               [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
110 #if _MIPS_SIM == _ABIO32
111               [tmp0] "=&r"(tmp[0]),
112 #endif
113               [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff)
114             : [src_stride] "r"((mips_reg)src_stride),
115               [pred_stride] "r"((mips_reg)pred_stride),
116               [diff_stride] "r"((mips_reg)(diff_stride * 2))
117             : "memory");
118         break;
119       case 8:
120         __asm__ volatile(
121             "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
122             "li         %[tmp0],    0x02                                \n\t"
123             "1:                                                         \n\t"
124             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
125             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
126             "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
127             "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
128             MMI_ADDU(%[src], %[src], %[src_stride])
129             MMI_ADDU(%[pred], %[pred], %[pred_stride])
130             "gsldlc1    %[ftmp3],   0x07(%[src])                        \n\t"
131             "gsldrc1    %[ftmp3],   0x00(%[src])                        \n\t"
132             "gsldlc1    %[ftmp4],   0x07(%[pred])                       \n\t"
133             "gsldrc1    %[ftmp4],   0x00(%[pred])                       \n\t"
134             MMI_ADDU(%[src], %[src], %[src_stride])
135             MMI_ADDU(%[pred], %[pred], %[pred_stride])
136             "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
137             "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
138             "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
139             "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
140             MMI_ADDU(%[src], %[src], %[src_stride])
141             MMI_ADDU(%[pred], %[pred], %[pred_stride])
142             "gsldlc1    %[ftmp7],   0x07(%[src])                        \n\t"
143             "gsldrc1    %[ftmp7],   0x00(%[src])                        \n\t"
144             "gsldlc1    %[ftmp8],   0x07(%[pred])                       \n\t"
145             "gsldrc1    %[ftmp8],   0x00(%[pred])                       \n\t"
146             MMI_ADDU(%[src], %[src], %[src_stride])
147             MMI_ADDU(%[pred], %[pred], %[pred_stride])
148             "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
149             "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
150             "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
151             "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
152             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
153             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
154             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
155             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
156             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
157             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
158             MMI_ADDU(%[diff], %[diff], %[diff_stride])
159             "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
160             "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
161             "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
162             "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
163             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
164             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
165             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
166             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
167             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
168             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
169             MMI_ADDU(%[diff], %[diff], %[diff_stride])
170             "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
171             "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
172             "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
173             "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
174             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
175             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
176             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
177             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
178             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
179             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
180             MMI_ADDU(%[diff], %[diff], %[diff_stride])
181             "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
182             "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
183             "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
184             "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
185             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
186             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
187             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
188             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
189             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
190             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
191             MMI_ADDU(%[diff], %[diff], %[diff_stride])
192             "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
193             "bnez       %[tmp0],    1b                                  \n\t"
194             : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
195               [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
196               [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
197               [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
198               [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
199               [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
200               [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
201               [pred] "+&r"(pred), [diff] "+&r"(diff)
202             : [pred_stride] "r"((mips_reg)pred_stride),
203               [src_stride] "r"((mips_reg)src_stride),
204               [diff_stride] "r"((mips_reg)(diff_stride * 2))
205             : "memory");
206         break;
207       case 16:
208         __asm__ volatile(
209             "xor        %[ftmp0],   %[ftmp0],           %[ftmp0]        \n\t"
210             "li         %[tmp0],    0x08                                \n\t"
211             "1:                                                         \n\t"
212             "gsldlc1    %[ftmp1],   0x07(%[src])                        \n\t"
213             "gsldrc1    %[ftmp1],   0x00(%[src])                        \n\t"
214             "gsldlc1    %[ftmp2],   0x07(%[pred])                       \n\t"
215             "gsldrc1    %[ftmp2],   0x00(%[pred])                       \n\t"
216             "gsldlc1    %[ftmp3],   0x0f(%[src])                        \n\t"
217             "gsldrc1    %[ftmp3],   0x08(%[src])                        \n\t"
218             "gsldlc1    %[ftmp4],   0x0f(%[pred])                       \n\t"
219             "gsldrc1    %[ftmp4],   0x08(%[pred])                       \n\t"
220             MMI_ADDU(%[src], %[src], %[src_stride])
221             MMI_ADDU(%[pred], %[pred], %[pred_stride])
222             "gsldlc1    %[ftmp5],   0x07(%[src])                        \n\t"
223             "gsldrc1    %[ftmp5],   0x00(%[src])                        \n\t"
224             "gsldlc1    %[ftmp6],   0x07(%[pred])                       \n\t"
225             "gsldrc1    %[ftmp6],   0x00(%[pred])                       \n\t"
226             "gsldlc1    %[ftmp7],   0x0f(%[src])                        \n\t"
227             "gsldrc1    %[ftmp7],   0x08(%[src])                        \n\t"
228             "gsldlc1    %[ftmp8],   0x0f(%[pred])                       \n\t"
229             "gsldrc1    %[ftmp8],   0x08(%[pred])                       \n\t"
230             MMI_ADDU(%[src], %[src], %[src_stride])
231             MMI_ADDU(%[pred], %[pred], %[pred_stride])
232             "punpcklbh  %[ftmp9],   %[ftmp1],           %[ftmp0]        \n\t"
233             "punpckhbh  %[ftmp10],  %[ftmp1],           %[ftmp0]        \n\t"
234             "punpcklbh  %[ftmp11],  %[ftmp2],           %[ftmp0]        \n\t"
235             "punpckhbh  %[ftmp12],  %[ftmp2],           %[ftmp0]        \n\t"
236             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
237             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
238             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
239             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
240             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
241             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
242             "punpcklbh  %[ftmp9],   %[ftmp3],           %[ftmp0]        \n\t"
243             "punpckhbh  %[ftmp10],  %[ftmp3],           %[ftmp0]        \n\t"
244             "punpcklbh  %[ftmp11],  %[ftmp4],           %[ftmp0]        \n\t"
245             "punpckhbh  %[ftmp12],  %[ftmp4],           %[ftmp0]        \n\t"
246             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
247             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
248             "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
249             "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
250             "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
251             "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
252             MMI_ADDU(%[diff], %[diff], %[diff_stride])
253             "punpcklbh  %[ftmp9],   %[ftmp5],           %[ftmp0]        \n\t"
254             "punpckhbh  %[ftmp10],  %[ftmp5],           %[ftmp0]        \n\t"
255             "punpcklbh  %[ftmp11],  %[ftmp6],           %[ftmp0]        \n\t"
256             "punpckhbh  %[ftmp12],  %[ftmp6],           %[ftmp0]        \n\t"
257             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
258             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
259             "gssdlc1    %[ftmp9],   0x07(%[diff])                       \n\t"
260             "gssdrc1    %[ftmp9],   0x00(%[diff])                       \n\t"
261             "gssdlc1    %[ftmp10],  0x0f(%[diff])                       \n\t"
262             "gssdrc1    %[ftmp10],  0x08(%[diff])                       \n\t"
263             "punpcklbh  %[ftmp9],   %[ftmp7],           %[ftmp0]        \n\t"
264             "punpckhbh  %[ftmp10],  %[ftmp7],           %[ftmp0]        \n\t"
265             "punpcklbh  %[ftmp11],  %[ftmp8],           %[ftmp0]        \n\t"
266             "punpckhbh  %[ftmp12],  %[ftmp8],           %[ftmp0]        \n\t"
267             "psubsh     %[ftmp9],   %[ftmp9],           %[ftmp11]       \n\t"
268             "psubsh     %[ftmp10],  %[ftmp10],          %[ftmp12]       \n\t"
269             "gssdlc1    %[ftmp9],   0x17(%[diff])                       \n\t"
270             "gssdrc1    %[ftmp9],   0x10(%[diff])                       \n\t"
271             "gssdlc1    %[ftmp10],  0x1f(%[diff])                       \n\t"
272             "gssdrc1    %[ftmp10],  0x18(%[diff])                       \n\t"
273             MMI_ADDU(%[diff], %[diff], %[diff_stride])
274             "addiu      %[tmp0],    %[tmp0],            -0x01           \n\t"
275             "bnez       %[tmp0],    1b                                  \n\t"
276             : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]),
277               [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]),
278               [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
279               [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]),
280               [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]),
281               [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]),
282               [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src),
283               [pred] "+&r"(pred), [diff] "+&r"(diff)
284             : [pred_stride] "r"((mips_reg)pred_stride),
285               [src_stride] "r"((mips_reg)src_stride),
286               [diff_stride] "r"((mips_reg)(diff_stride * 2))
287             : "memory");
288         break;
289       case 32:
290         vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
291                              pred, pred_stride);
292         break;
293       case 64:
294         vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
295                              pred, pred_stride);
296         break;
297       default:
298         vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride,
299                              pred, pred_stride);
300         break;
301     }
302   } else {
303     vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred,
304                          pred_stride);
305   }
306 }
307