/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
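
/* The SAD kernels in this file are written with Loongson MMI inline
 * assembly. An informal gloss of the instructions used below (see the
 * Loongson MMI manual for the authoritative definitions):
 *   gsldlc1/gsldrc1  unaligned 64-bit load into an FP/MMI register
 *   gslwlc1/gslwrc1  unaligned 32-bit load into an FP/MMI register
 *   pasubub          per-byte absolute difference of two registers
 *   biadd            horizontal sum of all bytes into the low word
 *   pavgb            per-byte rounding average (used with second_pred)
 *   paddw            per-word add, used here to accumulate the SAD
 * Each SAD_*_ABS_SUB_N macro below processes one N-pixel-wide row and adds
 * its partial SAD into an accumulator register.
 */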

#define SAD_SRC_REF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[src]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
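
/* The SAD_SRC_AVGREF_ABS_SUB_N variants first average the reference row with
 * the corresponding second_pred row using pavgb, then take the SAD of src
 * against that rounded average. This is the comparison used by the
 * vpx_sadNxM_avg_mmi functions below.
 */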

#define SAD_SRC_AVGREF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[second_pred]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[second_pred]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
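
/* The x4d entry points below simply run the single-reference SAD kernel
 * against each of the four candidate references; there is no dedicated
 * four-way MMI loop.
 */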

#define sadMxNx4D_mmi(m, n) \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride, \
                                 const uint8_t *const ref_array[], \
                                 int ref_stride, uint32_t *sad_array) { \
    int i; \
    for (i = 0; i < 4; ++i) \
      sad_array[i] = \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }
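
/* For reference, a plain-C sketch of what the MMI loops below compute for a
 * width-W, height-H block (illustrative only, not part of the build):
 *
 *   unsigned int sad = 0;
 *   for (int y = 0; y < H; ++y)
 *     for (int x = 0; x < W; ++x)
 *       sad += abs(src[y * src_stride + x] - ref[y * ref_stride + x]);
 *   return sad;
 */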

static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad64xN(H) \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);
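
/* The *_avg kernels below compare src against the rounded average of ref and
 * second_pred. A plain-C sketch of one row (illustrative only):
 *
 *   for (int x = 0; x < W; ++x)
 *     sad += abs(src[x] - ((ref[x] + second_pred[x] + 1) >> 1));
 *
 * second_pred is a contiguous W x H block, which is why the loops advance it
 * by W bytes per row rather than by a stride.
 */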

static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg64xN(H) \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad32xN(H) \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg32xN(H) \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad16xN(H) \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg16xN(H) \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);

static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad8xN(H) \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg8xN(H) \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad4xN(H) \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;
  mips_reg l_second_pred = (mips_reg)second_pred;

  /* clang-format off */
  __asm__ volatile (
    "pxor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Process two rows per loop iteration to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"(l_second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );
  /* clang-format on */

  return sad;
}

#define vpx_sad_avg4xN(H) \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);