/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/asmdefs_mmi.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

#define SAD_SRC_REF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_REF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_REF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[src]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_REF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */
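
// For reference: the SAD_SRC_REF_ABS_SUB_* macros above accumulate a plain
// sum of absolute differences. pasubub produces the per-byte absolute
// difference of two 8-byte vectors and biadd reduces those eight bytes to a
// single sum, which is then added into the running total. A minimal scalar
// sketch of one 8-byte step (illustrative only, not part of the build;
// "row_sad8" is a hypothetical helper name):
//
//   static unsigned int row_sad8(const uint8_t *src, const uint8_t *ref) {
//     unsigned int sum = 0;
//     int i;
//     for (i = 0; i < 8; ++i)
//       sum += (src[i] > ref[i]) ? (src[i] - ref[i]) : (ref[i] - src[i]);
//     return sum;
//   }
//
// The SAD_SRC_AVGREF_ABS_SUB_* macros below differ only in that the reference
// row is first averaged with the matching second_pred row via pavgb (a
// rounding byte average, presumably (a + b + 1) >> 1) before the absolute
// differences are taken.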

#define SAD_SRC_AVGREF_ABS_SUB_64 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_32 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_16 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \
  "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \
  "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \
  "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \
  "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \
  "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \
  "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "biadd %[ftmp2], %[ftmp2] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \
  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"

#define SAD_SRC_AVGREF_ABS_SUB_8 \
  "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \
  "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"

#if _MIPS_SIM == _ABIO32
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "ulw %[tmp0], 0x00(%[second_pred]) \n\t" \
  "mtc1 %[tmp0], %[ftmp1] \n\t" \
  "ulw %[tmp0], 0x00(%[ref]) \n\t" \
  "mtc1 %[tmp0], %[ftmp2] \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */
#define SAD_SRC_AVGREF_ABS_SUB_4 \
  "gslwlc1 %[ftmp1], 0x03(%[second_pred]) \n\t" \
  "gslwrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \
  "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \
  "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \
  "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \
  "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \
  "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \
  "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
  "mthc1 $0, %[ftmp1] \n\t" \
  "biadd %[ftmp1], %[ftmp1] \n\t" \
  "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
#endif /* _MIPS_SIM == _ABIO32 */

// Depending on the call sites, pass **ref_array to avoid taking the address
// in each call below and de-dup with the 4D variant.
#define sadMxNxK_mmi(m, n, k) \
  void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride, \
                                    const uint8_t *ref_array, int ref_stride, \
                                    uint32_t *sad_array) { \
    int i; \
    for (i = 0; i < k; ++i) \
      sad_array[i] = \
          vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \
  }

// This appears to be equivalent to the above when k == 4 and refs is const.
#define sadMxNx4D_mmi(m, n) \
  void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride, \
                                 const uint8_t *const ref_array[], \
                                 int ref_stride, uint32_t *sad_array) { \
    int i; \
    for (i = 0; i < 4; ++i) \
      sad_array[i] = \
          vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \
  }

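// Usage sketch for the generated wrappers (illustrative only, not part of the
// build). sadMxNx4D_mmi(16, 16) expands to vpx_sad16x16x4d_mmi, which takes
// four reference pointers and writes four SAD values; "refs", "sads" and
// ref0..ref3 are hypothetical caller-side names:
//
//   const uint8_t *refs[4] = { ref0, ref1, ref2, ref3 };
//   uint32_t sads[4];
//   vpx_sad16x16x4d_mmi(src, src_stride, refs, ref_stride, sads);
//
// The xK wrappers (sadMxNxK_mmi) instead take a single reference pointer and
// evaluate k SADs at the consecutive byte offsets &ref_array[0..k-1].
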
static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_64
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad64xN(H) \
  unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad64x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad64xN(64);
vpx_sad64xN(32);
sadMxNx4D_mmi(64, 64);
sadMxNx4D_mmi(64, 32);

static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_64
    MMI_ADDIU(%[second_pred], %[second_pred], 0x40)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg64xN(H) \
  unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg64xN(64);
vpx_sad_avg64xN(32);

static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_32
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad32xN(H) \
  unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad32x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad32xN(64);
vpx_sad32xN(32);
vpx_sad32xN(16);
sadMxNx4D_mmi(32, 64);
sadMxNx4D_mmi(32, 32);
sadMxNx4D_mmi(32, 16);

static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_32
    MMI_ADDIU(%[second_pred], %[second_pred], 0x20)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg32xN(H) \
  unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg32xN(64);
vpx_sad_avg32xN(32);
vpx_sad_avg32xN(16);

static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_16
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad16xN(H) \
  unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride, \
                                   const uint8_t *ref, int ref_stride) { \
    return vpx_sad16x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad16xN(32);
vpx_sad16xN(16);
vpx_sad16xN(8);
sadMxNxK_mmi(16, 16, 3);
sadMxNxK_mmi(16, 16, 8);
sadMxNxK_mmi(16, 8, 3);
sadMxNxK_mmi(16, 8, 8);
sadMxNx4D_mmi(16, 32);
sadMxNx4D_mmi(16, 16);
sadMxNx4D_mmi(16, 8);

static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride,
                                          const uint8_t *ref, int ref_stride,
                                          const uint8_t *second_pred,
                                          int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_16
    MMI_ADDIU(%[second_pred], %[second_pred], 0x10)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp5] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter),
      [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg16xN(H) \
  unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                       const uint8_t *ref, int ref_stride, \
                                       const uint8_t *second_pred) { \
    return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg16xN(32);
vpx_sad_avg16xN(16);
vpx_sad_avg16xN(8);

static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_8
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad8xN(H) \
  unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad8x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad8xN(16);
vpx_sad8xN(8);
vpx_sad8xN(4);
sadMxNxK_mmi(8, 16, 3);
sadMxNxK_mmi(8, 16, 8);
sadMxNxK_mmi(8, 8, 3);
sadMxNxK_mmi(8, 8, 8);
sadMxNx4D_mmi(8, 16);
sadMxNx4D_mmi(8, 8);
sadMxNx4D_mmi(8, 4);

static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_8
    MMI_ADDIU(%[second_pred], %[second_pred], 0x08)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg8xN(H) \
  unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg8xN(16);
vpx_sad_avg8xN(8);
vpx_sad_avg8xN(4);

static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_REF_ABS_SUB_4
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad4xN(H) \
  unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride, \
                                  const uint8_t *ref, int ref_stride) { \
    return vpx_sad4x(src, src_stride, ref, ref_stride, H); \
  }

vpx_sad4xN(8);
vpx_sad4xN(4);
sadMxNxK_mmi(4, 4, 3);
sadMxNxK_mmi(4, 4, 8);
sadMxNx4D_mmi(4, 8);
sadMxNx4D_mmi(4, 4);

static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         const uint8_t *second_pred,
                                         int counter) {
  unsigned int sad;
  double ftmp1, ftmp2, ftmp3;
  mips_reg l_counter = counter;

  __asm__ volatile (
    "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
    "1: \n\t"
    // Unroll two iterations of the loop body to reduce loop overhead.
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    SAD_SRC_AVGREF_ABS_SUB_4
    MMI_ADDIU(%[second_pred], %[second_pred], 0x04)
    MMI_ADDU(%[src], %[src], %[src_stride])
    MMI_ADDU(%[ref], %[ref], %[ref_stride])
    MMI_ADDIU(%[counter], %[counter], -0x02)
    "bnez %[counter], 1b \n\t"
    "mfc1 %[sad], %[ftmp3] \n\t"
    : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3),
      [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref),
      [second_pred]"+&r"((mips_reg)second_pred),
      [sad]"=&r"(sad)
    : [src_stride]"r"((mips_reg)src_stride),
      [ref_stride]"r"((mips_reg)ref_stride)
  );

  return sad;
}

#define vpx_sad_avg4xN(H) \
  unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride, \
                                      const uint8_t *ref, int ref_stride, \
                                      const uint8_t *second_pred) { \
    return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \
  }

vpx_sad_avg4xN(8);
vpx_sad_avg4xN(4);