/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

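/* Interleaves the bytes of 'src' and 'ref', computes their signed halfword
 * differences and accumulates the squared differences (each difference
 * dot-multiplied with itself) into the v4i32 accumulator 'var'. */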
#define CALC_MSE_B(src, ref, var)                                   \
  {                                                                 \
    v16u8 src_l0_m, src_l1_m;                                       \
    v8i16 res_l0_m, res_l1_m;                                       \
                                                                    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                      \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);            \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
  }

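/* Same as CALC_MSE_B, but additionally accumulates the raw signed pixel
 * differences into 'sub' so the caller can derive the block sum needed for
 * the variance calculation. */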
#define CALC_MSE_AVG_B(src, ref, var, sub)                          \
  {                                                                 \
    v16u8 src_l0_m, src_l1_m;                                       \
    v8i16 res_l0_m, res_l1_m;                                       \
                                                                    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m);                      \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m);            \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
                                                                    \
    sub += res_l0_m + res_l1_m;                                     \
  }

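/* variance = sse - (sum * sum) / (width * height), where 'shift' is
 * log2(width * height). The large-block variant squares the sum in 64 bits
 * to avoid overflow. */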
#define VARIANCE_WxH(sse, diff, shift) sse - (((uint32_t)diff * diff) >> shift)

#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  sse - (((int64_t)diff * diff) >> shift)

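/* SSE plus difference sum for a 4-pixel-wide block: four rows are gathered
 * into a single vector per iteration via 32-bit word loads and inserts.
 * Returns the sum of squared errors and writes the signed difference sum
 * through '*diff'. */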
static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  int32_t ht_cnt;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

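/* 8-pixel-wide variant: two 8-byte rows are packed into each 16-byte vector
 * before the per-vector SSE/sum update. */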
static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                    const uint8_t *ref_ptr, int32_t ref_stride,
                                    int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

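/* 16-pixel-wide variant: one full vector per row, four rows processed per
 * loop iteration. */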
static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

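/* 32-pixel-wide variant: two vectors per row, four rows processed per loop
 * iteration. */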
static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                     const uint8_t *ref_ptr, int32_t ref_stride,
                                     int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

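/* Fixed-size 32x64 variant; the difference sums are split across two
 * halfword accumulators, which keeps the running per-lane sums within
 * 16-bit range for the taller block. */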
static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

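/* Fixed-size 64x32 variant: four vectors per row, difference sums split
 * across two halfword accumulators. */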
static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 16; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src2, ref2, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src3, ref3, var, avg1);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

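/* Fixed-size 64x64 variant: four vectors per row, difference sums split
 * across four halfword accumulators to avoid 16-bit overflow. */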
static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride,
                                   const uint8_t *ref_ptr, int32_t ref_stride,
                                   int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v8i16 avg0 = { 0 };
  v8i16 avg1 = { 0 };
  v8i16 avg2 = { 0 };
  v8i16 avg3 = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = 32; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;

    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_AVG_B(src0, ref0, var, avg0);
    CALC_MSE_AVG_B(src1, ref1, var, avg1);
    CALC_MSE_AVG_B(src2, ref2, var, avg2);
    CALC_MSE_AVG_B(src3, ref3, var, avg3);
  }

  vec = __msa_hadd_s_w(avg0, avg0);
  vec += __msa_hadd_s_w(avg1, avg1);
  vec += __msa_hadd_s_w(avg2, avg2);
  vec += __msa_hadd_s_w(avg3, avg3);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

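/* Sum of squares of 256 consecutive int16 values (a 16x16 block), accumulated
 * in 64-bit lanes and reduced to a scalar at the end. */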
static uint32_t get_mb_ss_msa(const int16_t *src) {
  uint32_t sum, cnt;
  v8i16 src0, src1, src2, src3;
  v4i32 src0_l, src1_l, src2_l, src3_l;
  v4i32 src0_r, src1_r, src2_r, src3_r;
  v2i64 sq_src_l = { 0 };
  v2i64 sq_src_r = { 0 };

  for (cnt = 8; cnt--;) {
    LD_SH4(src, 8, src0, src1, src2, src3);
    src += 4 * 8;

    UNPCK_SH_SW(src0, src0_l, src0_r);
    UNPCK_SH_SW(src1, src1_l, src1_r);
    UNPCK_SH_SW(src2, src2_l, src2_r);
    UNPCK_SH_SW(src3, src3_l, src3_r);

    DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r);
    DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r);
  }

  sq_src_l += __msa_splati_d(sq_src_l, 1);
  sq_src_r += __msa_splati_d(sq_src_r, 1);

  sum = __msa_copy_s_d(sq_src_l, 0);
  sum += __msa_copy_s_d(sq_src_r, 0);

  return sum;
}

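/* Plain SSE (no difference sum) for a 4-pixel-wide block. */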
static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16u8 src = { 0 };
  v16u8 ref = { 0 };
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

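/* Plain SSE for an 8-pixel-wide block; rows are packed in pairs as in
 * sse_diff_8width_msa. */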
static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride,
                               int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);

    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

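/* Plain SSE for a 16-pixel-wide block. */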
static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src, ref;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);

    src = LD_UB(src_ptr);
    src_ptr += src_stride;
    ref = LD_UB(ref_ptr);
    ref_ptr += ref_stride;
    CALC_MSE_B(src, ref, var);
  }

  return HADD_SW_S32(var);
}

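/* Plain SSE for a 32-pixel-wide block. */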
static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, ref0, ref1;
  v4i32 var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);

    LD_UB2(src_ptr, 16, src0, src1);
    src_ptr += src_stride;
    LD_UB2(ref_ptr, 16, ref0, ref1);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src1, ref1, var);
  }

  return HADD_SW_S32(var);
}

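/* Plain SSE for a 64-pixel-wide block; processes two rows (eight vectors)
 * per loop iteration. */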
static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
                                const uint8_t *ref_ptr, int32_t ref_stride,
                                int32_t height) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v4i32 var = { 0 };

  for (ht_cnt = height >> 1; ht_cnt--;) {
    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);

    LD_UB4(src_ptr, 16, src0, src1, src2, src3);
    src_ptr += src_stride;
    LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3);
    ref_ptr += ref_stride;
    CALC_MSE_B(src0, ref0, var);
    CALC_MSE_B(src2, ref2, var);
    CALC_MSE_B(src1, ref1, var);
    CALC_MSE_B(src3, ref3, var);
  }

  return HADD_SW_S32(var);
}

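/* SSE of a single 4x4 block, with no sum output. */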
uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
                              const uint8_t *ref_ptr, int32_t ref_stride) {
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16i8 src = { 0 };
  v16i8 ref = { 0 };
  v4i32 err0 = { 0 };

  LW4(src_ptr, src_stride, src0, src1, src2, src3);
  LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
  INSERT_W4_SB(src0, src1, src2, src3, src);
  INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
  CALC_MSE_B(src, ref, err0);

  return HADD_SW_S32(err0);
}

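/* Per-block-size wrappers around VARIANCE_WxH / VARIANCE_LARGE_WxH; the shift
 * argument is log2 of the number of pixels in the block. */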
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
#define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5);
#define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6);
#define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7);
#define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8);

#define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9);
#define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10);
#define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11);
#define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12);

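/* Generates vpx_variance<wd>x<ht>_msa(): computes the SSE and difference sum
 * with the matching sse_diff_<wd>width_msa() helper, stores the SSE through
 * '*sse' and returns the variance. */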
#define VPX_VARIANCE_WDXHT_MSA(wd, ht)                                         \
  uint32_t vpx_variance##wd##x##ht##_msa(                                      \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,              \
      int32_t ref_stride, uint32_t *sse) {                                     \
    int32_t diff;                                                              \
                                                                               \
    *sse =                                                                     \
        sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \
                                                                               \
    return VARIANCE_##wd##Wx##ht##H(*sse, diff);                               \
  }

VPX_VARIANCE_WDXHT_MSA(4, 4);
VPX_VARIANCE_WDXHT_MSA(4, 8);

VPX_VARIANCE_WDXHT_MSA(8, 4)
VPX_VARIANCE_WDXHT_MSA(8, 8)
VPX_VARIANCE_WDXHT_MSA(8, 16)

VPX_VARIANCE_WDXHT_MSA(16, 8)
VPX_VARIANCE_WDXHT_MSA(16, 16)
VPX_VARIANCE_WDXHT_MSA(16, 32)

VPX_VARIANCE_WDXHT_MSA(32, 16)
VPX_VARIANCE_WDXHT_MSA(32, 32)

uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_32Wx64H(*sse, diff);
}

uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx32H(*sse, diff);
}

uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride,
                               const uint8_t *ref, int32_t ref_stride,
                               uint32_t *sse) {
  int32_t diff;

  *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff);

  return VARIANCE_64Wx64H(*sse, diff);
}

uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride,
                        const uint8_t *ref, int32_t ref_stride, uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride,
                         uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8);

  return *sse;
}

uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride,
                          const uint8_t *ref, int32_t ref_stride,
                          uint32_t *sse) {
  *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16);

  return *sse;
}

void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride,
                       const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                       int32_t *sum) {
  *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum);
}

void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride,
                         const uint8_t *ref, int32_t ref_stride, uint32_t *sse,
                         int32_t *sum) {
  *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum);
}

uint32_t vpx_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); }