1 /*
2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "./vpx_config.h"
12 #include "./vp9_rtcd.h"
13 #include "vpx_dsp/mips/macros_msa.h"
14
/*
 * Generates a static helper block_error_<BSize>size_msa() that walks BSize
 * 16-bit coefficients from coeff_ptr and dq_coeff_ptr, eight per vector
 * load, while maintaining two pairs of 64-bit accumulators:
 *   - one pair fed from the widened coeff vectors; its total is stored
 *     through ssz,
 *   - one pair fed from the coeff/dq_coeff difference vectors; its total is
 *     the return value.
 *
 * BSize must be a single literal token (it is pasted into the function name)
 * and a multiple of 16 that is at least 16: the first 16 elements are
 * consumed before the loop, which then runs (BSize >> 4) - 1 more times at
 * 16 elements per pass.
 *
 * NOTE(review): the lane arithmetic lives in helper macros from
 * vpx_dsp/mips/macros_msa.h, which are not visible here. Judging by their
 * names the totals appear to be sum(coeff^2) for *ssz and
 * sum((coeff - dq_coeff)^2) for the result -- confirm against that header.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                     \
  static int64_t block_error_##BSize##size_msa(                              \
      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
    uint32_t blocks_left = (BSize >> 4) - 1;                                 \
    v8i16 src, dq, mix_r, mix_l;                                             \
    v4i32 src_r_w, src_l_w, delta_r, delta_l;                                \
    v2i64 ssz_acc_r, ssz_acc_l, err_acc_r, err_acc_l;                        \
    v2i64 hi_r, hi_l;                                                        \
                                                                             \
    /* Elements 0..7: these DOTP calls are the first writes to the four */   \
    /* accumulators; every later step adds to them with DPADD. */            \
    src = LD_SH(coeff_ptr);                                                  \
    dq = LD_SH(dq_coeff_ptr);                                                \
    UNPCK_SH_SW(src, src_r_w, src_l_w);                                      \
    ILVRL_H2_SH(src, dq, mix_r, mix_l);                                      \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    DOTP_SW2_SD(src_r_w, src_l_w, src_r_w, src_l_w, ssz_acc_r, ssz_acc_l);   \
    DOTP_SW2_SD(delta_r, delta_l, delta_r, delta_l, err_acc_r, err_acc_l);   \
                                                                             \
    /* Elements 8..15. */                                                    \
    src = LD_SH(coeff_ptr + 8);                                              \
    dq = LD_SH(dq_coeff_ptr + 8);                                            \
    UNPCK_SH_SW(src, src_r_w, src_l_w);                                      \
    ILVRL_H2_SH(src, dq, mix_r, mix_l);                                      \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    DPADD_SD2_SD(src_r_w, src_l_w, ssz_acc_r, ssz_acc_l);                    \
    DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                    \
                                                                             \
    coeff_ptr += 16;                                                         \
    dq_coeff_ptr += 16;                                                      \
                                                                             \
    /* Remaining input, 16 elements per pass. */                             \
    while (blocks_left--) {                                                  \
      src = LD_SH(coeff_ptr);                                                \
      dq = LD_SH(dq_coeff_ptr);                                              \
      UNPCK_SH_SW(src, src_r_w, src_l_w);                                    \
      ILVRL_H2_SH(src, dq, mix_r, mix_l);                                    \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      DPADD_SD2_SD(src_r_w, src_l_w, ssz_acc_r, ssz_acc_l);                  \
      DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                  \
                                                                             \
      src = LD_SH(coeff_ptr + 8);                                            \
      dq = LD_SH(dq_coeff_ptr + 8);                                          \
      UNPCK_SH_SW(src, src_r_w, src_l_w);                                    \
      ILVRL_H2_SH(src, dq, mix_r, mix_l);                                    \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      DPADD_SD2_SD(src_r_w, src_l_w, ssz_acc_r, ssz_acc_l);                  \
      DPADD_SD2_SD(delta_r, delta_l, err_acc_r, err_acc_l);                  \
                                                                             \
      coeff_ptr += 16;                                                       \
      dq_coeff_ptr += 16;                                                    \
    }                                                                        \
                                                                             \
    /* Horizontal add: lane 1 is folded into lane 0 of each accumulator */   \
    /* and the two lane-0 values are then summed. */                         \
    hi_r = __msa_splati_d(ssz_acc_r, 1);                                     \
    hi_l = __msa_splati_d(ssz_acc_l, 1);                                     \
    ssz_acc_r += hi_r;                                                       \
    ssz_acc_l += hi_l;                                                       \
    *ssz = __msa_copy_s_d(ssz_acc_r, 0) + __msa_copy_s_d(ssz_acc_l, 0);      \
                                                                             \
    hi_r = __msa_splati_d(err_acc_r, 1);                                     \
    hi_l = __msa_splati_d(err_acc_l, 1);                                     \
    err_acc_r += hi_r;                                                       \
    err_acc_l += hi_l;                                                       \
    return __msa_copy_s_d(err_acc_r, 0) + __msa_copy_s_d(err_acc_l, 0);      \
  }
82
83 #if !CONFIG_VP9_HIGHBITDEPTH
84 BLOCK_ERROR_BLOCKSIZE_MSA(16);
85 BLOCK_ERROR_BLOCKSIZE_MSA(64);
86 BLOCK_ERROR_BLOCKSIZE_MSA(256);
87 BLOCK_ERROR_BLOCKSIZE_MSA(1024);
88
// Block-error entry point for MSA builds.
//
// Block sizes 16, 64, 256 and 1024 are routed to the matching
// block_error_<N>size_msa() kernel; any other blk_size is handed to
// vp9_block_error_c() with the original arguments. The return value and the
// value written through ssz are whatever the selected routine produces.
int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
                            const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
                            int64_t *ssz) {
  // The size-specific kernels take int16_t pointers.
  // NOTE(review): this relies on tran_low_t being a 16-bit type whenever
  // CONFIG_VP9_HIGHBITDEPTH is off (see the enclosing #if) -- confirm.
  const int16_t *const coeff16 = (const int16_t *)coeff_ptr;
  const int16_t *const dq_coeff16 = (const int16_t *)dq_coeff_ptr;

  switch (blk_size) {
    case 16: return block_error_16size_msa(coeff16, dq_coeff16, ssz);
    case 64: return block_error_64size_msa(coeff16, dq_coeff16, ssz);
    case 256: return block_error_256size_msa(coeff16, dq_coeff16, ssz);
    case 1024: return block_error_1024size_msa(coeff16, dq_coeff16, ssz);
    default: break;
  }

  // No specialised kernel for this size: use the generic C implementation.
  return vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
}
108 #endif // !CONFIG_VP9_HIGHBITDEPTH
109