1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include "config/av1_rtcd.h"
13
14 #include "aom_dsp/mips/macros_msa.h"
15
16 #define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
17 static int64_t block_error_##BSize##size_msa( \
18 const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
19 int64_t err = 0; \
20 uint32_t loop_cnt; \
21 v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
22 v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
23 v2i64 sq_coeff_r, sq_coeff_l; \
24 v2i64 err0, err_dup0, err1, err_dup1; \
25 \
26 coeff = LD_SH(coeff_ptr); \
27 dq_coeff = LD_SH(dq_coeff_ptr); \
28 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
29 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
30 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
31 DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \
32 sq_coeff_l); \
33 DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
34 \
35 coeff = LD_SH(coeff_ptr + 8); \
36 dq_coeff = LD_SH(dq_coeff_ptr + 8); \
37 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
38 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
39 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
40 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
41 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
42 \
43 coeff_ptr += 16; \
44 dq_coeff_ptr += 16; \
45 \
46 for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
47 coeff = LD_SH(coeff_ptr); \
48 dq_coeff = LD_SH(dq_coeff_ptr); \
49 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
50 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
51 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
52 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
53 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
54 \
55 coeff = LD_SH(coeff_ptr + 8); \
56 dq_coeff = LD_SH(dq_coeff_ptr + 8); \
57 UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
58 ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
59 HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
60 DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
61 DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
62 \
63 coeff_ptr += 16; \
64 dq_coeff_ptr += 16; \
65 } \
66 \
67 err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
68 err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
69 sq_coeff_r += err_dup0; \
70 sq_coeff_l += err_dup1; \
71 *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
72 *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
73 \
74 err_dup0 = __msa_splati_d(err0, 1); \
75 err_dup1 = __msa_splati_d(err1, 1); \
76 err0 += err_dup0; \
77 err1 += err_dup1; \
78 err = __msa_copy_s_d(err0, 0); \
79 err += __msa_copy_s_d(err1, 0); \
80 \
81 return err; \
82 }
83
84 /* clang-format off */
85 BLOCK_ERROR_BLOCKSIZE_MSA(16)
86 BLOCK_ERROR_BLOCKSIZE_MSA(64)
87 BLOCK_ERROR_BLOCKSIZE_MSA(256)
88 BLOCK_ERROR_BLOCKSIZE_MSA(1024)
89 /* clang-format on */
90
av1_block_error_msa(const tran_low_t * coeff_ptr,const tran_low_t * dq_coeff_ptr,intptr_t blk_size,int64_t * ssz)91 int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
92 const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
93 int64_t *ssz) {
94 int64_t err;
95 const int16_t *coeff = (const int16_t *)coeff_ptr;
96 const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
97
98 switch (blk_size) {
99 case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
100 case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
101 case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
102 case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
103 default:
104 err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
105 break;
106 }
107
108 return err;
109 }
110