/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#include "config/av1_rtcd.h"

#include "aom_dsp/mips/macros_msa.h"

/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) defines the static helper
 * block_error_<BSize>size_msa(coeff_ptr, dq_coeff_ptr, ssz) for a block of
 * BSize 16-bit coefficients, using MIPS MSA vectors.
 *
 * The generated function:
 *   - stores in *ssz a sum computed from coeff_ptr alone, and
 *   - returns a sum computed from coeff_ptr and dq_coeff_ptr together.
 * Going by the helper macro names (their definitions live in
 * aom_dsp/mips/macros_msa.h and are not visible here -- confirm there), these
 * are the sum of squared coefficients and the sum of squared differences
 * between the two buffers, both accumulated in 64-bit lanes.
 *
 * BSize must be a literal multiple of 16 that is at least 16: it is pasted
 * into the function name, the first 16 coefficients are handled before the
 * loop, and the loop then runs (BSize >> 4) - 1 times over 16 coefficients
 * per pass. A smaller value would make the unsigned loop counter wrap.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                     \
  static int64_t block_error_##BSize##size_msa(                              \
      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
    int64_t err = 0;                                                         \
    uint32_t loop_cnt;                                                       \
    v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h;                             \
    v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w;                              \
    v2i64 sq_coeff_r, sq_coeff_l;                                            \
    v2i64 err0, err_dup0, err1, err_dup1;                                    \
                                                                             \
    /* First 8 coefficients: DOTP_* initializes both accumulators. */        \
    coeff = LD_SH(coeff_ptr);                                                \
    dq_coeff = LD_SH(dq_coeff_ptr);                                          \
    UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                                \
    ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                      \
    HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                       \
    DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r,      \
                sq_coeff_l);                                                 \
    DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1);                 \
                                                                             \
    /* Next 8 coefficients: DPADD_* accumulates into the same sums. */       \
    coeff = LD_SH(coeff_ptr + 8);                                            \
    dq_coeff = LD_SH(dq_coeff_ptr + 8);                                      \
    UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                                \
    ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                      \
    HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                       \
    DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);              \
    DPADD_SD2_SD(diff_r, diff_l, err0, err1);                                \
                                                                             \
    coeff_ptr += 16;                                                         \
    dq_coeff_ptr += 16;                                                      \
                                                                             \
    /* Each pass handles 16 more coefficients: (BSize / 16) - 1 passes. */   \
    for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) {                       \
      coeff = LD_SH(coeff_ptr);                                              \
      dq_coeff = LD_SH(dq_coeff_ptr);                                        \
      UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                              \
      ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                    \
      HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                     \
      DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);            \
      DPADD_SD2_SD(diff_r, diff_l, err0, err1);                              \
                                                                             \
      coeff = LD_SH(coeff_ptr + 8);                                          \
      dq_coeff = LD_SH(dq_coeff_ptr + 8);                                    \
      UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w);                              \
      ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h);                    \
      HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l);                     \
      DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l);            \
      DPADD_SD2_SD(diff_r, diff_l, err0, err1);                              \
                                                                             \
      coeff_ptr += 16;                                                       \
      dq_coeff_ptr += 16;                                                    \
    }                                                                        \
                                                                             \
    /* Fold the two 64-bit lanes of each accumulator; sum both into *ssz. */ \
    err_dup0 = __msa_splati_d(sq_coeff_r, 1);                                \
    err_dup1 = __msa_splati_d(sq_coeff_l, 1);                                \
    sq_coeff_r += err_dup0;                                                  \
    sq_coeff_l += err_dup1;                                                  \
    *ssz = __msa_copy_s_d(sq_coeff_r, 0);                                    \
    *ssz += __msa_copy_s_d(sq_coeff_l, 0);                                   \
                                                                             \
    /* Same lane fold for the squared-difference accumulators. */            \
    err_dup0 = __msa_splati_d(err0, 1);                                      \
    err_dup1 = __msa_splati_d(err1, 1);                                      \
    err0 += err_dup0;                                                        \
    err1 += err_dup1;                                                        \
    err = __msa_copy_s_d(err0, 0);                                           \
    err += __msa_copy_s_d(err1, 0);                                          \
                                                                             \
    return err;                                                              \
  }

84 /* clang-format off */
85 BLOCK_ERROR_BLOCKSIZE_MSA(16)
86 BLOCK_ERROR_BLOCKSIZE_MSA(64)
87 BLOCK_ERROR_BLOCKSIZE_MSA(256)
88 BLOCK_ERROR_BLOCKSIZE_MSA(1024)
89 /* clang-format on */
90 
av1_block_error_msa(const tran_low_t * coeff_ptr,const tran_low_t * dq_coeff_ptr,intptr_t blk_size,int64_t * ssz)91 int64_t av1_block_error_msa(const tran_low_t *coeff_ptr,
92                             const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
93                             int64_t *ssz) {
94   int64_t err;
95   const int16_t *coeff = (const int16_t *)coeff_ptr;
96   const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
97 
98   switch (blk_size) {
99     case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break;
100     case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break;
101     case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break;
102     case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break;
103     default:
104       err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
105       break;
106   }
107 
108   return err;
109 }
110