/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "./vpx_config.h"
12 #include "./vp9_rtcd.h"
13 #include "vpx_dsp/mips/macros_msa.h"
14 
/*
 * BLOCK_ERROR_BLOCKSIZE_MSA(BSize) expands to the definition of
 *
 *   static int64_t block_error_<BSize>size_msa(const int16_t *coeff_ptr,
 *                                              const int16_t *dq_coeff_ptr,
 *                                              int64_t *ssz);
 *
 * The generated function reads BSize int16_t values from each input, 16 per
 * step (two 8-lane vector loads). One step is peeled ahead of the loop and the
 * loop runs (BSize >> 4) - 1 more times, so BSize must be a multiple of 16 and
 * at least 16. Two pairs of 64-bit accumulators are maintained: one fed from
 * the widened coeff values (its total is stored to *ssz) and one fed from the
 * HSUB result of the interleaved coeff/dq_coeff vectors (its total is the
 * return value).
 * NOTE(review): going by the helper names these are the sum of squared
 * coefficients and the sum of squared (coeff - dq_coeff) differences; confirm
 * against vpx_dsp/mips/macros_msa.h.
 *
 * Only block comments may be used inside the macro body: a line comment would
 * swallow the continued lines that follow it.
 */
#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize)                                     \
  static int64_t block_error_##BSize##size_msa(                              \
      const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \
    uint32_t blocks_left = (BSize >> 4) - 1;                                 \
    v8i16 c_vec, dq_vec, mix_r, mix_l;                                       \
    v4i32 delta_r, delta_l, c_r, c_l;                                        \
    v2i64 ssz_r, ssz_l, sse_r, sse_l;                                        \
                                                                             \
    /* First 16 coefficients: DOTP initializes all four accumulators. */     \
    c_vec = LD_SH(coeff_ptr);                                                \
    dq_vec = LD_SH(dq_coeff_ptr);                                            \
    UNPCK_SH_SW(c_vec, c_r, c_l);                                            \
    ILVRL_H2_SH(c_vec, dq_vec, mix_r, mix_l);                                \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    DOTP_SW2_SD(c_r, c_l, c_r, c_l, ssz_r, ssz_l);                           \
    DOTP_SW2_SD(delta_r, delta_l, delta_r, delta_l, sse_r, sse_l);           \
                                                                             \
    c_vec = LD_SH(coeff_ptr + 8);                                            \
    dq_vec = LD_SH(dq_coeff_ptr + 8);                                        \
    UNPCK_SH_SW(c_vec, c_r, c_l);                                            \
    ILVRL_H2_SH(c_vec, dq_vec, mix_r, mix_l);                                \
    HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                             \
    DPADD_SD2_SD(c_r, c_l, ssz_r, ssz_l);                                    \
    DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l);                            \
                                                                             \
    coeff_ptr += 16;                                                         \
    dq_coeff_ptr += 16;                                                      \
                                                                             \
    /* Remaining data, 16 coefficients per iteration. */                     \
    while (blocks_left--) {                                                  \
      c_vec = LD_SH(coeff_ptr);                                              \
      dq_vec = LD_SH(dq_coeff_ptr);                                          \
      UNPCK_SH_SW(c_vec, c_r, c_l);                                          \
      ILVRL_H2_SH(c_vec, dq_vec, mix_r, mix_l);                              \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      DPADD_SD2_SD(c_r, c_l, ssz_r, ssz_l);                                  \
      DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l);                          \
                                                                             \
      c_vec = LD_SH(coeff_ptr + 8);                                          \
      dq_vec = LD_SH(dq_coeff_ptr + 8);                                      \
      UNPCK_SH_SW(c_vec, c_r, c_l);                                          \
      ILVRL_H2_SH(c_vec, dq_vec, mix_r, mix_l);                              \
      HSUB_UH2_SW(mix_r, mix_l, delta_r, delta_l);                           \
      DPADD_SD2_SD(c_r, c_l, ssz_r, ssz_l);                                  \
      DPADD_SD2_SD(delta_r, delta_l, sse_r, sse_l);                          \
                                                                             \
      coeff_ptr += 16;                                                       \
      dq_coeff_ptr += 16;                                                    \
    }                                                                        \
                                                                             \
    /* Fold the two 64-bit lanes of each accumulator into lane 0. */         \
    ssz_r += __msa_splati_d(ssz_r, 1);                                       \
    ssz_l += __msa_splati_d(ssz_l, 1);                                       \
    *ssz = __msa_copy_s_d(ssz_r, 0) + __msa_copy_s_d(ssz_l, 0);              \
                                                                             \
    sse_r += __msa_splati_d(sse_r, 1);                                       \
    sse_l += __msa_splati_d(sse_l, 1);                                       \
    return __msa_copy_s_d(sse_r, 0) + __msa_copy_s_d(sse_l, 0);              \
  }
82 
83 #if !CONFIG_VP9_HIGHBITDEPTH
84 BLOCK_ERROR_BLOCKSIZE_MSA(16);
85 BLOCK_ERROR_BLOCKSIZE_MSA(64);
86 BLOCK_ERROR_BLOCKSIZE_MSA(256);
87 BLOCK_ERROR_BLOCKSIZE_MSA(1024);
88 
/*
 * Block error entry point for the MSA build.
 *
 * coeff_ptr / dq_coeff_ptr: the two coefficient arrays to compare.
 * blk_size: number of coefficients; 16, 64, 256 and 1024 are routed to the
 *           matching block_error_<N>size_msa() kernel, every other value is
 *           forwarded unchanged to vp9_block_error_c().
 * ssz: output written by whichever implementation handles the call.
 *
 * Returns the value produced by the selected implementation.
 *
 * NOTE(review): the kernels take int16_t pointers, so the casts below assume
 * tran_low_t is a 16-bit type when CONFIG_VP9_HIGHBITDEPTH is off (this
 * function is only compiled in that configuration) -- confirm.
 */
int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
                            const tran_low_t *dq_coeff_ptr, intptr_t blk_size,
                            int64_t *ssz) {
  const int16_t *const coeff16 = (const int16_t *)coeff_ptr;
  const int16_t *const dq_coeff16 = (const int16_t *)dq_coeff_ptr;

  switch (blk_size) {
    case 16: return block_error_16size_msa(coeff16, dq_coeff16, ssz);
    case 64: return block_error_64size_msa(coeff16, dq_coeff16, ssz);
    case 256: return block_error_256size_msa(coeff16, dq_coeff16, ssz);
    case 1024: return block_error_1024size_msa(coeff16, dq_coeff16, ssz);
    default: break;
  }

  /* No specialised kernel for this size: use the portable implementation. */
  return vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
}
108 #endif  // !CONFIG_VP9_HIGHBITDEPTH
109