/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

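/* Sum of squared differences between the quantized coefficients and
 * their dequantized values for one 4x4 block (16 coefficients).
 * A scalar sketch of what the vector code below computes:
 *
 *     int32_t i, diff, err = 0;
 *     for (i = 0; i < 16; i++)
 *     {
 *         diff = coeff_ptr[i] - dq_coeff_ptr[i];
 *         err += diff * diff;
 *     }
 *     return err;
 */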
int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr)
{
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, dq_coeff, coeff0, coeff1;
    v4i32 diff0, diff1;
    v2i64 err0 = { 0 };
    v2i64 err1 = { 0 };

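    /* Two iterations of eight halfwords each cover all 16 coefficients.
     * ILVRL_H2_SH interleaves coeff with dq_coeff so that HSUB_UH2_SW
     * can form the per-coefficient differences as 32-bit words, and
     * DPADD_SD2_SD squares and accumulates them into the 64-bit lanes
     * of err0/err1. */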
    for (loop_cnt = 2; loop_cnt--;)
    {
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
    }

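    /* Horizontal reduction: fold the upper 64-bit lane of each
     * accumulator onto the lower one, then move both lane sums into the
     * scalar result. */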
    err0 += __msa_splati_d(err0, 1);
    err1 += __msa_splati_d(err1, 1);
    err = __msa_copy_s_d(err0, 0);
    err += __msa_copy_s_d(err1, 0);

    return err;
}

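/* Total coefficient error over the 16 luma blocks of a macroblock,
 * processed two blocks per iteration. When dc == 1 the DC coefficient
 * of each block is excluded from the error (in VP8 those DC terms are
 * coded through the second order block instead). */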
int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1;
    v16u8 zero = { 0 };
    v16u8 mask0 = (v16u8)__msa_ldi_b(255);

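    /* mask0 selects which difference words contribute to the error.
     * When dc == 1 its first 32-bit lane is cleared below, so bmnz_v
     * zeroes the DC difference held in the low word of diff0. */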
    if (1 == dc)
    {
        mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero);
    }

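    /* Each iteration handles blocks 2 * loop_cnt and 2 * loop_cnt + 1,
     * loading two 8-halfword vectors per block from both coeff and
     * dqcoeff. */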
    for (loop_cnt = 0; loop_cnt < 8; loop_cnt++)
    {
        be = &mb->block[2 * loop_cnt];
        bd = &mb->e_mbd.block[2 * loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        be = &mb->block[2 * loop_cnt + 1];
        bd = &mb->e_mbd.block[2 * loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

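        /* Second block of the pair: the same interleave / subtract /
         * square-accumulate sequence, again masking the DC word. */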
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err0 += __msa_splati_d(err0, 1);
        err1 += __msa_splati_d(err1, 1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}

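/* Total coefficient error over the chroma blocks of a macroblock;
 * block indices 16..23 are the four U and four V 4x4 blocks. Chroma has
 * no second order block, so no DC masking is applied. */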
int32_t vp8_mbuverror_msa(MACROBLOCK *mb)
{
    BLOCK *be;
    BLOCKD *bd;
    int16_t *coeff_ptr, *dq_coeff_ptr;
    int32_t err = 0;
    uint32_t loop_cnt;
    v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4;
    v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4;
    v4i32 diff0, diff1;
    v2i64 err0, err1, err_dup0, err_dup1;

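    /* Two chroma blocks per iteration, 16 coefficients each. */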
    for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2)
    {
        be = &mb->block[loop_cnt];
        bd = &mb->e_mbd.block[loop_cnt];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff = LD_SH(coeff_ptr);
        dq_coeff = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff2 = LD_SH(coeff_ptr);
        dq_coeff2 = LD_SH(dq_coeff_ptr);
        be = &mb->block[loop_cnt + 1];
        bd = &mb->e_mbd.block[loop_cnt + 1];
        coeff_ptr = be->coeff;
        dq_coeff_ptr = bd->dqcoeff;
        coeff3 = LD_SH(coeff_ptr);
        dq_coeff3 = LD_SH(dq_coeff_ptr);
        coeff_ptr += 8;
        dq_coeff_ptr += 8;
        coeff4 = LD_SH(coeff_ptr);
        dq_coeff4 = LD_SH(dq_coeff_ptr);

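        /* First block of the pair: DOTP_SW2_SD squares the first eight
         * differences into fresh accumulators, then DPADD_SD2_SD adds
         * the squares of the remaining eight before the lanes are
         * folded into the scalar total. */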
        ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);

        ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);

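        /* Second block of the pair follows the same pattern. */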
        ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1);
        ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1);
        HSUB_UH2_SW(coeff0, coeff1, diff0, diff1);
        DPADD_SD2_SD(diff0, diff1, err0, err1);
        err_dup0 = __msa_splati_d(err0, 1);
        err_dup1 = __msa_splati_d(err1, 1);
        ADD2(err0, err_dup0, err1, err_dup1, err0, err1);
        err += __msa_copy_s_d(err0, 0);
        err += __msa_copy_s_d(err1, 0);
    }

    return err;
}