/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/block.h"

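/* Fast (non-zero-bin) quantizer for one 4x4 block using MIPS MSA intrinsics.
 * Coefficients are shuffled into zig-zag scan order, quantized as
 * ((abs(z) + round) * quant) >> 16 with the sign restored afterwards,
 * shuffled back to raster order and dequantized. Returns the number of
 * coefficients up to and including the last non-zero one (eob + 1). */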
static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *zbin,
                                  int16_t *round, int16_t *quant,
                                  int16_t *de_quant, int16_t *q_coeff,
                                  int16_t *dq_coeff)
{
    int32_t cnt, eob;
    /* Inverse zig-zag permutation: maps scan order back to raster order. */
    v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
                          3, 8, 11, 13, 9, 10, 14, 15 };
    v8i16 round0, round1;
    v8i16 sign_z0, sign_z1;
    v8i16 q_coeff0, q_coeff1;
    v8i16 x0, x1, de_quant0, de_quant1;
    v8i16 coeff0, coeff1, z0, z1;
    v8i16 quant0, quant1, quant2, quant3;
    v8i16 zero = { 0 };
    v8i16 inv_zig_zag0, inv_zig_zag1;
    /* Shuffle masks that gather raster-order coefficients into the VP8
     * 4x4 zig-zag scan order. */
    v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
    v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
    v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
    v4i32 temp0_w, temp1_w, temp2_w, temp3_w;

    /* Expand the byte-wide inverse zig-zag table into two halfword
     * shuffle vectors. */
    ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
    eob = -1;
    /* Load the coefficients, round and quant tables and reorder each into
     * zig-zag scan order. */
    LD_SH2(coeff_ptr, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z0, z1);
    LD_SH2(round, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               round0, round1);
    LD_SH2(quant, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    /* Split off the sign and take the absolute value of each coefficient. */
    sign_z0 = z0 >> 15;
    sign_z1 = z1 >> 15;
    x0 = __msa_add_a_h(z0, zero);
    x1 = __msa_add_a_h(z1, zero);
    /* x = ((abs(z) + round) * quant) >> 16, computed as a widening dot
     * product of interleaved (x, round) pairs with (quant, quant) pairs. */
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
    ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
    /* Restore the sign: x = (x ^ sign) - sign. */
    x0 = x0 ^ sign_z0;
    x1 = x1 ^ sign_z1;
    SUB2(x0, sign_z0, x1, sign_z1, x0, x1);
    /* Shuffle back to raster order, store the quantized coefficients and
     * the dequantized (quantized * dequant) coefficients. */
    VSHF_H2_SH(x0, x1, x0, x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1);
    ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
    LD_SH2(de_quant, 8, de_quant0, de_quant1);
    q_coeff0 *= de_quant0;
    q_coeff1 *= de_quant1;
    ST_SH2(q_coeff0, q_coeff1, dq_coeff, 8);

    /* Scan the zig-zag ordered coefficients from the highest frequency
     * downwards to find the last non-zero position (eob). */
    for (cnt = 0; cnt < 16; ++cnt)
    {
        if ((cnt <= 7) && (x1[7 - cnt] != 0))
        {
            eob = (15 - cnt);
            break;
        }

        if ((cnt > 7) && (x0[7 - (cnt - 8)] != 0))
        {
            eob = (7 - (cnt - 8));
            break;
        }
    }

    return (int8_t)(eob + 1);
}

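/* Exact regular (zero-bin) quantizer for one 4x4 block using MIPS MSA
 * intrinsics. In addition to round/quant/dequant it applies the zero-bin
 * threshold (zbin + zbin_oq + zrun_zbin_boost) and the second-stage
 * quant_shift multiply, then runs a scalar pass that zeroes coefficients
 * inside the zero bin while tracking the last non-zero position.
 * Returns eob + 1. */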
static int8_t exact_regular_quantize_b_msa(int16_t *zbin_boost,
                                           int16_t *coeff_ptr,
                                           int16_t *zbin,
                                           int16_t *round,
                                           int16_t *quant,
                                           int16_t *quant_shift,
                                           int16_t *de_quant,
                                           int16_t zbin_oq_in,
                                           int16_t *q_coeff,
                                           int16_t *dq_coeff)
{
    int32_t cnt, eob;
    int16_t *boost_temp = zbin_boost;
    /* Inverse zig-zag permutation: maps scan order back to raster order. */
    v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
                          3, 8, 11, 13, 9, 10, 14, 15 };
    v8i16 round0, round1;
    v8i16 sign_z0, sign_z1;
    v8i16 q_coeff0, q_coeff1;
    v8i16 z_bin0, z_bin1, zbin_o_q;
    v8i16 x0, x1, sign_x0, sign_x1, de_quant0, de_quant1;
    v8i16 coeff0, coeff1, z0, z1;
    v8i16 quant0, quant1, quant2, quant3;
    v8i16 zero = { 0 };
    v8i16 inv_zig_zag0, inv_zig_zag1;
    /* Shuffle masks that gather raster-order coefficients into the VP8
     * 4x4 zig-zag scan order. */
    v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
    v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
    v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
    v4i32 temp0_w, temp1_w, temp2_w, temp3_w;

    ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
    /* Broadcast the zero-bin extra (zbin_oq) value to all lanes. */
    zbin_o_q = __msa_fill_h(zbin_oq_in);
    eob = -1;
    /* Load the coefficients, round, quant and zbin tables and reorder each
     * into zig-zag scan order. */
    LD_SH2(coeff_ptr, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z0, z1);
    LD_SH2(round, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               round0, round1);
    LD_SH2(quant, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    LD_SH2(zbin, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               z_bin0, z_bin1);
    /* Split off the sign and take the absolute value of each coefficient. */
    sign_z0 = z0 >> 15;
    sign_z1 = z1 >> 15;
    x0 = __msa_add_a_h(z0, zero);
    x1 = __msa_add_a_h(z1, zero);
    /* z_bin = abs(z) - zbin - zbin_oq; compared against the zero-run boost
     * in the scalar loop below. */
    SUB2(x0, z_bin0, x1, z_bin1, z_bin0, z_bin1);
    SUB2(z_bin0, zbin_o_q, z_bin1, zbin_o_q, z_bin0, z_bin1);
    /* First stage: temp = ((abs(z) + round) * quant) >> 16. */
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
    ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, temp0_h, temp2_h);
    /* Second stage: x = ((temp + abs(z) + round) * quant_shift) >> 16. */
    LD_SH2(quant_shift, 8, coeff0, coeff1);
    VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
               quant0, quant2);
    ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
    ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
    ADD2(x0, round0, x1, round1, x0, x1);
    ILVL_H2_SH(temp0_h, x0, temp2_h, x1, temp1_h, temp3_h);
    ILVR_H2_SH(temp0_h, x0, temp2_h, x1, temp0_h, temp2_h);
    DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
                quant3, temp0_w, temp1_w, temp2_w, temp3_w);
    SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
    PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
    /* Restore the sign: sign_x = (x ^ sign) - sign. */
    sign_x0 = x0 ^ sign_z0;
    sign_x1 = x1 ^ sign_z1;
    SUB2(sign_x0, sign_z0, sign_x1, sign_z1, sign_x0, sign_x1);
    /* Scalar pass in zig-zag order: a coefficient is kept only when
     * abs(z) - zbin - zbin_oq >= zrun_zbin_boost. A kept non-zero
     * coefficient updates eob and resets the boost pointer; otherwise the
     * coefficient is zeroed and the boost pointer advances. */
    for (cnt = 0; cnt < 16; ++cnt)
    {
        if (cnt <= 7)
        {
            if (boost_temp[0] <= z_bin0[cnt])
            {
                if (x0[cnt])
                {
                    eob = cnt;
                    boost_temp = zbin_boost;
                }
                else
                {
                    boost_temp++;
                }
            }
            else
            {
                sign_x0[cnt] = 0;
                boost_temp++;
            }
        }
        else
        {
            if (boost_temp[0] <= z_bin1[cnt - 8])
            {
                if (x1[cnt - 8])
                {
                    eob = cnt;
                    boost_temp = zbin_boost;
                }
                else
                {
                    boost_temp++;
                }
            }
            else
            {
                sign_x1[cnt - 8] = 0;
                boost_temp++;
            }
        }
    }

    /* Shuffle back to raster order, store the quantized coefficients and
     * the dequantized (quantized * dequant) coefficients. */
    VSHF_H2_SH(sign_x0, sign_x1, sign_x0, sign_x1, inv_zig_zag0, inv_zig_zag1,
               q_coeff0, q_coeff1);
    ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
    LD_SH2(de_quant, 8, de_quant0, de_quant1);
    MUL2(de_quant0, q_coeff0, de_quant1, q_coeff1, de_quant0, de_quant1);
    ST_SH2(de_quant0, de_quant1, dq_coeff, 8);

    return (int8_t)(eob + 1);
}

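/* RTCD entry point for the MSA fast quantizer: unpacks the BLOCK/BLOCKD
 * pointers and writes the eob count into *d->eob. */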
void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    int16_t *coeff_ptr = b->coeff;
    int16_t *zbin_ptr = b->zbin;
    int16_t *round_ptr = b->round;
    int16_t *quant_ptr = b->quant_fast;
    int16_t *qcoeff_ptr = d->qcoeff;
    int16_t *dqcoeff_ptr = d->dqcoeff;
    int16_t *dequant_ptr = d->dequant;

    *d->eob = fast_quantize_b_msa(coeff_ptr, zbin_ptr, round_ptr, quant_ptr,
                                  dequant_ptr, qcoeff_ptr, dqcoeff_ptr);
}

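/* RTCD entry point for the MSA regular (zero-bin) quantizer: unpacks the
 * BLOCK/BLOCKD pointers, including the zero-run zbin boost table and
 * zbin_extra, and writes the eob count into *d->eob. */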
void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
    int16_t *coeff_ptr = b->coeff;
    int16_t *zbin_ptr = b->zbin;
    int16_t *round_ptr = b->round;
    int16_t *quant_ptr = b->quant;
    int16_t *quant_shift_ptr = b->quant_shift;
    int16_t *qcoeff_ptr = d->qcoeff;
    int16_t *dqcoeff_ptr = d->dqcoeff;
    int16_t *dequant_ptr = d->dequant;
    int16_t zbin_oq_value = b->zbin_extra;

    *d->eob = exact_regular_quantize_b_msa(zbin_boost_ptr, coeff_ptr,
                                           zbin_ptr, round_ptr,
                                           quant_ptr, quant_shift_ptr,
                                           dequant_ptr, zbin_oq_value,
                                           qcoeff_ptr, dqcoeff_ptr);
}