• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp8_rtcd.h"
12 #include "vp8/common/mips/msa/vp8_macros_msa.h"
13 #include "vp8/encoder/block.h"
14 
fast_quantize_b_msa(int16_t * coeff_ptr,int16_t * zbin,int16_t * round,int16_t * quant,int16_t * de_quant,int16_t * q_coeff,int16_t * dq_coeff)15 static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *zbin,
16                                   int16_t *round, int16_t *quant,
17                                   int16_t *de_quant, int16_t *q_coeff,
18                                   int16_t *dq_coeff)
19 {
20     int32_t cnt, eob;
21     v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
22                           3, 8, 11, 13, 9, 10, 14, 15 };
23     v8i16 round0, round1;
24     v8i16 sign_z0, sign_z1;
25     v8i16 q_coeff0, q_coeff1;
26     v8i16 x0, x1, de_quant0, de_quant1;
27     v8i16 coeff0, coeff1, z0, z1;
28     v8i16 quant0, quant1, quant2, quant3;
29     v8i16 zero = { 0 };
30     v8i16 inv_zig_zag0, inv_zig_zag1;
31     v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
32     v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
33     v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
34     v4i32 temp0_w, temp1_w, temp2_w, temp3_w;
35 
36     ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
37     eob = -1;
38     LD_SH2(coeff_ptr, 8, coeff0, coeff1);
39     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
40                z0, z1);
41     LD_SH2(round, 8, coeff0, coeff1);
42     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
43                round0, round1);
44     LD_SH2(quant, 8, coeff0, coeff1);
45     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
46                quant0, quant2);
47     sign_z0 = z0 >> 15;
48     sign_z1 = z1 >> 15;
49     x0 = __msa_add_a_h(z0, zero);
50     x1 = __msa_add_a_h(z1, zero);
51     ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
52     ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
53     ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
54     ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
55     DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
56                 quant3, temp0_w, temp1_w, temp2_w, temp3_w);
57     SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
58     PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
59     x0 = x0 ^ sign_z0;
60     x1 = x1 ^ sign_z1;
61     SUB2(x0, sign_z0, x1, sign_z1, x0, x1);
62     VSHF_H2_SH(x0, x1, x0, x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1);
63     ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
64     LD_SH2(de_quant, 8, de_quant0, de_quant1);
65     q_coeff0 *= de_quant0;
66     q_coeff1 *= de_quant1;
67     ST_SH2(q_coeff0, q_coeff1, dq_coeff, 8);
68 
69     for (cnt = 0; cnt < 16; ++cnt)
70     {
71         if ((cnt <= 7) && (x1[7 - cnt] != 0))
72         {
73             eob = (15 - cnt);
74             break;
75         }
76 
77         if ((cnt > 7) && (x0[7 - (cnt - 8)] != 0))
78         {
79             eob = (7 - (cnt - 8));
80             break;
81         }
82     }
83 
84     return (int8_t)(eob + 1);
85 }
86 
exact_regular_quantize_b_msa(int16_t * zbin_boost,int16_t * coeff_ptr,int16_t * zbin,int16_t * round,int16_t * quant,int16_t * quant_shift,int16_t * de_quant,int16_t zbin_oq_in,int16_t * q_coeff,int16_t * dq_coeff)87 static int8_t exact_regular_quantize_b_msa(int16_t *zbin_boost,
88                                            int16_t *coeff_ptr,
89                                            int16_t *zbin,
90                                            int16_t *round,
91                                            int16_t *quant,
92                                            int16_t *quant_shift,
93                                            int16_t *de_quant,
94                                            int16_t zbin_oq_in,
95                                            int16_t *q_coeff,
96                                            int16_t *dq_coeff)
97 {
98     int32_t cnt, eob;
99     int16_t *boost_temp = zbin_boost;
100     v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12,
101                           3, 8, 11, 13, 9, 10, 14, 15 };
102     v8i16 round0, round1;
103     v8i16 sign_z0, sign_z1;
104     v8i16 q_coeff0, q_coeff1;
105     v8i16 z_bin0, z_bin1, zbin_o_q;
106     v8i16 x0, x1, sign_x0, sign_x1, de_quant0, de_quant1;
107     v8i16 coeff0, coeff1, z0, z1;
108     v8i16 quant0, quant1, quant2, quant3;
109     v8i16 zero = { 0 };
110     v8i16 inv_zig_zag0, inv_zig_zag1;
111     v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 };
112     v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 };
113     v8i16 temp0_h, temp1_h, temp2_h, temp3_h;
114     v4i32 temp0_w, temp1_w, temp2_w, temp3_w;
115 
116     ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1);
117     zbin_o_q = __msa_fill_h(zbin_oq_in);
118     eob = -1;
119     LD_SH2(coeff_ptr, 8, coeff0, coeff1);
120     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
121                z0, z1);
122     LD_SH2(round, 8, coeff0, coeff1);
123     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
124                round0, round1);
125     LD_SH2(quant, 8, coeff0, coeff1);
126     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
127                quant0, quant2);
128     LD_SH2(zbin, 8, coeff0, coeff1);
129     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
130                z_bin0, z_bin1);
131     sign_z0 = z0 >> 15;
132     sign_z1 = z1 >> 15;
133     x0 = __msa_add_a_h(z0, zero);
134     x1 = __msa_add_a_h(z1, zero);
135     SUB2(x0, z_bin0, x1, z_bin1, z_bin0, z_bin1);
136     SUB2(z_bin0, zbin_o_q, z_bin1, zbin_o_q, z_bin0, z_bin1);
137     ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
138     ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
139     ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h);
140     ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h);
141     DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
142                 quant3, temp0_w, temp1_w, temp2_w, temp3_w);
143     SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
144     PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, temp0_h, temp2_h);
145     LD_SH2(quant_shift, 8, coeff0, coeff1);
146     VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1,
147                quant0, quant2);
148     ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3);
149     ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2);
150     ADD2(x0, round0, x1, round1, x0, x1);
151     ILVL_H2_SH(temp0_h, x0, temp2_h, x1, temp1_h, temp3_h);
152     ILVR_H2_SH(temp0_h, x0, temp2_h, x1, temp0_h, temp2_h);
153     DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2,
154                 quant3, temp0_w, temp1_w, temp2_w, temp3_w);
155     SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16);
156     PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1);
157     sign_x0 = x0 ^ sign_z0;
158     sign_x1 = x1 ^ sign_z1;
159     SUB2(sign_x0, sign_z0, sign_x1, sign_z1, sign_x0, sign_x1);
160     for (cnt = 0; cnt < 16; ++cnt)
161     {
162         if (cnt <= 7)
163         {
164             if (boost_temp[0] <= z_bin0[cnt])
165             {
166                 if (x0[cnt])
167                 {
168                     eob = cnt;
169                     boost_temp = zbin_boost;
170                 }
171                 else
172                 {
173                     boost_temp++;
174                 }
175             }
176             else
177             {
178                 sign_x0[cnt] = 0;
179                 boost_temp++;
180             }
181         }
182         else
183         {
184             if (boost_temp[0] <= z_bin1[cnt - 8])
185             {
186                 if (x1[cnt - 8])
187                 {
188                     eob = cnt;
189                     boost_temp = zbin_boost;
190                 }
191                 else
192                 {
193                     boost_temp++;
194                 }
195             }
196             else
197             {
198                 sign_x1[cnt - 8] = 0;
199                 boost_temp++;
200             }
201         }
202     }
203 
204     VSHF_H2_SH(sign_x0, sign_x1, sign_x0, sign_x1, inv_zig_zag0, inv_zig_zag1,
205                q_coeff0, q_coeff1);
206     ST_SH2(q_coeff0, q_coeff1, q_coeff, 8);
207     LD_SH2(de_quant, 8, de_quant0, de_quant1);
208     MUL2(de_quant0, q_coeff0, de_quant1, q_coeff1, de_quant0, de_quant1);
209     ST_SH2(de_quant0, de_quant1, dq_coeff, 8);
210 
211     return (int8_t)(eob + 1);
212 }
213 
/*
 * Entry point for the fast quantizer: feeds the coefficient, zbin, round and
 * quant_fast tables of the encoder block `b`, together with the dequant,
 * qcoeff and dqcoeff buffers of the descriptor `d`, to fast_quantize_b_msa()
 * and stores the value it returns through d->eob.
 */
void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    const int8_t end_of_block =
        fast_quantize_b_msa(b->coeff, b->zbin, b->round, b->quant_fast,
                            d->dequant, d->qcoeff, d->dqcoeff);

    *d->eob = end_of_block;
}
227 
/*
 * Entry point for the regular quantizer: feeds the zrun_zbin_boost, coeff,
 * zbin, round, quant and quant_shift tables and the zbin_extra value of the
 * encoder block `b`, together with the dequant, qcoeff and dqcoeff buffers
 * of the descriptor `d`, to exact_regular_quantize_b_msa() and stores the
 * value it returns through d->eob.
 */
void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d)
{
    const int8_t end_of_block =
        exact_regular_quantize_b_msa(b->zrun_zbin_boost, b->coeff,
                                     b->zbin, b->round,
                                     b->quant, b->quant_shift,
                                     d->dequant, b->zbin_extra,
                                     d->qcoeff, d->dqcoeff);

    *d->eob = end_of_block;
}
247