1 /*
2 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_mem/vpx_mem.h"
12 #include "vpx_ports/asmdefs_mmi.h"
13 #include "vp8/encoder/onyx_int.h"
14 #include "vp8/encoder/quantize.h"
15 #include "vp8/common/quant_common.h"
16
/*
 * Quantize one coefficient for the regular (zero-bin) quantizer.
 *
 *   i  - value assigned to `eob` when the quantized coefficient is nonzero.
 *   rc - index of the coefficient in the coefficient/quantizer tables.
 *
 * The macro expands in the caller's scope and relies on these names being
 * declared there: z, sz, x, y, zbin, eob, b, coeff_ptr, zbin_ptr,
 * zbin_boost_ptr, zbin_oq_value, round_ptr, quant_ptr, quant_shift_ptr,
 * qcoeff_ptr, dqcoeff_ptr and dequant_ptr.
 *
 * Side effects: zbin_boost_ptr is advanced by one element on every
 * invocation and is reset to b->zrun_zbin_boost whenever a nonzero quantized
 * value is produced. qcoeff_ptr[rc] / dqcoeff_ptr[rc] are written only in
 * that nonzero case, so the caller must have zeroed both arrays beforehand.
 *
 * `rc` is evaluated several times; pass side-effect-free arguments.
 * The body is wrapped in do { } while (0) so an invocation behaves as a
 * single statement, including inside an unbraced if/else.
 */
#define REGULAR_SELECT_EOB(i, rc)                                      \
  do {                                                                 \
    z = coeff_ptr[(rc)];                                               \
    sz = (z >> 31); /* 0 for z >= 0, -1 for z < 0 */                   \
    x = (z ^ sz) - sz; /* x = abs(z) */                                \
    zbin = zbin_ptr[(rc)] + *(zbin_boost_ptr++) + zbin_oq_value;       \
    if (x >= zbin) {                                                   \
      x += round_ptr[(rc)];                                            \
      y = ((((x * quant_ptr[(rc)]) >> 16) + x) * quant_shift_ptr[(rc)]) >> \
          16;                                                          \
      if (y) {                                                         \
        x = (y ^ sz) - sz; /* restore the sign of z */                 \
        qcoeff_ptr[(rc)] = x;                                          \
        dqcoeff_ptr[(rc)] = x * dequant_ptr[(rc)];                     \
        eob = (i);                                                     \
        zbin_boost_ptr = b->zrun_zbin_boost;                           \
      }                                                                \
    }                                                                  \
  } while (0)
33
/*
 * Fast quantizer for one block of 16 coefficients using Loongson MMI
 * inline assembly.
 *
 * Reads b->coeff, b->round and b->quant_fast, writes the quantized values to
 * d->qcoeff and the dequantized values (qcoeff * d->dequant) to d->dqcoeff,
 * and stores the end-of-block value through d->eob.
 *
 * Each table is accessed as 32 bytes (offsets 0x00..0x1f), i.e. 16 int16_t
 * entries, processed as two groups of eight ("loop 0 ~ 7", "loop 8 ~ 15").
 * All loads/stores use the gsldlc1/gsldrc1 and gssdlc1/gssdrc1 pairs, which
 * are the unaligned 64-bit access forms, so no table alignment is assumed.
 *
 * Per group of eight coefficients the assembly does:
 *   1. sign = coeff >> 15 (arithmetic); abs = (coeff ^ sign) - sign
 *   2. y = high 16 bits of ((abs + round) * quant_fast)     (pmulhuh)
 *   3. qcoeff = (y ^ sign) - sign
 *   4. keep the vp8_default_inv_zig_zag entry where y != 0, zero elsewhere,
 *      and fold it into a running per-lane maximum
 *      (presumably the table holds the 1-based scan position of each
 *      raster index - verify against its definition)
 *   5. dqcoeff = low 16 bits of (qcoeff * dequant)           (pmullh)
 * Finally the per-lane maxima are reduced to a single 16-bit value that is
 * stored to `eob`.
 */
void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  const int16_t *coeff_ptr = b->coeff;
  const int16_t *round_ptr = b->round;
  const int16_t *quant_ptr = b->quant_fast;
  int16_t *qcoeff_ptr = d->qcoeff;
  int16_t *dqcoeff_ptr = d->dqcoeff;
  const int16_t *dequant_ptr = d->dequant;
  const int16_t *inv_zig_zag = vp8_default_inv_zig_zag;

  double ftmp[13];
  uint64_t tmp[1];
  DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL };
  /* The assembly stores a full 64-bit register to &eob (gssdlc1/gssdrc1 at
   * offsets 0x07/0x00), so the object must be 8 bytes wide. A 4-byte int
   * here would let the store overwrite adjacent stack memory. */
  int64_t eob = 0;

  __asm__ volatile(
      /* loop 0 ~ 7 */
      /* ftmp0 = 0, ftmp9 = 15 (shift count used to extract sign masks) */
      "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
      "gsldlc1    %[ftmp1],   0x07(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp1],   0x00(%[coeff_ptr])              \n\t"
      "li         %[tmp0],    0x0f                            \n\t"
      "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
      "gsldlc1    %[ftmp2],   0x0f(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp2],   0x08(%[coeff_ptr])              \n\t"

      /* ftmp3/ftmp4 = sign masks, ftmp1/ftmp2 = abs(coeff) */
      "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
      "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
      "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
      "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
      "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
      "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"

      /* ftmp5/ftmp6 = ((abs + round) * quant_fast) >> 16 */
      "gsldlc1    %[ftmp5],   0x07(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp5],   0x00(%[round_ptr])              \n\t"
      "gsldlc1    %[ftmp6],   0x0f(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp6],   0x08(%[round_ptr])              \n\t"
      "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "gsldlc1    %[ftmp7],   0x07(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp7],   0x00(%[quant_ptr])              \n\t"
      "gsldlc1    %[ftmp8],   0x0f(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp8],   0x08(%[quant_ptr])              \n\t"
      "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"

      /* ftmp7/ftmp8 = quantized value with the sign restored -> qcoeff */
      "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
      "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
      "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
      "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
      "gssdlc1    %[ftmp7],   0x07(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp7],   0x00(%[qcoeff_ptr])             \n\t"
      "gssdlc1    %[ftmp8],   0x0f(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp8],   0x08(%[qcoeff_ptr])             \n\t"

      /* ftmp10 = per-lane max of inv_zig_zag entries whose result != 0 */
      "gsldlc1    %[ftmp1],   0x07(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp1],   0x00(%[inv_zig_zag])            \n\t"
      "gsldlc1    %[ftmp2],   0x0f(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp2],   0x08(%[inv_zig_zag])            \n\t"
      "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
      "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
      "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
      "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
      "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp5],       %[ftmp6]        \n\t"

      /* dqcoeff = qcoeff * dequant (low 16 bits) */
      "gsldlc1    %[ftmp5],   0x07(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp5],   0x00(%[dequant_ptr])            \n\t"
      "gsldlc1    %[ftmp6],   0x0f(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp6],   0x08(%[dequant_ptr])            \n\t"
      "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
      "gssdlc1    %[ftmp5],   0x07(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp5],   0x00(%[dqcoeff_ptr])            \n\t"
      "gssdlc1    %[ftmp6],   0x0f(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp6],   0x08(%[dqcoeff_ptr])            \n\t"

      /* loop 8 ~ 15: same sequence at byte offsets 0x10..0x1f */
      "gsldlc1    %[ftmp1],   0x17(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp1],   0x10(%[coeff_ptr])              \n\t"
      "gsldlc1    %[ftmp2],   0x1f(%[coeff_ptr])              \n\t"
      "gsldrc1    %[ftmp2],   0x18(%[coeff_ptr])              \n\t"

      "psrah      %[ftmp3],   %[ftmp1],       %[ftmp9]        \n\t"
      "xor        %[ftmp1],   %[ftmp3],       %[ftmp1]        \n\t"
      "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]        \n\t"
      "psrah      %[ftmp4],   %[ftmp2],       %[ftmp9]        \n\t"
      "xor        %[ftmp2],   %[ftmp4],       %[ftmp2]        \n\t"
      "psubh      %[ftmp2],   %[ftmp2],       %[ftmp4]        \n\t"

      "gsldlc1    %[ftmp5],   0x17(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp5],   0x10(%[round_ptr])              \n\t"
      "gsldlc1    %[ftmp6],   0x1f(%[round_ptr])              \n\t"
      "gsldrc1    %[ftmp6],   0x18(%[round_ptr])              \n\t"
      "paddh      %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "paddh      %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "gsldlc1    %[ftmp7],   0x17(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp7],   0x10(%[quant_ptr])              \n\t"
      "gsldlc1    %[ftmp8],   0x1f(%[quant_ptr])              \n\t"
      "gsldrc1    %[ftmp8],   0x18(%[quant_ptr])              \n\t"
      "pmulhuh    %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmulhuh    %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"

      "xor        %[ftmp7],   %[ftmp5],       %[ftmp3]        \n\t"
      "xor        %[ftmp8],   %[ftmp6],       %[ftmp4]        \n\t"
      "psubh      %[ftmp7],   %[ftmp7],       %[ftmp3]        \n\t"
      "psubh      %[ftmp8],   %[ftmp8],       %[ftmp4]        \n\t"
      "gssdlc1    %[ftmp7],   0x17(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp7],   0x10(%[qcoeff_ptr])             \n\t"
      "gssdlc1    %[ftmp8],   0x1f(%[qcoeff_ptr])             \n\t"
      "gssdrc1    %[ftmp8],   0x18(%[qcoeff_ptr])             \n\t"

      "gsldlc1    %[ftmp1],   0x17(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp1],   0x10(%[inv_zig_zag])            \n\t"
      "gsldlc1    %[ftmp2],   0x1f(%[inv_zig_zag])            \n\t"
      "gsldrc1    %[ftmp2],   0x18(%[inv_zig_zag])            \n\t"
      "pcmpeqh    %[ftmp5],   %[ftmp5],       %[ftmp0]        \n\t"
      "pcmpeqh    %[ftmp6],   %[ftmp6],       %[ftmp0]        \n\t"
      "xor        %[ftmp5],   %[ftmp5],       %[ones]         \n\t"
      "xor        %[ftmp6],   %[ftmp6],       %[ones]         \n\t"
      "and        %[ftmp5],   %[ftmp5],       %[ftmp1]        \n\t"
      "and        %[ftmp6],   %[ftmp6],       %[ftmp2]        \n\t"
      "pmaxsh     %[ftmp11],  %[ftmp5],       %[ftmp6]        \n\t"

      "gsldlc1    %[ftmp5],   0x17(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp5],   0x10(%[dequant_ptr])            \n\t"
      "gsldlc1    %[ftmp6],   0x1f(%[dequant_ptr])            \n\t"
      "gsldrc1    %[ftmp6],   0x18(%[dequant_ptr])            \n\t"
      "pmullh     %[ftmp5],   %[ftmp5],       %[ftmp7]        \n\t"
      "pmullh     %[ftmp6],   %[ftmp6],       %[ftmp8]        \n\t"
      "gssdlc1    %[ftmp5],   0x17(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp5],   0x10(%[dqcoeff_ptr])            \n\t"
      "gssdlc1    %[ftmp6],   0x1f(%[dqcoeff_ptr])            \n\t"
      "gssdrc1    %[ftmp6],   0x18(%[dqcoeff_ptr])            \n\t"

      /* Horizontal reduction of the four 16-bit lane maxima: fold the two
       * groups together, fold odd lanes onto even lanes (word shift by 16),
       * broadcast lane 2 (shuffle selector 0xaa) and fold again, then keep
       * only the low 16 bits before storing all 64 bits to eob. */
      "li         %[tmp0],    0x10                            \n\t"
      "mtc1       %[tmp0],    %[ftmp9]                        \n\t"

      "pmaxsh     %[ftmp10],  %[ftmp10],      %[ftmp11]       \n\t"
      "psrlw      %[ftmp11],  %[ftmp10],      %[ftmp9]        \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp10],      %[ftmp11]       \n\t"
      "li         %[tmp0],    0xaa                            \n\t"
      "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
      "pshufh     %[ftmp11],  %[ftmp10],      %[ftmp9]        \n\t"
      "pmaxsh     %[ftmp10],  %[ftmp10],      %[ftmp11]       \n\t"
      "li         %[tmp0],    0xffff                          \n\t"
      "mtc1       %[tmp0],    %[ftmp9]                        \n\t"
      "and        %[ftmp10],  %[ftmp10],      %[ftmp9]        \n\t"
      "gssdlc1    %[ftmp10],  0x07(%[eob])                    \n\t"
      "gssdrc1    %[ftmp10],  0x00(%[eob])                    \n\t"
      : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]),
        [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]),
        [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]),
        [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]),
        [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0])
      : [coeff_ptr] "r"((mips_reg)coeff_ptr),
        [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr),
        [dequant_ptr] "r"((mips_reg)dequant_ptr),
        [round_ptr] "r"((mips_reg)round_ptr),
        [quant_ptr] "r"((mips_reg)quant_ptr),
        [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr),
        [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] "r"((mips_reg)&eob),
        [ones] "f"(ones)
      : "memory");

  /* Narrow explicitly, as vp8_regular_quantize_b_mmi does for the same
   * field. */
  *d->eob = (char)eob;
}
200
/*
 * Regular (zero-bin) quantizer for one block of 16 coefficients.
 *
 * Clears d->qcoeff and d->dqcoeff (32 bytes each) with MMI stores, then
 * quantizes the coefficients one at a time through REGULAR_SELECT_EOB and
 * writes the resulting end-of-block value through d->eob.
 *
 * Inputs read from b: coeff, zbin, round, quant, quant_shift,
 * zrun_zbin_boost and zbin_extra. Input read from d: dequant.
 */
void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) {
  /* REGULAR_SELECT_EOB expands in this scope and refers to the locals below
   * (and to `b`) by name, so they must keep these exact names. */
  int eob = 0;
  int x, y, z, sz, zbin;
  const int16_t *zbin_boost_ptr = b->zrun_zbin_boost;
  const int16_t *coeff_ptr = b->coeff;
  const int16_t *zbin_ptr = b->zbin;
  const int16_t *round_ptr = b->round;
  const int16_t *quant_ptr = b->quant;
  const int16_t *quant_shift_ptr = b->quant_shift;
  int16_t *qcoeff_ptr = d->qcoeff;
  int16_t *dqcoeff_ptr = d->dqcoeff;
  const int16_t *dequant_ptr = d->dequant;
  const int16_t zbin_oq_value = b->zbin_extra;
  /* Scratch FP register pinned to $f0. Spelled __asm__ (not asm) to match
   * the rest of this file and to stay valid under strict ISO -std= modes. */
  register double ftmp0 __asm__("$f0");

  /* Equivalent of memset(qcoeff_ptr, 0, 32) and memset(dqcoeff_ptr, 0, 32):
   * zero a register and store it four times (8 bytes each) to both arrays
   * using the unaligned 64-bit store pair. */
  /* clang-format off */
  __asm__ volatile (
    "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]        \n\t"
    "gssdlc1    %[ftmp0],   0x07(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x00(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x0f(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x08(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x17(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x10(%[qcoeff_ptr])             \n\t"
    "gssdlc1    %[ftmp0],   0x1f(%[qcoeff_ptr])             \n\t"
    "gssdrc1    %[ftmp0],   0x18(%[qcoeff_ptr])             \n\t"

    "gssdlc1    %[ftmp0],   0x07(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x00(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x0f(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x08(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x17(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x10(%[dqcoeff_ptr])            \n\t"
    "gssdlc1    %[ftmp0],   0x1f(%[dqcoeff_ptr])            \n\t"
    "gssdrc1    %[ftmp0],   0x18(%[dqcoeff_ptr])            \n\t"
    : [ftmp0]"=&f"(ftmp0)
    : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr)
    : "memory"
  );
  /* clang-format on */

  /* First argument: value recorded in eob if that coefficient quantizes to
   * nonzero (1..16, in processing order). Second argument: index of the
   * coefficient within the 16-entry tables. The order of these calls
   * matters because zbin_boost_ptr advances/resets from one call to the
   * next. */
  REGULAR_SELECT_EOB(1, 0);
  REGULAR_SELECT_EOB(2, 1);
  REGULAR_SELECT_EOB(3, 4);
  REGULAR_SELECT_EOB(4, 8);
  REGULAR_SELECT_EOB(5, 5);
  REGULAR_SELECT_EOB(6, 2);
  REGULAR_SELECT_EOB(7, 3);
  REGULAR_SELECT_EOB(8, 6);
  REGULAR_SELECT_EOB(9, 9);
  REGULAR_SELECT_EOB(10, 12);
  REGULAR_SELECT_EOB(11, 13);
  REGULAR_SELECT_EOB(12, 10);
  REGULAR_SELECT_EOB(13, 7);
  REGULAR_SELECT_EOB(14, 11);
  REGULAR_SELECT_EOB(15, 14);
  REGULAR_SELECT_EOB(16, 15);

  *d->eob = (char)eob;
}
263