1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
#include <string.h>

#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#include "vp9/common/vp9_idct.h"
#endif

#include "vp9/decoder/vp9_detokenize.h"
22
// Indices into one row of coefficient probabilities (prob[...] in
// decode_coefs()). They select the probability used for the first three
// binary decisions of a token: end-of-block, zero, and one.
#define EOB_CONTEXT_NODE 0
#define ZERO_CONTEXT_NODE 1
#define ONE_CONTEXT_NODE 2

// Bumps the counter for `token` at the current band/context, but only when
// counting is enabled. This macro is not self-contained: it expands in terms
// of the identifiers `counts`, `coef_counts`, `band` and `ctx`, all of which
// must be in scope at the point of use.
#define INCREMENT_COUNT(token)                   \
  do {                                           \
    if (counts) ++coef_counts[band][ctx][token]; \
  } while (0)
31
read_bool(vpx_reader * r,int prob,BD_VALUE * value,int * count,unsigned int * range)32 static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value,
33 int *count, unsigned int *range) {
34 const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT;
35 const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT);
36
37 if (*count < 0) {
38 r->value = *value;
39 r->count = *count;
40 vpx_reader_fill(r);
41 *value = r->value;
42 *count = r->count;
43 }
44
45 if (*value >= bigsplit) {
46 *range = *range - split;
47 *value = *value - bigsplit;
48 {
49 const int shift = vpx_norm[*range];
50 *range <<= shift;
51 *value <<= shift;
52 *count -= shift;
53 }
54 return 1;
55 }
56 *range = split;
57 {
58 const int shift = vpx_norm[*range];
59 *range <<= shift;
60 *value <<= shift;
61 *count -= shift;
62 }
63 return 0;
64 }
65
read_coeff(vpx_reader * r,const vpx_prob * probs,int n,BD_VALUE * value,int * count,unsigned int * range)66 static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n,
67 BD_VALUE *value, int *count, unsigned int *range) {
68 int i, val = 0;
69 for (i = 0; i < n; ++i)
70 val = (val << 1) | read_bool(r, probs[i], value, count, range);
71 return val;
72 }
73
// Decodes and dequantizes the coefficient tokens of one transform block.
//
//   xd       supplies the frame context (probabilities), the optional
//            counters (xd->counts, may be NULL) and, for high bit depth
//            builds, the bit depth.
//   type     plane type used to index the probability/counter tables.
//   dqcoeff  output buffer, indexed by scan[c]. Only positions that carry a
//            non-zero token are written; zero tokens leave dqcoeff untouched
//            (presumably the caller provides a cleared buffer -- verify).
//   tx_size  transform size; 16 << (2 * tx_size) positions at most are read.
//   dq       dequantization factors: dq[0] is applied to the first scan
//            position, dq[1] to every later one.
//   ctx      initial coefficient context; later contexts come from
//            get_coef_context() over the token classes seen so far.
//   scan/nb  scan order and neighbour table for get_coef_context().
//   r        boolean decoder; its value/range/count are cached in locals for
//            the duration of the call and written back before returning.
//
// Returns the number of scan positions consumed: the position at which the
// end-of-block decision was read, or max_eob when the block is filled
// without one.
static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type,
                        tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
                        int ctx, const int16_t *scan, const int16_t *nb,
                        vpx_reader *r) {
  FRAME_COUNTS *counts = xd->counts;
  const int max_eob = 16 << (tx_size << 1);
  const FRAME_CONTEXT *const fc = xd->fc;
  const int ref = is_inter_block(xd->mi[0]);
  int band, c = 0;
  const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
      fc->coef_probs[tx_size][type][ref];
  const vpx_prob *prob;
  // Only dereferenced when counts != NULL. Initialised so they never hold an
  // indeterminate value on the no-counting path.
  unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] = NULL;
  unsigned int(*eob_branch_count)[COEFF_CONTEXTS] = NULL;
  // Token class (0..5) recorded per coefficient position; input to
  // get_coef_context().
  uint8_t token_cache[32 * 32];
  const uint8_t *band_translate = get_band_translate(tx_size);
  // 32x32 blocks have their dequantized values shifted right by one.
  const int dq_shift = (tx_size == TX_32X32);
  int v;
  int16_t dqv = dq[0];
  const uint8_t *const cat6_prob =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->bd == VPX_BITS_12)
          ? vp9_cat6_prob_high12
          : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                    vp9_cat6_prob;
  const int cat6_bits =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->bd == VPX_BITS_12) ? 18
                              : (xd->bd == VPX_BITS_10) ? 16 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                        14;
  // Keep value, range, and count as locals. The compiler produces better
  // results with the locals than using r directly.
  BD_VALUE value = r->value;
  unsigned int range = r->range;
  int count = r->count;

  if (counts) {
    coef_counts = counts->coef[tx_size][type][ref];
    eob_branch_count = counts->eob_branch[tx_size][type][ref];
  }

  while (c < max_eob) {
    band = *band_translate++;
    prob = coef_probs[band][ctx];
    if (counts) ++eob_branch_count[band][ctx];
    if (!read_bool(r, prob[EOB_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(EOB_MODEL_TOKEN);
      break;
    }

    // Consume a run of zero tokens. No end-of-block decision is read between
    // consecutive zeros.
    while (!read_bool(r, prob[ZERO_CONTEXT_NODE], &value, &count, &range)) {
      INCREMENT_COUNT(ZERO_TOKEN);
      dqv = dq[1];
      token_cache[scan[c]] = 0;
      ++c;
      if (c >= max_eob) {
        r->value = value;
        r->range = range;
        r->count = count;
        return c;  // zero tokens at the end (no eob token)
      }
      ctx = get_coef_context(nb, token_cache, c);
      band = *band_translate++;
      prob = coef_probs[band][ctx];
    }

    if (read_bool(r, prob[ONE_CONTEXT_NODE], &value, &count, &range)) {
      // Magnitude >= 2: walk the remaining decisions using the row of
      // vp9_pareto8_full selected by the pivot probability.
      const vpx_prob *p = vp9_pareto8_full[prob[PIVOT_NODE] - 1];
      INCREMENT_COUNT(TWO_TOKEN);
      if (read_bool(r, p[0], &value, &count, &range)) {
        // Category tokens: a base value plus extra bits from read_coeff().
        int val;
        if (read_bool(r, p[3], &value, &count, &range)) {
          token_cache[scan[c]] = 5;
          if (read_bool(r, p[5], &value, &count, &range)) {
            if (read_bool(r, p[7], &value, &count, &range)) {
              val = CAT6_MIN_VAL +
                    read_coeff(r, cat6_prob, cat6_bits, &value, &count, &range);
            } else {
              val = CAT5_MIN_VAL +
                    read_coeff(r, vp9_cat5_prob, 5, &value, &count, &range);
            }
          } else if (read_bool(r, p[6], &value, &count, &range)) {
            val = CAT4_MIN_VAL +
                  read_coeff(r, vp9_cat4_prob, 4, &value, &count, &range);
          } else {
            val = CAT3_MIN_VAL +
                  read_coeff(r, vp9_cat3_prob, 3, &value, &count, &range);
          }
        } else {
          token_cache[scan[c]] = 4;
          if (read_bool(r, p[4], &value, &count, &range)) {
            val = CAT2_MIN_VAL +
                  read_coeff(r, vp9_cat2_prob, 2, &value, &count, &range);
          } else {
            val = CAT1_MIN_VAL +
                  read_coeff(r, vp9_cat1_prob, 1, &value, &count, &range);
          }
        }
#if CONFIG_VP9_HIGHBITDEPTH
        // val may use 18-bits
        v = (int)(((int64_t)val * dqv) >> dq_shift);
#else
        v = (val * dqv) >> dq_shift;
#endif
      } else {
        if (read_bool(r, p[1], &value, &count, &range)) {
          // Magnitude 3 or 4, chosen by one more decision.
          token_cache[scan[c]] = 3;
          v = ((3 + read_bool(r, p[2], &value, &count, &range)) * dqv) >>
              dq_shift;
        } else {
          token_cache[scan[c]] = 2;
          v = (2 * dqv) >> dq_shift;
        }
      }
    } else {
      INCREMENT_COUNT(ONE_TOKEN);
      token_cache[scan[c]] = 1;
      v = dqv >> dq_shift;
    }
    // The sign is read as an equiprobable (128) decision; 1 means negative.
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
    dqcoeff[scan[c]] = highbd_check_range(
        read_bool(r, 128, &value, &count, &range) ? -v : v, xd->bd);
#else
    dqcoeff[scan[c]] =
        check_range(read_bool(r, 128, &value, &count, &range) ? -v : v);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
    if (read_bool(r, 128, &value, &count, &range)) {
      dqcoeff[scan[c]] = -v;
    } else {
      dqcoeff[scan[c]] = v;
    }
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
    ++c;
    ctx = get_coef_context(nb, token_cache, c);
    dqv = dq[1];
  }

  // Publish the cached decoder state back to the reader.
  r->value = value;
  r->range = range;
  r->count = count;
  return c;
}
220
// Computes how far a transform block starting at block offset (x, y) and
// spanning tx_size_in_blocks blocks reaches past the limits stored in
// xd->max_blocks_wide / xd->max_blocks_high.
//
// For each direction whose limit is non-zero and is exceeded, the matching
// output receives 8 * (number of blocks past the limit). Otherwise the output
// is left untouched, so callers must pre-initialise *ctx_shift_a and
// *ctx_shift_l. The factor 8 matches the callers' use of the result as a bit
// shift on packed context words (presumably one byte per context entry).
static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l,
                          int x, int y, unsigned int tx_size_in_blocks) {
  if (xd->max_blocks_wide && tx_size_in_blocks + x > xd->max_blocks_wide) {
    *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8;
  }
  if (xd->max_blocks_high && tx_size_in_blocks + y > xd->max_blocks_high) {
    *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8;
  }
}
232
// Decodes the coefficient tokens of one transform block and updates the
// above/left entropy contexts it covers.
//
//   twd      tile worker state; supplies the bit reader and the MACROBLOCKD.
//   plane    plane index into xd->plane[].
//   sc       scan order (scan and neighbour tables) passed to decode_coefs().
//   x, y     offsets of the block into the plane's above/left context arrays.
//   tx_size  transform size; selects how many context entries are covered
//            (1, 2, 4 or 8 per direction).
//   seg_id   selects the dequantization table pd->seg_dequant[seg_id].
//
// The initial context is (any covered above entry non-zero) + (any covered
// left entry non-zero). After decoding, each covered entry is set to
// (eob > 0), except that entries reported by get_ctx_shift() as lying beyond
// the block limits are cleared.
//
// The multi-entry contexts are read and written as one packed integer. This
// is done with memcpy through a local temporary rather than by casting the
// ENTROPY_CONTEXT pointer to a wider integer type: the cast form is type
// punning that violates the effective-type (strict aliasing) rules and
// depends on the pointer being suitably aligned. A fixed-size memcpy has
// neither problem and typically compiles to the same single load/store.
// As in the pointer-cast form, this assumes sizeof(ENTROPY_CONTEXT) == 1.
//
// NOTE(review): clearing the out-of-range entries by right-shifting the
// packed word zeroes the most significant bytes, which are the trailing array
// entries only on little-endian targets -- confirm behaviour for big-endian.
//
// Returns the end-of-block position reported by decode_coefs(), or 0 for an
// invalid tx_size.
int vp9_decode_block_tokens(TileWorkerData *twd, int plane,
                            const scan_order *sc, int x, int y, TX_SIZE tx_size,
                            int seg_id) {
  vpx_reader *r = &twd->bit_reader;
  MACROBLOCKD *xd = &twd->xd;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int16_t *const dequant = pd->seg_dequant[seg_id];
  int eob;
  ENTROPY_CONTEXT *a = pd->above_context + x;
  ENTROPY_CONTEXT *l = pd->left_context + y;
  int ctx;
  int ctx_shift_a = 0;
  int ctx_shift_l = 0;

  switch (tx_size) {
    case TX_4X4:
      ctx = a[0] != 0;
      ctx += l[0] != 0;
      eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
                         dequant, ctx, sc->scan, sc->neighbors, r);
      a[0] = l[0] = (eob > 0);
      break;
    case TX_8X8: {
      uint16_t above, left;
      get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8);
      memcpy(&above, a, sizeof(above));
      memcpy(&left, l, sizeof(left));
      ctx = !!above;
      ctx += !!left;
      eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
                         dequant, ctx, sc->scan, sc->neighbors, r);
      above = (uint16_t)(((eob > 0) * 0x0101) >> ctx_shift_a);
      left = (uint16_t)(((eob > 0) * 0x0101) >> ctx_shift_l);
      memcpy(a, &above, sizeof(above));
      memcpy(l, &left, sizeof(left));
      break;
    }
    case TX_16X16: {
      uint32_t above, left;
      get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16);
      memcpy(&above, a, sizeof(above));
      memcpy(&left, l, sizeof(left));
      ctx = !!above;
      ctx += !!left;
      eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
                         dequant, ctx, sc->scan, sc->neighbors, r);
      above = (uint32_t)(((eob > 0) * 0x01010101) >> ctx_shift_a);
      left = (uint32_t)(((eob > 0) * 0x01010101) >> ctx_shift_l);
      memcpy(a, &above, sizeof(above));
      memcpy(l, &left, sizeof(left));
      break;
    }
    case TX_32X32: {
      uint64_t above, left;
      get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32);
      memcpy(&above, a, sizeof(above));
      memcpy(&left, l, sizeof(left));
      ctx = !!above;
      ctx += !!left;
      eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
                         dequant, ctx, sc->scan, sc->neighbors, r);
      above = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a;
      left = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l;
      memcpy(a, &above, sizeof(above));
      memcpy(l, &left, sizeof(left));
      break;
    }
    default:
      assert(0 && "Invalid transform size.");
      eob = 0;
      break;
  }

  return eob;
}
293