1 /*
2 * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <emmintrin.h> // SSE2
14
15 #include "aom/aom_integer.h"
16 #include "aom_dsp/x86/mem_sse2.h"
17 #include "av1/common/onyxc_int.h"
18 #include "av1/common/txb_common.h"
19
load_levels_4x4x5_sse2(const uint8_t * const src,const int stride,const ptrdiff_t * const offsets,__m128i * const level)20 static INLINE void load_levels_4x4x5_sse2(const uint8_t *const src,
21 const int stride,
22 const ptrdiff_t *const offsets,
23 __m128i *const level) {
24 level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride);
25 level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride);
26 level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride);
27 level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride);
28 level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride);
29 }
30
load_levels_8x2x5_sse2(const uint8_t * const src,const int stride,const ptrdiff_t * const offsets,__m128i * const level)31 static INLINE void load_levels_8x2x5_sse2(const uint8_t *const src,
32 const int stride,
33 const ptrdiff_t *const offsets,
34 __m128i *const level) {
35 level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride);
36 level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride);
37 level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride);
38 level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride);
39 level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride);
40 }
41
load_levels_16x1x5_sse2(const uint8_t * const src,const int stride,const ptrdiff_t * const offsets,__m128i * const level)42 static INLINE void load_levels_16x1x5_sse2(const uint8_t *const src,
43 const int stride,
44 const ptrdiff_t *const offsets,
45 __m128i *const level) {
46 level[0] = _mm_loadu_si128((__m128i *)(src + 1));
47 level[1] = _mm_loadu_si128((__m128i *)(src + stride));
48 level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0]));
49 level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1]));
50 level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2]));
51 }
52
get_coeff_contexts_kernel_sse2(__m128i * const level)53 static INLINE __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) {
54 const __m128i const_3 = _mm_set1_epi8(3);
55 const __m128i const_4 = _mm_set1_epi8(4);
56 __m128i count;
57
58 count = _mm_min_epu8(level[0], const_3);
59 level[1] = _mm_min_epu8(level[1], const_3);
60 level[2] = _mm_min_epu8(level[2], const_3);
61 level[3] = _mm_min_epu8(level[3], const_3);
62 level[4] = _mm_min_epu8(level[4], const_3);
63 count = _mm_add_epi8(count, level[1]);
64 count = _mm_add_epi8(count, level[2]);
65 count = _mm_add_epi8(count, level[3]);
66 count = _mm_add_epi8(count, level[4]);
67 count = _mm_avg_epu8(count, _mm_setzero_si128());
68 count = _mm_min_epu8(count, const_4);
69 return count;
70 }
71
get_4_nz_map_contexts_2d(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * const coeff_contexts)72 static INLINE void get_4_nz_map_contexts_2d(const uint8_t *levels,
73 const int height,
74 const ptrdiff_t *const offsets,
75 int8_t *const coeff_contexts) {
76 const int stride = 4 + TX_PAD_HOR;
77 const __m128i pos_to_offset_large = _mm_set1_epi8(21);
78 __m128i pos_to_offset =
79 (height == 4)
80 ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21)
81 : _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21,
82 21, 21);
83 __m128i count;
84 __m128i level[5];
85 int8_t *cc = coeff_contexts;
86 int row = height;
87
88 assert(!(height % 4));
89
90 do {
91 load_levels_4x4x5_sse2(levels, stride, offsets, level);
92 count = get_coeff_contexts_kernel_sse2(level);
93 count = _mm_add_epi8(count, pos_to_offset);
94 _mm_store_si128((__m128i *)cc, count);
95 pos_to_offset = pos_to_offset_large;
96 levels += 4 * stride;
97 cc += 16;
98 row -= 4;
99 } while (row);
100
101 coeff_contexts[0] = 0;
102 }
103
get_4_nz_map_contexts_hor(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)104 static INLINE void get_4_nz_map_contexts_hor(const uint8_t *levels,
105 const int height,
106 const ptrdiff_t *const offsets,
107 int8_t *coeff_contexts) {
108 const int stride = 4 + TX_PAD_HOR;
109 const __m128i pos_to_offset =
110 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
111 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
112 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
113 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
114 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
115 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
116 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
117 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
118 __m128i count;
119 __m128i level[5];
120 int row = height;
121
122 assert(!(height % 4));
123
124 do {
125 load_levels_4x4x5_sse2(levels, stride, offsets, level);
126 count = get_coeff_contexts_kernel_sse2(level);
127 count = _mm_add_epi8(count, pos_to_offset);
128 _mm_store_si128((__m128i *)coeff_contexts, count);
129 levels += 4 * stride;
130 coeff_contexts += 16;
131 row -= 4;
132 } while (row);
133 }
134
get_4_nz_map_contexts_ver(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)135 static INLINE void get_4_nz_map_contexts_ver(const uint8_t *levels,
136 const int height,
137 const ptrdiff_t *const offsets,
138 int8_t *coeff_contexts) {
139 const int stride = 4 + TX_PAD_HOR;
140 const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
141 __m128i pos_to_offset =
142 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
143 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
144 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
145 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
146 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
147 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
148 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
149 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
150 __m128i count;
151 __m128i level[5];
152 int row = height;
153
154 assert(!(height % 4));
155
156 do {
157 load_levels_4x4x5_sse2(levels, stride, offsets, level);
158 count = get_coeff_contexts_kernel_sse2(level);
159 count = _mm_add_epi8(count, pos_to_offset);
160 _mm_store_si128((__m128i *)coeff_contexts, count);
161 pos_to_offset = pos_to_offset_large;
162 levels += 4 * stride;
163 coeff_contexts += 16;
164 row -= 4;
165 } while (row);
166 }
167
get_8_coeff_contexts_2d(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)168 static INLINE void get_8_coeff_contexts_2d(const uint8_t *levels,
169 const int height,
170 const ptrdiff_t *const offsets,
171 int8_t *coeff_contexts) {
172 const int stride = 8 + TX_PAD_HOR;
173 int8_t *cc = coeff_contexts;
174 int row = height;
175 __m128i count;
176 __m128i level[5];
177 __m128i pos_to_offset[3];
178
179 assert(!(height % 2));
180
181 if (height == 8) {
182 pos_to_offset[0] =
183 _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21);
184 pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
185 21, 21, 21, 21, 21);
186 } else if (height < 8) {
187 pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21,
188 21, 21, 21, 21);
189 pos_to_offset[1] = _mm_setr_epi8(16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21,
190 21, 21, 21, 21, 21);
191 } else {
192 pos_to_offset[0] = _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
193 11, 11, 11, 11, 11);
194 pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21,
195 21, 21, 21, 21, 21);
196 }
197 pos_to_offset[2] = _mm_set1_epi8(21);
198
199 do {
200 load_levels_8x2x5_sse2(levels, stride, offsets, level);
201 count = get_coeff_contexts_kernel_sse2(level);
202 count = _mm_add_epi8(count, pos_to_offset[0]);
203 _mm_store_si128((__m128i *)cc, count);
204 pos_to_offset[0] = pos_to_offset[1];
205 pos_to_offset[1] = pos_to_offset[2];
206 levels += 2 * stride;
207 cc += 16;
208 row -= 2;
209 } while (row);
210
211 coeff_contexts[0] = 0;
212 }
213
get_8_coeff_contexts_hor(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)214 static INLINE void get_8_coeff_contexts_hor(const uint8_t *levels,
215 const int height,
216 const ptrdiff_t *const offsets,
217 int8_t *coeff_contexts) {
218 const int stride = 8 + TX_PAD_HOR;
219 const __m128i pos_to_offset =
220 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
221 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
222 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
223 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
224 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
225 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
226 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
227 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
228 int row = height;
229 __m128i count;
230 __m128i level[5];
231
232 assert(!(height % 2));
233
234 do {
235 load_levels_8x2x5_sse2(levels, stride, offsets, level);
236 count = get_coeff_contexts_kernel_sse2(level);
237 count = _mm_add_epi8(count, pos_to_offset);
238 _mm_store_si128((__m128i *)coeff_contexts, count);
239 levels += 2 * stride;
240 coeff_contexts += 16;
241 row -= 2;
242 } while (row);
243 }
244
get_8_coeff_contexts_ver(const uint8_t * levels,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)245 static INLINE void get_8_coeff_contexts_ver(const uint8_t *levels,
246 const int height,
247 const ptrdiff_t *const offsets,
248 int8_t *coeff_contexts) {
249 const int stride = 8 + TX_PAD_HOR;
250 const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
251 __m128i pos_to_offset =
252 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
253 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
254 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
255 SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0,
256 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
257 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
258 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5,
259 SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5);
260 int row = height;
261 __m128i count;
262 __m128i level[5];
263
264 assert(!(height % 2));
265
266 do {
267 load_levels_8x2x5_sse2(levels, stride, offsets, level);
268 count = get_coeff_contexts_kernel_sse2(level);
269 count = _mm_add_epi8(count, pos_to_offset);
270 _mm_store_si128((__m128i *)coeff_contexts, count);
271 pos_to_offset = pos_to_offset_large;
272 levels += 2 * stride;
273 coeff_contexts += 16;
274 row -= 2;
275 } while (row);
276 }
277
get_16n_coeff_contexts_2d(const uint8_t * levels,const int real_width,const int real_height,const int width,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)278 static INLINE void get_16n_coeff_contexts_2d(const uint8_t *levels,
279 const int real_width,
280 const int real_height,
281 const int width, const int height,
282 const ptrdiff_t *const offsets,
283 int8_t *coeff_contexts) {
284 const int stride = width + TX_PAD_HOR;
285 int8_t *cc = coeff_contexts;
286 int row = height;
287 __m128i pos_to_offset[5];
288 __m128i pos_to_offset_large[3];
289 __m128i count;
290 __m128i level[5];
291
292 assert(!(width % 16));
293
294 pos_to_offset_large[2] = _mm_set1_epi8(21);
295 if (real_width == real_height) {
296 pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
297 21, 21, 21, 21);
298 pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21,
299 21, 21, 21, 21, 21);
300 pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
301 21, 21, 21, 21, 21);
302 pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
303 21, 21, 21, 21, 21);
304 pos_to_offset[4] = pos_to_offset_large[0] = pos_to_offset_large[1] =
305 pos_to_offset_large[2];
306 } else if (real_width > real_height) {
307 pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21,
308 21, 21, 21, 21, 21);
309 pos_to_offset[1] = _mm_setr_epi8(16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21,
310 21, 21, 21, 21, 21);
311 pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8(
312 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21);
313 pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2];
314 } else { // real_width < real_height
315 pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8(
316 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11);
317 pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21,
318 21, 21, 21, 21, 21);
319 pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
320 21, 21, 21, 21, 21);
321 pos_to_offset[4] = pos_to_offset_large[2];
322 pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(11);
323 }
324
325 do {
326 int w = width;
327
328 do {
329 load_levels_16x1x5_sse2(levels, stride, offsets, level);
330 count = get_coeff_contexts_kernel_sse2(level);
331 count = _mm_add_epi8(count, pos_to_offset[0]);
332 _mm_store_si128((__m128i *)cc, count);
333 levels += 16;
334 cc += 16;
335 w -= 16;
336 pos_to_offset[0] = pos_to_offset_large[0];
337 } while (w);
338
339 pos_to_offset[0] = pos_to_offset[1];
340 pos_to_offset[1] = pos_to_offset[2];
341 pos_to_offset[2] = pos_to_offset[3];
342 pos_to_offset[3] = pos_to_offset[4];
343 pos_to_offset_large[0] = pos_to_offset_large[1];
344 pos_to_offset_large[1] = pos_to_offset_large[2];
345 levels += TX_PAD_HOR;
346 } while (--row);
347
348 coeff_contexts[0] = 0;
349 }
350
get_16n_coeff_contexts_hor(const uint8_t * levels,const int width,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)351 static INLINE void get_16n_coeff_contexts_hor(const uint8_t *levels,
352 const int width, const int height,
353 const ptrdiff_t *const offsets,
354 int8_t *coeff_contexts) {
355 const int stride = width + TX_PAD_HOR;
356 const __m128i pos_to_offset_large =
357 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
358 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
359 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
360 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
361 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
362 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
363 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
364 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
365 __m128i count;
366 __m128i level[5];
367 int row = height;
368
369 assert(!(width % 16));
370
371 do {
372 __m128i pos_to_offset =
373 _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5,
374 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
375 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
376 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
377 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
378 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
379 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10,
380 SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10);
381 int w = width;
382
383 do {
384 load_levels_16x1x5_sse2(levels, stride, offsets, level);
385 count = get_coeff_contexts_kernel_sse2(level);
386 count = _mm_add_epi8(count, pos_to_offset);
387 _mm_store_si128((__m128i *)coeff_contexts, count);
388 pos_to_offset = pos_to_offset_large;
389 levels += 16;
390 coeff_contexts += 16;
391 w -= 16;
392 } while (w);
393
394 levels += TX_PAD_HOR;
395 } while (--row);
396 }
397
get_16n_coeff_contexts_ver(const uint8_t * levels,const int width,const int height,const ptrdiff_t * const offsets,int8_t * coeff_contexts)398 static INLINE void get_16n_coeff_contexts_ver(const uint8_t *levels,
399 const int width, const int height,
400 const ptrdiff_t *const offsets,
401 int8_t *coeff_contexts) {
402 const int stride = width + TX_PAD_HOR;
403 __m128i pos_to_offset[3];
404 __m128i count;
405 __m128i level[5];
406 int row = height;
407
408 assert(!(width % 16));
409
410 pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0);
411 pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5);
412 pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10);
413
414 do {
415 int w = width;
416
417 do {
418 load_levels_16x1x5_sse2(levels, stride, offsets, level);
419 count = get_coeff_contexts_kernel_sse2(level);
420 count = _mm_add_epi8(count, pos_to_offset[0]);
421 _mm_store_si128((__m128i *)coeff_contexts, count);
422 levels += 16;
423 coeff_contexts += 16;
424 w -= 16;
425 } while (w);
426
427 pos_to_offset[0] = pos_to_offset[1];
428 pos_to_offset[1] = pos_to_offset[2];
429 levels += TX_PAD_HOR;
430 } while (--row);
431 }
432
433 // Note: levels[] must be in the range [0, 127], inclusive.
av1_get_nz_map_contexts_sse2(const uint8_t * const levels,const int16_t * const scan,const uint16_t eob,const TX_SIZE tx_size,const TX_CLASS tx_class,int8_t * const coeff_contexts)434 void av1_get_nz_map_contexts_sse2(const uint8_t *const levels,
435 const int16_t *const scan, const uint16_t eob,
436 const TX_SIZE tx_size,
437 const TX_CLASS tx_class,
438 int8_t *const coeff_contexts) {
439 const int last_idx = eob - 1;
440 if (!last_idx) {
441 coeff_contexts[0] = 0;
442 return;
443 }
444
445 const int real_width = tx_size_wide[tx_size];
446 const int real_height = tx_size_high[tx_size];
447 const int width = get_txb_wide(tx_size);
448 const int height = get_txb_high(tx_size);
449 const int stride = width + TX_PAD_HOR;
450 ptrdiff_t offsets[3];
451
452 /* coeff_contexts must be 16 byte aligned. */
453 assert(!((intptr_t)coeff_contexts & 0xf));
454
455 if (tx_class == TX_CLASS_2D) {
456 offsets[0] = 0 * stride + 2;
457 offsets[1] = 1 * stride + 1;
458 offsets[2] = 2 * stride + 0;
459
460 if (width == 4) {
461 get_4_nz_map_contexts_2d(levels, height, offsets, coeff_contexts);
462 } else if (width == 8) {
463 get_8_coeff_contexts_2d(levels, height, offsets, coeff_contexts);
464 } else if (width == 16) {
465 get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
466 offsets, coeff_contexts);
467 } else {
468 get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height,
469 offsets, coeff_contexts);
470 }
471 } else if (tx_class == TX_CLASS_HORIZ) {
472 offsets[0] = 2;
473 offsets[1] = 3;
474 offsets[2] = 4;
475 if (width == 4) {
476 get_4_nz_map_contexts_hor(levels, height, offsets, coeff_contexts);
477 } else if (width == 8) {
478 get_8_coeff_contexts_hor(levels, height, offsets, coeff_contexts);
479 } else {
480 get_16n_coeff_contexts_hor(levels, width, height, offsets,
481 coeff_contexts);
482 }
483 } else { // TX_CLASS_VERT
484 offsets[0] = 2 * stride;
485 offsets[1] = 3 * stride;
486 offsets[2] = 4 * stride;
487 if (width == 4) {
488 get_4_nz_map_contexts_ver(levels, height, offsets, coeff_contexts);
489 } else if (width == 8) {
490 get_8_coeff_contexts_ver(levels, height, offsets, coeff_contexts);
491 } else {
492 get_16n_coeff_contexts_ver(levels, width, height, offsets,
493 coeff_contexts);
494 }
495 }
496
497 const int bwl = get_txb_bwl(tx_size);
498 const int pos = scan[last_idx];
499 if (last_idx <= (height << bwl) / 8)
500 coeff_contexts[pos] = 1;
501 else if (last_idx <= (height << bwl) / 4)
502 coeff_contexts[pos] = 2;
503 else
504 coeff_contexts[pos] = 3;
505 }
506