1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/tile.h"
16
17 #include <algorithm>
18 #include <array>
19 #include <cassert>
20 #include <climits>
21 #include <cstdlib>
22 #include <cstring>
23 #include <memory>
24 #include <new>
25 #include <numeric>
26 #include <type_traits>
27 #include <utility>
28
29 #include "src/frame_scratch_buffer.h"
30 #include "src/motion_vector.h"
31 #include "src/reconstruction.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/common.h"
34 #include "src/utils/constants.h"
35 #include "src/utils/logging.h"
36 #include "src/utils/segmentation.h"
37 #include "src/utils/stack.h"
38
39 namespace libgav1 {
40 namespace {
41
42 // Import all the constants in the anonymous namespace.
43 #include "src/scan_tables.inc"
44
// Range above kNumQuantizerBaseLevels above which the exponential golomb
// coding process is activated.
constexpr int kQuantizerCoefficientBaseRange = 12;
constexpr int kNumQuantizerBaseLevels = 2;
constexpr int kCoeffBaseRangeMaxIterations =
    kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1);
// Indices into the per-direction entropy context arrays
// (coefficient_levels_/dc_categories_): the left contexts are stored at index
// 0 and the top contexts at index 1 (see Tile::Init()).
constexpr int kEntropyContextLeft = 0;
constexpr int kEntropyContextTop = 1;

constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {2, 4, 4, 4, 5},
                                                     {3, 5, 5, 5, 6}};
59
// The space complexity of DFS is O(branching_factor * max_depth). For the
// parameter tree, branching_factor = 4 (there could be up to 4 children for
// every node) and max_depth (excluding the root) = 5 (to go from a 128x128
// block all the way to a 4x4 block). The worst-case stack size is 16, by
// counting the number of 'o' nodes in the diagram:
//
//   |                    128x128  The highest level (corresponding to the
//   |                             root of the tree) has no node in the stack.
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  64x64
//   |
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  32x32    Higher levels have three nodes in the stack,
//   |                             because we pop one node off the stack before
//   |-----------------+           pushing its four children onto the stack.
//   |     |     |     |
//   |     o     o     o  16x16
//   |
//   |-----------------+
//   |     |     |     |
//   |     o     o     o  8x8
//   |
//   |-----------------+
//   |     |     |     |
//   o     o     o     o  4x4      Only the lowest level has four nodes in the
//                                 stack.
constexpr int kDfsStackSize = 16;
89
// Mask indicating whether the transform sets contain a particular transform
// type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set.
constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = {
    BitMaskSet(0x1), BitMaskSet(0xE0F), BitMaskSet(0x20F),
    BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)};

// Intra predictor used for each filter intra mode.
constexpr PredictionMode
    kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = {
        kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
        kPredictionModeD157, kPredictionModeDc};

// Mask used to determine the index for mode_deltas lookup.
constexpr BitMaskSet kPredictionModeDeltasMask(
    kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv,
    kPredictionModeNearestNearestMv, kPredictionModeNearNearMv,
    kPredictionModeNearestNewMv, kPredictionModeNewNearestMv,
    kPredictionModeNearNewMv, kPredictionModeNewNearMv,
    kPredictionModeNewNewMv);

// This is computed as:
// min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4.
constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = {
    0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6};
113
// Per-transform-size 5x5 tables of offsets used when deriving the coeff base
// context (one table per entry of TransformSize).
/* clang-format off */
constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = {
  {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0},
   {0, 0, 0, 0, 0}},
  {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
   {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
  {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
   {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
  {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
  {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
  {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
  {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
   {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
  {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
   {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}};
/* clang-format on */

// Extended the table size from 3 to 16 by repeating the last element to avoid
// the clips to row or column indices.
constexpr uint8_t kCoeffBasePositionContextOffset[16] = {
    26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
160
// Intra prediction mode used for each inter-intra mode.
constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = {
    kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
    kPredictionModeSmooth};

// Number of horizontal luma samples before intra block copy can be used.
constexpr int kIntraBlockCopyDelayPixels = 256;
// Number of 64 by 64 blocks before intra block copy can be used.
constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64;

// Index [i][j] corresponds to the transform size of width 1 << (i + 2) and
// height 1 << (j + 2). Width/height combinations that are not valid transform
// sizes hold kNumTransformSizes.
constexpr TransformSize k4x4SizeToTransformSize[5][5] = {
    {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
     kNumTransformSizes, kNumTransformSizes},
    {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
     kTransformSize8x32, kNumTransformSizes},
    {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16,
     kTransformSize16x32, kTransformSize16x64},
    {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16,
     kTransformSize32x32, kTransformSize32x64},
    {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16,
     kTransformSize64x32, kTransformSize64x64}};

// Defined in section 9.3 of the spec.
constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
    kTransformTypeDctDct, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeDctDct, kTransformTypeAdstAdst, kTransformTypeDctAdst,
    kTransformTypeAdstDct, kTransformTypeAdstDct, kTransformTypeDctAdst,
    kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct,
    kTransformTypeAdstAdst, kTransformTypeDctDct};
191
// Defined in section 5.11.47 of the spec. This array does not contain an entry
// for kTransformSetDctOnly, so the first dimension needs to be
// |kNumTransformSets| - 1.
constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
    {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeIdentityDct, kTransformTypeDctIdentity,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
      kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
      kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
      kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
      kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
      kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
      kTransformTypeAdstDct, kTransformTypeDctFlipadst,
      kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
      kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
      kTransformTypeAdstFlipadst},
     {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};

// Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32};

// This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and
// 64x* transforms replaced with *x32 and 32x* respectively.
constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
    kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
    kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
    kTransformSize8x32, kTransformSize16x4, kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
    kTransformSize32x8, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
    kTransformSize32x32};

// ith entry of this array is computed as:
// DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
//           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
//           1)
constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
    0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};
// Default multipliers for self-guided restoration projection.
constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};

// Default Wiener filter coefficients.
constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};

// Maps compound prediction modes into single modes. For example,
// kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
// and kPredictionModeNewMv for index 1. It is used to simplify the logic in
// AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
constexpr PredictionMode
    kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
        {kPredictionModeNearestMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNearMv},
        {kPredictionModeNearestMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearestMv},
        {kPredictionModeNearMv, kPredictionModeNewMv},
        {kPredictionModeNewMv, kPredictionModeNearMv},
        {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
        {kPredictionModeNewMv, kPredictionModeNewMv},
};
GetSinglePredictionMode(int index,PredictionMode y_mode)265 PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
266 if (y_mode < kPredictionModeNearestNearestMv) {
267 return y_mode;
268 }
269 const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
270 assert(lookup_index >= 0);
271 return kCompoundToSinglePredictionMode[lookup_index][index];
272 }
273
// log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
// dqDenom is always a power of two and hence right shift can be used instead
// of division.
constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};
279
// Returns the minimum of |length| or |max|-|start|. This is used to clamp
// array indices when accessing arrays whose bound is equal to |max|.
int GetNumElements(int length, int start, int max) {
  const int remaining = max - start;
  return (length < remaining) ? length : remaining;
}
285
286 template <typename T>
SetBlockValues(int rows,int columns,T value,T * dst,ptrdiff_t stride)287 void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
288 // Specialize all columns cases (values in kTransformWidth4x4[]) for better
289 // performance.
290 switch (columns) {
291 case 1:
292 MemSetBlock<T>(rows, 1, value, dst, stride);
293 break;
294 case 2:
295 MemSetBlock<T>(rows, 2, value, dst, stride);
296 break;
297 case 4:
298 MemSetBlock<T>(rows, 4, value, dst, stride);
299 break;
300 case 8:
301 MemSetBlock<T>(rows, 8, value, dst, stride);
302 break;
303 default:
304 assert(columns == 16);
305 MemSetBlock<T>(rows, 16, value, dst, stride);
306 break;
307 }
308 }
309
SetTransformType(const Tile::Block & block,int x4,int y4,int w4,int h4,TransformType tx_type,TransformType transform_types[32][32])310 void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
311 TransformType tx_type,
312 TransformType transform_types[32][32]) {
313 const int y_offset = y4 - block.row4x4;
314 const int x_offset = x4 - block.column4x4;
315 TransformType* const dst = &transform_types[y_offset][x_offset];
316 SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
317 }
318
StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,const MotionVector & mv_to_store,ptrdiff_t stride,int rows,int columns,ReferenceFrameType * reference_frame_row_start,MotionVector * mv)319 void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
320 const MotionVector& mv_to_store, ptrdiff_t stride,
321 int rows, int columns,
322 ReferenceFrameType* reference_frame_row_start,
323 MotionVector* mv) {
324 static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
325 do {
326 // Don't switch the following two memory setting functions.
327 // Some ARM CPUs are quite sensitive to the order.
328 memset(reference_frame_row_start, reference_frame_to_store, columns);
329 std::fill(mv, mv + columns, mv_to_store);
330 reference_frame_row_start += stride;
331 mv += stride;
332 } while (--rows != 0);
333 }
334
// The inverse transform process assumes that the quantized coefficients are
// stored as a virtual 2d array of size |tx_width| x tx_height. For transforms
// of width 64 that assumption is broken, because the scan order used while
// parsing is the one of the corresponding width-32 transform (e.g. 64x16 uses
// the 32x16 scan order), leaving the rows packed at a stride of 32. This
// function spreads the packed rows out to a stride of 64 and zeroes the slots
// they vacated. No-op for any other transform width.
template <typename ResidualType>
void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width,
                                  ResidualType* residual) {
  if (tx_width != 64) return;
  // Walk pairs of rows from the bottom up so that no destination row is
  // written before its packed source row has been read.
  for (int row = clamped_tx_height - 2; row >= 2; row -= 2) {
    ResidualType* const src = residual + 32 * row;
    ResidualType* const dst = residual + 64 * row;
    memcpy(dst, src, 32 * sizeof(src[0]));
    memcpy(dst + 64, src + 32, 32 * sizeof(src[0]));
    memset(src + 32, 0, 32 * sizeof(src[0]));
  }
  // Row 1 still needs to move (from offset 32 to offset 64); row 0 is already
  // in its final position.
  memcpy(residual + 64, residual + 32, 32 * sizeof(residual[0]));
  memset(residual + 32, 0, 32 * sizeof(residual[0]));
}
363
GetClampParameters(const Tile::Block & block,int min[2],int max[2])364 void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) {
365 // 7.10.2.14 (part 1). (also contains implementations of 5.11.53
366 // and 5.11.54).
367 constexpr int kMvBorder4x4 = 4;
368 const int row_border = kMvBorder4x4 + block.height4x4;
369 const int column_border = kMvBorder4x4 + block.width4x4;
370 const int macroblocks_to_top_edge = -block.row4x4;
371 const int macroblocks_to_bottom_edge =
372 block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4;
373 const int macroblocks_to_left_edge = -block.column4x4;
374 const int macroblocks_to_right_edge =
375 block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4;
376 min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border);
377 min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border);
378 max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border);
379 max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border);
380 }
381
382 // Section 8.3.2 in the spec, under coeff_base_eob.
GetCoeffBaseContextEob(TransformSize tx_size,int index)383 int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
384 if (index == 0) return 0;
385 const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
386 const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
387 const int tx_height = kTransformHeight[adjusted_tx_size];
388 if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
389 if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
390 return 3;
391 }
392
// Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
// on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
// the end of block case.
int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
                                TransformClass tx_class) {
  if (pos == 0) return 0;
  const int tx_width = 1 << adjusted_tx_width_log2;
  const int row = pos >> adjusted_tx_width_log2;
  const int column = pos & (tx_width - 1);
  // This return statement is equivalent to:
  //   return ((tx_class == kTransformClass2D && (row | column) < 2) ||
  //           (tx_class == kTransformClassHorizontal && column == 0) ||
  //           (tx_class == kTransformClassVertical && row == 0))
  //              ? 7
  //              : 14;
  // NOTE(review): the branch-free form relies on the numeric values of the
  // TransformClass enum (the |tx_class & ...| and |tx_class >> 1| terms must
  // isolate the horizontal and vertical classes respectively) -- confirm
  // against the enum declaration before modifying this expression.
  return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
                 static_cast<int>((row | column) < 2)) |
                (tx_class & static_cast<int>(column == 0)) |
                ((tx_class >> 1) & static_cast<int>(row == 0)));
}
413
414 } // namespace
415
// Tile constructor. Copies or borrows all the frame-level state the tile
// needs, derives the tile's 4x4 row/column bounds from the tile info, decides
// whether the parsing and decoding steps are split, and wires up the
// per-plane reconstruction buffers. Fallible allocations are deferred to
// Init().
Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
           const ObuSequenceHeader& sequence_header,
           const ObuFrameHeader& frame_header,
           RefCountedBuffer* const current_frame, const DecoderState& state,
           FrameScratchBuffer* const frame_scratch_buffer,
           const WedgeMaskArray& wedge_masks,
           const QuantizerMatrix& quantizer_matrix,
           SymbolDecoderContext* const saved_symbol_decoder_context,
           const SegmentationMap* prev_segment_ids,
           PostFilter* const post_filter, const dsp::Dsp* const dsp,
           ThreadPool* const thread_pool,
           BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
           bool use_intra_prediction_buffer)
    : number_(tile_number),
      row_(number_ / frame_header.tile_info.tile_columns),
      column_(number_ % frame_header.tile_info.tile_columns),
      data_(data),
      size_(size),
      read_deltas_(false),
      // Index 0 (plane Y) is never subsampled; U and V share the color config
      // subsampling values.
      subsampling_x_{0, sequence_header.color_config.subsampling_x,
                     sequence_header.color_config.subsampling_x},
      subsampling_y_{0, sequence_header.color_config.subsampling_y,
                     sequence_header.color_config.subsampling_y},
      current_quantizer_index_(frame_header.quantizer.base_index),
      sequence_header_(sequence_header),
      frame_header_(frame_header),
      reference_frame_sign_bias_(state.reference_frame_sign_bias),
      reference_frames_(state.reference_frame),
      motion_field_(frame_scratch_buffer->motion_field),
      reference_order_hint_(state.reference_order_hint),
      wedge_masks_(wedge_masks),
      quantizer_matrix_(quantizer_matrix),
      reader_(data_, size_, frame_header_.enable_cdf_update),
      symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
      saved_symbol_decoder_context_(saved_symbol_decoder_context),
      prev_segment_ids_(prev_segment_ids),
      dsp_(*dsp),
      post_filter_(*post_filter),
      block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
      quantizer_(sequence_header_.color_config.bitdepth,
                 &frame_header_.quantizer),
      // 8-bit decoding uses 16-bit residuals; higher bitdepths use 32-bit.
      residual_size_((sequence_header_.color_config.bitdepth == 8)
                         ? sizeof(int16_t)
                         : sizeof(int32_t)),
      // Superblock lag (in superblocks) enforced before intra block copy may
      // read from previously decoded superblocks; 1 when intrabc is disabled.
      intra_block_copy_lag_(
          frame_header_.allow_intrabc
              ? (sequence_header_.use_128x128_superblock ? 3 : 5)
              : 1),
      current_frame_(*current_frame),
      cdef_index_(frame_scratch_buffer->cdef_index),
      cdef_skip_(frame_scratch_buffer->cdef_skip),
      inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
      thread_pool_(thread_pool),
      residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
      tile_scratch_buffer_pool_(
          &frame_scratch_buffer->tile_scratch_buffer_pool),
      pending_tiles_(pending_tiles),
      frame_parallel_(frame_parallel),
      use_intra_prediction_buffer_(use_intra_prediction_buffer),
      // One intra prediction buffer per tile row.
      intra_prediction_buffer_(
          use_intra_prediction_buffer_
              ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
              : nullptr) {
  row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
  row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
  column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
  column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  // NOTE(review): this uses the height log2 table for a width shift -- fine
  // as long as superblocks are square; confirm if non-square superblocks are
  // ever introduced.
  const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
  superblock_rows_ =
      (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
  superblock_columns_ =
      (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
      block_width4x4_log2;
  // If |split_parse_and_decode_| is true, we do the necessary setup for
  // splitting the parsing and the decoding steps. This is done in the following
  // two cases:
  // 1) If there is multi-threading within a tile (this is done if
  //    |thread_pool_| is not nullptr and if there are at least as many
  //    superblock columns as |intra_block_copy_lag_|).
  // 2) If |frame_parallel| is true.
  split_parse_and_decode_ = (thread_pool_ != nullptr &&
                             superblock_columns_ > intra_block_copy_lag_) ||
                            frame_parallel;
  if (frame_parallel_) {
    reference_frame_progress_cache_.fill(INT_MIN);
  }
  memset(delta_lf_, 0, sizeof(delta_lf_));
  delta_lf_all_zero_ = true;
  const YuvBuffer& buffer = post_filter_.frame_buffer();
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    // Verify that the borders are big enough for Reconstruct(). max_tx_length
    // is the maximum value of tx_width and tx_height for the plane.
    const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
    // Reconstruct() may overwrite on the right. Since the right border of a
    // row is followed in memory by the left border of the next row, the
    // number of extra pixels to the right of a row is at least the sum of the
    // left and right borders.
    //
    // Note: This assertion actually checks the sum of the left and right
    // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
    // and vertically shifted version of |buffer|. Since the sum of the left and
    // right borders is not changed by the shift, we can just check the sum of
    // the left and right borders of |buffer|.
    assert(buffer.left_border(plane) + buffer.right_border(plane) >=
           max_tx_length - 1);
    // Reconstruct() may overwrite on the bottom. We need an extra border row
    // on the bottom because we need the left border of that row.
    //
    // Note: This assertion checks the bottom border of
    // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
    // shift that the PostFilter constructor applied to |buffer| and reduce the
    // bottom border by that amount.
#ifndef NDEBUG
    const int vertical_shift = static_cast<int>(
        (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
        buffer.stride(plane));
    const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
    assert(bottom_border >= max_tx_length);
#endif
    // In AV1, a transform block of height H starts at a y coordinate that is
    // a multiple of H. If a transform block at the bottom of the frame has
    // height H, then Reconstruct() will write up to the row with index
    // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
    // rows Reconstruct() may write to is
    // Align(buffer.height(plane), max_tx_length).
    buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
                         buffer.stride(plane),
                         post_filter_.GetUnfilteredBuffer(plane));
  }
}
547
// Performs the per-tile allocations and setup that can fail: the entropy
// context arrays, the residual buffer(s), the motion field (when
// use_ref_frame_mvs is set) and the top context array. Returns false on
// allocation failure.
bool Tile::Init() {
  assert(coefficient_levels_.size() == dc_categories_.size());
  for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
    // The left contexts (kEntropyContextLeft) are indexed by row; the top
    // contexts are indexed by column.
    const int contexts_per_plane = (i == kEntropyContextLeft)
                                       ? frame_header_.rows4x4
                                       : frame_header_.columns4x4;
    if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
      return false;
    }
    if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
      LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
      return false;
    }
  }
  if (split_parse_and_decode_) {
    // Parsing and decoding run separately, so every superblock needs its own
    // residual buffer slot.
    assert(residual_buffer_pool_ != nullptr);
    if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
                                         /*zero_initialize=*/false)) {
      LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
      return false;
    }
  } else {
    // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
    // checks when parsing quantized coefficients.
    residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
        32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
    if (residual_buffer_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
      return false;
    }
    prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
    if (prediction_parameters_ == nullptr) {
      LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
      return false;
    }
  }
  if (frame_header_.use_ref_frame_mvs) {
    assert(sequence_header_.enable_order_hint);
    SetupMotionField(frame_header_, current_frame_, reference_frames_,
                     row4x4_start_, row4x4_end_, column4x4_start_,
                     column4x4_end_, &motion_field_);
  }
  ResetLoopRestorationParams();
  // One context entry per superblock column, for the superblock row above.
  if (!top_context_.Resize(superblock_columns_)) {
    LIBGAV1_DLOG(ERROR, "Allocation of top_context_ failed.");
    return false;
  }
  return true;
}
598
599 template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
ProcessSuperBlockRow(int row4x4,TileScratchBuffer * const scratch_buffer)600 bool Tile::ProcessSuperBlockRow(int row4x4,
601 TileScratchBuffer* const scratch_buffer) {
602 if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
603 assert(scratch_buffer != nullptr);
604 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
605 for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
606 column4x4 += block_width4x4) {
607 if (!ProcessSuperBlock(row4x4, column4x4, scratch_buffer,
608 processing_mode)) {
609 LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
610 row4x4, column4x4);
611 return false;
612 }
613 }
614 if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
615 SaveSymbolDecoderContext();
616 }
617 if (processing_mode == kProcessingModeDecodeOnly ||
618 processing_mode == kProcessingModeParseAndDecode) {
619 PopulateIntraPredictionBuffer(row4x4);
620 }
621 return true;
622 }
623
// Explicit instantiations of the two configurations callers use.
// Used in frame parallel mode. The symbol decoder context need not be saved in
// this case since it was done when parsing was complete.
template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
    int row4x4, TileScratchBuffer* scratch_buffer);
// Used in non frame parallel mode.
template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
    int row4x4, TileScratchBuffer* scratch_buffer);
631
SaveSymbolDecoderContext()632 void Tile::SaveSymbolDecoderContext() {
633 if (frame_header_.enable_frame_end_update_cdf &&
634 number_ == frame_header_.tile_info.context_update_id) {
635 *saved_symbol_decoder_context_ = symbol_decoder_context_;
636 }
637 }
638
ParseAndDecode()639 bool Tile::ParseAndDecode() {
640 if (split_parse_and_decode_) {
641 if (!ThreadedParseAndDecode()) return false;
642 SaveSymbolDecoderContext();
643 return true;
644 }
645 std::unique_ptr<TileScratchBuffer> scratch_buffer =
646 tile_scratch_buffer_pool_->Get();
647 if (scratch_buffer == nullptr) {
648 pending_tiles_->Decrement(false);
649 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
650 return false;
651 }
652 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
653 for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
654 row4x4 += block_width4x4) {
655 if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
656 row4x4, scratch_buffer.get())) {
657 pending_tiles_->Decrement(false);
658 return false;
659 }
660 }
661 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
662 pending_tiles_->Decrement(true);
663 return true;
664 }
665
Parse()666 bool Tile::Parse() {
667 const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
668 std::unique_ptr<TileScratchBuffer> scratch_buffer =
669 tile_scratch_buffer_pool_->Get();
670 if (scratch_buffer == nullptr) {
671 LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
672 return false;
673 }
674 for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
675 row4x4 += block_width4x4) {
676 if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
677 row4x4, scratch_buffer.get())) {
678 return false;
679 }
680 }
681 tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
682 SaveSymbolDecoderContext();
683 return true;
684 }
685
// Decodes all superblock rows of this tile, assuming parsing has already
// completed (kProcessingModeDecodeOnly). After each superblock row is
// decoded, deblock filtering is applied where possible, and this tile's
// contribution to |superblock_row_progress| is recorded under |mutex|. When
// all tile columns of a superblock row are done, the waiter on
// |superblock_row_progress_condvar| (presumably the post filtering thread —
// see callers) is notified.
bool Tile::Decode(
    std::mutex* const mutex, int* const superblock_row_progress,
    std::condition_variable* const superblock_row_progress_condvar) {
  const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
  const int block_width4x4_log2 =
      sequence_header_.use_128x128_superblock ? 5 : 4;
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  // |index| is the superblock row index within the frame (not the tile),
  // since |row4x4_start_| is frame-relative.
  for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
       row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
    if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
            row4x4, scratch_buffer.get())) {
      return false;
    }
    if (post_filter_.DoDeblock()) {
      // Apply vertical deblock filtering for all the columns in this tile
      // except for the first 64 columns.
      post_filter_.ApplyDeblockFilter(
          kLoopFilterTypeVertical, row4x4,
          column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
          block_width4x4);
      // If this is the first superblock row of the tile, then we cannot apply
      // horizontal deblocking here since we don't know if the top row is
      // available. So it will be done by the calling thread in that case.
      if (row4x4 != row4x4_start_) {
        // Apply horizontal deblock filtering for all the columns in this tile
        // except for the first and the last 64 columns.
        // Note about the last tile of each row: For the last tile,
        // column4x4_end may not be a multiple of 16. In that case it is still
        // okay to simply subtract 16 since ApplyDeblockFilter() will only do
        // the filters in increments of 64 columns (or 32 columns for chroma
        // with subsampling).
        post_filter_.ApplyDeblockFilter(
            kLoopFilterTypeHorizontal, row4x4,
            column4x4_start_ + kNum4x4InLoopFilterUnit,
            column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
      }
    }
    bool notify;
    {
      // Update the shared per-row progress counter under the mutex; the last
      // tile column to finish this row is the one that notifies.
      std::unique_lock<std::mutex> lock(*mutex);
      notify = ++superblock_row_progress[index] ==
               frame_header_.tile_info.tile_columns;
    }
    if (notify) {
      // We are done decoding this superblock row. Notify the post filtering
      // thread.
      superblock_row_progress_condvar[index].notify_one();
    }
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  return true;
}
743
// Parses the tile on the calling thread while scheduling decode jobs on
// |thread_pool_| for superblocks whose dependencies are satisfied (see
// CanDecode() and DecodeSuperBlock()). On any failure |threading_.abort| is
// set so in-flight decode jobs can bail out. Whichever job (this parsing job
// or a decode job) finishes last signals |pending_tiles_|.
bool Tile::ThreadedParseAndDecode() {
  {
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
      pending_tiles_->Decrement(false);
      LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
      return false;
    }
    // Account for the parsing job.
    ++threading_.pending_jobs;
  }

  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];

  // Begin parsing.
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  if (scratch_buffer == nullptr) {
    pending_tiles_->Decrement(false);
    LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
    return false;
  }
  for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
       row4x4 += block_width4x4, ++row_index) {
    for (int column4x4 = column4x4_start_, column_index = 0;
         column4x4 < column4x4_end_;
         column4x4 += block_width4x4, ++column_index) {
      if (!ProcessSuperBlock(row4x4, column4x4, scratch_buffer.get(),
                             kProcessingModeParseOnly)) {
        std::lock_guard<std::mutex> lock(threading_.mutex);
        threading_.abort = true;
        break;
      }
      std::unique_lock<std::mutex> lock(threading_.mutex);
      // A decode job may have failed while we were parsing; stop early.
      if (threading_.abort) break;
      threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
      // Schedule the decoding of this superblock if it is allowed.
      if (CanDecode(row_index, column_index)) {
        ++threading_.pending_jobs;
        threading_.sb_state[row_index][column_index] =
            kSuperBlockStateScheduled;
        // Drop the lock before calling Schedule() so it is not held across
        // the thread pool call.
        lock.unlock();
        thread_pool_->Schedule(
            [this, row_index, column_index, block_width4x4]() {
              DecodeSuperBlock(row_index, column_index, block_width4x4);
            });
      }
    }
    std::lock_guard<std::mutex> lock(threading_.mutex);
    if (threading_.abort) break;
  }
  tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));

  // We are done parsing. We can return here since the calling thread will make
  // sure that it waits for all the superblocks to be decoded.
  //
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  threading_.mutex.lock();
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  threading_.mutex.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
  return job_succeeded;
}
813
CanDecode(int row_index,int column_index) const814 bool Tile::CanDecode(int row_index, int column_index) const {
815 assert(row_index >= 0);
816 assert(column_index >= 0);
817 // If |threading_.sb_state[row_index][column_index]| is not equal to
818 // kSuperBlockStateParsed, then return false. This is ok because if
819 // |threading_.sb_state[row_index][column_index]| is equal to:
820 // kSuperBlockStateNone - then the superblock is not yet parsed.
821 // kSuperBlockStateScheduled - then the superblock is already scheduled for
822 // decode.
823 // kSuperBlockStateDecoded - then the superblock has already been decoded.
824 if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
825 threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
826 return false;
827 }
828 // First superblock has no dependencies.
829 if (row_index == 0 && column_index == 0) {
830 return true;
831 }
832 // Superblocks in the first row only depend on the superblock to the left of
833 // it.
834 if (row_index == 0) {
835 return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
836 }
837 // All other superblocks depend on superblock to the left of it (if one
838 // exists) and superblock to the top right with a lag of
839 // |intra_block_copy_lag_| (if one exists).
840 const int top_right_column_index =
841 std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
842 return threading_.sb_state[row_index - 1][top_right_column_index] ==
843 kSuperBlockStateDecoded &&
844 (column_index == 0 ||
845 threading_.sb_state[row_index][column_index - 1] ==
846 kSuperBlockStateDecoded);
847 }
848
// Thread pool job: decodes the superblock at (row_index, column_index), then
// schedules any neighboring superblocks that become decodable as a result.
// On failure sets |threading_.abort|. The last outstanding job signals tile
// completion through |pending_tiles_|.
void Tile::DecodeSuperBlock(int row_index, int column_index,
                            int block_width4x4) {
  const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
  const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
  std::unique_ptr<TileScratchBuffer> scratch_buffer =
      tile_scratch_buffer_pool_->Get();
  bool ok = scratch_buffer != nullptr;
  if (ok) {
    ok = ProcessSuperBlock(row4x4, column4x4, scratch_buffer.get(),
                           kProcessingModeDecodeOnly);
    tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
  }
  std::unique_lock<std::mutex> lock(threading_.mutex);
  if (ok) {
    threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
    // Candidate rows and columns that we could potentially begin the decoding
    // (if it is allowed to do so). The candidates are:
    // 1) The superblock to the bottom-left of the current superblock with a
    // lag of |intra_block_copy_lag_| (or the beginning of the next superblock
    // row in case there are less than |intra_block_copy_lag_| superblock
    // columns in the Tile).
    // 2) The superblock to the right of the current superblock.
    const int candidate_row_indices[] = {row_index + 1, row_index};
    const int candidate_column_indices[] = {
        std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
    for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
         ++i) {
      const int candidate_row_index = candidate_row_indices[i];
      const int candidate_column_index = candidate_column_indices[i];
      if (!CanDecode(candidate_row_index, candidate_column_index)) {
        continue;
      }
      // Mark the candidate as scheduled while the lock is still held, then
      // drop the lock around the Schedule() call itself.
      ++threading_.pending_jobs;
      threading_.sb_state[candidate_row_index][candidate_column_index] =
          kSuperBlockStateScheduled;
      lock.unlock();
      thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
                              block_width4x4]() {
        DecodeSuperBlock(candidate_row_index, candidate_column_index,
                         block_width4x4);
      });
      lock.lock();
    }
  } else {
    threading_.abort = true;
  }
  // Finish using |threading_| before |pending_tiles_->Decrement()| because the
  // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
  // is called.
  const bool no_pending_jobs = (--threading_.pending_jobs == 0);
  const bool job_succeeded = !threading_.abort;
  lock.unlock();
  if (no_pending_jobs) {
    // We are done parsing and decoding this tile.
    pending_tiles_->Decrement(job_succeeded);
  }
}
906
// Copies the bottom pixel row of the superblock row starting at |row4x4|
// into |intra_prediction_buffer_| for every plane, so that the next
// superblock row can read it from there instead of |buffer_| (presumably
// because |buffer_| may be modified by in-place post filtering before the
// next row needs it — confirm with callers). No-op unless
// |use_intra_prediction_buffer_| is set; also skipped for the last
// superblock row of the tile, which has no row below to consume the copy.
void Tile::PopulateIntraPredictionBuffer(int row4x4) {
  const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
  if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
    return;
  }
  const size_t pixel_size =
      (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
                                                   : sizeof(uint16_t));
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    // Last pixel row of this superblock row, in (subsampled) plane
    // coordinates.
    const int row_to_copy =
        (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
    const size_t pixels_to_copy =
        (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
         subsampling_x_[plane]) *
        pixel_size;
    const size_t column_start =
        MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
    void* start;
#if LIBGAV1_MAX_BITDEPTH >= 10
    if (sequence_header_.color_config.bitdepth > 8) {
      // For high bitdepth, reinterpret the byte buffer as uint16_t pixels
      // before indexing.
      Array2DView<uint16_t> buffer(
          buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
          reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
      start = &buffer[row_to_copy][column_start];
    } else  // NOLINT
#endif
    {
      start = &buffer_[plane][row_to_copy][column_start];
    }
    memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
           start, pixels_to_copy);
  }
}
940
GetTransformAllZeroContext(const Block & block,Plane plane,TransformSize tx_size,int x4,int y4,int w4,int h4)941 int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
942 TransformSize tx_size, int x4, int y4,
943 int w4, int h4) {
944 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
945 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
946
947 const int tx_width = kTransformWidth[tx_size];
948 const int tx_height = kTransformHeight[tx_size];
949 const BlockSize plane_size = block.residual_size[plane];
950 const int block_width = kBlockWidthPixels[plane_size];
951 const int block_height = kBlockHeightPixels[plane_size];
952
953 int top = 0;
954 int left = 0;
955 const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
956 const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
957 if (plane == kPlaneY) {
958 if (block_width == tx_width && block_height == tx_height) return 0;
959 const uint8_t* coefficient_levels =
960 &coefficient_levels_[kEntropyContextTop][plane][x4];
961 for (int i = 0; i < num_top_elements; ++i) {
962 top = std::max(top, static_cast<int>(coefficient_levels[i]));
963 }
964 coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
965 for (int i = 0; i < num_left_elements; ++i) {
966 left = std::max(left, static_cast<int>(coefficient_levels[i]));
967 }
968 assert(top <= 4);
969 assert(left <= 4);
970 // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
971 // for top and left.
972 return kAllZeroContextsByTopLeft[top][left];
973 }
974 const uint8_t* coefficient_levels =
975 &coefficient_levels_[kEntropyContextTop][plane][x4];
976 const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
977 for (int i = 0; i < num_top_elements; ++i) {
978 top |= coefficient_levels[i];
979 top |= dc_categories[i];
980 }
981 coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
982 dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
983 for (int i = 0; i < num_left_elements; ++i) {
984 left |= coefficient_levels[i];
985 left |= dc_categories[i];
986 }
987 return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
988 3 * static_cast<int>(block_width * block_height >
989 tx_width * tx_height);
990 }
991
GetTransformSet(TransformSize tx_size,bool is_inter) const992 TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
993 const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
994 const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
995 if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
996 if (is_inter) {
997 if (frame_header_.reduced_tx_set ||
998 tx_size_square_max == kTransformSize32x32) {
999 return kTransformSetInter3;
1000 }
1001 if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
1002 return kTransformSetInter1;
1003 }
1004 if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
1005 if (frame_header_.reduced_tx_set ||
1006 tx_size_square_min == kTransformSize16x16) {
1007 return kTransformSetIntra2;
1008 }
1009 return kTransformSetIntra1;
1010 }
1011
ComputeTransformType(const Block & block,Plane plane,TransformSize tx_size,int block_x,int block_y)1012 TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
1013 TransformSize tx_size, int block_x,
1014 int block_y) {
1015 const BlockParameters& bp = *block.bp;
1016 const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1017 if (frame_header_.segmentation
1018 .lossless[bp.prediction_parameters->segment_id] ||
1019 tx_size_square_max == kTransformSize64x64) {
1020 return kTransformTypeDctDct;
1021 }
1022 if (plane == kPlaneY) {
1023 return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
1024 }
1025 const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1026 TransformType tx_type;
1027 if (bp.is_inter) {
1028 const int x4 =
1029 std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
1030 const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
1031 tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
1032 } else {
1033 tx_type = kModeToTransformType[bp.prediction_parameters->uv_mode];
1034 }
1035 return kTransformTypeInSetMask[tx_set].Contains(tx_type)
1036 ? tx_type
1037 : kTransformTypeDctDct;
1038 }
1039
// Reads the transform type for a luma transform block at (x4, y4) from the
// bitstream and stores it, replicated over all 4x4 units the transform
// covers, in |transform_types_|. The type stays kTransformTypeDctDct when
// the transform set is DCT-only or the segment's qindex is 0.
void Tile::ReadTransformType(const Block& block, int x4, int y4,
                             TransformSize tx_size) {
  BlockParameters& bp = *block.bp;
  const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);

  TransformType tx_type = kTransformTypeDctDct;
  if (tx_set != kTransformSetDctOnly &&
      frame_header_.segmentation.qindex[bp.prediction_parameters->segment_id] >
          0) {
    const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
    const int cdf_tx_size_index =
        TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
    uint16_t* cdf;
    if (bp.is_inter) {
      cdf = symbol_decoder_context_
                .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
      // The symbol count matches the size of the transform set.
      switch (tx_set) {
        case kTransformSetInter1:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<16>(cdf));
          break;
        case kTransformSetInter2:
          tx_type = static_cast<TransformType>(reader_.ReadSymbol<12>(cdf));
          break;
        default:
          assert(tx_set == kTransformSetInter3);
          tx_type = static_cast<TransformType>(reader_.ReadSymbol(cdf));
          break;
      }
    } else {
      // Intra CDFs are additionally conditioned on the prediction direction,
      // with filter-intra modes mapped to an equivalent directional
      // predictor.
      const PredictionMode intra_direction =
          block.bp->prediction_parameters->use_filter_intra
              ? kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters
                                                     ->filter_intra_mode]
              : bp.y_mode;
      cdf =
          symbol_decoder_context_
              .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction];
      assert(tx_set == kTransformSetIntra1 || tx_set == kTransformSetIntra2);
      tx_type = static_cast<TransformType>((tx_set == kTransformSetIntra1)
                                               ? reader_.ReadSymbol<7>(cdf)
                                               : reader_.ReadSymbol<5>(cdf));
    }

    // This array does not contain an entry for kTransformSetDctOnly, so the
    // first dimension needs to be offset by 1.
    tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type];
  }
  SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size],
                   kTransformHeight4x4[tx_size], tx_type, transform_types_);
}
1090
1091 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1092 // Bottom boundary checks are avoided by the padded rows.
1093 // For a coefficient near the right boundary, the two right neighbors and the
1094 // one bottom-right neighbor may be out of boundary. We don't check the right
1095 // boundary for them, because the out of boundary neighbors project to positions
1096 // above the diagonal line which goes through the current coefficient and these
1097 // positions are still all 0s according to the diagonal scan order.
// Reads the base level (coeff_base) and, when needed, the base range
// (coeff_br) for every coefficient of a 2D-class transform block, walking
// the scan from position |eob| - 2 down to 0. |level_buffer| keeps the
// (uint8_t) levels used for context derivation; |quantized_buffer| receives
// the accumulated magnitudes.
template <typename ResidualType>
void Tile::ReadCoeffBase2D(
    const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  for (int i = eob - 2; i >= 1; --i) {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    // Levels of the five neighbors at offsets {0,1}, {1,0}, {1,1}, {0,2} and
    // {2,0} (already read, since they come later in the scan).
    const int neighbor_sum = 1 + levels[1] + levels[tx_width] +
                             levels[tx_width + 1] + levels[2] +
                             levels[MultiplyBy2(tx_width)];
    const int context =
        ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
        kCoeffBaseContextOffset[tx_size][std::min(row, 4)][std::min(column, 4)];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +           // {0, 1}
                                          quantized[tx_width] +        // {1, 0}
                                          quantized[tx_width + 1]));   // {1, 1}
      context += 14 >> static_cast<int>((row | column) < 2);
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
  // Read position 0.
  {
    auto* const quantized = &quantized_buffer[0];
    int level = reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[0]);
    level_buffer[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int context =
          std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
                                quantized[tx_width] +       // {1, 0}
                                quantized[tx_width + 1]));  // {1, 1}
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  }
}
1152
1153 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1154 // Bottom boundary checks are avoided by the padded rows.
1155 // For a coefficient near the right boundary, the four right neighbors may be
1156 // out of boundary. We don't do the boundary check for the first three right
1157 // neighbors, because even for the transform blocks with smallest width 4, the
1158 // first three out of boundary neighbors project to positions left of the
1159 // current coefficient and these positions are still all 0s according to the
1160 // column scan order. However, when transform block width is 4 and the current
1161 // coefficient is on the right boundary, its fourth right neighbor projects to
1162 // the under position on the same column, which could be nonzero. Therefore, we
1163 // must skip the fourth right neighbor. To make it simple, for any coefficient,
1164 // we always do the boundary check for its fourth right neighbor.
// Reads coeff_base/coeff_br for a horizontal-class transform block, walking
// the scan from position |eob| - 2 down to 0. Neighbor offsets are noted as
// {row, column}; see the comment preceding this function for why only the
// fourth right neighbor needs an explicit boundary check.
template <typename ResidualType>
void Tile::ReadCoeffBaseHorizontal(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (levels[1] +                                    // {0, 1}
             levels[tx_width] +                             // {1, 0}
             levels[2] +                                    // {0, 2}
             levels[3] +                                    // {0, 3}
             ((column + 4 < tx_width) ? levels[4] : 0));    // {0, 4}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[column];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      int context = std::min(6, DivideBy2(1 + quantized[1] +       // {0, 1}
                                          quantized[tx_width] +    // {1, 0}
                                          quantized[2]));          // {0, 2}
      if (pos != 0) {
        context += 14 >> static_cast<int>(column == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}
1206
1207 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1208 // Bottom boundary checks are avoided by the padded rows.
1209 // Right boundary check is performed explicitly.
// Reads coeff_base/coeff_br for a vertical-class transform block, walking
// the scan from position |eob| - 2 down to 0. Neighbor offsets are noted as
// {row, column}; the single right neighbor is boundary-checked explicitly,
// and the padded rows below the transform make bottom checks unnecessary.
template <typename ResidualType>
void Tile::ReadCoeffBaseVertical(
    const uint16_t* scan, TransformSize /*tx_size*/, int adjusted_tx_width_log2,
    int eob,
    uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
    uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                 [kCoeffBaseRangeSymbolCount + 1],
    ResidualType* const quantized_buffer, uint8_t* const level_buffer) {
  const int tx_width = 1 << adjusted_tx_width_log2;
  int i = eob - 2;
  do {
    const uint16_t pos = scan[i];
    const int row = pos >> adjusted_tx_width_log2;
    const int column = pos & (tx_width - 1);
    auto* const quantized = &quantized_buffer[pos];
    auto* const levels = &level_buffer[pos];
    const int neighbor_sum =
        1 + (((column + 1 < tx_width) ? levels[1] : 0) +    // {0, 1}
             levels[tx_width] +                             // {1, 0}
             levels[MultiplyBy2(tx_width)] +                // {2, 0}
             levels[tx_width * 3] +                         // {3, 0}
             levels[MultiplyBy4(tx_width)]);                // {4, 0}
    const int context = ((neighbor_sum > 7) ? 4 : DivideBy2(neighbor_sum)) +
                        kCoeffBasePositionContextOffset[row];
    int level =
        reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
    levels[0] = level;
    if (level > kNumQuantizerBaseLevels) {
      // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
      // + 1, because we clip the overall output to 6 and the unclipped
      // quantized values will always result in an output of greater than 6.
      const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
      int context =
          std::min(6, DivideBy2(1 + quantized_column1 +                // {0, 1}
                                quantized[tx_width] +                  // {1, 0}
                                quantized[MultiplyBy2(tx_width)]));    // {2, 0}
      if (pos != 0) {
        context += 14 >> static_cast<int>(row == 0);
      }
      level += ReadCoeffBaseRange(coeff_base_range_cdf[context]);
    }
    quantized[0] = level;
  } while (--i >= 0);
}
1254
GetDcSignContext(int x4,int y4,int w4,int h4,Plane plane)1255 int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
1256 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1257 const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
1258 // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
1259 int8_t dc_sign = std::accumulate(
1260 dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
1261 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1262 dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
1263 dc_sign = std::accumulate(
1264 dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
1265 // This return statement is equivalent to:
1266 // if (dc_sign < 0) return 1;
1267 // if (dc_sign > 0) return 2;
1268 // return 0;
1269 // And it is better than:
1270 // return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
1271 return static_cast<int>(dc_sign < 0) +
1272 MultiplyBy2(static_cast<int>(dc_sign > 0));
1273 }
1274
SetEntropyContexts(int x4,int y4,int w4,int h4,Plane plane,uint8_t coefficient_level,int8_t dc_category)1275 void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
1276 uint8_t coefficient_level, int8_t dc_category) {
1277 const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1278 const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
1279 memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
1280 num_top_elements);
1281 memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
1282 num_top_elements);
1283 const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1284 const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
1285 memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
1286 coefficient_level, num_left_elements);
1287 memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
1288 num_left_elements);
1289 }
1290
// Reads the sign (and, for large magnitudes, the remaining Exp-Golomb coded
// part) of the coefficient at scan position |i| — or the DC coefficient when
// |is_dc_coefficient| is true — then dequantizes it and writes the result
// into |residual_buffer|. Updates |dc_category| with the DC sign bucket and
// accumulates the magnitude into |coefficient_level|. Returns false on an
// invalid Exp-Golomb length in the bitstream.
template <typename ResidualType, bool is_dc_coefficient>
bool Tile::ReadSignAndApplyDequantization(
    const uint16_t* const scan, int i, int q_value,
    const uint8_t* const quantizer_matrix, int shift, int max_value,
    uint16_t* const dc_sign_cdf, int8_t* const dc_category,
    int* const coefficient_level, ResidualType* residual_buffer) {
  const int pos = is_dc_coefficient ? 0 : scan[i];
  // If residual_buffer[pos] is zero, then the rest of the function has no
  // effect.
  int level = residual_buffer[pos];
  if (level == 0) return true;
  // The DC sign is context coded; all other signs are raw bits.
  const int sign = is_dc_coefficient
                       ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
                       : reader_.ReadBit();
  if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
    // The remainder is Exp-Golomb coded: a unary prefix giving |length|,
    // followed by |length| - 1 raw bits. Lengths above 20 are invalid.
    int length = 0;
    bool golomb_length_bit = false;
    do {
      golomb_length_bit = reader_.ReadBit() != 0;
      ++length;
      if (length > 20) {
        LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
        return false;
      }
    } while (!golomb_length_bit);
    int x = 1;
    for (int i = length - 2; i >= 0; --i) {
      x = (x << 1) | reader_.ReadBit();
    }
    level += x - 1;
  }
  if (is_dc_coefficient) {
    *dc_category = (sign != 0) ? -1 : 1;
  }
  level &= 0xfffff;  // Keep the magnitude within 20 bits.
  *coefficient_level += level;
  // Apply dequantization. Step 1 of section 7.12.3 in the spec.
  int q = q_value;
  if (quantizer_matrix != nullptr) {
    q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
  }
  // The intermediate multiplication can exceed 32 bits, so it has to be
  // performed by promoting one of the values to int64_t.
  int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
  dequantized_value >>= shift;
  // At this point:
  //   * |dequantized_value| is always non-negative.
  //   * |sign| can be either 0 or 1.
  //   * min_value = -(max_value + 1).
  // We need to apply the following:
  //   dequantized_value = sign ? -dequantized_value : dequantized_value;
  //   dequantized_value = Clip3(dequantized_value, min_value, max_value);
  //
  // Note that -x == ~(x - 1).
  //
  // The above two lines can be done with a std::min and xor as follows:
  dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
  residual_buffer[pos] = dequantized_value;
  return true;
}
1351
ReadCoeffBaseRange(uint16_t * cdf)1352 int Tile::ReadCoeffBaseRange(uint16_t* cdf) {
1353 int level = 0;
1354 for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
1355 const int coeff_base_range =
1356 reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(cdf);
1357 level += coeff_base_range;
1358 if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
1359 }
1360 return level;
1361 }
1362
// Parses the quantized coefficients of one transform block and dequantizes
// them in place into |block.residual| (step 1 of section 7.12.3 in the spec).
// Returns the end-of-block position (number of coefficient positions parsed)
// on success, 0 if the block was coded as all-zero, or -1 on failure.
// |*tx_type| receives the transform type used for this block.
template <typename ResidualType>
int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
                                    int start_x, int start_y,
                                    TransformSize tx_size,
                                    TransformType* const tx_type) {
  const int x4 = DivideBy4(start_x);
  const int y4 = DivideBy4(start_y);
  const int w4 = kTransformWidth4x4[tx_size];
  const int h4 = kTransformHeight4x4[tx_size];
  const int tx_size_context = kTransformSizeContext[tx_size];
  int context =
      GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
  const bool all_zero = reader_.ReadSymbol(
      symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
  if (all_zero) {
    if (plane == kPlaneY) {
      SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
                       transform_types_);
    }
    SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
    // This is not used in this case, so it can be set to any value.
    *tx_type = kNumTransformTypes;
    return 0;
  }
  const int tx_width = kTransformWidth[tx_size];
  const int tx_height = kTransformHeight[tx_size];
  const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
  const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
  const int tx_padding =
      (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
  auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
  // Clear padding to avoid bottom boundary checks when parsing quantized
  // coefficients.
  memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
  // |level_buffer| holds the (clamped) levels used for context derivation.
  // The array bound relies on the adjusted transform dimensions being at most
  // 32 (see the memset below and |clamped_tx_height|).
  uint8_t level_buffer[(32 + kResidualPaddingVertical) * 32];
  memset(
      level_buffer, 0,
      kTransformWidth[adjusted_tx_size] * kTransformHeight[adjusted_tx_size] +
          tx_padding);
  const int clamped_tx_height = std::min(tx_height, 32);
  if (plane == kPlaneY) {
    ReadTransformType(block, x4, y4, tx_size);
  }
  BlockParameters& bp = *block.bp;
  *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
  const int eob_multi_size = kEobMultiSizeLookup[tx_size];
  const PlaneType plane_type = GetPlaneType(plane);
  const TransformClass tx_class = GetTransformClass(*tx_type);
  context = static_cast<int>(tx_class != kTransformClass2D);
  // Decode the end-of-block class; the symbol count used depends on the
  // transform area (|eob_multi_size|).
  int eob_pt = 1;
  switch (eob_multi_size) {
    case 0:
      eob_pt += reader_.ReadSymbol<kEobPt16SymbolCount>(
          symbol_decoder_context_.eob_pt_16_cdf[plane_type][context]);
      break;
    case 1:
      eob_pt += reader_.ReadSymbol<kEobPt32SymbolCount>(
          symbol_decoder_context_.eob_pt_32_cdf[plane_type][context]);
      break;
    case 2:
      eob_pt += reader_.ReadSymbol<kEobPt64SymbolCount>(
          symbol_decoder_context_.eob_pt_64_cdf[plane_type][context]);
      break;
    case 3:
      eob_pt += reader_.ReadSymbol<kEobPt128SymbolCount>(
          symbol_decoder_context_.eob_pt_128_cdf[plane_type][context]);
      break;
    case 4:
      eob_pt += reader_.ReadSymbol<kEobPt256SymbolCount>(
          symbol_decoder_context_.eob_pt_256_cdf[plane_type][context]);
      break;
    case 5:
      eob_pt += reader_.ReadSymbol<kEobPt512SymbolCount>(
          symbol_decoder_context_.eob_pt_512_cdf[plane_type]);
      break;
    case 6:
    default:
      eob_pt += reader_.ReadSymbol<kEobPt1024SymbolCount>(
          symbol_decoder_context_.eob_pt_1024_cdf[plane_type]);
      break;
  }
  // Expand the eob class to the base eob value, then refine it with extra
  // bits (one adaptive symbol followed by literal bits).
  int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
  if (eob_pt >= 3) {
    context = eob_pt - 3;
    const bool eob_extra = reader_.ReadSymbol(
        symbol_decoder_context_
            .eob_extra_cdf[tx_size_context][plane_type][context]);
    if (eob_extra) eob += 1 << (eob_pt - 3);
    for (int i = 1; i < eob_pt - 2; ++i) {
      assert(eob_pt - i >= 3);
      assert(eob_pt <= kEobPt1024SymbolCount);
      if (reader_.ReadBit() != 0) {
        eob += 1 << (eob_pt - i - 3);
      }
    }
  }
  const uint16_t* scan = kScan[tx_class][tx_size];
  const int clamped_tx_size_context = std::min(tx_size_context, 3);
  auto coeff_base_range_cdf =
      symbol_decoder_context_
          .coeff_base_range_cdf[clamped_tx_size_context][plane_type];
  // Read the last coefficient.
  {
    context = GetCoeffBaseContextEob(tx_size, eob - 1);
    const uint16_t pos = scan[eob - 1];
    int level =
        1 + reader_.ReadSymbol<kCoeffBaseEobSymbolCount>(
                symbol_decoder_context_
                    .coeff_base_eob_cdf[tx_size_context][plane_type][context]);
    level_buffer[pos] = level;
    // Levels above kNumQuantizerBaseLevels carry an additional coeff base
    // range increment.
    if (level > kNumQuantizerBaseLevels) {
      level +=
          ReadCoeffBaseRange(coeff_base_range_cdf[GetCoeffBaseRangeContextEob(
              adjusted_tx_width_log2, pos, tx_class)]);
    }
    residual[pos] = level;
  }
  if (eob > 1) {
    // Read all the other coefficients.
    // Lookup used to call the right variant of ReadCoeffBase*() based on the
    // transform class.
    static constexpr void (Tile::*kGetCoeffBaseFunc[])(
        const uint16_t* scan, TransformSize tx_size, int adjusted_tx_width_log2,
        int eob,
        uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
        uint16_t coeff_base_range_cdf[kCoeffBaseRangeContexts]
                                     [kCoeffBaseRangeSymbolCount + 1],
        ResidualType* quantized_buffer,
        uint8_t* level_buffer) = {&Tile::ReadCoeffBase2D<ResidualType>,
                                  &Tile::ReadCoeffBaseHorizontal<ResidualType>,
                                  &Tile::ReadCoeffBaseVertical<ResidualType>};
    (this->*kGetCoeffBaseFunc[tx_class])(
        scan, tx_size, adjusted_tx_width_log2, eob,
        symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
        coeff_base_range_cdf, residual, level_buffer);
  }
  const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
  const int current_quantizer_index =
      GetQIndex(frame_header_.segmentation,
                bp.prediction_parameters->segment_id, current_quantizer_index_);
  const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
  const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
  const int shift = kQuantizationShift[tx_size];
  // The quantizer matrix is applied only when the frame enables it for this
  // plane, the transform is not an identity type, and the segment is not
  // lossless.
  const uint8_t* const quantizer_matrix =
      (frame_header_.quantizer.use_matrix &&
       *tx_type < kTransformTypeIdentityIdentity &&
       !frame_header_.segmentation
            .lossless[bp.prediction_parameters->segment_id] &&
       frame_header_.quantizer.matrix_level[plane] < 15)
          ? quantizer_matrix_[frame_header_.quantizer.matrix_level[plane]]
                             [plane_type][adjusted_tx_size]
                                 .get()
          : nullptr;
  int coefficient_level = 0;
  int8_t dc_category = 0;
  // The dc sign symbol is only present when the dc coefficient is non-zero.
  uint16_t* const dc_sign_cdf =
      (residual[0] != 0)
          ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
                x4, y4, w4, h4, plane)]
          : nullptr;
  assert(scan[0] == 0);
  // Dequantize the dc coefficient (uses |dc_q_value|) ...
  if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
          scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
          &dc_category, &coefficient_level, residual)) {
    return -1;
  }
  if (eob > 1) {
    // ... then all the ac coefficients (use |ac_q_value|).
    int i = 1;
    do {
      if (!ReadSignAndApplyDequantization<ResidualType,
                                          /*is_dc_coefficient=*/false>(
              scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
              nullptr, &coefficient_level, residual)) {
        return -1;
      }
    } while (++i < eob);
    MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
  }
  SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
                     dc_category);
  if (split_parse_and_decode_) {
    // Advance the shared residual pointer past this transform block.
    *block.residual += tx_width * tx_height * residual_size_;
  }
  return eob;
}
1548
// CALL_BITDEPTH_FUNCTION is a macro that calls the appropriate template
// |function| depending on the value of |sequence_header_.color_config.bitdepth|
// with the variadic arguments.
// The do { ... } while (false) wrapper makes each expansion behave as a single
// statement (safe inside unbraced if/else).
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)         \
  do {                                                \
    if (sequence_header_.color_config.bitdepth > 8) { \
      function<uint16_t>(__VA_ARGS__);                \
    } else {                                          \
      function<uint8_t>(__VA_ARGS__);                 \
    }                                                 \
  } while (false)
#else
// 8-bit only build: always use the uint8_t instantiation.
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif
1567
TransformBlock(const Block & block,Plane plane,int base_x,int base_y,TransformSize tx_size,int x,int y,ProcessingMode mode)1568 bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
1569 int base_y, TransformSize tx_size, int x, int y,
1570 ProcessingMode mode) {
1571 BlockParameters& bp = *block.bp;
1572 const int subsampling_x = subsampling_x_[plane];
1573 const int subsampling_y = subsampling_y_[plane];
1574 const int start_x = base_x + MultiplyBy4(x);
1575 const int start_y = base_y + MultiplyBy4(y);
1576 const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
1577 const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
1578 if (start_x >= max_x || start_y >= max_y) return true;
1579 const int row = DivideBy4(start_y << subsampling_y);
1580 const int column = DivideBy4(start_x << subsampling_x);
1581 const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
1582 const int sub_block_row4x4 = row & mask;
1583 const int sub_block_column4x4 = column & mask;
1584 const int step_x = kTransformWidth4x4[tx_size];
1585 const int step_y = kTransformHeight4x4[tx_size];
1586 const bool do_decode = mode == kProcessingModeDecodeOnly ||
1587 mode == kProcessingModeParseAndDecode;
1588 if (do_decode && !bp.is_inter) {
1589 if (bp.prediction_parameters->palette_mode_info.size[GetPlaneType(plane)] >
1590 0) {
1591 CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
1592 x, y, tx_size);
1593 } else {
1594 const PredictionMode mode =
1595 (plane == kPlaneY) ? bp.y_mode
1596 : (bp.prediction_parameters->uv_mode ==
1597 kPredictionModeChromaFromLuma
1598 ? kPredictionModeDc
1599 : bp.prediction_parameters->uv_mode);
1600 const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
1601 const int tr_column4x4 =
1602 (sub_block_column4x4 >> subsampling_x) + step_x + 1;
1603 const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
1604 const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
1605 const bool has_left = x > 0 || block.left_available[plane];
1606 const bool has_top = y > 0 || block.top_available[plane];
1607
1608 CALL_BITDEPTH_FUNCTION(
1609 IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
1610 block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
1611 block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
1612 mode, tx_size);
1613 if (plane != kPlaneY &&
1614 bp.prediction_parameters->uv_mode == kPredictionModeChromaFromLuma) {
1615 CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
1616 start_y, tx_size);
1617 }
1618 }
1619 if (plane == kPlaneY) {
1620 block.bp->prediction_parameters->max_luma_width =
1621 start_x + MultiplyBy4(step_x);
1622 block.bp->prediction_parameters->max_luma_height =
1623 start_y + MultiplyBy4(step_y);
1624 block.scratch_buffer->cfl_luma_buffer_valid = false;
1625 }
1626 }
1627 if (!bp.skip) {
1628 const int sb_row_index = SuperBlockRowIndex(block.row4x4);
1629 const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
1630 if (mode == kProcessingModeDecodeOnly) {
1631 Queue<TransformParameters>& tx_params =
1632 *residual_buffer_threaded_[sb_row_index][sb_column_index]
1633 ->transform_parameters();
1634 ReconstructBlock(block, plane, start_x, start_y, tx_size,
1635 tx_params.Front().type,
1636 tx_params.Front().non_zero_coeff_count);
1637 tx_params.Pop();
1638 } else {
1639 TransformType tx_type;
1640 int non_zero_coeff_count;
1641 #if LIBGAV1_MAX_BITDEPTH >= 10
1642 if (sequence_header_.color_config.bitdepth > 8) {
1643 non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
1644 block, plane, start_x, start_y, tx_size, &tx_type);
1645 } else // NOLINT
1646 #endif
1647 {
1648 non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
1649 block, plane, start_x, start_y, tx_size, &tx_type);
1650 }
1651 if (non_zero_coeff_count < 0) return false;
1652 if (mode == kProcessingModeParseAndDecode) {
1653 ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
1654 non_zero_coeff_count);
1655 } else {
1656 assert(mode == kProcessingModeParseOnly);
1657 residual_buffer_threaded_[sb_row_index][sb_column_index]
1658 ->transform_parameters()
1659 ->Push(TransformParameters(tx_type, non_zero_coeff_count));
1660 }
1661 }
1662 }
1663 if (do_decode) {
1664 bool* block_decoded =
1665 &block.scratch_buffer
1666 ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
1667 [(sub_block_column4x4 >> subsampling_x) + 1];
1668 SetBlockValues<bool>(step_y, step_x, true, block_decoded,
1669 TileScratchBuffer::kBlockDecodedStride);
1670 }
1671 return true;
1672 }
1673
// Walks the luma transform tree rooted at (|start_x|, |start_y|) with an
// explicit stack (iterative DFS), splitting each node until its transform
// size fits within the size recorded in |inter_transform_sizes_|, and calls
// TransformBlock() on every leaf. Returns false if TransformBlock() fails.
bool Tile::TransformTree(const Block& block, int start_x, int start_y,
                         BlockSize plane_size, ProcessingMode mode) {
  assert(plane_size <= kBlock64x64);
  // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
  // required is (4 - 1) * 4 + 1 = 13.
  Stack<TransformTreeNode, 13> stack;
  // It is okay to cast BlockSize to TransformSize here since the enum are
  // equivalent for all BlockSize values <= kBlock64x64.
  stack.Push(TransformTreeNode(start_x, start_y,
                               static_cast<TransformSize>(plane_size)));

  do {
    TransformTreeNode node = stack.Pop();
    const int row = DivideBy4(node.y);
    const int column = DivideBy4(node.x);
    // Nodes outside the frame carry no coefficients.
    if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
      continue;
    }
    const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
    const int width = kTransformWidth[node.tx_size];
    const int height = kTransformHeight[node.tx_size];
    // Leaf node: the node's transform size fits within the signaled size.
    if (width <= kTransformWidth[inter_tx_size] &&
        height <= kTransformHeight[inter_tx_size]) {
      if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
                          mode)) {
        return false;
      }
      continue;
    }
    // The split transform size look up gives the right transform size that we
    // should push in the stack.
    // if (width > height) => transform size whose width is half.
    // if (width < height) => transform size whose height is half.
    // if (width == height) => transform size whose width and height are half.
    // Children are pushed in reverse order so that, with a LIFO stack, they
    // are popped (and hence parsed) in top-left-first order.
    const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
    const int half_width = DivideBy2(width);
    if (width > height) {
      stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    const int half_height = DivideBy2(height);
    if (width < height) {
      stack.Push(
          TransformTreeNode(node.x, node.y + half_height, split_tx_size));
      stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
      continue;
    }
    stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
                                 split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
    stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
    stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
  } while (!stack.Empty());
  return true;
}
1730
// Inverse-transforms the residual of one transform block and adds it to the
// prediction stored in |buffer_[plane]|.
void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
                            int start_y, TransformSize tx_size,
                            TransformType tx_type, int non_zero_coeff_count) {
  // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
  assert(non_zero_coeff_count >= 0);
  // Nothing to add when the block had no coded coefficients.
  if (non_zero_coeff_count == 0) return;
#if LIBGAV1_MAX_BITDEPTH >= 10
  if (sequence_header_.color_config.bitdepth > 8) {
    // View the byte-based frame buffer as 16-bit pixels for high bitdepth.
    Array2DView<uint16_t> buffer(
        buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
        reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation
                    .lossless[block.bp->prediction_parameters->segment_id],
                reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
                &buffer, non_zero_coeff_count);
  } else  // NOLINT
#endif
  {
    Reconstruct(dsp_, tx_type, tx_size,
                frame_header_.segmentation
                    .lossless[block.bp->prediction_parameters->segment_id],
                reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
                &buffer_[plane], non_zero_coeff_count);
  }
  if (split_parse_and_decode_) {
    // Advance the shared residual pointer past this transform block so the
    // next consumer reads the following block's residual.
    *block.residual +=
        kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
  }
}
1761
// Processes the residual of every transform block in |block| according to
// |mode|, splitting blocks larger than 64x64 into 64x64 chunks. Returns
// false on failure.
bool Tile::Residual(const Block& block, ProcessingMode mode) {
  const int width_chunks = std::max(1, block.width >> 6);
  const int height_chunks = std::max(1, block.height >> 6);
  const BlockSize size_chunk4x4 =
      (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
  const BlockParameters& bp = *block.bp;
  for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
    for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
      const int num_planes = block.HasChroma() ? PlaneCount() : 1;
      int plane = kPlaneY;
      do {
        const int subsampling_x = subsampling_x_[plane];
        const int subsampling_y = subsampling_y_[plane];
        // For the Y plane, |inter_transform_sizes_| already holds this 4x4
        // unit's transform size (when lossless is true it is always
        // kTransformSize4x4), so it can be used directly as the Y plane's
        // transform size (part of Section 5.11.37 in the spec).
        const TransformSize tx_size =
            (plane == kPlaneY)
                ? inter_transform_sizes_[block.row4x4][block.column4x4]
                : bp.uv_transform_size;
        const BlockSize plane_size =
            kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
        assert(plane_size != kBlockInvalid);
        if (bp.is_inter &&
            !frame_header_.segmentation
                 .lossless[bp.prediction_parameters->segment_id] &&
            plane == kPlaneY) {
          // Non-lossless inter luma: transform sizes form a tree; walk it.
          const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
          const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
          const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
          if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
            return false;
          }
        } else {
          // Uniform transform size: iterate the grid of transform blocks.
          const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
          const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
          const int step_x = kTransformWidth4x4[tx_size];
          const int step_y = kTransformHeight4x4[tx_size];
          const int num4x4_wide = kNum4x4BlocksWide[plane_size];
          const int num4x4_high = kNum4x4BlocksHigh[plane_size];
          for (int y = 0; y < num4x4_high; y += step_y) {
            for (int x = 0; x < num4x4_wide; x += step_x) {
              if (!TransformBlock(
                      block, static_cast<Plane>(plane), base_x, base_y, tx_size,
                      x + (MultiplyBy16(chunk_x) >> subsampling_x),
                      y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
                return false;
              }
            }
          }
        }
      } while (++plane < num_planes);
    }
  }
  return true;
}
1819
// The purpose of this function is to limit the maximum size of motion vectors
// and also, if use_intra_block_copy is true, to additionally constrain the
// motion vector so that the data is fetched from parts of the tile that have
// already been decoded and are not too close to the current block (in order to
// make a pipelined decoder implementation feasible).
bool Tile::IsMvValid(const Block& block, bool is_compound) const {
  const BlockParameters& bp = *block.bp;
  // Every component of every motion vector must fit in 14 bits plus sign.
  for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
    for (int mv_component : bp.mv.mv[i].mv) {
      if (std::abs(mv_component) >= (1 << 14)) {
        return false;
      }
    }
  }
  if (!block.bp->prediction_parameters->use_intra_block_copy) {
    return true;
  }
  // Intra block copy vectors must be whole-pel: the low 3 (fractional) bits
  // of both 16-bit components must be zero.
  if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) {
    return false;
  }
  // Convert to full-pel units and compute the source rectangle.
  const int delta_row = bp.mv.mv[0].mv[0] >> 3;
  const int delta_column = bp.mv.mv[0].mv[1] >> 3;
  int src_top_edge = MultiplyBy4(block.row4x4) + delta_row;
  int src_left_edge = MultiplyBy4(block.column4x4) + delta_column;
  const int src_bottom_edge = src_top_edge + block.height;
  const int src_right_edge = src_left_edge + block.width;
  // Expand the rectangle to cover the co-located chroma samples when the
  // chroma planes are subsampled and the block is narrower/shorter than 8.
  if (block.HasChroma()) {
    if (block.width < 8 && subsampling_x_[kPlaneU] != 0) {
      src_left_edge -= 4;
    }
    if (block.height < 8 && subsampling_y_[kPlaneU] != 0) {
      src_top_edge -= 4;
    }
  }
  // The source rectangle must lie entirely inside the current tile.
  if (src_top_edge < MultiplyBy4(row4x4_start_) ||
      src_left_edge < MultiplyBy4(column4x4_start_) ||
      src_bottom_edge > MultiplyBy4(row4x4_end_) ||
      src_right_edge > MultiplyBy4(column4x4_end_)) {
    return false;
  }
  // sb_height_log2 = use_128x128_superblock ? log2(128) : log2(64)
  const int sb_height_log2 =
      6 + static_cast<int>(sequence_header_.use_128x128_superblock);
  const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2;
  const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6;
  const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2;
  const int src_64x64_block_column = (src_right_edge - 1) >> 6;
  const int total_64x64_blocks_per_row =
      ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1;
  const int active_64x64_block =
      active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column;
  const int src_64x64_block =
      src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column;
  // The source must trail the current block by at least
  // kIntraBlockCopyDelay64x64Blocks in 64x64-block scan order.
  if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) {
    return false;
  }

  // Wavefront constraint: use only top left area of frame for reference.
  if (src_sb_row > active_sb_row) return false;
  const int gradient =
      1 + kIntraBlockCopyDelay64x64Blocks +
      static_cast<int>(sequence_header_.use_128x128_superblock);
  const int wavefront_offset = gradient * (active_sb_row - src_sb_row);
  return src_64x64_block_column < active_64x64_block_column -
                                      kIntraBlockCopyDelay64x64Blocks +
                                      wavefront_offset;
}
1887
// Assigns the motion vector(s) of an inter block. Each vector is either the
// global motion vector, a clamped reference candidate, or (for NEWMV) a
// predicted vector plus a decoded delta. Returns false if the final
// vector(s) fail IsMvValid().
bool Tile::AssignInterMv(const Block& block, bool is_compound) {
  int min[2];
  int max[2];
  GetClampParameters(block, min, max);
  BlockParameters& bp = *block.bp;
  const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
  bp.mv.mv64 = 0;
  if (is_compound) {
    // Compound: derive both motion vectors with the same procedure.
    for (int i = 0; i < 2; ++i) {
      const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode);
      MotionVector predicted_mv;
      if (mode == kPredictionModeGlobalMv) {
        predicted_mv = prediction_parameters.global_mv[i];
      } else {
        // NEARESTMV — and NEWMV with at most one candidate — use candidate
        // 0; otherwise use the signaled reference mv index.
        const int ref_mv_index = (mode == kPredictionModeNearestMv ||
                                  (mode == kPredictionModeNewMv &&
                                   prediction_parameters.ref_mv_count <= 1))
                                     ? 0
                                     : prediction_parameters.ref_mv_index;
        predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i);
        // Only clamp candidates that came from the reference mv list.
        if (ref_mv_index < prediction_parameters.ref_mv_count) {
          predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
          predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
        }
      }
      if (mode == kPredictionModeNewMv) {
        // NEWMV: the bitstream carries a delta on top of the prediction.
        ReadMotionVector(block, i);
        bp.mv.mv[i].mv[0] += predicted_mv.mv[0];
        bp.mv.mv[i].mv[1] += predicted_mv.mv[1];
      } else {
        bp.mv.mv[i] = predicted_mv;
      }
    }
  } else {
    // Single reference: same procedure for one motion vector.
    const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode);
    MotionVector predicted_mv;
    if (mode == kPredictionModeGlobalMv) {
      predicted_mv = prediction_parameters.global_mv[0];
    } else {
      const int ref_mv_index = (mode == kPredictionModeNearestMv ||
                                (mode == kPredictionModeNewMv &&
                                 prediction_parameters.ref_mv_count <= 1))
                                   ? 0
                                   : prediction_parameters.ref_mv_index;
      predicted_mv = prediction_parameters.reference_mv(ref_mv_index);
      if (ref_mv_index < prediction_parameters.ref_mv_count) {
        predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
        predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
      }
    }
    if (mode == kPredictionModeNewMv) {
      ReadMotionVector(block, 0);
      bp.mv.mv[0].mv[0] += predicted_mv.mv[0];
      bp.mv.mv[0].mv[1] += predicted_mv.mv[1];
    } else {
      bp.mv.mv[0] = predicted_mv;
    }
  }
  return IsMvValid(block, is_compound);
}
1948
AssignIntraMv(const Block & block)1949 bool Tile::AssignIntraMv(const Block& block) {
1950 // TODO(linfengz): Check if the clamping process is necessary.
1951 int min[2];
1952 int max[2];
1953 GetClampParameters(block, min, max);
1954 BlockParameters& bp = *block.bp;
1955 const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1956 const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0);
1957 bp.mv.mv64 = 0;
1958 ReadMotionVector(block, 0);
1959 if (ref_mv_0.mv32 == 0) {
1960 const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1);
1961 if (ref_mv_1.mv32 == 0) {
1962 const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
1963 if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
1964 bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4);
1965 bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels);
1966 } else {
1967 bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4);
1968 }
1969 } else {
1970 bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]);
1971 bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]);
1972 }
1973 } else {
1974 bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]);
1975 bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]);
1976 }
1977 return IsMvValid(block, /*is_compound=*/false);
1978 }
1979
ResetEntropyContext(const Block & block)1980 void Tile::ResetEntropyContext(const Block& block) {
1981 const int num_planes = block.HasChroma() ? PlaneCount() : 1;
1982 int plane = kPlaneY;
1983 do {
1984 const int subsampling_x = subsampling_x_[plane];
1985 const int start_x = block.column4x4 >> subsampling_x;
1986 const int end_x =
1987 std::min((block.column4x4 + block.width4x4) >> subsampling_x,
1988 frame_header_.columns4x4);
1989 memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0,
1990 end_x - start_x);
1991 memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0,
1992 end_x - start_x);
1993 const int subsampling_y = subsampling_y_[plane];
1994 const int start_y = block.row4x4 >> subsampling_y;
1995 const int end_y =
1996 std::min((block.row4x4 + block.height4x4) >> subsampling_y,
1997 frame_header_.rows4x4);
1998 memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0,
1999 end_y - start_y);
2000 memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0,
2001 end_y - start_y);
2002 } while (++plane < num_planes);
2003 }
2004
// Computes the prediction of every plane of an inter |block| (including the
// intra part of inter-intra compound blocks). Intra-only blocks are predicted
// in TransformBlock() instead. Returns false on failure.
bool Tile::ComputePrediction(const Block& block) {
  const BlockParameters& bp = *block.bp;
  if (!bp.is_inter) return true;
  // Position of the block within its superblock, in 4x4 units.
  const int mask =
      (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) -
      1;
  const int sub_block_row4x4 = block.row4x4 & mask;
  const int sub_block_column4x4 = block.column4x4 & mask;
  const int plane_count = block.HasChroma() ? PlaneCount() : 1;
  // Returns true if this block applies local warping. The state is determined
  // in the Y plane and carried for use in the U/V planes.
  // But the U/V planes will not apply warping when the block size is smaller
  // than 8x8, even if this variable is true.
  bool is_local_valid = false;
  // Local warping parameters, similar usage as is_local_valid.
  GlobalMotion local_warp_params;
  int plane = kPlaneY;
  do {
    const int8_t subsampling_x = subsampling_x_[plane];
    const int8_t subsampling_y = subsampling_y_[plane];
    const BlockSize plane_size = block.residual_size[plane];
    const int block_width4x4 = kNum4x4BlocksWide[plane_size];
    const int block_height4x4 = kNum4x4BlocksHigh[plane_size];
    const int block_width = MultiplyBy4(block_width4x4);
    const int block_height = MultiplyBy4(block_height4x4);
    const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
    const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
    if (bp.reference_frame[1] == kReferenceFrameIntra) {
      // Inter-intra compound: generate the intra part of the prediction for
      // this plane first.
      const int tr_row4x4 = sub_block_row4x4 >> subsampling_y;
      const int tr_column4x4 =
          (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1;
      const int bl_row4x4 =
          (sub_block_row4x4 >> subsampling_y) + block_height4x4;
      const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1;
      const TransformSize tx_size =
          k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]]
                                 [k4x4HeightLog2[plane_size]];
      const bool has_left = block.left_available[plane];
      const bool has_top = block.top_available[plane];
      CALL_BITDEPTH_FUNCTION(
          IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y,
          has_left, has_top,
          block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
          block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
          kInterIntraToIntraMode[block.bp->prediction_parameters
                                     ->inter_intra_mode],
          tx_size);
    }
    int candidate_row = block.row4x4;
    int candidate_column = block.column4x4;
    bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra;
    // For subsampled chroma planes the prediction unit may cover neighboring
    // luma blocks; check whether any of the covered blocks is intra.
    if (!some_use_intra && plane != 0) {
      // Round down to the position of the covering (aligned) block.
      candidate_row = (candidate_row >> subsampling_y) << subsampling_y;
      candidate_column = (candidate_column >> subsampling_x) << subsampling_x;
      if (candidate_row != block.row4x4) {
        // Top block.
        const BlockParameters& bp_top =
            *block_parameters_holder_.Find(candidate_row, block.column4x4);
        some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra;
        if (!some_use_intra && candidate_column != block.column4x4) {
          // Top-left block.
          const BlockParameters& bp_top_left =
              *block_parameters_holder_.Find(candidate_row, candidate_column);
          some_use_intra =
              bp_top_left.reference_frame[0] == kReferenceFrameIntra;
        }
      }
      if (!some_use_intra && candidate_column != block.column4x4) {
        // Left block.
        const BlockParameters& bp_left =
            *block_parameters_holder_.Find(block.row4x4, candidate_column);
        some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra;
      }
    }
    int prediction_width;
    int prediction_height;
    if (some_use_intra) {
      // Predict the whole plane area as one unit from this block's own
      // parameters.
      candidate_row = block.row4x4;
      candidate_column = block.column4x4;
      prediction_width = block_width;
      prediction_height = block_height;
    } else {
      prediction_width = block.width >> subsampling_x;
      prediction_height = block.height >> subsampling_y;
    }
    // Run inter prediction over the plane area in prediction-sized pieces.
    int r = 0;
    int y = 0;
    do {
      int c = 0;
      int x = 0;
      do {
        if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x,
                             base_y + y, prediction_width, prediction_height,
                             candidate_row + r, candidate_column + c,
                             &is_local_valid, &local_warp_params)) {
          return false;
        }
        ++c;
        x += prediction_width;
      } while (x < block_width);
      ++r;
      y += prediction_height;
    } while (y < block_height);
  } while (++plane < plane_count);
  return true;
}
2111
2112 #undef CALL_BITDEPTH_FUNCTION
2113
PopulateDeblockFilterLevel(const Block & block)2114 void Tile::PopulateDeblockFilterLevel(const Block& block) {
2115 if (!post_filter_.DoDeblock()) return;
2116 BlockParameters& bp = *block.bp;
2117 const int mode_id =
2118 static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode));
2119 for (int i = 0; i < kFrameLfCount; ++i) {
2120 if (delta_lf_all_zero_) {
2121 bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
2122 bp.prediction_parameters->segment_id, i, bp.reference_frame[0],
2123 mode_id);
2124 } else {
2125 bp.deblock_filter_level[i] =
2126 deblock_filter_levels_[bp.prediction_parameters->segment_id][i]
2127 [bp.reference_frame[0]][mode_id];
2128 }
2129 }
2130 }
2131
void Tile::PopulateCdefSkip(const Block& block) {
  // Marks, at 8x8 granularity, the area covered by this block in the
  // cdef_skip_ bitmap. Returns early (leaving the bitmap untouched) when CDEF
  // is off, when the block is a skip block, or when the containing 64x64 area
  // has CDEF disabled (cdef_index_ == -1 is only meaningful when
  // cdef.bits > 0).
  if (!post_filter_.DoCdef() || block.bp->skip ||
      (frame_header_.cdef.bits > 0 &&
       cdef_index_[DivideBy16(block.row4x4)][DivideBy16(block.column4x4)] ==
           -1)) {
    return;
  }
  // The rest of this function is an efficient version of the following code:
  // for (int y = block.row4x4; y < block.row4x4 + block.height4x4; y++) {
  //   for (int x = block.column4x4; x < block.column4x4 + block.width4x4;
  //        x++) {
  //     const uint8_t mask = uint8_t{1} << ((x >> 1) & 0x7);
  //     cdef_skip_[y >> 1][x >> 4] |= mask;
  //   }
  // }

  // For all block widths other than 32, the mask will fit in uint8_t. For
  // block width == 32, the mask is always 0xFFFF.
  //
  // bw4 is the number of 8x8 units covered horizontally; the
  // (block.column4x4 & 1) term accounts for a block starting in the second
  // half of an 8x8 unit. Clamped to at least 1 so the shift below is valid.
  const int bw4 =
      std::max(DivideBy2(block.width4x4) + (block.column4x4 & 1), 1);
  const uint8_t mask = (block.width4x4 == 32)
                           ? 0xFF
                           : (uint8_t{0xFF} >> (8 - bw4))
                                 << (DivideBy2(block.column4x4) & 0x7);
  uint8_t* cdef_skip = &cdef_skip_[block.row4x4 >> 1][block.column4x4 >> 4];
  const int stride = cdef_skip_.columns();
  int row = 0;
  do {
    *cdef_skip |= mask;
    // A 32-wide (4x4 units) block spans two bytes of the bitmap row; the
    // second byte is always fully set.
    if (block.width4x4 == 32) {
      *(cdef_skip + 1) = 0xFF;
    }
    // Advance by one bitmap row, which represents two 4x4 rows.
    cdef_skip += stride;
    row += 2;
  } while (row < block.height4x4);
}
2168
// Parses (and in single-pass mode also decodes) one coding block: reads mode
// info, palette tokens, transform sizes and residuals, and updates the
// per-frame state (segmentation map, motion field) derived from the block.
// Returns false on any parse/allocation failure.
bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
                        TileScratchBuffer* const scratch_buffer,
                        ResidualPtr* residual) {
  // Do not process the block if the starting point is beyond the visible frame.
  // This is equivalent to the has_row/has_column check in the
  // decode_partition() section of the spec when partition equals
  // kPartitionHorizontal or kPartitionVertical.
  if (row4x4 >= frame_header_.rows4x4 ||
      column4x4 >= frame_header_.columns4x4) {
    return true;
  }

  if (split_parse_and_decode_) {
    // Push block ordering info to the queue. DecodeBlock() will use this queue
    // to decode the blocks in the correct order.
    const int sb_row_index = SuperBlockRowIndex(row4x4);
    const int sb_column_index = SuperBlockColumnIndex(column4x4);
    residual_buffer_threaded_[sb_row_index][sb_column_index]
        ->partition_tree_order()
        ->Push(PartitionTreeNode(row4x4, column4x4, block_size));
  }

  BlockParameters* bp_ptr =
      block_parameters_holder_.Get(row4x4, column4x4, block_size);
  if (bp_ptr == nullptr) {
    LIBGAV1_DLOG(ERROR, "Failed to get BlockParameters.");
    return false;
  }
  BlockParameters& bp = *bp_ptr;
  Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
  bp.size = block_size;
  // In threaded mode each block owns fresh prediction parameters (the decode
  // pass still needs them later); in single-pass mode the tile-level object is
  // reused and handed back at the end of this function.
  bp.prediction_parameters =
      split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
                                    new (std::nothrow) PredictionParameters())
                              : std::move(prediction_parameters_);
  if (bp.prediction_parameters == nullptr) return false;
  if (!DecodeModeInfo(block)) return false;
  // Deblock filter levels depend on the just-decoded mode info.
  PopulateDeblockFilterLevel(block);
  if (!ReadPaletteTokens(block)) return false;
  DecodeTransformSize(block);
  // Part of Section 5.11.37 in the spec (implemented as a simple lookup).
  bp.uv_transform_size =
      frame_header_.segmentation.lossless[bp.prediction_parameters->segment_id]
          ? kTransformSize4x4
          : kUVTransformSize[block.residual_size[kPlaneU]];
  if (bp.skip) ResetEntropyContext(block);
  PopulateCdefSkip(block);
  if (split_parse_and_decode_) {
    if (!Residual(block, kProcessingModeParseOnly)) return false;
  } else {
    if (!ComputePrediction(block) ||
        !Residual(block, kProcessingModeParseAndDecode)) {
      return false;
    }
  }
  // If frame_header_.segmentation.enabled is false,
  // bp.prediction_parameters->segment_id is 0 for all blocks. We don't need to
  // call save bp.prediction_parameters->segment_id in the current frame because
  // the current frame's segmentation map will be cleared to all 0s.
  //
  // If frame_header_.segmentation.enabled is true and
  // frame_header_.segmentation.update_map is false, we will copy the previous
  // frame's segmentation map to the current frame. So we don't need to call
  // save bp.prediction_parameters->segment_id in the current frame.
  if (frame_header_.segmentation.enabled &&
      frame_header_.segmentation.update_map) {
    // Clamp the filled region to the visible frame.
    const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
                                 static_cast<int>(block.width4x4));
    const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
                                 static_cast<int>(block.height4x4));
    current_frame_.segmentation_map()->FillBlock(
        row4x4, column4x4, x_limit, y_limit,
        bp.prediction_parameters->segment_id);
  }
  StoreMotionFieldMvsIntoCurrentFrame(block);
  if (!split_parse_and_decode_) {
    // Reclaim the reusable prediction parameters for the next block.
    prediction_parameters_ = std::move(bp.prediction_parameters);
  }
  return true;
}
2249
DecodeBlock(int row4x4,int column4x4,BlockSize block_size,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2250 bool Tile::DecodeBlock(int row4x4, int column4x4, BlockSize block_size,
2251 TileScratchBuffer* const scratch_buffer,
2252 ResidualPtr* residual) {
2253 if (row4x4 >= frame_header_.rows4x4 ||
2254 column4x4 >= frame_header_.columns4x4) {
2255 return true;
2256 }
2257 Block block(this, block_size, row4x4, column4x4, scratch_buffer, residual);
2258 if (!ComputePrediction(block) ||
2259 !Residual(block, kProcessingModeDecodeOnly)) {
2260 return false;
2261 }
2262 block.bp->prediction_parameters.reset(nullptr);
2263 return true;
2264 }
2265
// Iterative (stack-based) traversal of one superblock's partition tree.
// Reads each node's partition symbol and dispatches leaf blocks to
// ProcessBlock(). Returns false on a bitstream or processing error.
bool Tile::ProcessPartition(int row4x4_start, int column4x4_start,
                            TileScratchBuffer* const scratch_buffer,
                            ResidualPtr* residual) {
  Stack<PartitionTreeNode, kDfsStackSize> stack;

  // Set up the first iteration.
  stack.Push(
      PartitionTreeNode(row4x4_start, column4x4_start, SuperBlockSize()));

  // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked.
  // Otherwise, the children are pushed into the stack for future processing.
  do {
    PartitionTreeNode node = stack.Pop();
    int row4x4 = node.row4x4;
    int column4x4 = node.column4x4;
    BlockSize block_size = node.block_size;

    // Nodes entirely outside the visible frame are silently dropped.
    if (row4x4 >= frame_header_.rows4x4 ||
        column4x4 >= frame_header_.columns4x4) {
      continue;
    }
    const int block_width4x4 = kNum4x4BlocksWide[block_size];
    assert(block_width4x4 == kNum4x4BlocksHigh[block_size]);
    const int half_block4x4 = block_width4x4 >> 1;
    // has_rows/has_columns: whether the lower/right half of this node is
    // inside the visible frame; they condition how the partition is coded.
    const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4;
    const bool has_columns =
        (column4x4 + half_block4x4) < frame_header_.columns4x4;
    Partition partition;
    if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns,
                       &partition)) {
      LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d",
                   row4x4, column4x4);
      return false;
    }
    const BlockSize sub_size = kSubSize[partition][block_size];
    // Section 6.10.4: It is a requirement of bitstream conformance that
    // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID
    // every time subSize is computed.
    if (sub_size == kBlockInvalid ||
        kPlaneResidualSize[sub_size]
                          [sequence_header_.color_config.subsampling_x]
                          [sequence_header_.color_config.subsampling_y] ==
            kBlockInvalid) {
      LIBGAV1_DLOG(
          ERROR,
          "Invalid sub-block/plane size for row: %d column: %d partition: "
          "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d",
          row4x4, column4x4, partition, block_size, sub_size,
          sequence_header_.color_config.subsampling_x,
          sequence_header_.color_config.subsampling_y);
      return false;
    }

    const int quarter_block4x4 = half_block4x4 >> 1;
    const BlockSize split_size = kSubSize[kPartitionSplit][block_size];
    assert(partition == kPartitionNone || sub_size != kBlockInvalid);
    switch (partition) {
      case kPartitionNone:
        // Leaf node: the whole node is a single coding block.
        if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
                          residual)) {
          return false;
        }
        break;
      case kPartitionSplit:
        // The children must be added in reverse order since a stack is being
        // used.
        stack.Push(PartitionTreeNode(row4x4 + half_block4x4,
                                     column4x4 + half_block4x4, sub_size));
        stack.Push(
            PartitionTreeNode(row4x4 + half_block4x4, column4x4, sub_size));
        stack.Push(
            PartitionTreeNode(row4x4, column4x4 + half_block4x4, sub_size));
        stack.Push(PartitionTreeNode(row4x4, column4x4, sub_size));
        break;
      case kPartitionHorizontal:
        // Two stacked blocks: top then bottom.
        if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4, sub_size,
                          scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionVertical:
        // Two side-by-side blocks: left then right.
        if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4, column4x4 + half_block4x4, sub_size,
                          scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionHorizontalWithTopSplit:
        // Top half split into two square blocks, bottom half one wide block.
        if (!ProcessBlock(row4x4, column4x4, split_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4, column4x4 + half_block4x4, split_size,
                          scratch_buffer, residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4, sub_size,
                          scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionHorizontalWithBottomSplit:
        // Top half one wide block, bottom half split into two square blocks.
        if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4, split_size,
                          scratch_buffer, residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4 + half_block4x4,
                          split_size, scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionVerticalWithLeftSplit:
        // Left half split into two square blocks, right half one tall block.
        if (!ProcessBlock(row4x4, column4x4, split_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4, split_size,
                          scratch_buffer, residual) ||
            !ProcessBlock(row4x4, column4x4 + half_block4x4, sub_size,
                          scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionVerticalWithRightSplit:
        // Left half one tall block, right half split into two square blocks.
        if (!ProcessBlock(row4x4, column4x4, sub_size, scratch_buffer,
                          residual) ||
            !ProcessBlock(row4x4, column4x4 + half_block4x4, split_size,
                          scratch_buffer, residual) ||
            !ProcessBlock(row4x4 + half_block4x4, column4x4 + half_block4x4,
                          split_size, scratch_buffer, residual)) {
          return false;
        }
        break;
      case kPartitionHorizontal4:
        // Four stacked quarter-height blocks.
        for (int i = 0; i < 4; ++i) {
          if (!ProcessBlock(row4x4 + i * quarter_block4x4, column4x4, sub_size,
                            scratch_buffer, residual)) {
            return false;
          }
        }
        break;
      case kPartitionVertical4:
        // Four side-by-side quarter-width blocks.
        for (int i = 0; i < 4; ++i) {
          if (!ProcessBlock(row4x4, column4x4 + i * quarter_block4x4, sub_size,
                            scratch_buffer, residual)) {
            return false;
          }
        }
        break;
    }
  } while (!stack.Empty());
  return true;
}
2416
ResetLoopRestorationParams()2417 void Tile::ResetLoopRestorationParams() {
2418 for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
2419 for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) {
2420 reference_unit_info_[plane].sgr_proj_info.multiplier[i] =
2421 kSgrProjDefaultMultiplier[i];
2422 for (int j = 0; j < kNumWienerCoefficients; ++j) {
2423 reference_unit_info_[plane].wiener_info.filter[i][j] =
2424 kWienerDefaultFilter[j];
2425 }
2426 }
2427 }
2428 }
2429
ResetCdef(const int row4x4,const int column4x4)2430 void Tile::ResetCdef(const int row4x4, const int column4x4) {
2431 if (frame_header_.cdef.bits == 0) return;
2432 const int row = DivideBy16(row4x4);
2433 const int column = DivideBy16(column4x4);
2434 cdef_index_[row][column] = -1;
2435 if (sequence_header_.use_128x128_superblock) {
2436 const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64];
2437 const int border_row = DivideBy16(row4x4 + cdef_size4x4);
2438 const int border_column = DivideBy16(column4x4 + cdef_size4x4);
2439 cdef_index_[row][border_column] = -1;
2440 cdef_index_[border_row][column] = -1;
2441 cdef_index_[border_row][border_column] = -1;
2442 }
2443 }
2444
// Re-initializes the per-superblock block_decoded bitmap used by intra
// prediction edge availability. Indexing note: the spec's coordinate -1 maps
// to array index 0, hence the +1 offsets below.
void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer,
                             int row4x4, int column4x4) {
  // Set everything to false.
  memset(scratch_buffer->block_decoded, 0,
         sizeof(scratch_buffer->block_decoded));
  // Set specific edge cases to true.
  const int sb_size4 = sequence_header_.use_128x128_superblock ? 32 : 16;
  for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
    const int subsampling_x = subsampling_x_[plane];
    const int subsampling_y = subsampling_y_[plane];
    // Remaining extent of the tile from this superblock, in subsampled 4x4
    // units (handles superblocks clipped by the tile boundary).
    const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x;
    const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y;
    // The memset is equivalent to the following lines in the spec:
    // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) {
    //   if ( y < 0 && x < sbWidth4 ) {
    //     BlockDecoded[plane][y][x] = 1
    //   }
    // }
    const int num_elements =
        std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1;
    memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements);
    // The for loop is equivalent to the following lines in the spec:
    // for ( y = -1; y <= ( sbSize4 >> subY ); y++ )
    //   if ( x < 0 && y < sbHeight4 )
    //     BlockDecoded[plane][y][x] = 1
    //   }
    // }
    // BlockDecoded[plane][sbSize4 >> subY][-1] = 0
    // Note the loop bound uses < (not <=), which also implements the last
    // spec line: the bottom-left entry is left false.
    for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4);
         ++y) {
      scratch_buffer->block_decoded[plane][y + 1][0] = true;
    }
  }
}
2479
ProcessSuperBlock(int row4x4,int column4x4,TileScratchBuffer * const scratch_buffer,ProcessingMode mode)2480 bool Tile::ProcessSuperBlock(int row4x4, int column4x4,
2481 TileScratchBuffer* const scratch_buffer,
2482 ProcessingMode mode) {
2483 const bool parsing =
2484 mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode;
2485 const bool decoding = mode == kProcessingModeDecodeOnly ||
2486 mode == kProcessingModeParseAndDecode;
2487 if (parsing) {
2488 read_deltas_ = frame_header_.delta_q.present;
2489 ResetCdef(row4x4, column4x4);
2490 }
2491 if (decoding) {
2492 ClearBlockDecoded(scratch_buffer, row4x4, column4x4);
2493 }
2494 const BlockSize block_size = SuperBlockSize();
2495 if (parsing) {
2496 ReadLoopRestorationCoefficients(row4x4, column4x4, block_size);
2497 }
2498 if (parsing && decoding) {
2499 uint8_t* residual_buffer = residual_buffer_.get();
2500 if (!ProcessPartition(row4x4, column4x4, scratch_buffer,
2501 &residual_buffer)) {
2502 LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4,
2503 column4x4);
2504 return false;
2505 }
2506 return true;
2507 }
2508 const int sb_row_index = SuperBlockRowIndex(row4x4);
2509 const int sb_column_index = SuperBlockColumnIndex(column4x4);
2510 if (parsing) {
2511 residual_buffer_threaded_[sb_row_index][sb_column_index] =
2512 residual_buffer_pool_->Get();
2513 if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) {
2514 LIBGAV1_DLOG(ERROR, "Failed to get residual buffer.");
2515 return false;
2516 }
2517 uint8_t* residual_buffer =
2518 residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2519 if (!ProcessPartition(row4x4, column4x4, scratch_buffer,
2520 &residual_buffer)) {
2521 LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4,
2522 column4x4);
2523 return false;
2524 }
2525 } else {
2526 if (!DecodeSuperBlock(sb_row_index, sb_column_index, scratch_buffer)) {
2527 LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d",
2528 row4x4, column4x4);
2529 return false;
2530 }
2531 residual_buffer_pool_->Release(
2532 std::move(residual_buffer_threaded_[sb_row_index][sb_column_index]));
2533 }
2534 return true;
2535 }
2536
DecodeSuperBlock(int sb_row_index,int sb_column_index,TileScratchBuffer * const scratch_buffer)2537 bool Tile::DecodeSuperBlock(int sb_row_index, int sb_column_index,
2538 TileScratchBuffer* const scratch_buffer) {
2539 uint8_t* residual_buffer =
2540 residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2541 Queue<PartitionTreeNode>& partition_tree_order =
2542 *residual_buffer_threaded_[sb_row_index][sb_column_index]
2543 ->partition_tree_order();
2544 while (!partition_tree_order.Empty()) {
2545 PartitionTreeNode block = partition_tree_order.Front();
2546 if (!DecodeBlock(block.row4x4, block.column4x4, block.block_size,
2547 scratch_buffer, &residual_buffer)) {
2548 LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d",
2549 block.row4x4, block.column4x4);
2550 return false;
2551 }
2552 partition_tree_order.Pop();
2553 }
2554 return true;
2555 }
2556
ReadLoopRestorationCoefficients(int row4x4,int column4x4,BlockSize block_size)2557 void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4,
2558 BlockSize block_size) {
2559 if (frame_header_.allow_intrabc) return;
2560 LoopRestorationInfo* const restoration_info = post_filter_.restoration_info();
2561 const bool is_superres_scaled =
2562 frame_header_.width != frame_header_.upscaled_width;
2563 for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2564 LoopRestorationUnitInfo unit_info;
2565 if (restoration_info->PopulateUnitInfoForSuperBlock(
2566 static_cast<Plane>(plane), block_size, is_superres_scaled,
2567 frame_header_.superres_scale_denominator, row4x4, column4x4,
2568 &unit_info)) {
2569 for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end;
2570 ++unit_row) {
2571 for (int unit_column = unit_info.column_start;
2572 unit_column < unit_info.column_end; ++unit_column) {
2573 const int unit_id = unit_row * restoration_info->num_horizontal_units(
2574 static_cast<Plane>(plane)) +
2575 unit_column;
2576 restoration_info->ReadUnitCoefficients(
2577 &reader_, &symbol_decoder_context_, static_cast<Plane>(plane),
2578 unit_id, &reference_unit_info_);
2579 }
2580 }
2581 }
2582 }
2583 }
2584
// Saves this block's motion vector (at 8x8 granularity) into the current
// frame's motion field so that future frames can use it for motion field
// projection. At most one reference (the first eligible one, scanning
// backward) is stored per block.
void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
  if (frame_header_.refresh_frame_flags == 0 ||
      IsIntraFrame(frame_header_.frame_type)) {
    return;
  }
  // Iterate over odd rows/columns beginning at the first odd row/column for the
  // block. It is done this way because motion field mvs are only needed at a
  // 8x8 granularity.
  const int row_start4x4 = block.row4x4 | 1;
  const int row_limit4x4 =
      std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
  if (row_start4x4 >= row_limit4x4) return;
  const int column_start4x4 = block.column4x4 | 1;
  const int column_limit4x4 =
      std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
  if (column_start4x4 >= column_limit4x4) return;

  // The largest reference MV component that can be saved.
  constexpr int kRefMvsLimit = (1 << 12) - 1;
  const BlockParameters& bp = *block.bp;
  ReferenceInfo* reference_info = current_frame_.reference_info();
  for (int i = 1; i >= 0; --i) {
    const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
    // Must make a local copy so that StoreMotionFieldMvs() knows there is no
    // overlap between load and store.
    const MotionVector mv_to_store = bp.mv.mv[i];
    const int mv_row = std::abs(mv_to_store.mv[0]);
    const int mv_column = std::abs(mv_to_store.mv[1]);
    if (reference_frame_to_store > kReferenceFrameIntra &&
        // kRefMvsLimit equals 0x0FFF, so we can first bitwise OR the two
        // absolute values and then compare with kRefMvsLimit to save a branch.
        // The next line is equivalent to:
        // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
        (mv_row | mv_column) <= kRefMvsLimit &&
        reference_info->relative_distance_from[reference_frame_to_store] < 0) {
      const int row_start8x8 = DivideBy2(row_start4x4);
      const int row_limit8x8 = DivideBy2(row_limit4x4);
      const int column_start8x8 = DivideBy2(column_start4x4);
      const int column_limit8x8 = DivideBy2(column_limit4x4);
      const int rows = row_limit8x8 - row_start8x8;
      const int columns = column_limit8x8 - column_start8x8;
      const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4());
      ReferenceFrameType* const reference_frame_row_start =
          &reference_info
               ->motion_field_reference_frame[row_start8x8][column_start8x8];
      MotionVector* const mv =
          &reference_info->motion_field_mv[row_start8x8][column_start8x8];

      // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined
      // and simplifies std::fill() for these cases.
      if (columns <= 1) {
        // Don't change the above condition to (columns == 1).
        // Condition (columns <= 1) may help the compiler simplify the inlining
        // of the general case of StoreMotionFieldMvs() by eliminating the
        // (columns == 0) case.
        assert(columns == 1);
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            1, reference_frame_row_start, mv);
      } else if (columns == 2) {
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            2, reference_frame_row_start, mv);
      } else if (columns == 4) {
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            4, reference_frame_row_start, mv);
      } else if (columns == 8) {
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            8, reference_frame_row_start, mv);
      } else if (columns == 16) {
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            16, reference_frame_row_start, mv);
      } else if (columns < 16) {
        // This always true condition (columns < 16) may help the compiler
        // simplify the inlining of the following function.
        // This general case is rare and usually only happens to the blocks
        // which contain the right boundary of the frame.
        StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
                            columns, reference_frame_row_start, mv);
      } else {
        assert(false);
      }
      // Only the first eligible reference (scanning i = 1 then 0) is stored.
      return;
    }
  }
}
2669
2670 } // namespace libgav1
2671