• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/tile.h"
16 
17 #include <algorithm>
18 #include <array>
19 #include <cassert>
20 #include <climits>
21 #include <cstdlib>
22 #include <cstring>
23 #include <memory>
24 #include <new>
25 #include <numeric>
26 #include <type_traits>
27 #include <utility>
28 
29 #include "src/frame_scratch_buffer.h"
30 #include "src/motion_vector.h"
31 #include "src/reconstruction.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/common.h"
34 #include "src/utils/constants.h"
35 #include "src/utils/logging.h"
36 #include "src/utils/segmentation.h"
37 #include "src/utils/stack.h"
38 
39 namespace libgav1 {
40 namespace {
41 
42 // Import all the constants in the anonymous namespace.
43 #include "src/quantizer_tables.inc"
44 #include "src/scan_tables.inc"
45 
46 // Precision bits when scaling reference frames.
47 constexpr int kReferenceScaleShift = 14;
48 // Range above kNumQuantizerBaseLevels beyond which the exponential golomb
49 // coding process is activated.
50 constexpr int kQuantizerCoefficientBaseRange = 12;
// Threshold that kQuantizerCoefficientBaseRange is measured from.
// NOTE(review): presumably the number of coefficient levels coded directly by
// the base symbol; confirm against the coefficient parsing code.
51 constexpr int kNumQuantizerBaseLevels = 2;
// kQuantizerCoefficientBaseRange expressed in steps of
// (kCoeffBaseRangeSymbolCount - 1). kCoeffBaseRangeSymbolCount is defined
// outside this section of the file.
52 constexpr int kCoeffBaseRangeMaxIterations =
53     kQuantizerCoefficientBaseRange / (kCoeffBaseRangeSymbolCount - 1);
// Indices into the two-entry entropy context arrays. Tile::Init() sizes the
// kEntropyContextLeft entry by the frame's rows4x4 and the other entry by its
// columns4x4.
54 constexpr int kEntropyContextLeft = 0;
55 constexpr int kEntropyContextTop = 1;
56 
// 5x5 context lookup table. The table is symmetric, so swapping the two indices
// yields the same value.
// NOTE(review): judging by the name, the indices are derived from the top and
// left entropy contexts; the use site is outside this section, so confirm.
57 constexpr uint8_t kAllZeroContextsByTopLeft[5][5] = {{1, 2, 2, 2, 3},
58                                                      {2, 4, 4, 4, 5},
59                                                      {2, 4, 4, 4, 5},
60                                                      {2, 4, 4, 4, 5},
61                                                      {3, 5, 5, 5, 6}};
62 
63 // The space complexity of DFS is O(branching_factor * max_depth). For the
64 // parameter tree, branching_factor = 4 (there could be up to 4 children for
65 // every node) and max_depth (excluding the root) = 5 (to go from a 128x128
66 // block all the way to a 4x4 block). The worst-case stack size is 16, by
67 // counting the number of 'o' nodes in the diagram:
68 //
69 //   |                    128x128  The highest level (corresponding to the
70 //   |                             root of the tree) has no node in the stack.
71 //   |-----------------+
72 //   |     |     |     |
73 //   |     o     o     o  64x64
74 //   |
75 //   |-----------------+
76 //   |     |     |     |
77 //   |     o     o     o  32x32    Higher levels have three nodes in the stack,
78 //   |                             because we pop one node off the stack before
79 //   |-----------------+           pushing its four children onto the stack.
80 //   |     |     |     |
81 //   |     o     o     o  16x16
82 //   |
83 //   |-----------------+
84 //   |     |     |     |
85 //   |     o     o     o  8x8
86 //   |
87 //   |-----------------+
88 //   |     |     |     |
89 //   o     o     o     o  4x4      Only the lowest level has four nodes in the
90 //                                 stack.
//
// That is 4 levels with three nodes each plus one level with four nodes:
// 4 * 3 + 4 = 16.
91 constexpr int kDfsStackSize = 16;
92 
93 // Mask indicating whether the transform sets contain a particular transform
94 // type. If |tx_type| is present in |tx_set|, then the |tx_type|th LSB is set.
// The array holds one mask per transform set. For example, the first mask
// (0x1) contains only transform type 0 and the fourth mask (0xFFFF) contains
// all 16 transform types.
95 constexpr BitMaskSet kTransformTypeInSetMask[kNumTransformSets] = {
96     BitMaskSet(0x1),    BitMaskSet(0xE0F), BitMaskSet(0x20F),
97     BitMaskSet(0xFFFF), BitMaskSet(0xFFF), BitMaskSet(0x201)};
98 
// Maps a filter intra predictor index (0 .. kNumFilterIntraPredictors - 1) to a
// regular intra PredictionMode.
// NOTE(review): the use site is outside this section; presumably this is the
// intra mode assumed wherever a block coded with filter intra needs a regular
// prediction mode. Confirm against the caller.
99 constexpr PredictionMode
100     kFilterIntraModeToIntraPredictor[kNumFilterIntraPredictors] = {
101         kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
102         kPredictionModeD157, kPredictionModeDc};
103 
104 // Mask used to determine the index for mode_deltas lookup.
// The set contains the ten inter prediction modes listed below; every other
// prediction mode is absent from it.
105 constexpr BitMaskSet kPredictionModeDeltasMask(
106     kPredictionModeNearestMv, kPredictionModeNearMv, kPredictionModeNewMv,
107     kPredictionModeNearestNearestMv, kPredictionModeNearNearMv,
108     kPredictionModeNearestNewMv, kPredictionModeNewNearestMv,
109     kPredictionModeNearNewMv, kPredictionModeNewNearMv,
110     kPredictionModeNewNewMv);
111 
// Lookup table with one entry per transform size (kNumTransformSizes entries).
112 // This is computed as:
113 // min(transform_width_log2, 5) + min(transform_height_log2, 5) - 4.
114 constexpr uint8_t kEobMultiSizeLookup[kNumTransformSizes] = {
115     0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 5, 4, 5, 6, 6, 5, 6, 6};
116 
// Context offsets for every transform size: a 5x5 grid of offsets per entry of
// the first dimension (kNumTransformSizes entries).
// NOTE(review): the meaning of the two inner indices is not visible in this
// section (presumably row and column clamped to 4); confirm against the use
// site before relying on the index order.
117 /* clang-format off */
118 constexpr uint8_t kCoeffBaseContextOffset[kNumTransformSizes][5][5] = {
119     {{0, 1, 6, 6, 0}, {1, 6, 6, 21, 0}, {6, 6, 21, 21, 0}, {6, 21, 21, 21, 0},
120      {0, 0, 0, 0, 0}},
121     {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
122      {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
123     {{0, 11, 11, 11, 0}, {11, 11, 11, 11, 0}, {6, 6, 21, 21, 0},
124      {6, 21, 21, 21, 0}, {21, 21, 21, 21, 0}},
125     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
126      {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
127     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
128      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
129     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
130      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
131     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
132      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
133     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
134      {16, 16, 21, 21, 21}, {0, 0, 0, 0, 0}},
135     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
136      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
137     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
138      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
139     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
140      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
141     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
142      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
143     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
144      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
145     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
146      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
147     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
148      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
149     {{0, 11, 11, 11, 11}, {11, 11, 11, 11, 11}, {6, 6, 21, 21, 21},
150      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}},
151     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
152      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
153     {{0, 16, 6, 6, 21}, {16, 16, 6, 21, 21}, {16, 16, 21, 21, 21},
154      {16, 16, 21, 21, 21}, {16, 16, 21, 21, 21}},
155     {{0, 1, 6, 6, 21}, {1, 6, 6, 21, 21}, {6, 6, 21, 21, 21},
156      {6, 21, 21, 21, 21}, {21, 21, 21, 21, 21}}};
157 /* clang-format on */
158 
159 // Extended the table size from 3 to 16 by repeating the last element to avoid
160 // the clips to row or column indices.
// Only the first three entries (26, 31, 36) are distinct; indices 2 through 15
// all hold 36.
161 constexpr uint8_t kCoeffBasePositionContextOffset[16] = {
162     26, 31, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
163 
// Maps an inter-intra mode index (0 .. kNumInterIntraModes - 1) to the intra
// PredictionMode with the matching name (DC, vertical, horizontal, smooth).
164 constexpr PredictionMode kInterIntraToIntraMode[kNumInterIntraModes] = {
165     kPredictionModeDc, kPredictionModeVertical, kPredictionModeHorizontal,
166     kPredictionModeSmooth};
167 
168 // Number of horizontal luma samples before intra block copy can be used.
169 constexpr int kIntraBlockCopyDelayPixels = 256;
170 // Number of 64 by 64 blocks before intra block copy can be used (256 / 64 = 4).
171 constexpr int kIntraBlockCopyDelay64x64Blocks = kIntraBlockCopyDelayPixels / 64;
172 
173 // Index [i][j] corresponds to the transform size of width 1 << (i + 2) and
174 // height 1 << (j + 2).
// kNumTransformSizes is stored as a placeholder for width/height combinations
// that have no corresponding TransformSize in this table (4x32, 4x64, 8x64,
// 32x4, 64x4 and 64x8).
175 constexpr TransformSize k4x4SizeToTransformSize[5][5] = {
176     {kTransformSize4x4, kTransformSize4x8, kTransformSize4x16,
177      kNumTransformSizes, kNumTransformSizes},
178     {kTransformSize8x4, kTransformSize8x8, kTransformSize8x16,
179      kTransformSize8x32, kNumTransformSizes},
180     {kTransformSize16x4, kTransformSize16x8, kTransformSize16x16,
181      kTransformSize16x32, kTransformSize16x64},
182     {kNumTransformSizes, kTransformSize32x8, kTransformSize32x16,
183      kTransformSize32x32, kTransformSize32x64},
184     {kNumTransformSizes, kNumTransformSizes, kTransformSize64x16,
185      kTransformSize64x32, kTransformSize64x64}};
186 
187 // Defined in section 9.3 of the spec.
// Holds one transform type per entry of the first kIntraPredictionModesUV
// prediction modes (14 entries are listed here).
// NOTE(review): presumably indexed directly by the intra PredictionMode value;
// confirm against the use site.
188 constexpr TransformType kModeToTransformType[kIntraPredictionModesUV] = {
189     kTransformTypeDctDct,   kTransformTypeDctAdst,  kTransformTypeAdstDct,
190     kTransformTypeDctDct,   kTransformTypeAdstAdst, kTransformTypeDctAdst,
191     kTransformTypeAdstDct,  kTransformTypeAdstDct,  kTransformTypeDctAdst,
192     kTransformTypeAdstAdst, kTransformTypeDctAdst,  kTransformTypeAdstDct,
193     kTransformTypeAdstAdst, kTransformTypeDctDct};
194 
195 // Defined in section 5.11.47 of the spec. This array does not contain an entry
196 // for kTransformSetDctOnly, so the first dimension needs to be
197 // |kNumTransformSets| - 1.
// Rows that list fewer than 16 transform types are padded by aggregate
// initialization: the trailing entries are value-initialized, i.e. they hold
// the TransformType whose value is 0.
198 constexpr TransformType kInverseTransformTypeBySet[kNumTransformSets - 1][16] =
199     {{kTransformTypeIdentityIdentity, kTransformTypeDctDct,
200       kTransformTypeIdentityDct, kTransformTypeDctIdentity,
201       kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
202      {kTransformTypeIdentityIdentity, kTransformTypeDctDct,
203       kTransformTypeAdstAdst, kTransformTypeDctAdst, kTransformTypeAdstDct},
204      {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
205       kTransformTypeDctIdentity, kTransformTypeIdentityAdst,
206       kTransformTypeAdstIdentity, kTransformTypeIdentityFlipadst,
207       kTransformTypeFlipadstIdentity, kTransformTypeDctDct,
208       kTransformTypeDctAdst, kTransformTypeAdstDct, kTransformTypeDctFlipadst,
209       kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
210       kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
211       kTransformTypeAdstFlipadst},
212      {kTransformTypeIdentityIdentity, kTransformTypeIdentityDct,
213       kTransformTypeDctIdentity, kTransformTypeDctDct, kTransformTypeDctAdst,
214       kTransformTypeAdstDct, kTransformTypeDctFlipadst,
215       kTransformTypeFlipadstDct, kTransformTypeAdstAdst,
216       kTransformTypeFlipadstFlipadst, kTransformTypeFlipadstAdst,
217       kTransformTypeAdstFlipadst},
218      {kTransformTypeIdentityIdentity, kTransformTypeDctDct}};
219 
220 // Replaces all occurrences of 64x* and *x64 with 32x* and *x32 respectively.
// Holds one entry per transform size. GetCoeffBaseContextEob() uses it to
// obtain the adjusted width and height of a transform size.
221 constexpr TransformSize kAdjustedTransformSize[kNumTransformSizes] = {
222     kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
223     kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
224     kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
225     kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
226     kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
227     kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
228     kTransformSize32x32};
229 
230 // This is the same as Max_Tx_Size_Rect array in the spec but with *x64 and 64x*
231 // transforms replaced with *x32 and 32x* respectively.
// Holds one entry per block size (kMaxBlockSizes entries).
232 constexpr TransformSize kUVTransformSize[kMaxBlockSizes] = {
233     kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
234     kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
235     kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
236     kTransformSize16x16, kTransformSize16x32, kTransformSize16x32,
237     kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
238     kTransformSize32x32, kTransformSize32x16, kTransformSize32x32,
239     kTransformSize32x32, kTransformSize32x32, kTransformSize32x32,
240     kTransformSize32x32};
241 
// Lookup table with one entry per transform size (kNumTransformSizes entries).
242 // ith entry of this array is computed as:
243 // DivideBy2(TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[i]) +
244 //           TransformSizeToSquareTransformIndex(kTransformSizeSquareMax[i]) +
245 //           1)
246 constexpr uint8_t kTransformSizeContext[kNumTransformSizes] = {
247     0, 1, 1, 1, 1, 2, 2, 1, 2, 2, 3, 3, 2, 3, 3, 4, 3, 4, 4};
248 
// Default loop restoration parameters.
// NOTE(review): presumably the values that ResetLoopRestorationParams() (called
// from Tile::Init()) restores as the reference for delta coding; that function
// is outside this section, so confirm.
249 constexpr int8_t kSgrProjDefaultMultiplier[2] = {-32, 31};
250 
251 constexpr int8_t kWienerDefaultFilter[kNumWienerCoefficients] = {3, -7, 15};
252 
253 // Maps compound prediction modes into single modes. For example,
254 // kPredictionModeNearestNewMv will map to kPredictionModeNearestMv for index 0
255 // and kPredictionModeNewMv for index 1. It is used to simplify the logic in
256 // AssignMv (and avoid duplicate code). This is section 5.11.30. in the spec.
// The first index is the compound mode's offset from
// kPredictionModeNearestNearestMv (see GetSinglePredictionMode()).
257 constexpr PredictionMode
258     kCompoundToSinglePredictionMode[kNumCompoundInterPredictionModes][2] = {
259         {kPredictionModeNearestMv, kPredictionModeNearestMv},
260         {kPredictionModeNearMv, kPredictionModeNearMv},
261         {kPredictionModeNearestMv, kPredictionModeNewMv},
262         {kPredictionModeNewMv, kPredictionModeNearestMv},
263         {kPredictionModeNearMv, kPredictionModeNewMv},
264         {kPredictionModeNewMv, kPredictionModeNearMv},
265         {kPredictionModeGlobalMv, kPredictionModeGlobalMv},
266         {kPredictionModeNewMv, kPredictionModeNewMv},
267 };
GetSinglePredictionMode(int index,PredictionMode y_mode)268 PredictionMode GetSinglePredictionMode(int index, PredictionMode y_mode) {
269   if (y_mode < kPredictionModeNearestNearestMv) {
270     return y_mode;
271   }
272   const int lookup_index = y_mode - kPredictionModeNearestNearestMv;
273   assert(lookup_index >= 0);
274   return kCompoundToSinglePredictionMode[lookup_index][index];
275 }
276 
277 // log2(dqDenom) in section 7.12.3 of the spec. We use the log2 value because
278 // dqDenom is always a power of two and hence right shift can be used instead of
279 // division.
// Holds one shift amount (0, 1 or 2) per transform size.
280 constexpr uint8_t kQuantizationShift[kNumTransformSizes] = {
281     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 2, 1, 2, 2};
282 
// Limits |length| so that a run starting at |start| does not extend past
// |max|: the result is the smaller of |length| and |max| - |start|. Used to
// clamp array indices for arrays whose bound is |max|.
int GetNumElements(int length, int start, int max) {
  const int remaining = max - start;
  return (remaining < length) ? remaining : length;
}
288 
289 template <typename T>
SetBlockValues(int rows,int columns,T value,T * dst,ptrdiff_t stride)290 void SetBlockValues(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
291   // Specialize all columns cases (values in kTransformWidth4x4[]) for better
292   // performance.
293   switch (columns) {
294     case 1:
295       MemSetBlock<T>(rows, 1, value, dst, stride);
296       break;
297     case 2:
298       MemSetBlock<T>(rows, 2, value, dst, stride);
299       break;
300     case 4:
301       MemSetBlock<T>(rows, 4, value, dst, stride);
302       break;
303     case 8:
304       MemSetBlock<T>(rows, 8, value, dst, stride);
305       break;
306     default:
307       assert(columns == 16);
308       MemSetBlock<T>(rows, 16, value, dst, stride);
309       break;
310   }
311 }
312 
SetTransformType(const Tile::Block & block,int x4,int y4,int w4,int h4,TransformType tx_type,TransformType transform_types[32][32])313 void SetTransformType(const Tile::Block& block, int x4, int y4, int w4, int h4,
314                       TransformType tx_type,
315                       TransformType transform_types[32][32]) {
316   const int y_offset = y4 - block.row4x4;
317   const int x_offset = x4 - block.column4x4;
318   TransformType* const dst = &transform_types[y_offset][x_offset];
319   SetBlockValues<TransformType>(h4, w4, tx_type, dst, 32);
320 }
321 
StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,const MotionVector & mv_to_store,ptrdiff_t stride,int rows,int columns,ReferenceFrameType * reference_frame_row_start,MotionVector * mv)322 void StoreMotionFieldMvs(ReferenceFrameType reference_frame_to_store,
323                          const MotionVector& mv_to_store, ptrdiff_t stride,
324                          int rows, int columns,
325                          ReferenceFrameType* reference_frame_row_start,
326                          MotionVector* mv) {
327   static_assert(sizeof(*reference_frame_row_start) == sizeof(int8_t), "");
328   do {
329     // Don't switch the following two memory setting functions.
330     // Some ARM CPUs are quite sensitive to the order.
331     memset(reference_frame_row_start, reference_frame_to_store, columns);
332     std::fill(mv, mv + columns, mv_to_store);
333     reference_frame_row_start += stride;
334     mv += stride;
335   } while (--rows != 0);
336 }
337 
// The inverse transform process assumes that the quantized coefficients are
// stored as a virtual 2d array of size |tx_width| x |tx_height|. If the
// transform width is 64, this assumption is broken because the scan order used
// for populating the coefficients of such transforms is the same as the one
// used for the corresponding transform with width 32 (e.g. the scan order used
// for 64x16 is the same as the one used for 32x16). So we must restore the
// coefficients to their correct positions and clean the positions they
// occupied.
//
// |residual| initially holds |clamped_tx_height| rows of 32 coefficients packed
// back to back. On return row r occupies residual[64 * r .. 64 * r + 31] and
// the source positions that are no longer occupied are zeroed. Does nothing
// unless |tx_width| is 64. |clamped_tx_height| must be even and at least 2.
template <typename ResidualType>
void MoveCoefficientsForTxWidth64(int clamped_tx_height, int tx_width,
                                  ResidualType* residual) {
  if (tx_width != 64) return;
  assert(clamped_tx_height >= 2);
  assert((clamped_tx_height & 1) == 0);
  const int rows = clamped_tx_height - 2;
  auto* src = residual + 32 * rows;
  residual += 64 * rows;
  // Process 2 rows in each iteration in reverse order to avoid overwriting
  // rows that have not been moved yet. The loop condition is checked up front
  // so that a height of 2 (no row pairs above the first one) skips the loop
  // instead of running it with a zero counter.
  for (int x = rows >> 1; x > 0; --x) {
    // The 2 rows can be processed in order.
    memcpy(residual, src, 32 * sizeof(src[0]));
    memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
    // Only the second packed row is cleared here; the first one is
    // overwritten later by the row that belongs at that position.
    memset(src + 32, 0, 32 * sizeof(src[0]));
    src -= 64;
    residual -= 128;
  }
  // Process the second row. The first row is already correct.
  memcpy(residual + 64, src + 32, 32 * sizeof(src[0]));
  memset(src + 32, 0, 32 * sizeof(src[0]));
}
366 
GetClampParameters(const Tile::Block & block,int min[2],int max[2])367 void GetClampParameters(const Tile::Block& block, int min[2], int max[2]) {
368   // 7.10.2.14 (part 1). (also contains implementations of 5.11.53
369   // and 5.11.54).
370   constexpr int kMvBorder4x4 = 4;
371   const int row_border = kMvBorder4x4 + block.height4x4;
372   const int column_border = kMvBorder4x4 + block.width4x4;
373   const int macroblocks_to_top_edge = -block.row4x4;
374   const int macroblocks_to_bottom_edge =
375       block.tile.frame_header().rows4x4 - block.height4x4 - block.row4x4;
376   const int macroblocks_to_left_edge = -block.column4x4;
377   const int macroblocks_to_right_edge =
378       block.tile.frame_header().columns4x4 - block.width4x4 - block.column4x4;
379   min[0] = MultiplyBy32(macroblocks_to_top_edge - row_border);
380   min[1] = MultiplyBy32(macroblocks_to_left_edge - column_border);
381   max[0] = MultiplyBy32(macroblocks_to_bottom_edge + row_border);
382   max[1] = MultiplyBy32(macroblocks_to_right_edge + column_border);
383 }
384 
385 // Section 8.3.2 in the spec, under coeff_base_eob.
GetCoeffBaseContextEob(TransformSize tx_size,int index)386 int GetCoeffBaseContextEob(TransformSize tx_size, int index) {
387   if (index == 0) return 0;
388   const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
389   const int tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
390   const int tx_height = kTransformHeight[adjusted_tx_size];
391   if (index <= DivideBy8(tx_height << tx_width_log2)) return 1;
392   if (index <= DivideBy4(tx_height << tx_width_log2)) return 2;
393   return 3;
394 }
395 
396 // Section 8.3.2 in the spec, under coeff_br. Optimized for end of block based
397 // on the fact that {0, 1}, {1, 0}, {1, 1}, {0, 2} and {2, 0} will all be 0 in
398 // the end of block case.
GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2,int pos,TransformClass tx_class)399 int GetCoeffBaseRangeContextEob(int adjusted_tx_width_log2, int pos,
400                                 TransformClass tx_class) {
401   if (pos == 0) return 0;
402   const int tx_width = 1 << adjusted_tx_width_log2;
403   const int row = pos >> adjusted_tx_width_log2;
404   const int column = pos & (tx_width - 1);
405   // This return statement is equivalent to:
406   // return ((tx_class == kTransformClass2D && (row | column) < 2) ||
407   //         (tx_class == kTransformClassHorizontal && column == 0) ||
408   //         (tx_class == kTransformClassVertical && row == 0))
409   //            ? 7
410   //            : 14;
411   return 14 >> ((static_cast<int>(tx_class == kTransformClass2D) &
412                  static_cast<int>((row | column) < 2)) |
413                 (tx_class & static_cast<int>(column == 0)) |
414                 ((tx_class >> 1) & static_cast<int>(row == 0)));
415 }
416 
417 }  // namespace
418 
Tile(int tile_number,const uint8_t * const data,size_t size,const ObuSequenceHeader & sequence_header,const ObuFrameHeader & frame_header,RefCountedBuffer * const current_frame,const DecoderState & state,FrameScratchBuffer * const frame_scratch_buffer,const WedgeMaskArray & wedge_masks,SymbolDecoderContext * const saved_symbol_decoder_context,const SegmentationMap * prev_segment_ids,PostFilter * const post_filter,const dsp::Dsp * const dsp,ThreadPool * const thread_pool,BlockingCounterWithStatus * const pending_tiles,bool frame_parallel,bool use_intra_prediction_buffer)419 Tile::Tile(int tile_number, const uint8_t* const data, size_t size,
420            const ObuSequenceHeader& sequence_header,
421            const ObuFrameHeader& frame_header,
422            RefCountedBuffer* const current_frame, const DecoderState& state,
423            FrameScratchBuffer* const frame_scratch_buffer,
424            const WedgeMaskArray& wedge_masks,
425            SymbolDecoderContext* const saved_symbol_decoder_context,
426            const SegmentationMap* prev_segment_ids,
427            PostFilter* const post_filter, const dsp::Dsp* const dsp,
428            ThreadPool* const thread_pool,
429            BlockingCounterWithStatus* const pending_tiles, bool frame_parallel,
430            bool use_intra_prediction_buffer)
431     : number_(tile_number),
432       row_(number_ / frame_header.tile_info.tile_columns),
433       column_(number_ % frame_header.tile_info.tile_columns),
434       data_(data),
435       size_(size),
436       read_deltas_(false),
437       subsampling_x_{0, sequence_header.color_config.subsampling_x,
438                      sequence_header.color_config.subsampling_x},
439       subsampling_y_{0, sequence_header.color_config.subsampling_y,
440                      sequence_header.color_config.subsampling_y},
441       current_quantizer_index_(frame_header.quantizer.base_index),
442       sequence_header_(sequence_header),
443       frame_header_(frame_header),
444       reference_frame_sign_bias_(state.reference_frame_sign_bias),
445       reference_frames_(state.reference_frame),
446       motion_field_(frame_scratch_buffer->motion_field),
447       reference_order_hint_(state.reference_order_hint),
448       wedge_masks_(wedge_masks),
449       reader_(data_, size_, frame_header_.enable_cdf_update),
450       symbol_decoder_context_(frame_scratch_buffer->symbol_decoder_context),
451       saved_symbol_decoder_context_(saved_symbol_decoder_context),
452       prev_segment_ids_(prev_segment_ids),
453       dsp_(*dsp),
454       post_filter_(*post_filter),
455       block_parameters_holder_(frame_scratch_buffer->block_parameters_holder),
456       quantizer_(sequence_header_.color_config.bitdepth,
457                  &frame_header_.quantizer),
458       residual_size_((sequence_header_.color_config.bitdepth == 8)
459                          ? sizeof(int16_t)
460                          : sizeof(int32_t)),
461       intra_block_copy_lag_(
462           frame_header_.allow_intrabc
463               ? (sequence_header_.use_128x128_superblock ? 3 : 5)
464               : 1),
465       current_frame_(*current_frame),
466       cdef_index_(frame_scratch_buffer->cdef_index),
467       inter_transform_sizes_(frame_scratch_buffer->inter_transform_sizes),
468       thread_pool_(thread_pool),
469       residual_buffer_pool_(frame_scratch_buffer->residual_buffer_pool.get()),
470       tile_scratch_buffer_pool_(
471           &frame_scratch_buffer->tile_scratch_buffer_pool),
472       pending_tiles_(pending_tiles),
473       frame_parallel_(frame_parallel),
474       use_intra_prediction_buffer_(use_intra_prediction_buffer),
475       intra_prediction_buffer_(
476           use_intra_prediction_buffer_
477               ? &frame_scratch_buffer->intra_prediction_buffers.get()[row_]
478               : nullptr) {
479   row4x4_start_ = frame_header.tile_info.tile_row_start[row_];
480   row4x4_end_ = frame_header.tile_info.tile_row_start[row_ + 1];
481   column4x4_start_ = frame_header.tile_info.tile_column_start[column_];
482   column4x4_end_ = frame_header.tile_info.tile_column_start[column_ + 1];
483   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
484   const int block_width4x4_log2 = k4x4HeightLog2[SuperBlockSize()];
485   superblock_rows_ =
486       (row4x4_end_ - row4x4_start_ + block_width4x4 - 1) >> block_width4x4_log2;
487   superblock_columns_ =
488       (column4x4_end_ - column4x4_start_ + block_width4x4 - 1) >>
489       block_width4x4_log2;
490   // If |split_parse_and_decode_| is true, we do the necessary setup for
491   // splitting the parsing and the decoding steps. This is done in the following
492   // two cases:
493   //  1) If there is multi-threading within a tile (this is done if
494   //     |thread_pool_| is not nullptr and if there are at least as many
495   //     superblock columns as |intra_block_copy_lag_|).
496   //  2) If |frame_parallel| is true.
497   split_parse_and_decode_ = (thread_pool_ != nullptr &&
498                              superblock_columns_ > intra_block_copy_lag_) ||
499                             frame_parallel;
500   if (frame_parallel_) {
501     reference_frame_progress_cache_.fill(INT_MIN);
502   }
503   memset(delta_lf_, 0, sizeof(delta_lf_));
504   delta_lf_all_zero_ = true;
505   const YuvBuffer& buffer = post_filter_.frame_buffer();
506   for (int plane = 0; plane < PlaneCount(); ++plane) {
507     // Verify that the borders are big enough for Reconstruct(). max_tx_length
508     // is the maximum value of tx_width and tx_height for the plane.
509     const int max_tx_length = (plane == kPlaneY) ? 64 : 32;
510     // Reconstruct() may overwrite on the right. Since the right border of a
511     // row is followed in memory by the left border of the next row, the
512     // number of extra pixels to the right of a row is at least the sum of the
513     // left and right borders.
514     //
515     // Note: This assertion actually checks the sum of the left and right
516     // borders of post_filter_.GetUnfilteredBuffer(), which is a horizontally
517     // and vertically shifted version of |buffer|. Since the sum of the left and
518     // right borders is not changed by the shift, we can just check the sum of
519     // the left and right borders of |buffer|.
520     assert(buffer.left_border(plane) + buffer.right_border(plane) >=
521            max_tx_length - 1);
522     // Reconstruct() may overwrite on the bottom. We need an extra border row
523     // on the bottom because we need the left border of that row.
524     //
525     // Note: This assertion checks the bottom border of
526     // post_filter_.GetUnfilteredBuffer(). So we need to calculate the vertical
527     // shift that the PostFilter constructor applied to |buffer| and reduce the
528     // bottom border by that amount.
529 #ifndef NDEBUG
530     const int vertical_shift = static_cast<int>(
531         (post_filter_.GetUnfilteredBuffer(plane) - buffer.data(plane)) /
532         buffer.stride(plane));
533     const int bottom_border = buffer.bottom_border(plane) - vertical_shift;
534     assert(bottom_border >= max_tx_length);
535 #endif
536     // In AV1, a transform block of height H starts at a y coordinate that is
537     // a multiple of H. If a transform block at the bottom of the frame has
538     // height H, then Reconstruct() will write up to the row with index
539     // Align(buffer.height(plane), H) - 1. Therefore the maximum number of
540     // rows Reconstruct() may write to is
541     // Align(buffer.height(plane), max_tx_length).
542     buffer_[plane].Reset(Align(buffer.height(plane), max_tx_length),
543                          buffer.stride(plane),
544                          post_filter_.GetUnfilteredBuffer(plane));
545     const int plane_height =
546         RightShiftWithRounding(frame_header_.height, subsampling_y_[plane]);
547     deblock_row_limit_[plane] =
548         std::min(frame_header_.rows4x4, DivideBy4(plane_height + 3)
549                                             << subsampling_y_[plane]);
550     const int plane_width =
551         RightShiftWithRounding(frame_header_.width, subsampling_x_[plane]);
552     deblock_column_limit_[plane] =
553         std::min(frame_header_.columns4x4, DivideBy4(plane_width + 3)
554                                                << subsampling_x_[plane]);
555   }
556 }
557 
Init()558 bool Tile::Init() {
559   assert(coefficient_levels_.size() == dc_categories_.size());
560   for (size_t i = 0; i < coefficient_levels_.size(); ++i) {
561     const int contexts_per_plane = (i == kEntropyContextLeft)
562                                        ? frame_header_.rows4x4
563                                        : frame_header_.columns4x4;
564     if (!coefficient_levels_[i].Reset(PlaneCount(), contexts_per_plane)) {
565       LIBGAV1_DLOG(ERROR, "coefficient_levels_[%zu].Reset() failed.", i);
566       return false;
567     }
568     if (!dc_categories_[i].Reset(PlaneCount(), contexts_per_plane)) {
569       LIBGAV1_DLOG(ERROR, "dc_categories_[%zu].Reset() failed.", i);
570       return false;
571     }
572   }
573   if (split_parse_and_decode_) {
574     assert(residual_buffer_pool_ != nullptr);
575     if (!residual_buffer_threaded_.Reset(superblock_rows_, superblock_columns_,
576                                          /*zero_initialize=*/false)) {
577       LIBGAV1_DLOG(ERROR, "residual_buffer_threaded_.Reset() failed.");
578       return false;
579     }
580   } else {
581     // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary
582     // checks when parsing quantized coefficients.
583     residual_buffer_ = MakeAlignedUniquePtr<uint8_t>(
584         32, (4096 + 32 * kResidualPaddingVertical) * residual_size_);
585     if (residual_buffer_ == nullptr) {
586       LIBGAV1_DLOG(ERROR, "Allocation of residual_buffer_ failed.");
587       return false;
588     }
589     prediction_parameters_.reset(new (std::nothrow) PredictionParameters());
590     if (prediction_parameters_ == nullptr) {
591       LIBGAV1_DLOG(ERROR, "Allocation of prediction_parameters_ failed.");
592       return false;
593     }
594   }
595   if (frame_header_.use_ref_frame_mvs) {
596     assert(sequence_header_.enable_order_hint);
597     SetupMotionField(frame_header_, current_frame_, reference_frames_,
598                      row4x4_start_, row4x4_end_, column4x4_start_,
599                      column4x4_end_, &motion_field_);
600   }
601   ResetLoopRestorationParams();
602   return true;
603 }
604 
605 template <ProcessingMode processing_mode, bool save_symbol_decoder_context>
ProcessSuperBlockRow(int row4x4,TileScratchBuffer * const scratch_buffer)606 bool Tile::ProcessSuperBlockRow(int row4x4,
607                                 TileScratchBuffer* const scratch_buffer) {
608   if (row4x4 < row4x4_start_ || row4x4 >= row4x4_end_) return true;
609   assert(scratch_buffer != nullptr);
610   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
611   for (int column4x4 = column4x4_start_; column4x4 < column4x4_end_;
612        column4x4 += block_width4x4) {
613     if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4, scratch_buffer,
614                            processing_mode)) {
615       LIBGAV1_DLOG(ERROR, "Error decoding super block row: %d column: %d",
616                    row4x4, column4x4);
617       return false;
618     }
619   }
620   if (save_symbol_decoder_context && row4x4 + block_width4x4 >= row4x4_end_) {
621     SaveSymbolDecoderContext();
622   }
623   if (processing_mode == kProcessingModeDecodeOnly ||
624       processing_mode == kProcessingModeParseAndDecode) {
625     PopulateIntraPredictionBuffer(row4x4);
626   }
627   return true;
628 }
629 
630 // Used in frame parallel mode. The symbol decoder context need not be saved in
631 // this case since it was done when parsing was complete.
632 template bool Tile::ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
633     int row4x4, TileScratchBuffer* scratch_buffer);
634 // Used in non frame parallel mode.
635 template bool Tile::ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
636     int row4x4, TileScratchBuffer* scratch_buffer);
637 
SaveSymbolDecoderContext()638 void Tile::SaveSymbolDecoderContext() {
639   if (frame_header_.enable_frame_end_update_cdf &&
640       number_ == frame_header_.tile_info.context_update_id) {
641     *saved_symbol_decoder_context_ = symbol_decoder_context_;
642   }
643 }
644 
ParseAndDecode()645 bool Tile::ParseAndDecode() {
646   // If this is the main thread, we build the loop filter bit masks when parsing
647   // so that it happens in the current thread. This ensures that the main thread
648   // does as much work as possible.
649   if (split_parse_and_decode_) {
650     if (!ThreadedParseAndDecode()) return false;
651     SaveSymbolDecoderContext();
652     return true;
653   }
654   std::unique_ptr<TileScratchBuffer> scratch_buffer =
655       tile_scratch_buffer_pool_->Get();
656   if (scratch_buffer == nullptr) {
657     pending_tiles_->Decrement(false);
658     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
659     return false;
660   }
661   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
662   for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
663        row4x4 += block_width4x4) {
664     if (!ProcessSuperBlockRow<kProcessingModeParseAndDecode, true>(
665             row4x4, scratch_buffer.get())) {
666       pending_tiles_->Decrement(false);
667       return false;
668     }
669   }
670   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
671   pending_tiles_->Decrement(true);
672   return true;
673 }
674 
Parse()675 bool Tile::Parse() {
676   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
677   std::unique_ptr<TileScratchBuffer> scratch_buffer =
678       tile_scratch_buffer_pool_->Get();
679   if (scratch_buffer == nullptr) {
680     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
681     return false;
682   }
683   for (int row4x4 = row4x4_start_; row4x4 < row4x4_end_;
684        row4x4 += block_width4x4) {
685     if (!ProcessSuperBlockRow<kProcessingModeParseOnly, false>(
686             row4x4, scratch_buffer.get())) {
687       return false;
688     }
689   }
690   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
691   SaveSymbolDecoderContext();
692   return true;
693 }
694 
Decode(std::mutex * const mutex,int * const superblock_row_progress,std::condition_variable * const superblock_row_progress_condvar)695 bool Tile::Decode(
696     std::mutex* const mutex, int* const superblock_row_progress,
697     std::condition_variable* const superblock_row_progress_condvar) {
698   const int block_width4x4 = sequence_header_.use_128x128_superblock ? 32 : 16;
699   const int block_width4x4_log2 =
700       sequence_header_.use_128x128_superblock ? 5 : 4;
701   std::unique_ptr<TileScratchBuffer> scratch_buffer =
702       tile_scratch_buffer_pool_->Get();
703   if (scratch_buffer == nullptr) {
704     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
705     return false;
706   }
707   for (int row4x4 = row4x4_start_, index = row4x4_start_ >> block_width4x4_log2;
708        row4x4 < row4x4_end_; row4x4 += block_width4x4, ++index) {
709     if (!ProcessSuperBlockRow<kProcessingModeDecodeOnly, false>(
710             row4x4, scratch_buffer.get())) {
711       return false;
712     }
713     if (post_filter_.DoDeblock()) {
714       // Apply vertical deblock filtering for all the columns in this tile
715       // except for the first 64 columns.
716       post_filter_.ApplyDeblockFilter(
717           kLoopFilterTypeVertical, row4x4,
718           column4x4_start_ + kNum4x4InLoopFilterUnit, column4x4_end_,
719           block_width4x4);
720       // If this is the first superblock row of the tile, then we cannot apply
721       // horizontal deblocking here since we don't know if the top row is
722       // available. So it will be done by the calling thread in that case.
723       if (row4x4 != row4x4_start_) {
724         // Apply horizontal deblock filtering for all the columns in this tile
725         // except for the first and the last 64 columns.
726         // Note about the last tile of each row: For the last tile,
727         // column4x4_end may not be a multiple of 16. In that case it is still
728         // okay to simply subtract 16 since ApplyDeblockFilter() will only do
729         // the filters in increments of 64 columns (or 32 columns for chroma
730         // with subsampling).
731         post_filter_.ApplyDeblockFilter(
732             kLoopFilterTypeHorizontal, row4x4,
733             column4x4_start_ + kNum4x4InLoopFilterUnit,
734             column4x4_end_ - kNum4x4InLoopFilterUnit, block_width4x4);
735       }
736     }
737     bool notify;
738     {
739       std::unique_lock<std::mutex> lock(*mutex);
740       notify = ++superblock_row_progress[index] ==
741                frame_header_.tile_info.tile_columns;
742     }
743     if (notify) {
744       // We are done decoding this superblock row. Notify the post filtering
745       // thread.
746       superblock_row_progress_condvar[index].notify_one();
747     }
748   }
749   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
750   return true;
751 }
752 
ThreadedParseAndDecode()753 bool Tile::ThreadedParseAndDecode() {
754   {
755     std::lock_guard<std::mutex> lock(threading_.mutex);
756     if (!threading_.sb_state.Reset(superblock_rows_, superblock_columns_)) {
757       pending_tiles_->Decrement(false);
758       LIBGAV1_DLOG(ERROR, "threading.sb_state.Reset() failed.");
759       return false;
760     }
761     // Account for the parsing job.
762     ++threading_.pending_jobs;
763   }
764 
765   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
766 
767   // Begin parsing.
768   std::unique_ptr<TileScratchBuffer> scratch_buffer =
769       tile_scratch_buffer_pool_->Get();
770   if (scratch_buffer == nullptr) {
771     pending_tiles_->Decrement(false);
772     LIBGAV1_DLOG(ERROR, "Failed to get scratch buffer.");
773     return false;
774   }
775   for (int row4x4 = row4x4_start_, row_index = 0; row4x4 < row4x4_end_;
776        row4x4 += block_width4x4, ++row_index) {
777     for (int column4x4 = column4x4_start_, column_index = 0;
778          column4x4 < column4x4_end_;
779          column4x4 += block_width4x4, ++column_index) {
780       if (!ProcessSuperBlock(row4x4, column4x4, block_width4x4,
781                              scratch_buffer.get(), kProcessingModeParseOnly)) {
782         std::lock_guard<std::mutex> lock(threading_.mutex);
783         threading_.abort = true;
784         break;
785       }
786       std::unique_lock<std::mutex> lock(threading_.mutex);
787       if (threading_.abort) break;
788       threading_.sb_state[row_index][column_index] = kSuperBlockStateParsed;
789       // Schedule the decoding of this superblock if it is allowed.
790       if (CanDecode(row_index, column_index)) {
791         ++threading_.pending_jobs;
792         threading_.sb_state[row_index][column_index] =
793             kSuperBlockStateScheduled;
794         lock.unlock();
795         thread_pool_->Schedule(
796             [this, row_index, column_index, block_width4x4]() {
797               DecodeSuperBlock(row_index, column_index, block_width4x4);
798             });
799       }
800     }
801     std::lock_guard<std::mutex> lock(threading_.mutex);
802     if (threading_.abort) break;
803   }
804   tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
805 
806   // We are done parsing. We can return here since the calling thread will make
807   // sure that it waits for all the superblocks to be decoded.
808   //
809   // Finish using |threading_| before |pending_tiles_->Decrement()| because the
810   // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
811   // is called.
812   threading_.mutex.lock();
813   const bool no_pending_jobs = (--threading_.pending_jobs == 0);
814   const bool job_succeeded = !threading_.abort;
815   threading_.mutex.unlock();
816   if (no_pending_jobs) {
817     // We are done parsing and decoding this tile.
818     pending_tiles_->Decrement(job_succeeded);
819   }
820   return job_succeeded;
821 }
822 
CanDecode(int row_index,int column_index) const823 bool Tile::CanDecode(int row_index, int column_index) const {
824   assert(row_index >= 0);
825   assert(column_index >= 0);
826   // If |threading_.sb_state[row_index][column_index]| is not equal to
827   // kSuperBlockStateParsed, then return false. This is ok because if
828   // |threading_.sb_state[row_index][column_index]| is equal to:
829   //   kSuperBlockStateNone - then the superblock is not yet parsed.
830   //   kSuperBlockStateScheduled - then the superblock is already scheduled for
831   //                               decode.
832   //   kSuperBlockStateDecoded - then the superblock has already been decoded.
833   if (row_index >= superblock_rows_ || column_index >= superblock_columns_ ||
834       threading_.sb_state[row_index][column_index] != kSuperBlockStateParsed) {
835     return false;
836   }
837   // First superblock has no dependencies.
838   if (row_index == 0 && column_index == 0) {
839     return true;
840   }
841   // Superblocks in the first row only depend on the superblock to the left of
842   // it.
843   if (row_index == 0) {
844     return threading_.sb_state[0][column_index - 1] == kSuperBlockStateDecoded;
845   }
846   // All other superblocks depend on superblock to the left of it (if one
847   // exists) and superblock to the top right with a lag of
848   // |intra_block_copy_lag_| (if one exists).
849   const int top_right_column_index =
850       std::min(column_index + intra_block_copy_lag_, superblock_columns_ - 1);
851   return threading_.sb_state[row_index - 1][top_right_column_index] ==
852              kSuperBlockStateDecoded &&
853          (column_index == 0 ||
854           threading_.sb_state[row_index][column_index - 1] ==
855               kSuperBlockStateDecoded);
856 }
857 
DecodeSuperBlock(int row_index,int column_index,int block_width4x4)858 void Tile::DecodeSuperBlock(int row_index, int column_index,
859                             int block_width4x4) {
860   const int row4x4 = row4x4_start_ + (row_index * block_width4x4);
861   const int column4x4 = column4x4_start_ + (column_index * block_width4x4);
862   std::unique_ptr<TileScratchBuffer> scratch_buffer =
863       tile_scratch_buffer_pool_->Get();
864   bool ok = scratch_buffer != nullptr;
865   if (ok) {
866     ok = ProcessSuperBlock(row4x4, column4x4, block_width4x4,
867                            scratch_buffer.get(), kProcessingModeDecodeOnly);
868     tile_scratch_buffer_pool_->Release(std::move(scratch_buffer));
869   }
870   std::unique_lock<std::mutex> lock(threading_.mutex);
871   if (ok) {
872     threading_.sb_state[row_index][column_index] = kSuperBlockStateDecoded;
873     // Candidate rows and columns that we could potentially begin the decoding
874     // (if it is allowed to do so). The candidates are:
875     //   1) The superblock to the bottom-left of the current superblock with a
876     //   lag of |intra_block_copy_lag_| (or the beginning of the next superblock
877     //   row in case there are less than |intra_block_copy_lag_| superblock
878     //   columns in the Tile).
879     //   2) The superblock to the right of the current superblock.
880     const int candidate_row_indices[] = {row_index + 1, row_index};
881     const int candidate_column_indices[] = {
882         std::max(0, column_index - intra_block_copy_lag_), column_index + 1};
883     for (size_t i = 0; i < std::extent<decltype(candidate_row_indices)>::value;
884          ++i) {
885       const int candidate_row_index = candidate_row_indices[i];
886       const int candidate_column_index = candidate_column_indices[i];
887       if (!CanDecode(candidate_row_index, candidate_column_index)) {
888         continue;
889       }
890       ++threading_.pending_jobs;
891       threading_.sb_state[candidate_row_index][candidate_column_index] =
892           kSuperBlockStateScheduled;
893       lock.unlock();
894       thread_pool_->Schedule([this, candidate_row_index, candidate_column_index,
895                               block_width4x4]() {
896         DecodeSuperBlock(candidate_row_index, candidate_column_index,
897                          block_width4x4);
898       });
899       lock.lock();
900     }
901   } else {
902     threading_.abort = true;
903   }
904   // Finish using |threading_| before |pending_tiles_->Decrement()| because the
905   // Tile object could go out of scope as soon as |pending_tiles_->Decrement()|
906   // is called.
907   const bool no_pending_jobs = (--threading_.pending_jobs == 0);
908   const bool job_succeeded = !threading_.abort;
909   lock.unlock();
910   if (no_pending_jobs) {
911     // We are done parsing and decoding this tile.
912     pending_tiles_->Decrement(job_succeeded);
913   }
914 }
915 
PopulateIntraPredictionBuffer(int row4x4)916 void Tile::PopulateIntraPredictionBuffer(int row4x4) {
917   const int block_width4x4 = kNum4x4BlocksWide[SuperBlockSize()];
918   if (!use_intra_prediction_buffer_ || row4x4 + block_width4x4 >= row4x4_end_) {
919     return;
920   }
921   const size_t pixel_size =
922       (sequence_header_.color_config.bitdepth == 8 ? sizeof(uint8_t)
923                                                    : sizeof(uint16_t));
924   for (int plane = 0; plane < PlaneCount(); ++plane) {
925     const int row_to_copy =
926         (MultiplyBy4(row4x4 + block_width4x4) >> subsampling_y_[plane]) - 1;
927     const size_t pixels_to_copy =
928         (MultiplyBy4(column4x4_end_ - column4x4_start_) >>
929          subsampling_x_[plane]) *
930         pixel_size;
931     const size_t column_start =
932         MultiplyBy4(column4x4_start_) >> subsampling_x_[plane];
933     void* start;
934 #if LIBGAV1_MAX_BITDEPTH >= 10
935     if (sequence_header_.color_config.bitdepth > 8) {
936       Array2DView<uint16_t> buffer(
937           buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
938           reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
939       start = &buffer[row_to_copy][column_start];
940     } else  // NOLINT
941 #endif
942     {
943       start = &buffer_[plane][row_to_copy][column_start];
944     }
945     memcpy((*intra_prediction_buffer_)[plane].get() + column_start * pixel_size,
946            start, pixels_to_copy);
947   }
948 }
949 
GetTransformAllZeroContext(const Block & block,Plane plane,TransformSize tx_size,int x4,int y4,int w4,int h4)950 int Tile::GetTransformAllZeroContext(const Block& block, Plane plane,
951                                      TransformSize tx_size, int x4, int y4,
952                                      int w4, int h4) {
953   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
954   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
955 
956   const int tx_width = kTransformWidth[tx_size];
957   const int tx_height = kTransformHeight[tx_size];
958   const BlockSize plane_size = block.residual_size[plane];
959   const int block_width = kBlockWidthPixels[plane_size];
960   const int block_height = kBlockHeightPixels[plane_size];
961 
962   int top = 0;
963   int left = 0;
964   const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
965   const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
966   if (plane == kPlaneY) {
967     if (block_width == tx_width && block_height == tx_height) return 0;
968     const uint8_t* coefficient_levels =
969         &coefficient_levels_[kEntropyContextTop][plane][x4];
970     for (int i = 0; i < num_top_elements; ++i) {
971       top = std::max(top, static_cast<int>(coefficient_levels[i]));
972     }
973     coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
974     for (int i = 0; i < num_left_elements; ++i) {
975       left = std::max(left, static_cast<int>(coefficient_levels[i]));
976     }
977     assert(top <= 4);
978     assert(left <= 4);
979     // kAllZeroContextsByTopLeft is pre-computed based on the logic in the spec
980     // for top and left.
981     return kAllZeroContextsByTopLeft[top][left];
982   }
983   const uint8_t* coefficient_levels =
984       &coefficient_levels_[kEntropyContextTop][plane][x4];
985   const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
986   for (int i = 0; i < num_top_elements; ++i) {
987     top |= coefficient_levels[i];
988     top |= dc_categories[i];
989   }
990   coefficient_levels = &coefficient_levels_[kEntropyContextLeft][plane][y4];
991   dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
992   for (int i = 0; i < num_left_elements; ++i) {
993     left |= coefficient_levels[i];
994     left |= dc_categories[i];
995   }
996   return static_cast<int>(top != 0) + static_cast<int>(left != 0) + 7 +
997          3 * static_cast<int>(block_width * block_height >
998                               tx_width * tx_height);
999 }
1000 
GetTransformSet(TransformSize tx_size,bool is_inter) const1001 TransformSet Tile::GetTransformSet(TransformSize tx_size, bool is_inter) const {
1002   const TransformSize tx_size_square_min = kTransformSizeSquareMin[tx_size];
1003   const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1004   if (tx_size_square_max == kTransformSize64x64) return kTransformSetDctOnly;
1005   if (is_inter) {
1006     if (frame_header_.reduced_tx_set ||
1007         tx_size_square_max == kTransformSize32x32) {
1008       return kTransformSetInter3;
1009     }
1010     if (tx_size_square_min == kTransformSize16x16) return kTransformSetInter2;
1011     return kTransformSetInter1;
1012   }
1013   if (tx_size_square_max == kTransformSize32x32) return kTransformSetDctOnly;
1014   if (frame_header_.reduced_tx_set ||
1015       tx_size_square_min == kTransformSize16x16) {
1016     return kTransformSetIntra2;
1017   }
1018   return kTransformSetIntra1;
1019 }
1020 
ComputeTransformType(const Block & block,Plane plane,TransformSize tx_size,int block_x,int block_y)1021 TransformType Tile::ComputeTransformType(const Block& block, Plane plane,
1022                                          TransformSize tx_size, int block_x,
1023                                          int block_y) {
1024   const BlockParameters& bp = *block.bp;
1025   const TransformSize tx_size_square_max = kTransformSizeSquareMax[tx_size];
1026   if (frame_header_.segmentation.lossless[bp.segment_id] ||
1027       tx_size_square_max == kTransformSize64x64) {
1028     return kTransformTypeDctDct;
1029   }
1030   if (plane == kPlaneY) {
1031     return transform_types_[block_y - block.row4x4][block_x - block.column4x4];
1032   }
1033   const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1034   TransformType tx_type;
1035   if (bp.is_inter) {
1036     const int x4 =
1037         std::max(block.column4x4, block_x << subsampling_x_[kPlaneU]);
1038     const int y4 = std::max(block.row4x4, block_y << subsampling_y_[kPlaneU]);
1039     tx_type = transform_types_[y4 - block.row4x4][x4 - block.column4x4];
1040   } else {
1041     tx_type = kModeToTransformType[bp.uv_mode];
1042   }
1043   return kTransformTypeInSetMask[tx_set].Contains(tx_type)
1044              ? tx_type
1045              : kTransformTypeDctDct;
1046 }
1047 
ReadTransformType(const Block & block,int x4,int y4,TransformSize tx_size)1048 void Tile::ReadTransformType(const Block& block, int x4, int y4,
1049                              TransformSize tx_size) {
1050   BlockParameters& bp = *block.bp;
1051   const TransformSet tx_set = GetTransformSet(tx_size, bp.is_inter);
1052 
1053   TransformType tx_type = kTransformTypeDctDct;
1054   if (tx_set != kTransformSetDctOnly &&
1055       frame_header_.segmentation.qindex[bp.segment_id] > 0) {
1056     const int cdf_index = SymbolDecoderContext::TxTypeIndex(tx_set);
1057     const int cdf_tx_size_index =
1058         TransformSizeToSquareTransformIndex(kTransformSizeSquareMin[tx_size]);
1059     uint16_t* cdf;
1060     if (bp.is_inter) {
1061       cdf = symbol_decoder_context_
1062                 .inter_tx_type_cdf[cdf_index][cdf_tx_size_index];
1063     } else {
1064       const PredictionMode intra_direction =
1065           block.bp->prediction_parameters->use_filter_intra
1066               ? kFilterIntraModeToIntraPredictor[block.bp->prediction_parameters
1067                                                      ->filter_intra_mode]
1068               : bp.y_mode;
1069       cdf =
1070           symbol_decoder_context_
1071               .intra_tx_type_cdf[cdf_index][cdf_tx_size_index][intra_direction];
1072     }
1073     tx_type = static_cast<TransformType>(
1074         reader_.ReadSymbol(cdf, kNumTransformTypesInSet[tx_set]));
1075     // This array does not contain an entry for kTransformSetDctOnly, so the
1076     // first dimension needs to be offset by 1.
1077     tx_type = kInverseTransformTypeBySet[tx_set - 1][tx_type];
1078   }
1079   SetTransformType(block, x4, y4, kTransformWidth4x4[tx_size],
1080                    kTransformHeight4x4[tx_size], tx_type, transform_types_);
1081 }
1082 
1083 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1084 // Bottom boundary checks are avoided by the padded rows.
1085 // For a coefficient near the right boundary, the two right neighbors and the
1086 // one bottom-right neighbor may be out of boundary. We don't check the right
1087 // boundary for them, because the out of boundary neighbors project to positions
1088 // above the diagonal line which goes through the current coefficient and these
1089 // positions are still all 0s according to the diagonal scan order.
1090 template <typename ResidualType>
ReadCoeffBase2D(const uint16_t * scan,PlaneType plane_type,TransformSize tx_size,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1091 void Tile::ReadCoeffBase2D(
1092     const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
1093     int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1094     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1095     ResidualType* const quantized_buffer) {
1096   const int tx_width = 1 << adjusted_tx_width_log2;
1097   int i = eob - 2;
1098   do {
1099     constexpr auto threshold = static_cast<ResidualType>(3);
1100     const uint16_t pos = scan[i];
1101     const int row = pos >> adjusted_tx_width_log2;
1102     const int column = pos & (tx_width - 1);
1103     auto* const quantized = &quantized_buffer[pos];
1104     int context;
1105     if (pos == 0) {
1106       context = 0;
1107     } else {
1108       context = std::min(
1109           4, DivideBy2(
1110                  1 + (std::min(quantized[1], threshold) +             // {0, 1}
1111                       std::min(quantized[tx_width], threshold) +      // {1, 0}
1112                       std::min(quantized[tx_width + 1], threshold) +  // {1, 1}
1113                       std::min(quantized[2], threshold) +             // {0, 2}
1114                       std::min(quantized[MultiplyBy2(tx_width)],
1115                                threshold))));  // {2, 0}
1116       context += kCoeffBaseContextOffset[tx_size][std::min(row, 4)]
1117                                         [std::min(column, 4)];
1118     }
1119     int level =
1120         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1121     if (level > kNumQuantizerBaseLevels) {
1122       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1123       // + 1, because we clip the overall output to 6 and the unclipped
1124       // quantized values will always result in an output of greater than 6.
1125       context = std::min(6, DivideBy2(1 + quantized[1] +          // {0, 1}
1126                                       quantized[tx_width] +       // {1, 0}
1127                                       quantized[tx_width + 1]));  // {1, 1}
1128       if (pos != 0) {
1129         context += 14 >> static_cast<int>((row | column) < 2);
1130       }
1131       level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1132     }
1133     quantized[0] = level;
1134   } while (--i >= 0);
1135 }
1136 
1137 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1138 // Bottom boundary checks are avoided by the padded rows.
1139 // For a coefficient near the right boundary, the four right neighbors may be
1140 // out of boundary. We don't do the boundary check for the first three right
1141 // neighbors, because even for the transform blocks with smallest width 4, the
1142 // first three out of boundary neighbors project to positions left of the
1143 // current coefficient and these positions are still all 0s according to the
1144 // column scan order. However, when transform block width is 4 and the current
1145 // coefficient is on the right boundary, its fourth right neighbor projects to
1146 // the under position on the same column, which could be nonzero. Therefore, we
1147 // must skip the fourth right neighbor. To make it simple, for any coefficient,
1148 // we always do the boundary check for its fourth right neighbor.
1149 template <typename ResidualType>
ReadCoeffBaseHorizontal(const uint16_t * scan,PlaneType plane_type,TransformSize,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1150 void Tile::ReadCoeffBaseHorizontal(
1151     const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
1152     int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1153     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1154     ResidualType* const quantized_buffer) {
1155   const int tx_width = 1 << adjusted_tx_width_log2;
1156   int i = eob - 2;
1157   do {
1158     constexpr auto threshold = static_cast<ResidualType>(3);
1159     const uint16_t pos = scan[i];
1160     const int column = pos & (tx_width - 1);
1161     auto* const quantized = &quantized_buffer[pos];
1162     int context = std::min(
1163         4,
1164         DivideBy2(1 +
1165                   (std::min(quantized[1], threshold) +         // {0, 1}
1166                    std::min(quantized[tx_width], threshold) +  // {1, 0}
1167                    std::min(quantized[2], threshold) +         // {0, 2}
1168                    std::min(quantized[3], threshold) +         // {0, 3}
1169                    std::min(quantized[4],
1170                             static_cast<ResidualType>(
1171                                 (column + 4 < tx_width) ? 3 : 0)))));  // {0, 4}
1172     context += kCoeffBasePositionContextOffset[column];
1173     int level =
1174         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1175     if (level > kNumQuantizerBaseLevels) {
1176       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1177       // + 1, because we clip the overall output to 6 and the unclipped
1178       // quantized values will always result in an output of greater than 6.
1179       context = std::min(6, DivideBy2(1 + quantized[1] +     // {0, 1}
1180                                       quantized[tx_width] +  // {1, 0}
1181                                       quantized[2]));        // {0, 2}
1182       if (pos != 0) {
1183         context += 14 >> static_cast<int>(column == 0);
1184       }
1185       level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1186     }
1187     quantized[0] = level;
1188   } while (--i >= 0);
1189 }
1190 
1191 // Section 8.3.2 in the spec, under coeff_base and coeff_br.
1192 // Bottom boundary checks are avoided by the padded rows.
1193 // Right boundary check is performed explicitly.
1194 template <typename ResidualType>
ReadCoeffBaseVertical(const uint16_t * scan,PlaneType plane_type,TransformSize,int clamped_tx_size_context,int adjusted_tx_width_log2,int eob,uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount+1],ResidualType * const quantized_buffer)1195 void Tile::ReadCoeffBaseVertical(
1196     const uint16_t* scan, PlaneType plane_type, TransformSize /*tx_size*/,
1197     int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1198     uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1199     ResidualType* const quantized_buffer) {
1200   const int tx_width = 1 << adjusted_tx_width_log2;
1201   int i = eob - 2;
1202   do {
1203     constexpr auto threshold = static_cast<ResidualType>(3);
1204     const uint16_t pos = scan[i];
1205     const int row = pos >> adjusted_tx_width_log2;
1206     const int column = pos & (tx_width - 1);
1207     auto* const quantized = &quantized_buffer[pos];
1208     const int quantized_column1 = (column + 1 < tx_width) ? quantized[1] : 0;
1209     int context =
1210         std::min(4, DivideBy2(1 + (std::min(quantized_column1, 3) +  // {0, 1}
1211                                    std::min(quantized[tx_width],
1212                                             threshold) +  // {1, 0}
1213                                    std::min(quantized[MultiplyBy2(tx_width)],
1214                                             threshold) +  // {2, 0}
1215                                    std::min(quantized[tx_width * 3],
1216                                             threshold) +  // {3, 0}
1217                                    std::min(quantized[MultiplyBy4(tx_width)],
1218                                             threshold))));  // {4, 0}
1219     context += kCoeffBasePositionContextOffset[row];
1220     int level =
1221         reader_.ReadSymbol<kCoeffBaseSymbolCount>(coeff_base_cdf[context]);
1222     if (level > kNumQuantizerBaseLevels) {
1223       // No need to clip quantized values to COEFF_BASE_RANGE + NUM_BASE_LEVELS
1224       // + 1, because we clip the overall output to 6 and the unclipped
1225       // quantized values will always result in an output of greater than 6.
1226       int context =
1227           std::min(6, DivideBy2(1 + quantized_column1 +              // {0, 1}
1228                                 quantized[tx_width] +                // {1, 0}
1229                                 quantized[MultiplyBy2(tx_width)]));  // {2, 0}
1230       if (pos != 0) {
1231         context += 14 >> static_cast<int>(row == 0);
1232       }
1233       level += ReadCoeffBaseRange(clamped_tx_size_context, context, plane_type);
1234     }
1235     quantized[0] = level;
1236   } while (--i >= 0);
1237 }
1238 
GetDcSignContext(int x4,int y4,int w4,int h4,Plane plane)1239 int Tile::GetDcSignContext(int x4, int y4, int w4, int h4, Plane plane) {
1240   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1241   const int8_t* dc_categories = &dc_categories_[kEntropyContextTop][plane][x4];
1242   // Set dc_sign to 8-bit long so that std::accumulate() saves sign extension.
1243   int8_t dc_sign = std::accumulate(
1244       dc_categories, dc_categories + GetNumElements(w4, x4, max_x4x4), 0);
1245   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1246   dc_categories = &dc_categories_[kEntropyContextLeft][plane][y4];
1247   dc_sign = std::accumulate(
1248       dc_categories, dc_categories + GetNumElements(h4, y4, max_y4x4), dc_sign);
1249   // This return statement is equivalent to:
1250   //   if (dc_sign < 0) return 1;
1251   //   if (dc_sign > 0) return 2;
1252   //   return 0;
1253   // And it is better than:
1254   //   return static_cast<int>(dc_sign != 0) + static_cast<int>(dc_sign > 0);
1255   return static_cast<int>(dc_sign < 0) +
1256          MultiplyBy2(static_cast<int>(dc_sign > 0));
1257 }
1258 
SetEntropyContexts(int x4,int y4,int w4,int h4,Plane plane,uint8_t coefficient_level,int8_t dc_category)1259 void Tile::SetEntropyContexts(int x4, int y4, int w4, int h4, Plane plane,
1260                               uint8_t coefficient_level, int8_t dc_category) {
1261   const int max_x4x4 = frame_header_.columns4x4 >> subsampling_x_[plane];
1262   const int num_top_elements = GetNumElements(w4, x4, max_x4x4);
1263   memset(&coefficient_levels_[kEntropyContextTop][plane][x4], coefficient_level,
1264          num_top_elements);
1265   memset(&dc_categories_[kEntropyContextTop][plane][x4], dc_category,
1266          num_top_elements);
1267   const int max_y4x4 = frame_header_.rows4x4 >> subsampling_y_[plane];
1268   const int num_left_elements = GetNumElements(h4, y4, max_y4x4);
1269   memset(&coefficient_levels_[kEntropyContextLeft][plane][y4],
1270          coefficient_level, num_left_elements);
1271   memset(&dc_categories_[kEntropyContextLeft][plane][y4], dc_category,
1272          num_left_elements);
1273 }
1274 
ScaleMotionVector(const MotionVector & mv,const Plane plane,const int reference_frame_index,const int x,const int y,int * const start_x,int * const start_y,int * const step_x,int * const step_y)1275 void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
1276                              const int reference_frame_index, const int x,
1277                              const int y, int* const start_x,
1278                              int* const start_y, int* const step_x,
1279                              int* const step_y) {
1280   const int reference_upscaled_width =
1281       (reference_frame_index == -1)
1282           ? frame_header_.upscaled_width
1283           : reference_frames_[reference_frame_index]->upscaled_width();
1284   const int reference_height =
1285       (reference_frame_index == -1)
1286           ? frame_header_.height
1287           : reference_frames_[reference_frame_index]->frame_height();
1288   assert(2 * frame_header_.width >= reference_upscaled_width &&
1289          2 * frame_header_.height >= reference_height &&
1290          frame_header_.width <= 16 * reference_upscaled_width &&
1291          frame_header_.height <= 16 * reference_height);
1292   const bool is_scaled_x = reference_upscaled_width != frame_header_.width;
1293   const bool is_scaled_y = reference_height != frame_header_.height;
1294   const int half_sample = 1 << (kSubPixelBits - 1);
1295   int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]);
1296   int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]);
1297   const int rounding_offset =
1298       DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits));
1299   if (is_scaled_x) {
1300     const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) +
1301                          DivideBy2(frame_header_.width)) /
1302                         frame_header_.width;
1303     *step_x = RightShiftWithRoundingSigned(
1304         scale_x, kReferenceScaleShift - kScaleSubPixelBits);
1305     orig_x += half_sample;
1306     // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can
1307     // be up to 15 bits. So we use int64_t to hold base_x.
1308     const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x -
1309                            (half_sample << kReferenceScaleShift);
1310     *start_x =
1311         RightShiftWithRoundingSigned(
1312             base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
1313         rounding_offset;
1314   } else {
1315     *step_x = 1 << kScaleSubPixelBits;
1316     *start_x = LeftShift(orig_x, 6) + rounding_offset;
1317   }
1318   if (is_scaled_y) {
1319     const int scale_y = ((reference_height << kReferenceScaleShift) +
1320                          DivideBy2(frame_header_.height)) /
1321                         frame_header_.height;
1322     *step_y = RightShiftWithRoundingSigned(
1323         scale_y, kReferenceScaleShift - kScaleSubPixelBits);
1324     orig_y += half_sample;
1325     const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y -
1326                            (half_sample << kReferenceScaleShift);
1327     *start_y =
1328         RightShiftWithRoundingSigned(
1329             base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
1330         rounding_offset;
1331   } else {
1332     *step_y = 1 << kScaleSubPixelBits;
1333     *start_y = LeftShift(orig_y, 6) + rounding_offset;
1334   }
1335 }
1336 
1337 template <typename ResidualType, bool is_dc_coefficient>
ReadSignAndApplyDequantization(const uint16_t * const scan,int i,int q_value,const uint8_t * const quantizer_matrix,int shift,int max_value,uint16_t * const dc_sign_cdf,int8_t * const dc_category,int * const coefficient_level,ResidualType * residual_buffer)1338 bool Tile::ReadSignAndApplyDequantization(
1339     const uint16_t* const scan, int i, int q_value,
1340     const uint8_t* const quantizer_matrix, int shift, int max_value,
1341     uint16_t* const dc_sign_cdf, int8_t* const dc_category,
1342     int* const coefficient_level, ResidualType* residual_buffer) {
1343   const int pos = is_dc_coefficient ? 0 : scan[i];
1344   // If residual_buffer[pos] is zero, then the rest of the function has no
1345   // effect.
1346   int level = residual_buffer[pos];
1347   if (level == 0) return true;
1348   const int sign = is_dc_coefficient
1349                        ? static_cast<int>(reader_.ReadSymbol(dc_sign_cdf))
1350                        : reader_.ReadBit();
1351   if (level > kNumQuantizerBaseLevels + kQuantizerCoefficientBaseRange) {
1352     int length = 0;
1353     bool golomb_length_bit = false;
1354     do {
1355       golomb_length_bit = static_cast<bool>(reader_.ReadBit());
1356       ++length;
1357       if (length > 20) {
1358         LIBGAV1_DLOG(ERROR, "Invalid golomb_length %d", length);
1359         return false;
1360       }
1361     } while (!golomb_length_bit);
1362     int x = 1;
1363     for (int i = length - 2; i >= 0; --i) {
1364       x = (x << 1) | reader_.ReadBit();
1365     }
1366     level += x - 1;
1367   }
1368   if (is_dc_coefficient) {
1369     *dc_category = (sign != 0) ? -1 : 1;
1370   }
1371   level &= 0xfffff;
1372   *coefficient_level += level;
1373   // Apply dequantization. Step 1 of section 7.12.3 in the spec.
1374   int q = q_value;
1375   if (quantizer_matrix != nullptr) {
1376     q = RightShiftWithRounding(q * quantizer_matrix[pos], 5);
1377   }
1378   // The intermediate multiplication can exceed 32 bits, so it has to be
1379   // performed by promoting one of the values to int64_t.
1380   int32_t dequantized_value = (static_cast<int64_t>(q) * level) & 0xffffff;
1381   dequantized_value >>= shift;
1382   // At this point:
1383   //   * |dequantized_value| is always non-negative.
1384   //   * |sign| can be either 0 or 1.
1385   //   * min_value = -(max_value + 1).
1386   // We need to apply the following:
1387   // dequantized_value = sign ? -dequantized_value : dequantized_value;
1388   // dequantized_value = Clip3(dequantized_value, min_value, max_value);
1389   //
1390   // Note that -x == ~(x - 1).
1391   //
1392   // Now, The above two lines can be done with a std::min and xor as follows:
1393   dequantized_value = std::min(dequantized_value - sign, max_value) ^ -sign;
1394   residual_buffer[pos] = dequantized_value;
1395   return true;
1396 }
1397 
ReadCoeffBaseRange(int clamped_tx_size_context,int cdf_context,int plane_type)1398 int Tile::ReadCoeffBaseRange(int clamped_tx_size_context, int cdf_context,
1399                              int plane_type) {
1400   int level = 0;
1401   for (int j = 0; j < kCoeffBaseRangeMaxIterations; ++j) {
1402     const int coeff_base_range = reader_.ReadSymbol<kCoeffBaseRangeSymbolCount>(
1403         symbol_decoder_context_.coeff_base_range_cdf[clamped_tx_size_context]
1404                                                     [plane_type][cdf_context]);
1405     level += coeff_base_range;
1406     if (coeff_base_range < (kCoeffBaseRangeSymbolCount - 1)) break;
1407   }
1408   return level;
1409 }
1410 
1411 template <typename ResidualType>
ReadTransformCoefficients(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType * const tx_type)1412 int Tile::ReadTransformCoefficients(const Block& block, Plane plane,
1413                                     int start_x, int start_y,
1414                                     TransformSize tx_size,
1415                                     TransformType* const tx_type) {
1416   const int x4 = DivideBy4(start_x);
1417   const int y4 = DivideBy4(start_y);
1418   const int w4 = kTransformWidth4x4[tx_size];
1419   const int h4 = kTransformHeight4x4[tx_size];
1420   const int tx_size_context = kTransformSizeContext[tx_size];
1421   int context =
1422       GetTransformAllZeroContext(block, plane, tx_size, x4, y4, w4, h4);
1423   const bool all_zero = reader_.ReadSymbol(
1424       symbol_decoder_context_.all_zero_cdf[tx_size_context][context]);
1425   if (all_zero) {
1426     if (plane == kPlaneY) {
1427       SetTransformType(block, x4, y4, w4, h4, kTransformTypeDctDct,
1428                        transform_types_);
1429     }
1430     SetEntropyContexts(x4, y4, w4, h4, plane, 0, 0);
1431     // This is not used in this case, so it can be set to any value.
1432     *tx_type = kNumTransformTypes;
1433     return 0;
1434   }
1435   const int tx_width = kTransformWidth[tx_size];
1436   const int tx_height = kTransformHeight[tx_size];
1437   const TransformSize adjusted_tx_size = kAdjustedTransformSize[tx_size];
1438   const int adjusted_tx_width_log2 = kTransformWidthLog2[adjusted_tx_size];
1439   const int tx_padding =
1440       (1 << adjusted_tx_width_log2) * kResidualPaddingVertical;
1441   auto* residual = reinterpret_cast<ResidualType*>(*block.residual);
1442   // Clear padding to avoid bottom boundary checks when parsing quantized
1443   // coefficients.
1444   memset(residual, 0, (tx_width * tx_height + tx_padding) * residual_size_);
1445   const int clamped_tx_height = std::min(tx_height, 32);
1446   if (plane == kPlaneY) {
1447     ReadTransformType(block, x4, y4, tx_size);
1448   }
1449   BlockParameters& bp = *block.bp;
1450   *tx_type = ComputeTransformType(block, plane, tx_size, x4, y4);
1451   const int eob_multi_size = kEobMultiSizeLookup[tx_size];
1452   const PlaneType plane_type = GetPlaneType(plane);
1453   const TransformClass tx_class = GetTransformClass(*tx_type);
1454   context = static_cast<int>(tx_class != kTransformClass2D);
1455   uint16_t* cdf;
1456   switch (eob_multi_size) {
1457     case 0:
1458       cdf = symbol_decoder_context_.eob_pt_16_cdf[plane_type][context];
1459       break;
1460     case 1:
1461       cdf = symbol_decoder_context_.eob_pt_32_cdf[plane_type][context];
1462       break;
1463     case 2:
1464       cdf = symbol_decoder_context_.eob_pt_64_cdf[plane_type][context];
1465       break;
1466     case 3:
1467       cdf = symbol_decoder_context_.eob_pt_128_cdf[plane_type][context];
1468       break;
1469     case 4:
1470       cdf = symbol_decoder_context_.eob_pt_256_cdf[plane_type][context];
1471       break;
1472     case 5:
1473       cdf = symbol_decoder_context_.eob_pt_512_cdf[plane_type];
1474       break;
1475     case 6:
1476     default:
1477       cdf = symbol_decoder_context_.eob_pt_1024_cdf[plane_type];
1478       break;
1479   }
1480   const int eob_pt =
1481       1 + reader_.ReadSymbol(cdf, kEobPt16SymbolCount + eob_multi_size);
1482   int eob = (eob_pt < 2) ? eob_pt : ((1 << (eob_pt - 2)) + 1);
1483   if (eob_pt >= 3) {
1484     context = eob_pt - 3;
1485     const bool eob_extra = reader_.ReadSymbol(
1486         symbol_decoder_context_
1487             .eob_extra_cdf[tx_size_context][plane_type][context]);
1488     if (eob_extra) eob += 1 << (eob_pt - 3);
1489     for (int i = 1; i < eob_pt - 2; ++i) {
1490       assert(eob_pt - i >= 3);
1491       assert(eob_pt <= kEobPt1024SymbolCount);
1492       if (static_cast<bool>(reader_.ReadBit())) {
1493         eob += 1 << (eob_pt - i - 3);
1494       }
1495     }
1496   }
1497   const uint16_t* scan = kScan[tx_class][tx_size];
1498   const int clamped_tx_size_context = std::min(tx_size_context, 3);
1499   // Read the last coefficient.
1500   {
1501     context = GetCoeffBaseContextEob(tx_size, eob - 1);
1502     const uint16_t pos = scan[eob - 1];
1503     int level =
1504         1 + reader_.ReadSymbol(
1505                 symbol_decoder_context_
1506                     .coeff_base_eob_cdf[tx_size_context][plane_type][context],
1507                 kCoeffBaseEobSymbolCount);
1508     if (level > kNumQuantizerBaseLevels) {
1509       level += ReadCoeffBaseRange(
1510           clamped_tx_size_context,
1511           GetCoeffBaseRangeContextEob(adjusted_tx_width_log2, pos, tx_class),
1512           plane_type);
1513     }
1514     residual[pos] = level;
1515   }
1516   if (eob > 1) {
1517     // Read all the other coefficients.
1518     // Lookup used to call the right variant of ReadCoeffBase*() based on the
1519     // transform class.
1520     static constexpr void (Tile::*kGetCoeffBaseFunc[])(
1521         const uint16_t* scan, PlaneType plane_type, TransformSize tx_size,
1522         int clamped_tx_size_context, int adjusted_tx_width_log2, int eob,
1523         uint16_t coeff_base_cdf[kCoeffBaseContexts][kCoeffBaseSymbolCount + 1],
1524         ResidualType* quantized_buffer) = {
1525         &Tile::ReadCoeffBase2D<ResidualType>,
1526         &Tile::ReadCoeffBaseHorizontal<ResidualType>,
1527         &Tile::ReadCoeffBaseVertical<ResidualType>};
1528     (this->*kGetCoeffBaseFunc[tx_class])(
1529         scan, plane_type, tx_size, clamped_tx_size_context,
1530         adjusted_tx_width_log2, eob,
1531         symbol_decoder_context_.coeff_base_cdf[tx_size_context][plane_type],
1532         residual);
1533   }
1534   const int max_value = (1 << (7 + sequence_header_.color_config.bitdepth)) - 1;
1535   const int current_quantizer_index = GetQIndex(
1536       frame_header_.segmentation, bp.segment_id, current_quantizer_index_);
1537   const int dc_q_value = quantizer_.GetDcValue(plane, current_quantizer_index);
1538   const int ac_q_value = quantizer_.GetAcValue(plane, current_quantizer_index);
1539   const int shift = kQuantizationShift[tx_size];
1540   const uint8_t* const quantizer_matrix =
1541       (frame_header_.quantizer.use_matrix &&
1542        *tx_type < kTransformTypeIdentityIdentity &&
1543        !frame_header_.segmentation.lossless[bp.segment_id] &&
1544        frame_header_.quantizer.matrix_level[plane] < 15)
1545           ? &kQuantizerMatrix[frame_header_.quantizer.matrix_level[plane]]
1546                              [plane_type][kQuantizerMatrixOffset[tx_size]]
1547           : nullptr;
1548   int coefficient_level = 0;
1549   int8_t dc_category = 0;
1550   uint16_t* const dc_sign_cdf =
1551       (residual[0] != 0)
1552           ? symbol_decoder_context_.dc_sign_cdf[plane_type][GetDcSignContext(
1553                 x4, y4, w4, h4, plane)]
1554           : nullptr;
1555   assert(scan[0] == 0);
1556   if (!ReadSignAndApplyDequantization<ResidualType, /*is_dc_coefficient=*/true>(
1557           scan, 0, dc_q_value, quantizer_matrix, shift, max_value, dc_sign_cdf,
1558           &dc_category, &coefficient_level, residual)) {
1559     return -1;
1560   }
1561   if (eob > 1) {
1562     int i = 1;
1563     do {
1564       if (!ReadSignAndApplyDequantization<ResidualType,
1565                                           /*is_dc_coefficient=*/false>(
1566               scan, i, ac_q_value, quantizer_matrix, shift, max_value, nullptr,
1567               nullptr, &coefficient_level, residual)) {
1568         return -1;
1569       }
1570     } while (++i < eob);
1571     MoveCoefficientsForTxWidth64(clamped_tx_height, tx_width, residual);
1572   }
1573   SetEntropyContexts(x4, y4, w4, h4, plane, std::min(4, coefficient_level),
1574                      dc_category);
1575   if (split_parse_and_decode_) {
1576     *block.residual += tx_width * tx_height * residual_size_;
1577   }
1578   return eob;
1579 }
1580 
// CALL_BITDEPTH_FUNCTION(function, ...) calls the template |function| with the
// variadic arguments. The template argument is chosen from
// |sequence_header_.color_config.bitdepth|: uint8_t when the bitdepth is at
// most 8 and uint16_t otherwise. When LIBGAV1_MAX_BITDEPTH is less than 10
// only the uint8_t instantiation is referenced.
#if LIBGAV1_MAX_BITDEPTH >= 10
#define CALL_BITDEPTH_FUNCTION(function, ...)          \
  do {                                                 \
    if (sequence_header_.color_config.bitdepth <= 8) { \
      function<uint8_t>(__VA_ARGS__);                  \
    } else {                                           \
      function<uint16_t>(__VA_ARGS__);                 \
    }                                                  \
  } while (false)
#else
#define CALL_BITDEPTH_FUNCTION(function, ...) \
  do {                                        \
    function<uint8_t>(__VA_ARGS__);           \
  } while (false)
#endif
1599 
TransformBlock(const Block & block,Plane plane,int base_x,int base_y,TransformSize tx_size,int x,int y,ProcessingMode mode)1600 bool Tile::TransformBlock(const Block& block, Plane plane, int base_x,
1601                           int base_y, TransformSize tx_size, int x, int y,
1602                           ProcessingMode mode) {
1603   BlockParameters& bp = *block.bp;
1604   const int subsampling_x = subsampling_x_[plane];
1605   const int subsampling_y = subsampling_y_[plane];
1606   const int start_x = base_x + MultiplyBy4(x);
1607   const int start_y = base_y + MultiplyBy4(y);
1608   const int max_x = MultiplyBy4(frame_header_.columns4x4) >> subsampling_x;
1609   const int max_y = MultiplyBy4(frame_header_.rows4x4) >> subsampling_y;
1610   if (start_x >= max_x || start_y >= max_y) return true;
1611   const int row = DivideBy4(start_y << subsampling_y);
1612   const int column = DivideBy4(start_x << subsampling_x);
1613   const int mask = sequence_header_.use_128x128_superblock ? 31 : 15;
1614   const int sub_block_row4x4 = row & mask;
1615   const int sub_block_column4x4 = column & mask;
1616   const int step_x = kTransformWidth4x4[tx_size];
1617   const int step_y = kTransformHeight4x4[tx_size];
1618   const bool do_decode = mode == kProcessingModeDecodeOnly ||
1619                          mode == kProcessingModeParseAndDecode;
1620   if (do_decode && !bp.is_inter) {
1621     if (bp.palette_mode_info.size[GetPlaneType(plane)] > 0) {
1622       CALL_BITDEPTH_FUNCTION(PalettePrediction, block, plane, start_x, start_y,
1623                              x, y, tx_size);
1624     } else {
1625       const PredictionMode mode =
1626           (plane == kPlaneY)
1627               ? bp.y_mode
1628               : (bp.uv_mode == kPredictionModeChromaFromLuma ? kPredictionModeDc
1629                                                              : bp.uv_mode);
1630       const int tr_row4x4 = (sub_block_row4x4 >> subsampling_y);
1631       const int tr_column4x4 =
1632           (sub_block_column4x4 >> subsampling_x) + step_x + 1;
1633       const int bl_row4x4 = (sub_block_row4x4 >> subsampling_y) + step_y + 1;
1634       const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x);
1635       const bool has_left = x > 0 || block.left_available[plane];
1636       const bool has_top = y > 0 || block.top_available[plane];
1637 
1638       CALL_BITDEPTH_FUNCTION(
1639           IntraPrediction, block, plane, start_x, start_y, has_left, has_top,
1640           block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
1641           block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
1642           mode, tx_size);
1643       if (plane != kPlaneY && bp.uv_mode == kPredictionModeChromaFromLuma) {
1644         CALL_BITDEPTH_FUNCTION(ChromaFromLumaPrediction, block, plane, start_x,
1645                                start_y, tx_size);
1646       }
1647     }
1648     if (plane == kPlaneY) {
1649       block.bp->prediction_parameters->max_luma_width =
1650           start_x + MultiplyBy4(step_x);
1651       block.bp->prediction_parameters->max_luma_height =
1652           start_y + MultiplyBy4(step_y);
1653       block.scratch_buffer->cfl_luma_buffer_valid = false;
1654     }
1655   }
1656   if (!bp.skip) {
1657     const int sb_row_index = SuperBlockRowIndex(block.row4x4);
1658     const int sb_column_index = SuperBlockColumnIndex(block.column4x4);
1659     if (mode == kProcessingModeDecodeOnly) {
1660       TransformParameterQueue& tx_params =
1661           *residual_buffer_threaded_[sb_row_index][sb_column_index]
1662                ->transform_parameters();
1663       ReconstructBlock(block, plane, start_x, start_y, tx_size,
1664                        tx_params.Type(), tx_params.NonZeroCoeffCount());
1665       tx_params.Pop();
1666     } else {
1667       TransformType tx_type;
1668       int non_zero_coeff_count;
1669 #if LIBGAV1_MAX_BITDEPTH >= 10
1670       if (sequence_header_.color_config.bitdepth > 8) {
1671         non_zero_coeff_count = ReadTransformCoefficients<int32_t>(
1672             block, plane, start_x, start_y, tx_size, &tx_type);
1673       } else  // NOLINT
1674 #endif
1675       {
1676         non_zero_coeff_count = ReadTransformCoefficients<int16_t>(
1677             block, plane, start_x, start_y, tx_size, &tx_type);
1678       }
1679       if (non_zero_coeff_count < 0) return false;
1680       if (mode == kProcessingModeParseAndDecode) {
1681         ReconstructBlock(block, plane, start_x, start_y, tx_size, tx_type,
1682                          non_zero_coeff_count);
1683       } else {
1684         assert(mode == kProcessingModeParseOnly);
1685         residual_buffer_threaded_[sb_row_index][sb_column_index]
1686             ->transform_parameters()
1687             ->Push(non_zero_coeff_count, tx_type);
1688       }
1689     }
1690   }
1691   if (do_decode) {
1692     bool* block_decoded =
1693         &block.scratch_buffer
1694              ->block_decoded[plane][(sub_block_row4x4 >> subsampling_y) + 1]
1695                             [(sub_block_column4x4 >> subsampling_x) + 1];
1696     SetBlockValues<bool>(step_y, step_x, true, block_decoded,
1697                          TileScratchBuffer::kBlockDecodedStride);
1698   }
1699   return true;
1700 }
1701 
TransformTree(const Block & block,int start_x,int start_y,BlockSize plane_size,ProcessingMode mode)1702 bool Tile::TransformTree(const Block& block, int start_x, int start_y,
1703                          BlockSize plane_size, ProcessingMode mode) {
1704   assert(plane_size <= kBlock64x64);
1705   // Branching factor is 4; Maximum Depth is 4; So the maximum stack size
1706   // required is (4 - 1) * 4 + 1 = 13.
1707   Stack<TransformTreeNode, 13> stack;
1708   // It is okay to cast BlockSize to TransformSize here since the enum are
1709   // equivalent for all BlockSize values <= kBlock64x64.
1710   stack.Push(TransformTreeNode(start_x, start_y,
1711                                static_cast<TransformSize>(plane_size)));
1712 
1713   do {
1714     TransformTreeNode node = stack.Pop();
1715     const int row = DivideBy4(node.y);
1716     const int column = DivideBy4(node.x);
1717     if (row >= frame_header_.rows4x4 || column >= frame_header_.columns4x4) {
1718       continue;
1719     }
1720     const TransformSize inter_tx_size = inter_transform_sizes_[row][column];
1721     const int width = kTransformWidth[node.tx_size];
1722     const int height = kTransformHeight[node.tx_size];
1723     if (width <= kTransformWidth[inter_tx_size] &&
1724         height <= kTransformHeight[inter_tx_size]) {
1725       if (!TransformBlock(block, kPlaneY, node.x, node.y, node.tx_size, 0, 0,
1726                           mode)) {
1727         return false;
1728       }
1729       continue;
1730     }
1731     // The split transform size look up gives the right transform size that we
1732     // should push in the stack.
1733     //   if (width > height) => transform size whose width is half.
1734     //   if (width < height) => transform size whose height is half.
1735     //   if (width == height) => transform size whose width and height are half.
1736     const TransformSize split_tx_size = kSplitTransformSize[node.tx_size];
1737     const int half_width = DivideBy2(width);
1738     if (width > height) {
1739       stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1740       stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1741       continue;
1742     }
1743     const int half_height = DivideBy2(height);
1744     if (width < height) {
1745       stack.Push(
1746           TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1747       stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1748       continue;
1749     }
1750     stack.Push(TransformTreeNode(node.x + half_width, node.y + half_height,
1751                                  split_tx_size));
1752     stack.Push(TransformTreeNode(node.x, node.y + half_height, split_tx_size));
1753     stack.Push(TransformTreeNode(node.x + half_width, node.y, split_tx_size));
1754     stack.Push(TransformTreeNode(node.x, node.y, split_tx_size));
1755   } while (!stack.Empty());
1756   return true;
1757 }
1758 
ReconstructBlock(const Block & block,Plane plane,int start_x,int start_y,TransformSize tx_size,TransformType tx_type,int non_zero_coeff_count)1759 void Tile::ReconstructBlock(const Block& block, Plane plane, int start_x,
1760                             int start_y, TransformSize tx_size,
1761                             TransformType tx_type, int non_zero_coeff_count) {
1762   // Reconstruction process. Steps 2 and 3 of Section 7.12.3 in the spec.
1763   assert(non_zero_coeff_count >= 0);
1764   if (non_zero_coeff_count == 0) return;
1765 #if LIBGAV1_MAX_BITDEPTH >= 10
1766   if (sequence_header_.color_config.bitdepth > 8) {
1767     Array2DView<uint16_t> buffer(
1768         buffer_[plane].rows(), buffer_[plane].columns() / sizeof(uint16_t),
1769         reinterpret_cast<uint16_t*>(&buffer_[plane][0][0]));
1770     Reconstruct(dsp_, tx_type, tx_size,
1771                 frame_header_.segmentation.lossless[block.bp->segment_id],
1772                 reinterpret_cast<int32_t*>(*block.residual), start_x, start_y,
1773                 &buffer, non_zero_coeff_count);
1774   } else  // NOLINT
1775 #endif
1776   {
1777     Reconstruct(dsp_, tx_type, tx_size,
1778                 frame_header_.segmentation.lossless[block.bp->segment_id],
1779                 reinterpret_cast<int16_t*>(*block.residual), start_x, start_y,
1780                 &buffer_[plane], non_zero_coeff_count);
1781   }
1782   if (split_parse_and_decode_) {
1783     *block.residual +=
1784         kTransformWidth[tx_size] * kTransformHeight[tx_size] * residual_size_;
1785   }
1786 }
1787 
Residual(const Block & block,ProcessingMode mode)1788 bool Tile::Residual(const Block& block, ProcessingMode mode) {
1789   const int width_chunks = std::max(1, block.width >> 6);
1790   const int height_chunks = std::max(1, block.height >> 6);
1791   const BlockSize size_chunk4x4 =
1792       (width_chunks > 1 || height_chunks > 1) ? kBlock64x64 : block.size;
1793   const BlockParameters& bp = *block.bp;
1794   for (int chunk_y = 0; chunk_y < height_chunks; ++chunk_y) {
1795     for (int chunk_x = 0; chunk_x < width_chunks; ++chunk_x) {
1796       for (int plane = 0; plane < (block.HasChroma() ? PlaneCount() : 1);
1797            ++plane) {
1798         const int subsampling_x = subsampling_x_[plane];
1799         const int subsampling_y = subsampling_y_[plane];
1800         // For Y Plane, when lossless is true |bp.transform_size| is always
1801         // kTransformSize4x4. So we can simply use |bp.transform_size| here as
1802         // the Y plane's transform size (part of Section 5.11.37 in the spec).
1803         const TransformSize tx_size =
1804             (plane == kPlaneY) ? bp.transform_size : bp.uv_transform_size;
1805         const BlockSize plane_size =
1806             kPlaneResidualSize[size_chunk4x4][subsampling_x][subsampling_y];
1807         assert(plane_size != kBlockInvalid);
1808         if (bp.is_inter &&
1809             !frame_header_.segmentation.lossless[bp.segment_id] &&
1810             plane == kPlaneY) {
1811           const int row_chunk4x4 = block.row4x4 + MultiplyBy16(chunk_y);
1812           const int column_chunk4x4 = block.column4x4 + MultiplyBy16(chunk_x);
1813           const int base_x = MultiplyBy4(column_chunk4x4 >> subsampling_x);
1814           const int base_y = MultiplyBy4(row_chunk4x4 >> subsampling_y);
1815           if (!TransformTree(block, base_x, base_y, plane_size, mode)) {
1816             return false;
1817           }
1818         } else {
1819           const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
1820           const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
1821           const int step_x = kTransformWidth4x4[tx_size];
1822           const int step_y = kTransformHeight4x4[tx_size];
1823           const int num4x4_wide = kNum4x4BlocksWide[plane_size];
1824           const int num4x4_high = kNum4x4BlocksHigh[plane_size];
1825           for (int y = 0; y < num4x4_high; y += step_y) {
1826             for (int x = 0; x < num4x4_wide; x += step_x) {
1827               if (!TransformBlock(
1828                       block, static_cast<Plane>(plane), base_x, base_y, tx_size,
1829                       x + (MultiplyBy16(chunk_x) >> subsampling_x),
1830                       y + (MultiplyBy16(chunk_y) >> subsampling_y), mode)) {
1831                 return false;
1832               }
1833             }
1834           }
1835         }
1836       }
1837     }
1838   }
1839   return true;
1840 }
1841 
1842 // The purpose of this function is to limit the maximum size of motion vectors
1843 // and also, if use_intra_block_copy is true, to additionally constrain the
1844 // motion vector so that the data is fetched from parts of the tile that have
1845 // already been decoded and are not too close to the current block (in order to
1846 // make a pipelined decoder implementation feasible).
IsMvValid(const Block & block,bool is_compound) const1847 bool Tile::IsMvValid(const Block& block, bool is_compound) const {
1848   const BlockParameters& bp = *block.bp;
1849   for (int i = 0; i < 1 + static_cast<int>(is_compound); ++i) {
1850     for (int mv_component : bp.mv.mv[i].mv) {
1851       if (std::abs(mv_component) >= (1 << 14)) {
1852         return false;
1853       }
1854     }
1855   }
1856   if (!block.bp->prediction_parameters->use_intra_block_copy) {
1857     return true;
1858   }
1859   if ((bp.mv.mv[0].mv32 & 0x00070007) != 0) {
1860     return false;
1861   }
1862   const int delta_row = bp.mv.mv[0].mv[0] >> 3;
1863   const int delta_column = bp.mv.mv[0].mv[1] >> 3;
1864   int src_top_edge = MultiplyBy4(block.row4x4) + delta_row;
1865   int src_left_edge = MultiplyBy4(block.column4x4) + delta_column;
1866   const int src_bottom_edge = src_top_edge + block.height;
1867   const int src_right_edge = src_left_edge + block.width;
1868   if (block.HasChroma()) {
1869     if (block.width < 8 && subsampling_x_[kPlaneU] != 0) {
1870       src_left_edge -= 4;
1871     }
1872     if (block.height < 8 && subsampling_y_[kPlaneU] != 0) {
1873       src_top_edge -= 4;
1874     }
1875   }
1876   if (src_top_edge < MultiplyBy4(row4x4_start_) ||
1877       src_left_edge < MultiplyBy4(column4x4_start_) ||
1878       src_bottom_edge > MultiplyBy4(row4x4_end_) ||
1879       src_right_edge > MultiplyBy4(column4x4_end_)) {
1880     return false;
1881   }
1882   // sb_height_log2 = use_128x128_superblock ? log2(128) : log2(64)
1883   const int sb_height_log2 =
1884       6 + static_cast<int>(sequence_header_.use_128x128_superblock);
1885   const int active_sb_row = MultiplyBy4(block.row4x4) >> sb_height_log2;
1886   const int active_64x64_block_column = MultiplyBy4(block.column4x4) >> 6;
1887   const int src_sb_row = (src_bottom_edge - 1) >> sb_height_log2;
1888   const int src_64x64_block_column = (src_right_edge - 1) >> 6;
1889   const int total_64x64_blocks_per_row =
1890       ((column4x4_end_ - column4x4_start_ - 1) >> 4) + 1;
1891   const int active_64x64_block =
1892       active_sb_row * total_64x64_blocks_per_row + active_64x64_block_column;
1893   const int src_64x64_block =
1894       src_sb_row * total_64x64_blocks_per_row + src_64x64_block_column;
1895   if (src_64x64_block >= active_64x64_block - kIntraBlockCopyDelay64x64Blocks) {
1896     return false;
1897   }
1898 
1899   // Wavefront constraint: use only top left area of frame for reference.
1900   if (src_sb_row > active_sb_row) return false;
1901   const int gradient =
1902       1 + kIntraBlockCopyDelay64x64Blocks +
1903       static_cast<int>(sequence_header_.use_128x128_superblock);
1904   const int wavefront_offset = gradient * (active_sb_row - src_sb_row);
1905   return src_64x64_block_column < active_64x64_block_column -
1906                                       kIntraBlockCopyDelay64x64Blocks +
1907                                       wavefront_offset;
1908 }
1909 
AssignInterMv(const Block & block,bool is_compound)1910 bool Tile::AssignInterMv(const Block& block, bool is_compound) {
1911   int min[2];
1912   int max[2];
1913   GetClampParameters(block, min, max);
1914   BlockParameters& bp = *block.bp;
1915   const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1916   if (is_compound) {
1917     for (int i = 0; i < 2; ++i) {
1918       const PredictionMode mode = GetSinglePredictionMode(i, bp.y_mode);
1919       MotionVector predicted_mv;
1920       if (mode == kPredictionModeGlobalMv) {
1921         predicted_mv = prediction_parameters.global_mv[i];
1922       } else {
1923         const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1924                                   (mode == kPredictionModeNewMv &&
1925                                    prediction_parameters.ref_mv_count <= 1))
1926                                      ? 0
1927                                      : prediction_parameters.ref_mv_index;
1928         predicted_mv = prediction_parameters.reference_mv(ref_mv_index, i);
1929         if (ref_mv_index < prediction_parameters.ref_mv_count) {
1930           predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1931           predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1932         }
1933       }
1934       if (mode == kPredictionModeNewMv) {
1935         ReadMotionVector(block, i);
1936         bp.mv.mv[i].mv[0] += predicted_mv.mv[0];
1937         bp.mv.mv[i].mv[1] += predicted_mv.mv[1];
1938       } else {
1939         bp.mv.mv[i] = predicted_mv;
1940       }
1941     }
1942   } else {
1943     const PredictionMode mode = GetSinglePredictionMode(0, bp.y_mode);
1944     MotionVector predicted_mv;
1945     if (mode == kPredictionModeGlobalMv) {
1946       predicted_mv = prediction_parameters.global_mv[0];
1947     } else {
1948       const int ref_mv_index = (mode == kPredictionModeNearestMv ||
1949                                 (mode == kPredictionModeNewMv &&
1950                                  prediction_parameters.ref_mv_count <= 1))
1951                                    ? 0
1952                                    : prediction_parameters.ref_mv_index;
1953       predicted_mv = prediction_parameters.reference_mv(ref_mv_index);
1954       if (ref_mv_index < prediction_parameters.ref_mv_count) {
1955         predicted_mv.mv[0] = Clip3(predicted_mv.mv[0], min[0], max[0]);
1956         predicted_mv.mv[1] = Clip3(predicted_mv.mv[1], min[1], max[1]);
1957       }
1958     }
1959     if (mode == kPredictionModeNewMv) {
1960       ReadMotionVector(block, 0);
1961       bp.mv.mv[0].mv[0] += predicted_mv.mv[0];
1962       bp.mv.mv[0].mv[1] += predicted_mv.mv[1];
1963     } else {
1964       bp.mv.mv[0] = predicted_mv;
1965     }
1966   }
1967   return IsMvValid(block, is_compound);
1968 }
1969 
AssignIntraMv(const Block & block)1970 bool Tile::AssignIntraMv(const Block& block) {
1971   // TODO(linfengz): Check if the clamping process is necessary.
1972   int min[2];
1973   int max[2];
1974   GetClampParameters(block, min, max);
1975   BlockParameters& bp = *block.bp;
1976   const PredictionParameters& prediction_parameters = *bp.prediction_parameters;
1977   const MotionVector& ref_mv_0 = prediction_parameters.reference_mv(0);
1978   ReadMotionVector(block, 0);
1979   if (ref_mv_0.mv32 == 0) {
1980     const MotionVector& ref_mv_1 = prediction_parameters.reference_mv(1);
1981     if (ref_mv_1.mv32 == 0) {
1982       const int super_block_size4x4 = kNum4x4BlocksHigh[SuperBlockSize()];
1983       if (block.row4x4 - super_block_size4x4 < row4x4_start_) {
1984         bp.mv.mv[0].mv[1] -= MultiplyBy32(super_block_size4x4);
1985         bp.mv.mv[0].mv[1] -= MultiplyBy8(kIntraBlockCopyDelayPixels);
1986       } else {
1987         bp.mv.mv[0].mv[0] -= MultiplyBy32(super_block_size4x4);
1988       }
1989     } else {
1990       bp.mv.mv[0].mv[0] += Clip3(ref_mv_1.mv[0], min[0], max[0]);
1991       bp.mv.mv[0].mv[1] += Clip3(ref_mv_1.mv[1], min[0], max[0]);
1992     }
1993   } else {
1994     bp.mv.mv[0].mv[0] += Clip3(ref_mv_0.mv[0], min[0], max[0]);
1995     bp.mv.mv[0].mv[1] += Clip3(ref_mv_0.mv[1], min[1], max[1]);
1996   }
1997   return IsMvValid(block, /*is_compound=*/false);
1998 }
1999 
ResetEntropyContext(const Block & block)2000 void Tile::ResetEntropyContext(const Block& block) {
2001   for (int plane = 0; plane < (block.HasChroma() ? PlaneCount() : 1); ++plane) {
2002     const int subsampling_x = subsampling_x_[plane];
2003     const int start_x = block.column4x4 >> subsampling_x;
2004     const int end_x =
2005         std::min((block.column4x4 + block.width4x4) >> subsampling_x,
2006                  frame_header_.columns4x4);
2007     memset(&coefficient_levels_[kEntropyContextTop][plane][start_x], 0,
2008            end_x - start_x);
2009     memset(&dc_categories_[kEntropyContextTop][plane][start_x], 0,
2010            end_x - start_x);
2011     const int subsampling_y = subsampling_y_[plane];
2012     const int start_y = block.row4x4 >> subsampling_y;
2013     const int end_y =
2014         std::min((block.row4x4 + block.height4x4) >> subsampling_y,
2015                  frame_header_.rows4x4);
2016     memset(&coefficient_levels_[kEntropyContextLeft][plane][start_y], 0,
2017            end_y - start_y);
2018     memset(&dc_categories_[kEntropyContextLeft][plane][start_y], 0,
2019            end_y - start_y);
2020   }
2021 }
2022 
ComputePrediction(const Block & block)2023 bool Tile::ComputePrediction(const Block& block) {
2024   const BlockParameters& bp = *block.bp;
2025   if (!bp.is_inter) return true;
2026   const int mask =
2027       (1 << (4 + static_cast<int>(sequence_header_.use_128x128_superblock))) -
2028       1;
2029   const int sub_block_row4x4 = block.row4x4 & mask;
2030   const int sub_block_column4x4 = block.column4x4 & mask;
2031   const int plane_count = block.HasChroma() ? PlaneCount() : 1;
2032   // Returns true if this block applies local warping. The state is determined
2033   // in the Y plane and carried for use in the U/V planes.
2034   // But the U/V planes will not apply warping when the block size is smaller
2035   // than 8x8, even if this variable is true.
2036   bool is_local_valid = false;
2037   // Local warping parameters, similar usage as is_local_valid.
2038   GlobalMotion local_warp_params;
2039   int plane = 0;
2040   do {
2041     const int8_t subsampling_x = subsampling_x_[plane];
2042     const int8_t subsampling_y = subsampling_y_[plane];
2043     const BlockSize plane_size = block.residual_size[plane];
2044     const int block_width4x4 = kNum4x4BlocksWide[plane_size];
2045     const int block_height4x4 = kNum4x4BlocksHigh[plane_size];
2046     const int block_width = MultiplyBy4(block_width4x4);
2047     const int block_height = MultiplyBy4(block_height4x4);
2048     const int base_x = MultiplyBy4(block.column4x4 >> subsampling_x);
2049     const int base_y = MultiplyBy4(block.row4x4 >> subsampling_y);
2050     if (bp.reference_frame[1] == kReferenceFrameIntra) {
2051       const int tr_row4x4 = sub_block_row4x4 >> subsampling_y;
2052       const int tr_column4x4 =
2053           (sub_block_column4x4 >> subsampling_x) + block_width4x4 + 1;
2054       const int bl_row4x4 =
2055           (sub_block_row4x4 >> subsampling_y) + block_height4x4;
2056       const int bl_column4x4 = (sub_block_column4x4 >> subsampling_x) + 1;
2057       const TransformSize tx_size =
2058           k4x4SizeToTransformSize[k4x4WidthLog2[plane_size]]
2059                                  [k4x4HeightLog2[plane_size]];
2060       const bool has_left = block.left_available[plane];
2061       const bool has_top = block.top_available[plane];
2062       CALL_BITDEPTH_FUNCTION(
2063           IntraPrediction, block, static_cast<Plane>(plane), base_x, base_y,
2064           has_left, has_top,
2065           block.scratch_buffer->block_decoded[plane][tr_row4x4][tr_column4x4],
2066           block.scratch_buffer->block_decoded[plane][bl_row4x4][bl_column4x4],
2067           kInterIntraToIntraMode[block.bp->prediction_parameters
2068                                      ->inter_intra_mode],
2069           tx_size);
2070     }
2071     int candidate_row = block.row4x4;
2072     int candidate_column = block.column4x4;
2073     bool some_use_intra = bp.reference_frame[0] == kReferenceFrameIntra;
2074     if (!some_use_intra && plane != 0) {
2075       candidate_row = (candidate_row >> subsampling_y) << subsampling_y;
2076       candidate_column = (candidate_column >> subsampling_x) << subsampling_x;
2077       if (candidate_row != block.row4x4) {
2078         // Top block.
2079         const BlockParameters& bp_top =
2080             *block_parameters_holder_.Find(candidate_row, block.column4x4);
2081         some_use_intra = bp_top.reference_frame[0] == kReferenceFrameIntra;
2082         if (!some_use_intra && candidate_column != block.column4x4) {
2083           // Top-left block.
2084           const BlockParameters& bp_top_left =
2085               *block_parameters_holder_.Find(candidate_row, candidate_column);
2086           some_use_intra =
2087               bp_top_left.reference_frame[0] == kReferenceFrameIntra;
2088         }
2089       }
2090       if (!some_use_intra && candidate_column != block.column4x4) {
2091         // Left block.
2092         const BlockParameters& bp_left =
2093             *block_parameters_holder_.Find(block.row4x4, candidate_column);
2094         some_use_intra = bp_left.reference_frame[0] == kReferenceFrameIntra;
2095       }
2096     }
2097     int prediction_width;
2098     int prediction_height;
2099     if (some_use_intra) {
2100       candidate_row = block.row4x4;
2101       candidate_column = block.column4x4;
2102       prediction_width = block_width;
2103       prediction_height = block_height;
2104     } else {
2105       prediction_width = block.width >> subsampling_x;
2106       prediction_height = block.height >> subsampling_y;
2107     }
2108     int r = 0;
2109     int y = 0;
2110     do {
2111       int c = 0;
2112       int x = 0;
2113       do {
2114         if (!InterPrediction(block, static_cast<Plane>(plane), base_x + x,
2115                              base_y + y, prediction_width, prediction_height,
2116                              candidate_row + r, candidate_column + c,
2117                              &is_local_valid, &local_warp_params)) {
2118           return false;
2119         }
2120         ++c;
2121         x += prediction_width;
2122       } while (x < block_width);
2123       ++r;
2124       y += prediction_height;
2125     } while (y < block_height);
2126   } while (++plane < plane_count);
2127   return true;
2128 }
2129 
2130 #undef CALL_BITDEPTH_FUNCTION
2131 
PopulateDeblockFilterLevel(const Block & block)2132 void Tile::PopulateDeblockFilterLevel(const Block& block) {
2133   if (!post_filter_.DoDeblock()) return;
2134   BlockParameters& bp = *block.bp;
2135   const int mode_id =
2136       static_cast<int>(kPredictionModeDeltasMask.Contains(bp.y_mode));
2137   for (int i = 0; i < kFrameLfCount; ++i) {
2138     if (delta_lf_all_zero_) {
2139       bp.deblock_filter_level[i] = post_filter_.GetZeroDeltaDeblockFilterLevel(
2140           bp.segment_id, i, bp.reference_frame[0], mode_id);
2141     } else {
2142       bp.deblock_filter_level[i] =
2143           deblock_filter_levels_[bp.segment_id][i][bp.reference_frame[0]]
2144                                 [mode_id];
2145     }
2146   }
2147 }
2148 
ProcessBlock(int row4x4,int column4x4,BlockSize block_size,ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2149 bool Tile::ProcessBlock(int row4x4, int column4x4, BlockSize block_size,
2150                         ParameterTree* const tree,
2151                         TileScratchBuffer* const scratch_buffer,
2152                         ResidualPtr* residual) {
2153   // Do not process the block if the starting point is beyond the visible frame.
2154   // This is equivalent to the has_row/has_column check in the
2155   // decode_partition() section of the spec when partition equals
2156   // kPartitionHorizontal or kPartitionVertical.
2157   if (row4x4 >= frame_header_.rows4x4 ||
2158       column4x4 >= frame_header_.columns4x4) {
2159     return true;
2160   }
2161   BlockParameters& bp = *tree->parameters();
2162   block_parameters_holder_.FillCache(row4x4, column4x4, block_size, &bp);
2163   Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
2164   bp.size = block_size;
2165   bp.prediction_parameters =
2166       split_parse_and_decode_ ? std::unique_ptr<PredictionParameters>(
2167                                     new (std::nothrow) PredictionParameters())
2168                               : std::move(prediction_parameters_);
2169   if (bp.prediction_parameters == nullptr) return false;
2170   if (!DecodeModeInfo(block)) return false;
2171   bp.is_global_mv_block = (bp.y_mode == kPredictionModeGlobalMv ||
2172                            bp.y_mode == kPredictionModeGlobalGlobalMv) &&
2173                           !IsBlockDimension4(bp.size);
2174   PopulateDeblockFilterLevel(block);
2175   if (!ReadPaletteTokens(block)) return false;
2176   DecodeTransformSize(block);
2177   // Part of Section 5.11.37 in the spec (implemented as a simple lookup).
2178   bp.uv_transform_size = frame_header_.segmentation.lossless[bp.segment_id]
2179                              ? kTransformSize4x4
2180                              : kUVTransformSize[block.residual_size[kPlaneU]];
2181   if (bp.skip) ResetEntropyContext(block);
2182   if (split_parse_and_decode_) {
2183     if (!Residual(block, kProcessingModeParseOnly)) return false;
2184   } else {
2185     if (!ComputePrediction(block) ||
2186         !Residual(block, kProcessingModeParseAndDecode)) {
2187       return false;
2188     }
2189   }
2190   // If frame_header_.segmentation.enabled is false, bp.segment_id is 0 for all
2191   // blocks. We don't need to call save bp.segment_id in the current frame
2192   // because the current frame's segmentation map will be cleared to all 0s.
2193   //
2194   // If frame_header_.segmentation.enabled is true and
2195   // frame_header_.segmentation.update_map is false, we will copy the previous
2196   // frame's segmentation map to the current frame. So we don't need to call
2197   // save bp.segment_id in the current frame.
2198   if (frame_header_.segmentation.enabled &&
2199       frame_header_.segmentation.update_map) {
2200     const int x_limit = std::min(frame_header_.columns4x4 - column4x4,
2201                                  static_cast<int>(block.width4x4));
2202     const int y_limit = std::min(frame_header_.rows4x4 - row4x4,
2203                                  static_cast<int>(block.height4x4));
2204     current_frame_.segmentation_map()->FillBlock(row4x4, column4x4, x_limit,
2205                                                  y_limit, bp.segment_id);
2206   }
2207   StoreMotionFieldMvsIntoCurrentFrame(block);
2208   if (!split_parse_and_decode_) {
2209     prediction_parameters_ = std::move(bp.prediction_parameters);
2210   }
2211   return true;
2212 }
2213 
DecodeBlock(ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2214 bool Tile::DecodeBlock(ParameterTree* const tree,
2215                        TileScratchBuffer* const scratch_buffer,
2216                        ResidualPtr* residual) {
2217   const int row4x4 = tree->row4x4();
2218   const int column4x4 = tree->column4x4();
2219   if (row4x4 >= frame_header_.rows4x4 ||
2220       column4x4 >= frame_header_.columns4x4) {
2221     return true;
2222   }
2223   const BlockSize block_size = tree->block_size();
2224   Block block(*this, block_size, row4x4, column4x4, scratch_buffer, residual);
2225   if (!ComputePrediction(block) ||
2226       !Residual(block, kProcessingModeDecodeOnly)) {
2227     return false;
2228   }
2229   block.bp->prediction_parameters.reset(nullptr);
2230   return true;
2231 }
2232 
ProcessPartition(int row4x4_start,int column4x4_start,ParameterTree * const root,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2233 bool Tile::ProcessPartition(int row4x4_start, int column4x4_start,
2234                             ParameterTree* const root,
2235                             TileScratchBuffer* const scratch_buffer,
2236                             ResidualPtr* residual) {
2237   Stack<ParameterTree*, kDfsStackSize> stack;
2238 
2239   // Set up the first iteration.
2240   ParameterTree* node = root;
2241   int row4x4 = row4x4_start;
2242   int column4x4 = column4x4_start;
2243   BlockSize block_size = SuperBlockSize();
2244 
2245   // DFS loop. If it sees a terminal node (leaf node), ProcessBlock is invoked.
2246   // Otherwise, the children are pushed into the stack for future processing.
2247   do {
2248     if (!stack.Empty()) {
2249       // Set up subsequent iterations.
2250       node = stack.Pop();
2251       row4x4 = node->row4x4();
2252       column4x4 = node->column4x4();
2253       block_size = node->block_size();
2254     }
2255     if (row4x4 >= frame_header_.rows4x4 ||
2256         column4x4 >= frame_header_.columns4x4) {
2257       continue;
2258     }
2259     const int block_width4x4 = kNum4x4BlocksWide[block_size];
2260     assert(block_width4x4 == kNum4x4BlocksHigh[block_size]);
2261     const int half_block4x4 = block_width4x4 >> 1;
2262     const bool has_rows = (row4x4 + half_block4x4) < frame_header_.rows4x4;
2263     const bool has_columns =
2264         (column4x4 + half_block4x4) < frame_header_.columns4x4;
2265     Partition partition;
2266     if (!ReadPartition(row4x4, column4x4, block_size, has_rows, has_columns,
2267                        &partition)) {
2268       LIBGAV1_DLOG(ERROR, "Failed to read partition for row: %d column: %d",
2269                    row4x4, column4x4);
2270       return false;
2271     }
2272     const BlockSize sub_size = kSubSize[partition][block_size];
2273     // Section 6.10.4: It is a requirement of bitstream conformance that
2274     // get_plane_residual_size( subSize, 1 ) is not equal to BLOCK_INVALID
2275     // every time subSize is computed.
2276     if (sub_size == kBlockInvalid ||
2277         kPlaneResidualSize[sub_size]
2278                           [sequence_header_.color_config.subsampling_x]
2279                           [sequence_header_.color_config.subsampling_y] ==
2280             kBlockInvalid) {
2281       LIBGAV1_DLOG(
2282           ERROR,
2283           "Invalid sub-block/plane size for row: %d column: %d partition: "
2284           "%d block_size: %d sub_size: %d subsampling_x/y: %d, %d",
2285           row4x4, column4x4, partition, block_size, sub_size,
2286           sequence_header_.color_config.subsampling_x,
2287           sequence_header_.color_config.subsampling_y);
2288       return false;
2289     }
2290     if (!node->SetPartitionType(partition)) {
2291       LIBGAV1_DLOG(ERROR, "node->SetPartitionType() failed.");
2292       return false;
2293     }
2294     switch (partition) {
2295       case kPartitionNone:
2296         if (!ProcessBlock(row4x4, column4x4, sub_size, node, scratch_buffer,
2297                           residual)) {
2298           return false;
2299         }
2300         break;
2301       case kPartitionSplit:
2302         // The children must be added in reverse order since a stack is being
2303         // used.
2304         for (int i = 3; i >= 0; --i) {
2305           ParameterTree* const child = node->children(i);
2306           assert(child != nullptr);
2307           stack.Push(child);
2308         }
2309         break;
2310       case kPartitionHorizontal:
2311       case kPartitionVertical:
2312       case kPartitionHorizontalWithTopSplit:
2313       case kPartitionHorizontalWithBottomSplit:
2314       case kPartitionVerticalWithLeftSplit:
2315       case kPartitionVerticalWithRightSplit:
2316       case kPartitionHorizontal4:
2317       case kPartitionVertical4:
2318         for (int i = 0; i < 4; ++i) {
2319           ParameterTree* const child = node->children(i);
2320           // Once a null child is seen, all the subsequent children will also be
2321           // null.
2322           if (child == nullptr) break;
2323           if (!ProcessBlock(child->row4x4(), child->column4x4(),
2324                             child->block_size(), child, scratch_buffer,
2325                             residual)) {
2326             return false;
2327           }
2328         }
2329         break;
2330     }
2331   } while (!stack.Empty());
2332   return true;
2333 }
2334 
ResetLoopRestorationParams()2335 void Tile::ResetLoopRestorationParams() {
2336   for (int plane = kPlaneY; plane < kMaxPlanes; ++plane) {
2337     for (int i = WienerInfo::kVertical; i <= WienerInfo::kHorizontal; ++i) {
2338       reference_unit_info_[plane].sgr_proj_info.multiplier[i] =
2339           kSgrProjDefaultMultiplier[i];
2340       for (int j = 0; j < kNumWienerCoefficients; ++j) {
2341         reference_unit_info_[plane].wiener_info.filter[i][j] =
2342             kWienerDefaultFilter[j];
2343       }
2344     }
2345   }
2346 }
2347 
ResetCdef(const int row4x4,const int column4x4)2348 void Tile::ResetCdef(const int row4x4, const int column4x4) {
2349   if (!sequence_header_.enable_cdef) return;
2350   const int row = DivideBy16(row4x4);
2351   const int column = DivideBy16(column4x4);
2352   cdef_index_[row][column] = -1;
2353   if (sequence_header_.use_128x128_superblock) {
2354     const int cdef_size4x4 = kNum4x4BlocksWide[kBlock64x64];
2355     const int border_row = DivideBy16(row4x4 + cdef_size4x4);
2356     const int border_column = DivideBy16(column4x4 + cdef_size4x4);
2357     cdef_index_[row][border_column] = -1;
2358     cdef_index_[border_row][column] = -1;
2359     cdef_index_[border_row][border_column] = -1;
2360   }
2361 }
2362 
ClearBlockDecoded(TileScratchBuffer * const scratch_buffer,int row4x4,int column4x4)2363 void Tile::ClearBlockDecoded(TileScratchBuffer* const scratch_buffer,
2364                              int row4x4, int column4x4) {
2365   // Set everything to false.
2366   memset(scratch_buffer->block_decoded, 0,
2367          sizeof(scratch_buffer->block_decoded));
2368   // Set specific edge cases to true.
2369   const int sb_size4 = sequence_header_.use_128x128_superblock ? 32 : 16;
2370   for (int plane = 0; plane < PlaneCount(); ++plane) {
2371     const int subsampling_x = subsampling_x_[plane];
2372     const int subsampling_y = subsampling_y_[plane];
2373     const int sb_width4 = (column4x4_end_ - column4x4) >> subsampling_x;
2374     const int sb_height4 = (row4x4_end_ - row4x4) >> subsampling_y;
2375     // The memset is equivalent to the following lines in the spec:
2376     // for ( x = -1; x <= ( sbSize4 >> subX ); x++ ) {
2377     //   if ( y < 0 && x < sbWidth4 ) {
2378     //     BlockDecoded[plane][y][x] = 1
2379     //   }
2380     // }
2381     const int num_elements =
2382         std::min((sb_size4 >> subsampling_x_[plane]) + 1, sb_width4) + 1;
2383     memset(&scratch_buffer->block_decoded[plane][0][0], 1, num_elements);
2384     // The for loop is equivalent to the following lines in the spec:
2385     // for ( y = -1; y <= ( sbSize4 >> subY ); y++ )
2386     //   if ( x < 0 && y < sbHeight4 )
2387     //     BlockDecoded[plane][y][x] = 1
2388     //   }
2389     // }
2390     // BlockDecoded[plane][sbSize4 >> subY][-1] = 0
2391     for (int y = -1; y < std::min((sb_size4 >> subsampling_y), sb_height4);
2392          ++y) {
2393       scratch_buffer->block_decoded[plane][y + 1][0] = true;
2394     }
2395   }
2396 }
2397 
ProcessSuperBlock(int row4x4,int column4x4,int block_width4x4,TileScratchBuffer * const scratch_buffer,ProcessingMode mode)2398 bool Tile::ProcessSuperBlock(int row4x4, int column4x4, int block_width4x4,
2399                              TileScratchBuffer* const scratch_buffer,
2400                              ProcessingMode mode) {
2401   const bool parsing =
2402       mode == kProcessingModeParseOnly || mode == kProcessingModeParseAndDecode;
2403   const bool decoding = mode == kProcessingModeDecodeOnly ||
2404                         mode == kProcessingModeParseAndDecode;
2405   if (parsing) {
2406     read_deltas_ = frame_header_.delta_q.present;
2407     ResetCdef(row4x4, column4x4);
2408   }
2409   if (decoding) {
2410     ClearBlockDecoded(scratch_buffer, row4x4, column4x4);
2411   }
2412   const BlockSize block_size = SuperBlockSize();
2413   if (parsing) {
2414     ReadLoopRestorationCoefficients(row4x4, column4x4, block_size);
2415   }
2416   const int row = row4x4 / block_width4x4;
2417   const int column = column4x4 / block_width4x4;
2418   if (parsing && decoding) {
2419     uint8_t* residual_buffer = residual_buffer_.get();
2420     if (!ProcessPartition(row4x4, column4x4,
2421                           block_parameters_holder_.Tree(row, column),
2422                           scratch_buffer, &residual_buffer)) {
2423       LIBGAV1_DLOG(ERROR, "Error decoding partition row: %d column: %d", row4x4,
2424                    column4x4);
2425       return false;
2426     }
2427     return true;
2428   }
2429   const int sb_row_index = SuperBlockRowIndex(row4x4);
2430   const int sb_column_index = SuperBlockColumnIndex(column4x4);
2431   if (parsing) {
2432     residual_buffer_threaded_[sb_row_index][sb_column_index] =
2433         residual_buffer_pool_->Get();
2434     if (residual_buffer_threaded_[sb_row_index][sb_column_index] == nullptr) {
2435       LIBGAV1_DLOG(ERROR, "Failed to get residual buffer.");
2436       return false;
2437     }
2438     uint8_t* residual_buffer =
2439         residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2440     if (!ProcessPartition(row4x4, column4x4,
2441                           block_parameters_holder_.Tree(row, column),
2442                           scratch_buffer, &residual_buffer)) {
2443       LIBGAV1_DLOG(ERROR, "Error parsing partition row: %d column: %d", row4x4,
2444                    column4x4);
2445       return false;
2446     }
2447   } else {
2448     uint8_t* residual_buffer =
2449         residual_buffer_threaded_[sb_row_index][sb_column_index]->buffer();
2450     if (!DecodeSuperBlock(block_parameters_holder_.Tree(row, column),
2451                           scratch_buffer, &residual_buffer)) {
2452       LIBGAV1_DLOG(ERROR, "Error decoding superblock row: %d column: %d",
2453                    row4x4, column4x4);
2454       return false;
2455     }
2456     residual_buffer_pool_->Release(
2457         std::move(residual_buffer_threaded_[sb_row_index][sb_column_index]));
2458   }
2459   return true;
2460 }
2461 
DecodeSuperBlock(ParameterTree * const tree,TileScratchBuffer * const scratch_buffer,ResidualPtr * residual)2462 bool Tile::DecodeSuperBlock(ParameterTree* const tree,
2463                             TileScratchBuffer* const scratch_buffer,
2464                             ResidualPtr* residual) {
2465   Stack<ParameterTree*, kDfsStackSize> stack;
2466   stack.Push(tree);
2467   do {
2468     ParameterTree* const node = stack.Pop();
2469     if (node->partition() != kPartitionNone) {
2470       for (int i = 3; i >= 0; --i) {
2471         if (node->children(i) == nullptr) continue;
2472         stack.Push(node->children(i));
2473       }
2474       continue;
2475     }
2476     if (!DecodeBlock(node, scratch_buffer, residual)) {
2477       LIBGAV1_DLOG(ERROR, "Error decoding block row: %d column: %d",
2478                    node->row4x4(), node->column4x4());
2479       return false;
2480     }
2481   } while (!stack.Empty());
2482   return true;
2483 }
2484 
ReadLoopRestorationCoefficients(int row4x4,int column4x4,BlockSize block_size)2485 void Tile::ReadLoopRestorationCoefficients(int row4x4, int column4x4,
2486                                            BlockSize block_size) {
2487   if (frame_header_.allow_intrabc) return;
2488   LoopRestorationInfo* const restoration_info = post_filter_.restoration_info();
2489   const bool is_superres_scaled =
2490       frame_header_.width != frame_header_.upscaled_width;
2491   for (int plane = kPlaneY; plane < PlaneCount(); ++plane) {
2492     LoopRestorationUnitInfo unit_info;
2493     if (restoration_info->PopulateUnitInfoForSuperBlock(
2494             static_cast<Plane>(plane), block_size, is_superres_scaled,
2495             frame_header_.superres_scale_denominator, row4x4, column4x4,
2496             &unit_info)) {
2497       for (int unit_row = unit_info.row_start; unit_row < unit_info.row_end;
2498            ++unit_row) {
2499         for (int unit_column = unit_info.column_start;
2500              unit_column < unit_info.column_end; ++unit_column) {
2501           const int unit_id = unit_row * restoration_info->num_horizontal_units(
2502                                              static_cast<Plane>(plane)) +
2503                               unit_column;
2504           restoration_info->ReadUnitCoefficients(
2505               &reader_, &symbol_decoder_context_, static_cast<Plane>(plane),
2506               unit_id, &reference_unit_info_);
2507         }
2508       }
2509     }
2510   }
2511 }
2512 
StoreMotionFieldMvsIntoCurrentFrame(const Block & block)2513 void Tile::StoreMotionFieldMvsIntoCurrentFrame(const Block& block) {
2514   if (frame_header_.refresh_frame_flags == 0 ||
2515       IsIntraFrame(frame_header_.frame_type)) {
2516     return;
2517   }
2518   // Iterate over odd rows/columns beginning at the first odd row/column for the
2519   // block. It is done this way because motion field mvs are only needed at a
2520   // 8x8 granularity.
2521   const int row_start4x4 = block.row4x4 | 1;
2522   const int row_limit4x4 =
2523       std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
2524   if (row_start4x4 >= row_limit4x4) return;
2525   const int column_start4x4 = block.column4x4 | 1;
2526   const int column_limit4x4 =
2527       std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
2528   if (column_start4x4 >= column_limit4x4) return;
2529 
2530   // The largest reference MV component that can be saved.
2531   constexpr int kRefMvsLimit = (1 << 12) - 1;
2532   const BlockParameters& bp = *block.bp;
2533   ReferenceInfo* reference_info = current_frame_.reference_info();
2534   for (int i = 1; i >= 0; --i) {
2535     const ReferenceFrameType reference_frame_to_store = bp.reference_frame[i];
2536     // Must make a local copy so that StoreMotionFieldMvs() knows there is no
2537     // overlap between load and store.
2538     const MotionVector mv_to_store = bp.mv.mv[i];
2539     const int mv_row = std::abs(mv_to_store.mv[MotionVector::kRow]);
2540     const int mv_column = std::abs(mv_to_store.mv[MotionVector::kColumn]);
2541     if (reference_frame_to_store > kReferenceFrameIntra &&
2542         // kRefMvsLimit equals 0x07FF, so we can first bitwise OR the two
2543         // absolute values and then compare with kRefMvsLimit to save a branch.
2544         // The next line is equivalent to:
2545         // mv_row <= kRefMvsLimit && mv_column <= kRefMvsLimit
2546         (mv_row | mv_column) <= kRefMvsLimit &&
2547         reference_info->relative_distance_from[reference_frame_to_store] < 0) {
2548       const int row_start8x8 = DivideBy2(row_start4x4);
2549       const int row_limit8x8 = DivideBy2(row_limit4x4);
2550       const int column_start8x8 = DivideBy2(column_start4x4);
2551       const int column_limit8x8 = DivideBy2(column_limit4x4);
2552       const int rows = row_limit8x8 - row_start8x8;
2553       const int columns = column_limit8x8 - column_start8x8;
2554       const ptrdiff_t stride = DivideBy2(current_frame_.columns4x4());
2555       ReferenceFrameType* const reference_frame_row_start =
2556           &reference_info
2557                ->motion_field_reference_frame[row_start8x8][column_start8x8];
2558       MotionVector* const mv =
2559           &reference_info->motion_field_mv[row_start8x8][column_start8x8];
2560 
2561       // Specialize columns cases 1, 2, 4, 8 and 16. This makes memset() inlined
2562       // and simplifies std::fill() for these cases.
2563       if (columns <= 1) {
2564         // Don't change the above condition to (columns == 1).
2565         // Condition (columns <= 1) may help the compiler simplify the inlining
2566         // of the general case of StoreMotionFieldMvs() by eliminating the
2567         // (columns == 0) case.
2568         assert(columns == 1);
2569         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2570                             1, reference_frame_row_start, mv);
2571       } else if (columns == 2) {
2572         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2573                             2, reference_frame_row_start, mv);
2574       } else if (columns == 4) {
2575         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2576                             4, reference_frame_row_start, mv);
2577       } else if (columns == 8) {
2578         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2579                             8, reference_frame_row_start, mv);
2580       } else if (columns == 16) {
2581         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2582                             16, reference_frame_row_start, mv);
2583       } else if (columns < 16) {
2584         // This always true condition (columns < 16) may help the compiler
2585         // simplify the inlining of the following function.
2586         // This general case is rare and usually only happens to the blocks
2587         // which contain the right boundary of the frame.
2588         StoreMotionFieldMvs(reference_frame_to_store, mv_to_store, stride, rows,
2589                             columns, reference_frame_row_start, mv);
2590       } else {
2591         assert(false);
2592       }
2593       return;
2594     }
2595   }
2596 }
2597 
2598 }  // namespace libgav1
2599