• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_UTILS_COMMON_H_
18 #define LIBGAV1_SRC_UTILS_COMMON_H_
19 
20 #if defined(_MSC_VER)
21 #include <intrin.h>
22 #pragma intrinsic(_BitScanForward)
23 #pragma intrinsic(_BitScanReverse)
24 #if defined(_M_X64) || defined(_M_ARM64)
25 #pragma intrinsic(_BitScanReverse64)
26 #define HAVE_BITSCANREVERSE64
27 #endif  // defined(_M_X64) || defined(_M_ARM64)
28 #endif  // defined(_MSC_VER)
29 
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <cstdlib>
35 #include <cstring>
36 #include <type_traits>
37 
38 #include "src/utils/bit_mask_set.h"
39 #include "src/utils/constants.h"
40 #include "src/utils/memory.h"
41 #include "src/utils/types.h"
42 
43 namespace libgav1 {
44 
45 // LIBGAV1_RESTRICT
46 // Declares a pointer with the restrict type qualifier if available.
47 // This allows code to hint to the compiler that only this pointer references a
48 // particular object or memory region within the scope of the block in which it
49 // is declared. This may allow for improved optimizations due to the lack of
50 // pointer aliasing. See also:
51 // https://en.cppreference.com/w/c/language/restrict
52 // Note a template alias is not used for compatibility with older compilers
53 // (e.g., gcc < 10) that do not expand the type when instantiating a template
54 // function, either explicitly or in an assignment to a function pointer as is
55 // done within the dsp code. RestrictPtr<T>::type is an alternative to this,
56 // similar to std::add_const, but for conciseness the macro is preferred.
57 #ifdef __GNUC__
58 #define LIBGAV1_RESTRICT __restrict__
59 #elif defined(_MSC_VER)
60 #define LIBGAV1_RESTRICT __restrict
61 #else
62 #define LIBGAV1_RESTRICT
63 #endif
64 
// Rounds |value| up to the nearest multiple of |alignment|, which must be a
// nonzero power of 2.
template <typename T>
inline T Align(T value, T alignment) {
  assert(alignment != 0);
  // Adding |alignment| - 1 then clearing the low bits rounds up; this only
  // works when |alignment| is a power of two.
  const T mask = alignment - 1;
  return (value + mask) & ~mask;
}
72 
73 // Aligns |addr| to the desired |alignment|. |alignment| must be a power of 2.
AlignAddr(uint8_t * const addr,const uintptr_t alignment)74 inline uint8_t* AlignAddr(uint8_t* const addr, const uintptr_t alignment) {
75   const auto value = reinterpret_cast<uintptr_t>(addr);
76   return reinterpret_cast<uint8_t*>(Align(value, alignment));
77 }
78 
// Clamps |value| to the inclusive range [|low|, |high|].
inline int32_t Clip3(int32_t value, int32_t low, int32_t high) {
  if (value < low) return low;
  if (value > high) return high;
  return value;
}
82 
// Replicates the first pixel of the |width|-pixel line at |line_start| into
// the |left| pixels preceding it, and the last pixel into the |right| pixels
// following it.
template <typename Pixel>
void ExtendLine(void* const line_start, const int width, const int left,
                const int right) {
  auto* const line = static_cast<Pixel*>(line_start);
  // The left border ends where the line begins; the right border starts
  // immediately after the line.
  Memset(line - left, line[0], left);
  Memset(line + width, line[width - 1], right);
}
93 
94 // The following 2 templates set a block of data with uncontiguous memory to
95 // |value|. The compilers usually generate several branches to handle different
96 // cases of |columns| when inlining memset() and std::fill(), and these branches
97 // are unfortunately within the loop of |rows|. So calling these templates
98 // directly could be inefficient. It is recommended to specialize common cases
99 // of |columns|, such as 1, 2, 4, 8, 16 and 32, etc. in advance before
100 // processing the generic case of |columns|. The code size may be larger, but
101 // there would be big speed gains.
102 // Call template MemSetBlock<> when sizeof(|T|) is 1.
103 // Call template SetBlock<> when sizeof(|T|) is larger than 1.
// Fills a |rows| x |columns| block (rows |stride| elements apart) with
// |value| using memset(). Only valid when sizeof(T) == 1. |rows| must be
// at least 1.
template <typename T>
void MemSetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  static_assert(sizeof(T) == 1, "");
  T* row = dst;
  do {
    memset(row, value, columns);
    row += stride;
  } while (--rows != 0);
}
112 
// Fills a |rows| x |columns| block (rows |stride| elements apart) with
// |value|. For element types wider than one byte. |rows| must be at least 1.
template <typename T>
void SetBlock(int rows, int columns, T value, T* dst, ptrdiff_t stride) {
  T* row = dst;
  do {
    std::fill_n(row, columns, value);
    row += stride;
  } while (--rows != 0);
}
120 
#if defined(__GNUC__)

// Returns the number of leading zero bits in |n|. |n| must not be zero
// (the result of __builtin_clz(0) is undefined).
inline int CountLeadingZeros(uint32_t n) {
  assert(n != 0);
  return __builtin_clz(n);
}

// 64-bit overload. |n| must not be zero.
inline int CountLeadingZeros(uint64_t n) {
  assert(n != 0);
  return __builtin_clzll(n);
}

// Returns the number of trailing zero bits in |n|. |n| must not be zero
// (the result of __builtin_ctz(0) is undefined).
inline int CountTrailingZeros(uint32_t n) {
  assert(n != 0);
  return __builtin_ctz(n);
}

#elif defined(_MSC_VER)

// Returns the number of leading zero bits in |n|. |n| must not be zero.
inline int CountLeadingZeros(uint32_t n) {
  assert(n != 0);
  unsigned long first_set_bit;  // NOLINT(runtime/int)
  const unsigned char bit_set = _BitScanReverse(&first_set_bit, n);
  assert(bit_set != 0);
  static_cast<void>(bit_set);
  // |first_set_bit| is the index of the most significant set bit.
  // 31 ^ index is equivalent to 31 - index for indexes in [0, 31].
  return 31 ^ static_cast<int>(first_set_bit);
}

// 64-bit overload. |n| must not be zero. When _BitScanReverse64 is not
// available (32-bit x86/ARM), the high and low 32-bit halves are scanned
// separately.
inline int CountLeadingZeros(uint64_t n) {
  assert(n != 0);
  unsigned long first_set_bit;  // NOLINT(runtime/int)
#if defined(HAVE_BITSCANREVERSE64)
  const unsigned char bit_set =
      _BitScanReverse64(&first_set_bit, static_cast<unsigned __int64>(n));
#else   // !defined(HAVE_BITSCANREVERSE64)
  const auto n_hi = static_cast<unsigned long>(n >> 32);  // NOLINT(runtime/int)
  if (n_hi != 0) {
    const unsigned char bit_set = _BitScanReverse(&first_set_bit, n_hi);
    assert(bit_set != 0);
    static_cast<void>(bit_set);
    return 31 ^ static_cast<int>(first_set_bit);
  }
  const unsigned char bit_set = _BitScanReverse(
      &first_set_bit, static_cast<unsigned long>(n));  // NOLINT(runtime/int)
#endif  // defined(HAVE_BITSCANREVERSE64)
  assert(bit_set != 0);
  static_cast<void>(bit_set);
  return 63 ^ static_cast<int>(first_set_bit);
}

// Only needed to select the intrinsic above; keep the macro namespace clean.
#undef HAVE_BITSCANREVERSE64

// Returns the number of trailing zero bits in |n|. |n| must not be zero.
inline int CountTrailingZeros(uint32_t n) {
  assert(n != 0);
  unsigned long first_set_bit;  // NOLINT(runtime/int)
  const unsigned char bit_set = _BitScanForward(&first_set_bit, n);
  assert(bit_set != 0);
  static_cast<void>(bit_set);
  return static_cast<int>(first_set_bit);
}

#else  // !defined(__GNUC__) && !defined(_MSC_VER)

// Portable fallback: shifts |n| left one bit at a time until the bit at
// position |kMSB| is set. |n| must not be zero.
template <const int kMSB, typename T>
inline int CountLeadingZeros(T n) {
  assert(n != 0);
  const T msb = T{1} << kMSB;
  int count = 0;
  while ((n & msb) == 0) {
    ++count;
    n <<= 1;
  }
  return count;
}

inline int CountLeadingZeros(uint32_t n) { return CountLeadingZeros<31>(n); }

inline int CountLeadingZeros(uint64_t n) { return CountLeadingZeros<63>(n); }

// This is the algorithm on the left in Figure 5-23, Hacker's Delight, Second
// Edition, page 109. The book says:
//   If the number of trailing 0's is expected to be small or large, then the
//   simple loops shown in Figure 5-23 are quite fast.
inline int CountTrailingZeros(uint32_t n) {
  assert(n != 0);
  // Create a word with 1's at the positions of the trailing 0's in |n|, and
  // 0's elsewhere (e.g., 01011000 => 00000111).
  n = ~n & (n - 1);
  int count = 0;
  while (n != 0) {
    ++count;
    n >>= 1;
  }
  return count;
}

#endif  // defined(__GNUC__)
218 
FloorLog2(int32_t n)219 inline int FloorLog2(int32_t n) {
220   assert(n > 0);
221   return 31 ^ CountLeadingZeros(static_cast<uint32_t>(n));
222 }
223 
FloorLog2(uint32_t n)224 inline int FloorLog2(uint32_t n) {
225   assert(n > 0);
226   return 31 ^ CountLeadingZeros(n);
227 }
228 
FloorLog2(int64_t n)229 inline int FloorLog2(int64_t n) {
230   assert(n > 0);
231   return 63 ^ CountLeadingZeros(static_cast<uint64_t>(n));
232 }
233 
FloorLog2(uint64_t n)234 inline int FloorLog2(uint64_t n) {
235   assert(n > 0);
236   return 63 ^ CountLeadingZeros(n);
237 }
238 
CeilLog2(unsigned int n)239 inline int CeilLog2(unsigned int n) {
240   // The expression FloorLog2(n - 1) + 1 is undefined not only for n == 0 but
241   // also for n == 1, so this expression must be guarded by the n < 2 test. An
242   // alternative implementation is:
243   // return (n == 0) ? 0 : FloorLog2(n) + static_cast<int>((n & (n - 1)) != 0);
244   return (n < 2) ? 0 : FloorLog2(n - 1) + 1;
245 }
246 
// Shifts |value| right by |bits|, rounding the result toward positive
// infinity. |bits| must be > 0.
inline int RightShiftWithCeiling(int value, int bits) {
  assert(bits > 0);
  const int bias = (1 << bits) - 1;
  return (value + bias) >> bits;
}
251 
// Shifts |value| right by |bits|, rounding half up. |bits| must be >= 0.
inline int32_t RightShiftWithRounding(int32_t value, int bits) {
  assert(bits >= 0);
  // Half of the divisor; zero when |bits| is 0, so the value passes through.
  const int32_t rounding = (1 << bits) >> 1;
  return (value + rounding) >> bits;
}
256 
// Shifts |value| right by |bits|, rounding half up. |bits| must be >= 0.
inline uint32_t RightShiftWithRounding(uint32_t value, int bits) {
  assert(bits >= 0);
  // Use an unsigned 1 for the rounding term: the signed form (1 << bits) has
  // undefined behavior when |bits| == 31 (shift into the sign bit of int),
  // and |bits| values up to 31 are valid for this unsigned overload.
  return (value + ((uint32_t{1} << bits) >> 1)) >> bits;
}
261 
// This variant is used when |value| can exceed 32 bits. Although the final
// result must always fit into int32_t.
inline int32_t RightShiftWithRounding(int64_t value, int bits) {
  assert(bits >= 0);
  // Half of the divisor, computed in 64 bits to avoid overflow.
  const int64_t rounding = (int64_t{1} << bits) >> 1;
  return static_cast<int32_t>((value + rounding) >> bits);
}
268 
RightShiftWithRoundingSigned(int32_t value,int bits)269 inline int32_t RightShiftWithRoundingSigned(int32_t value, int bits) {
270   assert(bits > 0);
271   // The next line is equivalent to:
272   // return (value >= 0) ? RightShiftWithRounding(value, bits)
273   //                     : -RightShiftWithRounding(-value, bits);
274   return RightShiftWithRounding(value + (value >> 31), bits);
275 }
276 
277 // This variant is used when |value| can exceed 32 bits. Although the final
278 // result must always fit into int32_t.
RightShiftWithRoundingSigned(int64_t value,int bits)279 inline int32_t RightShiftWithRoundingSigned(int64_t value, int bits) {
280   assert(bits > 0);
281   // The next line is equivalent to:
282   // return (value >= 0) ? RightShiftWithRounding(value, bits)
283   //                     : -RightShiftWithRounding(-value, bits);
284   return RightShiftWithRounding(value + (value >> 63), bits);
285 }
286 
// Division by powers of 2 via arithmetic right shift. Note that for negative
// |n| these round toward negative infinity, unlike operator/ which rounds
// toward zero.
constexpr int DivideBy2(int n) { return n >> 1; }
constexpr int DivideBy4(int n) { return n >> 2; }
constexpr int DivideBy8(int n) { return n >> 3; }
constexpr int DivideBy16(int n) { return n >> 4; }
constexpr int DivideBy32(int n) { return n >> 5; }
constexpr int DivideBy64(int n) { return n >> 6; }
constexpr int DivideBy128(int n) { return n >> 7; }
294 
// Shifts |value| left by |bits|. The shift is done in the unsigned domain
// because left-shifting a negative signed value is undefined behavior; the
// asserts reject inputs whose shifted result would not fit in an int.
inline int LeftShift(int value, int bits) {
  assert(bits >= 0);
  assert(value >= -(int64_t{1} << (31 - bits)));
  assert(value <= (int64_t{1} << (31 - bits)) - ((bits == 0) ? 1 : 0));
  const auto unsigned_value = static_cast<uint32_t>(value);
  return static_cast<int>(unsigned_value << bits);
}
// Multiplication by powers of 2, implemented with LeftShift() to avoid
// undefined behavior for negative |n|.
inline int MultiplyBy2(int n) { return LeftShift(n, 1); }
inline int MultiplyBy4(int n) { return LeftShift(n, 2); }
inline int MultiplyBy8(int n) { return LeftShift(n, 3); }
inline int MultiplyBy16(int n) { return LeftShift(n, 4); }
inline int MultiplyBy32(int n) { return LeftShift(n, 5); }
inline int MultiplyBy64(int n) { return LeftShift(n, 6); }
309 
// Remainder modulo 32/64 via bit masking. Intended for non-negative |n|.
constexpr int Mod32(int n) { return n & 31; }
constexpr int Mod64(int n) { return n & 63; }
312 
313 //------------------------------------------------------------------------------
314 // Bitstream functions
315 
// Returns true if |type| is an intra frame (key frame or intra-only frame).
constexpr bool IsIntraFrame(FrameType type) {
  return type == kFrameKey || type == kFrameIntraOnly;
}
319 
GetTransformClass(TransformType tx_type)320 inline TransformClass GetTransformClass(TransformType tx_type) {
321   constexpr BitMaskSet kTransformClassVerticalMask(
322       kTransformTypeIdentityDct, kTransformTypeIdentityAdst,
323       kTransformTypeIdentityFlipadst);
324   if (kTransformClassVerticalMask.Contains(tx_type)) {
325     return kTransformClassVertical;
326   }
327   constexpr BitMaskSet kTransformClassHorizontalMask(
328       kTransformTypeDctIdentity, kTransformTypeAdstIdentity,
329       kTransformTypeFlipadstIdentity);
330   if (kTransformClassHorizontalMask.Contains(tx_type)) {
331     return kTransformClassHorizontal;
332   }
333   return kTransformClass2D;
334 }
335 
RowOrColumn4x4ToPixel(int row_or_column4x4,Plane plane,int8_t subsampling)336 inline int RowOrColumn4x4ToPixel(int row_or_column4x4, Plane plane,
337                                  int8_t subsampling) {
338   return MultiplyBy4(row_or_column4x4) >> (plane == kPlaneY ? 0 : subsampling);
339 }
340 
// Maps |plane| to its PlaneType: 0 for the luma plane, 1 for chroma planes.
constexpr PlaneType GetPlaneType(Plane plane) {
  return static_cast<PlaneType>(plane != kPlaneY);
}
344 
// 5.11.44.
// Returns true if |mode| is one of the directional intra prediction modes
// (the contiguous range from kPredictionModeVertical to kPredictionModeD67).
constexpr bool IsDirectionalMode(PredictionMode mode) {
  return mode >= kPredictionModeVertical && mode <= kPredictionModeD67;
}
349 
350 // 5.9.3.
351 //
352 // |a| and |b| are order hints, treated as unsigned order_hint_bits-bit
353 // integers. |order_hint_shift_bits| equals (32 - order_hint_bits) % 32.
354 // order_hint_bits is at most 8, so |order_hint_shift_bits| is zero or a
355 // value between 24 and 31 (inclusive).
356 //
357 // If |order_hint_shift_bits| is zero, |a| and |b| are both zeros, and the
358 // result is zero. If |order_hint_shift_bits| is not zero, returns the
359 // signed difference |a| - |b| using "modular arithmetic". More precisely, the
360 // signed difference |a| - |b| is treated as a signed order_hint_bits-bit
361 // integer and cast to an int. The returned difference is between
362 // -(1 << (order_hint_bits - 1)) and (1 << (order_hint_bits - 1)) - 1
363 // (inclusive).
364 //
365 // NOTE: |a| and |b| are the order_hint_bits least significant bits of the
366 // actual values. This function returns the signed difference between the
367 // actual values. The returned difference is correct as long as the actual
368 // values are not more than 1 << (order_hint_bits - 1) - 1 apart.
369 //
370 // Example: Suppose order_hint_bits is 4 and |order_hint_shift_bits|
371 // is 28. Then |a| and |b| are in the range [0, 15], and the actual values for
372 // |a| and |b| must not be more than 7 apart. (If the actual values for |a| and
373 // |b| are exactly 8 apart, this function cannot tell whether the actual value
374 // for |a| is before or after the actual value for |b|.)
375 //
376 // First, consider the order hints 2 and 6. For this simple case, we have
377 //   GetRelativeDistance(2, 6, 28) = 2 - 6 = -4, and
378 //   GetRelativeDistance(6, 2, 28) = 6 - 2 = 4.
379 //
380 // On the other hand, consider the order hints 2 and 14. The order hints are
381 // 12 (> 7) apart, so we need to use the actual values instead. The actual
382 // values may be 34 (= 2 mod 16) and 30 (= 14 mod 16), respectively. Therefore
383 // we have
384 //   GetRelativeDistance(2, 14, 28) = 34 - 30 = 4, and
385 //   GetRelativeDistance(14, 2, 28) = 30 - 34 = -4.
386 //
387 // The following comments apply only to specific CPUs' SIMD implementations,
388 // such as intrinsics code.
389 // For the 2 shift operations in this function, if the SIMD packed data is
390 // 16-bit wide, try to use |order_hint_shift_bits| - 16 as the number of bits to
391 // shift; If the SIMD packed data is 8-bit wide, try to use
392 // |order_hint_shift_bits| - 24 as as the number of bits to shift.
393 // |order_hint_shift_bits| - 16 and |order_hint_shift_bits| - 24 could be -16 or
394 // -24. In these cases diff is 0, and the behavior of left or right shifting -16
395 // or -24 bits is defined for x86 SIMD instructions and ARM NEON instructions,
396 // and the result of shifting 0 is still 0. There is no guarantee that this
397 // behavior and result apply to other CPUs' SIMD instructions.
inline int GetRelativeDistance(const unsigned int a, const unsigned int b,
                               const unsigned int order_hint_shift_bits) {
  const int diff = static_cast<int>(a) - static_cast<int>(b);
  assert(order_hint_shift_bits <= 31);
  if (order_hint_shift_bits == 0) {
    // Order hints are disabled; both inputs must be zero and the result is 0.
    assert(a == 0);
    assert(b == 0);
  } else {
    assert(order_hint_shift_bits >= 24);  // i.e., order_hint_bits <= 8
    assert(a < (1u << (32 - order_hint_shift_bits)));
    assert(b < (1u << (32 - order_hint_shift_bits)));
    assert(diff < (1 << (32 - order_hint_shift_bits)));
    assert(diff >= -(1 << (32 - order_hint_shift_bits)));
  }
  // Sign extend the result of subtracting the values.
  // Cast to unsigned int and then left shift to avoid undefined behavior with
  // negative values. Cast to int to do the sign extension through right shift.
  // This requires the right shift of a signed integer be an arithmetic shift,
  // which is true for clang, gcc, and Visual C++.
  // These two casts do not generate extra instructions.
  // Don't use LeftShift(diff) since a valid diff may fail its assertions.
  // For example, GetRelativeDistance(2, 14, 28), diff equals -12 and is less
  // than the minimum allowed value of LeftShift() which is -8.
  // The next 3 lines are equivalent to:
  // const int order_hint_bits = Mod32(32 - order_hint_shift_bits);
  // const int m = (1 << order_hint_bits) >> 1;
  // return (diff & (m - 1)) - (diff & m);
  return static_cast<int>(static_cast<unsigned int>(diff)
                          << order_hint_shift_bits) >>
         order_hint_shift_bits;
}
429 
// Applies |sign| (must be 0 or -1) to |value|, i.e.,
//   return (sign == 0) ? value : -value;
// and does so without a branch.
constexpr int ApplySign(int value, int sign) {
  // XOR with -1 is bitwise NOT, and subtracting -1 adds 1; together they form
  // two's-complement negation. XOR with 0 and subtracting 0 are no-ops.
  return (value ^ sign) - sign;
}
434 
// 7.9.3. (without the clamp for numerator and denominator).
// Scales |mv| by |numerator| * |division_multiplier| / 2^14 and stores the
// clamped result in |projection_mv|.
inline void GetMvProjection(const MotionVector& mv, int numerator,
                            int division_multiplier,
                            MotionVector* projection_mv) {
  // Allow |numerator| to be 0 so that this function can be called
  // unconditionally. When |numerator| is 0, |projection_mv| will be 0, and
  // this is what we want.
  assert(std::abs(numerator) <= kMaxFrameDistance);
  for (int i = 0; i < 2; ++i) {
    // Scale down with rounding, then clamp each component to the valid
    // projection motion vector range.
    projection_mv->mv[i] =
        Clip3(RightShiftWithRoundingSigned(
                  mv.mv[i] * numerator * division_multiplier, 14),
              -kProjectionMvClamp, kProjectionMvClamp);
  }
}
450 
// 7.9.4.
// Offsets |value| by |delta| / 64, with the offset's direction selected by
// |dst_sign| (must be 0 or -1; see ApplySign()).
constexpr int Project(int value, int delta, int dst_sign) {
  return value + ApplySign(delta / 64, dst_sign);
}
455 
// Returns true if both the width and the height of |size| are smaller than 8.
// All block sizes ordered before kBlock8x8 have a dimension of 4; kBlock4x16
// is excluded because its height is 16.
inline bool IsBlockSmallerThan8x8(BlockSize size) {
  return size < kBlock8x8 && size != kBlock4x16;
}
459 
// Returns true if either the width or the height of the block is equal to
// four. Sizes ordered before kBlock8x8 have a dimension of 4; kBlock16x4 is
// the only later size with one.
inline bool IsBlockDimension4(BlockSize size) {
  return size < kBlock8x8 || size == kBlock16x4;
}
465 
// Converts bitdepth 8, 10, and 12 to array index 0, 1, and 2, respectively.
constexpr int BitdepthToArrayIndex(int bitdepth) {
  return (bitdepth - 8) >> 1;
}
468 
// Maps a square transform to an index between [0, 4]. kTransformSize4x4 maps
// to 0, kTransformSize8x8 maps to 1 and so on. |tx_size| must be square.
inline int TransformSizeToSquareTransformIndex(TransformSize tx_size) {
  assert(kTransformWidth[tx_size] == kTransformHeight[tx_size]);

  // The values of the square transform sizes happen to be in the right
  // ranges, so we can just divide them by 4 to get the indexes. The
  // static_asserts below pin each square size to its 4-wide bucket, e.g.
  // kTransformSize8x8 in [4, 8) divides down to 1.
  static_assert(
      std::is_unsigned<std::underlying_type<TransformSize>::type>::value, "");
  static_assert(kTransformSize4x4 < 4, "");
  static_assert(4 <= kTransformSize8x8 && kTransformSize8x8 < 8, "");
  static_assert(8 <= kTransformSize16x16 && kTransformSize16x16 < 12, "");
  static_assert(12 <= kTransformSize32x32 && kTransformSize32x32 < 16, "");
  static_assert(16 <= kTransformSize64x64 && kTransformSize64x64 < 20, "");
  return DivideBy4(tx_size);
}
485 
// Gets the corresponding Y/U/V position, to set and get filter masks in
// deblock filtering. Returns |luma_position| unchanged for the Y plane
// (whose |subsampling| must be 0); for a subsampled U/V plane, returns the
// odd position.
constexpr int GetDeblockPosition(const int luma_position,
                                 const int subsampling) {
  return luma_position | subsampling;
}
494 
495 // Returns the size of the residual buffer required to hold the residual values
496 // for a block or frame of size |rows| by |columns| (taking into account
497 // |subsampling_x|, |subsampling_y| and |residual_size|). |residual_size| is the
498 // number of bytes required to represent one residual value.
GetResidualBufferSize(const int rows,const int columns,const int subsampling_x,const int subsampling_y,const size_t residual_size)499 inline size_t GetResidualBufferSize(const int rows, const int columns,
500                                     const int subsampling_x,
501                                     const int subsampling_y,
502                                     const size_t residual_size) {
503   // The subsampling multipliers are:
504   //   Both x and y are subsampled: 3 / 2.
505   //   Only x or y is subsampled: 2 / 1 (which is equivalent to 4 / 2).
506   //   Both x and y are not subsampled: 3 / 1 (which is equivalent to 6 / 2).
507   // So we compute the final subsampling multiplier as follows:
508   //   multiplier = (2 + (4 >> subsampling_x >> subsampling_y)) / 2.
509   // Add 32 * |kResidualPaddingVertical| padding to avoid bottom boundary checks
510   // when parsing quantized coefficients.
511   const int subsampling_multiplier_num =
512       2 + (4 >> subsampling_x >> subsampling_y);
513   const int number_elements =
514       (rows * columns * subsampling_multiplier_num) >> 1;
515   const int tx_padding = 32 * kResidualPaddingVertical;
516   return residual_size * (number_elements + tx_padding);
517 }
518 
// This function is equivalent to:
// std::min({kTransformWidthLog2[tx_size] - 2,
//           kTransformWidthLog2[left_tx_size] - 2,
//           2});
// Each term contributes 1 only when both transform sizes are ordered after
// the threshold size, so the sum is 0, 1, or 2.
constexpr LoopFilterTransformSizeId GetTransformSizeIdWidth(
    TransformSize tx_size, TransformSize left_tx_size) {
  return static_cast<LoopFilterTransformSizeId>(
      static_cast<int>(tx_size > kTransformSize4x16 &&
                       left_tx_size > kTransformSize4x16) +
      static_cast<int>(tx_size > kTransformSize8x32 &&
                       left_tx_size > kTransformSize8x32));
}
531 
532 // This is used for 7.11.3.4 Block Inter Prediction Process, to select convolve
533 // filters.
GetFilterIndex(const int filter_index,const int length)534 inline int GetFilterIndex(const int filter_index, const int length) {
535   if (length <= 4) {
536     if (filter_index == kInterpolationFilterEightTap ||
537         filter_index == kInterpolationFilterEightTapSharp) {
538       return 4;
539     }
540     if (filter_index == kInterpolationFilterEightTapSmooth) {
541       return 5;
542     }
543   }
544   return filter_index;
545 }
546 
// Divides |value| by 1 << |subsampling|, rounding half up. Since
// |subsampling| can only be 0 or 1, this has identical results to
// RightShiftWithRounding.
constexpr int SubsampledValue(int value, int subsampling) {
  return (value + subsampling) >> subsampling;
}
552 
553 }  // namespace libgav1
554 
555 #endif  // LIBGAV1_SRC_UTILS_COMMON_H_
556