1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 18 #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <cstring> 23 #include <memory> 24 #include <mutex> // NOLINT (unapproved c++11 header) 25 #include <new> 26 #include <utility> 27 28 #include "src/dsp/constants.h" 29 #include "src/utils/common.h" 30 #include "src/utils/compiler_attributes.h" 31 #include "src/utils/constants.h" 32 #include "src/utils/memory.h" 33 #include "src/utils/stack.h" 34 35 namespace libgav1 { 36 37 // Buffer to facilitate decoding a superblock. 38 struct TileScratchBuffer : public MaxAlignedAllocable { 39 static constexpr int kBlockDecodedStride = 34; 40 InitTileScratchBuffer41 LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) { 42 #if LIBGAV1_MAX_BITDEPTH >= 10 43 const int pixel_size = (bitdepth == 8) ? 1 : 2; 44 #else 45 assert(bitdepth == 8); 46 static_cast<void>(bitdepth); 47 const int pixel_size = 1; 48 #endif 49 50 static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, ""); 51 constexpr int unaligned_convolve_buffer_stride = 52 kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop + 53 kConvolveScaleBorderRight; 54 convolve_block_buffer_stride = Align<ptrdiff_t>( 55 unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment); 56 constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels + 57 kConvolveBorderLeftTop + 58 kConvolveBorderBottom; 59 60 convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>( 61 kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride); 62 #if LIBGAV1_MSAN 63 // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero 64 // value to aid in future debugging. 65 memset(convolve_block_buffer.get(), 0x66, 66 convolve_buffer_height * convolve_block_buffer_stride); 67 #endif 68 69 return convolve_block_buffer != nullptr; 70 } 71 72 // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the 73 // prediction block size. This buffer is used to store that mask. The masks 74 // will be created for the Y plane and will be re-used for the U & V planes. 75 alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels]; 76 77 // For each instance of the TileScratchBuffer, only one of the following 78 // buffers will be used at any given time, so it is ok to share them in a 79 // union. 80 union { 81 // Buffers used for prediction process. 82 // Compound prediction calculations always output 16-bit values. Depending 83 // on the bitdepth the values may be treated as int16_t or uint16_t. See 84 // src/dsp/convolve.cc and src/dsp/warp.cc for explanations. 85 // Inter/intra calculations output Pixel values. 86 // These buffers always use width as the stride. This enables packing the 87 // values in and simplifies loads/stores for small values. 88 89 // 10/12 bit compound prediction and 10/12 bit inter/intra prediction. 90 alignas(kMaxAlignment) uint16_t 91 prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels]; 92 // 8 bit compound prediction buffer. 93 alignas(kMaxAlignment) int16_t 94 compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels]; 95 96 // Union usage note: This is used only by functions in the "intra" 97 // prediction path. 98 // 99 // Buffer used for storing subsampled luma samples needed for CFL 100 // prediction. This buffer is used to avoid repetition of the subsampling 101 // for the V plane when it is already done for the U plane. 102 int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride]; 103 }; 104 105 // Buffer used for convolve. The maximum size required for this buffer is: 106 // maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263. 107 // maximum block stride (with scaling and border aligned to 16) = 108 // (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size. 109 // Where pixel_size is (bitdepth == 8) ? 1 : 2. 110 // Has an alignment of kMaxAlignment when allocated. 111 AlignedUniquePtr<uint8_t> convolve_block_buffer; 112 ptrdiff_t convolve_block_buffer_stride; 113 114 // Flag indicating whether the data in |cfl_luma_buffer| is valid. 115 bool cfl_luma_buffer_valid; 116 117 // Equivalent to BlockDecoded array in the spec. This stores the decoded 118 // state of every 4x4 block in a superblock. It has 1 row/column border on 119 // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the 120 // spec uses "-1" as an index to access the left and top borders. In the 121 // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So 122 // all accesses into this array will be offset by +1 when compared with the 123 // spec. 124 bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride]; 125 }; 126 127 class TileScratchBufferPool { 128 public: Reset(int bitdepth)129 void Reset(int bitdepth) { 130 if (bitdepth_ == bitdepth) return; 131 #if LIBGAV1_MAX_BITDEPTH >= 10 132 if (bitdepth_ == 8 && bitdepth != 8) { 133 // We are going from a pixel size of 1 to a pixel size of 2. So invalidate 134 // the stack. 135 std::lock_guard<std::mutex> lock(mutex_); 136 while (!buffers_.Empty()) { 137 buffers_.Pop(); 138 } 139 } 140 #endif 141 bitdepth_ = bitdepth; 142 } 143 Get()144 std::unique_ptr<TileScratchBuffer> Get() { 145 std::lock_guard<std::mutex> lock(mutex_); 146 if (buffers_.Empty()) { 147 std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow) 148 TileScratchBuffer); 149 if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) { 150 return nullptr; 151 } 152 return scratch_buffer; 153 } 154 return buffers_.Pop(); 155 } 156 Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)157 void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) { 158 std::lock_guard<std::mutex> lock(mutex_); 159 buffers_.Push(std::move(scratch_buffer)); 160 } 161 162 private: 163 std::mutex mutex_; 164 // We will never need more than kMaxThreads scratch buffers since that is the 165 // maximum amount of work that will be done at any given time. 166 Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_ 167 LIBGAV1_GUARDED_BY(mutex_); 168 int bitdepth_ = 0; 169 }; 170 171 } // namespace libgav1 172 173 #endif // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 174