1 /* 2 * Copyright 2019 The libgav1 Authors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 18 #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 19 20 #include <cstdint> 21 #include <mutex> // NOLINT (unapproved c++11 header) 22 23 #include "src/dsp/constants.h" 24 #include "src/utils/common.h" 25 #include "src/utils/compiler_attributes.h" 26 #include "src/utils/constants.h" 27 #include "src/utils/memory.h" 28 #include "src/utils/stack.h" 29 30 namespace libgav1 { 31 32 // Buffer to facilitate decoding a superblock. 33 struct TileScratchBuffer : public MaxAlignedAllocable { 34 static constexpr int kBlockDecodedStride = 34; 35 InitTileScratchBuffer36 LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) { 37 #if LIBGAV1_MAX_BITDEPTH >= 10 38 const int pixel_size = (bitdepth == 8) ? 1 : 2; 39 #else 40 assert(bitdepth == 8); 41 static_cast<void>(bitdepth); 42 const int pixel_size = 1; 43 #endif 44 45 constexpr int unaligned_convolve_buffer_stride = 46 kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop + 47 kConvolveBorderRight; 48 convolve_block_buffer_stride = Align<ptrdiff_t>( 49 unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment); 50 constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels + 51 kConvolveBorderLeftTop + 52 kConvolveBorderBottom; 53 54 convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>( 55 kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride); 56 return convolve_block_buffer != nullptr; 57 } 58 59 // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the 60 // prediction block size. This buffer is used to store that mask. The masks 61 // will be created for the Y plane and will be re-used for the U & V planes. 62 alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels]; 63 64 // For each instance of the TileScratchBuffer, only one of the following 65 // buffers will be used at any given time, so it is ok to share them in a 66 // union. 67 union { 68 // Buffers used for prediction process. 69 // Compound prediction calculations always output 16-bit values. Depending 70 // on the bitdepth the values may be treated as int16_t or uint16_t. See 71 // src/dsp/convolve.cc and src/dsp/warp.cc for explanations. 72 // Inter/intra calculations output Pixel values. 73 // These buffers always use width as the stride. This enables packing the 74 // values in and simplifies loads/stores for small values. 75 76 // 10/12 bit compound prediction and 10/12 bit inter/intra prediction. 77 alignas(kMaxAlignment) uint16_t 78 prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels]; 79 // 8 bit compound prediction buffer. 80 alignas(kMaxAlignment) int16_t 81 compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels]; 82 83 // Union usage note: This is used only by functions in the "intra" 84 // prediction path. 85 // 86 // Buffer used for storing subsampled luma samples needed for CFL 87 // prediction. This buffer is used to avoid repetition of the subsampling 88 // for the V plane when it is already done for the U plane. 89 int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride]; 90 }; 91 92 // Buffer used for convolve. The maximum size required for this buffer is: 93 // maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263. 94 // maximum block stride (with scaling and border aligned to 16) = 95 // (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size. 96 // Where pixel_size is (bitdepth == 8) ? 1 : 2. 97 // Has an alignment of kMaxAlignment when allocated. 98 AlignedUniquePtr<uint8_t> convolve_block_buffer; 99 ptrdiff_t convolve_block_buffer_stride; 100 101 // Flag indicating whether the data in |cfl_luma_buffer| is valid. 102 bool cfl_luma_buffer_valid; 103 104 // Equivalent to BlockDecoded array in the spec. This stores the decoded 105 // state of every 4x4 block in a superblock. It has 1 row/column border on 106 // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the 107 // spec uses "-1" as an index to access the left and top borders. In the 108 // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So 109 // all accesses into this array will be offset by +1 when compared with the 110 // spec. 111 bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride]; 112 }; 113 114 class TileScratchBufferPool { 115 public: Reset(int bitdepth)116 void Reset(int bitdepth) { 117 if (bitdepth_ == bitdepth) return; 118 #if LIBGAV1_MAX_BITDEPTH >= 10 119 if (bitdepth_ == 8 && bitdepth != 8) { 120 // We are going from a pixel size of 1 to a pixel size of 2. So invalidate 121 // the stack. 122 std::lock_guard<std::mutex> lock(mutex_); 123 while (!buffers_.Empty()) { 124 buffers_.Pop(); 125 } 126 } 127 #endif 128 bitdepth_ = bitdepth; 129 } 130 Get()131 std::unique_ptr<TileScratchBuffer> Get() { 132 std::lock_guard<std::mutex> lock(mutex_); 133 if (buffers_.Empty()) { 134 std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow) 135 TileScratchBuffer); 136 if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) { 137 return nullptr; 138 } 139 return scratch_buffer; 140 } 141 return buffers_.Pop(); 142 } 143 Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)144 void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) { 145 std::lock_guard<std::mutex> lock(mutex_); 146 buffers_.Push(std::move(scratch_buffer)); 147 } 148 149 private: 150 std::mutex mutex_; 151 // We will never need more than kMaxThreads scratch buffers since that is the 152 // maximum amount of work that will be done at any given time. 153 Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_ 154 LIBGAV1_GUARDED_BY(mutex_); 155 int bitdepth_ = 0; 156 }; 157 158 } // namespace libgav1 159 160 #endif // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_ 161