• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
18 #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
19 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <memory>
#include <mutex>  // NOLINT (unapproved c++11 header)
#include <new>
#include <utility>
27 
28 #include "src/dsp/constants.h"
29 #include "src/utils/common.h"
30 #include "src/utils/compiler_attributes.h"
31 #include "src/utils/constants.h"
32 #include "src/utils/memory.h"
33 #include "src/utils/stack.h"
34 
35 namespace libgav1 {
36 
37 // Buffer to facilitate decoding a superblock.
38 struct TileScratchBuffer : public MaxAlignedAllocable {
39   static constexpr int kBlockDecodedStride = 34;
40 
InitTileScratchBuffer41   LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) {
42 #if LIBGAV1_MAX_BITDEPTH >= 10
43     const int pixel_size = (bitdepth == 8) ? 1 : 2;
44 #else
45     assert(bitdepth == 8);
46     static_cast<void>(bitdepth);
47     const int pixel_size = 1;
48 #endif
49 
50     static_assert(kConvolveScaleBorderRight >= kConvolveBorderRight, "");
51     constexpr int unaligned_convolve_buffer_stride =
52         kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
53         kConvolveScaleBorderRight;
54     convolve_block_buffer_stride = Align<ptrdiff_t>(
55         unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
56     constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
57                                            kConvolveBorderLeftTop +
58                                            kConvolveBorderBottom;
59 
60     convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>(
61         kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride);
62 #if LIBGAV1_MSAN
63     // Quiet msan warnings in ConvolveScale2D_NEON(). Set with random non-zero
64     // value to aid in future debugging.
65     memset(convolve_block_buffer.get(), 0x66,
66            convolve_buffer_height * convolve_block_buffer_stride);
67 #endif
68 
69     return convolve_block_buffer != nullptr;
70   }
71 
72   // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the
73   // prediction block size. This buffer is used to store that mask. The masks
74   // will be created for the Y plane and will be re-used for the U & V planes.
75   alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels];
76 
77   // For each instance of the TileScratchBuffer, only one of the following
78   // buffers will be used at any given time, so it is ok to share them in a
79   // union.
80   union {
81     // Buffers used for prediction process.
82     // Compound prediction calculations always output 16-bit values. Depending
83     // on the bitdepth the values may be treated as int16_t or uint16_t. See
84     // src/dsp/convolve.cc and src/dsp/warp.cc for explanations.
85     // Inter/intra calculations output Pixel values.
86     // These buffers always use width as the stride. This enables packing the
87     // values in and simplifies loads/stores for small values.
88 
89     // 10/12 bit compound prediction and 10/12 bit inter/intra prediction.
90     alignas(kMaxAlignment) uint16_t
91         prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels];
92     // 8 bit compound prediction buffer.
93     alignas(kMaxAlignment) int16_t
94         compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels];
95 
96     // Union usage note: This is used only by functions in the "intra"
97     // prediction path.
98     //
99     // Buffer used for storing subsampled luma samples needed for CFL
100     // prediction. This buffer is used to avoid repetition of the subsampling
101     // for the V plane when it is already done for the U plane.
102     int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride];
103   };
104 
105   // Buffer used for convolve. The maximum size required for this buffer is:
106   //  maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263.
107   //  maximum block stride (with scaling and border aligned to 16) =
108   //     (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size.
109   //  Where pixel_size is (bitdepth == 8) ? 1 : 2.
110   // Has an alignment of kMaxAlignment when allocated.
111   AlignedUniquePtr<uint8_t> convolve_block_buffer;
112   ptrdiff_t convolve_block_buffer_stride;
113 
114   // Flag indicating whether the data in |cfl_luma_buffer| is valid.
115   bool cfl_luma_buffer_valid;
116 
117   // Equivalent to BlockDecoded array in the spec. This stores the decoded
118   // state of every 4x4 block in a superblock. It has 1 row/column border on
119   // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the
120   // spec uses "-1" as an index to access the left and top borders. In the
121   // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So
122   // all accesses into this array will be offset by +1 when compared with the
123   // spec.
124   bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride];
125 };
126 
127 class TileScratchBufferPool {
128  public:
Reset(int bitdepth)129   void Reset(int bitdepth) {
130     if (bitdepth_ == bitdepth) return;
131 #if LIBGAV1_MAX_BITDEPTH >= 10
132     if (bitdepth_ == 8 && bitdepth != 8) {
133       // We are going from a pixel size of 1 to a pixel size of 2. So invalidate
134       // the stack.
135       std::lock_guard<std::mutex> lock(mutex_);
136       while (!buffers_.Empty()) {
137         buffers_.Pop();
138       }
139     }
140 #endif
141     bitdepth_ = bitdepth;
142   }
143 
Get()144   std::unique_ptr<TileScratchBuffer> Get() {
145     std::lock_guard<std::mutex> lock(mutex_);
146     if (buffers_.Empty()) {
147       std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow)
148                                                             TileScratchBuffer);
149       if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) {
150         return nullptr;
151       }
152       return scratch_buffer;
153     }
154     return buffers_.Pop();
155   }
156 
Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)157   void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) {
158     std::lock_guard<std::mutex> lock(mutex_);
159     buffers_.Push(std::move(scratch_buffer));
160   }
161 
162  private:
163   std::mutex mutex_;
164   // We will never need more than kMaxThreads scratch buffers since that is the
165   // maximum amount of work that will be done at any given time.
166   Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_
167       LIBGAV1_GUARDED_BY(mutex_);
168   int bitdepth_ = 0;
169 };
170 
171 }  // namespace libgav1
172 
173 #endif  // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
174