1 /*
2 * Copyright 2019 The libgav1 Authors
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LIBGAV1_SRC_DSP_DSP_H_
18 #define LIBGAV1_SRC_DSP_DSP_H_
19
20 #include <cstddef> // ptrdiff_t
21 #include <cstdint>
22 #include <cstdlib>
23
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/film_grain_common.h"
27 #include "src/utils/cpu.h"
28 #include "src/utils/reference_info.h"
29 #include "src/utils/types.h"
30
31 namespace libgav1 {
32 namespace dsp {
33
// Default for LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS: if the build has not defined
// the macro it is defined as 0 here. A value supplied by the build is left
// untouched.
#if !defined(LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS)
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0
#endif
37
// Intra predictor kinds. The enumerators are consecutive starting at 0.
// kNumIntraPredictors is the number of predictors; it is used as an array
// dimension (see IntraPredictorFuncs) and is not a predictor itself.
enum IntraPredictor : uint8_t {
  kIntraPredictorDcFill,
  kIntraPredictorDcTop,
  kIntraPredictorDcLeft,
  kIntraPredictorDc,
  kIntraPredictorVertical,
  kIntraPredictorHorizontal,
  kIntraPredictorPaeth,
  kIntraPredictorSmooth,
  kIntraPredictorSmoothVertical,
  kIntraPredictorSmoothHorizontal,
  kNumIntraPredictors
};
51
// List of valid 1D transforms. The enumerators are consecutive starting at 0.
// kNum1DTransforms is the number of transforms; it is used as an array
// dimension (see InverseTransformAddFuncs).
enum Transform1D : uint8_t {
  k1DTransformDct,   // Discrete Cosine Transform.
  k1DTransformAdst,  // Asymmetric Discrete Sine Transform.
  k1DTransformIdentity,
  k1DTransformWht,  // Walsh Hadamard Transform.
  kNum1DTransforms
};
60
// List of valid 1D transform sizes. Not all transforms may be available for all
// the sizes. The enumerators are consecutive starting at 0.
// kNum1DTransformSizes is the number of sizes; it is used as an array
// dimension (see InverseTransformAddFuncs).
enum TransformSize1D : uint8_t {
  k1DTransformSize4,
  k1DTransformSize8,
  k1DTransformSize16,
  k1DTransformSize32,
  k1DTransformSize64,
  kNum1DTransformSizes
};
71
// The maximum width of the loop filter, fewer pixels may be filtered depending
// on strength thresholds. The enumerators are consecutive starting at 0.
// kNumLoopFilterSizes is the number of sizes; it is used as an array dimension
// (see LoopFilterFuncs).
enum LoopFilterSize : uint8_t {
  kLoopFilterSize4,
  kLoopFilterSize6,
  kLoopFilterSize8,
  kLoopFilterSize14,
  kNumLoopFilterSizes
};
81
82 //------------------------------------------------------------------------------
83 // ToString()
84 //
85 // These functions are meant to be used only in debug logging and within tests.
86 // They are defined inline to avoid including the strings in the release
87 // library when logging is disabled; unreferenced functions will not be added to
88 // any object file in that case.
89
ToString(const IntraPredictor predictor)90 inline const char* ToString(const IntraPredictor predictor) {
91 switch (predictor) {
92 case kIntraPredictorDcFill:
93 return "kIntraPredictorDcFill";
94 case kIntraPredictorDcTop:
95 return "kIntraPredictorDcTop";
96 case kIntraPredictorDcLeft:
97 return "kIntraPredictorDcLeft";
98 case kIntraPredictorDc:
99 return "kIntraPredictorDc";
100 case kIntraPredictorVertical:
101 return "kIntraPredictorVertical";
102 case kIntraPredictorHorizontal:
103 return "kIntraPredictorHorizontal";
104 case kIntraPredictorPaeth:
105 return "kIntraPredictorPaeth";
106 case kIntraPredictorSmooth:
107 return "kIntraPredictorSmooth";
108 case kIntraPredictorSmoothVertical:
109 return "kIntraPredictorSmoothVertical";
110 case kIntraPredictorSmoothHorizontal:
111 return "kIntraPredictorSmoothHorizontal";
112 case kNumIntraPredictors:
113 return "kNumIntraPredictors";
114 }
115 abort();
116 }
117
ToString(const Transform1D transform)118 inline const char* ToString(const Transform1D transform) {
119 switch (transform) {
120 case k1DTransformDct:
121 return "k1DTransformDct";
122 case k1DTransformAdst:
123 return "k1DTransformAdst";
124 case k1DTransformIdentity:
125 return "k1DTransformIdentity";
126 case k1DTransformWht:
127 return "k1DTransformWht";
128 case kNum1DTransforms:
129 return "kNum1DTransforms";
130 }
131 abort();
132 }
133
ToString(const TransformSize1D transform_size)134 inline const char* ToString(const TransformSize1D transform_size) {
135 switch (transform_size) {
136 case k1DTransformSize4:
137 return "k1DTransformSize4";
138 case k1DTransformSize8:
139 return "k1DTransformSize8";
140 case k1DTransformSize16:
141 return "k1DTransformSize16";
142 case k1DTransformSize32:
143 return "k1DTransformSize32";
144 case k1DTransformSize64:
145 return "k1DTransformSize64";
146 case kNum1DTransformSizes:
147 return "kNum1DTransformSizes";
148 }
149 abort();
150 }
151
ToString(const LoopFilterSize filter_size)152 inline const char* ToString(const LoopFilterSize filter_size) {
153 switch (filter_size) {
154 case kLoopFilterSize4:
155 return "kLoopFilterSize4";
156 case kLoopFilterSize6:
157 return "kLoopFilterSize6";
158 case kLoopFilterSize8:
159 return "kLoopFilterSize8";
160 case kLoopFilterSize14:
161 return "kLoopFilterSize14";
162 case kNumLoopFilterSizes:
163 return "kNumLoopFilterSizes";
164 }
165 abort();
166 }
167
ToString(const LoopFilterType filter_type)168 inline const char* ToString(const LoopFilterType filter_type) {
169 switch (filter_type) {
170 case kLoopFilterTypeVertical:
171 return "kLoopFilterTypeVertical";
172 case kLoopFilterTypeHorizontal:
173 return "kLoopFilterTypeHorizontal";
174 case kNumLoopFilterTypes:
175 return "kNumLoopFilterTypes";
176 }
177 abort();
178 }
179
180 //------------------------------------------------------------------------------
181 // Intra predictors. Section 7.11.2.
182 // These require access to one or both of the top row and left column. Some may
183 // access the top-left (top[-1]), top-right (top[width+N]), bottom-left
184 // (left[height+N]) or upper-left (left[-1]).
185
// Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11),
// 7.11.2.5, 7.11.2.6.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left
// of |dst|. top-left and bottom-left may be accessed.
using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
                                    const void* top, const void* left);
194 using IntraPredictorFuncs =
195 IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors];
196
// Directional intra predictor function signature, zone 1 (0 < angle < 90).
// Section 7.11.2.4 (#7).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |width| and |height| give the dimensions of the block.
// |xstep| is the scaled starting index to |top| from
// kDirectionalIntraPredictorDerivative. |upsampled_top| indicates whether
// |top| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. top-right
// is accessed.
using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* top, int width,
                                                    int height, int xstep,
                                                    bool upsampled_top);
211
// Directional intra predictor function signature, zone 2 (90 < angle < 180).
// Section 7.11.2.4 (#8).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left of
// |dst|. |width| and |height| give the dimensions of the block. |xstep| and
// |ystep| are the scaled starting index to |top| and |left|, respectively,
// from kDirectionalIntraPredictorDerivative. |upsampled_top| and
// |upsampled_left| indicate whether |top| and |left| have been upsampled as
// described in '7.11.2.11. Intra edge upsample process'. This can occur in
// cases with |width| + |height| <= 16. top-left and upper-left are accessed,
// up to [-2] in each if |upsampled_top/left| are set.
using DirectionalIntraPredictorZone2Func = void (*)(
    void* dst, ptrdiff_t stride, const void* top, const void* left, int width,
    int height, int xstep, int ystep, bool upsampled_top, bool upsampled_left);
227
// Directional intra predictor function signature, zone 3 (180 < angle < 270).
// Section 7.11.2.4 (#9).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |left| is an aligned vector of the
// column to the left of |dst|. |width| and |height| give the dimensions of the
// block. |ystep| is the scaled starting index to |left| from
// kDirectionalIntraPredictorDerivative. |upsampled_left| indicates whether
// |left| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. bottom-left
// is accessed.
using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* left, int width,
                                                    int height, int ystep,
                                                    bool upsampled_left);
242
243 // Filter intra predictor function signature. Section 7.11.2.3.
244 // |dst| is an unaligned pointer to the output block. Pixel size is determined
245 // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
246 // the row above |dst|. |left| is an aligned vector of the column to the left
247 // of |dst|. |width| and |height| are the size of the block in pixels.
248 using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
249 const void* top, const void* left,
250 FilterIntraPredictor pred, int width,
251 int height);
252
253 //------------------------------------------------------------------------------
254 // Chroma from Luma (Cfl) prediction. Section 7.11.5.
255
256 // Chroma from Luma (Cfl) intra prediction function signature. |dst| is an
257 // unaligned pointer to the output block. Pixel size is determined by bitdepth
258 // with |stride| given in bytes. |luma| contains subsampled luma pixels with 3
259 // fractional bits of precision. |alpha| is the signed Cfl alpha value for the
260 // appropriate plane.
261 using CflIntraPredictorFunc = void (*)(
262 void* dst, ptrdiff_t stride,
263 const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha);
264 using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes];
265
266 // Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned
267 // pointer to the output block. |src| is an unaligned pointer to the input
268 // block. Pixel size is determined by bitdepth with |stride| given in bytes.
269 using CflSubsamplerFunc =
270 void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
271 int max_luma_width, int max_luma_height, const void* source,
272 ptrdiff_t stride);
273 using CflSubsamplerFuncs =
274 CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes];
275
276 //------------------------------------------------------------------------------
277 // Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4.
278
// Intra edge filter function signature. |buffer| is a pointer to the top_row or
// left_column that needs to be filtered. Typically the -1'th index of |top_row|
// and |left_column| need to be filtered as well, so the caller can merely pass
// a pointer to top_row[-1] or left_column[-1] as |buffer|. Pixel size is
// determined by bitdepth. |size| is the number of pixels to be filtered.
// |strength| is the filter strength. Section 7.11.2.12 in the spec.
using IntraEdgeFilterFunc = void (*)(void* buffer, int size, int strength);
286
// Intra edge upsampler function signature. |buffer| is a pointer to the top_row
// or left_column that needs to be upsampled. Pixel size is determined by
// bitdepth. |size| is the number of pixels to be upsampled; valid values are:
// 4, 8, 12, 16. This function needs access to negative indices -1 and -2 of
// the |buffer|. Section 7.11.2.11 in the spec.
using IntraEdgeUpsamplerFunc = void (*)(void* buffer, int size);
293
294 //------------------------------------------------------------------------------
295 // Inverse transform add function signature.
296 //
297 // Steps 2 and 3 of section 7.12.3 (contains the implementation of section
298 // 7.13.3).
299 // Apply the inverse transforms and add the residual to the destination frame
300 // for the transform type and block size |tx_size| starting at position
301 // |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D. |is_row|
302 // signals the direction of the transform loop. |non_zero_coeff_count| is the
303 // number of non zero coefficients in the block.
304 using InverseTransformAddFunc = void (*)(TransformType tx_type,
305 TransformSize tx_size,
306 void* src_buffer, int start_x,
307 int start_y, void* dst_frame,
308 bool is_row, int non_zero_coeff_count);
309 using InverseTransformAddFuncs =
310 InverseTransformAddFunc[kNum1DTransformSizes][kNum1DTransforms];
311
312 //------------------------------------------------------------------------------
313 // Post processing.
314
// Loop filter function signature. Section 7.14.
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes.
// |outer_thresh|, |inner_thresh| and |hev_thresh| are the filter thresholds.
using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh,
                                int inner_thresh, int hev_thresh);
320 using LoopFilterFuncs =
321 LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes];
322
// Cdef direction function signature. Section 7.15.2.
// |src| is a pointer to the source block. Pixel size is determined by bitdepth
// with |stride| given in bytes. |direction| and |variance| are output
// parameters and must not be nullptr.
using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride,
                                   int* direction, int* variance);
329
// Cdef filtering function signature. Section 7.15.3.
// |source| is a pointer to the input block padded with kCdefLargeValue if at a
// frame border. |source_stride| is given in units of uint16_t.
// |block_height| is the height of the input block. The block width is not a
// parameter: it selects the function through the first index of
// CdefFilteringFuncs.
// |primary_strength|, |secondary_strength|, and |damping| are Cdef filtering
// parameters.
// |direction| is the filtering direction.
// |dest| is the output buffer. |dest_stride| is given in bytes.
using CdefFilteringFunc = void (*)(const uint16_t* source,
                                   ptrdiff_t source_stride, int block_height,
                                   int primary_strength, int secondary_strength,
                                   int damping, int direction, void* dest,
                                   ptrdiff_t dest_stride);

// The first index is block width: [0]: 4, [1]: 8. The second is based on
// non-zero strengths: [0]: |primary_strength| and |secondary_strength|, [1]:
// |primary_strength| only, [2]: |secondary_strength| only.
using CdefFilteringFuncs = CdefFilteringFunc[2][3];
348
// Upscaling process function signature. Section 7.16.
// Operates on a single row.
// |source| is the input frame buffer at the given row.
// |dest| is the output row.
// |upscaled_width| is the width of the output frame.
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |initial_subpixel_x| is a base offset from which |step| increments.
using SuperResRowFunc = void (*)(const void* source, const int upscaled_width,
                                 const int initial_subpixel_x, const int step,
                                 void* const dest);
360
361 // Loop restoration function signature. Sections 7.16, 7.17.
362 // |source| is the input frame buffer, which is deblocked and cdef filtered.
363 // |dest| is the output.
364 // |restoration_info| contains loop restoration information, such as filter
365 // type, strength.
366 // |source_stride| and |dest_stride| are given in pixels.
367 // |buffer| contains buffers required for self guided filter and wiener filter.
368 // They must be initialized before calling.
369 using LoopRestorationFunc = void (*)(
370 const void* source, void* dest, const RestorationUnitInfo& restoration_info,
371 ptrdiff_t source_stride, ptrdiff_t dest_stride, int width, int height,
372 RestorationBuffer* buffer);
373
374 // Index 0 is Wiener Filter.
375 // Index 1 is Self Guided Restoration Filter.
376 // This can be accessed as LoopRestorationType - 2.
377 using LoopRestorationFuncs = LoopRestorationFunc[2];
378
// Convolve function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024.
// |width| and |height| are width and height of the block to be filtered.
// |prediction| is the output block (output frame buffer). |pred_stride| is the
// stride of |prediction| (units are not stated here - confirm against
// src/dsp/convolve.cc).
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride,
                              int horizontal_filter_index,
                              int vertical_filter_index, int subpixel_x,
                              int subpixel_y, int width, int height,
                              void* prediction, ptrdiff_t pred_stride);

// Convolve functions signature. Each points to one convolve function with
// a specific setting:
// ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter]
// [has_horizontal_filter].
// If is_compound is false, the prediction is clipped to Pixel.
// If is_compound is true, the range of prediction is:
//   8bpp:  [-5132,  9212] (int16_t)
//   10bpp: [ 3988, 61532] (uint16_t)
//   12bpp: [ 3974, 61559] (uint16_t)
// See src/dsp/convolve.cc
using ConvolveFuncs = ConvolveFunc[2][2][2][2];
413
// Convolve + scale function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024.
// |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel.
// |width| and |height| are width and height of the block to be filtered.
// |prediction| is the output block (output frame buffer). |pred_stride| is the
// stride of |prediction| (units are not stated here - confirm against the
// implementations).
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
using ConvolveScaleFunc = void (*)(const void* reference,
                                   ptrdiff_t reference_stride,
                                   int horizontal_filter_index,
                                   int vertical_filter_index, int subpixel_x,
                                   int subpixel_y, int step_x, int step_y,
                                   int width, int height, void* prediction,
                                   ptrdiff_t pred_stride);

// Convolve functions signature for scaling version.
// 0: single predictor. 1: compound predictor.
using ConvolveScaleFuncs = ConvolveScaleFunc[2];
443
// Weight mask function signature. Section 7.11.3.12.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// The prediction width and height are not parameters: they select the function
// through the first two indices of WeightMaskFuncs.
// The stride for the input buffers is equal to the prediction width.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |mask| is the output buffer. |mask_stride| is the output buffer stride.
using WeightMaskFunc = void (*)(const void* prediction_0,
                                const void* prediction_1, uint8_t* mask,
                                ptrdiff_t mask_stride);

// Weight mask functions signature. The dimensions (in order) are:
//   * Width index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * Height index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * mask_is_inverse.
using WeightMaskFuncs = WeightMaskFunc[6][6][2];
461
// Average blending function signature.
// Two predictors are averaged to generate the output.
// Input predictor values are int16_t. Output type is uint8_t, with actual
// range of Pixel value.
// Average blending is in the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE).
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
using AverageBlendFunc = void (*)(const void* prediction_0,
                                  const void* prediction_1, int width,
                                  int height, void* dest,
                                  ptrdiff_t dest_stride);
478
// Distance weighted blending function signature.
// Weights are generated in Section 7.11.3.15.
// Weighted blending is in the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE).
// This function takes two blocks (inter frame prediction) and produces a
// weighted output.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |weight_0| is the weight for the first block. It is derived from the relative
// distance of the first reference frame and the current frame.
// |weight_1| is the weight for the second block. It is derived from the
// relative distance of the second reference frame and the current frame.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
                                           const void* prediction_1,
                                           uint8_t weight_0, uint8_t weight_1,
                                           int width, int height, void* dest,
                                           ptrdiff_t dest_stride);
500
// Mask blending function signature. Section 7.11.3.14.
// This function takes two blocks and produces a blended output stored into the
// output block |dest|. The blending is a weighted average process, controlled
// by values of the mask.
// |prediction_0| is the first input block. When prediction mode is inter_intra
// (or wedge_inter_intra), this refers to the inter frame prediction. It is
// int16_t* when bitdepth == 8 and uint16_t* otherwise.
// The stride for |prediction_0| is equal to |width|.
// |prediction_1| is the second input block. When prediction mode is inter_intra
// (or wedge_inter_intra), this refers to the intra frame prediction and uses
// Pixel values. It is only used for intra frame prediction when bitdepth >= 10.
// It is int16_t* when bitdepth == 8 and uint16_t* otherwise.
// |prediction_stride_1| is the stride, given in units of [u]int16_t. When
// is_inter_intra is false (compound prediction) then |prediction_stride_1| is
// equal to |width|.
// |mask| is an integer array, whose value indicates the weight of the blending.
// |mask_stride| is corresponding stride.
// |width|, |height| are the same for both input blocks.
// If it's inter_intra (or wedge_inter_intra), the valid range of block size is
// [8x8, 32x32]. Otherwise (including difference weighted prediction and
// compound average prediction), the valid range is [8x8, 128x128].
// If there's subsampling, the corresponding width and height are halved for
// chroma planes.
// |dest| is the output block.
// |dest_stride| is the corresponding stride for dest.
//
// The following are not parameters of this signature:
// subsampling_x, subsampling_y are the subsampling factors, and
// is_inter_intra stands for the prediction mode. If it is true, one of the
// prediction blocks is from intra prediction of current frame. Otherwise, two
// prediction blocks are both inter frame predictions. Together they select the
// function through the indices of MaskBlendFuncs.
// is_wedge_inter_intra indicates if the mask is for the wedge prediction.
using MaskBlendFunc = void (*)(const void* prediction_0,
                               const void* prediction_1,
                               ptrdiff_t prediction_stride_1,
                               const uint8_t* mask, ptrdiff_t mask_stride,
                               int width, int height, void* dest,
                               ptrdiff_t dest_stride);

// Mask blending functions signature. Each points to one function with
// a specific setting:
// MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra].
using MaskBlendFuncs = MaskBlendFunc[3][2];
542
// This function is similar to the MaskBlendFunc. It is only used when
// is_inter_intra is true and bitdepth == 8.
// |prediction_[01]| are Pixel values (uint8_t).
// |prediction_1| is also the output buffer.
using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0,
                                             uint8_t* prediction_1,
                                             ptrdiff_t prediction_stride_1,
                                             const uint8_t* mask,
                                             ptrdiff_t mask_stride, int width,
                                             int height);

// InterIntra8bpp mask blending functions signature. When is_wedge_inter_intra
// is false, the function at index 0 must be used. Otherwise, the function at
// index subsampling_x + subsampling_y must be used.
using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3];
558
// Obmc (overlapped block motion compensation) blending function signature.
// Section 7.11.3.10.
// This function takes two blocks and produces a blended output stored into the
// first input block. The blending is a weighted average process, controlled by
// values of the mask.
// Obmc is not a compound mode. It is different from other compound blending,
// in terms of precision. The current block is computed using convolution with
// clipping to the range of pixel values. Its above and left blocks are also
// clipped. Therefore obmc blending process doesn't need to clip the output.
// |prediction| is the first input block, which will be overwritten.
// |prediction_stride| is the stride, given in bytes.
// |width|, |height| are the same for both input blocks.
// |obmc_prediction| is the second input block.
// |obmc_prediction_stride| is its stride, given in bytes.
using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride,
                               int width, int height,
                               const void* obmc_prediction,
                               ptrdiff_t obmc_prediction_stride);
577 using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections];
578
// Warp function signature. Section 7.11.3.5.
// This function applies warp filtering for each 8x8 block inside the current
// coding block. The filtering process is similar to 2d convolve filtering.
// The horizontal filter is applied followed by the vertical filter.
// The function has to calculate corresponding pixel positions before and
// after warping.
// |source| is the input reference frame buffer.
// |source_stride|, |source_width|, |source_height| are corresponding frame
// stride, width, and height. |source_stride| is given in bytes.
// |warp_params| is the matrix of warp motion: warp_params[i] = mN.
//         [x'     (m2 m3 m0   [x
//     z .  y'  =   m4 m5 m1 *  y
//          1]      m6 m7 1)    1]
// |subsampling_x/y| is the current frame's plane subsampling factor.
// |block_start_x| and |block_start_y| are the starting position the current
// coding block.
// |block_width| and |block_height| are width and height of the current coding
// block. |block_width| and |block_height| are at least 8.
// |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the
// comments in the definition of struct GlobalMotion for the range of their
// values.
// |dest| is the output buffer of type Pixel. The output values are clipped to
// Pixel values.
// |dest_stride| is the stride, in units of bytes.
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For vertical filtering kInterRoundBitsVertical &
// kInterRoundBitsVertical12bpp will be used.
//
// NOTE: WarpFunc assumes the source frame has left, right, top, and bottom
// borders that extend the frame boundary pixels.
// * The left and right borders must be at least 13 pixels wide. In addition,
//   Warp_NEON() may read up to 14 bytes after a row in the |source| buffer.
//   Therefore, there must be at least one extra padding byte after the right
//   border of the last row in the source buffer.
// * The top and bottom borders must be at least 13 pixels high.
using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride,
                          int source_width, int source_height,
                          const int* warp_params, int subsampling_x,
                          int subsampling_y, int block_start_x,
                          int block_start_y, int block_width, int block_height,
                          int16_t alpha, int16_t beta, int16_t gamma,
                          int16_t delta, void* dest, ptrdiff_t dest_stride);
622
623 // Warp for compound predictions. Section 7.11.3.5.
624 // Similar to WarpFunc, but |dest| is a uint16_t predictor buffer,
625 // |dest_stride| is given in units of uint16_t and |inter_round_bits_vertical|
626 // is always 7 (kCompoundInterRoundBitsVertical).
627 // Rounding precision is derived from the function being called. For horizontal
628 // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
629 // used. For vertical filtering kInterRoundBitsCompondVertical will be used.
630 using WarpCompoundFunc = WarpFunc;
631
// Size constant for the film grain auto-regression function tables
// (LumaAutoRegressionFuncs and ChromaAutoRegressionFuncs).
constexpr int kNumAutoRegressionLags = 4;
633 // Applies an auto-regressive filter to the white noise in |luma_grain_buffer|.
634 // Section 7.18.3.3, second code block
635 // |params| are parameters read from frame header, mainly providing
636 // auto_regression_coeff_y for the filter and auto_regression_shift to right
637 // shift the filter sum by. Note: This method assumes
638 // params.auto_regression_coeff_lag is not 0. Do not call this method if
639 // params.auto_regression_coeff_lag is 0.
640 using LumaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
641 void* luma_grain_buffer);
642 // Function index is auto_regression_coeff_lag - 1.
643 using LumaAutoRegressionFuncs =
644 LumaAutoRegressionFunc[kNumAutoRegressionLags - 1];
645
646 // Applies an auto-regressive filter to the white noise in |u_grain_buffer| and |v_grain_buffer|.
647 // Section 7.18.3.3, third code block.
648 // The |luma_grain_buffer| provides samples that are added to the autoregressive
649 // sum when num_y_points > 0.
650 // |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise
651 // that were generated from the stored Gaussian sequence, and are overwritten
652 // with the results of the autoregressive filter. |params| are parameters read
653 // from frame header, mainly providing auto_regression_coeff_u and
654 // auto_regression_coeff_v for each chroma plane's filter, and
655 // auto_regression_shift to right shift the filter sums by.
// |luma_grain_buffer| is read-only (const void*).
// NOTE(review): |subsampling_x| and |subsampling_y| are not described here;
// presumably they are the chroma subsampling shifts of the frame -- confirm
// against callers.
656 using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
657 const void* luma_grain_buffer,
658 int subsampling_x, int subsampling_y,
659 void* u_grain_buffer,
660 void* v_grain_buffer);
// Table of chroma autoregression functions. The first index is the |use_luma|
// flag (0 or 1). The second dimension has kNumAutoRegressionLags entries, one
// more than the luma table; presumably it is indexed directly by
// auto_regression_coeff_lag, so that lag 0 has an entry -- TODO confirm against
// the code that fills and reads this table.
661 using ChromaAutoRegressionFuncs =
662 ChromaAutoRegressionFunc[/*use_luma*/ 2][kNumAutoRegressionLags];
663
664 // Build an image-wide "stripe" of grain noise for every 32 rows in the image.
665 // Section 7.18.3.5, first code block.
666 // Each 32x32 luma block is copied at a random offset specified via
667 // |grain_seed| from the grain template produced by autoregression, and the same
668 // is done for chroma grains, subject to subsampling.
669 // |width| and |height| are the dimensions of the overall image.
670 // |noise_stripes_buffer| points to an Array2DView with one row for each stripe.
671 // Because this function treats all planes identically and independently, it is
672 // simplified to take one grain buffer at a time. This means duplicating some
673 // random number generations, but that work can be reduced in other ways.
// |grain_buffer| is the read-only grain template of the single plane being
// processed; |noise_stripes_buffer| is the output.
// NOTE(review): for a luma plane |subsampling_x| and |subsampling_y| are
// presumably both 0 -- confirm against callers.
674 using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer,
675 int grain_seed, int width,
676 int height, int subsampling_x,
677 int subsampling_y,
678 void* noise_stripes_buffer);
// Pair of noise stripe construction functions, indexed by |overlap_flag|
// (0 or 1). Presumably this is the overlap_flag field of FilmGrainParams --
// TODO confirm.
679 using ConstructNoiseStripesFuncs =
680 ConstructNoiseStripesFunc[/*overlap_flag*/ 2];
681
682 // Compute the one or two overlap rows for each stripe copied to the noise
683 // image.
684 // Section 7.18.3.5, second code block. |width| and |height| are the
685 // dimensions of the overall image. |noise_stripes_buffer| points to an
686 // Array2DView with one row for each stripe. |noise_image_buffer| points to an
687 // Array2D containing the allocated plane for this frame. Because this function
688 // treats all planes identically and independently, it is simplified to process
689 // the buffers of one plane at a time.
// |noise_stripes_buffer| is read-only input (const void*); the result is
// written to |noise_image_buffer|.
690 using ConstructNoiseImageOverlapFunc =
691 void (*)(const void* noise_stripes_buffer, int width, int height,
692 int subsampling_x, int subsampling_y, void* noise_image_buffer);
693
694 // Populate a scaling lookup table with interpolated values of a piecewise
695 // linear function where values in |point_value| are mapped to the values in
696 // |point_scaling|.
697 // |num_points| can be between 0 and 15. When 0, the lookup table is set to
698 // zero.
699 // |point_value| and |point_scaling| have |num_points| valid elements.
// |scaling_lut| is the output table. The array bound in its declaration is
// documentation only: the parameter has type uint8_t*, so the compiler does
// not check the size. The table is expected to hold kScalingLookupTableSize
// entries.
700 using InitializeScalingLutFunc = void (*)(
701 int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
702 uint8_t scaling_lut[kScalingLookupTableSize]);
703
704 // Blend noise with image. Section 7.18.3.5, third code block.
705 // |width| is the width of each row, while |height| is how many rows to compute.
706 // |start_height| is an offset for the noise image, to support multithreading.
707 // |min_value| and |max_value| (max_luma or max_chroma, depending on the plane)
708 // are computed by the caller of these functions, according to the code in the spec.
709 // |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded
710 // frame. They are blended with the film grain noise and written to
711 // |dest_plane_y| and |dest_plane_uv| as final output for display.
712 // source_plane_* and dest_plane_* may point to the same buffer, in which case
713 // the film grain noise is added in place.
714 // |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from
715 // the frame's raw pixel value, to a scaling factor for the noise sample.
716 // |scaling_shift| is applied as a right shift after scaling, so that scaling
717 // down is possible. It is found in FilmGrainParams, but supplied directly to
718 // BlendNoiseWithImageLumaFunc because it's the only member used.
// This comment covers both the luma and the chroma blend signatures; the *_uv
// parameters and |scaling_lut| appear only in BlendNoiseWithImageChromaFunc.
719 using BlendNoiseWithImageLumaFunc =
720 void (*)(const void* noise_image_ptr, int min_value, int max_value,
721 int scaling_shift, int width, int height, int start_height,
722 const uint8_t scaling_lut_y[kScalingLookupTableSize],
723 const void* source_plane_y, ptrdiff_t source_stride_y,
724 void* dest_plane_y, ptrdiff_t dest_stride_y);
725
// Chroma counterpart of BlendNoiseWithImageLumaFunc. Differences visible in
// the signature:
// - |plane| identifies the plane being blended (presumably one of the two
//   chroma planes -- TODO confirm).
// - The whole FilmGrainParams is passed as |params| instead of a single
//   |scaling_shift| value.
// - |subsampling_x| and |subsampling_y| are supplied.
// - Both the luma source plane (|source_plane_y|) and the chroma source plane
//   (|source_plane_uv|) are read; only |dest_plane_uv| is written.
726 using BlendNoiseWithImageChromaFunc = void (*)(
727 Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
728 int min_value, int max_value, int width, int height, int start_height,
729 int subsampling_x, int subsampling_y,
730 const uint8_t scaling_lut[kScalingLookupTableSize],
731 const void* source_plane_y, ptrdiff_t source_stride_y,
732 const void* source_plane_uv, ptrdiff_t source_stride_uv,
733 void* dest_plane_uv, ptrdiff_t dest_stride_uv);
734
// Pair of chroma blend functions, indexed by |chroma_scaling_from_luma|
// (0 or 1). Presumably this is the field of the same name in FilmGrainParams
// -- TODO confirm.
735 using BlendNoiseWithImageChromaFuncs =
736 BlendNoiseWithImageChromaFunc[/*chroma_scaling_from_luma*/ 2];
737
738 //------------------------------------------------------------------------------
739
// Groups the film grain synthesis function pointers. Each member is either a
// single function pointer or a fixed-size array of them, using the alias types
// declared above; the index of each array is noted next to the member.
740 struct FilmGrainFuncs {
741 LumaAutoRegressionFuncs luma_auto_regression;  // [auto_regression_coeff_lag - 1]
742 ChromaAutoRegressionFuncs chroma_auto_regression;  // [use_luma][kNumAutoRegressionLags]
743 ConstructNoiseStripesFuncs construct_noise_stripes;  // [overlap_flag]
744 ConstructNoiseImageOverlapFunc construct_noise_image_overlap;
745 InitializeScalingLutFunc initialize_scaling_lut;
746 BlendNoiseWithImageLumaFunc blend_noise_luma;
747 BlendNoiseWithImageChromaFuncs blend_noise_chroma;  // [chroma_scaling_from_luma]
748 };
749
750 // Motion field projection function signature. Section 7.9.
751 // |reference_info| provides reference information for motion field projection.
752 // |reference_to_current_with_sign| is the precalculated reference frame id
753 // distance from current frame.
754 // |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others.
755 // |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile.
756 // |x8_start| and |x8_end| are the start and end 8x8 columns of the current
757 // tile.
758 // |motion_field| is the output which saves the projected motion field
759 // information.
// NOTE(review): this comment does not say whether |y8_end| and |x8_end| are
// inclusive or exclusive bounds -- confirm against the implementations before
// relying on either.
760 using MotionFieldProjectionKernelFunc = void (*)(
761 const ReferenceInfo& reference_info, int reference_to_current_with_sign,
762 int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end,
763 TemporalMotionField* motion_field);
764
765 // Compound temporal motion vector projection function signature.
766 // Section 7.9.3 and 7.10.2.10.
767 // |temporal_mvs| is the set of temporal reference motion vectors.
768 // |temporal_reference_offsets| specifies the number of frames covered by the
769 // original motion vector.
770 // |reference_offsets| specifies the number of frames to be covered by the
771 // projected motion vector.
772 // |count| is the number of the temporal motion vectors.
773 // |candidate_mvs| is the set of projected motion vectors.
// |reference_offsets| is declared with two elements (the parameter itself is a
// plain const int*, so the bound is not compiler-checked); presumably one
// offset per reference of the compound pair -- TODO confirm.
// NOTE(review): |temporal_mvs|, |temporal_reference_offsets| and
// |candidate_mvs| presumably each hold |count| elements; confirm against
// callers.
774 using MvProjectionCompoundFunc = void (*)(
775 const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
776 const int reference_offsets[2], int count,
777 CompoundMotionVector* candidate_mvs);
778
779 // Single temporal motion vector projection function signature.
780 // Section 7.9.3 and 7.10.2.10.
781 // |temporal_mvs| is the set of temporal reference motion vectors.
782 // |temporal_reference_offsets| specifies the number of frames covered by the
783 // original motion vector.
784 // |reference_offset| specifies the number of frames to be covered by the
785 // projected motion vector.
786 // |count| is the number of the temporal motion vectors.
787 // |candidate_mvs| is the set of projected motion vectors.
// Differs from MvProjectionCompoundFunc in taking a single |reference_offset|
// value and writing MotionVector (not CompoundMotionVector) results.
// NOTE(review): |temporal_mvs|, |temporal_reference_offsets| and
// |candidate_mvs| presumably each hold |count| elements; confirm against
// callers.
788 using MvProjectionSingleFunc = void (*)(
789 const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
790 int reference_offset, int count, MotionVector* candidate_mvs);
791
// Table of all DSP entry points. Every member is a function pointer, an array
// of function pointers, or a nested table (film_grain). A table is looked up
// per bitdepth with GetDspTable() and filled in through
// dsp_internal::GetWritableDspTable().
// Members are listed in roughly alphabetical order.
792 struct Dsp {
793 AverageBlendFunc average_blend;
794 CdefDirectionFunc cdef_direction;
795 CdefFilteringFuncs cdef_filters;
796 CflIntraPredictorFuncs cfl_intra_predictors;
797 CflSubsamplerFuncs cfl_subsamplers;
798 ConvolveFuncs convolve;
799 ConvolveScaleFuncs convolve_scale;
800 DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1;
801 DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2;
802 DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3;
803 DistanceWeightedBlendFunc distance_weighted_blend;
804 FilmGrainFuncs film_grain;  // Nested table of film grain synthesis functions.
805 FilterIntraPredictorFunc filter_intra_predictor;
806 InterIntraMaskBlendFuncs8bpp inter_intra_mask_blend_8bpp;
807 IntraEdgeFilterFunc intra_edge_filter;
808 IntraEdgeUpsamplerFunc intra_edge_upsampler;
809 IntraPredictorFuncs intra_predictors;
810 InverseTransformAddFuncs inverse_transforms;
811 LoopFilterFuncs loop_filters;
812 LoopRestorationFuncs loop_restorations;
813 MaskBlendFuncs mask_blend;
814 MotionFieldProjectionKernelFunc motion_field_projection_kernel;
815 MvProjectionCompoundFunc mv_projection_compound[3];  // NOTE(review): meaning of the 3 indices is not documented here -- confirm.
816 MvProjectionSingleFunc mv_projection_single[3];  // NOTE(review): same indexing as mv_projection_compound, presumably -- confirm.
817 ObmcBlendFuncs obmc_blend;
818 SuperResRowFunc super_res_row;
819 WarpCompoundFunc warp_compound;  // Same pointer type as |warp| (WarpCompoundFunc aliases WarpFunc).
820 WarpFunc warp;
821 WeightMaskFuncs weight_mask;
822 };
823
824 // Initializes function pointers based on build config and runtime
825 // environment. Must be called once before first use. This function is
826 // thread-safe.
// Takes no arguments and returns nothing, so a failure cannot be reported to
// the caller through this interface.
827 void DspInit();
828
829 // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
830 // exist.
// The returned table is read-only; callers must check for nullptr before
// dereferencing. Code that needs to modify a table uses
// dsp_internal::GetWritableDspTable() instead.
831 const Dsp* GetDspTable(int bitdepth);
832
833 } // namespace dsp
834
835 namespace dsp_internal {
836
837 // Returns true if a more highly optimized version of |func| is not defined for
838 // the associated bitdepth or if it is forcibly enabled with
839 // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds
840 // to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated
841 // with the module.
842 // |func| is one of:
843 // - FunctionName, e.g., SelfGuidedFilter.
844 // - [sub-table-index1][...-indexN] e.g.,
845 // TransformSize4x4_IntraPredictorDc. The indices correspond to enum values
846 // used as lookups with leading 'k' removed.
847 //
848 // NEON support is the only extension available for ARM and it is always
849 // required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always
850 // true and can be omitted.
// Each macro expands to a parenthesized integer expression: the
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS override OR-ed with a comparison of the
// token-pasted LIBGAV1_Dsp<bitdepth>bpp_<func> symbol against
// LIBGAV1_CPU_SSE4_1. If the macro is evaluated in a preprocessor conditional
// and that symbol is undefined, the preprocessor treats it as 0.
851 #define DSP_ENABLED_8BPP_SSE4_1(func) \
852 (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
853 LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_SSE4_1)
// 10bpp counterpart of DSP_ENABLED_8BPP_SSE4_1: true when
// LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero or when LIBGAV1_Dsp10bpp_<func>
// equals LIBGAV1_CPU_SSE4_1.
854 #define DSP_ENABLED_10BPP_SSE4_1(func) \
855 (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
856 LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_SSE4_1)
857
858 // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
859 // exist. This version is meant for use by test or dsp/*Init() functions only.
// Unlike dsp::GetDspTable(), the returned pointer is non-const, so the caller
// can assign entries of the table. Callers must check for nullptr.
860 dsp::Dsp* GetWritableDspTable(int bitdepth);
861
862 } // namespace dsp_internal
863 } // namespace libgav1
864
865 #endif // LIBGAV1_SRC_DSP_DSP_H_
866