/*
 * Copyright 2019 The libgav1 Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef LIBGAV1_SRC_DSP_DSP_H_
18 #define LIBGAV1_SRC_DSP_DSP_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdlib>
23 
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/film_grain_common.h"
27 #include "src/utils/cpu.h"
28 #include "src/utils/reference_info.h"
29 #include "src/utils/types.h"
30 
31 namespace libgav1 {
32 namespace dsp {
33 
// Give LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS a default value of 0 when the build
// has not defined it, so it can always be tested with #if.
#if !defined(LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS)
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0
#endif
37 
// List of intra predictors. The values are consecutive starting at 0 and are
// used as the last index of IntraPredictorFuncs. kNumIntraPredictors is the
// number of predictors and must remain the final enumerator.
enum IntraPredictor : uint8_t {
  kIntraPredictorDcFill,
  kIntraPredictorDcTop,
  kIntraPredictorDcLeft,
  kIntraPredictorDc,
  kIntraPredictorVertical,
  kIntraPredictorHorizontal,
  kIntraPredictorPaeth,
  kIntraPredictorSmooth,
  kIntraPredictorSmoothVertical,
  kIntraPredictorSmoothHorizontal,
  kNumIntraPredictors
};
51 
// List of valid 1D transforms. The values are consecutive starting at 0 and
// are used as the first index of InverseTransformAddFuncs. kNumTransform1ds is
// the number of transforms and must remain the final enumerator.
enum Transform1d : uint8_t {
  kTransform1dDct,   // Discrete Cosine Transform.
  kTransform1dAdst,  // Asymmetric Discrete Sine Transform.
  kTransform1dIdentity,
  kTransform1dWht,  // Walsh Hadamard Transform.
  kNumTransform1ds
};
60 
// List of valid 1D transform sizes. Not all transforms may be available for all
// the sizes. The values are consecutive starting at 0 and are used as the
// second index of InverseTransformAddFuncs. kNumTransform1dSizes is the number
// of sizes and must remain the final enumerator.
enum Transform1dSize : uint8_t {
  kTransform1dSize4,
  kTransform1dSize8,
  kTransform1dSize16,
  kTransform1dSize32,
  kTransform1dSize64,
  kNumTransform1dSizes
};
71 
// The maximum width of the loop filter, fewer pixels may be filtered depending
// on strength thresholds. The values are consecutive starting at 0 and are
// used as the first index of LoopFilterFuncs. kNumLoopFilterSizes is the
// number of sizes and must remain the final enumerator.
enum LoopFilterSize : uint8_t {
  kLoopFilterSize4,
  kLoopFilterSize6,
  kLoopFilterSize8,
  kLoopFilterSize14,
  kNumLoopFilterSizes
};
81 
// Indices into the final dimension of InverseTransformAddFuncs, selecting the
// row or the column transform.
enum : uint8_t {
  kRow = 0,
  kColumn = 1,
};
86 
87 //------------------------------------------------------------------------------
88 // ToString()
89 //
90 // These functions are meant to be used only in debug logging and within tests.
91 // They are defined inline to avoid including the strings in the release
92 // library when logging is disabled; unreferenced functions will not be added to
93 // any object file in that case.
94 
ToString(const IntraPredictor predictor)95 inline const char* ToString(const IntraPredictor predictor) {
96   switch (predictor) {
97     case kIntraPredictorDcFill:
98       return "kIntraPredictorDcFill";
99     case kIntraPredictorDcTop:
100       return "kIntraPredictorDcTop";
101     case kIntraPredictorDcLeft:
102       return "kIntraPredictorDcLeft";
103     case kIntraPredictorDc:
104       return "kIntraPredictorDc";
105     case kIntraPredictorVertical:
106       return "kIntraPredictorVertical";
107     case kIntraPredictorHorizontal:
108       return "kIntraPredictorHorizontal";
109     case kIntraPredictorPaeth:
110       return "kIntraPredictorPaeth";
111     case kIntraPredictorSmooth:
112       return "kIntraPredictorSmooth";
113     case kIntraPredictorSmoothVertical:
114       return "kIntraPredictorSmoothVertical";
115     case kIntraPredictorSmoothHorizontal:
116       return "kIntraPredictorSmoothHorizontal";
117     case kNumIntraPredictors:
118       return "kNumIntraPredictors";
119   }
120   abort();
121 }
122 
ToString(const Transform1d transform)123 inline const char* ToString(const Transform1d transform) {
124   switch (transform) {
125     case kTransform1dDct:
126       return "kTransform1dDct";
127     case kTransform1dAdst:
128       return "kTransform1dAdst";
129     case kTransform1dIdentity:
130       return "kTransform1dIdentity";
131     case kTransform1dWht:
132       return "kTransform1dWht";
133     case kNumTransform1ds:
134       return "kNumTransform1ds";
135   }
136   abort();
137 }
138 
ToString(const Transform1dSize transform_size)139 inline const char* ToString(const Transform1dSize transform_size) {
140   switch (transform_size) {
141     case kTransform1dSize4:
142       return "kTransform1dSize4";
143     case kTransform1dSize8:
144       return "kTransform1dSize8";
145     case kTransform1dSize16:
146       return "kTransform1dSize16";
147     case kTransform1dSize32:
148       return "kTransform1dSize32";
149     case kTransform1dSize64:
150       return "kTransform1dSize64";
151     case kNumTransform1dSizes:
152       return "kNumTransform1dSizes";
153   }
154   abort();
155 }
156 
ToString(const LoopFilterSize filter_size)157 inline const char* ToString(const LoopFilterSize filter_size) {
158   switch (filter_size) {
159     case kLoopFilterSize4:
160       return "kLoopFilterSize4";
161     case kLoopFilterSize6:
162       return "kLoopFilterSize6";
163     case kLoopFilterSize8:
164       return "kLoopFilterSize8";
165     case kLoopFilterSize14:
166       return "kLoopFilterSize14";
167     case kNumLoopFilterSizes:
168       return "kNumLoopFilterSizes";
169   }
170   abort();
171 }
172 
ToString(const LoopFilterType filter_type)173 inline const char* ToString(const LoopFilterType filter_type) {
174   switch (filter_type) {
175     case kLoopFilterTypeVertical:
176       return "kLoopFilterTypeVertical";
177     case kLoopFilterTypeHorizontal:
178       return "kLoopFilterTypeHorizontal";
179     case kNumLoopFilterTypes:
180       return "kNumLoopFilterTypes";
181   }
182   abort();
183 }
184 
185 //------------------------------------------------------------------------------
186 // Intra predictors. Section 7.11.2.
187 // These require access to one or both of the top row and left column. Some may
188 // access the top-left (top[-1]), top-right (top[width+N]), bottom-left
189 // (left[height+N]) or upper-left (left[-1]).
190 
191 // Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11),
192 // 7.11.2.5, 7.11.2.6.
193 // |dst| is an unaligned pointer to the output block. Pixel size is determined
194 // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
195 // the row above |dst|. |left| is an aligned vector of the column to the left
196 // of |dst|. top-left and bottom-left may be accessed.
197 // The pointer arguments do not alias one another.
198 using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
199                                     const void* top, const void* left);
200 using IntraPredictorFuncs =
201     IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors];
202 
// Directional intra predictor function signature, zone 1 (0 < angle < 90).
// Section 7.11.2.4 (#7).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |width| and |height| give the dimensions of the block.
// |xstep| is the scaled starting index to |top| from
// kDirectionalIntraPredictorDerivative. |upsampled_top| indicates whether
// |top| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. top-right
// is accessed.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone1Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* top, int width,
                                                    int height, int xstep,
                                                    bool upsampled_top);
218 
// Directional intra predictor function signature, zone 2 (90 < angle < 180).
// Section 7.11.2.4 (#8).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
// the row above |dst|. |left| is an aligned vector of the column to the left of
// |dst|. |width| and |height| give the dimensions of the block. |xstep| and
// |ystep| are the scaled starting index to |top| and |left|, respectively,
// from kDirectionalIntraPredictorDerivative. |upsampled_top| and
// |upsampled_left| indicate whether |top| and |left| have been upsampled as
// described in '7.11.2.11. Intra edge upsample process'. This can occur in
// cases with |width| + |height| <= 16. top-left and upper-left are accessed,
// up to [-2] in each if |upsampled_top/left| are set.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone2Func = void (*)(
    void* dst, ptrdiff_t stride, const void* top, const void* left, int width,
    int height, int xstep, int ystep, bool upsampled_top, bool upsampled_left);
235 
// Directional intra predictor function signature, zone 3 (180 < angle < 270).
// Section 7.11.2.4 (#9).
// |dst| is an unaligned pointer to the output block. Pixel size is determined
// by bitdepth with |stride| given in bytes. |left| is an aligned vector of the
// column to the left of |dst|. |width| and |height| give the dimensions of the
// block. |ystep| is the scaled starting index to |left| from
// kDirectionalIntraPredictorDerivative. |upsampled_left| indicates whether
// |left| has been upsampled as described in '7.11.2.11. Intra edge upsample
// process'. This can occur in cases with |width| + |height| <= 16. bottom-left
// is accessed.
// The pointer arguments do not alias one another.
using DirectionalIntraPredictorZone3Func = void (*)(void* dst, ptrdiff_t stride,
                                                    const void* left, int width,
                                                    int height, int ystep,
                                                    bool upsampled_left);
251 
252 // Filter intra predictor function signature. Section 7.11.2.3.
253 // |dst| is an unaligned pointer to the output block. Pixel size is determined
254 // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to
255 // the row above |dst|. |left| is an aligned vector of the column to the left
256 // of |dst|. |width| and |height| are the size of the block in pixels.
257 // The pointer arguments do not alias one another.
258 using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride,
259                                           const void* top, const void* left,
260                                           FilterIntraPredictor pred, int width,
261                                           int height);
262 
263 //------------------------------------------------------------------------------
264 // Chroma from Luma (Cfl) prediction. Section 7.11.5.
265 
266 // Chroma from Luma (Cfl) intra prediction function signature. |dst| is an
267 // unaligned pointer to the output block. Pixel size is determined by bitdepth
268 // with |stride| given in bytes. |luma| contains subsampled luma pixels with 3
269 // fractional bits of precision. |alpha| is the signed Cfl alpha value for the
270 // appropriate plane.
271 using CflIntraPredictorFunc = void (*)(
272     void* dst, ptrdiff_t stride,
273     const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha);
274 using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes];
275 
276 // Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned
277 // pointer to the output block. |src| is an unaligned pointer to the input
278 // block. Pixel size is determined by bitdepth with |stride| given in bytes.
279 using CflSubsamplerFunc =
280     void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride],
281              int max_luma_width, int max_luma_height, const void* source,
282              ptrdiff_t stride);
283 using CflSubsamplerFuncs =
284     CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes];
285 
//------------------------------------------------------------------------------
// Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4.

// Intra edge filter function signature. |buffer| is a pointer to the top_row or
// left_column that needs to be filtered. Typically the -1'th index of |top_row|
// and |left_column| need to be filtered as well, so the caller can merely pass
// the |buffer| as top_row[-1] or left_column[-1]. Pixel size is determined by
// bitdepth. |size| is the number of pixels to be filtered. |strength| is the
// filter strength. Section 7.11.2.12 in the spec.
using IntraEdgeFilterFunc = void (*)(void* buffer, int size, int strength);
296 
// Intra edge upsampler function signature. |buffer| is a pointer to the top_row
// or left_column that needs to be upsampled. Pixel size is determined by
// bitdepth. |size| is the number of pixels to be upsampled; valid values are:
// 4, 8, 12, 16. This function needs access to negative indices -1 and -2 of
// the |buffer|. Section 7.11.2.11 in the spec.
using IntraEdgeUpsamplerFunc = void (*)(void* buffer, int size);
303 
304 //------------------------------------------------------------------------------
305 // Inverse transform add function signature.
306 //
307 // Steps 2 and 3 of section 7.12.3 (contains the implementation of section
308 // 7.13.3).
309 // Apply the inverse transforms and add the residual to the destination frame
310 // for the transform type and block size |tx_size| starting at position
311 // |start_x| and |start_y|. |dst_frame| is a pointer to an Array2D of Pixel
312 // values. |adjusted_tx_height| is the number of rows to process based on the
313 // non-zero coefficient count in the block. It will be 1 (non-zero coefficient
314 // count == 1), 4 or a multiple of 8 up to 32 or the original transform height,
315 // whichever is less. |src_buffer| is a pointer to an Array2D of Residual
316 // values. On input |src_buffer| contains the dequantized values, on output it
317 // contains the residual.
318 // The pointer arguments do not alias one another.
319 using InverseTransformAddFunc = void (*)(TransformType tx_type,
320                                          TransformSize tx_size,
321                                          int adjusted_tx_height,
322                                          void* src_buffer, int start_x,
323                                          int start_y, void* dst_frame);
324 // The final dimension holds row and column transforms indexed with kRow and
325 // kColumn.
326 using InverseTransformAddFuncs =
327     InverseTransformAddFunc[kNumTransform1ds][kNumTransform1dSizes][2];
328 
329 //------------------------------------------------------------------------------
330 // Post processing.
331 
332 // Loop filter function signature. Section 7.14.
333 // |dst| is an unaligned pointer to the output block. Pixel size is determined
334 // by bitdepth with |stride| given in bytes.
335 // <threshold param> <spec name> <range>
336 // |outer_thresh|    blimit      [7, 193]
337 // |inner_thresh|    limit       [1, 63]
338 // |hev_thresh|      thresh      [0, 63]
339 // These are scaled by the implementation by 'bitdepth - 8' to produce
340 // the spec variables blimitBd, limitBd and threshBd.
341 // Note these functions are not called when the loop filter level is 0.
342 using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh,
343                                 int inner_thresh, int hev_thresh);
344 using LoopFilterFuncs =
345     LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes];
346 
// Cdef direction function signature. Section 7.15.2.
// |src| is a pointer to the source block. Pixel size is determined by bitdepth
// with |stride| given in bytes. |direction| and |variance| are output
// parameters and must not be nullptr.
// The pointer arguments do not alias one another.
using CdefDirectionFunc = void (*)(const void* src, ptrdiff_t stride,
                                   uint8_t* direction, int* variance);
354 
// Cdef filtering function signature. Section 7.15.3.
// |source| is a pointer to the input block padded with kCdefLargeValue if at a
// frame border. |source_stride| is given in units of uint16_t.
// |block_height| is the height of the input block. The block width is not an
// argument; it is selected through the first index of CdefFilteringFuncs.
// |primary_strength|, |secondary_strength|, and |damping| are Cdef filtering
// parameters.
// |direction| is the filtering direction.
// |dest| is the output buffer. |dest_stride| is given in bytes.
// The pointer arguments do not alias one another.
using CdefFilteringFunc = void (*)(const uint16_t* source,
                                   ptrdiff_t source_stride, int block_height,
                                   int primary_strength, int secondary_strength,
                                   int damping, int direction, void* dest,
                                   ptrdiff_t dest_stride);

// The first index is block width: [0]: 4, [1]: 8. The second is based on
// non-zero strengths: [0]: |primary_strength| and |secondary_strength|, [1]:
// |primary_strength| only, [2]: |secondary_strength| only.
using CdefFilteringFuncs = CdefFilteringFunc[2][3];
374 
// Upscaling coefficients function signature. Section 7.16.
// This is an auxiliary function for SIMD optimizations and has no corresponding
// C function. Different SIMD versions may have different outputs. So it must
// pair with the corresponding version of SuperResFunc.
// |upscaled_width| is the width of the output frame.
// |initial_subpixel_x| is a base offset from which |step| increments.
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |coefficients| is the upscale filter used by each pixel in a row.
using SuperResCoefficientsFunc = void (*)(int upscaled_width,
                                          int initial_subpixel_x, int step,
                                          void* coefficients);
387 
// Upscaling process function signature. Section 7.16.
// |coefficients| is the upscale filter used by each pixel in a row. It is not
// used by the C function.
// |source| is the input frame buffer. It will be line extended, which is why
// it is not a pointer to const.
// |source_stride| is given in pixels.
// |height| is the height of the block to be processed.
// |downscaled_width| is the width of the input frame.
// |upscaled_width| is the width of the output frame.
// |initial_subpixel_x| is a base offset from which |step| increments.
// |step| is the number of subpixels to move the kernel for the next destination
// pixel.
// |dest| is the output buffer.
// |dest_stride| is given in pixels.
// The pointer arguments do not alias one another.
using SuperResFunc = void (*)(const void* coefficients, void* source,
                              ptrdiff_t source_stride, int height,
                              int downscaled_width, int upscaled_width,
                              int initial_subpixel_x, int step, void* dest,
                              ptrdiff_t dest_stride);
407 
408 // Loop restoration function signature. Sections 7.16, 7.17.
409 // |restoration_info| contains loop restoration information, such as filter
410 // type, strength.
411 // |source| is the input frame buffer, which is deblocked and cdef filtered.
412 // |top_border| and |bottom_border| are the top and bottom borders.
413 // |dest| is the output.
414 // |stride| is given in pixels, and shared by |source| and |dest|.
415 // |top_border_stride| and |bottom_border_stride| are given in pixels.
416 // |restoration_buffer| contains buffers required for self guided filter and
417 // wiener filter. They must be initialized before calling.
418 // The pointer arguments do not alias one another.
419 using LoopRestorationFunc = void (*)(
420     const RestorationUnitInfo& restoration_info, const void* source,
421     ptrdiff_t stride, const void* top_border, ptrdiff_t top_border_stride,
422     const void* bottom_border, ptrdiff_t bottom_border_stride, int width,
423     int height, RestorationBuffer* restoration_buffer, void* dest);
424 
425 // Index 0 is Wiener Filter.
426 // Index 1 is Self Guided Restoration Filter.
427 // This can be accessed as LoopRestorationType - 2.
428 using LoopRestorationFuncs = LoopRestorationFunc[2];
429 
// Convolve function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |horizontal_filter_id| and |vertical_filter_id| are the filter ids.
// |width| and |height| are width and height of the block to be filtered.
// |prediction| is the output block (output frame buffer). |pred_stride| is the
// corresponding stride (NOTE(review): its units are not stated here; confirm
// against the implementations).
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
// The pointer arguments do not alias one another.
using ConvolveFunc = void (*)(const void* reference, ptrdiff_t reference_stride,
                              int horizontal_filter_index,
                              int vertical_filter_index,
                              int horizontal_filter_id, int vertical_filter_id,
                              int width, int height, void* prediction,
                              ptrdiff_t pred_stride);

// Convolve functions signature. Each points to one convolve function with
// a specific setting:
// ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter]
// [has_horizontal_filter].
// If is_compound is false, the prediction is clipped to Pixel.
// If is_compound is true, the range of prediction is:
//   8bpp:  [-5132,  9212] (int16_t)
//   10bpp: [ 3988, 61532] (uint16_t)
//   12bpp: [ 3974, 61559] (uint16_t)
// See src/dsp/convolve.cc
using ConvolveFuncs = ConvolveFunc[2][2][2][2];
466 
// Convolve + scale function signature. Section 7.11.3.4.
// This function applies a horizontal filter followed by a vertical filter.
// |reference| is the input block (reference frame buffer). |reference_stride|
// is the corresponding frame stride.
// |vertical_filter_index|/|horizontal_filter_index| is the index to
// retrieve the type of filter to be applied for vertical/horizontal direction
// from the filter lookup table 'kSubPixelFilters'.
// |subpixel_x| and |subpixel_y| are starting positions in units of 1/1024.
// |step_x| and |step_y| are step sizes in units of 1/1024 of a pixel.
// |width| and |height| are width and height of the block to be filtered.
// |prediction| is the output block (output frame buffer). |pred_stride| is the
// corresponding stride (NOTE(review): its units are not stated here; confirm
// against the implementations).
// Rounding precision is derived from the function being called. For horizontal
// filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
// used. For compound vertical filtering kInterRoundBitsCompoundVertical will be
// used. Otherwise kInterRoundBitsVertical & kInterRoundBitsVertical12bpp will
// be used.
// The pointer arguments do not alias one another.
using ConvolveScaleFunc = void (*)(const void* reference,
                                   ptrdiff_t reference_stride,
                                   int horizontal_filter_index,
                                   int vertical_filter_index, int subpixel_x,
                                   int subpixel_y, int step_x, int step_y,
                                   int width, int height, void* prediction,
                                   ptrdiff_t pred_stride);

// Convolve functions signature for scaling version.
// 0: single predictor. 1: compound predictor.
using ConvolveScaleFuncs = ConvolveScaleFunc[2];
497 
// Weight mask function signature. Section 7.11.3.12.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// The prediction width and height are not passed as arguments; they are
// selected through the width and height indices of WeightMaskFuncs.
// The stride for the input buffers is equal to the prediction width.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |mask| is the output buffer. |mask_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using WeightMaskFunc = void (*)(const void* prediction_0,
                                const void* prediction_1, uint8_t* mask,
                                ptrdiff_t mask_stride);

// Weight mask functions signature. The dimensions (in order) are:
//   * Width index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * Height index (4 => 0, 8 => 1, 16 => 2 and so on).
//   * mask_is_inverse.
using WeightMaskFuncs = WeightMaskFunc[6][6][2];
516 
// Average blending function signature.
// Two predictors are averaged to generate the output.
// Input predictor values are int16_t. Output type is uint8_t, with actual
// range of Pixel value.
// Average blending is in the bottom of Section 7.11.3.1 (COMPOUND_AVERAGE).
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using AverageBlendFunc = void (*)(const void* prediction_0,
                                  const void* prediction_1, int width,
                                  int height, void* dest,
                                  ptrdiff_t dest_stride);
534 
// Distance weighted blending function signature.
// Weights are generated in Section 7.11.3.15.
// Weighted blending is in the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE).
// This function takes two blocks (inter frame prediction) and produces a
// weighted output.
// |prediction_0| is the first input block.
// |prediction_1| is the second input block. Both blocks are int16_t* when
// bitdepth == 8 and uint16_t* otherwise.
// |weight_0| is the weight for the first block. It is derived from the relative
// distance of the first reference frame and the current frame.
// |weight_1| is the weight for the second block. It is derived from the
// relative distance of the second reference frame and the current frame.
// |width| and |height| are the same for the first and second input blocks.
// The stride for the input buffers is equal to |width|.
// The valid range of block size is [8x8, 128x128] for the luma plane.
// |dest| is the output buffer. |dest_stride| is the output buffer stride.
// The pointer arguments do not alias one another.
using DistanceWeightedBlendFunc = void (*)(const void* prediction_0,
                                           const void* prediction_1,
                                           uint8_t weight_0, uint8_t weight_1,
                                           int width, int height, void* dest,
                                           ptrdiff_t dest_stride);
557 
// Mask blending function signature. Section 7.11.3.14.
// This function takes two blocks and produces a blended output stored into the
// output block |dest|. The blending is a weighted average process, controlled
// by values of the mask.
// |prediction_0| is the first input block. When prediction mode is inter_intra
// (or wedge_inter_intra), this refers to the inter frame prediction. It is
// int16_t* when bitdepth == 8 and uint16_t* otherwise.
// The stride for |prediction_0| is equal to |width|.
// |prediction_1| is the second input block. When prediction mode is inter_intra
// (or wedge_inter_intra), this refers to the intra frame prediction and uses
// Pixel values. It is only used for intra frame prediction when bitdepth >= 10.
// It is int16_t* when bitdepth == 8 and uint16_t* otherwise.
// |prediction_stride_1| is the stride, given in units of [u]int16_t. When
// |is_inter_intra| is false (compound prediction) then |prediction_stride_1| is
// equal to |width|.
// |mask| is an integer array, whose value indicates the weight of the blending.
// |mask_stride| is corresponding stride.
// |width|, |height| are the same for both input blocks.
// If it's inter_intra (or wedge_inter_intra), the valid range of block size is
// [8x8, 32x32], no 4:1/1:4 blocks (Section 5.11.28). Otherwise (including
// difference weighted prediction and compound average prediction), the valid
// range is [8x8, 128x128].
// If there's subsampling, the corresponding width and height are halved for
// chroma planes.
// |is_inter_intra| stands for the prediction mode. If it is true, one of the
// prediction blocks is from intra prediction of current frame. Otherwise, two
// prediction blocks are both inter frame predictions. It is not an argument of
// this signature; it is the second index of MaskBlendFuncs.
// |is_wedge_inter_intra| indicates if the mask is for the wedge prediction. It
// is not an argument of this signature either.
// |dest| is the output block.
// |dest_stride| is the corresponding stride for dest.
// The pointer arguments do not alias one another.
using MaskBlendFunc = void (*)(const void* prediction_0,
                               const void* prediction_1,
                               ptrdiff_t prediction_stride_1,
                               const uint8_t* mask, ptrdiff_t mask_stride,
                               int width, int height, void* dest,
                               ptrdiff_t dest_stride);

// Mask blending functions signature. Each points to one function with
// a specific setting:
// MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra].
using MaskBlendFuncs = MaskBlendFunc[3][2];
600 
601 // This function is similar to MaskBlendFunc. It is only used when
602 // |is_inter_intra| is true and |bitdepth| == 8.
603 // |prediction_[01]| point to Pixel values (uint8_t).
604 // |prediction_1| is also the output buffer.
605 // The pointer arguments do not alias one another.
606 using InterIntraMaskBlendFunc8bpp = void (*)(const uint8_t* prediction_0,
607                                              uint8_t* prediction_1,
608                                              ptrdiff_t prediction_stride_1,
609                                              const uint8_t* mask,
610                                              ptrdiff_t mask_stride, int width,
611                                              int height);
612 
613 // Table of InterIntra8bpp mask blending functions. When is_wedge_inter_intra
614 // is false, the function at index 0 must be used. Otherwise, the function at
615 // index subsampling_x + subsampling_y must be used.
616 using InterIntraMaskBlendFuncs8bpp = InterIntraMaskBlendFunc8bpp[3];
617 
618 // Obmc (overlapped block motion compensation) blending function signature.
619 // Section 7.11.3.10.
620 // This function takes two blocks and produces a blended output stored into the
621 // first input block. The blending is a weighted average process, controlled by
622 // values of the mask.
623 // Obmc is not a compound mode. It is different from other compound blending,
624 // in terms of precision. The current block is computed using convolution with
625 // clipping to the range of pixel values. Its above and left blocks are also
626 // clipped. Therefore the obmc blending process doesn't need to clip the output.
627 // |prediction| is the first input block, which will be overwritten.
628 // |prediction_stride| is the stride, given in bytes.
629 // |width|, |height| are the same for both input blocks. The range is [4x2,
630 // 32x32] for kObmcDirectionVertical and [2x4, 32x32] for
631 // kObmcDirectionHorizontal, see Section 7.11.3.9.
632 // |obmc_prediction| is the second input block.
633 // |obmc_prediction_stride| is its stride, given in bytes.
634 // The pointer arguments do not alias one another.
635 using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride,
636                                int width, int height,
637                                const void* obmc_prediction,
638                                ptrdiff_t obmc_prediction_stride);
639 using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections];
640 
641 // Warp function signature. Section 7.11.3.5.
642 // This function applies warp filtering for each 8x8 block inside the current
643 // coding block. The filtering process is similar to 2d convolve filtering.
644 // The horizontal filter is applied followed by the vertical filter.
645 // The function has to calculate corresponding pixel positions before and
646 // after warping.
647 // |source| is the input reference frame buffer.
648 // |source_stride|, |source_width|, |source_height| are corresponding frame
649 // stride, width, and height. |source_stride| is given in bytes.
650 // |warp_params| is the matrix of warp motion: warp_params[i] = mN.
651 //         [x'     (m2 m3 m0   [x
652 //     z .  y'  =   m4 m5 m1 *  y
653 //          1]      m6 m7 1)    1]
654 // |subsampling_x/y| is the current frame's plane subsampling factor.
655 // |block_start_x| and |block_start_y| are the starting position of the
656 // current coding block.
657 // |block_width| and |block_height| are width and height of the current coding
658 // block. |block_width| and |block_height| are at least 8.
659 // |alpha|, |beta|, |gamma|, |delta| are valid warp parameters. See the
660 // comments in the definition of struct GlobalMotion for the range of their
661 // values.
662 // |dest| is the output buffer of type Pixel. The output values are clipped to
663 // Pixel values.
664 // |dest_stride| is the stride, in units of bytes.
665 // Rounding precision is derived from the function being called. For horizontal
666 // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
667 // used. For vertical filtering kInterRoundBitsVertical &
668 // kInterRoundBitsVertical12bpp will be used.
669 //
670 // NOTE: WarpFunc assumes the source frame has left, right, top, and bottom
671 // borders that extend the frame boundary pixels.
672 // * The left and right borders must be at least 13 pixels wide. In addition,
673 //   Warp_NEON() may read up to 14 bytes after a row in the |source| buffer.
674 //   Therefore, there must be at least one extra padding byte after the right
675 //   border of the last row in the source buffer.
676 // * The top and bottom borders must be at least 13 pixels high.
677 // The pointer arguments do not alias one another.
678 using WarpFunc = void (*)(const void* source, ptrdiff_t source_stride,
679                           int source_width, int source_height,
680                           const int* warp_params, int subsampling_x,
681                           int subsampling_y, int block_start_x,
682                           int block_start_y, int block_width, int block_height,
683                           int16_t alpha, int16_t beta, int16_t gamma,
684                           int16_t delta, void* dest, ptrdiff_t dest_stride);
685 
686 // Warp for compound predictions. Section 7.11.3.5.
687 // Similar to WarpFunc, but |dest| is a uint16_t predictor buffer,
688 // |dest_stride| is given in units of uint16_t and |inter_round_bits_vertical|
689 // is always 7 (kCompoundInterRoundBitsVertical).
690 // Rounding precision is derived from the function being called. For horizontal
691 // filtering kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp will be
692 // used. For vertical filtering kInterRoundBitsCompoundVertical will be used.
693 using WarpCompoundFunc = WarpFunc;
694 
695 constexpr int kNumAutoRegressionLags = 4;
696 // Applies an auto-regressive filter to the white noise in |luma_grain_buffer|.
697 // Section 7.18.3.3, second code block.
698 // |params| are parameters read from frame header, mainly providing
699 // auto_regression_coeff_y for the filter and auto_regression_shift to right
700 // shift the filter sum by. Note: This method assumes
701 // params.auto_regression_coeff_lag is not 0. Do not call this method if
702 // params.auto_regression_coeff_lag is 0.
703 using LumaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
704                                         void* luma_grain_buffer);
705 // Function index is auto_regression_coeff_lag - 1.
706 using LumaAutoRegressionFuncs =
707     LumaAutoRegressionFunc[kNumAutoRegressionLags - 1];
708 
709 // Applies an auto-regressive filter to the white noise in u_grain and v_grain.
710 // Section 7.18.3.3, third code block.
711 // The |luma_grain_buffer| provides samples that are added to the autoregressive
712 // sum when num_y_points > 0.
713 // |u_grain_buffer| and |v_grain_buffer| point to the buffers of chroma noise
714 // that were generated from the stored Gaussian sequence, and are overwritten
715 // with the results of the autoregressive filter. |params| are parameters read
716 // from frame header, mainly providing auto_regression_coeff_u and
717 // auto_regression_coeff_v for each chroma plane's filter, and
718 // auto_regression_shift to right shift the filter sums by.
719 // The pointer arguments do not alias one another.
720 using ChromaAutoRegressionFunc = void (*)(const FilmGrainParams& params,
721                                           const void* luma_grain_buffer,
722                                           int subsampling_x, int subsampling_y,
723                                           void* u_grain_buffer,
724                                           void* v_grain_buffer);
725 using ChromaAutoRegressionFuncs =
726     ChromaAutoRegressionFunc[/*use_luma*/ 2][kNumAutoRegressionLags];
727 
728 // Build an image-wide "stripe" of grain noise for every 32 rows in the image.
729 // Section 7.18.3.5, first code block. |grain_buffer| is the grain template.
730 // Each 32x32 luma block is copied at a random offset specified via
731 // |grain_seed| from the grain template produced by autoregression, and the same
732 // is done for chroma grains, subject to subsampling.
733 // |width| and |height| are the dimensions of the overall image.
734 // |noise_stripes_buffer| points to an Array2DView with one row for each stripe.
735 // Because this function treats all planes identically and independently, it is
736 // simplified to take one grain buffer at a time. This means duplicating some
737 // random number generations, but that work can be reduced in other ways.
738 // The pointer arguments do not alias one another.
739 using ConstructNoiseStripesFunc = void (*)(const void* grain_buffer,
740                                            int grain_seed, int width,
741                                            int height, int subsampling_x,
742                                            int subsampling_y,
743                                            void* noise_stripes_buffer);
744 using ConstructNoiseStripesFuncs =
745     ConstructNoiseStripesFunc[/*overlap_flag*/ 2];
746 
747 // Compute the one or two overlap rows for each stripe copied to the noise
748 // image.
749 // Section 7.18.3.5, second code block. |width| and |height| are the
750 // dimensions of the overall image. |noise_stripes_buffer| points to an
751 // Array2DView with one row for each stripe. |noise_image_buffer| points to an
752 // Array2D containing the allocated plane for this frame. Because this function
753 // treats all planes identically and independently, it is simplified to take one
754 // plane's buffers at a time.
755 // The pointer arguments do not alias one another.
756 using ConstructNoiseImageOverlapFunc =
757     void (*)(const void* noise_stripes_buffer, int width, int height,
758              int subsampling_x, int subsampling_y, void* noise_image_buffer);
759 
760 // Populate a scaling lookup table with interpolated values of a piecewise
761 // linear function where values in |point_value| are mapped to the values in
762 // |point_scaling|.
763 // |num_points| can be between 0 and 15. When 0, the lookup table is set to
764 // zero. |scaling_lut| is the lookup table that is populated.
765 // |point_value| and |point_scaling| have |num_points| valid elements.
766 // The pointer arguments do not alias one another.
767 using InitializeScalingLutFunc = void (*)(int num_points,
768                                           const uint8_t point_value[],
769                                           const uint8_t point_scaling[],
770                                           int16_t* scaling_lut,
771                                           const int scaling_lut_length);
772 
773 // Blend noise with image. Section 7.18.3.5, third code block.
774 // |width| is the width of each row, while |height| is how many rows to compute.
775 // |start_height| is an offset for the noise image, to support multithreading.
776 // |min_value| and |max_value| (max_luma or max_chroma in the spec) are computed
777 // by the caller of these functions, according to the code in the spec.
778 // |source_plane_y| and |source_plane_uv| are the plane buffers of the decoded
779 // frame. They are blended with the film grain noise and written to
780 // |dest_plane_y| and |dest_plane_uv| as final output for display.
781 // source_plane_* and dest_plane_* may point to the same buffer, in which case
782 // the film grain noise is added in place.
783 // |scaling_lut_y| and |scaling_lut| represent a piecewise linear mapping from
784 // the frame's raw pixel value, to a scaling factor for the noise sample.
785 // |scaling_shift| is applied as a right shift after scaling, so that scaling
786 // down is possible. It is found in FilmGrainParams, but supplied directly to
787 // BlendNoiseWithImageLumaFunc because it's the only member used.
788 // The dest plane may point to the source plane, depending on the value of
789 // frame_header.show_existing_frame. |noise_image_ptr| and scaling_lut.* do not
790 // alias other arguments.
791 using BlendNoiseWithImageLumaFunc = void (*)(
792     const void* noise_image_ptr, int min_value, int max_value,
793     int scaling_shift, int width, int height, int start_height,
794     const int16_t* scaling_lut_y, const void* source_plane_y,
795     ptrdiff_t source_stride_y, void* dest_plane_y, ptrdiff_t dest_stride_y);
796 
797 using BlendNoiseWithImageChromaFunc = void (*)(
798     Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
799     int min_value, int max_value, int width, int height, int start_height,
800     int subsampling_x, int subsampling_y, const int16_t* scaling_lut,
801     const void* source_plane_y, ptrdiff_t source_stride_y,
802     const void* source_plane_uv, ptrdiff_t source_stride_uv,
803     void* dest_plane_uv, ptrdiff_t dest_stride_uv);
804 
805 using BlendNoiseWithImageChromaFuncs =
806     BlendNoiseWithImageChromaFunc[/*chroma_scaling_from_luma*/ 2];
807 
808 //------------------------------------------------------------------------------
809 
810 struct FilmGrainFuncs {  // Film grain functions; see Dsp::film_grain.
811   LumaAutoRegressionFuncs luma_auto_regression;
812   ChromaAutoRegressionFuncs chroma_auto_regression;
813   ConstructNoiseStripesFuncs construct_noise_stripes;
814   ConstructNoiseImageOverlapFunc construct_noise_image_overlap;
815   InitializeScalingLutFunc initialize_scaling_lut;
816   BlendNoiseWithImageLumaFunc blend_noise_luma;
817   BlendNoiseWithImageChromaFuncs blend_noise_chroma;
818 };
819 
820 // Motion field projection function signature. Section 7.9.
821 // |reference_info| provides reference information for motion field projection.
822 // |reference_to_current_with_sign| is the precalculated reference frame id
823 // distance from the current frame.
824 // |dst_sign| is -1 for LAST_FRAME and LAST2_FRAME, or 0 (1 in spec) for others.
825 // |y8_start| and |y8_end| are the start and end 8x8 rows of the current tile.
826 // |x8_start| and |x8_end| are the start and end 8x8 columns of the current
827 // tile.
828 // |motion_field| is the output which saves the projected motion field
829 // information.
830 // Note: Only the entry from the 8-bit Dsp table is used as this function is
831 // bitdepth agnostic.
832 using MotionFieldProjectionKernelFunc = void (*)(
833     const ReferenceInfo& reference_info, int reference_to_current_with_sign,
834     int dst_sign, int y8_start, int y8_end, int x8_start, int x8_end,
835     TemporalMotionField* motion_field);
836 
837 // Compound temporal motion vector projection function signature.
838 // Section 7.9.3 and 7.10.2.10.
839 // |temporal_mvs| is the aligned set of temporal reference motion vectors.
840 // |temporal_reference_offsets| specifies the number of frames covered by the
841 // original motion vector.
842 // |reference_offsets| specifies the number of frames to be covered by the
843 // projected motion vector.
844 // |count| is the number of temporal motion vectors.
845 // |candidate_mvs| is the aligned set of projected motion vectors.
846 // The pointer arguments do not alias one another.
847 // Note: Only the entry from the 8-bit Dsp table is used as this function is
848 // bitdepth agnostic.
849 using MvProjectionCompoundFunc = void (*)(
850     const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
851     const int reference_offsets[2], int count,
852     CompoundMotionVector* candidate_mvs);
853 
854 // Single temporal motion vector projection function signature.
855 // Section 7.9.3 and 7.10.2.10.
856 // |temporal_mvs| is the aligned set of temporal reference motion vectors.
857 // |temporal_reference_offsets| specifies the number of frames covered by the
858 // original motion vector.
859 // |reference_offset| specifies the number of frames to be covered by the
860 // projected motion vector.
861 // |count| is the number of temporal motion vectors.
862 // |candidate_mvs| is the aligned set of projected motion vectors.
863 // The pointer arguments do not alias one another.
864 // Note: Only the entry from the 8-bit Dsp table is used as this function is
865 // bitdepth agnostic.
866 using MvProjectionSingleFunc = void (*)(
867     const MotionVector* temporal_mvs, const int8_t* temporal_reference_offsets,
868     int reference_offset, int count, MotionVector* candidate_mvs);
869 
870 struct Dsp {  // Per-bitdepth table of dsp functions; see GetDspTable().
871   AverageBlendFunc average_blend;
872   CdefDirectionFunc cdef_direction;
873   CdefFilteringFuncs cdef_filters;
874   CflIntraPredictorFuncs cfl_intra_predictors;
875   CflSubsamplerFuncs cfl_subsamplers;
876   ConvolveFuncs convolve;
877   ConvolveScaleFuncs convolve_scale;
878   DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1;
879   DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2;
880   DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3;
881   DistanceWeightedBlendFunc distance_weighted_blend;
882   FilmGrainFuncs film_grain;
883   FilterIntraPredictorFunc filter_intra_predictor;
884   InterIntraMaskBlendFuncs8bpp inter_intra_mask_blend_8bpp;
885   IntraEdgeFilterFunc intra_edge_filter;
886   IntraEdgeUpsamplerFunc intra_edge_upsampler;
887   IntraPredictorFuncs intra_predictors;
888   InverseTransformAddFuncs inverse_transforms;
889   LoopFilterFuncs loop_filters;
890   LoopRestorationFuncs loop_restorations;
891   MaskBlendFuncs mask_blend;
892   MotionFieldProjectionKernelFunc motion_field_projection_kernel;
893   MvProjectionCompoundFunc mv_projection_compound[3];
894   MvProjectionSingleFunc mv_projection_single[3];
895   ObmcBlendFuncs obmc_blend;
896   SuperResCoefficientsFunc super_res_coefficients;
897   SuperResFunc super_res;
898   WarpCompoundFunc warp_compound;
899   WarpFunc warp;
900   WeightMaskFuncs weight_mask;
901 };
902 
903 // Initializes function pointers based on build config and runtime
904 // environment. Must be called once before first use. This function is
905 // thread-safe.
906 void DspInit();
907 
908 // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
909 // exist. The table is read-only; see dsp_internal::GetWritableDspTable().
910 const Dsp* GetDspTable(int bitdepth);
911 
912 }  // namespace dsp
913 
914 namespace dsp_internal {
915 
916 // Visual Studio builds don't have a way to detect SSE4_1. Only exclude the C
917 // functions if /arch:AVX2 is used across all sources.
918 #if !LIBGAV1_TARGETING_AVX2 && \
919     (defined(_MSC_VER) || (defined(_M_IX86) || defined(_M_X64)))
920 #undef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
921 #define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 1
922 #endif  // NOTE(review): also true for non-x86 _MSC_VER builds - confirm.
923 
924 // Evaluates to true if a more highly optimized version of |func| is not
925 // defined for the associated bitdepth or if it is forcibly enabled with
926 // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define checked for |func| corresponds
927 // to the LIBGAV1_Dsp<bitdepth>bpp_|func| define in the header file associated
928 // with the module.
929 // |func| is one of:
930 //   - FunctionName, e.g., SelfGuidedFilter.
931 //   - [sub-table-index1][...-indexN] e.g.,
932 //     TransformSize4x4_IntraPredictorDc. The indices correspond to enum values
933 //     used as lookups with leading 'k' removed.
934 //
935 // NEON support is the only extension available for ARM and it is always
936 // required. Because of this restriction DSP_ENABLED_8BPP_NEON(func) is always
937 // true and can be omitted.
938 #define DSP_ENABLED_8BPP_AVX2(func)    \
939   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
940    LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_AVX2)
941 #define DSP_ENABLED_10BPP_AVX2(func)   \
942   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
943    LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_AVX2)
944 #define DSP_ENABLED_8BPP_SSE4_1(func)  \
945   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
946    LIBGAV1_Dsp8bpp_##func == LIBGAV1_CPU_SSE4_1)
947 #define DSP_ENABLED_10BPP_SSE4_1(func) \
948   (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
949    LIBGAV1_Dsp10bpp_##func == LIBGAV1_CPU_SSE4_1)
950 
951 // Initializes C-only function pointers. Note some entries may be set to
952 // nullptr if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is 0. This is meant for use in
953 // tests only; it is not thread-safe.
954 void DspInit_C();
955 
956 // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't
957 // exist. This version is meant for use by test or dsp/*Init() functions only.
958 dsp::Dsp* GetWritableDspTable(int bitdepth);
959 
960 }  // namespace dsp_internal
961 }  // namespace libgav1
962 
963 #endif  // LIBGAV1_SRC_DSP_DSP_H_
964