• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/convolve.h"
16 
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27 
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31 
// Number of columns / rows by which the non-scaled filters in this file move
// the caller-supplied |reference| pointer backwards before applying the
// kSubPixelTaps-tap kernel.
constexpr int kHorizontalOffset = 3;
constexpr int kVerticalOffset = 3;
34 
// Compound prediction output ranges from ConvolveTest.ShowRange.
// In some cases, the horizontal or vertical filter will be omitted. This table
// shows the general case, where the downscaled horizontal output is input to
// the vertical filter via the |intermediate_result| array. The final output is
// either Pixel or compound values, depending on the |is_compound| variable.
// Bitdepth:  8 Input range:            [       0,      255]
//   Horizontal upscaled range:         [   -7140,    23460]
//   Horizontal downscaled range:       [   -1785,     5865]
//   Vertical upscaled range:           [ -328440,   589560]
//   Pixel output range:                [       0,      255]
//   Compound output range:             [   -5132,     9212]
//
// Bitdepth: 10 Input range:            [       0,     1023]
//   Horizontal upscaled range:         [  -28644,    94116]
//   Horizontal downscaled range:       [   -7161,    23529]
//   Vertical upscaled range:           [-1317624,  2365176]
//   Pixel output range:                [       0,     1023]
//   Compound output range:             [    3988,    61532]
//
// Bitdepth: 12 Input range:            [       0,     4095]
//   Horizontal upscaled range:         [ -114660,   376740]
//   Horizontal downscaled range:       [   -7166,    23546]
//   Vertical upscaled range:           [-1318560,  2366880]
//   Pixel output range:                [       0,     4095]
//   Compound output range:             [    3974,    61559]
60 
61 template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)62 void ConvolveScale2D_C(const void* LIBGAV1_RESTRICT const reference,
63                        const ptrdiff_t reference_stride,
64                        const int horizontal_filter_index,
65                        const int vertical_filter_index, const int subpixel_x,
66                        const int subpixel_y, const int step_x, const int step_y,
67                        const int width, const int height,
68                        void* LIBGAV1_RESTRICT prediction,
69                        const ptrdiff_t pred_stride) {
70   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
71                                            ? kInterRoundBitsHorizontal12bpp
72                                            : kInterRoundBitsHorizontal;
73   constexpr int kRoundBitsVertical =
74       (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
75   const int intermediate_height =
76       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
77        kScaleSubPixelBits) +
78       kSubPixelTaps;
79   // The output of the horizontal filter, i.e. the intermediate_result, is
80   // guaranteed to fit in int16_t.
81   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
82                               (2 * kMaxSuperBlockSizeInPixels + 8)];
83   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
84   const int max_pixel_value = (1 << bitdepth) - 1;
85 
86   // Horizontal filter.
87   // Filter types used for width <= 4 are different from those for width > 4.
88   // When width > 4, the valid filter index range is always [0, 3].
89   // When width <= 4, the valid filter index range is always [4, 5].
90   // Similarly for height.
91   int filter_index = GetFilterIndex(horizontal_filter_index, width);
92   int16_t* intermediate = intermediate_result;
93   const auto* src = static_cast<const Pixel*>(reference);
94   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
95   auto* dest = static_cast<Pixel*>(prediction);
96   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
97   const int ref_x = subpixel_x >> kScaleSubPixelBits;
98   // Note: assume the input src is already aligned to the correct start
99   // position.
100   int y = 0;
101   do {
102     int p = subpixel_x;
103     int x = 0;
104     do {
105       int sum = 0;
106       const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
107       const int filter_id = (p >> 6) & kSubPixelMask;
108       for (int k = 0; k < kSubPixelTaps; ++k) {
109         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
110       }
111       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
112       p += step_x;
113     } while (++x < width);
114 
115     src += src_stride;
116     intermediate += intermediate_stride;
117   } while (++y < intermediate_height);
118 
119   // Vertical filter.
120   filter_index = GetFilterIndex(vertical_filter_index, height);
121   intermediate = intermediate_result;
122   int p = subpixel_y & 1023;
123   y = 0;
124   do {
125     const int filter_id = (p >> 6) & kSubPixelMask;
126     int x = 0;
127     do {
128       int sum = 0;
129       for (int k = 0; k < kSubPixelTaps; ++k) {
130         sum +=
131             kHalfSubPixelFilters[filter_index][filter_id][k] *
132             intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
133                          x];
134       }
135       dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
136                       max_pixel_value);
137     } while (++x < width);
138 
139     dest += dest_stride;
140     p += step_y;
141   } while (++y < height);
142 }
143 
144 template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)145 void ConvolveCompoundScale2D_C(
146     const void* LIBGAV1_RESTRICT const reference,
147     const ptrdiff_t reference_stride, const int horizontal_filter_index,
148     const int vertical_filter_index, const int subpixel_x, const int subpixel_y,
149     const int step_x, const int step_y, const int width, const int height,
150     void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
151   // All compound functions output to the predictor buffer with |pred_stride|
152   // equal to |width|.
153   assert(pred_stride == width);
154   // Compound functions start at 4x4.
155   assert(width >= 4 && height >= 4);
156   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
157                                            ? kInterRoundBitsHorizontal12bpp
158                                            : kInterRoundBitsHorizontal;
159   constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
160   const int intermediate_height =
161       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
162        kScaleSubPixelBits) +
163       kSubPixelTaps;
164   // The output of the horizontal filter, i.e. the intermediate_result, is
165   // guaranteed to fit in int16_t.
166   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
167                               (2 * kMaxSuperBlockSizeInPixels + 8)];
168   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
169 
170   // Horizontal filter.
171   // Filter types used for width <= 4 are different from those for width > 4.
172   // When width > 4, the valid filter index range is always [0, 3].
173   // When width <= 4, the valid filter index range is always [4, 5].
174   // Similarly for height.
175   int filter_index = GetFilterIndex(horizontal_filter_index, width);
176   int16_t* intermediate = intermediate_result;
177   const auto* src = static_cast<const Pixel*>(reference);
178   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
179   auto* dest = static_cast<uint16_t*>(prediction);
180   const int ref_x = subpixel_x >> kScaleSubPixelBits;
181   // Note: assume the input src is already aligned to the correct start
182   // position.
183   int y = 0;
184   do {
185     int p = subpixel_x;
186     int x = 0;
187     do {
188       int sum = 0;
189       const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
190       const int filter_id = (p >> 6) & kSubPixelMask;
191       for (int k = 0; k < kSubPixelTaps; ++k) {
192         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
193       }
194       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
195       p += step_x;
196     } while (++x < width);
197 
198     src += src_stride;
199     intermediate += intermediate_stride;
200   } while (++y < intermediate_height);
201 
202   // Vertical filter.
203   filter_index = GetFilterIndex(vertical_filter_index, height);
204   intermediate = intermediate_result;
205   int p = subpixel_y & 1023;
206   y = 0;
207   do {
208     const int filter_id = (p >> 6) & kSubPixelMask;
209     int x = 0;
210     do {
211       int sum = 0;
212       for (int k = 0; k < kSubPixelTaps; ++k) {
213         sum +=
214             kHalfSubPixelFilters[filter_index][filter_id][k] *
215             intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
216                          x];
217       }
218       sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
219       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
220       dest[x] = sum;
221     } while (++x < width);
222 
223     dest += pred_stride;
224     p += step_y;
225   } while (++y < height);
226 }
227 
228 template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)229 void ConvolveCompound2D_C(const void* LIBGAV1_RESTRICT const reference,
230                           const ptrdiff_t reference_stride,
231                           const int horizontal_filter_index,
232                           const int vertical_filter_index,
233                           const int horizontal_filter_id,
234                           const int vertical_filter_id, const int width,
235                           const int height, void* LIBGAV1_RESTRICT prediction,
236                           const ptrdiff_t pred_stride) {
237   // All compound functions output to the predictor buffer with |pred_stride|
238   // equal to |width|.
239   assert(pred_stride == width);
240   // Compound functions start at 4x4.
241   assert(width >= 4 && height >= 4);
242   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
243                                            ? kInterRoundBitsHorizontal12bpp
244                                            : kInterRoundBitsHorizontal;
245   constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
246   const int intermediate_height = height + kSubPixelTaps - 1;
247   // The output of the horizontal filter, i.e. the intermediate_result, is
248   // guaranteed to fit in int16_t.
249   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
250                               (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
251   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
252 
253   // Horizontal filter.
254   // Filter types used for width <= 4 are different from those for width > 4.
255   // When width > 4, the valid filter index range is always [0, 3].
256   // When width <= 4, the valid filter index range is always [4, 5].
257   // Similarly for height.
258   int filter_index = GetFilterIndex(horizontal_filter_index, width);
259   int16_t* intermediate = intermediate_result;
260   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
261   const auto* src = static_cast<const Pixel*>(reference) -
262                     kVerticalOffset * src_stride - kHorizontalOffset;
263   auto* dest = static_cast<uint16_t*>(prediction);
264 
265   // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
266   assert(horizontal_filter_id != 0);
267   int y = 0;
268   do {
269     int x = 0;
270     do {
271       int sum = 0;
272       for (int k = 0; k < kSubPixelTaps; ++k) {
273         sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
274                src[x + k];
275       }
276       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
277     } while (++x < width);
278 
279     src += src_stride;
280     intermediate += intermediate_stride;
281   } while (++y < intermediate_height);
282 
283   // Vertical filter.
284   filter_index = GetFilterIndex(vertical_filter_index, height);
285   intermediate = intermediate_result;
286   // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
287   assert(vertical_filter_id != 0);
288   y = 0;
289   do {
290     int x = 0;
291     do {
292       int sum = 0;
293       for (int k = 0; k < kSubPixelTaps; ++k) {
294         sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
295                intermediate[k * intermediate_stride + x];
296       }
297       sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
298       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
299       dest[x] = sum;
300     } while (++x < width);
301 
302     dest += pred_stride;
303     intermediate += intermediate_stride;
304   } while (++y < height);
305 }
306 
307 // This function is a simplified version of ConvolveCompound2D_C.
308 // It is called when it is single prediction mode, where both horizontal and
309 // vertical filtering are required.
310 // The output is the single prediction of the block, clipped to valid pixel
311 // range.
312 template <int bitdepth, typename Pixel>
Convolve2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)313 void Convolve2D_C(const void* LIBGAV1_RESTRICT const reference,
314                   const ptrdiff_t reference_stride,
315                   const int horizontal_filter_index,
316                   const int vertical_filter_index,
317                   const int horizontal_filter_id, const int vertical_filter_id,
318                   const int width, const int height,
319                   void* LIBGAV1_RESTRICT prediction,
320                   const ptrdiff_t pred_stride) {
321   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
322                                            ? kInterRoundBitsHorizontal12bpp
323                                            : kInterRoundBitsHorizontal;
324   constexpr int kRoundBitsVertical =
325       (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
326   const int intermediate_height = height + kSubPixelTaps - 1;
327   // The output of the horizontal filter, i.e. the intermediate_result, is
328   // guaranteed to fit in int16_t.
329   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
330                               (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
331   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
332   const int max_pixel_value = (1 << bitdepth) - 1;
333 
334   // Horizontal filter.
335   // Filter types used for width <= 4 are different from those for width > 4.
336   // When width > 4, the valid filter index range is always [0, 3].
337   // When width <= 4, the valid filter index range is always [4, 5].
338   // Similarly for height.
339   int filter_index = GetFilterIndex(horizontal_filter_index, width);
340   int16_t* intermediate = intermediate_result;
341   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
342   const auto* src = static_cast<const Pixel*>(reference) -
343                     kVerticalOffset * src_stride - kHorizontalOffset;
344   auto* dest = static_cast<Pixel*>(prediction);
345   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
346   // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
347   assert(horizontal_filter_id != 0);
348   int y = 0;
349   do {
350     int x = 0;
351     do {
352       int sum = 0;
353       for (int k = 0; k < kSubPixelTaps; ++k) {
354         sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
355                src[x + k];
356       }
357       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
358     } while (++x < width);
359 
360     src += src_stride;
361     intermediate += intermediate_stride;
362   } while (++y < intermediate_height);
363 
364   // Vertical filter.
365   filter_index = GetFilterIndex(vertical_filter_index, height);
366   intermediate = intermediate_result;
367   // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
368   assert(vertical_filter_id != 0);
369   y = 0;
370   do {
371     int x = 0;
372     do {
373       int sum = 0;
374       for (int k = 0; k < kSubPixelTaps; ++k) {
375         sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
376                intermediate[k * intermediate_stride + x];
377       }
378       dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
379                       max_pixel_value);
380     } while (++x < width);
381 
382     dest += dest_stride;
383     intermediate += intermediate_stride;
384   } while (++y < height);
385 }
386 
387 // This function is a simplified version of Convolve2D_C.
388 // It is called when it is single prediction mode, where only horizontal
389 // filtering is required.
390 // The output is the single prediction of the block, clipped to valid pixel
391 // range.
392 template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)393 void ConvolveHorizontal_C(const void* LIBGAV1_RESTRICT const reference,
394                           const ptrdiff_t reference_stride,
395                           const int horizontal_filter_index,
396                           const int /*vertical_filter_index*/,
397                           const int horizontal_filter_id,
398                           const int /*vertical_filter_id*/, const int width,
399                           const int height, void* LIBGAV1_RESTRICT prediction,
400                           const ptrdiff_t pred_stride) {
401   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
402                                            ? kInterRoundBitsHorizontal12bpp
403                                            : kInterRoundBitsHorizontal;
404   const int filter_index = GetFilterIndex(horizontal_filter_index, width);
405   const int bits = kFilterBits - kRoundBitsHorizontal;
406   const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
407   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
408   auto* dest = static_cast<Pixel*>(prediction);
409   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
410   const int max_pixel_value = (1 << bitdepth) - 1;
411   int y = 0;
412   do {
413     int x = 0;
414     do {
415       int sum = 0;
416       for (int k = 0; k < kSubPixelTaps; ++k) {
417         sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
418                src[x + k];
419       }
420       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
421       dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
422     } while (++x < width);
423 
424     src += src_stride;
425     dest += dest_stride;
426   } while (++y < height);
427 }
428 
429 // This function is a simplified version of Convolve2D_C.
430 // It is called when it is single prediction mode, where only vertical
431 // filtering is required.
432 // The output is the single prediction of the block, clipped to valid pixel
433 // range.
434 template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)435 void ConvolveVertical_C(const void* LIBGAV1_RESTRICT const reference,
436                         const ptrdiff_t reference_stride,
437                         const int /*horizontal_filter_index*/,
438                         const int vertical_filter_index,
439                         const int /*horizontal_filter_id*/,
440                         const int vertical_filter_id, const int width,
441                         const int height, void* LIBGAV1_RESTRICT prediction,
442                         const ptrdiff_t pred_stride) {
443   const int filter_index = GetFilterIndex(vertical_filter_index, height);
444   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
445   const auto* src =
446       static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
447   auto* dest = static_cast<Pixel*>(prediction);
448   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
449   // Copy filters must call ConvolveCopy().
450   assert(vertical_filter_id != 0);
451 
452   const int max_pixel_value = (1 << bitdepth) - 1;
453   int y = 0;
454   do {
455     int x = 0;
456     do {
457       int sum = 0;
458       for (int k = 0; k < kSubPixelTaps; ++k) {
459         sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
460                src[k * src_stride + x];
461       }
462       dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
463                       max_pixel_value);
464     } while (++x < width);
465 
466     src += src_stride;
467     dest += dest_stride;
468   } while (++y < height);
469 }
470 
471 template <int bitdepth, typename Pixel>
ConvolveCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)472 void ConvolveCopy_C(const void* LIBGAV1_RESTRICT const reference,
473                     const ptrdiff_t reference_stride,
474                     const int /*horizontal_filter_index*/,
475                     const int /*vertical_filter_index*/,
476                     const int /*horizontal_filter_id*/,
477                     const int /*vertical_filter_id*/, const int width,
478                     const int height, void* LIBGAV1_RESTRICT prediction,
479                     const ptrdiff_t pred_stride) {
480   const auto* src = static_cast<const uint8_t*>(reference);
481   auto* dest = static_cast<uint8_t*>(prediction);
482   int y = 0;
483   do {
484     memcpy(dest, src, width * sizeof(Pixel));
485     src += reference_stride;
486     dest += pred_stride;
487   } while (++y < height);
488 }
489 
490 template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)491 void ConvolveCompoundCopy_C(const void* LIBGAV1_RESTRICT const reference,
492                             const ptrdiff_t reference_stride,
493                             const int /*horizontal_filter_index*/,
494                             const int /*vertical_filter_index*/,
495                             const int /*horizontal_filter_id*/,
496                             const int /*vertical_filter_id*/, const int width,
497                             const int height, void* LIBGAV1_RESTRICT prediction,
498                             const ptrdiff_t pred_stride) {
499   // All compound functions output to the predictor buffer with |pred_stride|
500   // equal to |width|.
501   assert(pred_stride == width);
502   // Compound functions start at 4x4.
503   assert(width >= 4 && height >= 4);
504   constexpr int kRoundBitsVertical =
505       ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
506                         : kInterRoundBitsVertical) -
507       kInterRoundBitsCompoundVertical;
508   const auto* src = static_cast<const Pixel*>(reference);
509   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
510   auto* dest = static_cast<uint16_t*>(prediction);
511   int y = 0;
512   do {
513     int x = 0;
514     do {
515       int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
516       sum += src[x];
517       dest[x] = sum << kRoundBitsVertical;
518     } while (++x < width);
519     src += src_stride;
520     dest += pred_stride;
521   } while (++y < height);
522 }
523 
524 // This function is a simplified version of ConvolveCompound2D_C.
525 // It is called when it is compound prediction mode, where only horizontal
526 // filtering is required.
527 // The output is not clipped to valid pixel range. Its output will be
528 // blended with another predictor to generate the final prediction of the block.
529 template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)530 void ConvolveCompoundHorizontal_C(
531     const void* LIBGAV1_RESTRICT const reference,
532     const ptrdiff_t reference_stride, const int horizontal_filter_index,
533     const int /*vertical_filter_index*/, const int horizontal_filter_id,
534     const int /*vertical_filter_id*/, const int width, const int height,
535     void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
536   // All compound functions output to the predictor buffer with |pred_stride|
537   // equal to |width|.
538   assert(pred_stride == width);
539   // Compound functions start at 4x4.
540   assert(width >= 4 && height >= 4);
541   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
542                                            ? kInterRoundBitsHorizontal12bpp
543                                            : kInterRoundBitsHorizontal;
544   const int filter_index = GetFilterIndex(horizontal_filter_index, width);
545   const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
546   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
547   auto* dest = static_cast<uint16_t*>(prediction);
548   // Copy filters must call ConvolveCopy().
549   assert(horizontal_filter_id != 0);
550   int y = 0;
551   do {
552     int x = 0;
553     do {
554       int sum = 0;
555       for (int k = 0; k < kSubPixelTaps; ++k) {
556         sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
557                src[x + k];
558       }
559       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
560       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
561       dest[x] = sum;
562     } while (++x < width);
563 
564     src += src_stride;
565     dest += pred_stride;
566   } while (++y < height);
567 }
568 
569 // This function is a simplified version of ConvolveCompound2D_C.
570 // It is called when it is compound prediction mode, where only vertical
571 // filtering is required.
572 // The output is not clipped to valid pixel range. Its output will be
573 // blended with another predictor to generate the final prediction of the block.
574 template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)575 void ConvolveCompoundVertical_C(
576     const void* LIBGAV1_RESTRICT const reference,
577     const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
578     const int vertical_filter_index, const int /*horizontal_filter_id*/,
579     const int vertical_filter_id, const int width, const int height,
580     void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
581   // All compound functions output to the predictor buffer with |pred_stride|
582   // equal to |width|.
583   assert(pred_stride == width);
584   // Compound functions start at 4x4.
585   assert(width >= 4 && height >= 4);
586   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
587                                            ? kInterRoundBitsHorizontal12bpp
588                                            : kInterRoundBitsHorizontal;
589   const int filter_index = GetFilterIndex(vertical_filter_index, height);
590   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
591   const auto* src =
592       static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
593   auto* dest = static_cast<uint16_t*>(prediction);
594   // Copy filters must call ConvolveCopy().
595   assert(vertical_filter_id != 0);
596   int y = 0;
597   do {
598     int x = 0;
599     do {
600       int sum = 0;
601       for (int k = 0; k < kSubPixelTaps; ++k) {
602         sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
603                src[k * src_stride + x];
604       }
605       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
606       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
607       dest[x] = sum;
608     } while (++x < width);
609     src += src_stride;
610     dest += pred_stride;
611   } while (++y < height);
612 }
613 
614 // This function is used when intra block copy is present.
615 // It is called when it is single prediction mode for U/V plane, where the
616 // reference block is from current frame and both horizontal and vertical
617 // filtering are required.
618 // The output is the single prediction of the block, clipped to valid pixel
619 // range.
620 template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)621 void ConvolveIntraBlockCopy2D_C(
622     const void* LIBGAV1_RESTRICT const reference,
623     const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
624     const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
625     const int /*vertical_filter_id*/, const int width, const int height,
626     void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
627   assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
628   assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
629   const auto* src = static_cast<const Pixel*>(reference);
630   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
631   auto* dest = static_cast<Pixel*>(prediction);
632   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
633   const int intermediate_height = height + 1;
634   uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
635                                (kMaxSuperBlockSizeInPixels + 1)];
636   uint16_t* intermediate = intermediate_result;
637   // Note: allow vertical access to height + 1. Because this function is only
638   // for u/v plane of intra block copy, such access is guaranteed to be within
639   // the prediction block.
640   int y = 0;
641   do {
642     int x = 0;
643     do {
644       intermediate[x] = src[x] + src[x + 1];
645     } while (++x < width);
646 
647     src += src_stride;
648     intermediate += width;
649   } while (++y < intermediate_height);
650 
651   intermediate = intermediate_result;
652   y = 0;
653   do {
654     int x = 0;
655     do {
656       dest[x] =
657           RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
658     } while (++x < width);
659 
660     intermediate += width;
661     dest += dest_stride;
662   } while (++y < height);
663 }
664 
665 // This function is used when intra block copy is present.
666 // It is called when it is single prediction mode for U/V plane, where the
667 // reference block is from the current frame and only horizontal or vertical
668 // filtering is required.
669 // The output is the single prediction of the block, clipped to valid pixel
670 // range.
671 // The filtering of intra block copy is simply the average of current and
672 // the next pixel.
673 template <int bitdepth, typename Pixel, bool is_horizontal>
ConvolveIntraBlockCopy1D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)674 void ConvolveIntraBlockCopy1D_C(
675     const void* LIBGAV1_RESTRICT const reference,
676     const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
677     const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
678     const int /*vertical_filter_id*/, const int width, const int height,
679     void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
680   assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
681   assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
682   const auto* src = static_cast<const Pixel*>(reference);
683   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
684   auto* dest = static_cast<Pixel*>(prediction);
685   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
686   const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
687   int y = 0;
688   do {
689     int x = 0;
690     do {
691       dest[x] = RightShiftWithRounding(src[x] + src[x + offset], 1);
692     } while (++x < width);
693 
694     src += src_stride;
695     dest += dest_stride;
696   } while (++y < height);
697 }
698 
Init8bpp()699 void Init8bpp() {
700   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
701   assert(dsp != nullptr);
702 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
703   dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
704   dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
705   dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
706   dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
707 
708   dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
709   dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
710   dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
711   dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
712 
713   dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
714   dsp->convolve[1][0][0][1] =
715       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
716   dsp->convolve[1][0][1][0] =
717       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
718   dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
719 
720   dsp->convolve[1][1][0][0] = nullptr;
721   dsp->convolve[1][1][0][1] = nullptr;
722   dsp->convolve[1][1][1][0] = nullptr;
723   dsp->convolve[1][1][1][1] = nullptr;
724 
725   dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
726   dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
727 #else  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
728 #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
729   dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
730 #endif
731 #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
732   dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
733 #endif
734 #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
735   dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
736 #endif
737 #ifndef LIBGAV1_Dsp8bpp_Convolve2D
738   dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
739 #endif
740 
741 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
742   dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
743 #endif
744 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
745   dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
746 #endif
747 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
748   dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
749 #endif
750 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
751   dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
752 #endif
753 
754 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
755   dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
756 #endif
757 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
758   dsp->convolve[1][0][0][1] =
759       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
760 #endif
761 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
762   dsp->convolve[1][0][1][0] =
763       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
764 #endif
765 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
766   dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
767 #endif
768 
769   dsp->convolve[1][1][0][0] = nullptr;
770   dsp->convolve[1][1][0][1] = nullptr;
771   dsp->convolve[1][1][1][0] = nullptr;
772   dsp->convolve[1][1][1][1] = nullptr;
773 
774 #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
775   dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
776 #endif
777 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
778   dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
779 #endif
780 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
781 }
782 
#if LIBGAV1_MAX_BITDEPTH >= 10
// Installs the C convolve functions into the writable 10bpp Dsp table.
//
// As the assignments below show, |convolve| is indexed as
// [intra block copy][compound][vertical filter][horizontal filter].
//
// When LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero every slot is filled.
// Otherwise a slot is filled only if its LIBGAV1_Dsp10bpp_* macro is not
// defined (presumably such a macro is defined by a header that supplies an
// optimized replacement for that slot).
//
// The filtered intra block copy slots are guarded by the
// LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy{Horizontal,Vertical,2D} names, which
// match the 8bpp table and the unfiltered ConvolveIntraBlockCopy guard. The
// older spellings without "Copy" are still honored so that any header using
// them keeps suppressing the C version.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);

  // Single prediction.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCopy)
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveHorizontal)
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveVertical)
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_Convolve2D)
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
#endif

  // Compound prediction.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundCopy)
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal)
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundVertical)
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompound2D)
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
#endif

  // Intra block copy, single prediction. The unfiltered case reuses the
  // regular copy function.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy)
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                             \
    !(defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyHorizontal) || \
      defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal))
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                           \
    !(defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyVertical) || \
      defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical))
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                     \
    !(defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy2D) || \
      defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D))
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
#endif

  // Intra block copy with compound prediction has no implementation; these
  // slots are cleared in every configuration.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

  // Scaled prediction: [0] is single, [1] is compound.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveScale2D)
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D)
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
868 
#if LIBGAV1_MAX_BITDEPTH == 12
// Installs the C convolve functions into the writable 12bpp Dsp table.
//
// As the assignments below show, |convolve| is indexed as
// [intra block copy][compound][vertical filter][horizontal filter].
//
// When LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero every slot is filled.
// Otherwise a slot is filled only if its LIBGAV1_Dsp12bpp_* macro is not
// defined (presumably such a macro is defined by a header that supplies an
// optimized replacement for that slot).
//
// The filtered intra block copy slots are guarded by the
// LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopy{Horizontal,Vertical,2D} names, which
// match the 8bpp table and the unfiltered ConvolveIntraBlockCopy guard. The
// older spellings without "Copy" are still honored so that any header using
// them keeps suppressing the C version.
void Init12bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
  assert(dsp != nullptr);

  // Single prediction.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCopy)
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveHorizontal)
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveVertical)
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_Convolve2D)
  dsp->convolve[0][0][1][1] = Convolve2D_C<12, uint16_t>;
#endif

  // Compound prediction.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCompoundCopy)
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCompoundHorizontal)
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCompoundVertical)
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCompound2D)
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<12, uint16_t>;
#endif

  // Intra block copy, single prediction. The unfiltered case reuses the
  // regular copy function.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopy)
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                             \
    !(defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopyHorizontal) || \
      defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockHorizontal))
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/true>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                           \
    !(defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopyVertical) || \
      defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockVertical))
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/false>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS ||                     \
    !(defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopy2D) || \
      defined(LIBGAV1_Dsp12bpp_ConvolveIntraBlock2D))
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<12, uint16_t>;
#endif

  // Intra block copy with compound prediction has no implementation; these
  // slots are cleared in every configuration.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

  // Scaled prediction: [0] is single, [1] is compound.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveScale2D)
  dsp->convolve_scale[0] = ConvolveScale2D_C<12, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp12bpp_ConvolveCompoundScale2D)
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<12, uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH == 12
954 
955 }  // namespace
956 
ConvolveInit_C()957 void ConvolveInit_C() {
958   Init8bpp();
959 #if LIBGAV1_MAX_BITDEPTH >= 10
960   Init10bpp();
961 #endif
962 #if LIBGAV1_MAX_BITDEPTH == 12
963   Init12bpp();
964 #endif
965 }
966 
967 }  // namespace dsp
968 }  // namespace libgav1
969