1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/convolve.h"
16
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
// Number of pixels subtracted from the |reference| pointer by the non-scaled
// horizontal filters before they read their kSubPixelTaps inputs.
32 constexpr int kHorizontalOffset = 3;
// Number of rows (multiplied by the source stride) subtracted from the
// |reference| pointer by the non-scaled vertical filters before they read
// their kSubPixelTaps inputs.
33 constexpr int kVerticalOffset = 3;
34
35 // Compound prediction output ranges from ConvolveTest.ShowRange.
36 // In some cases, the horizontal or vertical filter will be omitted. This table
37 // shows the general case, where the downscaled horizontal output is input to
38 // the vertical filter via the |intermediate_result| array. The final output is
39 // either Pixel or compound values, depending on the |is_compound| variable.
40 // Bitdepth: 8 Input range: [ 0, 255]
41 // Horizontal upscaled range: [ -7140, 23460]
42 // Horizontal downscaled range: [ -1785, 5865]
43 // Vertical upscaled range: [ -328440, 589560]
44 // Pixel output range: [ 0, 255]
45 // Compound output range: [ -5132, 9212]
46 //
47 // Bitdepth: 10 Input range: [ 0, 1023]
48 // Horizontal upscaled range: [ -28644, 94116]
49 // Horizontal downscaled range: [ -7161, 23529]
50 // Vertical upscaled range: [-1317624, 2365176]
51 // Pixel output range: [ 0, 1023]
52 // Compound output range: [ 3988, 61532]
53 //
54 // Bitdepth: 12 Input range: [ 0, 4095]
55 // Horizontal upscaled range: [ -114660, 376740]
56 // Horizontal downscaled range: [ -7166, 23546]
57 // Vertical upscaled range: [-1318560, 2366880]
58 // Pixel output range: [ 0, 4095]
59 // Compound output range: [ 3974, 61559]
60
61 template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)62 void ConvolveScale2D_C(const void* LIBGAV1_RESTRICT const reference,
63 const ptrdiff_t reference_stride,
64 const int horizontal_filter_index,
65 const int vertical_filter_index, const int subpixel_x,
66 const int subpixel_y, const int step_x, const int step_y,
67 const int width, const int height,
68 void* LIBGAV1_RESTRICT prediction,
69 const ptrdiff_t pred_stride) {
70 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
71 ? kInterRoundBitsHorizontal12bpp
72 : kInterRoundBitsHorizontal;
73 constexpr int kRoundBitsVertical =
74 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
75 const int intermediate_height =
76 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
77 kScaleSubPixelBits) +
78 kSubPixelTaps;
79 // The output of the horizontal filter, i.e. the intermediate_result, is
80 // guaranteed to fit in int16_t.
81 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
82 (2 * kMaxSuperBlockSizeInPixels + 8)];
83 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
84 const int max_pixel_value = (1 << bitdepth) - 1;
85
86 // Horizontal filter.
87 // Filter types used for width <= 4 are different from those for width > 4.
88 // When width > 4, the valid filter index range is always [0, 3].
89 // When width <= 4, the valid filter index range is always [4, 5].
90 // Similarly for height.
91 int filter_index = GetFilterIndex(horizontal_filter_index, width);
92 int16_t* intermediate = intermediate_result;
93 const auto* src = static_cast<const Pixel*>(reference);
94 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
95 auto* dest = static_cast<Pixel*>(prediction);
96 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
97 const int ref_x = subpixel_x >> kScaleSubPixelBits;
98 // Note: assume the input src is already aligned to the correct start
99 // position.
100 int y = 0;
101 do {
102 int p = subpixel_x;
103 int x = 0;
104 do {
105 int sum = 0;
106 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
107 const int filter_id = (p >> 6) & kSubPixelMask;
108 for (int k = 0; k < kSubPixelTaps; ++k) {
109 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
110 }
111 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
112 p += step_x;
113 } while (++x < width);
114
115 src += src_stride;
116 intermediate += intermediate_stride;
117 } while (++y < intermediate_height);
118
119 // Vertical filter.
120 filter_index = GetFilterIndex(vertical_filter_index, height);
121 intermediate = intermediate_result;
122 int p = subpixel_y & 1023;
123 y = 0;
124 do {
125 const int filter_id = (p >> 6) & kSubPixelMask;
126 int x = 0;
127 do {
128 int sum = 0;
129 for (int k = 0; k < kSubPixelTaps; ++k) {
130 sum +=
131 kHalfSubPixelFilters[filter_index][filter_id][k] *
132 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
133 x];
134 }
135 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
136 max_pixel_value);
137 } while (++x < width);
138
139 dest += dest_stride;
140 p += step_y;
141 } while (++y < height);
142 }
143
144 template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)145 void ConvolveCompoundScale2D_C(
146 const void* LIBGAV1_RESTRICT const reference,
147 const ptrdiff_t reference_stride, const int horizontal_filter_index,
148 const int vertical_filter_index, const int subpixel_x, const int subpixel_y,
149 const int step_x, const int step_y, const int width, const int height,
150 void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
151 // All compound functions output to the predictor buffer with |pred_stride|
152 // equal to |width|.
153 assert(pred_stride == width);
154 // Compound functions start at 4x4.
155 assert(width >= 4 && height >= 4);
156 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
157 ? kInterRoundBitsHorizontal12bpp
158 : kInterRoundBitsHorizontal;
159 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
160 const int intermediate_height =
161 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
162 kScaleSubPixelBits) +
163 kSubPixelTaps;
164 // The output of the horizontal filter, i.e. the intermediate_result, is
165 // guaranteed to fit in int16_t.
166 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
167 (2 * kMaxSuperBlockSizeInPixels + 8)];
168 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
169
170 // Horizontal filter.
171 // Filter types used for width <= 4 are different from those for width > 4.
172 // When width > 4, the valid filter index range is always [0, 3].
173 // When width <= 4, the valid filter index range is always [4, 5].
174 // Similarly for height.
175 int filter_index = GetFilterIndex(horizontal_filter_index, width);
176 int16_t* intermediate = intermediate_result;
177 const auto* src = static_cast<const Pixel*>(reference);
178 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
179 auto* dest = static_cast<uint16_t*>(prediction);
180 const int ref_x = subpixel_x >> kScaleSubPixelBits;
181 // Note: assume the input src is already aligned to the correct start
182 // position.
183 int y = 0;
184 do {
185 int p = subpixel_x;
186 int x = 0;
187 do {
188 int sum = 0;
189 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
190 const int filter_id = (p >> 6) & kSubPixelMask;
191 for (int k = 0; k < kSubPixelTaps; ++k) {
192 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
193 }
194 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
195 p += step_x;
196 } while (++x < width);
197
198 src += src_stride;
199 intermediate += intermediate_stride;
200 } while (++y < intermediate_height);
201
202 // Vertical filter.
203 filter_index = GetFilterIndex(vertical_filter_index, height);
204 intermediate = intermediate_result;
205 int p = subpixel_y & 1023;
206 y = 0;
207 do {
208 const int filter_id = (p >> 6) & kSubPixelMask;
209 int x = 0;
210 do {
211 int sum = 0;
212 for (int k = 0; k < kSubPixelTaps; ++k) {
213 sum +=
214 kHalfSubPixelFilters[filter_index][filter_id][k] *
215 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
216 x];
217 }
218 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
219 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
220 dest[x] = sum;
221 } while (++x < width);
222
223 dest += pred_stride;
224 p += step_y;
225 } while (++y < height);
226 }
227
228 template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)229 void ConvolveCompound2D_C(const void* LIBGAV1_RESTRICT const reference,
230 const ptrdiff_t reference_stride,
231 const int horizontal_filter_index,
232 const int vertical_filter_index,
233 const int horizontal_filter_id,
234 const int vertical_filter_id, const int width,
235 const int height, void* LIBGAV1_RESTRICT prediction,
236 const ptrdiff_t pred_stride) {
237 // All compound functions output to the predictor buffer with |pred_stride|
238 // equal to |width|.
239 assert(pred_stride == width);
240 // Compound functions start at 4x4.
241 assert(width >= 4 && height >= 4);
242 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
243 ? kInterRoundBitsHorizontal12bpp
244 : kInterRoundBitsHorizontal;
245 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
246 const int intermediate_height = height + kSubPixelTaps - 1;
247 // The output of the horizontal filter, i.e. the intermediate_result, is
248 // guaranteed to fit in int16_t.
249 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
250 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
251 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
252
253 // Horizontal filter.
254 // Filter types used for width <= 4 are different from those for width > 4.
255 // When width > 4, the valid filter index range is always [0, 3].
256 // When width <= 4, the valid filter index range is always [4, 5].
257 // Similarly for height.
258 int filter_index = GetFilterIndex(horizontal_filter_index, width);
259 int16_t* intermediate = intermediate_result;
260 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
261 const auto* src = static_cast<const Pixel*>(reference) -
262 kVerticalOffset * src_stride - kHorizontalOffset;
263 auto* dest = static_cast<uint16_t*>(prediction);
264
265 // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
266 assert(horizontal_filter_id != 0);
267 int y = 0;
268 do {
269 int x = 0;
270 do {
271 int sum = 0;
272 for (int k = 0; k < kSubPixelTaps; ++k) {
273 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
274 src[x + k];
275 }
276 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
277 } while (++x < width);
278
279 src += src_stride;
280 intermediate += intermediate_stride;
281 } while (++y < intermediate_height);
282
283 // Vertical filter.
284 filter_index = GetFilterIndex(vertical_filter_index, height);
285 intermediate = intermediate_result;
286 // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
287 assert(vertical_filter_id != 0);
288 y = 0;
289 do {
290 int x = 0;
291 do {
292 int sum = 0;
293 for (int k = 0; k < kSubPixelTaps; ++k) {
294 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
295 intermediate[k * intermediate_stride + x];
296 }
297 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
298 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
299 dest[x] = sum;
300 } while (++x < width);
301
302 dest += pred_stride;
303 intermediate += intermediate_stride;
304 } while (++y < height);
305 }
306
307 // This function is a simplified version of ConvolveCompound2D_C.
308 // It is called when it is single prediction mode, where both horizontal and
309 // vertical filtering are required.
310 // The output is the single prediction of the block, clipped to valid pixel
311 // range.
312 template <int bitdepth, typename Pixel>
Convolve2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)313 void Convolve2D_C(const void* LIBGAV1_RESTRICT const reference,
314 const ptrdiff_t reference_stride,
315 const int horizontal_filter_index,
316 const int vertical_filter_index,
317 const int horizontal_filter_id, const int vertical_filter_id,
318 const int width, const int height,
319 void* LIBGAV1_RESTRICT prediction,
320 const ptrdiff_t pred_stride) {
321 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
322 ? kInterRoundBitsHorizontal12bpp
323 : kInterRoundBitsHorizontal;
324 constexpr int kRoundBitsVertical =
325 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
326 const int intermediate_height = height + kSubPixelTaps - 1;
327 // The output of the horizontal filter, i.e. the intermediate_result, is
328 // guaranteed to fit in int16_t.
329 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
330 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
331 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
332 const int max_pixel_value = (1 << bitdepth) - 1;
333
334 // Horizontal filter.
335 // Filter types used for width <= 4 are different from those for width > 4.
336 // When width > 4, the valid filter index range is always [0, 3].
337 // When width <= 4, the valid filter index range is always [4, 5].
338 // Similarly for height.
339 int filter_index = GetFilterIndex(horizontal_filter_index, width);
340 int16_t* intermediate = intermediate_result;
341 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
342 const auto* src = static_cast<const Pixel*>(reference) -
343 kVerticalOffset * src_stride - kHorizontalOffset;
344 auto* dest = static_cast<Pixel*>(prediction);
345 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
346 // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
347 assert(horizontal_filter_id != 0);
348 int y = 0;
349 do {
350 int x = 0;
351 do {
352 int sum = 0;
353 for (int k = 0; k < kSubPixelTaps; ++k) {
354 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
355 src[x + k];
356 }
357 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
358 } while (++x < width);
359
360 src += src_stride;
361 intermediate += intermediate_stride;
362 } while (++y < intermediate_height);
363
364 // Vertical filter.
365 filter_index = GetFilterIndex(vertical_filter_index, height);
366 intermediate = intermediate_result;
367 // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
368 assert(vertical_filter_id != 0);
369 y = 0;
370 do {
371 int x = 0;
372 do {
373 int sum = 0;
374 for (int k = 0; k < kSubPixelTaps; ++k) {
375 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
376 intermediate[k * intermediate_stride + x];
377 }
378 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
379 max_pixel_value);
380 } while (++x < width);
381
382 dest += dest_stride;
383 intermediate += intermediate_stride;
384 } while (++y < height);
385 }
386
387 // This function is a simplified version of Convolve2D_C.
388 // It is called when it is single prediction mode, where only horizontal
389 // filtering is required.
390 // The output is the single prediction of the block, clipped to valid pixel
391 // range.
392 template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)393 void ConvolveHorizontal_C(const void* LIBGAV1_RESTRICT const reference,
394 const ptrdiff_t reference_stride,
395 const int horizontal_filter_index,
396 const int /*vertical_filter_index*/,
397 const int horizontal_filter_id,
398 const int /*vertical_filter_id*/, const int width,
399 const int height, void* LIBGAV1_RESTRICT prediction,
400 const ptrdiff_t pred_stride) {
401 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
402 ? kInterRoundBitsHorizontal12bpp
403 : kInterRoundBitsHorizontal;
404 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
405 const int bits = kFilterBits - kRoundBitsHorizontal;
406 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
407 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
408 auto* dest = static_cast<Pixel*>(prediction);
409 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
410 const int max_pixel_value = (1 << bitdepth) - 1;
411 int y = 0;
412 do {
413 int x = 0;
414 do {
415 int sum = 0;
416 for (int k = 0; k < kSubPixelTaps; ++k) {
417 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
418 src[x + k];
419 }
420 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
421 dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
422 } while (++x < width);
423
424 src += src_stride;
425 dest += dest_stride;
426 } while (++y < height);
427 }
428
429 // This function is a simplified version of Convolve2D_C.
430 // It is called when it is single prediction mode, where only vertical
431 // filtering is required.
432 // The output is the single prediction of the block, clipped to valid pixel
433 // range.
434 template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)435 void ConvolveVertical_C(const void* LIBGAV1_RESTRICT const reference,
436 const ptrdiff_t reference_stride,
437 const int /*horizontal_filter_index*/,
438 const int vertical_filter_index,
439 const int /*horizontal_filter_id*/,
440 const int vertical_filter_id, const int width,
441 const int height, void* LIBGAV1_RESTRICT prediction,
442 const ptrdiff_t pred_stride) {
443 const int filter_index = GetFilterIndex(vertical_filter_index, height);
444 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
445 const auto* src =
446 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
447 auto* dest = static_cast<Pixel*>(prediction);
448 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
449 // Copy filters must call ConvolveCopy().
450 assert(vertical_filter_id != 0);
451
452 const int max_pixel_value = (1 << bitdepth) - 1;
453 int y = 0;
454 do {
455 int x = 0;
456 do {
457 int sum = 0;
458 for (int k = 0; k < kSubPixelTaps; ++k) {
459 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
460 src[k * src_stride + x];
461 }
462 dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
463 max_pixel_value);
464 } while (++x < width);
465
466 src += src_stride;
467 dest += dest_stride;
468 } while (++y < height);
469 }
470
471 template <int bitdepth, typename Pixel>
ConvolveCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)472 void ConvolveCopy_C(const void* LIBGAV1_RESTRICT const reference,
473 const ptrdiff_t reference_stride,
474 const int /*horizontal_filter_index*/,
475 const int /*vertical_filter_index*/,
476 const int /*horizontal_filter_id*/,
477 const int /*vertical_filter_id*/, const int width,
478 const int height, void* LIBGAV1_RESTRICT prediction,
479 const ptrdiff_t pred_stride) {
480 const auto* src = static_cast<const uint8_t*>(reference);
481 auto* dest = static_cast<uint8_t*>(prediction);
482 int y = 0;
483 do {
484 memcpy(dest, src, width * sizeof(Pixel));
485 src += reference_stride;
486 dest += pred_stride;
487 } while (++y < height);
488 }
489
490 template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)491 void ConvolveCompoundCopy_C(const void* LIBGAV1_RESTRICT const reference,
492 const ptrdiff_t reference_stride,
493 const int /*horizontal_filter_index*/,
494 const int /*vertical_filter_index*/,
495 const int /*horizontal_filter_id*/,
496 const int /*vertical_filter_id*/, const int width,
497 const int height, void* LIBGAV1_RESTRICT prediction,
498 const ptrdiff_t pred_stride) {
499 // All compound functions output to the predictor buffer with |pred_stride|
500 // equal to |width|.
501 assert(pred_stride == width);
502 // Compound functions start at 4x4.
503 assert(width >= 4 && height >= 4);
504 constexpr int kRoundBitsVertical =
505 ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
506 : kInterRoundBitsVertical) -
507 kInterRoundBitsCompoundVertical;
508 const auto* src = static_cast<const Pixel*>(reference);
509 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
510 auto* dest = static_cast<uint16_t*>(prediction);
511 int y = 0;
512 do {
513 int x = 0;
514 do {
515 int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
516 sum += src[x];
517 dest[x] = sum << kRoundBitsVertical;
518 } while (++x < width);
519 src += src_stride;
520 dest += pred_stride;
521 } while (++y < height);
522 }
523
524 // This function is a simplified version of ConvolveCompound2D_C.
525 // It is called when it is compound prediction mode, where only horizontal
526 // filtering is required.
527 // The output is not clipped to valid pixel range. Its output will be
528 // blended with another predictor to generate the final prediction of the block.
529 template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)530 void ConvolveCompoundHorizontal_C(
531 const void* LIBGAV1_RESTRICT const reference,
532 const ptrdiff_t reference_stride, const int horizontal_filter_index,
533 const int /*vertical_filter_index*/, const int horizontal_filter_id,
534 const int /*vertical_filter_id*/, const int width, const int height,
535 void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
536 // All compound functions output to the predictor buffer with |pred_stride|
537 // equal to |width|.
538 assert(pred_stride == width);
539 // Compound functions start at 4x4.
540 assert(width >= 4 && height >= 4);
541 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
542 ? kInterRoundBitsHorizontal12bpp
543 : kInterRoundBitsHorizontal;
544 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
545 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
546 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
547 auto* dest = static_cast<uint16_t*>(prediction);
548 // Copy filters must call ConvolveCopy().
549 assert(horizontal_filter_id != 0);
550 int y = 0;
551 do {
552 int x = 0;
553 do {
554 int sum = 0;
555 for (int k = 0; k < kSubPixelTaps; ++k) {
556 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
557 src[x + k];
558 }
559 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
560 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
561 dest[x] = sum;
562 } while (++x < width);
563
564 src += src_stride;
565 dest += pred_stride;
566 } while (++y < height);
567 }
568
569 // This function is a simplified version of ConvolveCompound2D_C.
570 // It is called when it is compound prediction mode, where only vertical
571 // filtering is required.
572 // The output is not clipped to valid pixel range. Its output will be
573 // blended with another predictor to generate the final prediction of the block.
574 template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)575 void ConvolveCompoundVertical_C(
576 const void* LIBGAV1_RESTRICT const reference,
577 const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
578 const int vertical_filter_index, const int /*horizontal_filter_id*/,
579 const int vertical_filter_id, const int width, const int height,
580 void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
581 // All compound functions output to the predictor buffer with |pred_stride|
582 // equal to |width|.
583 assert(pred_stride == width);
584 // Compound functions start at 4x4.
585 assert(width >= 4 && height >= 4);
586 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
587 ? kInterRoundBitsHorizontal12bpp
588 : kInterRoundBitsHorizontal;
589 const int filter_index = GetFilterIndex(vertical_filter_index, height);
590 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
591 const auto* src =
592 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
593 auto* dest = static_cast<uint16_t*>(prediction);
594 // Copy filters must call ConvolveCopy().
595 assert(vertical_filter_id != 0);
596 int y = 0;
597 do {
598 int x = 0;
599 do {
600 int sum = 0;
601 for (int k = 0; k < kSubPixelTaps; ++k) {
602 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
603 src[k * src_stride + x];
604 }
605 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
606 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
607 dest[x] = sum;
608 } while (++x < width);
609 src += src_stride;
610 dest += pred_stride;
611 } while (++y < height);
612 }
613
614 // This function is used when intra block copy is present.
615 // It is called when it is single prediction mode for U/V plane, where the
616 // reference block is from current frame and both horizontal and vertical
617 // filtering are required.
618 // The output is the single prediction of the block, clipped to valid pixel
619 // range.
620 template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)621 void ConvolveIntraBlockCopy2D_C(
622 const void* LIBGAV1_RESTRICT const reference,
623 const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
624 const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
625 const int /*vertical_filter_id*/, const int width, const int height,
626 void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
627 assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
628 assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
629 const auto* src = static_cast<const Pixel*>(reference);
630 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
631 auto* dest = static_cast<Pixel*>(prediction);
632 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
633 const int intermediate_height = height + 1;
634 uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
635 (kMaxSuperBlockSizeInPixels + 1)];
636 uint16_t* intermediate = intermediate_result;
637 // Note: allow vertical access to height + 1. Because this function is only
638 // for u/v plane of intra block copy, such access is guaranteed to be within
639 // the prediction block.
640 int y = 0;
641 do {
642 int x = 0;
643 do {
644 intermediate[x] = src[x] + src[x + 1];
645 } while (++x < width);
646
647 src += src_stride;
648 intermediate += width;
649 } while (++y < intermediate_height);
650
651 intermediate = intermediate_result;
652 y = 0;
653 do {
654 int x = 0;
655 do {
656 dest[x] =
657 RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
658 } while (++x < width);
659
660 intermediate += width;
661 dest += dest_stride;
662 } while (++y < height);
663 }
664
665 // This function is used when intra block copy is present.
666 // It is called when it is single prediction mode for U/V plane, where the
667 // reference block is from the current frame and only horizontal or vertical
668 // filtering is required.
669 // The output is the single prediction of the block, clipped to valid pixel
670 // range.
671 // The filtering of intra block copy is simply the average of current and
672 // the next pixel.
673 template <int bitdepth, typename Pixel, bool is_horizontal>
ConvolveIntraBlockCopy1D_C(const void * LIBGAV1_RESTRICT const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * LIBGAV1_RESTRICT prediction,const ptrdiff_t pred_stride)674 void ConvolveIntraBlockCopy1D_C(
675 const void* LIBGAV1_RESTRICT const reference,
676 const ptrdiff_t reference_stride, const int /*horizontal_filter_index*/,
677 const int /*vertical_filter_index*/, const int /*horizontal_filter_id*/,
678 const int /*vertical_filter_id*/, const int width, const int height,
679 void* LIBGAV1_RESTRICT prediction, const ptrdiff_t pred_stride) {
680 assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
681 assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
682 const auto* src = static_cast<const Pixel*>(reference);
683 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
684 auto* dest = static_cast<Pixel*>(prediction);
685 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
686 const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
687 int y = 0;
688 do {
689 int x = 0;
690 do {
691 dest[x] = RightShiftWithRounding(src[x] + src[x + offset], 1);
692 } while (++x < width);
693
694 src += src_stride;
695 dest += dest_stride;
696 } while (++y < height);
697 }
698
Init8bpp()699 void Init8bpp() {
700 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
701 assert(dsp != nullptr);
702 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
703 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
704 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
705 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
706 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
707
708 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
709 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
710 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
711 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
712
713 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
714 dsp->convolve[1][0][0][1] =
715 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
716 dsp->convolve[1][0][1][0] =
717 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
718 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
719
720 dsp->convolve[1][1][0][0] = nullptr;
721 dsp->convolve[1][1][0][1] = nullptr;
722 dsp->convolve[1][1][1][0] = nullptr;
723 dsp->convolve[1][1][1][1] = nullptr;
724
725 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
726 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
727 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
728 #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
729 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
730 #endif
731 #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
732 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
733 #endif
734 #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
735 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
736 #endif
737 #ifndef LIBGAV1_Dsp8bpp_Convolve2D
738 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
739 #endif
740
741 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
742 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
743 #endif
744 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
745 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
746 #endif
747 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
748 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
749 #endif
750 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
751 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
752 #endif
753
754 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
755 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
756 #endif
757 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
758 dsp->convolve[1][0][0][1] =
759 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
760 #endif
761 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
762 dsp->convolve[1][0][1][0] =
763 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
764 #endif
765 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
766 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
767 #endif
768
769 dsp->convolve[1][1][0][0] = nullptr;
770 dsp->convolve[1][1][0][1] = nullptr;
771 dsp->convolve[1][1][1][0] = nullptr;
772 dsp->convolve[1][1][1][1] = nullptr;
773
774 #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
775 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
776 #endif
777 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
778 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
779 #endif
780 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
781 }
782
783 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()784 void Init10bpp() {
785 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
786 assert(dsp != nullptr);
787 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
788 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
789 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
790 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
791 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
792
793 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
794 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
795 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
796 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
797
798 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
799 dsp->convolve[1][0][0][1] =
800 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
801 dsp->convolve[1][0][1][0] =
802 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
803 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
804
805 dsp->convolve[1][1][0][0] = nullptr;
806 dsp->convolve[1][1][0][1] = nullptr;
807 dsp->convolve[1][1][1][0] = nullptr;
808 dsp->convolve[1][1][1][1] = nullptr;
809
810 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
811 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
812 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
813 #ifndef LIBGAV1_Dsp10bpp_ConvolveCopy
814 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
815 #endif
816 #ifndef LIBGAV1_Dsp10bpp_ConvolveHorizontal
817 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
818 #endif
819 #ifndef LIBGAV1_Dsp10bpp_ConvolveVertical
820 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
821 #endif
822 #ifndef LIBGAV1_Dsp10bpp_Convolve2D
823 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
824 #endif
825
826 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundCopy
827 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
828 #endif
829 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal
830 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
831 #endif
832 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundVertical
833 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
834 #endif
835 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompound2D
836 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
837 #endif
838
839 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy
840 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
841 #endif
842 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal
843 dsp->convolve[1][0][0][1] =
844 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
845 #endif
846 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical
847 dsp->convolve[1][0][1][0] =
848 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
849 #endif
850 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D
851 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
852 #endif
853
854 dsp->convolve[1][1][0][0] = nullptr;
855 dsp->convolve[1][1][0][1] = nullptr;
856 dsp->convolve[1][1][1][0] = nullptr;
857 dsp->convolve[1][1][1][1] = nullptr;
858
859 #ifndef LIBGAV1_Dsp10bpp_ConvolveScale2D
860 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
861 #endif
862 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D
863 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
864 #endif
865 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
866 }
867 #endif // LIBGAV1_MAX_BITDEPTH >= 10
868
869 #if LIBGAV1_MAX_BITDEPTH == 12
Init12bpp()870 void Init12bpp() {
871 Dsp* const dsp = dsp_internal::GetWritableDspTable(12);
872 assert(dsp != nullptr);
873 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
874 dsp->convolve[0][0][0][0] = ConvolveCopy_C<12, uint16_t>;
875 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<12, uint16_t>;
876 dsp->convolve[0][0][1][0] = ConvolveVertical_C<12, uint16_t>;
877 dsp->convolve[0][0][1][1] = Convolve2D_C<12, uint16_t>;
878
879 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<12, uint16_t>;
880 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<12, uint16_t>;
881 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<12, uint16_t>;
882 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<12, uint16_t>;
883
884 dsp->convolve[1][0][0][0] = ConvolveCopy_C<12, uint16_t>;
885 dsp->convolve[1][0][0][1] =
886 ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/true>;
887 dsp->convolve[1][0][1][0] =
888 ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/false>;
889 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<12, uint16_t>;
890
891 dsp->convolve[1][1][0][0] = nullptr;
892 dsp->convolve[1][1][0][1] = nullptr;
893 dsp->convolve[1][1][1][0] = nullptr;
894 dsp->convolve[1][1][1][1] = nullptr;
895
896 dsp->convolve_scale[0] = ConvolveScale2D_C<12, uint16_t>;
897 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<12, uint16_t>;
898 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
899 #ifndef LIBGAV1_Dsp12bpp_ConvolveCopy
900 dsp->convolve[0][0][0][0] = ConvolveCopy_C<12, uint16_t>;
901 #endif
902 #ifndef LIBGAV1_Dsp12bpp_ConvolveHorizontal
903 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<12, uint16_t>;
904 #endif
905 #ifndef LIBGAV1_Dsp12bpp_ConvolveVertical
906 dsp->convolve[0][0][1][0] = ConvolveVertical_C<12, uint16_t>;
907 #endif
908 #ifndef LIBGAV1_Dsp12bpp_Convolve2D
909 dsp->convolve[0][0][1][1] = Convolve2D_C<12, uint16_t>;
910 #endif
911
912 #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundCopy
913 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<12, uint16_t>;
914 #endif
915 #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundHorizontal
916 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<12, uint16_t>;
917 #endif
918 #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundVertical
919 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<12, uint16_t>;
920 #endif
921 #ifndef LIBGAV1_Dsp12bpp_ConvolveCompound2D
922 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<12, uint16_t>;
923 #endif
924
925 #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockCopy
926 dsp->convolve[1][0][0][0] = ConvolveCopy_C<12, uint16_t>;
927 #endif
928 #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockHorizontal
929 dsp->convolve[1][0][0][1] =
930 ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/true>;
931 #endif
932 #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlockVertical
933 dsp->convolve[1][0][1][0] =
934 ConvolveIntraBlockCopy1D_C<12, uint16_t, /*is_horizontal=*/false>;
935 #endif
936 #ifndef LIBGAV1_Dsp12bpp_ConvolveIntraBlock2D
937 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<12, uint16_t>;
938 #endif
939
940 dsp->convolve[1][1][0][0] = nullptr;
941 dsp->convolve[1][1][0][1] = nullptr;
942 dsp->convolve[1][1][1][0] = nullptr;
943 dsp->convolve[1][1][1][1] = nullptr;
944
945 #ifndef LIBGAV1_Dsp12bpp_ConvolveScale2D
946 dsp->convolve_scale[0] = ConvolveScale2D_C<12, uint16_t>;
947 #endif
948 #ifndef LIBGAV1_Dsp12bpp_ConvolveCompoundScale2D
949 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<12, uint16_t>;
950 #endif
951 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
952 }
953 #endif // LIBGAV1_MAX_BITDEPTH == 12
954
955 } // namespace
956
ConvolveInit_C()957 void ConvolveInit_C() {
958 Init8bpp();
959 #if LIBGAV1_MAX_BITDEPTH >= 10
960 Init10bpp();
961 #endif
962 #if LIBGAV1_MAX_BITDEPTH == 12
963 Init12bpp();
964 #endif
965 }
966
967 } // namespace dsp
968 } // namespace libgav1
969