1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/convolve.h"
16
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
// Number of pixels the non-scaled horizontal filters step back from
// |reference| along a row before reading their kSubPixelTaps-wide tap window.
constexpr int kHorizontalOffset = 3;
// Number of rows the non-scaled vertical filters step back from |reference|
// before reading their kSubPixelTaps-tall tap window.
constexpr int kVerticalOffset = 3;
34
35 // Compound prediction output ranges from ConvolveTest.ShowRange.
36 // Bitdepth: 8 Input range: [ 0, 255]
37 // intermediate range: [ -7140, 23460]
38 // first pass output range: [ -1785, 5865]
39 // intermediate range: [ -328440, 589560]
40 // second pass output range: [ 0, 255]
41 // compound second pass output range: [ -5132, 9212]
42 //
43 // Bitdepth: 10 Input range: [ 0, 1023]
44 // intermediate range: [ -28644, 94116]
45 // first pass output range: [ -7161, 23529]
46 // intermediate range: [-1317624, 2365176]
47 // second pass output range: [ 0, 1023]
48 // compound second pass output range: [ 3988, 61532]
49 //
50 // Bitdepth: 12 Input range: [ 0, 4095]
51 // intermediate range: [ -114660, 376740]
52 // first pass output range: [ -7166, 23546]
53 // intermediate range: [-1318560, 2366880]
54 // second pass output range: [ 0, 4095]
55 // compound second pass output range: [ 3974, 61559]
56
57 template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)58 void ConvolveScale2D_C(const void* const reference,
59 const ptrdiff_t reference_stride,
60 const int horizontal_filter_index,
61 const int vertical_filter_index, const int subpixel_x,
62 const int subpixel_y, const int step_x, const int step_y,
63 const int width, const int height, void* prediction,
64 const ptrdiff_t pred_stride) {
65 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
66 ? kInterRoundBitsHorizontal12bpp
67 : kInterRoundBitsHorizontal;
68 constexpr int kRoundBitsVertical =
69 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
70 const int intermediate_height =
71 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
72 kScaleSubPixelBits) +
73 kSubPixelTaps;
74 // The output of the horizontal filter, i.e. the intermediate_result, is
75 // guaranteed to fit in int16_t.
76 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
77 (2 * kMaxSuperBlockSizeInPixels + 8)];
78 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
79 const int max_pixel_value = (1 << bitdepth) - 1;
80
81 // Horizontal filter.
82 // Filter types used for width <= 4 are different from those for width > 4.
83 // When width > 4, the valid filter index range is always [0, 3].
84 // When width <= 4, the valid filter index range is always [4, 5].
85 // Similarly for height.
86 int filter_index = GetFilterIndex(horizontal_filter_index, width);
87 int16_t* intermediate = intermediate_result;
88 const auto* src = static_cast<const Pixel*>(reference);
89 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
90 auto* dest = static_cast<Pixel*>(prediction);
91 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
92 const int ref_x = subpixel_x >> kScaleSubPixelBits;
93 // Note: assume the input src is already aligned to the correct start
94 // position.
95 int y = 0;
96 do {
97 int p = subpixel_x;
98 int x = 0;
99 do {
100 int sum = 0;
101 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
102 const int filter_id = (p >> 6) & kSubPixelMask;
103 for (int k = 0; k < kSubPixelTaps; ++k) {
104 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
105 }
106 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
107 p += step_x;
108 } while (++x < width);
109
110 src += src_stride;
111 intermediate += intermediate_stride;
112 } while (++y < intermediate_height);
113
114 // Vertical filter.
115 filter_index = GetFilterIndex(vertical_filter_index, height);
116 intermediate = intermediate_result;
117 int p = subpixel_y & 1023;
118 y = 0;
119 do {
120 const int filter_id = (p >> 6) & kSubPixelMask;
121 int x = 0;
122 do {
123 int sum = 0;
124 for (int k = 0; k < kSubPixelTaps; ++k) {
125 sum +=
126 kHalfSubPixelFilters[filter_index][filter_id][k] *
127 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
128 x];
129 }
130 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
131 max_pixel_value);
132 } while (++x < width);
133
134 dest += dest_stride;
135 p += step_y;
136 } while (++y < height);
137 }
138
139 template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)140 void ConvolveCompoundScale2D_C(const void* const reference,
141 const ptrdiff_t reference_stride,
142 const int horizontal_filter_index,
143 const int vertical_filter_index,
144 const int subpixel_x, const int subpixel_y,
145 const int step_x, const int step_y,
146 const int width, const int height,
147 void* prediction, const ptrdiff_t pred_stride) {
148 // All compound functions output to the predictor buffer with |pred_stride|
149 // equal to |width|.
150 assert(pred_stride == width);
151 // Compound functions start at 4x4.
152 assert(width >= 4 && height >= 4);
153 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
154 ? kInterRoundBitsHorizontal12bpp
155 : kInterRoundBitsHorizontal;
156 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
157 const int intermediate_height =
158 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
159 kScaleSubPixelBits) +
160 kSubPixelTaps;
161 // The output of the horizontal filter, i.e. the intermediate_result, is
162 // guaranteed to fit in int16_t.
163 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
164 (2 * kMaxSuperBlockSizeInPixels + 8)];
165 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
166
167 // Horizontal filter.
168 // Filter types used for width <= 4 are different from those for width > 4.
169 // When width > 4, the valid filter index range is always [0, 3].
170 // When width <= 4, the valid filter index range is always [4, 5].
171 // Similarly for height.
172 int filter_index = GetFilterIndex(horizontal_filter_index, width);
173 int16_t* intermediate = intermediate_result;
174 const auto* src = static_cast<const Pixel*>(reference);
175 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
176 auto* dest = static_cast<uint16_t*>(prediction);
177 const int ref_x = subpixel_x >> kScaleSubPixelBits;
178 // Note: assume the input src is already aligned to the correct start
179 // position.
180 int y = 0;
181 do {
182 int p = subpixel_x;
183 int x = 0;
184 do {
185 int sum = 0;
186 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
187 const int filter_id = (p >> 6) & kSubPixelMask;
188 for (int k = 0; k < kSubPixelTaps; ++k) {
189 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
190 }
191 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
192 p += step_x;
193 } while (++x < width);
194
195 src += src_stride;
196 intermediate += intermediate_stride;
197 } while (++y < intermediate_height);
198
199 // Vertical filter.
200 filter_index = GetFilterIndex(vertical_filter_index, height);
201 intermediate = intermediate_result;
202 int p = subpixel_y & 1023;
203 y = 0;
204 do {
205 const int filter_id = (p >> 6) & kSubPixelMask;
206 int x = 0;
207 do {
208 int sum = 0;
209 for (int k = 0; k < kSubPixelTaps; ++k) {
210 sum +=
211 kHalfSubPixelFilters[filter_index][filter_id][k] *
212 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
213 x];
214 }
215 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
216 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
217 dest[x] = sum;
218 } while (++x < width);
219
220 dest += pred_stride;
221 p += step_y;
222 } while (++y < height);
223 }
224
225 template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)226 void ConvolveCompound2D_C(const void* const reference,
227 const ptrdiff_t reference_stride,
228 const int horizontal_filter_index,
229 const int vertical_filter_index,
230 const int horizontal_filter_id,
231 const int vertical_filter_id, const int width,
232 const int height, void* prediction,
233 const ptrdiff_t pred_stride) {
234 // All compound functions output to the predictor buffer with |pred_stride|
235 // equal to |width|.
236 assert(pred_stride == width);
237 // Compound functions start at 4x4.
238 assert(width >= 4 && height >= 4);
239 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
240 ? kInterRoundBitsHorizontal12bpp
241 : kInterRoundBitsHorizontal;
242 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
243 const int intermediate_height = height + kSubPixelTaps - 1;
244 // The output of the horizontal filter, i.e. the intermediate_result, is
245 // guaranteed to fit in int16_t.
246 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
247 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
248 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
249
250 // Horizontal filter.
251 // Filter types used for width <= 4 are different from those for width > 4.
252 // When width > 4, the valid filter index range is always [0, 3].
253 // When width <= 4, the valid filter index range is always [4, 5].
254 // Similarly for height.
255 int filter_index = GetFilterIndex(horizontal_filter_index, width);
256 int16_t* intermediate = intermediate_result;
257 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
258 const auto* src = static_cast<const Pixel*>(reference) -
259 kVerticalOffset * src_stride - kHorizontalOffset;
260 auto* dest = static_cast<uint16_t*>(prediction);
261
262 // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
263 assert(horizontal_filter_id != 0);
264 int y = 0;
265 do {
266 int x = 0;
267 do {
268 int sum = 0;
269 for (int k = 0; k < kSubPixelTaps; ++k) {
270 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
271 src[x + k];
272 }
273 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
274 } while (++x < width);
275
276 src += src_stride;
277 intermediate += intermediate_stride;
278 } while (++y < intermediate_height);
279
280 // Vertical filter.
281 filter_index = GetFilterIndex(vertical_filter_index, height);
282 intermediate = intermediate_result;
283 // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
284 assert(vertical_filter_id != 0);
285 y = 0;
286 do {
287 int x = 0;
288 do {
289 int sum = 0;
290 for (int k = 0; k < kSubPixelTaps; ++k) {
291 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
292 intermediate[k * intermediate_stride + x];
293 }
294 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
295 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
296 dest[x] = sum;
297 } while (++x < width);
298
299 dest += pred_stride;
300 intermediate += intermediate_stride;
301 } while (++y < height);
302 }
303
304 // This function is a simplified version of ConvolveCompound2D_C.
305 // It is called when it is single prediction mode, where both horizontal and
306 // vertical filtering are required.
307 // The output is the single prediction of the block, clipped to valid pixel
308 // range.
309 template <int bitdepth, typename Pixel>
Convolve2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int horizontal_filter_id,const int vertical_filter_id,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)310 void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
311 const int horizontal_filter_index,
312 const int vertical_filter_index,
313 const int horizontal_filter_id, const int vertical_filter_id,
314 const int width, const int height, void* prediction,
315 const ptrdiff_t pred_stride) {
316 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
317 ? kInterRoundBitsHorizontal12bpp
318 : kInterRoundBitsHorizontal;
319 constexpr int kRoundBitsVertical =
320 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
321 const int intermediate_height = height + kSubPixelTaps - 1;
322 // The output of the horizontal filter, i.e. the intermediate_result, is
323 // guaranteed to fit in int16_t.
324 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
325 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
326 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
327 const int max_pixel_value = (1 << bitdepth) - 1;
328
329 // Horizontal filter.
330 // Filter types used for width <= 4 are different from those for width > 4.
331 // When width > 4, the valid filter index range is always [0, 3].
332 // When width <= 4, the valid filter index range is always [4, 5].
333 // Similarly for height.
334 int filter_index = GetFilterIndex(horizontal_filter_index, width);
335 int16_t* intermediate = intermediate_result;
336 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
337 const auto* src = static_cast<const Pixel*>(reference) -
338 kVerticalOffset * src_stride - kHorizontalOffset;
339 auto* dest = static_cast<Pixel*>(prediction);
340 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
341 // If |horizontal_filter_id| == 0 then ConvolveVertical() should be called.
342 assert(horizontal_filter_id != 0);
343 int y = 0;
344 do {
345 int x = 0;
346 do {
347 int sum = 0;
348 for (int k = 0; k < kSubPixelTaps; ++k) {
349 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
350 src[x + k];
351 }
352 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
353 } while (++x < width);
354
355 src += src_stride;
356 intermediate += intermediate_stride;
357 } while (++y < intermediate_height);
358
359 // Vertical filter.
360 filter_index = GetFilterIndex(vertical_filter_index, height);
361 intermediate = intermediate_result;
362 // If |vertical_filter_id| == 0 then ConvolveHorizontal() should be called.
363 assert(vertical_filter_id != 0);
364 y = 0;
365 do {
366 int x = 0;
367 do {
368 int sum = 0;
369 for (int k = 0; k < kSubPixelTaps; ++k) {
370 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
371 intermediate[k * intermediate_stride + x];
372 }
373 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
374 max_pixel_value);
375 } while (++x < width);
376
377 dest += dest_stride;
378 intermediate += intermediate_stride;
379 } while (++y < height);
380 }
381
382 // This function is a simplified version of Convolve2D_C.
383 // It is called when it is single prediction mode, where only horizontal
384 // filtering is required.
385 // The output is the single prediction of the block, clipped to valid pixel
386 // range.
387 template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)388 void ConvolveHorizontal_C(const void* const reference,
389 const ptrdiff_t reference_stride,
390 const int horizontal_filter_index,
391 const int /*vertical_filter_index*/,
392 const int horizontal_filter_id,
393 const int /*vertical_filter_id*/, const int width,
394 const int height, void* prediction,
395 const ptrdiff_t pred_stride) {
396 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
397 ? kInterRoundBitsHorizontal12bpp
398 : kInterRoundBitsHorizontal;
399 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
400 const int bits = kFilterBits - kRoundBitsHorizontal;
401 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
402 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
403 auto* dest = static_cast<Pixel*>(prediction);
404 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
405 const int max_pixel_value = (1 << bitdepth) - 1;
406 int y = 0;
407 do {
408 int x = 0;
409 do {
410 int sum = 0;
411 for (int k = 0; k < kSubPixelTaps; ++k) {
412 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
413 src[x + k];
414 }
415 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
416 dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
417 } while (++x < width);
418
419 src += src_stride;
420 dest += dest_stride;
421 } while (++y < height);
422 }
423
424 // This function is a simplified version of Convolve2D_C.
425 // It is called when it is single prediction mode, where only vertical
426 // filtering is required.
427 // The output is the single prediction of the block, clipped to valid pixel
428 // range.
429 template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)430 void ConvolveVertical_C(const void* const reference,
431 const ptrdiff_t reference_stride,
432 const int /*horizontal_filter_index*/,
433 const int vertical_filter_index,
434 const int /*horizontal_filter_id*/,
435 const int vertical_filter_id, const int width,
436 const int height, void* prediction,
437 const ptrdiff_t pred_stride) {
438 const int filter_index = GetFilterIndex(vertical_filter_index, height);
439 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
440 const auto* src =
441 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
442 auto* dest = static_cast<Pixel*>(prediction);
443 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
444 // Copy filters must call ConvolveCopy().
445 assert(vertical_filter_id != 0);
446
447 const int max_pixel_value = (1 << bitdepth) - 1;
448 int y = 0;
449 do {
450 int x = 0;
451 do {
452 int sum = 0;
453 for (int k = 0; k < kSubPixelTaps; ++k) {
454 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
455 src[k * src_stride + x];
456 }
457 dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
458 max_pixel_value);
459 } while (++x < width);
460
461 src += src_stride;
462 dest += dest_stride;
463 } while (++y < height);
464 }
465
// Unfiltered block copy (C reference implementation).
//
// Copies |height| rows of |width| Pixel values from |reference| to
// |prediction|. Both strides are applied to byte pointers, i.e. they are in
// bytes. All filter arguments are ignored. The loop body always runs at least
// once.
template <int bitdepth, typename Pixel>
void ConvolveCopy_C(const void* const reference,
                    const ptrdiff_t reference_stride,
                    const int /*horizontal_filter_index*/,
                    const int /*vertical_filter_index*/,
                    const int /*horizontal_filter_id*/,
                    const int /*vertical_filter_id*/, const int width,
                    const int height, void* prediction,
                    const ptrdiff_t pred_stride) {
  // Number of bytes in one row of the block.
  const size_t row_bytes = width * sizeof(Pixel);
  const auto* from = static_cast<const uint8_t*>(reference);
  auto* to = static_cast<uint8_t*>(prediction);
  int row = 0;
  do {
    memcpy(to, from, row_bytes);
    to += pred_stride;
    from += reference_stride;
  } while (++row < height);
}
484
485 template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)486 void ConvolveCompoundCopy_C(const void* const reference,
487 const ptrdiff_t reference_stride,
488 const int /*horizontal_filter_index*/,
489 const int /*vertical_filter_index*/,
490 const int /*horizontal_filter_id*/,
491 const int /*vertical_filter_id*/, const int width,
492 const int height, void* prediction,
493 const ptrdiff_t pred_stride) {
494 // All compound functions output to the predictor buffer with |pred_stride|
495 // equal to |width|.
496 assert(pred_stride == width);
497 // Compound functions start at 4x4.
498 assert(width >= 4 && height >= 4);
499 constexpr int kRoundBitsVertical =
500 ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
501 : kInterRoundBitsVertical) -
502 kInterRoundBitsCompoundVertical;
503 const auto* src = static_cast<const Pixel*>(reference);
504 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
505 auto* dest = static_cast<uint16_t*>(prediction);
506 int y = 0;
507 do {
508 int x = 0;
509 do {
510 int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
511 sum += src[x];
512 dest[x] = sum << kRoundBitsVertical;
513 } while (++x < width);
514 src += src_stride;
515 dest += pred_stride;
516 } while (++y < height);
517 }
518
519 // This function is a simplified version of ConvolveCompound2D_C.
520 // It is called when it is compound prediction mode, where only horizontal
521 // filtering is required.
522 // The output is not clipped to valid pixel range. Its output will be
523 // blended with another predictor to generate the final prediction of the block.
524 template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int horizontal_filter_id,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)525 void ConvolveCompoundHorizontal_C(
526 const void* const reference, const ptrdiff_t reference_stride,
527 const int horizontal_filter_index, const int /*vertical_filter_index*/,
528 const int horizontal_filter_id, const int /*vertical_filter_id*/,
529 const int width, const int height, void* prediction,
530 const ptrdiff_t pred_stride) {
531 // All compound functions output to the predictor buffer with |pred_stride|
532 // equal to |width|.
533 assert(pred_stride == width);
534 // Compound functions start at 4x4.
535 assert(width >= 4 && height >= 4);
536 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
537 ? kInterRoundBitsHorizontal12bpp
538 : kInterRoundBitsHorizontal;
539 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
540 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
541 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
542 auto* dest = static_cast<uint16_t*>(prediction);
543 // Copy filters must call ConvolveCopy().
544 assert(horizontal_filter_id != 0);
545 int y = 0;
546 do {
547 int x = 0;
548 do {
549 int sum = 0;
550 for (int k = 0; k < kSubPixelTaps; ++k) {
551 sum += kHalfSubPixelFilters[filter_index][horizontal_filter_id][k] *
552 src[x + k];
553 }
554 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
555 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
556 dest[x] = sum;
557 } while (++x < width);
558
559 src += src_stride;
560 dest += pred_stride;
561 } while (++y < height);
562 }
563
564 // This function is a simplified version of ConvolveCompound2D_C.
565 // It is called when it is compound prediction mode, where only vertical
566 // filtering is required.
567 // The output is not clipped to valid pixel range. Its output will be
568 // blended with another predictor to generate the final prediction of the block.
569 template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int vertical_filter_id,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)570 void ConvolveCompoundVertical_C(const void* const reference,
571 const ptrdiff_t reference_stride,
572 const int /*horizontal_filter_index*/,
573 const int vertical_filter_index,
574 const int /*horizontal_filter_id*/,
575 const int vertical_filter_id, const int width,
576 const int height, void* prediction,
577 const ptrdiff_t pred_stride) {
578 // All compound functions output to the predictor buffer with |pred_stride|
579 // equal to |width|.
580 assert(pred_stride == width);
581 // Compound functions start at 4x4.
582 assert(width >= 4 && height >= 4);
583 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
584 ? kInterRoundBitsHorizontal12bpp
585 : kInterRoundBitsHorizontal;
586 const int filter_index = GetFilterIndex(vertical_filter_index, height);
587 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
588 const auto* src =
589 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
590 auto* dest = static_cast<uint16_t*>(prediction);
591 // Copy filters must call ConvolveCopy().
592 assert(vertical_filter_id != 0);
593 int y = 0;
594 do {
595 int x = 0;
596 do {
597 int sum = 0;
598 for (int k = 0; k < kSubPixelTaps; ++k) {
599 sum += kHalfSubPixelFilters[filter_index][vertical_filter_id][k] *
600 src[k * src_stride + x];
601 }
602 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
603 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
604 dest[x] = sum;
605 } while (++x < width);
606 src += src_stride;
607 dest += pred_stride;
608 } while (++y < height);
609 }
610
611 // This function is used when intra block copy is present.
612 // It is called when it is single prediction mode for U/V plane, where the
613 // reference block is from current frame and both horizontal and vertical
614 // filtering are required.
615 // The output is the single prediction of the block, clipped to valid pixel
616 // range.
617 template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)618 void ConvolveIntraBlockCopy2D_C(const void* const reference,
619 const ptrdiff_t reference_stride,
620 const int /*horizontal_filter_index*/,
621 const int /*vertical_filter_index*/,
622 const int /*horizontal_filter_id*/,
623 const int /*vertical_filter_id*/,
624 const int width, const int height,
625 void* prediction, const ptrdiff_t pred_stride) {
626 assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
627 assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
628 const auto* src = static_cast<const Pixel*>(reference);
629 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
630 auto* dest = static_cast<Pixel*>(prediction);
631 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
632 const int intermediate_height = height + 1;
633 uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
634 (kMaxSuperBlockSizeInPixels + 1)];
635 uint16_t* intermediate = intermediate_result;
636 // Note: allow vertical access to height + 1. Because this function is only
637 // for u/v plane of intra block copy, such access is guaranteed to be within
638 // the prediction block.
639 int y = 0;
640 do {
641 int x = 0;
642 do {
643 intermediate[x] = src[x] + src[x + 1];
644 } while (++x < width);
645
646 src += src_stride;
647 intermediate += width;
648 } while (++y < intermediate_height);
649
650 intermediate = intermediate_result;
651 y = 0;
652 do {
653 int x = 0;
654 do {
655 dest[x] =
656 RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
657 } while (++x < width);
658
659 intermediate += width;
660 dest += dest_stride;
661 } while (++y < height);
662 }
663
664 // This function is used when intra block copy is present.
665 // It is called when it is single prediction mode for U/V plane, where the
666 // reference block is from the current frame and only horizontal or vertical
667 // filtering is required.
668 // The output is the single prediction of the block, clipped to valid pixel
669 // range.
670 // The filtering of intra block copy is simply the average of current and
671 // the next pixel.
672 template <int bitdepth, typename Pixel, bool is_horizontal>
ConvolveIntraBlockCopy1D_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)673 void ConvolveIntraBlockCopy1D_C(const void* const reference,
674 const ptrdiff_t reference_stride,
675 const int /*horizontal_filter_index*/,
676 const int /*vertical_filter_index*/,
677 const int /*horizontal_filter_id*/,
678 const int /*vertical_filter_id*/,
679 const int width, const int height,
680 void* prediction, const ptrdiff_t pred_stride) {
681 assert(width >= 4 && width <= kMaxSuperBlockSizeInPixels);
682 assert(height >= 4 && height <= kMaxSuperBlockSizeInPixels);
683 const auto* src = static_cast<const Pixel*>(reference);
684 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
685 auto* dest = static_cast<Pixel*>(prediction);
686 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
687 const ptrdiff_t offset = is_horizontal ? 1 : src_stride;
688 int y = 0;
689 do {
690 int x = 0;
691 do {
692 dest[x] = RightShiftWithRounding(src[x] + src[x + offset], 1);
693 } while (++x < width);
694
695 src += src_stride;
696 dest += dest_stride;
697 } while (++y < height);
698 }
699
Init8bpp()700 void Init8bpp() {
701 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
702 assert(dsp != nullptr);
703 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
704 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
705 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
706 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
707 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
708
709 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
710 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
711 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
712 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
713
714 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
715 dsp->convolve[1][0][0][1] =
716 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
717 dsp->convolve[1][0][1][0] =
718 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
719 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
720
721 dsp->convolve[1][1][0][0] = nullptr;
722 dsp->convolve[1][1][0][1] = nullptr;
723 dsp->convolve[1][1][1][0] = nullptr;
724 dsp->convolve[1][1][1][1] = nullptr;
725
726 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
727 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
728 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
729 #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
730 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
731 #endif
732 #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
733 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
734 #endif
735 #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
736 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
737 #endif
738 #ifndef LIBGAV1_Dsp8bpp_Convolve2D
739 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
740 #endif
741
742 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
743 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
744 #endif
745 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
746 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
747 #endif
748 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
749 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
750 #endif
751 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
752 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
753 #endif
754
755 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
756 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
757 #endif
758 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
759 dsp->convolve[1][0][0][1] =
760 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
761 #endif
762 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
763 dsp->convolve[1][0][1][0] =
764 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
765 #endif
766 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
767 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
768 #endif
769
770 dsp->convolve[1][1][0][0] = nullptr;
771 dsp->convolve[1][1][0][1] = nullptr;
772 dsp->convolve[1][1][1][0] = nullptr;
773 dsp->convolve[1][1][1][1] = nullptr;
774
775 #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
776 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
777 #endif
778 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
779 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
780 #endif
781 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
782 }
783
784 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()785 void Init10bpp() {
786 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
787 assert(dsp != nullptr);
788 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
789 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
790 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
791 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
792 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
793
794 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
795 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
796 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
797 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
798
799 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
800 dsp->convolve[1][0][0][1] =
801 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
802 dsp->convolve[1][0][1][0] =
803 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
804 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
805
806 dsp->convolve[1][1][0][0] = nullptr;
807 dsp->convolve[1][1][0][1] = nullptr;
808 dsp->convolve[1][1][1][0] = nullptr;
809 dsp->convolve[1][1][1][1] = nullptr;
810
811 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
812 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
813 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
814 #ifndef LIBGAV1_Dsp10bpp_ConvolveCopy
815 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
816 #endif
817 #ifndef LIBGAV1_Dsp10bpp_ConvolveHorizontal
818 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
819 #endif
820 #ifndef LIBGAV1_Dsp10bpp_ConvolveVertical
821 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
822 #endif
823 #ifndef LIBGAV1_Dsp10bpp_Convolve2D
824 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
825 #endif
826
827 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundCopy
828 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
829 #endif
830 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal
831 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
832 #endif
833 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundVertical
834 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
835 #endif
836 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompound2D
837 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
838 #endif
839
840 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy
841 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
842 #endif
843 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal
844 dsp->convolve[1][0][0][1] =
845 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
846 #endif
847 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical
848 dsp->convolve[1][0][1][0] =
849 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
850 #endif
851 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D
852 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
853 #endif
854
855 dsp->convolve[1][1][0][0] = nullptr;
856 dsp->convolve[1][1][0][1] = nullptr;
857 dsp->convolve[1][1][1][0] = nullptr;
858 dsp->convolve[1][1][1][1] = nullptr;
859
860 #ifndef LIBGAV1_Dsp10bpp_ConvolveScale2D
861 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
862 #endif
863 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D
864 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
865 #endif
866 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
867 }
868 #endif
869
870 } // namespace
871
// Entry point for installing the C convolve functions. Always runs
// Init8bpp(); also runs Init10bpp() when LIBGAV1_MAX_BITDEPTH >= 10.
void ConvolveInit_C() {
  Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  Init10bpp();
#endif
}
878
879 } // namespace dsp
880 } // namespace libgav1
881