1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/convolve.h"
16
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31
// The unscaled convolve functions back the source pointer up by this many
// columns (kHorizontalOffset) or rows (kVerticalOffset) before applying the
// filter taps.
constexpr int kHorizontalOffset{3};
constexpr int kVerticalOffset{3};
34
// Compound prediction output ranges from ConvolveTest.ShowRange.
// Bitdepth:  8 Input range:            [       0,      255]
//   intermediate range:                [   -7140,    23460]
//   first pass output range:           [   -1785,     5865]
//   intermediate range:                [ -328440,   589560]
//   second pass output range:          [       0,      255]
//   compound second pass output range: [   -5132,     9212]
//
// Bitdepth: 10 Input range:            [       0,     1023]
//   intermediate range:                [  -28644,    94116]
//   first pass output range:           [   -7161,    23529]
//   intermediate range:                [-1317624,  2365176]
//   second pass output range:          [       0,     1023]
//   compound second pass output range: [    3988,    61532]
//
// Bitdepth: 12 Input range:            [       0,     4095]
//   intermediate range:                [ -114660,   376740]
//   first pass output range:           [   -7166,    23546]
//   intermediate range:                [-1318560,  2366880]
//   second pass output range:          [       0,     4095]
//   compound second pass output range: [    3974,    61559]
56
57 template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)58 void ConvolveScale2D_C(const void* const reference,
59 const ptrdiff_t reference_stride,
60 const int horizontal_filter_index,
61 const int vertical_filter_index, const int subpixel_x,
62 const int subpixel_y, const int step_x, const int step_y,
63 const int width, const int height, void* prediction,
64 const ptrdiff_t pred_stride) {
65 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
66 ? kInterRoundBitsHorizontal12bpp
67 : kInterRoundBitsHorizontal;
68 constexpr int kRoundBitsVertical =
69 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
70 const int intermediate_height =
71 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
72 kScaleSubPixelBits) +
73 kSubPixelTaps;
74 // The output of the horizontal filter, i.e. the intermediate_result, is
75 // guaranteed to fit in int16_t.
76 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
77 (2 * kMaxSuperBlockSizeInPixels + 8)];
78 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
79 const int max_pixel_value = (1 << bitdepth) - 1;
80
81 // Horizontal filter.
82 // Filter types used for width <= 4 are different from those for width > 4.
83 // When width > 4, the valid filter index range is always [0, 3].
84 // When width <= 4, the valid filter index range is always [4, 5].
85 // Similarly for height.
86 int filter_index = GetFilterIndex(horizontal_filter_index, width);
87 int16_t* intermediate = intermediate_result;
88 const auto* src = static_cast<const Pixel*>(reference);
89 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
90 auto* dest = static_cast<Pixel*>(prediction);
91 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
92 const int ref_x = subpixel_x >> kScaleSubPixelBits;
93 // Note: assume the input src is already aligned to the correct start
94 // position.
95 int y = 0;
96 do {
97 int p = subpixel_x;
98 int x = 0;
99 do {
100 int sum = 0;
101 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
102 const int filter_id = (p >> 6) & kSubPixelMask;
103 for (int k = 0; k < kSubPixelTaps; ++k) {
104 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
105 }
106 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
107 p += step_x;
108 } while (++x < width);
109
110 src += src_stride;
111 intermediate += intermediate_stride;
112 } while (++y < intermediate_height);
113
114 // Vertical filter.
115 filter_index = GetFilterIndex(vertical_filter_index, height);
116 intermediate = intermediate_result;
117 int p = subpixel_y & 1023;
118 y = 0;
119 do {
120 const int filter_id = (p >> 6) & kSubPixelMask;
121 int x = 0;
122 do {
123 int sum = 0;
124 for (int k = 0; k < kSubPixelTaps; ++k) {
125 sum +=
126 kHalfSubPixelFilters[filter_index][filter_id][k] *
127 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
128 x];
129 }
130 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
131 max_pixel_value);
132 } while (++x < width);
133
134 dest += dest_stride;
135 p += step_y;
136 } while (++y < height);
137 }
138
139 template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)140 void ConvolveCompoundScale2D_C(const void* const reference,
141 const ptrdiff_t reference_stride,
142 const int horizontal_filter_index,
143 const int vertical_filter_index,
144 const int subpixel_x, const int subpixel_y,
145 const int step_x, const int step_y,
146 const int width, const int height,
147 void* prediction, const ptrdiff_t pred_stride) {
148 // All compound functions output to the predictor buffer with |pred_stride|
149 // equal to |width|.
150 assert(pred_stride == width);
151 // Compound functions start at 4x4.
152 assert(width >= 4 && height >= 4);
153 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
154 ? kInterRoundBitsHorizontal12bpp
155 : kInterRoundBitsHorizontal;
156 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
157 const int intermediate_height =
158 (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
159 kScaleSubPixelBits) +
160 kSubPixelTaps;
161 // The output of the horizontal filter, i.e. the intermediate_result, is
162 // guaranteed to fit in int16_t.
163 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
164 (2 * kMaxSuperBlockSizeInPixels + 8)];
165 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
166
167 // Horizontal filter.
168 // Filter types used for width <= 4 are different from those for width > 4.
169 // When width > 4, the valid filter index range is always [0, 3].
170 // When width <= 4, the valid filter index range is always [4, 5].
171 // Similarly for height.
172 int filter_index = GetFilterIndex(horizontal_filter_index, width);
173 int16_t* intermediate = intermediate_result;
174 const auto* src = static_cast<const Pixel*>(reference);
175 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
176 auto* dest = static_cast<uint16_t*>(prediction);
177 const int ref_x = subpixel_x >> kScaleSubPixelBits;
178 // Note: assume the input src is already aligned to the correct start
179 // position.
180 int y = 0;
181 do {
182 int p = subpixel_x;
183 int x = 0;
184 do {
185 int sum = 0;
186 const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
187 const int filter_id = (p >> 6) & kSubPixelMask;
188 for (int k = 0; k < kSubPixelTaps; ++k) {
189 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
190 }
191 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
192 p += step_x;
193 } while (++x < width);
194
195 src += src_stride;
196 intermediate += intermediate_stride;
197 } while (++y < intermediate_height);
198
199 // Vertical filter.
200 filter_index = GetFilterIndex(vertical_filter_index, height);
201 intermediate = intermediate_result;
202 int p = subpixel_y & 1023;
203 y = 0;
204 do {
205 const int filter_id = (p >> 6) & kSubPixelMask;
206 int x = 0;
207 do {
208 int sum = 0;
209 for (int k = 0; k < kSubPixelTaps; ++k) {
210 sum +=
211 kHalfSubPixelFilters[filter_index][filter_id][k] *
212 intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
213 x];
214 }
215 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
216 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
217 dest[x] = sum;
218 } while (++x < width);
219
220 dest += pred_stride;
221 p += step_y;
222 } while (++y < height);
223 }
224
225 template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)226 void ConvolveCompound2D_C(const void* const reference,
227 const ptrdiff_t reference_stride,
228 const int horizontal_filter_index,
229 const int vertical_filter_index, const int subpixel_x,
230 const int subpixel_y, const int width,
231 const int height, void* prediction,
232 const ptrdiff_t pred_stride) {
233 // All compound functions output to the predictor buffer with |pred_stride|
234 // equal to |width|.
235 assert(pred_stride == width);
236 // Compound functions start at 4x4.
237 assert(width >= 4 && height >= 4);
238 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
239 ? kInterRoundBitsHorizontal12bpp
240 : kInterRoundBitsHorizontal;
241 constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
242 const int intermediate_height = height + kSubPixelTaps - 1;
243 // The output of the horizontal filter, i.e. the intermediate_result, is
244 // guaranteed to fit in int16_t.
245 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
246 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
247 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
248
249 // Horizontal filter.
250 // Filter types used for width <= 4 are different from those for width > 4.
251 // When width > 4, the valid filter index range is always [0, 3].
252 // When width <= 4, the valid filter index range is always [4, 5].
253 // Similarly for height.
254 int filter_index = GetFilterIndex(horizontal_filter_index, width);
255 int16_t* intermediate = intermediate_result;
256 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
257 const auto* src = static_cast<const Pixel*>(reference) -
258 kVerticalOffset * src_stride - kHorizontalOffset;
259 auto* dest = static_cast<uint16_t*>(prediction);
260 int filter_id = (subpixel_x >> 6) & kSubPixelMask;
261 // If |filter_id| == 0 then ConvolveVertical() should be called.
262 assert(filter_id != 0);
263 int y = 0;
264 do {
265 int x = 0;
266 do {
267 int sum = 0;
268 for (int k = 0; k < kSubPixelTaps; ++k) {
269 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
270 }
271 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
272 } while (++x < width);
273
274 src += src_stride;
275 intermediate += intermediate_stride;
276 } while (++y < intermediate_height);
277
278 // Vertical filter.
279 filter_index = GetFilterIndex(vertical_filter_index, height);
280 intermediate = intermediate_result;
281 filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
282 // If |filter_id| == 0 then ConvolveHorizontal() should be called.
283 assert(filter_id != 0);
284 y = 0;
285 do {
286 int x = 0;
287 do {
288 int sum = 0;
289 for (int k = 0; k < kSubPixelTaps; ++k) {
290 sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
291 intermediate[k * intermediate_stride + x];
292 }
293 sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
294 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
295 dest[x] = sum;
296 } while (++x < width);
297
298 dest += pred_stride;
299 intermediate += intermediate_stride;
300 } while (++y < height);
301 }
302
303 // This function is a simplified version of ConvolveCompound2D_C.
304 // It is called when it is single prediction mode, where both horizontal and
305 // vertical filtering are required.
306 // The output is the single prediction of the block, clipped to valid pixel
307 // range.
308 template <int bitdepth, typename Pixel>
Convolve2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)309 void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
310 const int horizontal_filter_index,
311 const int vertical_filter_index, const int subpixel_x,
312 const int subpixel_y, const int width, const int height,
313 void* prediction, const ptrdiff_t pred_stride) {
314 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
315 ? kInterRoundBitsHorizontal12bpp
316 : kInterRoundBitsHorizontal;
317 constexpr int kRoundBitsVertical =
318 (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
319 const int intermediate_height = height + kSubPixelTaps - 1;
320 // The output of the horizontal filter, i.e. the intermediate_result, is
321 // guaranteed to fit in int16_t.
322 int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
323 (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
324 const int intermediate_stride = kMaxSuperBlockSizeInPixels;
325 const int max_pixel_value = (1 << bitdepth) - 1;
326
327 // Horizontal filter.
328 // Filter types used for width <= 4 are different from those for width > 4.
329 // When width > 4, the valid filter index range is always [0, 3].
330 // When width <= 4, the valid filter index range is always [4, 5].
331 // Similarly for height.
332 int filter_index = GetFilterIndex(horizontal_filter_index, width);
333 int16_t* intermediate = intermediate_result;
334 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
335 const auto* src = static_cast<const Pixel*>(reference) -
336 kVerticalOffset * src_stride - kHorizontalOffset;
337 auto* dest = static_cast<Pixel*>(prediction);
338 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
339 int filter_id = (subpixel_x >> 6) & kSubPixelMask;
340 // If |filter_id| == 0 then ConvolveVertical() should be called.
341 assert(filter_id != 0);
342 int y = 0;
343 do {
344 int x = 0;
345 do {
346 int sum = 0;
347 for (int k = 0; k < kSubPixelTaps; ++k) {
348 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
349 }
350 intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
351 } while (++x < width);
352
353 src += src_stride;
354 intermediate += intermediate_stride;
355 } while (++y < intermediate_height);
356
357 // Vertical filter.
358 filter_index = GetFilterIndex(vertical_filter_index, height);
359 intermediate = intermediate_result;
360 filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
361 // If |filter_id| == 0 then ConvolveHorizontal() should be called.
362 assert(filter_id != 0);
363 y = 0;
364 do {
365 int x = 0;
366 do {
367 int sum = 0;
368 for (int k = 0; k < kSubPixelTaps; ++k) {
369 sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
370 intermediate[k * intermediate_stride + x];
371 }
372 dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
373 max_pixel_value);
374 } while (++x < width);
375
376 dest += dest_stride;
377 intermediate += intermediate_stride;
378 } while (++y < height);
379 }
380
381 // This function is a simplified version of Convolve2D_C.
382 // It is called when it is single prediction mode, where only horizontal
383 // filtering is required.
384 // The output is the single prediction of the block, clipped to valid pixel
385 // range.
386 template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int subpixel_x,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)387 void ConvolveHorizontal_C(const void* const reference,
388 const ptrdiff_t reference_stride,
389 const int horizontal_filter_index,
390 const int /*vertical_filter_index*/,
391 const int subpixel_x, const int /*subpixel_y*/,
392 const int width, const int height, void* prediction,
393 const ptrdiff_t pred_stride) {
394 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
395 ? kInterRoundBitsHorizontal12bpp
396 : kInterRoundBitsHorizontal;
397 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
398 const int bits = kFilterBits - kRoundBitsHorizontal;
399 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
400 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
401 auto* dest = static_cast<Pixel*>(prediction);
402 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
403 const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
404 const int max_pixel_value = (1 << bitdepth) - 1;
405 int y = 0;
406 do {
407 int x = 0;
408 do {
409 int sum = 0;
410 for (int k = 0; k < kSubPixelTaps; ++k) {
411 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
412 }
413 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
414 dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
415 } while (++x < width);
416
417 src += src_stride;
418 dest += dest_stride;
419 } while (++y < height);
420 }
421
422 // This function is a simplified version of Convolve2D_C.
423 // It is called when it is single prediction mode, where only vertical
424 // filtering is required.
425 // The output is the single prediction of the block, clipped to valid pixel
426 // range.
427 template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)428 void ConvolveVertical_C(const void* const reference,
429 const ptrdiff_t reference_stride,
430 const int /*horizontal_filter_index*/,
431 const int vertical_filter_index,
432 const int /*subpixel_x*/, const int subpixel_y,
433 const int width, const int height, void* prediction,
434 const ptrdiff_t pred_stride) {
435 const int filter_index = GetFilterIndex(vertical_filter_index, height);
436 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
437 const auto* src =
438 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
439 auto* dest = static_cast<Pixel*>(prediction);
440 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
441 const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
442 // Copy filters must call ConvolveCopy().
443 assert(filter_id != 0);
444
445 const int max_pixel_value = (1 << bitdepth) - 1;
446 int y = 0;
447 do {
448 int x = 0;
449 do {
450 int sum = 0;
451 for (int k = 0; k < kSubPixelTaps; ++k) {
452 sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
453 src[k * src_stride + x];
454 }
455 dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
456 max_pixel_value);
457 } while (++x < width);
458
459 src += src_stride;
460 dest += dest_stride;
461 } while (++y < height);
462 }
463
// Copies a |width| x |height| block of |Pixel| values from |reference| to
// |prediction| without filtering.
//
// Both buffers are walked as bytes, so |reference_stride| and |pred_stride|
// are applied as byte strides. At least one row is always copied.
template <int bitdepth, typename Pixel>
void ConvolveCopy_C(const void* const reference,
                    const ptrdiff_t reference_stride,
                    const int /*horizontal_filter_index*/,
                    const int /*vertical_filter_index*/,
                    const int /*subpixel_x*/, const int /*subpixel_y*/,
                    const int width, const int height, void* prediction,
                    const ptrdiff_t pred_stride) {
  const size_t row_bytes = width * sizeof(Pixel);
  const auto* src_row = static_cast<const uint8_t*>(reference);
  auto* dest_row = static_cast<uint8_t*>(prediction);
  int rows_copied = 0;
  do {
    memcpy(dest_row, src_row, row_bytes);
    src_row += reference_stride;
    dest_row += pred_stride;
    ++rows_copied;
  } while (rows_copied < height);
}
481
482 template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)483 void ConvolveCompoundCopy_C(const void* const reference,
484 const ptrdiff_t reference_stride,
485 const int /*horizontal_filter_index*/,
486 const int /*vertical_filter_index*/,
487 const int /*subpixel_x*/, const int /*subpixel_y*/,
488 const int width, const int height, void* prediction,
489 const ptrdiff_t pred_stride) {
490 // All compound functions output to the predictor buffer with |pred_stride|
491 // equal to |width|.
492 assert(pred_stride == width);
493 // Compound functions start at 4x4.
494 assert(width >= 4 && height >= 4);
495 constexpr int kRoundBitsVertical =
496 ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
497 : kInterRoundBitsVertical) -
498 kInterRoundBitsCompoundVertical;
499 const auto* src = static_cast<const Pixel*>(reference);
500 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
501 auto* dest = static_cast<uint16_t*>(prediction);
502 int y = 0;
503 do {
504 int x = 0;
505 do {
506 int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
507 sum += src[x];
508 dest[x] = sum << kRoundBitsVertical;
509 } while (++x < width);
510 src += src_stride;
511 dest += pred_stride;
512 } while (++y < height);
513 }
514
515 // This function is a simplified version of ConvolveCompound2D_C.
516 // It is called when it is compound prediction mode, where only horizontal
517 // filtering is required.
518 // The output is not clipped to valid pixel range. Its output will be
519 // blended with another predictor to generate the final prediction of the block.
520 template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int subpixel_x,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)521 void ConvolveCompoundHorizontal_C(
522 const void* const reference, const ptrdiff_t reference_stride,
523 const int horizontal_filter_index, const int /*vertical_filter_index*/,
524 const int subpixel_x, const int /*subpixel_y*/, const int width,
525 const int height, void* prediction, const ptrdiff_t pred_stride) {
526 // All compound functions output to the predictor buffer with |pred_stride|
527 // equal to |width|.
528 assert(pred_stride == width);
529 // Compound functions start at 4x4.
530 assert(width >= 4 && height >= 4);
531 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
532 ? kInterRoundBitsHorizontal12bpp
533 : kInterRoundBitsHorizontal;
534 const int filter_index = GetFilterIndex(horizontal_filter_index, width);
535 const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
536 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
537 auto* dest = static_cast<uint16_t*>(prediction);
538 const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
539 // Copy filters must call ConvolveCopy().
540 assert(filter_id != 0);
541 int y = 0;
542 do {
543 int x = 0;
544 do {
545 int sum = 0;
546 for (int k = 0; k < kSubPixelTaps; ++k) {
547 sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
548 }
549 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
550 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
551 dest[x] = sum;
552 } while (++x < width);
553
554 src += src_stride;
555 dest += pred_stride;
556 } while (++y < height);
557 }
558
559 // This function is a simplified version of ConvolveCompound2D_C.
560 // It is called when it is compound prediction mode, where only vertical
561 // filtering is required.
562 // The output is not clipped to valid pixel range. Its output will be
563 // blended with another predictor to generate the final prediction of the block.
564 template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)565 void ConvolveCompoundVertical_C(const void* const reference,
566 const ptrdiff_t reference_stride,
567 const int /*horizontal_filter_index*/,
568 const int vertical_filter_index,
569 const int /*subpixel_x*/, const int subpixel_y,
570 const int width, const int height,
571 void* prediction, const ptrdiff_t pred_stride) {
572 // All compound functions output to the predictor buffer with |pred_stride|
573 // equal to |width|.
574 assert(pred_stride == width);
575 // Compound functions start at 4x4.
576 assert(width >= 4 && height >= 4);
577 constexpr int kRoundBitsHorizontal = (bitdepth == 12)
578 ? kInterRoundBitsHorizontal12bpp
579 : kInterRoundBitsHorizontal;
580 const int filter_index = GetFilterIndex(vertical_filter_index, height);
581 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
582 const auto* src =
583 static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
584 auto* dest = static_cast<uint16_t*>(prediction);
585 const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
586 // Copy filters must call ConvolveCopy().
587 assert(filter_id != 0);
588 int y = 0;
589 do {
590 int x = 0;
591 do {
592 int sum = 0;
593 for (int k = 0; k < kSubPixelTaps; ++k) {
594 sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
595 src[k * src_stride + x];
596 }
597 sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
598 sum += (bitdepth == 8) ? 0 : kCompoundOffset;
599 dest[x] = sum;
600 } while (++x < width);
601 src += src_stride;
602 dest += pred_stride;
603 } while (++y < height);
604 }
605
606 // This function is used when intra block copy is present.
607 // It is called when it is single prediction mode for U/V plane, where the
608 // reference block is from current frame and both horizontal and vertical
609 // filtering are required.
610 // The output is the single prediction of the block, clipped to valid pixel
611 // range.
612 template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)613 void ConvolveIntraBlockCopy2D_C(
614 const void* const reference, const ptrdiff_t reference_stride,
615 const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
616 const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
617 const int height, void* prediction, const ptrdiff_t pred_stride) {
618 const auto* src = static_cast<const Pixel*>(reference);
619 const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
620 auto* dest = static_cast<Pixel*>(prediction);
621 const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
622 const int intermediate_height = height + 1;
623 uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
624 (kMaxSuperBlockSizeInPixels + 1)];
625 uint16_t* intermediate = intermediate_result;
626 // Note: allow vertical access to height + 1. Because this function is only
627 // for u/v plane of intra block copy, such access is guaranteed to be within
628 // the prediction block.
629 int y = 0;
630 do {
631 int x = 0;
632 do {
633 intermediate[x] = src[x] + src[x + 1];
634 } while (++x < width);
635
636 src += src_stride;
637 intermediate += width;
638 } while (++y < intermediate_height);
639
640 intermediate = intermediate_result;
641 y = 0;
642 do {
643 int x = 0;
644 do {
645 dest[x] =
646 RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
647 } while (++x < width);
648
649 intermediate += width;
650 dest += dest_stride;
651 } while (++y < height);
652 }
653
// Intra block copy prediction for the U/V planes when only horizontal or only
// vertical filtering is required; the reference block comes from the current
// frame.
//
// The filter is simply the rounded average of the current pixel and the next
// one: the pixel to the right when |is_horizontal| is true, otherwise the
// pixel one row below.
template <int bitdepth, typename Pixel, bool is_horizontal>
void ConvolveIntraBlockCopy1D_C(
    const void* const reference, const ptrdiff_t reference_stride,
    const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
    const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
    const int height, void* prediction, const ptrdiff_t pred_stride) {
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
  // Distance, in pixels, from each sample to the neighbor it is averaged with.
  const ptrdiff_t neighbor_offset = is_horizontal ? 1 : src_stride;

  const auto* src_row = static_cast<const Pixel*>(reference);
  auto* dest_row = static_cast<Pixel*>(prediction);
  int row = 0;
  do {
    const Pixel* const neighbor_row = src_row + neighbor_offset;
    int column = 0;
    do {
      dest_row[column] =
          RightShiftWithRounding(src_row[column] + neighbor_row[column], 1);
    } while (++column < width);

    src_row += src_stride;
    dest_row += dest_stride;
  } while (++row < height);
}
684
Init8bpp()685 void Init8bpp() {
686 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
687 assert(dsp != nullptr);
688 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
689 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
690 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
691 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
692 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
693
694 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
695 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
696 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
697 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
698
699 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
700 dsp->convolve[1][0][0][1] =
701 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
702 dsp->convolve[1][0][1][0] =
703 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
704 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
705
706 dsp->convolve[1][1][0][0] = nullptr;
707 dsp->convolve[1][1][0][1] = nullptr;
708 dsp->convolve[1][1][1][0] = nullptr;
709 dsp->convolve[1][1][1][1] = nullptr;
710
711 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
712 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
713 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
714 #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
715 dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
716 #endif
717 #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
718 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
719 #endif
720 #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
721 dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
722 #endif
723 #ifndef LIBGAV1_Dsp8bpp_Convolve2D
724 dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
725 #endif
726
727 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
728 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
729 #endif
730 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
731 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
732 #endif
733 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
734 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
735 #endif
736 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
737 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
738 #endif
739
740 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
741 dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
742 #endif
743 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
744 dsp->convolve[1][0][0][1] =
745 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
746 #endif
747 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
748 dsp->convolve[1][0][1][0] =
749 ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
750 #endif
751 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
752 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
753 #endif
754
755 dsp->convolve[1][1][0][0] = nullptr;
756 dsp->convolve[1][1][0][1] = nullptr;
757 dsp->convolve[1][1][1][0] = nullptr;
758 dsp->convolve[1][1][1][1] = nullptr;
759
760 #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
761 dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
762 #endif
763 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
764 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
765 #endif
766 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
767 }
768
769 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()770 void Init10bpp() {
771 Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
772 assert(dsp != nullptr);
773 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
774 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
775 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
776 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
777 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
778
779 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
780 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
781 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
782 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
783
784 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
785 dsp->convolve[1][0][0][1] =
786 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
787 dsp->convolve[1][0][1][0] =
788 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
789 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
790
791 dsp->convolve[1][1][0][0] = nullptr;
792 dsp->convolve[1][1][0][1] = nullptr;
793 dsp->convolve[1][1][1][0] = nullptr;
794 dsp->convolve[1][1][1][1] = nullptr;
795
796 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
797 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
798 #else // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
799 #ifndef LIBGAV1_Dsp10bpp_ConvolveCopy
800 dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
801 #endif
802 #ifndef LIBGAV1_Dsp10bpp_ConvolveHorizontal
803 dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
804 #endif
805 #ifndef LIBGAV1_Dsp10bpp_ConvolveVertical
806 dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
807 #endif
808 #ifndef LIBGAV1_Dsp10bpp_Convolve2D
809 dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
810 #endif
811
812 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundCopy
813 dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
814 #endif
815 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal
816 dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
817 #endif
818 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundVertical
819 dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
820 #endif
821 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompound2D
822 dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
823 #endif
824
825 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy
826 dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
827 #endif
828 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal
829 dsp->convolve[1][0][0][1] =
830 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
831 #endif
832 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical
833 dsp->convolve[1][0][1][0] =
834 ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
835 #endif
836 #ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D
837 dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
838 #endif
839
840 dsp->convolve[1][1][0][0] = nullptr;
841 dsp->convolve[1][1][0][1] = nullptr;
842 dsp->convolve[1][1][1][0] = nullptr;
843 dsp->convolve[1][1][1][1] = nullptr;
844
845 #ifndef LIBGAV1_Dsp10bpp_ConvolveScale2D
846 dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
847 #endif
848 #ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D
849 dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
850 #endif
851 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
852 }
853 #endif
854
855 } // namespace
856
ConvolveInit_C()857 void ConvolveInit_C() {
858 Init8bpp();
859 #if LIBGAV1_MAX_BITDEPTH >= 10
860 Init10bpp();
861 #endif
862 }
863
864 } // namespace dsp
865 } // namespace libgav1
866