// Copyright 2019 The libgav1 Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 
15 #include "src/dsp/convolve.h"
16 
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 
23 #include "src/dsp/constants.h"
24 #include "src/dsp/dsp.h"
25 #include "src/utils/common.h"
26 #include "src/utils/constants.h"
27 
28 namespace libgav1 {
29 namespace dsp {
30 namespace {
31 
// Number of samples the unscaled convolve functions step back from the
// reference pointer (columns for kHorizontalOffset, rows for kVerticalOffset)
// so that tap 0 of the filter window is the first sample read by their loops.
constexpr int kHorizontalOffset = 3;
constexpr int kVerticalOffset = 3;
34 
// Compound prediction output ranges from ConvolveTest.ShowRange.
// Bitdepth:  8 Input range:            [       0,      255]
//   intermediate range:                [   -7140,    23460]
//   first pass output range:           [   -1785,     5865]
//   intermediate range:                [ -328440,   589560]
//   second pass output range:          [       0,      255]
//   compound second pass output range: [   -5132,     9212]
//
// Bitdepth: 10 Input range:            [       0,     1023]
//   intermediate range:                [  -28644,    94116]
//   first pass output range:           [   -7161,    23529]
//   intermediate range:                [-1317624,  2365176]
//   second pass output range:          [       0,     1023]
//   compound second pass output range: [    3988,    61532]
//
// Bitdepth: 12 Input range:            [       0,     4095]
//   intermediate range:                [ -114660,   376740]
//   first pass output range:           [   -7166,    23546]
//   intermediate range:                [-1318560,  2366880]
//   second pass output range:          [       0,     4095]
//   compound second pass output range: [    3974,    61559]
56 
57 template <int bitdepth, typename Pixel>
ConvolveScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)58 void ConvolveScale2D_C(const void* const reference,
59                        const ptrdiff_t reference_stride,
60                        const int horizontal_filter_index,
61                        const int vertical_filter_index, const int subpixel_x,
62                        const int subpixel_y, const int step_x, const int step_y,
63                        const int width, const int height, void* prediction,
64                        const ptrdiff_t pred_stride) {
65   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
66                                            ? kInterRoundBitsHorizontal12bpp
67                                            : kInterRoundBitsHorizontal;
68   constexpr int kRoundBitsVertical =
69       (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
70   const int intermediate_height =
71       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
72        kScaleSubPixelBits) +
73       kSubPixelTaps;
74   // The output of the horizontal filter, i.e. the intermediate_result, is
75   // guaranteed to fit in int16_t.
76   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
77                               (2 * kMaxSuperBlockSizeInPixels + 8)];
78   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
79   const int max_pixel_value = (1 << bitdepth) - 1;
80 
81   // Horizontal filter.
82   // Filter types used for width <= 4 are different from those for width > 4.
83   // When width > 4, the valid filter index range is always [0, 3].
84   // When width <= 4, the valid filter index range is always [4, 5].
85   // Similarly for height.
86   int filter_index = GetFilterIndex(horizontal_filter_index, width);
87   int16_t* intermediate = intermediate_result;
88   const auto* src = static_cast<const Pixel*>(reference);
89   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
90   auto* dest = static_cast<Pixel*>(prediction);
91   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
92   const int ref_x = subpixel_x >> kScaleSubPixelBits;
93   // Note: assume the input src is already aligned to the correct start
94   // position.
95   int y = 0;
96   do {
97     int p = subpixel_x;
98     int x = 0;
99     do {
100       int sum = 0;
101       const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
102       const int filter_id = (p >> 6) & kSubPixelMask;
103       for (int k = 0; k < kSubPixelTaps; ++k) {
104         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
105       }
106       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
107       p += step_x;
108     } while (++x < width);
109 
110     src += src_stride;
111     intermediate += intermediate_stride;
112   } while (++y < intermediate_height);
113 
114   // Vertical filter.
115   filter_index = GetFilterIndex(vertical_filter_index, height);
116   intermediate = intermediate_result;
117   int p = subpixel_y & 1023;
118   y = 0;
119   do {
120     const int filter_id = (p >> 6) & kSubPixelMask;
121     int x = 0;
122     do {
123       int sum = 0;
124       for (int k = 0; k < kSubPixelTaps; ++k) {
125         sum +=
126             kHalfSubPixelFilters[filter_index][filter_id][k] *
127             intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
128                          x];
129       }
130       dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
131                       max_pixel_value);
132     } while (++x < width);
133 
134     dest += dest_stride;
135     p += step_y;
136   } while (++y < height);
137 }
138 
139 template <int bitdepth, typename Pixel>
ConvolveCompoundScale2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int step_x,const int step_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)140 void ConvolveCompoundScale2D_C(const void* const reference,
141                                const ptrdiff_t reference_stride,
142                                const int horizontal_filter_index,
143                                const int vertical_filter_index,
144                                const int subpixel_x, const int subpixel_y,
145                                const int step_x, const int step_y,
146                                const int width, const int height,
147                                void* prediction, const ptrdiff_t pred_stride) {
148   // All compound functions output to the predictor buffer with |pred_stride|
149   // equal to |width|.
150   assert(pred_stride == width);
151   // Compound functions start at 4x4.
152   assert(width >= 4 && height >= 4);
153   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
154                                            ? kInterRoundBitsHorizontal12bpp
155                                            : kInterRoundBitsHorizontal;
156   constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
157   const int intermediate_height =
158       (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
159        kScaleSubPixelBits) +
160       kSubPixelTaps;
161   // The output of the horizontal filter, i.e. the intermediate_result, is
162   // guaranteed to fit in int16_t.
163   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
164                               (2 * kMaxSuperBlockSizeInPixels + 8)];
165   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
166 
167   // Horizontal filter.
168   // Filter types used for width <= 4 are different from those for width > 4.
169   // When width > 4, the valid filter index range is always [0, 3].
170   // When width <= 4, the valid filter index range is always [4, 5].
171   // Similarly for height.
172   int filter_index = GetFilterIndex(horizontal_filter_index, width);
173   int16_t* intermediate = intermediate_result;
174   const auto* src = static_cast<const Pixel*>(reference);
175   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
176   auto* dest = static_cast<uint16_t*>(prediction);
177   const int ref_x = subpixel_x >> kScaleSubPixelBits;
178   // Note: assume the input src is already aligned to the correct start
179   // position.
180   int y = 0;
181   do {
182     int p = subpixel_x;
183     int x = 0;
184     do {
185       int sum = 0;
186       const Pixel* src_x = &src[(p >> kScaleSubPixelBits) - ref_x];
187       const int filter_id = (p >> 6) & kSubPixelMask;
188       for (int k = 0; k < kSubPixelTaps; ++k) {
189         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src_x[k];
190       }
191       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
192       p += step_x;
193     } while (++x < width);
194 
195     src += src_stride;
196     intermediate += intermediate_stride;
197   } while (++y < intermediate_height);
198 
199   // Vertical filter.
200   filter_index = GetFilterIndex(vertical_filter_index, height);
201   intermediate = intermediate_result;
202   int p = subpixel_y & 1023;
203   y = 0;
204   do {
205     const int filter_id = (p >> 6) & kSubPixelMask;
206     int x = 0;
207     do {
208       int sum = 0;
209       for (int k = 0; k < kSubPixelTaps; ++k) {
210         sum +=
211             kHalfSubPixelFilters[filter_index][filter_id][k] *
212             intermediate[((p >> kScaleSubPixelBits) + k) * intermediate_stride +
213                          x];
214       }
215       sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
216       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
217       dest[x] = sum;
218     } while (++x < width);
219 
220     dest += pred_stride;
221     p += step_y;
222   } while (++y < height);
223 }
224 
225 template <int bitdepth, typename Pixel>
ConvolveCompound2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)226 void ConvolveCompound2D_C(const void* const reference,
227                           const ptrdiff_t reference_stride,
228                           const int horizontal_filter_index,
229                           const int vertical_filter_index, const int subpixel_x,
230                           const int subpixel_y, const int width,
231                           const int height, void* prediction,
232                           const ptrdiff_t pred_stride) {
233   // All compound functions output to the predictor buffer with |pred_stride|
234   // equal to |width|.
235   assert(pred_stride == width);
236   // Compound functions start at 4x4.
237   assert(width >= 4 && height >= 4);
238   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
239                                            ? kInterRoundBitsHorizontal12bpp
240                                            : kInterRoundBitsHorizontal;
241   constexpr int kRoundBitsVertical = kInterRoundBitsCompoundVertical;
242   const int intermediate_height = height + kSubPixelTaps - 1;
243   // The output of the horizontal filter, i.e. the intermediate_result, is
244   // guaranteed to fit in int16_t.
245   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
246                               (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
247   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
248 
249   // Horizontal filter.
250   // Filter types used for width <= 4 are different from those for width > 4.
251   // When width > 4, the valid filter index range is always [0, 3].
252   // When width <= 4, the valid filter index range is always [4, 5].
253   // Similarly for height.
254   int filter_index = GetFilterIndex(horizontal_filter_index, width);
255   int16_t* intermediate = intermediate_result;
256   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
257   const auto* src = static_cast<const Pixel*>(reference) -
258                     kVerticalOffset * src_stride - kHorizontalOffset;
259   auto* dest = static_cast<uint16_t*>(prediction);
260   int filter_id = (subpixel_x >> 6) & kSubPixelMask;
261   // If |filter_id| == 0 then ConvolveVertical() should be called.
262   assert(filter_id != 0);
263   int y = 0;
264   do {
265     int x = 0;
266     do {
267       int sum = 0;
268       for (int k = 0; k < kSubPixelTaps; ++k) {
269         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
270       }
271       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
272     } while (++x < width);
273 
274     src += src_stride;
275     intermediate += intermediate_stride;
276   } while (++y < intermediate_height);
277 
278   // Vertical filter.
279   filter_index = GetFilterIndex(vertical_filter_index, height);
280   intermediate = intermediate_result;
281   filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
282   // If |filter_id| == 0 then ConvolveHorizontal() should be called.
283   assert(filter_id != 0);
284   y = 0;
285   do {
286     int x = 0;
287     do {
288       int sum = 0;
289       for (int k = 0; k < kSubPixelTaps; ++k) {
290         sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
291                intermediate[k * intermediate_stride + x];
292       }
293       sum = RightShiftWithRounding(sum, kRoundBitsVertical - 1);
294       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
295       dest[x] = sum;
296     } while (++x < width);
297 
298     dest += pred_stride;
299     intermediate += intermediate_stride;
300   } while (++y < height);
301 }
302 
303 // This function is a simplified version of ConvolveCompound2D_C.
304 // It is called when it is single prediction mode, where both horizontal and
305 // vertical filtering are required.
306 // The output is the single prediction of the block, clipped to valid pixel
307 // range.
308 template <int bitdepth, typename Pixel>
Convolve2D_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int vertical_filter_index,const int subpixel_x,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)309 void Convolve2D_C(const void* const reference, const ptrdiff_t reference_stride,
310                   const int horizontal_filter_index,
311                   const int vertical_filter_index, const int subpixel_x,
312                   const int subpixel_y, const int width, const int height,
313                   void* prediction, const ptrdiff_t pred_stride) {
314   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
315                                            ? kInterRoundBitsHorizontal12bpp
316                                            : kInterRoundBitsHorizontal;
317   constexpr int kRoundBitsVertical =
318       (bitdepth == 12) ? kInterRoundBitsVertical12bpp : kInterRoundBitsVertical;
319   const int intermediate_height = height + kSubPixelTaps - 1;
320   // The output of the horizontal filter, i.e. the intermediate_result, is
321   // guaranteed to fit in int16_t.
322   int16_t intermediate_result[kMaxSuperBlockSizeInPixels *
323                               (kMaxSuperBlockSizeInPixels + kSubPixelTaps - 1)];
324   const int intermediate_stride = kMaxSuperBlockSizeInPixels;
325   const int max_pixel_value = (1 << bitdepth) - 1;
326 
327   // Horizontal filter.
328   // Filter types used for width <= 4 are different from those for width > 4.
329   // When width > 4, the valid filter index range is always [0, 3].
330   // When width <= 4, the valid filter index range is always [4, 5].
331   // Similarly for height.
332   int filter_index = GetFilterIndex(horizontal_filter_index, width);
333   int16_t* intermediate = intermediate_result;
334   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
335   const auto* src = static_cast<const Pixel*>(reference) -
336                     kVerticalOffset * src_stride - kHorizontalOffset;
337   auto* dest = static_cast<Pixel*>(prediction);
338   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
339   int filter_id = (subpixel_x >> 6) & kSubPixelMask;
340   // If |filter_id| == 0 then ConvolveVertical() should be called.
341   assert(filter_id != 0);
342   int y = 0;
343   do {
344     int x = 0;
345     do {
346       int sum = 0;
347       for (int k = 0; k < kSubPixelTaps; ++k) {
348         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
349       }
350       intermediate[x] = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
351     } while (++x < width);
352 
353     src += src_stride;
354     intermediate += intermediate_stride;
355   } while (++y < intermediate_height);
356 
357   // Vertical filter.
358   filter_index = GetFilterIndex(vertical_filter_index, height);
359   intermediate = intermediate_result;
360   filter_id = ((subpixel_y & 1023) >> 6) & kSubPixelMask;
361   // If |filter_id| == 0 then ConvolveHorizontal() should be called.
362   assert(filter_id != 0);
363   y = 0;
364   do {
365     int x = 0;
366     do {
367       int sum = 0;
368       for (int k = 0; k < kSubPixelTaps; ++k) {
369         sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
370                intermediate[k * intermediate_stride + x];
371       }
372       dest[x] = Clip3(RightShiftWithRounding(sum, kRoundBitsVertical - 1), 0,
373                       max_pixel_value);
374     } while (++x < width);
375 
376     dest += dest_stride;
377     intermediate += intermediate_stride;
378   } while (++y < height);
379 }
380 
381 // This function is a simplified version of Convolve2D_C.
382 // It is called when it is single prediction mode, where only horizontal
383 // filtering is required.
384 // The output is the single prediction of the block, clipped to valid pixel
385 // range.
386 template <int bitdepth, typename Pixel>
ConvolveHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int subpixel_x,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)387 void ConvolveHorizontal_C(const void* const reference,
388                           const ptrdiff_t reference_stride,
389                           const int horizontal_filter_index,
390                           const int /*vertical_filter_index*/,
391                           const int subpixel_x, const int /*subpixel_y*/,
392                           const int width, const int height, void* prediction,
393                           const ptrdiff_t pred_stride) {
394   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
395                                            ? kInterRoundBitsHorizontal12bpp
396                                            : kInterRoundBitsHorizontal;
397   const int filter_index = GetFilterIndex(horizontal_filter_index, width);
398   const int bits = kFilterBits - kRoundBitsHorizontal;
399   const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
400   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
401   auto* dest = static_cast<Pixel*>(prediction);
402   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
403   const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
404   const int max_pixel_value = (1 << bitdepth) - 1;
405   int y = 0;
406   do {
407     int x = 0;
408     do {
409       int sum = 0;
410       for (int k = 0; k < kSubPixelTaps; ++k) {
411         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
412       }
413       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
414       dest[x] = Clip3(RightShiftWithRounding(sum, bits), 0, max_pixel_value);
415     } while (++x < width);
416 
417     src += src_stride;
418     dest += dest_stride;
419   } while (++y < height);
420 }
421 
422 // This function is a simplified version of Convolve2D_C.
423 // It is called when it is single prediction mode, where only vertical
424 // filtering is required.
425 // The output is the single prediction of the block, clipped to valid pixel
426 // range.
427 template <int bitdepth, typename Pixel>
ConvolveVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)428 void ConvolveVertical_C(const void* const reference,
429                         const ptrdiff_t reference_stride,
430                         const int /*horizontal_filter_index*/,
431                         const int vertical_filter_index,
432                         const int /*subpixel_x*/, const int subpixel_y,
433                         const int width, const int height, void* prediction,
434                         const ptrdiff_t pred_stride) {
435   const int filter_index = GetFilterIndex(vertical_filter_index, height);
436   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
437   const auto* src =
438       static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
439   auto* dest = static_cast<Pixel*>(prediction);
440   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
441   const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
442   // Copy filters must call ConvolveCopy().
443   assert(filter_id != 0);
444 
445   const int max_pixel_value = (1 << bitdepth) - 1;
446   int y = 0;
447   do {
448     int x = 0;
449     do {
450       int sum = 0;
451       for (int k = 0; k < kSubPixelTaps; ++k) {
452         sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
453                src[k * src_stride + x];
454       }
455       dest[x] = Clip3(RightShiftWithRounding(sum, kFilterBits - 1), 0,
456                       max_pixel_value);
457     } while (++x < width);
458 
459     src += src_stride;
460     dest += dest_stride;
461   } while (++y < height);
462 }
463 
// Copies a |width| x |height| block of Pixel samples from |reference| to
// |prediction| without any filtering.
//
// Both pointers are advanced as byte pointers, so |reference_stride| and
// |pred_stride| are in bytes; each row copies width * sizeof(Pixel) bytes.
// The filter indices and subpixel positions are unused. At least one row is
// always copied because the loop condition is tested after the body.
template <int bitdepth, typename Pixel>
void ConvolveCopy_C(const void* const reference,
                    const ptrdiff_t reference_stride,
                    const int /*horizontal_filter_index*/,
                    const int /*vertical_filter_index*/,
                    const int /*subpixel_x*/, const int /*subpixel_y*/,
                    const int width, const int height, void* prediction,
                    const ptrdiff_t pred_stride) {
  const auto* src = static_cast<const uint8_t*>(reference);
  auto* dest = static_cast<uint8_t*>(prediction);
  int y = 0;
  do {
    memcpy(dest, src, width * sizeof(Pixel));
    src += reference_stride;
    dest += pred_stride;
  } while (++y < height);
}
481 
482 template <int bitdepth, typename Pixel>
ConvolveCompoundCopy_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)483 void ConvolveCompoundCopy_C(const void* const reference,
484                             const ptrdiff_t reference_stride,
485                             const int /*horizontal_filter_index*/,
486                             const int /*vertical_filter_index*/,
487                             const int /*subpixel_x*/, const int /*subpixel_y*/,
488                             const int width, const int height, void* prediction,
489                             const ptrdiff_t pred_stride) {
490   // All compound functions output to the predictor buffer with |pred_stride|
491   // equal to |width|.
492   assert(pred_stride == width);
493   // Compound functions start at 4x4.
494   assert(width >= 4 && height >= 4);
495   constexpr int kRoundBitsVertical =
496       ((bitdepth == 12) ? kInterRoundBitsVertical12bpp
497                         : kInterRoundBitsVertical) -
498       kInterRoundBitsCompoundVertical;
499   const auto* src = static_cast<const Pixel*>(reference);
500   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
501   auto* dest = static_cast<uint16_t*>(prediction);
502   int y = 0;
503   do {
504     int x = 0;
505     do {
506       int sum = (bitdepth == 8) ? 0 : ((1 << bitdepth) + (1 << (bitdepth - 1)));
507       sum += src[x];
508       dest[x] = sum << kRoundBitsVertical;
509     } while (++x < width);
510     src += src_stride;
511     dest += pred_stride;
512   } while (++y < height);
513 }
514 
515 // This function is a simplified version of ConvolveCompound2D_C.
516 // It is called when it is compound prediction mode, where only horizontal
517 // filtering is required.
518 // The output is not clipped to valid pixel range. Its output will be
519 // blended with another predictor to generate the final prediction of the block.
520 template <int bitdepth, typename Pixel>
ConvolveCompoundHorizontal_C(const void * const reference,const ptrdiff_t reference_stride,const int horizontal_filter_index,const int,const int subpixel_x,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)521 void ConvolveCompoundHorizontal_C(
522     const void* const reference, const ptrdiff_t reference_stride,
523     const int horizontal_filter_index, const int /*vertical_filter_index*/,
524     const int subpixel_x, const int /*subpixel_y*/, const int width,
525     const int height, void* prediction, const ptrdiff_t pred_stride) {
526   // All compound functions output to the predictor buffer with |pred_stride|
527   // equal to |width|.
528   assert(pred_stride == width);
529   // Compound functions start at 4x4.
530   assert(width >= 4 && height >= 4);
531   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
532                                            ? kInterRoundBitsHorizontal12bpp
533                                            : kInterRoundBitsHorizontal;
534   const int filter_index = GetFilterIndex(horizontal_filter_index, width);
535   const auto* src = static_cast<const Pixel*>(reference) - kHorizontalOffset;
536   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
537   auto* dest = static_cast<uint16_t*>(prediction);
538   const int filter_id = (subpixel_x >> 6) & kSubPixelMask;
539   // Copy filters must call ConvolveCopy().
540   assert(filter_id != 0);
541   int y = 0;
542   do {
543     int x = 0;
544     do {
545       int sum = 0;
546       for (int k = 0; k < kSubPixelTaps; ++k) {
547         sum += kHalfSubPixelFilters[filter_index][filter_id][k] * src[x + k];
548       }
549       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
550       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
551       dest[x] = sum;
552     } while (++x < width);
553 
554     src += src_stride;
555     dest += pred_stride;
556   } while (++y < height);
557 }
558 
559 // This function is a simplified version of ConvolveCompound2D_C.
560 // It is called when it is compound prediction mode, where only vertical
561 // filtering is required.
562 // The output is not clipped to valid pixel range. Its output will be
563 // blended with another predictor to generate the final prediction of the block.
564 template <int bitdepth, typename Pixel>
ConvolveCompoundVertical_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int vertical_filter_index,const int,const int subpixel_y,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)565 void ConvolveCompoundVertical_C(const void* const reference,
566                                 const ptrdiff_t reference_stride,
567                                 const int /*horizontal_filter_index*/,
568                                 const int vertical_filter_index,
569                                 const int /*subpixel_x*/, const int subpixel_y,
570                                 const int width, const int height,
571                                 void* prediction, const ptrdiff_t pred_stride) {
572   // All compound functions output to the predictor buffer with |pred_stride|
573   // equal to |width|.
574   assert(pred_stride == width);
575   // Compound functions start at 4x4.
576   assert(width >= 4 && height >= 4);
577   constexpr int kRoundBitsHorizontal = (bitdepth == 12)
578                                            ? kInterRoundBitsHorizontal12bpp
579                                            : kInterRoundBitsHorizontal;
580   const int filter_index = GetFilterIndex(vertical_filter_index, height);
581   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
582   const auto* src =
583       static_cast<const Pixel*>(reference) - kVerticalOffset * src_stride;
584   auto* dest = static_cast<uint16_t*>(prediction);
585   const int filter_id = (subpixel_y >> 6) & kSubPixelMask;
586   // Copy filters must call ConvolveCopy().
587   assert(filter_id != 0);
588   int y = 0;
589   do {
590     int x = 0;
591     do {
592       int sum = 0;
593       for (int k = 0; k < kSubPixelTaps; ++k) {
594         sum += kHalfSubPixelFilters[filter_index][filter_id][k] *
595                src[k * src_stride + x];
596       }
597       sum = RightShiftWithRounding(sum, kRoundBitsHorizontal - 1);
598       sum += (bitdepth == 8) ? 0 : kCompoundOffset;
599       dest[x] = sum;
600     } while (++x < width);
601     src += src_stride;
602     dest += pred_stride;
603   } while (++y < height);
604 }
605 
606 // This function is used when intra block copy is present.
607 // It is called when it is single prediction mode for U/V plane, where the
608 // reference block is from current frame and both horizontal and vertical
609 // filtering are required.
610 // The output is the single prediction of the block, clipped to valid pixel
611 // range.
612 template <int bitdepth, typename Pixel>
ConvolveIntraBlockCopy2D_C(const void * const reference,const ptrdiff_t reference_stride,const int,const int,const int,const int,const int width,const int height,void * prediction,const ptrdiff_t pred_stride)613 void ConvolveIntraBlockCopy2D_C(
614     const void* const reference, const ptrdiff_t reference_stride,
615     const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
616     const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
617     const int height, void* prediction, const ptrdiff_t pred_stride) {
618   const auto* src = static_cast<const Pixel*>(reference);
619   const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
620   auto* dest = static_cast<Pixel*>(prediction);
621   const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
622   const int intermediate_height = height + 1;
623   uint16_t intermediate_result[kMaxSuperBlockSizeInPixels *
624                                (kMaxSuperBlockSizeInPixels + 1)];
625   uint16_t* intermediate = intermediate_result;
626   // Note: allow vertical access to height + 1. Because this function is only
627   // for u/v plane of intra block copy, such access is guaranteed to be within
628   // the prediction block.
629   int y = 0;
630   do {
631     int x = 0;
632     do {
633       intermediate[x] = src[x] + src[x + 1];
634     } while (++x < width);
635 
636     src += src_stride;
637     intermediate += width;
638   } while (++y < intermediate_height);
639 
640   intermediate = intermediate_result;
641   y = 0;
642   do {
643     int x = 0;
644     do {
645       dest[x] =
646           RightShiftWithRounding(intermediate[x] + intermediate[x + width], 2);
647     } while (++x < width);
648 
649     intermediate += width;
650     dest += dest_stride;
651   } while (++y < height);
652 }
653 
// Intra block copy predictor for the case where only one direction needs
// filtering (single prediction, U/V planes, reference taken from the current
// frame).
//
// The filter is the average of a pixel and its next neighbor: the pixel to
// the right when |is_horizontal| is true, otherwise the pixel one row below.
// The two-pixel sum is passed to RightShiftWithRounding() with a shift of 1.
// The filter index and subpixel arguments are part of the common convolve
// signature but are not used here, and neither is |bitdepth|.
//
// |reference_stride| and |pred_stride| are given in bytes. |width| and
// |height| must be at least 1 because the loops below always run once.
template <int bitdepth, typename Pixel, bool is_horizontal>
void ConvolveIntraBlockCopy1D_C(
    const void* const reference, const ptrdiff_t reference_stride,
    const int /*horizontal_filter_index*/, const int /*vertical_filter_index*/,
    const int /*subpixel_x*/, const int /*subpixel_y*/, const int width,
    const int height, void* prediction, const ptrdiff_t pred_stride) {
  // Convert the byte strides into strides counted in Pixel units.
  const ptrdiff_t src_stride = reference_stride / sizeof(Pixel);
  const ptrdiff_t dest_stride = pred_stride / sizeof(Pixel);
  // Distance, in pixels, from a sample to the neighbor it is averaged with.
  const ptrdiff_t neighbor_step = is_horizontal ? 1 : src_stride;

  const auto* src_row = static_cast<const Pixel*>(reference);
  auto* dest_row = static_cast<Pixel*>(prediction);
  int row = 0;
  do {
    const Pixel* const neighbor_row = src_row + neighbor_step;
    int col = 0;
    do {
      const auto pair_sum = src_row[col] + neighbor_row[col];
      dest_row[col] = RightShiftWithRounding(pair_sum, 1);
    } while (++col < width);

    dest_row += dest_stride;
    src_row += src_stride;
  } while (++row < height);
}
684 
Init8bpp()685 void Init8bpp() {
686   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
687   assert(dsp != nullptr);
688 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
689   dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
690   dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
691   dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
692   dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
693 
694   dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
695   dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
696   dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
697   dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
698 
699   dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
700   dsp->convolve[1][0][0][1] =
701       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
702   dsp->convolve[1][0][1][0] =
703       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
704   dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
705 
706   dsp->convolve[1][1][0][0] = nullptr;
707   dsp->convolve[1][1][0][1] = nullptr;
708   dsp->convolve[1][1][1][0] = nullptr;
709   dsp->convolve[1][1][1][1] = nullptr;
710 
711   dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
712   dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
713 #else  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
714 #ifndef LIBGAV1_Dsp8bpp_ConvolveCopy
715   dsp->convolve[0][0][0][0] = ConvolveCopy_C<8, uint8_t>;
716 #endif
717 #ifndef LIBGAV1_Dsp8bpp_ConvolveHorizontal
718   dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<8, uint8_t>;
719 #endif
720 #ifndef LIBGAV1_Dsp8bpp_ConvolveVertical
721   dsp->convolve[0][0][1][0] = ConvolveVertical_C<8, uint8_t>;
722 #endif
723 #ifndef LIBGAV1_Dsp8bpp_Convolve2D
724   dsp->convolve[0][0][1][1] = Convolve2D_C<8, uint8_t>;
725 #endif
726 
727 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundCopy
728   dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<8, uint8_t>;
729 #endif
730 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundHorizontal
731   dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<8, uint8_t>;
732 #endif
733 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundVertical
734   dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<8, uint8_t>;
735 #endif
736 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompound2D
737   dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<8, uint8_t>;
738 #endif
739 
740 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy
741   dsp->convolve[1][0][0][0] = ConvolveCopy_C<8, uint8_t>;
742 #endif
743 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyHorizontal
744   dsp->convolve[1][0][0][1] =
745       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/true>;
746 #endif
747 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopyVertical
748   dsp->convolve[1][0][1][0] =
749       ConvolveIntraBlockCopy1D_C<8, uint8_t, /*is_horizontal=*/false>;
750 #endif
751 #ifndef LIBGAV1_Dsp8bpp_ConvolveIntraBlockCopy2D
752   dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<8, uint8_t>;
753 #endif
754 
755   dsp->convolve[1][1][0][0] = nullptr;
756   dsp->convolve[1][1][0][1] = nullptr;
757   dsp->convolve[1][1][1][0] = nullptr;
758   dsp->convolve[1][1][1][1] = nullptr;
759 
760 #ifndef LIBGAV1_Dsp8bpp_ConvolveScale2D
761   dsp->convolve_scale[0] = ConvolveScale2D_C<8, uint8_t>;
762 #endif
763 #ifndef LIBGAV1_Dsp8bpp_ConvolveCompoundScale2D
764   dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<8, uint8_t>;
765 #endif
766 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
767 }
768 
#if LIBGAV1_MAX_BITDEPTH >= 10
// Installs the C convolve functions into the writable 10bpp Dsp table.
//
// Judging by the functions stored below, dsp->convolve is indexed as
// [is_intra_block_copy][is_compound][has_vertical][has_horizontal].
//
// When LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS is nonzero every slot receives its C
// implementation. Otherwise a slot is only filled when the matching
// LIBGAV1_Dsp10bpp_* macro is not defined (presumably those macros are
// defined by optimized implementations that provide the slot themselves).
//
// The guards for the filtered intra block copy slots accept two spellings:
// the LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy{Horizontal,Vertical,2D} names
// that mirror the 8bpp macros, and the historical
// LIBGAV1_Dsp10bpp_ConvolveIntraBlock{Horizontal,Vertical,2D} names, so that
// a definition using either convention is honored.
void Init10bpp() {
  Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
  assert(dsp != nullptr);
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // Single prediction.
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;

  // Compound prediction.
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;

  // Intra block copy, single prediction.
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;

  // No function is installed for the [1][1][*][*] slots.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

  // Scaled prediction.
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#else  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
  // Single prediction.
#ifndef LIBGAV1_Dsp10bpp_ConvolveCopy
  dsp->convolve[0][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveHorizontal
  dsp->convolve[0][0][0][1] = ConvolveHorizontal_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveVertical
  dsp->convolve[0][0][1][0] = ConvolveVertical_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_Convolve2D
  dsp->convolve[0][0][1][1] = Convolve2D_C<10, uint16_t>;
#endif

  // Compound prediction.
#ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundCopy
  dsp->convolve[0][1][0][0] = ConvolveCompoundCopy_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundHorizontal
  dsp->convolve[0][1][0][1] = ConvolveCompoundHorizontal_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundVertical
  dsp->convolve[0][1][1][0] = ConvolveCompoundVertical_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveCompound2D
  dsp->convolve[0][1][1][1] = ConvolveCompound2D_C<10, uint16_t>;
#endif

  // Intra block copy, single prediction.
#ifndef LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy
  dsp->convolve[1][0][0][0] = ConvolveCopy_C<10, uint16_t>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyHorizontal) && \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockHorizontal)
  dsp->convolve[1][0][0][1] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/true>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopyVertical) && \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockVertical)
  dsp->convolve[1][0][1][0] =
      ConvolveIntraBlockCopy1D_C<10, uint16_t, /*is_horizontal=*/false>;
#endif
#if !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlockCopy2D) && \
    !defined(LIBGAV1_Dsp10bpp_ConvolveIntraBlock2D)
  dsp->convolve[1][0][1][1] = ConvolveIntraBlockCopy2D_C<10, uint16_t>;
#endif

  // No function is installed for the [1][1][*][*] slots.
  dsp->convolve[1][1][0][0] = nullptr;
  dsp->convolve[1][1][0][1] = nullptr;
  dsp->convolve[1][1][1][0] = nullptr;
  dsp->convolve[1][1][1][1] = nullptr;

  // Scaled prediction.
#ifndef LIBGAV1_Dsp10bpp_ConvolveScale2D
  dsp->convolve_scale[0] = ConvolveScale2D_C<10, uint16_t>;
#endif
#ifndef LIBGAV1_Dsp10bpp_ConvolveCompoundScale2D
  dsp->convolve_scale[1] = ConvolveCompoundScale2D_C<10, uint16_t>;
#endif
#endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
854 
855 }  // namespace
856 
ConvolveInit_C()857 void ConvolveInit_C() {
858   Init8bpp();
859 #if LIBGAV1_MAX_BITDEPTH >= 10
860   Init10bpp();
861 #endif
862 }
863 
864 }  // namespace dsp
865 }  // namespace libgav1
866