• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #ifndef VPX_DSP_X86_CONVOLVE_H_
11 #define VPX_DSP_X86_CONVOLVE_H_
12 
13 #include <assert.h>
14 
15 #include "./vpx_config.h"
16 #include "vpx/vpx_integer.h"
17 #include "vpx_ports/mem.h"
18 
/*
 * Function type for a 1-D filter routine that reads and writes 8-bit
 * samples.
 *
 *   src_ptr        pointer to the input samples.
 *   src_pitch      pitch of the input (presumably the row stride; confirm
 *                  against the implementations).
 *   output_ptr     pointer to the output samples.
 *   out_pitch      pitch of the output (presumably the row stride).
 *   output_height  presumably the number of rows to produce.
 *   filter         pointer to the 16-bit filter taps.
 *
 * The parameter order mirrors the argument order FUN_CONV_1D uses when it
 * calls the vpx_filter_block1d* kernels; whether those kernels are declared
 * with this type is not visible in this header.
 */
typedef void filter8_1dfunction(
    const uint8_t *src_ptr,
    ptrdiff_t src_pitch,
    uint8_t *output_ptr,
    ptrdiff_t out_pitch,
    uint32_t output_height,
    const int16_t *filter);
22 
/*
 * FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)
 *
 * Expands to the definition of vpx_convolve8_<name>_<opt>(), which forwards
 * a 1-D convolution to fixed-width block kernels.
 *
 *   name       suffix of the generated function name.
 *   offset     expression used to index filter_kernel and pick the taps.
 *   step_q4    expression that is asserted to equal 16.
 *   dir        token pasted into the block-kernel names before `8` / `2`.
 *   src_start  expression passed as the source pointer to the `8` kernels;
 *              it is evaluated again after every advance of src.
 *   avg        token (possibly empty) pasted in front of opt in the kernel
 *              names.
 *   opt        suffix shared by the generated function and the kernels.
 *
 * offset, step_q4 and src_start are substituted inside the function body, so
 * they may refer to the function's parameters.
 *
 * Behaviour of the generated function:
 *   - asserts that filter[3] != 128 and that step_q4 == 16;
 *   - if any of filter[0..2] is nonzero, calls the `<dir>8` kernels with
 *     src_start, otherwise calls the `<dir>2` kernels with src;
 *   - walks the width in strips of 16 columns, then handles one remainder
 *     of exactly 8 or exactly 4 columns; any other leftover width is not
 *     processed.
 */
#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)         \
  void vpx_convolve8_##name##_##opt(                                         \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,    \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    const int16_t *filter = filter_kernel[offset];                           \
    /* The macro arguments need not mention all of these. */                 \
    (void)x0_q4;                                                             \
    (void)x_step_q4;                                                         \
    (void)y0_q4;                                                             \
    (void)y_step_q4;                                                         \
    assert(filter[3] != 128);                                                \
    assert(step_q4 == 16);                                                   \
    if (!(filter[0] | filter[1] | filter[2])) {                              \
      /* First three taps are all zero: `2` kernels, read from src. */       \
      for (; w >= 16; w -= 16) {                                             \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,       \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
      }                                                                      \
      switch (w) {                                                           \
        case 8:                                                              \
          vpx_filter_block1d8_##dir##2_##avg##opt(                           \
              src, src_stride, dst, dst_stride, h, filter);                  \
          break;                                                             \
        case 4:                                                              \
          vpx_filter_block1d4_##dir##2_##avg##opt(                           \
              src, src_stride, dst, dst_stride, h, filter);                  \
          break;                                                             \
        default:                                                             \
          /* Any other leftover width is not processed. */                   \
          break;                                                             \
      }                                                                      \
    } else {                                                                 \
      /* Some leading tap is nonzero: `8` kernels, read from src_start. */   \
      for (; w >= 16; w -= 16) {                                             \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
      }                                                                      \
      switch (w) {                                                           \
        case 8:                                                              \
          vpx_filter_block1d8_##dir##8_##avg##opt(                           \
              src_start, src_stride, dst, dst_stride, h, filter);            \
          break;                                                             \
        case 4:                                                              \
          vpx_filter_block1d4_##dir##8_##avg##opt(                           \
              src_start, src_stride, dst, dst_stride, h, filter);            \
          break;                                                             \
        default:                                                             \
          /* Any other leftover width is not processed. */                   \
          break;                                                             \
      }                                                                      \
    }                                                                        \
  }
67 
/*
 * FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vpx_convolve8_<avg><opt>(), a 2-D
 * convolution built from two 1-D passes: vpx_convolve8_horiz_<opt>() writes
 * into a 64-wide scratch buffer on the stack, then
 * vpx_convolve8_<avg>vert_<opt>() reads that buffer and writes dst.
 *
 *   avg  token (possibly empty) pasted into the generated name and into the
 *        name of the vertical pass.
 *   opt  suffix shared by the generated function and both passes.
 *
 * The generated function asserts that neither selected kernel row has 128 as
 * its fourth tap, that w and h are at most 64, and that both steps equal 16.
 * With NDEBUG defined none of these conditions is checked.
 */
#define FUN_CONV_2D(avg, opt)                                                  \
  void vpx_convolve8_##avg##opt(                                               \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                  \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {                 \
    const int16_t *filter_x = filter[x0_q4];                                   \
    const int16_t *filter_y = filter[y0_q4];                                   \
    /* filter_y is only read by the assert below. */                           \
    (void)filter_y;                                                            \
    assert(filter_x[3] != 128);                                                \
    assert(filter_y[3] != 128);                                                \
    assert(w <= 64);                                                           \
    assert(h <= 64);                                                           \
    assert(x_step_q4 == 16);                                                   \
    assert(y_step_q4 == 16);                                                   \
    if (!(filter_x[0] | filter_x[1] | filter_x[2])) {                          \
      /* First three taps of filter_x are zero: filter h + 1 rows from src. */ \
      DECLARE_ALIGNED(16, uint8_t, temp[64 * 65]);                             \
      vpx_convolve8_horiz_##opt(src, src_stride, temp, 64, filter, x0_q4,      \
                                x_step_q4, y0_q4, y_step_q4, w, h + 1);        \
      vpx_convolve8_##avg##vert_##opt(temp, 64, dst, dst_stride, filter,       \
                                      x0_q4, x_step_q4, y0_q4, y_step_q4, w,   \
                                      h);                                      \
    } else {                                                                   \
      /* Filter h + 7 rows starting 3 rows above src; the vertical pass */     \
      /* then starts 3 rows into temp. */                                      \
      DECLARE_ALIGNED(16, uint8_t, temp[64 * 71]);                             \
      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, temp, 64,    \
                                filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
                                h + 7);                                        \
      vpx_convolve8_##avg##vert_##opt(&temp[3 * 64], 64, dst, dst_stride,      \
                                      filter, x0_q4, x_step_q4, y0_q4,         \
                                      y_step_q4, w, h);                        \
    }                                                                          \
  }
99 
100 #if CONFIG_VP9_HIGHBITDEPTH
101 
/*
 * Function type for a 1-D filter routine that reads and writes 16-bit
 * samples.
 *
 *   src_ptr        pointer to the input samples.
 *   src_pitch      pitch of the input (presumably the row stride; confirm
 *                  against the implementations).
 *   output_ptr     pointer to the output samples.
 *   out_pitch      pitch of the output (presumably the row stride).
 *   output_height  presumably the number of rows to produce.
 *   filter         pointer to the 16-bit filter taps.
 *   bd             presumably the sample bit depth (confirm with callers).
 *
 * src_pitch is taken by value, so it carries no const qualifier here; a
 * top-level qualifier on a parameter does not take part in function type
 * compatibility, and this keeps the declaration in line with
 * filter8_1dfunction.
 */
typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
                                       ptrdiff_t src_pitch,
                                       uint16_t *output_ptr,
                                       ptrdiff_t out_pitch,
                                       unsigned int output_height,
                                       const int16_t *filter, int bd);
108 
/*
 * HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)
 *
 * Expands to the definition of vpx_highbd_convolve8_<name>_<opt>(), which
 * forwards a 1-D convolution over 16-bit samples to fixed-width block
 * kernels and hands whatever is left to vpx_highbd_convolve8_<name>_c().
 *
 *   name       suffix of the generated function name and of the fallback.
 *   offset     expression used to index filter_kernel and pick the taps.
 *   step_q4    expression compared against 16.
 *   dir        token pasted into the block-kernel names before `8` / `2`.
 *   src_start  expression passed as the source pointer to the `8` kernels;
 *              it is evaluated again after every advance of src.
 *   avg        token (possibly empty) pasted in front of opt in the kernel
 *              names.
 *   opt        suffix shared by the generated function and the kernels.
 *
 * offset, step_q4 and src_start are substituted inside the function body, so
 * they may refer to the function's parameters.
 *
 * Behaviour of the generated function:
 *   - when step_q4 == 16 and filter[3] != 128, the width is consumed in
 *     strips of 16, then 8, then 4 columns, using the `<dir>8` kernels with
 *     src_start if any of filter[0..2] is nonzero and the `<dir>2` kernels
 *     with src otherwise;
 *   - if any width remains afterwards (all of it when the condition above
 *     does not hold), the fallback is called with the current src, dst and
 *     remaining w.
 */
#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)     \
  void vpx_highbd_convolve8_##name##_##opt(                                   \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,               \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,     \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {        \
    const int16_t *filter = filter_kernel[offset];                            \
    /* The block kernels are used only for step 16 and filter[3] != 128. */   \
    if (step_q4 == 16 && filter[3] != 128) {                                  \
      if (!(filter[0] | filter[1] | filter[2])) {                             \
        for (; w >= 16; w -= 16) {                                            \
          vpx_highbd_filter_block1d16_##dir##2_##avg##opt(                    \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 16;                                                          \
          dst += 16;                                                          \
        }                                                                     \
        for (; w >= 8; w -= 8) {                                              \
          vpx_highbd_filter_block1d8_##dir##2_##avg##opt(                     \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 8;                                                           \
          dst += 8;                                                           \
        }                                                                     \
        for (; w >= 4; w -= 4) {                                              \
          vpx_highbd_filter_block1d4_##dir##2_##avg##opt(                     \
              src, src_stride, dst, dst_stride, h, filter, bd);               \
          src += 4;                                                           \
          dst += 4;                                                           \
        }                                                                     \
      } else {                                                                \
        for (; w >= 16; w -= 16) {                                            \
          vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                    \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 16;                                                          \
          dst += 16;                                                          \
        }                                                                     \
        for (; w >= 8; w -= 8) {                                              \
          vpx_highbd_filter_block1d8_##dir##8_##avg##opt(                     \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 8;                                                           \
          dst += 8;                                                           \
        }                                                                     \
        for (; w >= 4; w -= 4) {                                              \
          vpx_highbd_filter_block1d4_##dir##8_##avg##opt(                     \
              src_start, src_stride, dst, dst_stride, h, filter, bd);         \
          src += 4;                                                           \
          dst += 4;                                                           \
        }                                                                     \
      }                                                                       \
    }                                                                         \
    /* Whatever width is left goes to the `c`-suffixed function. */           \
    if (w != 0) {                                                             \
      vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride,       \
                                      filter_kernel, x0_q4, x_step_q4, y0_q4, \
                                      y_step_q4, w, h, bd);                   \
    }                                                                         \
  }
168 
/*
 * HIGH_FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vpx_highbd_convolve8_<avg><opt>(), a 2-D
 * convolution over 16-bit samples.
 *
 *   avg  token (possibly empty) pasted into the generated name, the name of
 *        the vertical pass and the name of the fallback.
 *   opt  suffix shared by the generated function and both 1-D passes.
 *
 * Behaviour of the generated function:
 *   - asserts that w and h are at most 64;
 *   - if either step differs from 16, calls vpx_highbd_convolve8_<avg>c()
 *     with the unchanged arguments and returns;
 *   - otherwise runs vpx_highbd_convolve8_horiz_<opt>() into a 64-wide
 *     scratch buffer on the stack and then
 *     vpx_highbd_convolve8_<avg>vert_<opt>() from that buffer into dst.
 *     When filter_x[0..2] are all zero and filter_x[3] != 128, h + 1 rows
 *     are filtered starting at src; in every other case h + 7 rows are
 *     filtered starting 3 rows above src and the vertical pass starts
 *     3 rows (3 * 64 elements) into the buffer.
 */
#define HIGH_FUN_CONV_2D(avg, opt)                                             \
  void vpx_highbd_convolve8_##avg##opt(                                        \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,             \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {         \
    const int16_t *filter_x = filter[x0_q4];                                   \
    assert(w <= 64);                                                           \
    assert(h <= 64);                                                           \
    if (x_step_q4 != 16 || y_step_q4 != 16) {                                  \
      /* Any step other than 16 is handed to the `c`-suffixed function. */     \
      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter,  \
                                    x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,  \
                                    bd);                                       \
      return;                                                                  \
    }                                                                          \
    if (!(filter_x[0] | filter_x[1] | filter_x[2]) && filter_x[3] != 128) {    \
      DECLARE_ALIGNED(16, uint16_t, temp[64 * 65]);                            \
      vpx_highbd_convolve8_horiz_##opt(src, src_stride, temp, 64, filter,      \
                                       x0_q4, x_step_q4, y0_q4, y_step_q4, w,  \
                                       h + 1, bd);                             \
      vpx_highbd_convolve8_##avg##vert_##opt(                                  \
          temp, 64, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4,          \
          y_step_q4, w, h, bd);                                                \
    } else {                                                                   \
      DECLARE_ALIGNED(16, uint16_t, temp[64 * 71]);                            \
      vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, temp, \
                                       64, filter, x0_q4, x_step_q4, y0_q4,    \
                                       y_step_q4, w, h + 7, bd);               \
      vpx_highbd_convolve8_##avg##vert_##opt(                                  \
          temp + 3 * 64, 64, dst, dst_stride, filter, x0_q4, x_step_q4,        \
          y0_q4, y_step_q4, w, h, bd);                                         \
    }                                                                          \
  }
201 #endif  // CONFIG_VP9_HIGHBITDEPTH
202 
203 #endif  // VPX_DSP_X86_CONVOLVE_H_
204