• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #ifndef VPX_DSP_X86_CONVOLVE_H_
11 #define VPX_DSP_X86_CONVOLVE_H_
12 
13 #include <assert.h>
14 
15 #include "./vpx_config.h"
16 #include "vpx/vpx_integer.h"
17 #include "vpx_ports/mem.h"
18 
/*
 * Function type for a one-dimensional 8-bit filter routine.
 *
 *   src_ptr       - source samples.
 *   src_pitch     - pitch associated with src_ptr.
 *   output_ptr    - destination samples.
 *   out_pitch     - pitch associated with output_ptr.
 *   output_height - height argument (unsigned 32-bit).
 *   filter        - 16-bit filter coefficients.
 *
 * NOTE(review): presumably the type of the vpx_filter_block1d* kernels that
 * FUN_CONV_1D calls -- confirm at their declaration sites.
 */
typedef void filter8_1dfunction(const uint8_t *src_ptr,
                                ptrdiff_t src_pitch,
                                uint8_t *output_ptr,
                                ptrdiff_t out_pitch,
                                uint32_t output_height,
                                const int16_t *filter);
22 
/*
 * FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
 *
 * Expands to the definition of vpx_convolve8_<name>_<opt>(), which forwards
 * to the width-specific kernels
 * vpx_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt>().
 *
 *   name      - pasted into the generated function name.
 *   step_q4   - expression asserted to equal 16.
 *   filter    - coefficient pointer expression; filter[3] is asserted to
 *               differ from 128, and the pointer is handed to the kernels.
 *   dir       - direction tag pasted into the kernel names.
 *   src_start - source pointer expression passed to the "8" kernels; the
 *               "2" kernels always receive plain src.
 *   avg       - prefix pasted in front of <opt> in the kernel names (the
 *               argument may be empty).
 *   opt       - suffix pasted at the end of the generated and called names.
 *
 * Kernel choice: the "8" kernels are used when any of filter[0..2] is
 * non-zero, otherwise the "2" kernels (presumably 8-tap versus 2-tap
 * filtering -- confirm against the kernel implementations).
 *
 * Width handling: columns are consumed 16 at a time; a remainder of exactly
 * 8 or exactly 4 then goes to the matching narrower kernel.  Any other
 * remainder is left unprocessed.
 *
 * The generated function ignores filter_x, x_step_q4, filter_y and y_step_q4
 * as parameters, except where the macro arguments `filter` / `step_q4` name
 * them.
 */
#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)          \
  void vpx_convolve8_##name##_##opt(                                          \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                 \
      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,           \
      const int16_t *filter_y, int y_step_q4, int w, int h) {                 \
    (void)filter_x;                                                           \
    (void)x_step_q4;                                                          \
    (void)filter_y;                                                           \
    (void)y_step_q4;                                                          \
    assert(filter[3] != 128);                                                 \
    assert(step_q4 == 16);                                                    \
    if (!(filter[0] | filter[1] | filter[2])) {                               \
      /* Coefficients 0..2 are all zero: "2" kernels, reading from src. */    \
      for (; w >= 16; w -= 16, src += 16, dst += 16) {                        \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                 dst_stride, h, filter);      \
      }                                                                       \
      switch (w) {                                                            \
        case 8:                                                               \
          vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst,       \
                                                  dst_stride, h, filter);     \
          break;                                                              \
        case 4:                                                               \
          vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst,       \
                                                  dst_stride, h, filter);     \
          break;                                                              \
        default:                                                              \
          break;                                                              \
      }                                                                       \
    } else {                                                                  \
      /* Some coefficient in 0..2 is set: "8" kernels, reading src_start. */  \
      for (; w >= 16; w -= 16, src += 16, dst += 16) {                        \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                 dst_stride, h, filter);      \
      }                                                                       \
      switch (w) {                                                            \
        case 8:                                                               \
          vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                  dst_stride, h, filter);     \
          break;                                                              \
        case 4:                                                               \
          vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                  dst_stride, h, filter);     \
          break;                                                              \
        default:                                                              \
          break;                                                              \
      }                                                                       \
    }                                                                         \
  }
66 
/*
 * FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vpx_convolve8_<avg><opt>(), a two-pass
 * convolution built from vpx_convolve8_horiz_<opt>() followed by
 * vpx_convolve8_<avg>vert_<opt>().
 *
 *   avg - prefix pasted into the generated name and into the second-pass
 *         callee name (the argument may be empty).
 *   opt - suffix pasted at the end of the generated and called names.
 *
 * The generated function asserts that filter_x[3] and filter_y[3] differ
 * from 128, that w and h are at most 64, and that both steps equal 16.
 *
 * The first pass writes into an on-stack scratch buffer with a row stride
 * of 64:
 *   - if any of filter_x[0..2] is non-zero, the pass starts 3 source rows
 *     above src and produces h + 7 rows (buffer of 64 * 71); the second pass
 *     then starts 3 rows into the buffer;
 *   - otherwise it starts at src and produces h + 1 rows (buffer of
 *     64 * 65); the second pass starts at the top of the buffer.
 * Only filter_x decides which layout is used.
 */
#define FUN_CONV_2D(avg, opt)                                                 \
  void vpx_convolve8_##avg##opt(                                              \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                 \
      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,           \
      const int16_t *filter_y, int y_step_q4, int w, int h) {                 \
    assert(filter_x[3] != 128);                                               \
    assert(filter_y[3] != 128);                                               \
    assert(w <= 64);                                                          \
    assert(h <= 64);                                                          \
    assert(x_step_q4 == 16);                                                  \
    assert(y_step_q4 == 16);                                                  \
    if (!(filter_x[0] | filter_x[1] | filter_x[2])) {                         \
      /* h + 1 rows, no rows above src. */                                    \
      DECLARE_ALIGNED(16, uint8_t, intermediate[64 * 65]);                    \
      vpx_convolve8_horiz_##opt(src, src_stride, intermediate, 64, filter_x,  \
                                x_step_q4, filter_y, y_step_q4, w, h + 1);    \
      vpx_convolve8_##avg##vert_##opt(intermediate, 64, dst, dst_stride,      \
                                      filter_x, x_step_q4, filter_y,          \
                                      y_step_q4, w, h);                       \
    } else {                                                                  \
      /* h + 7 rows, beginning 3 rows above src. */                           \
      DECLARE_ALIGNED(16, uint8_t, intermediate[64 * 71]);                    \
      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,             \
                                intermediate, 64, filter_x, x_step_q4,        \
                                filter_y, y_step_q4, w, h + 7);               \
      vpx_convolve8_##avg##vert_##opt(&intermediate[3 * 64], 64, dst,         \
                                      dst_stride, filter_x, x_step_q4,        \
                                      filter_y, y_step_q4, w, h);             \
    }                                                                         \
  }
94 
95 #if CONFIG_VP9_HIGHBITDEPTH
96 
/*
 * Function type for a one-dimensional filter routine operating on 16-bit
 * samples.
 *
 *   src_ptr       - source samples.
 *   src_pitch     - pitch associated with src_ptr.
 *   output_ptr    - destination samples.
 *   out_pitch     - pitch associated with output_ptr.
 *   output_height - height argument.
 *   filter        - 16-bit filter coefficients.
 *   bd            - presumably the sample bit depth -- TODO confirm against
 *                   the kernel implementations.
 */
typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
                                       const ptrdiff_t src_pitch,
                                       uint16_t *output_ptr,
                                       ptrdiff_t out_pitch,
                                       unsigned int output_height,
                                       const int16_t *filter,
                                       int bd);
103 
/*
 * HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)
 *
 * Expands to the definition of vpx_highbd_convolve8_<name>_<opt>().
 *
 *   name      - pasted into the generated name and the C fallback name.
 *   step_q4   - expression compared against 16 to enable the fast path.
 *   filter    - coefficient pointer expression handed to the kernels.
 *   dir       - direction tag pasted into the kernel names.
 *   src_start - source pointer expression passed to the "8" kernels; the
 *               "2" kernels always receive plain src.
 *   avg       - prefix pasted in front of <opt> in the kernel names (the
 *               argument may be empty).
 *   opt       - suffix pasted at the end of the generated and kernel names.
 *
 * Fast path (step_q4 == 16 and filter[3] != 128): columns are consumed by
 * the 16-wide kernel, then the 8-wide kernel, then the 4-wide kernel
 * vpx_highbd_filter_block1d{16,8,4}_<dir>{8,2}_<avg><opt>().  The "8"
 * kernels are used when any of filter[0..2] is non-zero, otherwise the "2"
 * kernels.
 *
 * Whatever width is still left afterwards -- the whole width when the fast
 * path was skipped -- is forwarded to vpx_highbd_convolve8_<name>_c() with
 * the current src/dst positions.
 */
#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt)   \
  void vpx_highbd_convolve8_##name##_##opt(                                 \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,             \
      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,         \
      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {       \
    if (step_q4 == 16 && filter[3] != 128) {                                \
      if (!(filter[0] | filter[1] | filter[2])) {                           \
        /* Coefficients 0..2 are all zero: "2" kernels on plain src. */     \
        for (; w >= 16; w -= 16, src += 16, dst += 16) {                    \
          vpx_highbd_filter_block1d16_##dir##2_##avg##opt(                  \
              src, src_stride, dst, dst_stride, h, filter, bd);             \
        }                                                                   \
        for (; w >= 8; w -= 8, src += 8, dst += 8) {                        \
          vpx_highbd_filter_block1d8_##dir##2_##avg##opt(                   \
              src, src_stride, dst, dst_stride, h, filter, bd);             \
        }                                                                   \
        for (; w >= 4; w -= 4, src += 4, dst += 4) {                        \
          vpx_highbd_filter_block1d4_##dir##2_##avg##opt(                   \
              src, src_stride, dst, dst_stride, h, filter, bd);             \
        }                                                                   \
      } else {                                                              \
        /* Some coefficient in 0..2 is set: "8" kernels on src_start. */    \
        for (; w >= 16; w -= 16, src += 16, dst += 16) {                    \
          vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                  \
              src_start, src_stride, dst, dst_stride, h, filter, bd);       \
        }                                                                   \
        for (; w >= 8; w -= 8, src += 8, dst += 8) {                        \
          vpx_highbd_filter_block1d8_##dir##8_##avg##opt(                   \
              src_start, src_stride, dst, dst_stride, h, filter, bd);       \
        }                                                                   \
        for (; w >= 4; w -= 4, src += 4, dst += 4) {                        \
          vpx_highbd_filter_block1d4_##dir##8_##avg##opt(                   \
              src_start, src_stride, dst, dst_stride, h, filter, bd);       \
        }                                                                   \
      }                                                                     \
    }                                                                       \
    if (w != 0) {                                                           \
      /* Leftover columns go to the C implementation. */                    \
      vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride,     \
                                      filter_x, x_step_q4, filter_y,        \
                                      y_step_q4, w, h, bd);                 \
    }                                                                       \
  }
162 
/*
 * HIGH_FUN_CONV_2D(avg, opt)
 *
 * Expands to the definition of vpx_highbd_convolve8_<avg><opt>().
 *
 *   avg - prefix pasted into the generated name, the second-pass callee
 *         name and the C fallback name (the argument may be empty).
 *   opt - suffix pasted at the end of the generated and pass names.
 *
 * The generated function asserts that w and h are at most 64.
 *
 * Unless both x_step_q4 and y_step_q4 equal 16, the whole call is forwarded
 * to vpx_highbd_convolve8_<avg>c().
 *
 * Otherwise it runs vpx_highbd_convolve8_horiz_<opt>() into an on-stack
 * scratch buffer with a row stride of 64, followed by
 * vpx_highbd_convolve8_<avg>vert_<opt>() from that buffer into dst:
 *   - if any of filter_x[0..2] is non-zero, or filter_x[3] equals 128, the
 *     first pass starts 3 source rows above src and produces h + 7 rows
 *     (buffer of 64 * 71); the second pass starts 3 rows (192 elements)
 *     into the buffer;
 *   - otherwise it starts at src and produces h + 1 rows (buffer of
 *     64 * 65); the second pass starts at the top of the buffer.
 * Only filter_x decides which layout is used.
 */
#define HIGH_FUN_CONV_2D(avg, opt)                                            \
  void vpx_highbd_convolve8_##avg##opt(                                       \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,               \
      ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4,           \
      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {         \
    assert(w <= 64);                                                          \
    assert(h <= 64);                                                          \
    if (x_step_q4 != 16 || y_step_q4 != 16) {                                 \
      /* A step other than 16: hand everything to the C version. */           \
      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride,         \
                                    filter_x, x_step_q4, filter_y, y_step_q4, \
                                    w, h, bd);                                \
      return;                                                                 \
    }                                                                         \
    if (!(filter_x[0] | filter_x[1] | filter_x[2]) && filter_x[3] != 128) {   \
      /* h + 1 rows, no rows above src. */                                    \
      DECLARE_ALIGNED(16, uint16_t, intermediate[64 * 65]);                   \
      vpx_highbd_convolve8_horiz_##opt(src, src_stride, intermediate, 64,     \
                                       filter_x, x_step_q4, filter_y,         \
                                       y_step_q4, w, h + 1, bd);              \
      vpx_highbd_convolve8_##avg##vert_##opt(                                 \
          intermediate, 64, dst, dst_stride, filter_x, x_step_q4, filter_y,   \
          y_step_q4, w, h, bd);                                               \
    } else {                                                                  \
      /* h + 7 rows, beginning 3 rows above src. */                           \
      DECLARE_ALIGNED(16, uint16_t, intermediate[64 * 71]);                   \
      vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,      \
                                       intermediate, 64, filter_x, x_step_q4, \
                                       filter_y, y_step_q4, w, h + 7, bd);    \
      vpx_highbd_convolve8_##avg##vert_##opt(                                 \
          &intermediate[3 * 64], 64, dst, dst_stride, filter_x, x_step_q4,    \
          filter_y, y_step_q4, w, h, bd);                                     \
    }                                                                         \
  }
194 #endif  // CONFIG_VP9_HIGHBITDEPTH
195 
196 #endif  // VPX_DSP_X86_CONVOLVE_H_
197