• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 
13 #include "./vpx_config.h"
14 #include "./vp9_rtcd.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_convolve.h"
17 #include "vp9/common/vp9_filter.h"
18 #include "vpx/vpx_integer.h"
19 #include "vpx_ports/mem.h"
20 
convolve_horiz_c(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter_x0,int x_step_q4,const int16_t * filter_y,int y_step_q4,int w,int h,int taps)21 static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
22                              uint8_t *dst, ptrdiff_t dst_stride,
23                              const int16_t *filter_x0, int x_step_q4,
24                              const int16_t *filter_y, int y_step_q4,
25                              int w, int h, int taps) {
26   int x, y, k;
27 
28   /* NOTE: This assumes that the filter table is 256-byte aligned. */
29   /* TODO(agrange) Modify to make independent of table alignment. */
30   const int16_t *const filter_x_base =
31       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
32 
33   /* Adjust base pointer address for this source line */
34   src -= taps / 2 - 1;
35 
36   for (y = 0; y < h; ++y) {
37     /* Initial phase offset */
38     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
39 
40     for (x = 0; x < w; ++x) {
41       /* Per-pixel src offset */
42       const int src_x = x_q4 >> SUBPEL_BITS;
43       int sum = 0;
44 
45       /* Pointer to filter to use */
46       const int16_t *const filter_x = filter_x_base +
47           (x_q4 & SUBPEL_MASK) * taps;
48 
49       for (k = 0; k < taps; ++k)
50         sum += src[src_x + k] * filter_x[k];
51 
52       dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
53 
54       /* Move to the next source pixel */
55       x_q4 += x_step_q4;
56     }
57     src += src_stride;
58     dst += dst_stride;
59   }
60 }
61 
convolve_avg_horiz_c(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter_x0,int x_step_q4,const int16_t * filter_y,int y_step_q4,int w,int h,int taps)62 static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
63                                  uint8_t *dst, ptrdiff_t dst_stride,
64                                  const int16_t *filter_x0, int x_step_q4,
65                                  const int16_t *filter_y, int y_step_q4,
66                                  int w, int h, int taps) {
67   int x, y, k;
68 
69   /* NOTE: This assumes that the filter table is 256-byte aligned. */
70   /* TODO(agrange) Modify to make independent of table alignment. */
71   const int16_t *const filter_x_base =
72       (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
73 
74   /* Adjust base pointer address for this source line */
75   src -= taps / 2 - 1;
76 
77   for (y = 0; y < h; ++y) {
78     /* Initial phase offset */
79     int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
80 
81     for (x = 0; x < w; ++x) {
82       /* Per-pixel src offset */
83       const int src_x = x_q4 >> SUBPEL_BITS;
84       int sum = 0;
85 
86       /* Pointer to filter to use */
87       const int16_t *const filter_x = filter_x_base +
88           (x_q4 & SUBPEL_MASK) * taps;
89 
90       for (k = 0; k < taps; ++k)
91         sum += src[src_x + k] * filter_x[k];
92 
93       dst[x] = ROUND_POWER_OF_TWO(dst[x] +
94                    clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
95 
96       /* Move to the next source pixel */
97       x_q4 += x_step_q4;
98     }
99     src += src_stride;
100     dst += dst_stride;
101   }
102 }
103 
convolve_vert_c(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter_x,int x_step_q4,const int16_t * filter_y0,int y_step_q4,int w,int h,int taps)104 static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
105                             uint8_t *dst, ptrdiff_t dst_stride,
106                             const int16_t *filter_x, int x_step_q4,
107                             const int16_t *filter_y0, int y_step_q4,
108                             int w, int h, int taps) {
109   int x, y, k;
110 
111   /* NOTE: This assumes that the filter table is 256-byte aligned. */
112   /* TODO(agrange) Modify to make independent of table alignment. */
113   const int16_t *const filter_y_base =
114       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
115 
116   /* Adjust base pointer address for this source column */
117   src -= src_stride * (taps / 2 - 1);
118 
119   for (x = 0; x < w; ++x) {
120     /* Initial phase offset */
121     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
122 
123     for (y = 0; y < h; ++y) {
124       /* Per-pixel src offset */
125       const int src_y = y_q4 >> SUBPEL_BITS;
126       int sum = 0;
127 
128       /* Pointer to filter to use */
129       const int16_t *const filter_y = filter_y_base +
130           (y_q4 & SUBPEL_MASK) * taps;
131 
132       for (k = 0; k < taps; ++k)
133         sum += src[(src_y + k) * src_stride] * filter_y[k];
134 
135       dst[y * dst_stride] =
136           clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
137 
138       /* Move to the next source pixel */
139       y_q4 += y_step_q4;
140     }
141     ++src;
142     ++dst;
143   }
144 }
145 
convolve_avg_vert_c(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter_x,int x_step_q4,const int16_t * filter_y0,int y_step_q4,int w,int h,int taps)146 static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
147                                 uint8_t *dst, ptrdiff_t dst_stride,
148                                 const int16_t *filter_x, int x_step_q4,
149                                 const int16_t *filter_y0, int y_step_q4,
150                                 int w, int h, int taps) {
151   int x, y, k;
152 
153   /* NOTE: This assumes that the filter table is 256-byte aligned. */
154   /* TODO(agrange) Modify to make independent of table alignment. */
155   const int16_t *const filter_y_base =
156       (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);
157 
158   /* Adjust base pointer address for this source column */
159   src -= src_stride * (taps / 2 - 1);
160 
161   for (x = 0; x < w; ++x) {
162     /* Initial phase offset */
163     int y_q4 = (int)(filter_y0 - filter_y_base) / taps;
164 
165     for (y = 0; y < h; ++y) {
166       /* Per-pixel src offset */
167       const int src_y = y_q4 >> SUBPEL_BITS;
168       int sum = 0;
169 
170       /* Pointer to filter to use */
171       const int16_t *const filter_y = filter_y_base +
172           (y_q4 & SUBPEL_MASK) * taps;
173 
174       for (k = 0; k < taps; ++k)
175         sum += src[(src_y + k) * src_stride] * filter_y[k];
176 
177       dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
178            clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
179 
180       /* Move to the next source pixel */
181       y_q4 += y_step_q4;
182     }
183     ++src;
184     ++dst;
185   }
186 }
187 
/* Two-pass (horizontal, then vertical) filter through a stack buffer.
 *
 * The horizontal pass starts taps/2 - 1 rows above src and writes
 * intermediate_height rows into a fixed-size temporary with a row pitch of
 * TEMP_STRIDE. The vertical pass then reads that temporary, starting
 * taps/2 - 1 rows in, and writes the final w x h block to dst.
 *
 * Because the temporary has a fixed size, w, h, taps and both step values
 * are bounded by the assertions below.
 */
static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * Maximum intermediate_height is 324, for y_step_q4 == 80,
   * h == 64, taps == 8.
   * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
   */
  enum {
    TEMP_STRIDE = 64,             /* row pitch of temp; also the max width */
    MAX_INTERMEDIATE_HEIGHT = 324 /* number of rows temp can hold */
  };
  uint8_t temp[TEMP_STRIDE * MAX_INTERMEDIATE_HEIGHT];
  /* Rows the vertical pass can touch: the span covered by h output rows at
   * y_step_q4 (rounded up), plus the kernel length. */
  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
  /* Rows of context that precede the first output row. */
  const int lead_rows = taps / 2 - 1;

  assert(w <= TEMP_STRIDE);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 80);
  assert(x_step_q4 <= 80);

  if (intermediate_height < h)
    intermediate_height = h;

  /* Tie the computed row count directly to the buffer that receives it,
   * rather than relying on the individual parameter limits above. */
  assert(intermediate_height <= MAX_INTERMEDIATE_HEIGHT);

  convolve_horiz_c(src - src_stride * lead_rows, src_stride,
                   temp, TEMP_STRIDE,
                   filter_x, x_step_q4, filter_y, y_step_q4,
                   w, intermediate_height, taps);
  convolve_vert_c(temp + TEMP_STRIDE * lead_rows, TEMP_STRIDE,
                  dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4,
                  w, h, taps);
}
216 
/* Horizontal-only convolution entry point: forwards every argument
 * unchanged to convolve_horiz_c() with the tap count fixed at 8. */
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  const int taps = 8;

  convolve_horiz_c(src, src_stride,
                   dst, dst_stride,
                   filter_x, x_step_q4,
                   filter_y, y_step_q4,
                   w, h, taps);
}
225 
/* Horizontal-only averaging convolution entry point: forwards every
 * argument unchanged to convolve_avg_horiz_c() with the tap count fixed
 * at 8. */
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  const int taps = 8;

  convolve_avg_horiz_c(src, src_stride,
                       dst, dst_stride,
                       filter_x, x_step_q4,
                       filter_y, y_step_q4,
                       w, h, taps);
}
234 
/* Vertical-only convolution entry point: forwards every argument
 * unchanged to convolve_vert_c() with the tap count fixed at 8. */
void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  const int taps = 8;

  convolve_vert_c(src, src_stride,
                  dst, dst_stride,
                  filter_x, x_step_q4,
                  filter_y, y_step_q4,
                  w, h, taps);
}
243 
/* Vertical-only averaging convolution entry point: forwards every
 * argument unchanged to convolve_avg_vert_c() with the tap count fixed
 * at 8. */
void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  const int taps = 8;

  convolve_avg_vert_c(src, src_stride,
                      dst, dst_stride,
                      filter_x, x_step_q4,
                      filter_y, y_step_q4,
                      w, h, taps);
}
252 
/* Two-dimensional convolution entry point: forwards every argument
 * unchanged to convolve_c() with the tap count fixed at 8. */
void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  const int taps = 8;

  convolve_c(src, src_stride,
             dst, dst_stride,
             filter_x, x_step_q4,
             filter_y, y_step_q4,
             w, h, taps);
}
261 
vp9_convolve8_avg_c(const uint8_t * src,ptrdiff_t src_stride,uint8_t * dst,ptrdiff_t dst_stride,const int16_t * filter_x,int x_step_q4,const int16_t * filter_y,int y_step_q4,int w,int h)262 void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
263                          uint8_t *dst, ptrdiff_t dst_stride,
264                          const int16_t *filter_x, int x_step_q4,
265                          const int16_t *filter_y, int y_step_q4,
266                          int w, int h) {
267   /* Fixed size intermediate buffer places limits on parameters. */
268   DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
269   assert(w <= 64);
270   assert(h <= 64);
271 
272   vp9_convolve8(src, src_stride, temp, 64,
273                filter_x, x_step_q4, filter_y, y_step_q4, w, h);
274   vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
275 }
276 
/* Straight block copy: transfers w bytes from src to dst for each of h
 * rows, advancing the two pointers by their respective strides.
 *
 * The filter pointers and filter stride arguments are not read.
 */
void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  const uint8_t *row_in = src;
  uint8_t *row_out = dst;
  int row;

  for (row = 0; row < h; ++row) {
    vpx_memcpy(row_out, row_in, w);
    row_in += src_stride;
    row_out += dst_stride;
  }
}
290 
/* Combines a w x h source block into dst in place: every destination pixel
 * becomes ROUND_POWER_OF_TWO(dst + src, 1) of itself and the co-located
 * source pixel.
 *
 * The filter pointers and filter stride arguments are not read.
 */
void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int row;

  for (row = 0; row < h; ++row) {
    const uint8_t *const in = src + row * src_stride;
    uint8_t *const out = dst + row * dst_stride;
    int col;

    for (col = 0; col < w; ++col) {
      out[col] = ROUND_POWER_OF_TWO(out[col] + in[col], 1);
    }
  }
}
306