/*
 * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_convolve.h"
#include "vp9/common/vp9_filter.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

static void convolve_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const int16_t *filter_x0, int x_step_q4,
                             const int16_t *filter_y, int y_step_q4,
                             int w, int h, int taps) {
  int x, y, k;

  /* NOTE: This assumes that the filter table is 256-byte aligned. */
  /* TODO(agrange) Modify to make independent of table alignment. */
  const int16_t *const filter_x_base =
      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);
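  /* Layout note: each filter table holds 16 subpel phases of 'taps' int16_t
   * coefficients; for the 8-tap filters used by the vp9_convolve8_* wrappers
   * below that is 16 * 8 * 2 == 256 bytes, so clearing the low 8 bits of the
   * phase pointer recovers the table base, and the pointer difference below
   * recovers the initial subpel phase. */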

  /* Adjust base pointer address for this source line */
  src -= taps / 2 - 1;

  for (y = 0; y < h; ++y) {
    /* Initial phase offset */
    int x_q4 = (int)(filter_x0 - filter_x_base) / taps;
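    /* x_q4 tracks the source position in Q4 (1/16-pel) units: the high bits
     * (x_q4 >> SUBPEL_BITS) give the integer pixel offset and the low bits
     * (x_q4 & SUBPEL_MASK) select the subpel filter phase. With
     * x_step_q4 == 16 the position advances exactly one pixel per output
     * sample, i.e. no scaling; other step values resample. */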

    for (x = 0; x < w; ++x) {
      /* Per-pixel src offset */
      const int src_x = x_q4 >> SUBPEL_BITS;
      int sum = 0;

      /* Pointer to filter to use */
      const int16_t *const filter_x = filter_x_base +
          (x_q4 & SUBPEL_MASK) * taps;

      for (k = 0; k < taps; ++k)
        sum += src[src_x + k] * filter_x[k];

      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

      /* Move to the next source pixel */
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 const int16_t *filter_x0, int x_step_q4,
                                 const int16_t *filter_y, int y_step_q4,
                                 int w, int h, int taps) {
  int x, y, k;

  /* NOTE: This assumes that the filter table is 256-byte aligned. */
  /* TODO(agrange) Modify to make independent of table alignment. */
  const int16_t *const filter_x_base =
      (const int16_t *)(((intptr_t)filter_x0) & ~(intptr_t)0xff);

  /* Adjust base pointer address for this source line */
  src -= taps / 2 - 1;

  for (y = 0; y < h; ++y) {
    /* Initial phase offset */
    int x_q4 = (int)(filter_x0 - filter_x_base) / taps;

    for (x = 0; x < w; ++x) {
      /* Per-pixel src offset */
      const int src_x = x_q4 >> SUBPEL_BITS;
      int sum = 0;

      /* Pointer to filter to use */
      const int16_t *const filter_x = filter_x_base +
          (x_q4 & SUBPEL_MASK) * taps;

      for (k = 0; k < taps; ++k)
        sum += src[src_x + k] * filter_x[k];

      dst[x] = ROUND_POWER_OF_TWO(dst[x] +
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
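      /* Round, clip, then average with the pixel already in dst:
       * dst[x] = (dst[x] + filtered + 1) >> 1, e.g. when blending a second
       * (compound) prediction into an existing one. */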

      /* Move to the next source pixel */
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                            uint8_t *dst, ptrdiff_t dst_stride,
                            const int16_t *filter_x, int x_step_q4,
                            const int16_t *filter_y0, int y_step_q4,
                            int w, int h, int taps) {
  int x, y, k;

  /* NOTE: This assumes that the filter table is 256-byte aligned. */
  /* TODO(agrange) Modify to make independent of table alignment. */
  const int16_t *const filter_y_base =
      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

  /* Adjust base pointer address for this source column */
  src -= src_stride * (taps / 2 - 1);

  for (x = 0; x < w; ++x) {
    /* Initial phase offset */
    int y_q4 = (int)(filter_y0 - filter_y_base) / taps;

    for (y = 0; y < h; ++y) {
      /* Per-pixel src offset */
      const int src_y = y_q4 >> SUBPEL_BITS;
      int sum = 0;

      /* Pointer to filter to use */
      const int16_t *const filter_y = filter_y_base +
          (y_q4 & SUBPEL_MASK) * taps;

      for (k = 0; k < taps; ++k)
        sum += src[(src_y + k) * src_stride] * filter_y[k];

      dst[y * dst_stride] =
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));

      /* Move to the next source pixel */
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                                uint8_t *dst, ptrdiff_t dst_stride,
                                const int16_t *filter_x, int x_step_q4,
                                const int16_t *filter_y0, int y_step_q4,
                                int w, int h, int taps) {
  int x, y, k;

  /* NOTE: This assumes that the filter table is 256-byte aligned. */
  /* TODO(agrange) Modify to make independent of table alignment. */
  const int16_t *const filter_y_base =
      (const int16_t *)(((intptr_t)filter_y0) & ~(intptr_t)0xff);

  /* Adjust base pointer address for this source column */
  src -= src_stride * (taps / 2 - 1);

  for (x = 0; x < w; ++x) {
    /* Initial phase offset */
    int y_q4 = (int)(filter_y0 - filter_y_base) / taps;

    for (y = 0; y < h; ++y) {
      /* Per-pixel src offset */
      const int src_y = y_q4 >> SUBPEL_BITS;
      int sum = 0;

      /* Pointer to filter to use */
      const int16_t *const filter_y = filter_y_base +
          (y_q4 & SUBPEL_MASK) * taps;

      for (k = 0; k < taps; ++k)
        sum += src[(src_y + k) * src_stride] * filter_y[k];

      dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] +
          clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);

      /* Move to the next source pixel */
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_c(const uint8_t *src, ptrdiff_t src_stride,
                       uint8_t *dst, ptrdiff_t dst_stride,
                       const int16_t *filter_x, int x_step_q4,
                       const int16_t *filter_y, int y_step_q4,
                       int w, int h, int taps) {
  /* Fixed size intermediate buffer places limits on parameters.
   * Maximum intermediate_height is 324, for y_step_q4 == 80,
   * h == 64, taps == 8.
   * y_step_q4 of 80 allows for 1/10 scale for 5 layer svc
   */
  uint8_t temp[64 * 324];
  int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + taps;
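  /* Informal check of the bound above: with h == 64, y_step_q4 == 80 and
   * taps == 8, as asserted below, this evaluates to
   * (((64 - 1) * 80 + 15) >> 4) + 8 == 323 rows, which fits in temp. */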

  assert(w <= 64);
  assert(h <= 64);
  assert(taps <= 8);
  assert(y_step_q4 <= 80);
  assert(x_step_q4 <= 80);

  if (intermediate_height < h)
    intermediate_height = h;

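  /* Two-pass filtering: run the horizontal filter into temp, starting
   * taps / 2 - 1 rows above the block so the vertical pass has the rows it
   * needs, then run the vertical filter from temp into dst. */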
  convolve_horiz_c(src - src_stride * (taps / 2 - 1), src_stride, temp, 64,
                   filter_x, x_step_q4, filter_y, y_step_q4, w,
                   intermediate_height, taps);
  convolve_vert_c(temp + 64 * (taps / 2 - 1), 64, dst, dst_stride, filter_x,
                  x_step_q4, filter_y, y_step_q4, w, h, taps);
}

void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const int16_t *filter_x, int x_step_q4,
                           const int16_t *filter_y, int y_step_q4,
                           int w, int h) {
  convolve_horiz_c(src, src_stride, dst, dst_stride,
                   filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x, int x_step_q4,
                               const int16_t *filter_y, int y_step_q4,
                               int w, int h) {
  convolve_avg_horiz_c(src, src_stride, dst, dst_stride,
                       filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const int16_t *filter_x, int x_step_q4,
                          const int16_t *filter_y, int y_step_q4,
                          int w, int h) {
  convolve_vert_c(src, src_stride, dst, dst_stride,
                  filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int x_step_q4,
                              const int16_t *filter_y, int y_step_q4,
                              int w, int h) {
  convolve_avg_vert_c(src, src_stride, dst, dst_stride,
                      filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}

void vp9_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
                     uint8_t *dst, ptrdiff_t dst_stride,
                     const int16_t *filter_x, int x_step_q4,
                     const int16_t *filter_y, int y_step_q4,
                     int w, int h) {
  convolve_c(src, src_stride, dst, dst_stride,
             filter_x, x_step_q4, filter_y, y_step_q4, w, h, 8);
}
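
/* Illustrative usage sketch only, not part of the library. It assumes the
 * 256-byte-aligned 8-tap table vp9_sub_pel_filters_8[SUBPEL_SHIFTS][8] from
 * vp9_filter.h; phase 8 is the half-pel filter and a step of 16 (1.0 in Q4)
 * means no scaling. Guarded by #if 0 so it has no effect on the build; block
 * sizes must respect the w <= 64, h <= 64 limits noted above.
 */
#if 0
static void example_halfpel_8tap(const uint8_t *src, ptrdiff_t src_stride,
                                 uint8_t *dst, ptrdiff_t dst_stride,
                                 int w, int h) {
  const int16_t *const filter = vp9_sub_pel_filters_8[8];
  vp9_convolve8_c(src, src_stride, dst, dst_stride,
                  filter, 16, filter, 16, w, h);
}
#endif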

void vp9_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int x_step_q4,
                         const int16_t *filter_y, int y_step_q4,
                         int w, int h) {
  /* Fixed size intermediate buffer places limits on parameters. */
  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp, 64 * 64);
  assert(w <= 64);
  assert(h <= 64);

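  /* Convolve into the temporary buffer first, then average the result into
   * dst with vp9_convolve_avg() (vp9_convolve_avg_c below is its C version). */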
  vp9_convolve8(src, src_stride, temp, 64,
                filter_x, x_step_q4, filter_y, y_step_q4, w, h);
  vp9_convolve_avg(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
}

void vp9_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter_x, int filter_x_stride,
                         const int16_t *filter_y, int filter_y_stride,
                         int w, int h) {
  int r;

  for (r = h; r > 0; --r) {
    vpx_memcpy(dst, src, w);
    src += src_stride;
    dst += dst_stride;
  }
}

void vp9_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride,
                        uint8_t *dst, ptrdiff_t dst_stride,
                        const int16_t *filter_x, int filter_x_stride,
                        const int16_t *filter_y, int filter_y_stride,
                        int w, int h) {
  int x, y;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x)
      dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);

    src += src_stride;
    dst += dst_stride;
  }
}