1 /* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 #ifndef VPX_DSP_X86_CONVOLVE_H_ 11 #define VPX_DSP_X86_CONVOLVE_H_ 12 13 #include <assert.h> 14 15 #include "./vpx_config.h" 16 #include "vpx/vpx_integer.h" 17 #include "vpx_ports/mem.h" 18 19 typedef void filter8_1dfunction ( 20 const uint8_t *src_ptr, 21 ptrdiff_t src_pitch, 22 uint8_t *output_ptr, 23 ptrdiff_t out_pitch, 24 uint32_t output_height, 25 const int16_t *filter 26 ); 27 28 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 29 void vpx_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ 30 uint8_t *dst, ptrdiff_t dst_stride, \ 31 const int16_t *filter_x, int x_step_q4, \ 32 const int16_t *filter_y, int y_step_q4, \ 33 int w, int h) { \ 34 assert(filter[3] != 128); \ 35 assert(step_q4 == 16); \ 36 if (filter[0] || filter[1] || filter[2]) { \ 37 while (w >= 16) { \ 38 vpx_filter_block1d16_##dir##8_##avg##opt(src_start, \ 39 src_stride, \ 40 dst, \ 41 dst_stride, \ 42 h, \ 43 filter); \ 44 src += 16; \ 45 dst += 16; \ 46 w -= 16; \ 47 } \ 48 while (w >= 8) { \ 49 vpx_filter_block1d8_##dir##8_##avg##opt(src_start, \ 50 src_stride, \ 51 dst, \ 52 dst_stride, \ 53 h, \ 54 filter); \ 55 src += 8; \ 56 dst += 8; \ 57 w -= 8; \ 58 } \ 59 while (w >= 4) { \ 60 vpx_filter_block1d4_##dir##8_##avg##opt(src_start, \ 61 src_stride, \ 62 dst, \ 63 dst_stride, \ 64 h, \ 65 filter); \ 66 src += 4; \ 67 dst += 4; \ 68 w -= 4; \ 69 } \ 70 } else { \ 71 while (w >= 16) { \ 72 vpx_filter_block1d16_##dir##2_##avg##opt(src, \ 73 src_stride, \ 74 dst, \ 75 dst_stride, \ 76 h, \ 77 filter); \ 78 src += 16; \ 79 dst += 16; \ 80 w -= 16; \ 81 } \ 82 while (w >= 8) { \ 83 vpx_filter_block1d8_##dir##2_##avg##opt(src, \ 84 src_stride, \ 85 dst, \ 86 dst_stride, \ 87 h, \ 88 filter); \ 89 src += 8; \ 90 dst += 8; \ 91 w -= 8; \ 92 } \ 93 while (w >= 4) { \ 94 vpx_filter_block1d4_##dir##2_##avg##opt(src, \ 95 src_stride, \ 96 dst, \ 97 dst_stride, \ 98 h, \ 99 filter); \ 100 src += 4; \ 101 dst += 4; \ 102 w -= 4; \ 103 } \ 104 } \ 105 } 106 107 #define FUN_CONV_2D(avg, opt) \ 108 void vpx_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 109 uint8_t *dst, ptrdiff_t dst_stride, \ 110 const int16_t *filter_x, int x_step_q4, \ 111 const int16_t *filter_y, int y_step_q4, \ 112 int w, int h) { \ 113 assert(filter_x[3] != 128); \ 114 assert(filter_y[3] != 128); \ 115 assert(w <= 64); \ 116 assert(h <= 64); \ 117 assert(x_step_q4 == 16); \ 118 assert(y_step_q4 == 16); \ 119 if (filter_x[0] || filter_x[1] || filter_x[2]|| \ 120 filter_y[0] || filter_y[1] || filter_y[2]) { \ 121 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \ 122 vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 123 filter_x, x_step_q4, filter_y, y_step_q4, \ 124 w, h + 7); \ 125 vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 126 filter_x, x_step_q4, filter_y, \ 127 y_step_q4, w, h); \ 128 } else { \ 129 DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \ 130 vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ 131 filter_x, x_step_q4, filter_y, y_step_q4, \ 132 w, h + 1); \ 133 vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 134 filter_x, x_step_q4, filter_y, \ 135 y_step_q4, w, h); \ 136 } \ 137 } 138 139 #if CONFIG_VP9_HIGHBITDEPTH 140 141 typedef void highbd_filter8_1dfunction ( 142 const uint16_t *src_ptr, 143 const ptrdiff_t src_pitch, 144 uint16_t *output_ptr, 145 ptrdiff_t out_pitch, 146 unsigned int output_height, 147 const int16_t *filter, 148 int bd 149 ); 150 151 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 152 void vpx_highbd_convolve8_##name##_##opt(const uint8_t *src8, \ 153 ptrdiff_t src_stride, \ 154 uint8_t *dst8, \ 155 ptrdiff_t dst_stride, \ 156 const int16_t *filter_x, \ 157 int x_step_q4, \ 158 const int16_t *filter_y, \ 159 int y_step_q4, \ 160 int w, int h, int bd) { \ 161 if (step_q4 == 16 && filter[3] != 128) { \ 162 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 163 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 164 if (filter[0] || filter[1] || filter[2]) { \ 165 while (w >= 16) { \ 166 vpx_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \ 167 src_stride, \ 168 dst, \ 169 dst_stride, \ 170 h, \ 171 filter, \ 172 bd); \ 173 src += 16; \ 174 dst += 16; \ 175 w -= 16; \ 176 } \ 177 while (w >= 8) { \ 178 vpx_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \ 179 src_stride, \ 180 dst, \ 181 dst_stride, \ 182 h, \ 183 filter, \ 184 bd); \ 185 src += 8; \ 186 dst += 8; \ 187 w -= 8; \ 188 } \ 189 while (w >= 4) { \ 190 vpx_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \ 191 src_stride, \ 192 dst, \ 193 dst_stride, \ 194 h, \ 195 filter, \ 196 bd); \ 197 src += 4; \ 198 dst += 4; \ 199 w -= 4; \ 200 } \ 201 } else { \ 202 while (w >= 16) { \ 203 vpx_highbd_filter_block1d16_##dir##2_##avg##opt(src, \ 204 src_stride, \ 205 dst, \ 206 dst_stride, \ 207 h, \ 208 filter, \ 209 bd); \ 210 src += 16; \ 211 dst += 16; \ 212 w -= 16; \ 213 } \ 214 while (w >= 8) { \ 215 vpx_highbd_filter_block1d8_##dir##2_##avg##opt(src, \ 216 src_stride, \ 217 dst, \ 218 dst_stride, \ 219 h, \ 220 filter, \ 221 bd); \ 222 src += 8; \ 223 dst += 8; \ 224 w -= 8; \ 225 } \ 226 while (w >= 4) { \ 227 vpx_highbd_filter_block1d4_##dir##2_##avg##opt(src, \ 228 src_stride, \ 229 dst, \ 230 dst_stride, \ 231 h, \ 232 filter, \ 233 bd); \ 234 src += 4; \ 235 dst += 4; \ 236 w -= 4; \ 237 } \ 238 } \ 239 } \ 240 if (w) { \ 241 vpx_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ 242 filter_x, x_step_q4, filter_y, y_step_q4, \ 243 w, h, bd); \ 244 } \ 245 } 246 247 #define HIGH_FUN_CONV_2D(avg, opt) \ 248 void vpx_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 249 uint8_t *dst, ptrdiff_t dst_stride, \ 250 const int16_t *filter_x, int x_step_q4, \ 251 const int16_t *filter_y, int y_step_q4, \ 252 int w, int h, int bd) { \ 253 assert(w <= 64); \ 254 assert(h <= 64); \ 255 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 256 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 257 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 258 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \ 259 vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ 260 CONVERT_TO_BYTEPTR(fdata2), 64, \ 261 filter_x, x_step_q4, \ 262 filter_y, y_step_q4, \ 263 w, h + 7, bd); \ 264 vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ 265 64, dst, dst_stride, \ 266 filter_x, x_step_q4, \ 267 filter_y, y_step_q4, \ 268 w, h, bd); \ 269 } else { \ 270 DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \ 271 vpx_highbd_convolve8_horiz_##opt(src, src_stride, \ 272 CONVERT_TO_BYTEPTR(fdata2), 64, \ 273 filter_x, x_step_q4, \ 274 filter_y, y_step_q4, \ 275 w, h + 1, bd); \ 276 vpx_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ 277 dst, dst_stride, \ 278 filter_x, x_step_q4, \ 279 filter_y, y_step_q4, \ 280 w, h, bd); \ 281 } \ 282 } else { \ 283 vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 284 filter_x, x_step_q4, filter_y, y_step_q4, w, \ 285 h, bd); \ 286 } \ 287 } 288 #endif // CONFIG_VP9_HIGHBITDEPTH 289 290 #endif // VPX_DSP_X86_CONVOLVE_H_ 291