1 /* 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 13 #include "./vpx_config.h" 14 #include "./vp9_rtcd.h" 15 #include "vpx_ports/mem.h" 16 17 typedef void filter8_1dfunction ( 18 const unsigned char *src_ptr, 19 const ptrdiff_t src_pitch, 20 unsigned char *output_ptr, 21 ptrdiff_t out_pitch, 22 unsigned int output_height, 23 const short *filter 24 ); 25 26 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 27 void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \ 28 uint8_t *dst, ptrdiff_t dst_stride, \ 29 const int16_t *filter_x, int x_step_q4, \ 30 const int16_t *filter_y, int y_step_q4, \ 31 int w, int h) { \ 32 if (step_q4 == 16 && filter[3] != 128) { \ 33 if (filter[0] || filter[1] || filter[2]) { \ 34 while (w >= 16) { \ 35 vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \ 36 src_stride, \ 37 dst, \ 38 dst_stride, \ 39 h, \ 40 filter); \ 41 src += 16; \ 42 dst += 16; \ 43 w -= 16; \ 44 } \ 45 while (w >= 8) { \ 46 vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \ 47 src_stride, \ 48 dst, \ 49 dst_stride, \ 50 h, \ 51 filter); \ 52 src += 8; \ 53 dst += 8; \ 54 w -= 8; \ 55 } \ 56 while (w >= 4) { \ 57 vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \ 58 src_stride, \ 59 dst, \ 60 dst_stride, \ 61 h, \ 62 filter); \ 63 src += 4; \ 64 dst += 4; \ 65 w -= 4; \ 66 } \ 67 } else { \ 68 while (w >= 16) { \ 69 vp9_filter_block1d16_##dir##2_##avg##opt(src, \ 70 src_stride, \ 71 dst, \ 72 dst_stride, \ 73 h, \ 74 filter); \ 75 src += 16; \ 76 dst += 16; \ 77 w -= 16; \ 78 } \ 79 while (w >= 8) { \ 80 vp9_filter_block1d8_##dir##2_##avg##opt(src, \ 81 src_stride, \ 82 dst, \ 83 dst_stride, \ 84 h, \ 85 filter); \ 86 src += 8; \ 87 dst += 8; \ 88 w -= 8; \ 89 } \ 90 while (w >= 4) { \ 91 vp9_filter_block1d4_##dir##2_##avg##opt(src, \ 92 src_stride, \ 93 dst, \ 94 dst_stride, \ 95 h, \ 96 filter); \ 97 src += 4; \ 98 dst += 4; \ 99 w -= 4; \ 100 } \ 101 } \ 102 } \ 103 if (w) { \ 104 vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ 105 filter_x, x_step_q4, filter_y, y_step_q4, \ 106 w, h); \ 107 } \ 108 } 109 110 #define FUN_CONV_2D(avg, opt) \ 111 void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 112 uint8_t *dst, ptrdiff_t dst_stride, \ 113 const int16_t *filter_x, int x_step_q4, \ 114 const int16_t *filter_y, int y_step_q4, \ 115 int w, int h) { \ 116 assert(w <= 64); \ 117 assert(h <= 64); \ 118 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 119 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 120 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 121 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 71); \ 122 vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ 123 filter_x, x_step_q4, filter_y, y_step_q4, \ 124 w, h + 7); \ 125 vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ 126 filter_x, x_step_q4, filter_y, \ 127 y_step_q4, w, h); \ 128 } else { \ 129 DECLARE_ALIGNED_ARRAY(16, unsigned char, fdata2, 64 * 65); \ 130 vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \ 131 filter_x, x_step_q4, filter_y, y_step_q4, \ 132 w, h + 1); \ 133 vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ 134 filter_x, x_step_q4, filter_y, \ 135 y_step_q4, w, h); \ 136 } \ 137 } else { \ 138 vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 139 filter_x, x_step_q4, filter_y, y_step_q4, w, h); \ 140 } \ 141 } 142 143 #if CONFIG_VP9_HIGHBITDEPTH 144 145 typedef void high_filter8_1dfunction ( 146 const uint16_t *src_ptr, 147 const ptrdiff_t src_pitch, 148 uint16_t *output_ptr, 149 ptrdiff_t out_pitch, 150 unsigned int output_height, 151 const int16_t *filter, 152 int bd 153 ); 154 155 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 156 void vp9_high_convolve8_##name##_##opt(const uint8_t *src8, \ 157 ptrdiff_t src_stride, \ 158 uint8_t *dst8, ptrdiff_t dst_stride, \ 159 const int16_t *filter_x, \ 160 int x_step_q4, \ 161 const int16_t *filter_y, \ 162 int y_step_q4, \ 163 int w, int h, int bd) { \ 164 if (step_q4 == 16 && filter[3] != 128) { \ 165 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 166 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 167 if (filter[0] || filter[1] || filter[2]) { \ 168 while (w >= 16) { \ 169 vp9_high_filter_block1d16_##dir##8_##avg##opt(src_start, \ 170 src_stride, \ 171 dst, \ 172 dst_stride, \ 173 h, \ 174 filter, \ 175 bd); \ 176 src += 16; \ 177 dst += 16; \ 178 w -= 16; \ 179 } \ 180 while (w >= 8) { \ 181 vp9_high_filter_block1d8_##dir##8_##avg##opt(src_start, \ 182 src_stride, \ 183 dst, \ 184 dst_stride, \ 185 h, \ 186 filter, \ 187 bd); \ 188 src += 8; \ 189 dst += 8; \ 190 w -= 8; \ 191 } \ 192 while (w >= 4) { \ 193 vp9_high_filter_block1d4_##dir##8_##avg##opt(src_start, \ 194 src_stride, \ 195 dst, \ 196 dst_stride, \ 197 h, \ 198 filter, \ 199 bd); \ 200 src += 4; \ 201 dst += 4; \ 202 w -= 4; \ 203 } \ 204 } else { \ 205 while (w >= 16) { \ 206 vp9_high_filter_block1d16_##dir##2_##avg##opt(src, \ 207 src_stride, \ 208 dst, \ 209 dst_stride, \ 210 h, \ 211 filter, \ 212 bd); \ 213 src += 16; \ 214 dst += 16; \ 215 w -= 16; \ 216 } \ 217 while (w >= 8) { \ 218 vp9_high_filter_block1d8_##dir##2_##avg##opt(src, \ 219 src_stride, \ 220 dst, \ 221 dst_stride, \ 222 h, \ 223 filter, \ 224 bd); \ 225 src += 8; \ 226 dst += 8; \ 227 w -= 8; \ 228 } \ 229 while (w >= 4) { \ 230 vp9_high_filter_block1d4_##dir##2_##avg##opt(src, \ 231 src_stride, \ 232 dst, \ 233 dst_stride, \ 234 h, \ 235 filter, \ 236 bd); \ 237 src += 4; \ 238 dst += 4; \ 239 w -= 4; \ 240 } \ 241 } \ 242 } \ 243 if (w) { \ 244 vp9_high_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \ 245 filter_x, x_step_q4, filter_y, y_step_q4, \ 246 w, h, bd); \ 247 } \ 248 } 249 250 #define HIGH_FUN_CONV_2D(avg, opt) \ 251 void vp9_high_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \ 252 uint8_t *dst, ptrdiff_t dst_stride, \ 253 const int16_t *filter_x, int x_step_q4, \ 254 const int16_t *filter_y, int y_step_q4, \ 255 int w, int h, int bd) { \ 256 assert(w <= 64); \ 257 assert(h <= 64); \ 258 if (x_step_q4 == 16 && y_step_q4 == 16) { \ 259 if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \ 260 filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \ 261 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 71); \ 262 vp9_high_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ 263 CONVERT_TO_BYTEPTR(fdata2), 64, \ 264 filter_x, x_step_q4, filter_y, y_step_q4, \ 265 w, h + 7, bd); \ 266 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \ 267 64, dst, dst_stride, \ 268 filter_x, x_step_q4, filter_y, \ 269 y_step_q4, w, h, bd); \ 270 } else { \ 271 DECLARE_ALIGNED_ARRAY(16, uint16_t, fdata2, 64 * 65); \ 272 vp9_high_convolve8_horiz_##opt(src, src_stride, \ 273 CONVERT_TO_BYTEPTR(fdata2), 64, \ 274 filter_x, x_step_q4, filter_y, y_step_q4, \ 275 w, h + 1, bd); \ 276 vp9_high_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \ 277 dst, dst_stride, \ 278 filter_x, x_step_q4, filter_y, \ 279 y_step_q4, w, h, bd); \ 280 } \ 281 } else { \ 282 vp9_high_convolve8_##avg##c(src, src_stride, dst, dst_stride, \ 283 filter_x, x_step_q4, filter_y, y_step_q4, w, \ 284 h, bd); \ 285 } \ 286 } 287 #endif // CONFIG_VP9_HIGHBITDEPTH 288 289 #if HAVE_AVX2 && HAVE_SSSE3 290 filter8_1dfunction vp9_filter_block1d16_v8_avx2; 291 filter8_1dfunction vp9_filter_block1d16_h8_avx2; 292 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 293 #if ARCH_X86_64 294 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 295 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 296 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 297 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3 298 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3 299 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3 300 #else // ARCH_X86 301 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; 302 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; 303 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; 304 #define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3 305 #define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3 306 #define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3 307 #endif // ARCH_X86_64 / ARCH_X86 308 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; 309 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; 310 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; 311 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; 312 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; 313 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; 314 #define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3 315 #define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3 316 #define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3 317 #define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3 318 #define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3 319 #define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3 320 #define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3 321 // void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, 322 // uint8_t *dst, ptrdiff_t dst_stride, 323 // const int16_t *filter_x, int x_step_q4, 324 // const int16_t *filter_y, int y_step_q4, 325 // int w, int h); 326 // void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, 327 // uint8_t *dst, ptrdiff_t dst_stride, 328 // const int16_t *filter_x, int x_step_q4, 329 // const int16_t *filter_y, int y_step_q4, 330 // int w, int h); 331 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2); 332 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2); 333 334 // void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, 335 // uint8_t *dst, ptrdiff_t dst_stride, 336 // const int16_t *filter_x, int x_step_q4, 337 // const int16_t *filter_y, int y_step_q4, 338 // int w, int h); 339 FUN_CONV_2D(, avx2); 340 #endif // HAVE_AX2 && HAVE_SSSE3 341 #if HAVE_SSSE3 342 #if ARCH_X86_64 343 filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3; 344 filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3; 345 filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3; 346 filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3; 347 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 348 filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3; 349 #define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3 350 #define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3 351 #define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3 352 #define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3 353 #define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3 354 #else // ARCH_X86 355 filter8_1dfunction vp9_filter_block1d16_v8_ssse3; 356 filter8_1dfunction vp9_filter_block1d16_h8_ssse3; 357 filter8_1dfunction vp9_filter_block1d8_v8_ssse3; 358 filter8_1dfunction vp9_filter_block1d8_h8_ssse3; 359 filter8_1dfunction vp9_filter_block1d4_v8_ssse3; 360 filter8_1dfunction vp9_filter_block1d4_h8_ssse3; 361 #endif // ARCH_X86_64 / ARCH_X86 362 filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3; 363 filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3; 364 filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3; 365 filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3; 366 filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3; 367 filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3; 368 369 filter8_1dfunction vp9_filter_block1d16_v2_ssse3; 370 filter8_1dfunction vp9_filter_block1d16_h2_ssse3; 371 filter8_1dfunction vp9_filter_block1d8_v2_ssse3; 372 filter8_1dfunction vp9_filter_block1d8_h2_ssse3; 373 filter8_1dfunction vp9_filter_block1d4_v2_ssse3; 374 filter8_1dfunction vp9_filter_block1d4_h2_ssse3; 375 filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3; 376 filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3; 377 filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3; 378 filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3; 379 filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3; 380 filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3; 381 382 // void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 383 // uint8_t *dst, ptrdiff_t dst_stride, 384 // const int16_t *filter_x, int x_step_q4, 385 // const int16_t *filter_y, int y_step_q4, 386 // int w, int h); 387 // void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 388 // uint8_t *dst, ptrdiff_t dst_stride, 389 // const int16_t *filter_x, int x_step_q4, 390 // const int16_t *filter_y, int y_step_q4, 391 // int w, int h); 392 // void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, 393 // uint8_t *dst, ptrdiff_t dst_stride, 394 // const int16_t *filter_x, int x_step_q4, 395 // const int16_t *filter_y, int y_step_q4, 396 // int w, int h); 397 // void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, 398 // uint8_t *dst, ptrdiff_t dst_stride, 399 // const int16_t *filter_x, int x_step_q4, 400 // const int16_t *filter_y, int y_step_q4, 401 // int w, int h); 402 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3); 403 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3); 404 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3); 405 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, 406 ssse3); 407 408 // void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, 409 // uint8_t *dst, ptrdiff_t dst_stride, 410 // const int16_t *filter_x, int x_step_q4, 411 // const int16_t *filter_y, int y_step_q4, 412 // int w, int h); 413 // void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, 414 // uint8_t *dst, ptrdiff_t dst_stride, 415 // const int16_t *filter_x, int x_step_q4, 416 // const int16_t *filter_y, int y_step_q4, 417 // int w, int h); 418 FUN_CONV_2D(, ssse3); 419 FUN_CONV_2D(avg_ , ssse3); 420 #endif // HAVE_SSSE3 421 422 #if HAVE_SSE2 423 filter8_1dfunction vp9_filter_block1d16_v8_sse2; 424 filter8_1dfunction vp9_filter_block1d16_h8_sse2; 425 filter8_1dfunction vp9_filter_block1d8_v8_sse2; 426 filter8_1dfunction vp9_filter_block1d8_h8_sse2; 427 filter8_1dfunction vp9_filter_block1d4_v8_sse2; 428 filter8_1dfunction vp9_filter_block1d4_h8_sse2; 429 filter8_1dfunction vp9_filter_block1d16_v8_avg_sse2; 430 filter8_1dfunction vp9_filter_block1d16_h8_avg_sse2; 431 filter8_1dfunction vp9_filter_block1d8_v8_avg_sse2; 432 filter8_1dfunction vp9_filter_block1d8_h8_avg_sse2; 433 filter8_1dfunction vp9_filter_block1d4_v8_avg_sse2; 434 filter8_1dfunction vp9_filter_block1d4_h8_avg_sse2; 435 436 filter8_1dfunction vp9_filter_block1d16_v2_sse2; 437 filter8_1dfunction vp9_filter_block1d16_h2_sse2; 438 filter8_1dfunction vp9_filter_block1d8_v2_sse2; 439 filter8_1dfunction vp9_filter_block1d8_h2_sse2; 440 filter8_1dfunction vp9_filter_block1d4_v2_sse2; 441 filter8_1dfunction vp9_filter_block1d4_h2_sse2; 442 filter8_1dfunction vp9_filter_block1d16_v2_avg_sse2; 443 filter8_1dfunction vp9_filter_block1d16_h2_avg_sse2; 444 filter8_1dfunction vp9_filter_block1d8_v2_avg_sse2; 445 filter8_1dfunction vp9_filter_block1d8_h2_avg_sse2; 446 filter8_1dfunction vp9_filter_block1d4_v2_avg_sse2; 447 filter8_1dfunction vp9_filter_block1d4_h2_avg_sse2; 448 449 // void vp9_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 450 // uint8_t *dst, ptrdiff_t dst_stride, 451 // const int16_t *filter_x, int x_step_q4, 452 // const int16_t *filter_y, int y_step_q4, 453 // int w, int h); 454 // void vp9_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 455 // uint8_t *dst, ptrdiff_t dst_stride, 456 // const int16_t *filter_x, int x_step_q4, 457 // const int16_t *filter_y, int y_step_q4, 458 // int w, int h); 459 // void vp9_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 460 // uint8_t *dst, ptrdiff_t dst_stride, 461 // const int16_t *filter_x, int x_step_q4, 462 // const int16_t *filter_y, int y_step_q4, 463 // int w, int h); 464 // void vp9_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 465 // uint8_t *dst, ptrdiff_t dst_stride, 466 // const int16_t *filter_x, int x_step_q4, 467 // const int16_t *filter_y, int y_step_q4, 468 // int w, int h); 469 FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); 470 FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); 471 FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); 472 FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2); 473 474 // void vp9_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, 475 // uint8_t *dst, ptrdiff_t dst_stride, 476 // const int16_t *filter_x, int x_step_q4, 477 // const int16_t *filter_y, int y_step_q4, 478 // int w, int h); 479 // void vp9_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, 480 // uint8_t *dst, ptrdiff_t dst_stride, 481 // const int16_t *filter_x, int x_step_q4, 482 // const int16_t *filter_y, int y_step_q4, 483 // int w, int h); 484 FUN_CONV_2D(, sse2); 485 FUN_CONV_2D(avg_ , sse2); 486 487 #if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 488 high_filter8_1dfunction vp9_high_filter_block1d16_v8_sse2; 489 high_filter8_1dfunction vp9_high_filter_block1d16_h8_sse2; 490 high_filter8_1dfunction vp9_high_filter_block1d8_v8_sse2; 491 high_filter8_1dfunction vp9_high_filter_block1d8_h8_sse2; 492 high_filter8_1dfunction vp9_high_filter_block1d4_v8_sse2; 493 high_filter8_1dfunction vp9_high_filter_block1d4_h8_sse2; 494 high_filter8_1dfunction vp9_high_filter_block1d16_v8_avg_sse2; 495 high_filter8_1dfunction vp9_high_filter_block1d16_h8_avg_sse2; 496 high_filter8_1dfunction vp9_high_filter_block1d8_v8_avg_sse2; 497 high_filter8_1dfunction vp9_high_filter_block1d8_h8_avg_sse2; 498 high_filter8_1dfunction vp9_high_filter_block1d4_v8_avg_sse2; 499 high_filter8_1dfunction vp9_high_filter_block1d4_h8_avg_sse2; 500 501 high_filter8_1dfunction vp9_high_filter_block1d16_v2_sse2; 502 high_filter8_1dfunction vp9_high_filter_block1d16_h2_sse2; 503 high_filter8_1dfunction vp9_high_filter_block1d8_v2_sse2; 504 high_filter8_1dfunction vp9_high_filter_block1d8_h2_sse2; 505 high_filter8_1dfunction vp9_high_filter_block1d4_v2_sse2; 506 high_filter8_1dfunction vp9_high_filter_block1d4_h2_sse2; 507 high_filter8_1dfunction vp9_high_filter_block1d16_v2_avg_sse2; 508 high_filter8_1dfunction vp9_high_filter_block1d16_h2_avg_sse2; 509 high_filter8_1dfunction vp9_high_filter_block1d8_v2_avg_sse2; 510 high_filter8_1dfunction vp9_high_filter_block1d8_h2_avg_sse2; 511 high_filter8_1dfunction vp9_high_filter_block1d4_v2_avg_sse2; 512 high_filter8_1dfunction vp9_high_filter_block1d4_h2_avg_sse2; 513 514 // void vp9_high_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, 515 // uint8_t *dst, ptrdiff_t dst_stride, 516 // const int16_t *filter_x, int x_step_q4, 517 // const int16_t *filter_y, int y_step_q4, 518 // int w, int h, int bd); 519 // void vp9_high_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, 520 // uint8_t *dst, ptrdiff_t dst_stride, 521 // const int16_t *filter_x, int x_step_q4, 522 // const int16_t *filter_y, int y_step_q4, 523 // int w, int h, int bd); 524 // void vp9_high_convolve8_avg_horiz_sse2(const uint8_t *src, 525 // ptrdiff_t src_stride, 526 // uint8_t *dst, ptrdiff_t dst_stride, 527 // const int16_t *filter_x, 528 // int x_step_q4, 529 // const int16_t *filter_y, 530 // int y_step_q4, 531 // int w, int h, int bd); 532 // void vp9_high_convolve8_avg_vert_sse2(const uint8_t *src, 533 // ptrdiff_t src_stride, 534 // uint8_t *dst, ptrdiff_t dst_stride, 535 // const int16_t *filter_x, int x_step_q4, 536 // const int16_t *filter_y, int y_step_q4, 537 // int w, int h, int bd); 538 HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2); 539 HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2); 540 HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2); 541 HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, 542 sse2); 543 544 // void vp9_high_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, 545 // uint8_t *dst, ptrdiff_t dst_stride, 546 // const int16_t *filter_x, int x_step_q4, 547 // const int16_t *filter_y, int y_step_q4, 548 // int w, int h, int bd); 549 // void vp9_high_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, 550 // uint8_t *dst, ptrdiff_t dst_stride, 551 // const int16_t *filter_x, int x_step_q4, 552 // const int16_t *filter_y, int y_step_q4, 553 // int w, int h, int bd); 554 HIGH_FUN_CONV_2D(, sse2); 555 HIGH_FUN_CONV_2D(avg_ , sse2); 556 #endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64 557 #endif // HAVE_SSE2 558