/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_DSP_X86_CONVOLVE_H_
#define VPX_DSP_X86_CONVOLVE_H_

#include <assert.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

/* Signature of a SIMD 1-D filter kernel: filters a block of width fixed by
 * the kernel name (4/8/16 pixels) and `output_height` rows, reading 8-bit
 * pixels from src_ptr and writing 8-bit results to output_ptr. */
typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch,
                                uint8_t *output_ptr, ptrdiff_t out_pitch,
                                uint32_t output_height, const int16_t *filter);

/* Generates a 1-D convolution entry point vpx_convolve8_<name>_<opt> that
 * dispatches to width-specialized SIMD kernels (16/8/4 pixels wide).
 * - `offset` selects which of x0_q4/y0_q4 indexes the kernel table;
 *   the unused q4 parameters are (void)-cast to silence warnings.
 * - If any outer tap (filter[0..2]) is nonzero the 8-tap kernels are used
 *   (called with `src_start`); otherwise the cheaper 2-tap kernels run on
 *   `src` directly.  NOTE(review): `src_start` is a macro argument supplied
 *   at the instantiation site — presumably `src` rewound by 3 taps for the
 *   8-tap path; confirm against the instantiations.
 * - Asserts (debug-only) that there is no scaling (step == 16) and that the
 *   center tap is not the degenerate 128 value.
 * - w is consumed in 16-wide strips, then one 8- or 4-wide remainder;
 *   src/dst advance together with w. */
#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)         \
  void vpx_convolve8_##name##_##opt(                                         \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,    \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    const int16_t *filter = filter_kernel[offset];                           \
    (void)x0_q4;                                                             \
    (void)x_step_q4;                                                         \
    (void)y0_q4;                                                             \
    (void)y_step_q4;                                                         \
    assert(filter[3] != 128);                                                \
    assert(step_q4 == 16);                                                   \
    if (filter[0] | filter[1] | filter[2]) {                                 \
      while (w >= 16) {                                                      \
        vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
        w -= 16;                                                             \
      }                                                                      \
      if (w == 8) {                                                          \
        vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                dst_stride, h, filter);      \
      } else if (w == 4) {                                                   \
        vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst,  \
                                                dst_stride, h, filter);      \
      }                                                                      \
    } else {                                                                 \
      while (w >= 16) {                                                      \
        vpx_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst,       \
                                                 dst_stride, h, filter);     \
        src += 16;                                                           \
        dst += 16;                                                           \
        w -= 16;                                                             \
      }                                                                      \
      if (w == 8) {                                                          \
        vpx_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                dst_stride, h, filter);      \
      } else if (w == 4) {                                                   \
        vpx_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst,        \
                                                dst_stride, h, filter);      \
      }                                                                      \
    }                                                                        \
  }

/* Generates a 2-D convolution entry point vpx_convolve8_<avg><opt> built as
 * horizontal pass into a 64-pixel-wide aligned temp buffer, then vertical
 * pass into dst (blocks are asserted to be at most 64x64, no scaling).
 * 8-tap case: the horizontal pass filters h + 7 rows starting 3 rows above
 * the block (temp buffer is 64 * 71); the vertical pass then starts 3 rows
 * into the buffer (fdata2 + 3 * 64) so it sees 3 rows of context above and
 * 4 below.  2-tap case: only h + 1 rows are needed (buffer 64 * 65) and the
 * vertical pass starts at row 0. */
#define FUN_CONV_2D(avg, opt)                                                \
  void vpx_convolve8_##avg##opt(                                             \
      const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,                \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,           \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \
    const int16_t *filter_x = filter[x0_q4];                                 \
    const int16_t *filter_y = filter[y0_q4];                                 \
    /* filter_y is referenced only inside assert(); avoid an unused-variable \
     * warning in NDEBUG builds. */                                          \
    (void)filter_y;                                                          \
    assert(filter_x[3] != 128);                                              \
    assert(filter_y[3] != 128);                                              \
    assert(w <= 64);                                                         \
    assert(h <= 64);                                                         \
    assert(x_step_q4 == 16);                                                 \
    assert(y_step_q4 == 16);                                                 \
    if (filter_x[0] | filter_x[1] | filter_x[2]) {                           \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]);                         \
      vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
                                filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
                                h + 7);                                      \
      vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride,  \
                                      filter, x0_q4, x_step_q4, y0_q4,       \
                                      y_step_q4, w, h);                      \
    } else {                                                                 \
      DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]);                         \
      vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4,  \
                                x_step_q4, y0_q4, y_step_q4, w, h + 1);      \
      vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter,   \
                                      x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
                                      h);                                    \
    }                                                                        \
  }

#if CONFIG_VP9_HIGHBITDEPTH

/* High-bit-depth counterpart of filter8_1dfunction: pixels are uint16_t and
 * the kernel takes the bit depth `bd`. */
typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr,
                                       const ptrdiff_t src_pitch,
                                       uint16_t *output_ptr,
                                       ptrdiff_t out_pitch,
                                       unsigned int output_height,
                                       const int16_t *filter, int bd);

/* High-bit-depth version of FUN_CONV_1D.  Differences from the 8-bit macro:
 * - The "no scaling, non-degenerate center tap" conditions are checked at
 *   run time instead of asserted; when they fail, w is untouched and the
 *   whole block is handled by the C fallback below.
 * - Remainder widths use while-loops (w is reduced toward 0), and any width
 *   left over after the 16/8/4 loops (i.e. w not a multiple of 4) is
 *   finished by the C implementation — src/dst have already been advanced
 *   past the SIMD-covered columns, so the fallback only processes the
 *   remaining w columns. */
#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt)    \
  void vpx_highbd_convolve8_##name##_##opt(                                  \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,              \
      ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4,    \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {       \
    const int16_t *filter = filter_kernel[offset];                           \
    if (step_q4 == 16 && filter[3] != 128) {                                 \
      if (filter[0] | filter[1] | filter[2]) {                               \
        while (w >= 16) {                                                    \
          vpx_highbd_filter_block1d16_##dir##8_##avg##opt(                   \
              src_start, src_stride, dst, dst_stride, h, filter, bd);        \
          src += 16;                                                         \
          dst += 16;                                                         \
          w -= 16;                                                           \
        }                                                                    \
        while (w >= 8) {                                                     \
          vpx_highbd_filter_block1d8_##dir##8_##avg##opt(                    \
              src_start, src_stride, dst, dst_stride, h, filter, bd);        \
          src += 8;                                                          \
          dst += 8;                                                          \
          w -= 8;                                                            \
        }                                                                    \
        while (w >= 4) {                                                     \
          vpx_highbd_filter_block1d4_##dir##8_##avg##opt(                    \
              src_start, src_stride, dst, dst_stride, h, filter, bd);        \
          src += 4;                                                          \
          dst += 4;                                                          \
          w -= 4;                                                            \
        }                                                                    \
      } else {                                                               \
        while (w >= 16) {                                                    \
          vpx_highbd_filter_block1d16_##dir##2_##avg##opt(                   \
              src, src_stride, dst, dst_stride, h, filter, bd);              \
          src += 16;                                                         \
          dst += 16;                                                         \
          w -= 16;                                                           \
        }                                                                    \
        while (w >= 8) {                                                     \
          vpx_highbd_filter_block1d8_##dir##2_##avg##opt(                    \
              src, src_stride, dst, dst_stride, h, filter, bd);              \
          src += 8;                                                          \
          dst += 8;                                                          \
          w -= 8;                                                            \
        }                                                                    \
        while (w >= 4) {                                                     \
          vpx_highbd_filter_block1d4_##dir##2_##avg##opt(                    \
              src, src_stride, dst, dst_stride, h, filter, bd);              \
          src += 4;                                                          \
          dst += 4;                                                          \
          w -= 4;                                                            \
        }                                                                    \
      }                                                                      \
    }                                                                        \
    /* Finish any residual columns (or the whole block when step/filter     \
     * preconditions failed) with the portable C implementation. */          \
    if (w) {                                                                 \
      vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride,      \
                                      filter_kernel, x0_q4, x_step_q4, y0_q4, \
                                      y_step_q4, w, h, bd);                  \
    }                                                                        \
  }

/* High-bit-depth version of FUN_CONV_2D.  Differences from the 8-bit macro:
 * - The no-scaling condition is a runtime branch; non-16 steps fall back to
 *   the full C implementation.
 * - The 8-tap path is also taken when filter_x[3] == 128 (the 2-tap SIMD
 *   path is only used when all of filter_x[0..2] are zero AND the center tap
 *   is not 128).
 * - fdata2 + 192 is the same 3-row offset as fdata2 + 3 * 64 in the 8-bit
 *   macro (rows of the 64-wide temp buffer). */
#define HIGH_FUN_CONV_2D(avg, opt)                                           \
  void vpx_highbd_convolve8_##avg##opt(                                      \
      const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst,              \
      ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4,           \
      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {       \
    const int16_t *filter_x = filter[x0_q4];                                 \
    assert(w <= 64);                                                         \
    assert(h <= 64);                                                         \
    if (x_step_q4 == 16 && y_step_q4 == 16) {                                \
      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) { \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]);                      \
        vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride,   \
                                         fdata2, 64, filter, x0_q4, x_step_q4, \
                                         y0_q4, y_step_q4, w, h + 7, bd);    \
        vpx_highbd_convolve8_##avg##vert_##opt(                              \
            fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4,     \
            y0_q4, y_step_q4, w, h, bd);                                     \
      } else {                                                               \
        DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]);                      \
        vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \
                                         x0_q4, x_step_q4, y0_q4, y_step_q4, \
                                         w, h + 1, bd);                      \
        vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride,  \
                                               filter, x0_q4, x_step_q4,     \
                                               y0_q4, y_step_q4, w, h, bd);  \
      }                                                                      \
    } else {                                                                 \
      vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \
                                    x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \
                                    bd);                                     \
    }                                                                        \
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

#endif  // VPX_DSP_X86_CONVOLVE_H_