1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 #ifndef AOM_AOM_DSP_X86_CONVOLVE_H_ 12 #define AOM_AOM_DSP_X86_CONVOLVE_H_ 13 14 #include <assert.h> 15 16 #include "config/aom_config.h" 17 18 #include "aom/aom_integer.h" 19 #include "aom_ports/mem.h" 20 21 typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, 22 uint8_t *output_ptr, ptrdiff_t out_pitch, 23 uint32_t output_height, const int16_t *filter); 24 25 #define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 26 void aom_convolve8_##name##_##opt( \ 27 const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ 28 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 29 const int16_t *filter_y, int y_step_q4, int w, int h) { \ 30 (void)filter_x; \ 31 (void)x_step_q4; \ 32 (void)filter_y; \ 33 (void)y_step_q4; \ 34 assert((-128 <= filter[3]) && (filter[3] <= 127)); \ 35 assert(step_q4 == 16); \ 36 if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 37 (filter[2] | filter[5])) { \ 38 while (w >= 16) { \ 39 aom_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ 40 dst_stride, h, filter); \ 41 src += 16; \ 42 dst += 16; \ 43 w -= 16; \ 44 } \ 45 while (w >= 8) { \ 46 aom_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ 47 dst_stride, h, filter); \ 48 src += 8; \ 49 dst += 8; \ 50 w -= 8; \ 51 } \ 52 while (w >= 4) { \ 53 aom_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ 54 dst_stride, h, filter); \ 55 src += 4; \ 56 dst += 4; \ 57 w -= 4; \ 58 } \ 59 } else if (filter[0] | filter[1] | filter[2]) { \ 60 while (w >= 16) { \ 61 aom_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ 62 dst_stride, h, filter); \ 63 src += 16; \ 64 dst += 16; \ 65 w -= 16; \ 66 } \ 67 while (w >= 8) { \ 68 aom_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ 69 dst_stride, h, filter); \ 70 src += 8; \ 71 dst += 8; \ 72 w -= 8; \ 73 } \ 74 while (w >= 4) { \ 75 aom_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ 76 dst_stride, h, filter); \ 77 src += 4; \ 78 dst += 4; \ 79 w -= 4; \ 80 } \ 81 } else { \ 82 while (w >= 16) { \ 83 aom_filter_block1d16_##dir##2_##avg##opt(src, src_stride, dst, \ 84 dst_stride, h, filter); \ 85 src += 16; \ 86 dst += 16; \ 87 w -= 16; \ 88 } \ 89 while (w >= 8) { \ 90 aom_filter_block1d8_##dir##2_##avg##opt(src, src_stride, dst, \ 91 dst_stride, h, filter); \ 92 src += 8; \ 93 dst += 8; \ 94 w -= 8; \ 95 } \ 96 while (w >= 4) { \ 97 aom_filter_block1d4_##dir##2_##avg##opt(src, src_stride, dst, \ 98 dst_stride, h, filter); \ 99 src += 4; \ 100 dst += 4; \ 101 w -= 4; \ 102 } \ 103 } \ 104 if (w) { \ 105 aom_convolve8_##name##_c(src, src_stride, dst, dst_stride, filter_x, \ 106 x_step_q4, filter_y, y_step_q4, w, h); \ 107 } \ 108 } 109 110 #if CONFIG_AV1_HIGHBITDEPTH 111 typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, 112 const ptrdiff_t src_pitch, 113 uint16_t *output_ptr, 114 ptrdiff_t out_pitch, 115 unsigned int output_height, 116 const int16_t *filter, int bd); 117 118 #define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \ 119 void aom_highbd_convolve8_##name##_##opt( \ 120 const uint8_t *src8, ptrdiff_t src_stride, uint8_t *dst8, \ 121 ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \ 122 const int16_t *filter_y, int y_step_q4, int w, int h, int bd) { \ 123 uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ 124 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \ 125 if (step_q4 == 16 && filter[3] != 128) { \ 126 if (((filter[0] | filter[1] | filter[6] | filter[7]) == 0) && \ 127 (filter[2] | filter[5])) { \ 128 while (w >= 16) { \ 129 aom_highbd_filter_block1d16_##dir##4_##avg##opt( \ 130 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 131 src += 16; \ 132 dst += 16; \ 133 w -= 16; \ 134 } \ 135 while (w >= 8) { \ 136 aom_highbd_filter_block1d8_##dir##4_##avg##opt( \ 137 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 138 src += 8; \ 139 dst += 8; \ 140 w -= 8; \ 141 } \ 142 while (w >= 4) { \ 143 aom_highbd_filter_block1d4_##dir##4_##avg##opt( \ 144 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 145 src += 4; \ 146 dst += 4; \ 147 w -= 4; \ 148 } \ 149 } else if (filter[0] | filter[1] | filter[2]) { \ 150 while (w >= 16) { \ 151 aom_highbd_filter_block1d16_##dir##8_##avg##opt( \ 152 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 153 src += 16; \ 154 dst += 16; \ 155 w -= 16; \ 156 } \ 157 while (w >= 8) { \ 158 aom_highbd_filter_block1d8_##dir##8_##avg##opt( \ 159 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 160 src += 8; \ 161 dst += 8; \ 162 w -= 8; \ 163 } \ 164 while (w >= 4) { \ 165 aom_highbd_filter_block1d4_##dir##8_##avg##opt( \ 166 src_start, src_stride, dst, dst_stride, h, filter, bd); \ 167 src += 4; \ 168 dst += 4; \ 169 w -= 4; \ 170 } \ 171 } else { \ 172 while (w >= 16) { \ 173 aom_highbd_filter_block1d16_##dir##2_##avg##opt( \ 174 src, src_stride, dst, dst_stride, h, filter, bd); \ 175 src += 16; \ 176 dst += 16; \ 177 w -= 16; \ 178 } \ 179 while (w >= 8) { \ 180 aom_highbd_filter_block1d8_##dir##2_##avg##opt( \ 181 src, src_stride, dst, dst_stride, h, filter, bd); \ 182 src += 8; \ 183 dst += 8; \ 184 w -= 8; \ 185 } \ 186 while (w >= 4) { \ 187 aom_highbd_filter_block1d4_##dir##2_##avg##opt( \ 188 src, src_stride, dst, dst_stride, h, filter, bd); \ 189 src += 4; \ 190 dst += 4; \ 191 w -= 4; \ 192 } \ 193 } \ 194 } \ 195 if (w) { \ 196 aom_highbd_convolve8_##name##_c( \ 197 CONVERT_TO_BYTEPTR(src), src_stride, CONVERT_TO_BYTEPTR(dst), \ 198 dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); \ 199 } \ 200 } 201 #endif // CONFIG_AV1_HIGHBITDEPTH 202 203 #endif // AOM_AOM_DSP_X86_CONVOLVE_H_ 204