/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/ppc/types_vsx.h"

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

/* Accumulate |a - b| for the 16-byte chunk at byte offset "offset" into the
 * four 32-bit partial sums held in v_sad. */
#define PROCESS16(offset) \
  v_a = vec_vsx_ld(offset, a); \
  v_b = vec_vsx_ld(offset, b); \
  v_abs = vec_absd(v_a, v_b); \
  v_sad = vec_sum4s(v_abs, v_sad);

/* 8-wide SAD: each row load still reads 16 bytes, so only the first two
 * 4-byte partial sums (the row's 8 bytes) are added into the result. */
#define SAD8(height) \
  unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride, \
                                       const uint8_t *b, int b_stride) { \
    int y = 0; \
    uint8x16_t v_a, v_b, v_abs; \
    uint32x4_t v_sad = vec_zeros_u32; \
    \
    do { \
      PROCESS16(0) \
      \
      a += a_stride; \
      b += b_stride; \
      y++; \
    } while (y < height); \
    \
    return v_sad[1] + v_sad[0]; \
  }

#define SAD16(height) \
  unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride, \
                                        const uint8_t *b, int b_stride) { \
    int y = 0; \
    uint8x16_t v_a, v_b, v_abs; \
    uint32x4_t v_sad = vec_zeros_u32; \
    \
    do { \
      PROCESS16(0); \
      \
      a += a_stride; \
      b += b_stride; \
      y++; \
    } while (y < height); \
    \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \
  }

#define SAD32(height) \
  unsigned int vpx_sad32x##height##_vsx(const uint8_t *a, int a_stride, \
                                        const uint8_t *b, int b_stride) { \
    int y = 0; \
    uint8x16_t v_a, v_b, v_abs; \
    uint32x4_t v_sad = vec_zeros_u32; \
    \
    do { \
      PROCESS16(0); \
      PROCESS16(16); \
      \
      a += a_stride; \
      b += b_stride; \
      y++; \
    } while (y < height); \
    \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \
  }

#define SAD64(height) \
  unsigned int vpx_sad64x##height##_vsx(const uint8_t *a, int a_stride, \
                                        const uint8_t *b, int b_stride) { \
    int y = 0; \
    uint8x16_t v_a, v_b, v_abs; \
    uint32x4_t v_sad = vec_zeros_u32; \
    \
    do { \
      PROCESS16(0); \
      PROCESS16(16); \
      PROCESS16(32); \
      PROCESS16(48); \
      \
      a += a_stride; \
      b += b_stride; \
      y++; \
    } while (y < height); \
    \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0]; \
  }

SAD8(4);
SAD8(8);
SAD8(16);
SAD16(8);
SAD16(16);
SAD16(32);
SAD32(16);
SAD32(32);
SAD32(64);
SAD64(32);
SAD64(64);

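/* Averaged SAD: vpx_comp_avg_pred_vsx() averages ref with second_pred into a
 * stack buffer, then the plain SAD of src against that buffer is returned. */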
#define SAD16AVG(height) \
  unsigned int vpx_sad16x##height##_avg_vsx( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) { \
    DECLARE_ALIGNED(16, uint8_t, comp_pred[16 * (height)]); \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 16, height, ref, \
                          ref_stride); \
    \
    return vpx_sad16x##height##_vsx(src, src_stride, comp_pred, 16); \
  }

#define SAD32AVG(height) \
  unsigned int vpx_sad32x##height##_avg_vsx( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) { \
    DECLARE_ALIGNED(32, uint8_t, comp_pred[32 * (height)]); \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 32, height, ref, \
                          ref_stride); \
    \
    return vpx_sad32x##height##_vsx(src, src_stride, comp_pred, 32); \
  }

#define SAD64AVG(height) \
  unsigned int vpx_sad64x##height##_avg_vsx( \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) { \
    DECLARE_ALIGNED(64, uint8_t, comp_pred[64 * (height)]); \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 64, height, ref, \
                          ref_stride); \
    return vpx_sad64x##height##_vsx(src, src_stride, comp_pred, 64); \
  }

SAD16AVG(8);
SAD16AVG(16);
SAD16AVG(32);
SAD32AVG(16);
SAD32AVG(32);
SAD32AVG(64);
SAD64AVG(32);
SAD64AVG(64);

/* Accumulate, into v_sad, the SAD between a pre-unpacked source chunk
 * (v_h/v_l) and the 16 reference bytes at byte offset "offset". */
#define PROCESS16_4D(offset, ref, v_h, v_l) \
  v_b = vec_vsx_ld(offset, ref); \
  v_bh = unpack_to_s16_h(v_b); \
  v_bl = unpack_to_s16_l(v_b); \
  v_subh = vec_sub(v_h, v_bh); \
  v_subl = vec_sub(v_l, v_bl); \
  v_absh = vec_abs(v_subh); \
  v_absl = vec_abs(v_subl); \
  v_sad = vec_sum4s(v_absh, v_sad); \
  v_sad = vec_sum4s(v_absl, v_sad);

/* Load 16 source bytes at byte offset "offset" and widen them into high and
 * low int16x8_t halves. */
#define UNPACK_SRC(offset, srcv_h, srcv_l) \
  v_a = vec_vsx_ld(offset, src); \
  srcv_h = unpack_to_s16_h(v_a); \
  srcv_l = unpack_to_s16_l(v_a);

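/* x4d SAD: each source row is unpacked once and then compared against the
 * corresponding row of all four reference blocks. */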
#define SAD16_4D(height) \
  void vpx_sad16x##height##x4d_vsx(const uint8_t *src, int src_stride, \
                                   const uint8_t *const ref_array[], \
                                   int ref_stride, uint32_t *sad_array) { \
    int i; \
    int y; \
    unsigned int sad[4]; \
    uint8x16_t v_a, v_b; \
    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl; \
    \
    for (i = 0; i < 4; i++) sad_array[i] = 0; \
    \
    for (y = 0; y < height; y++) { \
      UNPACK_SRC(y * src_stride, v_ah, v_al); \
      for (i = 0; i < 4; i++) { \
        int32x4_t v_sad = vec_splat_s32(0); \
        PROCESS16_4D(y * ref_stride, ref_array[i], v_ah, v_al); \
        \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad); \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \
      } \
    } \
  }

#define SAD32_4D(height) \
  void vpx_sad32x##height##x4d_vsx(const uint8_t *src, int src_stride, \
                                   const uint8_t *const ref_array[], \
                                   int ref_stride, uint32_t *sad_array) { \
    int i; \
    int y; \
    unsigned int sad[4]; \
    uint8x16_t v_a, v_b; \
    int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl; \
    int16x8_t v_absh, v_absl, v_subh, v_subl; \
    \
    for (i = 0; i < 4; i++) sad_array[i] = 0; \
    \
    for (y = 0; y < height; y++) { \
      UNPACK_SRC(y * src_stride, v_ah1, v_al1); \
      UNPACK_SRC(y * src_stride + 16, v_ah2, v_al2); \
      for (i = 0; i < 4; i++) { \
        int32x4_t v_sad = vec_splat_s32(0); \
        PROCESS16_4D(y * ref_stride, ref_array[i], v_ah1, v_al1); \
        PROCESS16_4D(y * ref_stride + 16, ref_array[i], v_ah2, v_al2); \
        \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad); \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \
      } \
    } \
  }

#define SAD64_4D(height) \
  void vpx_sad64x##height##x4d_vsx(const uint8_t *src, int src_stride, \
                                   const uint8_t *const ref_array[], \
                                   int ref_stride, uint32_t *sad_array) { \
    int i; \
    int y; \
    unsigned int sad[4]; \
    uint8x16_t v_a, v_b; \
    int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl; \
    int16x8_t v_ah3, v_al3, v_ah4, v_al4; \
    int16x8_t v_absh, v_absl, v_subh, v_subl; \
    \
    for (i = 0; i < 4; i++) sad_array[i] = 0; \
    \
    for (y = 0; y < height; y++) { \
      UNPACK_SRC(y * src_stride, v_ah1, v_al1); \
      UNPACK_SRC(y * src_stride + 16, v_ah2, v_al2); \
      UNPACK_SRC(y * src_stride + 32, v_ah3, v_al3); \
      UNPACK_SRC(y * src_stride + 48, v_ah4, v_al4); \
      for (i = 0; i < 4; i++) { \
        int32x4_t v_sad = vec_splat_s32(0); \
        PROCESS16_4D(y * ref_stride, ref_array[i], v_ah1, v_al1); \
        PROCESS16_4D(y * ref_stride + 16, ref_array[i], v_ah2, v_al2); \
        PROCESS16_4D(y * ref_stride + 32, ref_array[i], v_ah3, v_al3); \
        PROCESS16_4D(y * ref_stride + 48, ref_array[i], v_ah4, v_al4); \
        \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad); \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]); \
      } \
    } \
  }

SAD16_4D(8);
SAD16_4D(16);
SAD16_4D(32);
SAD32_4D(16);
SAD32_4D(32);
SAD32_4D(64);
SAD64_4D(32);
SAD64_4D(64);