##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##

# Prints the C preamble for the DSP section: a "DSP" banner comment followed
# by the #include lines for the headers listed below and a trailing blank
# line. The sub's name is handed to forward_decls right after its definition.
sub aom_dsp_forward_decls() {
  my @dsp_headers = (
    "aom/aom_integer.h",
    "aom_dsp/aom_dsp_common.h",
    "av1/common/blockd.h",
    "av1/common/enums.h",
  );

  # Emit everything through one print call so the output is a single write.
  print "/*\n * DSP\n */\n\n"
      . join("", map { qq{#include "$_"\n} } @dsp_headers)
      . "\n";
}
forward_decls qw/aom_dsp_forward_decls/;

# optimizations which depend on multiple features:
# 'avx2' only when both AVX2 and SSSE3 are available, otherwise empty.
$avx2_ssse3 =
    ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes"))
    ? 'avx2'
    : '';

# functions that are 64 bit only.
# Each *_x86_64 variable holds its ISA name on x86_64 targets and is empty
# everywhere else.
my $targets_x86_64 = ($opts{arch} eq "x86_64");
$mmx_x86_64   = $targets_x86_64 ? 'mmx'   : '';
$sse2_x86_64  = $targets_x86_64 ? 'sse2'  : '';
$ssse3_x86_64 = $targets_x86_64 ? 'ssse3' : '';
$avx_x86_64   = $targets_x86_64 ? 'avx'   : '';
$avx2_x86_64  = $targets_x86_64 ? 'avx2'  : '';

@block_widths = (4, 8, 16, 32, 64, 128);

# Encoder block sizes: every [width, height] pair whose sides differ by at
# most a factor of two, in width-major order.
@encoder_block_sizes = ();
foreach my $bw (@block_widths) {
  push @encoder_block_sizes,
       map  { [$bw, $_] }
       grep { $bw <= 2 * $_ && $_ <= 2 * $bw } @block_widths;
}

# The 4:1 / 1:4 block sizes are added unless this is a realtime-only build.
if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
  push @encoder_block_sizes,
       ([4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16]);
}

# Transform sizes: for each width the square size first, then, per height,
# the 2:1 shapes and (when enabled) the 4:1 shapes.
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach my $tw (@tx_dims) {
  push @tx_sizes, [$tw, $tw];
  foreach my $th (@tx_dims) {
    my $big_enough = ($tw >= 4 && $th >= 4);
    my $ratio_2to1 = ($tw == 2 * $th || $th == 2 * $tw);
    my $ratio_4to1 = ($tw == 4 * $th || $th == 4 * $tw);

    push @tx_sizes, [$tw, $th] if ($big_enough && $ratio_2to1);
    if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
        (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
      push @tx_sizes, [$tw, $th] if ($big_enough && $ratio_4to1);
    }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
  }
}

@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#

# One prototype per (transform size, predictor) pair, plus the highbd variant
# when high bit depth is enabled.
foreach my $tx_size (@tx_sizes) {
  # $w and $h are deliberately the file-level globals, so they keep the last
  # transform size once the loop has finished.
  ($w, $h) = @$tx_size;
  my $dims = "${w}x${h}";
  foreach my $pred (@pred_names) {
    add_proto "void", "aom_${pred}_predictor_${dims}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
    next unless (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes");
    add_proto "void", "aom_highbd_${pred}_predictor_${dims}",
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd";
  }
}

# Size groups used by the specialization tables below.
my @pred_sizes_narrow = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/;
my @pred_sizes_wide   = qw/32x16 32x32 32x64 64x32 64x64/;

# dc_top / dc_left / dc_128 / v: neon + sse2 everywhere, avx2 as well for the
# 32- and 64-wide sizes.
foreach my $pred (qw/dc_top dc_left dc_128 v/) {
  foreach my $size (@pred_sizes_narrow) {
    specialize("aom_${pred}_predictor_${size}", qw/neon sse2/);
  }
  foreach my $size (@pred_sizes_wide) {
    specialize("aom_${pred}_predictor_${size}", qw/neon sse2 avx2/);
  }
}

# h: neon + sse2 everywhere; 32x32 is the only size with an avx2 version.
foreach my $size (@pred_sizes_narrow, @pred_sizes_wide) {
  my @isas = ($size eq '32x32') ? qw/neon sse2 avx2/ : qw/neon sse2/;
  specialize("aom_h_predictor_${size}", @isas);
}

# paeth, sizes up to 32x32: avx2 is added from 16 pixels wide upwards.
foreach my $size (qw/4x4 4x8 8x4 8x8 8x16/) {
  specialize("aom_paeth_predictor_${size}", qw/ssse3 neon/);
}
foreach my $size (qw/16x8 16x16 16x32 32x16 32x32/) {
  specialize("aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/);
}
# paeth, the three largest square/2:1 sizes.
foreach my $size (qw/32x64 64x32 64x64/) {
  specialize("aom_paeth_predictor_${size}", qw/ssse3 avx2 neon/);
}

# Size groups shared by the specialization tables in this section.
my @dsp_sizes_to_16x32 = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/;
my @dsp_sizes_to_32x32 = (@dsp_sizes_to_16x32, qw/32x16 32x32/);
my @dsp_sizes_with_64  = qw/32x64 64x32 64x64/;
my @dsp_sizes_regular  = (@dsp_sizes_to_32x32, @dsp_sizes_with_64);
my @dsp_sizes_4to1     = qw/4x16 8x32 16x4 16x64 32x8 64x16/;

# smooth / smooth_v / smooth_h: neon + ssse3 for every regular size.
foreach my $pred (qw/smooth smooth_v smooth_h/) {
  foreach my $size (@dsp_sizes_regular) {
    specialize("aom_${pred}_predictor_${size}", qw/neon ssse3/);
  }
}

# TODO(yunqingwang): optimize rectangular DC_PRED to replace division
# by multiply and shift.
foreach my $size (@dsp_sizes_to_16x32) {
  specialize("aom_dc_predictor_${size}", qw/neon sse2/);
}
foreach my $size (qw/32x16 32x32 32x64 64x64 64x32/) {
  specialize("aom_dc_predictor_${size}", qw/neon sse2 avx2/);
}


# 4:1 and 1:4 sizes.
if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") || (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
  # dc_top / dc_left / dc_128 / v: only 64x16 has an avx2 version.
  foreach my $pred (qw/dc_top dc_left dc_128 v/) {
    foreach my $size (@dsp_sizes_4to1) {
      my @isas = ($size eq '64x16') ? qw/neon sse2 avx2/ : qw/neon sse2/;
      specialize("aom_${pred}_predictor_${size}", @isas);
    }
  }

  foreach my $size (@dsp_sizes_4to1) {
    specialize("aom_h_predictor_${size}", qw/neon sse2/);
  }

  # paeth: 16x64 and 64x16 additionally have avx2.
  foreach my $size (@dsp_sizes_4to1) {
    my @isas = ($size eq '16x64' || $size eq '64x16')
        ? qw/ssse3 avx2 neon/
        : qw/ssse3 neon/;
    specialize("aom_paeth_predictor_${size}", @isas);
  }

  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    foreach my $size (@dsp_sizes_4to1) {
      specialize("aom_${pred}_predictor_${size}", qw/neon ssse3/);
    }
  }

  foreach my $size (@dsp_sizes_4to1) {
    my @isas = ($size eq '64x16') ? qw/neon sse2 avx2/ : qw/neon sse2/;
    specialize("aom_dc_predictor_${size}", @isas);
  }
}  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # v / dc / h / dc_128 / dc_left / dc_top: sse2 + neon up to 32x32, neon only
  # for the sizes with a 64-pixel side.
  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  foreach my $pred (qw/v dc h dc_128 dc_left dc_top/) {
    foreach my $size (@dsp_sizes_to_32x32) {
      specialize("aom_highbd_${pred}_predictor_${size}", qw/sse2 neon/);
    }
    foreach my $size (@dsp_sizes_with_64) {
      specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
    }
  }

  # paeth / smooth / smooth_v / smooth_h: neon only, every regular size.
  foreach my $pred (qw/paeth smooth smooth_v smooth_h/) {
    foreach my $size (@dsp_sizes_regular) {
      specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
    }
  }

  if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
      (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    # All high-bitdepth predictors: neon only for the 4:1 and 1:4 sizes.
    foreach my $pred (qw/v dc h dc_128 dc_left dc_top paeth smooth smooth_v smooth_h/) {
      foreach my $size (@dsp_sizes_4to1) {
        specialize("aom_highbd_${pred}_predictor_${size}", qw/neon/);
      }
    }
  }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
}
#
# Sub Pixel Filters
#

# Argument list shared by the 8-tap horizontal/vertical convolve prototypes.
# It is always passed interpolated ("...") so each prototype receives its own
# copy of the string rather than the shared variable.
my $convolve8_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

add_proto "void", "aom_convolve_copy", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h";
foreach my $dir (qw/horiz vert/) {
  add_proto "void", "aom_convolve8_${dir}", "$convolve8_args";
}

specialize qw/aom_convolve_copy neon sse2 avx2/;
foreach my $dir (qw/horiz vert/) {
  # $avx2_ssse3 is 'avx2' or empty; it is passed through as the last entry.
  specialize("aom_convolve8_${dir}", qw/neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3");
}

add_proto "void", "aom_scaled_2d", "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/aom_scaled_2d ssse3 neon neon_dotprod neon_i8mm/;

if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto "void", "aom_highbd_convolve_copy", "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h";
  specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/;

  # Same arguments as the 8-bit 8-tap filters, with the bit depth appended.
  foreach my $dir (qw/horiz vert/) {
    add_proto "void", "aom_highbd_convolve8_${dir}", "${convolve8_args}, int bd";
    specialize("aom_highbd_convolve8_${dir}", qw/sse2 avx2 neon sve/);
  }
}

#
# Loopfilter
#

# The three argument lists used by the 8-bit loop-filter prototypes: the
# plain form, the "dual" form with two threshold sets, and the "quad" form.
# They are passed interpolated so each prototype gets its own string copy.
my $lpf_args      = "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
my $lpf_dual_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
my $lpf_quad_args = "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0";

add_proto "void", "aom_lpf_vertical_14", "$lpf_args";
specialize qw/aom_lpf_vertical_14 sse2 neon/;

add_proto "void", "aom_lpf_vertical_14_dual", "$lpf_dual_args";
specialize qw/aom_lpf_vertical_14_dual sse2 neon/;

add_proto "void", "aom_lpf_vertical_14_quad", "$lpf_quad_args";
specialize qw/aom_lpf_vertical_14_quad avx2 sse2 neon/;

add_proto "void", "aom_lpf_vertical_6", "$lpf_args";
specialize qw/aom_lpf_vertical_6 sse2 neon/;

add_proto "void", "aom_lpf_vertical_8", "$lpf_args";
specialize qw/aom_lpf_vertical_8 sse2 neon/;

add_proto "void", "aom_lpf_vertical_8_dual", "$lpf_dual_args";
566specialize qw/aom_lpf_vertical_8_dual sse2 neon/; 567 568add_proto qw/void aom_lpf_vertical_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 569specialize qw/aom_lpf_vertical_8_quad sse2 neon/; 570 571add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 572specialize qw/aom_lpf_vertical_4 sse2 neon/; 573 574add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 575specialize qw/aom_lpf_vertical_4_dual sse2 neon/; 576 577add_proto qw/void aom_lpf_vertical_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 578specialize qw/aom_lpf_vertical_4_quad sse2 neon/; 579 580add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 581specialize qw/aom_lpf_horizontal_14 sse2 neon/; 582 583add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 584specialize qw/aom_lpf_horizontal_14_dual sse2 neon/; 585 586add_proto qw/void aom_lpf_horizontal_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 587specialize qw/aom_lpf_horizontal_14_quad sse2 avx2 neon/; 588 589add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 590specialize qw/aom_lpf_horizontal_6 sse2 neon/; 591 592add_proto qw/void aom_lpf_horizontal_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t 
*thresh1"; 593specialize qw/aom_lpf_horizontal_6_dual sse2 neon/; 594 595add_proto qw/void aom_lpf_horizontal_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 596specialize qw/aom_lpf_horizontal_6_quad sse2 avx2 neon/; 597 598add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 599specialize qw/aom_lpf_horizontal_8 sse2 neon/; 600 601add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 602specialize qw/aom_lpf_horizontal_8_dual sse2 neon/; 603 604add_proto qw/void aom_lpf_horizontal_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 605specialize qw/aom_lpf_horizontal_8_quad sse2 avx2 neon/; 606 607add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 608specialize qw/aom_lpf_horizontal_4 sse2 neon/; 609 610add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 611specialize qw/aom_lpf_horizontal_4_dual sse2 neon/; 612 613add_proto qw/void aom_lpf_horizontal_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 614specialize qw/aom_lpf_horizontal_4_quad sse2 neon/; 615 616add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 617specialize qw/aom_lpf_vertical_6_dual sse2 neon/; 618 619add_proto qw/void aom_lpf_vertical_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const 
uint8_t *limit0, const uint8_t *thresh0"; 620specialize qw/aom_lpf_vertical_6_quad sse2 neon/; 621 622if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 623 add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 624 specialize qw/aom_highbd_lpf_vertical_14 neon sse2/; 625 626 add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 627 specialize qw/aom_highbd_lpf_vertical_14_dual neon sse2 avx2/; 628 629 add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 630 specialize qw/aom_highbd_lpf_vertical_8 neon sse2/; 631 632 add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 633 specialize qw/aom_highbd_lpf_vertical_8_dual neon sse2 avx2/; 634 635 add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 636 specialize qw/aom_highbd_lpf_vertical_6 neon sse2/; 637 638 add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 639 specialize qw/aom_highbd_lpf_vertical_6_dual neon sse2/; 640 641 add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 642 specialize qw/aom_highbd_lpf_vertical_4 neon sse2/; 643 644 add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t 
*blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 645 specialize qw/aom_highbd_lpf_vertical_4_dual neon sse2 avx2/; 646 647 add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 648 specialize qw/aom_highbd_lpf_horizontal_14 neon sse2/; 649 650 add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd"; 651 specialize qw/aom_highbd_lpf_horizontal_14_dual neon sse2 avx2/; 652 653 add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 654 specialize qw/aom_highbd_lpf_horizontal_6 neon sse2/; 655 656 add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 657 specialize qw/aom_highbd_lpf_horizontal_6_dual neon sse2/; 658 659 add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 660 specialize qw/aom_highbd_lpf_horizontal_8 neon sse2/; 661 662 add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 663 specialize qw/aom_highbd_lpf_horizontal_8_dual neon sse2 avx2/; 664 665 add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 666 specialize qw/aom_highbd_lpf_horizontal_4 neon sse2/; 667 668 add_proto 
qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 669 specialize qw/aom_highbd_lpf_horizontal_4_dual neon sse2 avx2/; 670} 671 672# 673# Encoder functions. 674# 675 676# 677# Forward transform 678# 679if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){ 680 add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; 681 specialize qw/aom_fdct4x4 neon sse2/; 682 683 add_proto qw/void aom_fdct4x4_lp/, "const int16_t *input, int16_t *output, int stride"; 684 specialize qw/aom_fdct4x4_lp neon sse2/; 685 686 if (aom_config("CONFIG_INTERNAL_STATS") eq "yes"){ 687 # 8x8 DCT transform for psnr-hvs. Unlike other transforms, it isn't compatible 688 # with av1 scan orders, because it does two transposes. 689 add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 690 specialize qw/aom_fdct8x8 neon sse2/, "$ssse3_x86_64"; 691 # High bit depth 692 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 693 add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 694 specialize qw/aom_highbd_fdct8x8 sse2/; 695 } 696 } 697 # FFT/IFFT (float) only used for denoising (and noise power spectral density estimation) 698 add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output"; 699 700 add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output"; 701 specialize qw/aom_fft4x4_float sse2/; 702 703 add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output"; 704 specialize qw/aom_fft8x8_float avx2 sse2/; 705 706 add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output"; 707 specialize qw/aom_fft16x16_float avx2 sse2/; 708 709 add_proto qw/void aom_fft32x32_float/, "const float *input, float *temp, float *output"; 710 specialize
qw/aom_fft32x32_float avx2 sse2/; 711 712 add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output"; 713 714 add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output"; 715 specialize qw/aom_ifft4x4_float sse2/; 716 717 add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output"; 718 specialize qw/aom_ifft8x8_float avx2 sse2/; 719 720 add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output"; 721 specialize qw/aom_ifft16x16_float avx2 sse2/; 722 723 add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output"; 724 specialize qw/aom_ifft32x32_float avx2 sse2/; 725} # CONFIG_AV1_ENCODER 726 727# 728# Quantization 729# 730if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 731 add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 732 specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64"; 733 734 add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 735 specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64"; 736 737 add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 738 
specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/; 739 740 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 741 add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 742 specialize qw/aom_quantize_b_adaptive sse2 avx2/; 743 744 add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 745 specialize qw/aom_quantize_b_32x32_adaptive sse2/; 746 747 add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 748 specialize qw/aom_quantize_b_64x64_adaptive sse2/; 749 } 750} # CONFIG_AV1_ENCODER 751 752if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 753 add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 754 specialize qw/aom_highbd_quantize_b sse2 avx2 neon/; 755 756 add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t 
*zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 757 specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/; 758 759 add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 760 specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/; 761 762 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 763 add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 764 specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/; 765 766 add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 767 specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/; 768 769 add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 770 
specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/; 771 } 772} # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH 773 774# 775# Alpha blending with mask 776# 777add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params"; 778specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/; 779add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh"; 780add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 781add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 782specialize "aom_blend_a64_mask", qw/sse4_1 neon avx2/; 783specialize "aom_blend_a64_hmask", qw/sse4_1 neon/; 784specialize "aom_blend_a64_vmask", qw/sse4_1 neon/; 785 786if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 787 add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd"; 788 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 789 add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1,
uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 790 add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd"; 791 specialize "aom_highbd_blend_a64_mask", qw/sse4_1 neon/; 792 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1 neon/; 793 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1 neon/; 794 specialize "aom_highbd_blend_a64_d16_mask", qw/sse4_1 neon avx2/; 795} 796 797if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 798 # 799 # Block subtraction 800 # 801 add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 802 specialize qw/aom_subtract_block neon sse2 avx2/; 803 804 add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height"; 805 specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/; 806 807 add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum"; 808 specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/; 809 810 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 811 add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 812 specialize qw/aom_highbd_subtract_block sse2 neon/; 813 814 add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height"; 815 specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/; 816 } 817 818 # 819 # Sum of Squares 820 # 821 add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int 
stride, int width, int height"; 822 specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/; 823 824 add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N"; 825 specialize qw/aom_sum_squares_i16 sse2 neon sve/; 826 827 add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height"; 828 specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/; 829 830 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 831 add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height"; 832 specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/; 833 } 834 835 # 836 # Single block SAD / Single block Avg SAD 837 # 838 foreach (@encoder_block_sizes) { 839 ($w, $h) = @$_; 840 add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 841 if ($h >= 16) { 842 add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 843 } 844 if ($w != 4 && $h != 4) { 845 add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 846 } 847 } 848 849 add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum"; 850 specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/; 851 specialize qw/aom_sad128x128 avx2 sse2 neon neon_dotprod/; 852 specialize qw/aom_sad128x64 avx2 sse2 neon neon_dotprod/; 853 specialize qw/aom_sad64x128 avx2 sse2 neon neon_dotprod/; 854 specialize qw/aom_sad64x64 avx2 sse2 neon neon_dotprod/; 855 specialize qw/aom_sad64x32 avx2 sse2 neon neon_dotprod/; 856 specialize qw/aom_sad32x64 avx2 sse2 neon neon_dotprod/; 857 specialize qw/aom_sad32x32 avx2 sse2 neon neon_dotprod/; 858 specialize qw/aom_sad32x16 avx2 sse2 neon neon_dotprod/; 859 specialize qw/aom_sad16x32 sse2 neon neon_dotprod/; 860 specialize 
qw/aom_sad16x16 sse2 neon neon_dotprod/; 861 specialize qw/aom_sad16x8 sse2 neon neon_dotprod/; 862 specialize qw/aom_sad8x16 sse2 neon/; 863 specialize qw/aom_sad8x8 sse2 neon/; 864 specialize qw/aom_sad8x4 sse2 neon/; 865 specialize qw/aom_sad4x8 sse2 neon/; 866 specialize qw/aom_sad4x4 sse2 neon/; 867 868 specialize qw/aom_sad4x16 sse2 neon/; 869 specialize qw/aom_sad16x4 sse2 neon neon_dotprod/; 870 specialize qw/aom_sad8x32 sse2 neon/; 871 specialize qw/aom_sad32x8 sse2 neon neon_dotprod/; 872 specialize qw/aom_sad16x64 sse2 neon neon_dotprod/; 873 specialize qw/aom_sad64x16 sse2 neon neon_dotprod/; 874 875 specialize qw/aom_sad_skip_128x128 avx2 sse2 neon neon_dotprod/; 876 specialize qw/aom_sad_skip_128x64 avx2 sse2 neon neon_dotprod/; 877 specialize qw/aom_sad_skip_64x128 avx2 sse2 neon neon_dotprod/; 878 specialize qw/aom_sad_skip_64x64 avx2 sse2 neon neon_dotprod/; 879 specialize qw/aom_sad_skip_64x32 avx2 sse2 neon neon_dotprod/; 880 specialize qw/aom_sad_skip_32x64 avx2 sse2 neon neon_dotprod/; 881 specialize qw/aom_sad_skip_32x32 avx2 sse2 neon neon_dotprod/; 882 specialize qw/aom_sad_skip_32x16 avx2 sse2 neon neon_dotprod/; 883 specialize qw/aom_sad_skip_16x32 sse2 neon neon_dotprod/; 884 specialize qw/aom_sad_skip_16x16 sse2 neon neon_dotprod/; 885 specialize qw/aom_sad_skip_16x8 sse2 neon neon_dotprod/; 886 specialize qw/aom_sad_skip_8x16 sse2 neon/; 887 888 specialize qw/aom_sad_skip_4x16 sse2 neon/; 889 specialize qw/aom_sad_skip_8x32 sse2 neon/; 890 specialize qw/aom_sad_skip_16x64 sse2 neon neon_dotprod/; 891 specialize qw/aom_sad_skip_64x16 sse2 neon neon_dotprod/; 892 893 specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/; 894 specialize qw/aom_sad128x64_avg avx2 sse2 neon neon_dotprod/; 895 specialize qw/aom_sad64x128_avg avx2 sse2 neon neon_dotprod/; 896 specialize qw/aom_sad64x64_avg avx2 sse2 neon neon_dotprod/; 897 specialize qw/aom_sad64x32_avg avx2 sse2 neon neon_dotprod/; 898 specialize qw/aom_sad32x64_avg avx2 sse2 neon 
neon_dotprod/; 899 specialize qw/aom_sad32x32_avg avx2 sse2 neon neon_dotprod/; 900 specialize qw/aom_sad32x16_avg avx2 sse2 neon neon_dotprod/; 901 specialize qw/aom_sad16x32_avg sse2 neon neon_dotprod/; 902 specialize qw/aom_sad16x16_avg sse2 neon neon_dotprod/; 903 specialize qw/aom_sad16x8_avg sse2 neon neon_dotprod/; 904 specialize qw/aom_sad8x16_avg sse2 neon/; 905 specialize qw/aom_sad8x8_avg sse2 neon/; 906 907 specialize qw/aom_sad8x32_avg sse2 neon/; 908 specialize qw/aom_sad32x8_avg sse2 neon neon_dotprod/; 909 specialize qw/aom_sad16x64_avg sse2 neon neon_dotprod/; 910 specialize qw/aom_sad64x16_avg sse2 neon neon_dotprod/; 911 912 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 913 foreach (@encoder_block_sizes) { 914 ($w, $h) = @$_; 915 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 916 if ($h >= 16) { 917 add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 918 } 919 if ($w != 4 && $h != 4) { 920 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 921 } 922 if ($w != 128 && $h != 128 && $w != 4) { 923 specialize "aom_highbd_sad${w}x${h}", qw/sse2/; 924 specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/; 925 } 926 } 927 specialize qw/aom_highbd_sad128x128 avx2 neon/; 928 specialize qw/aom_highbd_sad128x64 avx2 neon/; 929 specialize qw/aom_highbd_sad64x128 avx2 neon/; 930 specialize qw/aom_highbd_sad64x64 avx2 sse2 neon/; 931 specialize qw/aom_highbd_sad64x32 avx2 sse2 neon/; 932 specialize qw/aom_highbd_sad32x64 avx2 sse2 neon/; 933 specialize qw/aom_highbd_sad32x32 avx2 sse2 neon/; 934 specialize qw/aom_highbd_sad32x16 avx2 sse2 neon/; 935 specialize qw/aom_highbd_sad16x32 avx2 sse2 neon/; 936 specialize qw/aom_highbd_sad16x16 avx2 sse2 neon/; 937 
specialize qw/aom_highbd_sad16x8 avx2 sse2 neon/; 938 specialize qw/aom_highbd_sad8x16 sse2 neon/; 939 specialize qw/aom_highbd_sad8x8 sse2 neon/; 940 specialize qw/aom_highbd_sad8x4 sse2 neon/; 941 specialize qw/aom_highbd_sad4x8 sse2 neon/; 942 specialize qw/aom_highbd_sad4x4 sse2 neon/; 943 944 specialize qw/aom_highbd_sad4x16 sse2 neon/; 945 specialize qw/aom_highbd_sad16x4 avx2 sse2 neon/; 946 specialize qw/aom_highbd_sad8x32 sse2 neon/; 947 specialize qw/aom_highbd_sad32x8 avx2 sse2 neon/; 948 specialize qw/aom_highbd_sad16x64 avx2 sse2 neon/; 949 specialize qw/aom_highbd_sad64x16 avx2 sse2 neon/; 950 951 specialize qw/aom_highbd_sad_skip_128x128 avx2 neon/; 952 specialize qw/aom_highbd_sad_skip_128x64 avx2 neon/; 953 specialize qw/aom_highbd_sad_skip_64x128 avx2 neon/; 954 specialize qw/aom_highbd_sad_skip_64x64 avx2 sse2 neon/; 955 specialize qw/aom_highbd_sad_skip_64x32 avx2 sse2 neon/; 956 specialize qw/aom_highbd_sad_skip_32x64 avx2 sse2 neon/; 957 specialize qw/aom_highbd_sad_skip_32x32 avx2 sse2 neon/; 958 specialize qw/aom_highbd_sad_skip_32x16 avx2 sse2 neon/; 959 specialize qw/aom_highbd_sad_skip_16x32 avx2 sse2 neon/; 960 specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2 neon/; 961 specialize qw/aom_highbd_sad_skip_8x16 sse2 neon/; 962 963 specialize qw/aom_highbd_sad_skip_4x16 sse2 neon/; 964 specialize qw/aom_highbd_sad_skip_8x32 sse2 neon/; 965 specialize qw/aom_highbd_sad_skip_16x64 avx2 sse2 neon/; 966 specialize qw/aom_highbd_sad_skip_64x16 avx2 sse2 neon/; 967 968 specialize qw/aom_highbd_sad128x128_avg avx2 neon/; 969 specialize qw/aom_highbd_sad128x64_avg avx2 neon/; 970 specialize qw/aom_highbd_sad64x128_avg avx2 neon/; 971 specialize qw/aom_highbd_sad64x64_avg avx2 sse2 neon/; 972 specialize qw/aom_highbd_sad64x32_avg avx2 sse2 neon/; 973 specialize qw/aom_highbd_sad32x64_avg avx2 sse2 neon/; 974 specialize qw/aom_highbd_sad32x32_avg avx2 sse2 neon/; 975 specialize qw/aom_highbd_sad32x16_avg avx2 sse2 neon/; 976 specialize 
qw/aom_highbd_sad16x32_avg avx2 sse2 neon/; 977 specialize qw/aom_highbd_sad16x16_avg avx2 sse2 neon/; 978 specialize qw/aom_highbd_sad16x8_avg avx2 sse2 neon/; 979 specialize qw/aom_highbd_sad8x16_avg neon/; 980 specialize qw/aom_highbd_sad8x8_avg neon/; 981 982 specialize qw/aom_highbd_sad8x32_avg sse2 neon/; 983 specialize qw/aom_highbd_sad16x64_avg avx2 sse2 neon/; 984 specialize qw/aom_highbd_sad32x8_avg avx2 sse2 neon/; 985 specialize qw/aom_highbd_sad64x16_avg avx2 sse2 neon/; 986 } 987 # 988 # Masked SAD 989 # 990 foreach (@encoder_block_sizes) { 991 ($w, $h) = @$_; 992 add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask"; 993 specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 994 } 995 996 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 997 foreach (@encoder_block_sizes) { 998 ($w, $h) = @$_; 999 add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask"; 1000 specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 1001 } 1002 } 1003 1004 # 1005 # OBMC SAD 1006 # 1007 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1008 foreach (@encoder_block_sizes) { 1009 ($w, $h) = @$_; 1010 add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1011 if (! 
(($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1012 specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1013 } 1014 } 1015 1016 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1017 foreach (@encoder_block_sizes) { 1018 ($w, $h) = @$_; 1019 add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1020 if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1021 specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1022 } 1023 } 1024 } 1025 } 1026 1027 # 1028 # Multi-block SAD, comparing a reference to N independent blocks 1029 # 1030 foreach (@encoder_block_sizes) { 1031 ($w, $h) = @$_; 1032 add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1033 add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1034 if ($h >= 16) { 1035 add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1036 } 1037 } 1038 1039 specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/; 1040 specialize qw/aom_sad128x64x4d avx2 sse2 neon neon_dotprod/; 1041 specialize qw/aom_sad64x128x4d avx2 sse2 neon neon_dotprod/; 1042 specialize qw/aom_sad64x64x4d avx2 sse2 neon neon_dotprod/; 1043 specialize qw/aom_sad64x32x4d avx2 sse2 neon neon_dotprod/; 1044 specialize qw/aom_sad32x64x4d avx2 sse2 neon neon_dotprod/; 1045 specialize qw/aom_sad32x32x4d avx2 sse2 neon neon_dotprod/; 1046 specialize qw/aom_sad32x16x4d avx2 sse2 neon neon_dotprod/; 1047 specialize qw/aom_sad16x32x4d avx2 sse2 neon neon_dotprod/; 1048 specialize qw/aom_sad16x16x4d avx2 sse2 neon neon_dotprod/; 1049 specialize qw/aom_sad16x8x4d avx2 sse2 neon neon_dotprod/; 1050 1051 specialize 
qw/aom_sad8x16x4d sse2 neon/; 1052 specialize qw/aom_sad8x8x4d sse2 neon/; 1053 specialize qw/aom_sad8x4x4d sse2 neon/; 1054 specialize qw/aom_sad4x8x4d sse2 neon/; 1055 specialize qw/aom_sad4x4x4d sse2 neon/; 1056 1057 specialize qw/aom_sad64x16x4d avx2 sse2 neon neon_dotprod/; 1058 specialize qw/aom_sad32x8x4d avx2 sse2 neon neon_dotprod/; 1059 specialize qw/aom_sad16x64x4d avx2 sse2 neon neon_dotprod/; 1060 specialize qw/aom_sad16x4x4d avx2 sse2 neon neon_dotprod/; 1061 specialize qw/aom_sad8x32x4d sse2 neon/; 1062 specialize qw/aom_sad4x16x4d sse2 neon/; 1063 1064 specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/; 1065 specialize qw/aom_sad_skip_128x64x4d avx2 sse2 neon neon_dotprod/; 1066 specialize qw/aom_sad_skip_64x128x4d avx2 sse2 neon neon_dotprod/; 1067 specialize qw/aom_sad_skip_64x64x4d avx2 sse2 neon neon_dotprod/; 1068 specialize qw/aom_sad_skip_64x32x4d avx2 sse2 neon neon_dotprod/; 1069 specialize qw/aom_sad_skip_64x16x4d avx2 sse2 neon neon_dotprod/; 1070 specialize qw/aom_sad_skip_32x64x4d avx2 sse2 neon neon_dotprod/; 1071 specialize qw/aom_sad_skip_32x32x4d avx2 sse2 neon neon_dotprod/; 1072 specialize qw/aom_sad_skip_32x16x4d avx2 sse2 neon neon_dotprod/; 1073 1074 specialize qw/aom_sad_skip_16x64x4d avx2 sse2 neon neon_dotprod/; 1075 specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/; 1076 specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/; 1077 specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/; 1078 specialize qw/aom_sad_skip_8x32x4d sse2 neon/; 1079 specialize qw/aom_sad_skip_8x16x4d sse2 neon/; 1080 specialize qw/aom_sad_skip_4x16x4d sse2 neon/; 1081 1082 specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/; 1083 specialize qw/aom_sad128x64x3d avx2 neon neon_dotprod/; 1084 specialize qw/aom_sad64x128x3d avx2 neon neon_dotprod/; 1085 specialize qw/aom_sad64x64x3d avx2 neon neon_dotprod/; 1086 specialize qw/aom_sad64x32x3d avx2 neon neon_dotprod/; 1087 specialize 
qw/aom_sad32x64x3d avx2 neon neon_dotprod/; 1088 specialize qw/aom_sad32x32x3d avx2 neon neon_dotprod/; 1089 specialize qw/aom_sad32x16x3d avx2 neon neon_dotprod/; 1090 specialize qw/aom_sad16x32x3d avx2 neon neon_dotprod/; 1091 specialize qw/aom_sad16x16x3d avx2 neon neon_dotprod/; 1092 specialize qw/aom_sad16x8x3d avx2 neon neon_dotprod/; 1093 specialize qw/aom_sad8x16x3d neon/; 1094 specialize qw/aom_sad8x8x3d neon/; 1095 specialize qw/aom_sad8x4x3d neon/; 1096 specialize qw/aom_sad4x8x3d neon/; 1097 specialize qw/aom_sad4x4x3d neon/; 1098 1099 specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/; 1100 specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/; 1101 specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/; 1102 specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/; 1103 specialize qw/aom_sad8x32x3d neon/; 1104 specialize qw/aom_sad4x16x3d neon/; 1105 1106 # 1107 # High bit depth multi-block SAD, comparing a reference to N independent blocks 1108 # 1109 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1110 foreach (@encoder_block_sizes) { 1111 ($w, $h) = @$_; 1112 add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1113 add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1114 if ($h >= 16) { 1115 add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1116 } 1117 if ($w != 128 && $h != 128) { 1118 specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/; 1119 } 1120 } 1121 specialize qw/aom_highbd_sad128x128x4d avx2 neon/; 1122 specialize qw/aom_highbd_sad128x64x4d avx2 neon/; 1123 specialize qw/aom_highbd_sad64x128x4d avx2 neon/; 1124 specialize qw/aom_highbd_sad64x64x4d sse2 avx2 neon/; 1125 specialize qw/aom_highbd_sad64x32x4d sse2
avx2 neon/; 1126 specialize qw/aom_highbd_sad32x64x4d sse2 avx2 neon/; 1127 specialize qw/aom_highbd_sad32x32x4d sse2 avx2 neon/; 1128 specialize qw/aom_highbd_sad32x16x4d sse2 avx2 neon/; 1129 specialize qw/aom_highbd_sad16x32x4d sse2 avx2 neon/; 1130 specialize qw/aom_highbd_sad16x16x4d sse2 avx2 neon/; 1131 specialize qw/aom_highbd_sad16x8x4d sse2 avx2 neon/; 1132 specialize qw/aom_highbd_sad8x16x4d sse2 neon/; 1133 specialize qw/aom_highbd_sad8x8x4d sse2 neon/; 1134 specialize qw/aom_highbd_sad8x4x4d sse2 neon/; 1135 specialize qw/aom_highbd_sad4x8x4d sse2 neon/; 1136 specialize qw/aom_highbd_sad4x4x4d sse2 neon/; 1137 1138 specialize qw/aom_highbd_sad4x16x4d sse2 neon/; 1139 specialize qw/aom_highbd_sad16x4x4d avx2 sse2 neon/; 1140 specialize qw/aom_highbd_sad8x32x4d sse2 neon/; 1141 specialize qw/aom_highbd_sad32x8x4d avx2 sse2 neon/; 1142 specialize qw/aom_highbd_sad16x64x4d avx2 sse2 neon/; 1143 specialize qw/aom_highbd_sad64x16x4d avx2 sse2 neon/; 1144 1145 specialize qw/aom_highbd_sad_skip_128x128x4d avx2 neon/; 1146 specialize qw/aom_highbd_sad_skip_128x64x4d avx2 neon/; 1147 specialize qw/aom_highbd_sad_skip_64x128x4d avx2 neon/; 1148 specialize qw/aom_highbd_sad_skip_64x64x4d avx2 sse2 neon/; 1149 specialize qw/aom_highbd_sad_skip_64x32x4d avx2 sse2 neon/; 1150 specialize qw/aom_highbd_sad_skip_32x64x4d avx2 sse2 neon/; 1151 specialize qw/aom_highbd_sad_skip_32x32x4d avx2 sse2 neon/; 1152 specialize qw/aom_highbd_sad_skip_32x16x4d avx2 sse2 neon/; 1153 specialize qw/aom_highbd_sad_skip_16x32x4d avx2 sse2 neon/; 1154 specialize qw/aom_highbd_sad_skip_16x16x4d avx2 sse2 neon/; 1155 specialize qw/aom_highbd_sad_skip_8x16x4d sse2 neon/; 1156 1157 specialize qw/aom_highbd_sad_skip_4x16x4d sse2 neon/; 1158 specialize qw/aom_highbd_sad_skip_8x32x4d sse2 neon/; 1159 specialize qw/aom_highbd_sad_skip_16x64x4d avx2 sse2 neon/; 1160 specialize qw/aom_highbd_sad_skip_64x16x4d avx2 sse2 neon/; 1161 1162 specialize qw/aom_highbd_sad128x128x3d avx2 neon/; 1163 
# aom_highbd_sadWxHx3d flavour lists: the sizes of width 16 and above listed
# here name avx2 and neon, the 8- and 4-wide sizes name neon only.
    specialize qw/aom_highbd_sad128x64x3d avx2 neon/;
    specialize qw/aom_highbd_sad64x128x3d avx2 neon/;
    specialize qw/aom_highbd_sad64x64x3d avx2 neon/;
    specialize qw/aom_highbd_sad64x32x3d avx2 neon/;
    specialize qw/aom_highbd_sad32x64x3d avx2 neon/;
    specialize qw/aom_highbd_sad32x32x3d avx2 neon/;
    specialize qw/aom_highbd_sad32x16x3d avx2 neon/;
    specialize qw/aom_highbd_sad16x32x3d avx2 neon/;
    specialize qw/aom_highbd_sad16x16x3d avx2 neon/;
    specialize qw/aom_highbd_sad16x8x3d avx2 neon/;
    specialize qw/aom_highbd_sad8x16x3d neon/;
    specialize qw/aom_highbd_sad8x8x3d neon/;
    specialize qw/aom_highbd_sad8x4x3d neon/;
    specialize qw/aom_highbd_sad4x8x3d neon/;
    specialize qw/aom_highbd_sad4x4x3d neon/;

    # 1:4 / 4:1 block shapes.
    specialize qw/aom_highbd_sad64x16x3d avx2 neon/;
    specialize qw/aom_highbd_sad32x8x3d avx2 neon/;
    specialize qw/aom_highbd_sad16x64x3d avx2 neon/;
    specialize qw/aom_highbd_sad16x4x3d avx2 neon/;
    specialize qw/aom_highbd_sad8x32x3d neon/;
    specialize qw/aom_highbd_sad4x16x3d neon/;
  }
  #
  # Avg
  #
  # Each add_proto call gives the return type and function name in the qw//
  # list and the C parameter list as a string. In the two avg prototypes the
  # pointer parameter is left unnamed.
  add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p";
  specialize qw/aom_avg_8x8 sse2 neon/;

  add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p";
  specialize qw/aom_avg_4x4 sse2 neon/;

  add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
  specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;

  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  specialize qw/aom_minmax_8x8 sse2 neon/;

  # aom_highbd_* counterparts, declared only for high-bitdepth builds; the
  # two listed here name neon as their only flavour.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
    specialize qw/aom_highbd_avg_8x8 neon/;
    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
    specialize qw/aom_highbd_avg_4x4 neon/;
# Last entry of the high-bitdepth avg/minmax group.
    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
    specialize qw/aom_highbd_minmax_8x8 neon/;
  }

  # aom_int_pro_row and aom_int_pro_col take the same parameters apart from
  # the name of the int16_t output buffer (hbuf / vbuf).
  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
  specialize qw/aom_int_pro_row avx2 sse2 neon/;

  add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
  specialize qw/aom_int_pro_col avx2 sse2 neon/;

  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
  specialize qw/aom_vector_var avx2 sse4_1 neon sve/;

  #
  # Hadamard transform and SATD for implementing the temporal dependency model
  #
  add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
  specialize qw/aom_hadamard_4x4 sse2 neon/;

  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
  specialize qw/aom_hadamard_8x8 sse2 neon/;

  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
  specialize qw/aom_hadamard_16x16 avx2 sse2 neon/;

  add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
  specialize qw/aom_hadamard_32x32 avx2 sse2 neon/;

  # The _lp prototypes write int16_t coefficients instead of tran_low_t.
  add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
  specialize qw/aom_hadamard_lp_8x8 sse2 neon/;

  add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
  specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;

  add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
  specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;

  # High-bitdepth Hadamard prototypes (8x8, 16x16, 32x32), each listing avx2
  # and neon.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    specialize qw/aom_highbd_hadamard_8x8 avx2 neon/;

    add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    specialize qw/aom_highbd_hadamard_16x16 avx2 neon/;

    add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
    specialize qw/aom_highbd_hadamard_32x32 avx2 neon/;
  }
  # aom_satd takes tran_low_t coefficients; aom_satd_lp takes int16_t.
  add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";
  specialize qw/aom_satd neon sse2 avx2/;

  add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";
  specialize qw/aom_satd_lp sse2 avx2 neon/;


  #
  # Structured Similarity (SSIM)
  #
  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  # $sse2_x86_64 is 'sse2' only when $opts{arch} is "x86_64" and the empty
  # string otherwise (set near the top of this file), so the sse2 flavour is
  # offered for 64-bit x86 builds only.
  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";

  # High-bitdepth SSIM: prototype only, no specialize call follows.
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
  }
} # CONFIG_AV1_ENCODER

# A second section guarded by the same CONFIG_AV1_ENCODER test starts here.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {

  #
  # Specialty Variance
  #
  add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
  specialize qw/aom_get_var_sse_sum_8x8_quad avx2 sse2 neon neon_dotprod/;

  add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
  specialize qw/aom_get_var_sse_sum_16x16_dual avx2 sse2 neon neon_dotprod/;

  # The four MSE prototypes share one parameter list; their flavour lists
  # follow as a separate group.
  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";

  specialize qw/aom_mse16x16 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_mse16x8 sse2 neon neon_dotprod/;
  specialize qw/aom_mse8x16 sse2 neon neon_dotprod/;
  specialize qw/aom_mse8x8 sse2 neon neon_dotprod/;

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # One set of four MSE prototypes per bit depth; the depth is spliced into
    # the function name (aom_highbd_8_..., aom_highbd_10_..., aom_highbd_12_...).
    foreach $bd (8, 10, 12) {
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";

      # `eq` is a string comparison; it matches here because the loop value
      # 8 stringifies to "8".
      if ($bd eq 8) {
        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/;
        specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
# Remaining per-bit-depth flavour lists for the aom_highbd_<bd>_mse functions.
        specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
      } elsif ($bd eq 10) {
        # The 10-bit 16x16 entry is the only one in this chain that lists avx2.
        specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
      } else {
        # Remaining depth in the loop (12).
        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
      }

    }
  }

  # aom_get_mb_ss is declared only when the build is not realtime-only.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
    specialize qw/aom_get_mb_ss sse2 neon/;
  }

  #
  # Variance / Subpixel Variance / Subpixel Avg Variance
  #
  add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
  specialize qw/aom_mse_wxh_16bit sse2 avx2 neon/;

  add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
  specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;

  # Three prototypes per [width, height] entry of @encoder_block_sizes:
  # variance, sub-pixel variance (adds xoffset/yoffset) and sub-pixel average
  # variance (additionally takes second_pred).
  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
  }
  # aom_varianceWxH flavour lists.
  specialize qw/aom_variance128x128 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance128x64 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance64x128 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance64x64 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance64x32 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance32x64 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance32x32 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance32x16 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance16x32 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance16x16 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance16x8 sse2 avx2 neon neon_dotprod/;
  specialize qw/aom_variance8x16 sse2 neon neon_dotprod/;
  specialize qw/aom_variance8x8 sse2 neon neon_dotprod/;
  specialize qw/aom_variance8x4 sse2 neon neon_dotprod/;
  specialize qw/aom_variance4x8 sse2 neon neon_dotprod/;
  specialize qw/aom_variance4x4 sse2 neon neon_dotprod/;

  # aom_sub_pixel_varianceWxH flavour lists.
  specialize qw/aom_sub_pixel_variance128x128 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance128x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance64x128 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance64x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance64x32 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance32x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance32x32 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance32x16 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance16x32 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance16x16 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance16x8 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_variance8x16 neon ssse3/;
  specialize qw/aom_sub_pixel_variance8x8 neon ssse3/;
  specialize qw/aom_sub_pixel_variance8x4 neon ssse3/;
  specialize qw/aom_sub_pixel_variance4x8 neon ssse3/;
  specialize qw/aom_sub_pixel_variance4x4 neon ssse3/;

  # aom_sub_pixel_avg_varianceWxH flavour lists; avx2 is listed for the
  # sizes from 128x128 down to 32x16 only.
  specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance16x32 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance16x16 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance16x8 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance8x16 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance8x8 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance8x4 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance4x8 neon ssse3/;
  specialize qw/aom_sub_pixel_avg_variance4x4 neon ssse3/;

  # The 1:4 / 4:1 sizes are pushed onto @encoder_block_sizes only when
  # CONFIG_REALTIME_ONLY is not "yes", so their flavour lists carry the same
  # guard.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    specialize qw/aom_variance4x16 neon neon_dotprod sse2/;
    specialize qw/aom_variance16x4 neon neon_dotprod sse2 avx2/;
    specialize qw/aom_variance8x32 neon neon_dotprod sse2/;
    specialize qw/aom_variance32x8 neon neon_dotprod sse2 avx2/;
    specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
    specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;

    specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
    specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;
    specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
    specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
    specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
    specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
    specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
  }

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # Same three prototypes as above for every bit depth / block size pair;
    # the depth is spliced into the name after "aom_highbd_".
    foreach $bd (8, 10, 12) {
      foreach (@encoder_block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
      }
    }

    # 12-bit variance flavour lists.
    specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance128x64 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance64x128 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance64x64 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance64x32 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance32x64 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance32x32 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance32x16 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance16x32 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance16x16 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance16x8 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance8x16 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance8x8 sse2 neon sve/;
    specialize qw/aom_highbd_12_variance8x4 neon sve/;
    specialize qw/aom_highbd_12_variance4x8 neon sve/;
    specialize qw/aom_highbd_12_variance4x4 sse4_1 neon sve/;

    # 10-bit variance flavour lists; avx2 is added from 128x128 down to 8x8.
    specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance128x64 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance64x128 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance64x64 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance64x32 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance32x64 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance32x32 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance32x16 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance16x32 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance16x16 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance16x8 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance8x16 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance8x8 sse2 avx2 neon sve/;
    specialize qw/aom_highbd_10_variance8x4 neon sve/;
    specialize qw/aom_highbd_10_variance4x8 neon sve/;
    specialize qw/aom_highbd_10_variance4x4 sse4_1 neon sve/;

    # 8-bit-depth (highbd path) variance flavour lists.
    specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance128x64 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance64x128 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance64x64 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance64x32 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance32x64 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance32x32 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance32x16 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance16x32 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance16x16 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance16x8 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance8x16 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance8x8 sse2 neon sve/;
    specialize qw/aom_highbd_8_variance8x4 neon sve/;
    specialize qw/aom_highbd_8_variance4x8 neon sve/;
    specialize qw/aom_highbd_8_variance4x4 sse4_1 neon sve/;

    # 1:4 / 4:1 sizes, guarded like their prototypes.
    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
      foreach $bd (8, 10, 12) {
        # avx2 is passed only for the 10-bit functions; for 8 and 12 bits an
        # empty string takes its place in the argument list.
        # NOTE(review): presumably specialize ignores the empty entry --
        # confirm in the rtcd driver.
        my $avx2 = ($bd == 10) ? "avx2" : "";
        specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/;
        specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/;
        specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/;
        specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/;
        specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/;
        specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/;
      }
    }

    # 12-bit sub-pixel variance flavour lists.
    specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance128x64 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance64x128 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance4x8 neon/;
    specialize qw/aom_highbd_12_sub_pixel_variance4x4 sse4_1 neon/;

    # 10-bit sub-pixel variance flavour lists start here.
    specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance128x64 sse2 avx2 neon/;
# 10-bit sub-pixel variance flavour lists (continued): avx2 is listed down to
# 8x8; 8x4 lists sse2 and neon, 4x8 neon only, 4x4 sse4_1 and neon.
    specialize qw/aom_highbd_10_sub_pixel_variance64x128 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2 avx2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance4x8 neon/;
    specialize qw/aom_highbd_10_sub_pixel_variance4x4 sse4_1 neon/;

    # 8-bit-depth (highbd path) sub-pixel variance flavour lists.
    specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance128x64 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance64x128 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance4x8 neon/;
    specialize qw/aom_highbd_8_sub_pixel_variance4x4 sse4_1 neon/;

    # 1:4 / 4:1 sizes for all three depths; only built when the encoder is
    # not realtime-only. 4x16 lists neon alone.
    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
      foreach $bd (8, 10, 12) {
        specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/;
      }
    }

    # 12-bit sub-pixel average variance flavour lists: the three sizes with a
    # 128 dimension list neon only.
    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8 neon/;
    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4 sse4_1 neon/;

    # 10-bit sub-pixel average variance flavour lists (same pattern as 12-bit).
    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8 neon/;
    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4 sse4_1 neon/;

    # 8-bit-depth (highbd path) sub-pixel average variance flavour lists.
    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2 neon/;
# Last two entries of the 8-bit-depth (highbd path) sub-pixel average
# variance flavour lists.
    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8 neon/;
    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4 sse4_1 neon/;

    # 1:4 / 4:1 sizes for all three depths; only built when the encoder is
    # not realtime-only. 4x16 lists neon alone.
    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
      foreach $bd (8, 10, 12) {
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/;
        specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/;
      }
    }
  }
  #
  # Masked Variance / Masked Subpixel Variance
  #
  # Every block size gets the same ssse3 / neon flavours, so the specialize
  # call sits inside the loop next to the prototype.
  foreach (@encoder_block_sizes) {
    ($w, $h) = @$_;
    add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
    specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
  }

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # Here $bd carries its surrounding underscores ("_8_", "_10_", "_12_") so
    # it can be spliced directly between "aom_highbd" and the function stem.
    foreach $bd ("_8_", "_10_", "_12_") {
      foreach (@encoder_block_sizes) {
        ($w, $h) = @$_;
        add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
        specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
      }
    }
  }

  #
  # OBMC Variance / OBMC Subpixel Variance
  #
  # The whole OBMC group is skipped when CONFIG_REALTIME_ONLY is "yes".
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    foreach (@encoder_block_sizes) {
      ($w, $h) = @$_;
      add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
      add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
      specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
      specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
    }

    # High-bitdepth OBMC: the sub-pixel functions list neon only.
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      foreach $bd ("_8_", "_10_", "_12_") {
        foreach (@encoder_block_sizes) {
          ($w, $h) = @$_;
          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
          specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/;
          specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/;
        }
      }
    }
  }

  #
  # Comp Avg
  #
  add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
  specialize qw/aom_comp_avg_pred avx2 neon/;

  # High-bitdepth compound average, plus the 16-bit-destination variant of
  # aom_mse_wxh_16bit (its dst parameter is uint16_t * here).
  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
    specialize qw/aom_highbd_comp_avg_pred neon/;

    add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
    specialize qw/aom_mse_wxh_16bit_highbd sse2 avx2 neon sve/;
  }

  # Masked compound prediction: takes the same arguments as aom_comp_avg_pred
  # followed by mask, mask_stride and invert_mask.
  add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
  specialize qw/aom_comp_mask_pred ssse3 avx2 neon/;

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
    specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/;
  }

  # Flow estimation library
  # These three prototypes are declared only when the build is not
  # realtime-only.
  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
    add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
    specialize qw/aom_compute_mean_stddev sse4_1 avx2/;

    add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
    specialize qw/aom_compute_correlation sse4_1 avx2/;

    add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
    specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon sve/;
  }

} # CONFIG_AV1_ENCODER

# The file ends by evaluating to a true value, as Perl expects of a file
# pulled in with require.
1;