1## 2## Copyright (c) 2017, Alliance for Open Media. All rights reserved 3## 4## This source code is subject to the terms of the BSD 2 Clause License and 5## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6## was not distributed with this source code in the LICENSE file, you can 7## obtain it at www.aomedia.org/license/software. If the Alliance for Open 8## Media Patent License 1.0 was not distributed with this source code in the 9## PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10## 11sub aom_dsp_forward_decls() { 12print <<EOF 13/* 14 * DSP 15 */ 16 17#include "aom/aom_integer.h" 18#include "aom_dsp/aom_dsp_common.h" 19#include "av1/common/blockd.h" 20#include "av1/common/enums.h" 21 22EOF 23} 24forward_decls qw/aom_dsp_forward_decls/; 25 26# optimizations which depend on multiple features 27$avx2_ssse3 = ''; 28if ((aom_config("HAVE_AVX2") eq "yes") && (aom_config("HAVE_SSSE3") eq "yes")) { 29 $avx2_ssse3 = 'avx2'; 30} 31 32# functions that are 64 bit only. 33$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = ''; 34if ($opts{arch} eq "x86_64") { 35 $mmx_x86_64 = 'mmx'; 36 $sse2_x86_64 = 'sse2'; 37 $ssse3_x86_64 = 'ssse3'; 38 $avx_x86_64 = 'avx'; 39 $avx2_x86_64 = 'avx2'; 40} 41 42@block_widths = (4, 8, 16, 32, 64, 128); 43 44@encoder_block_sizes = (); 45foreach $w (@block_widths) { 46 foreach $h (@block_widths) { 47 push @encoder_block_sizes, [$w, $h] if ($w <= 2*$h && $h <= 2*$w); 48 } 49} 50 51if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 52 push @encoder_block_sizes, [4, 16]; 53 push @encoder_block_sizes, [16, 4]; 54 push @encoder_block_sizes, [8, 32]; 55 push @encoder_block_sizes, [32, 8]; 56 push @encoder_block_sizes, [16, 64]; 57 push @encoder_block_sizes, [64, 16]; 58} 59 60@tx_dims = (4, 8, 16, 32, 64); 61@tx_sizes = (); 62foreach $w (@tx_dims) { 63 push @tx_sizes, [$w, $w]; 64 foreach $h (@tx_dims) { 65 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 2*$h || $h == 2*$w)); 66 push @tx_sizes, [$w, $h] if ($w >=4 && $h >=4 && ($w == 4*$h || $h == 4*$w)); 67 } 68} 69 70@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/; 71 72# 73# Intra prediction 74# 75 76foreach (@tx_sizes) { 77 ($w, $h) = @$_; 78 foreach $pred_name (@pred_names) { 79 add_proto "void", "aom_${pred_name}_predictor_${w}x${h}", 80 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left"; 81 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 82 add_proto "void", "aom_highbd_${pred_name}_predictor_${w}x${h}", 83 "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd"; 84 } 85 } 86} 87 88specialize qw/aom_dc_top_predictor_4x4 neon sse2/; 89specialize qw/aom_dc_top_predictor_4x8 neon sse2/; 90specialize qw/aom_dc_top_predictor_4x16 neon sse2/; 91specialize qw/aom_dc_top_predictor_8x4 neon sse2/; 92specialize qw/aom_dc_top_predictor_8x8 neon sse2/; 93specialize qw/aom_dc_top_predictor_8x16 neon sse2/; 94specialize qw/aom_dc_top_predictor_8x32 neon sse2/; 95specialize qw/aom_dc_top_predictor_16x4 neon sse2/; 96specialize qw/aom_dc_top_predictor_16x8 neon sse2/; 97specialize qw/aom_dc_top_predictor_16x16 neon sse2/; 98specialize qw/aom_dc_top_predictor_16x32 neon sse2/; 99specialize qw/aom_dc_top_predictor_16x64 neon sse2/; 100specialize qw/aom_dc_top_predictor_32x8 neon sse2/; 101specialize qw/aom_dc_top_predictor_32x16 neon sse2 avx2/; 102specialize qw/aom_dc_top_predictor_32x32 neon sse2 avx2/; 103specialize qw/aom_dc_top_predictor_32x64 neon sse2 avx2/; 104specialize qw/aom_dc_top_predictor_64x16 neon sse2 avx2/; 105specialize qw/aom_dc_top_predictor_64x32 neon sse2 avx2/; 106specialize qw/aom_dc_top_predictor_64x64 neon sse2 avx2/; 107 108specialize qw/aom_dc_left_predictor_4x4 neon sse2/; 109specialize qw/aom_dc_left_predictor_4x8 neon sse2/; 110specialize qw/aom_dc_left_predictor_4x16 neon sse2/; 111specialize qw/aom_dc_left_predictor_8x4 neon sse2/; 112specialize qw/aom_dc_left_predictor_8x8 neon sse2/; 113specialize qw/aom_dc_left_predictor_8x16 neon sse2/; 114specialize qw/aom_dc_left_predictor_8x32 neon sse2/; 115specialize qw/aom_dc_left_predictor_16x4 neon sse2/; 116specialize qw/aom_dc_left_predictor_16x8 neon sse2/; 117specialize qw/aom_dc_left_predictor_16x16 neon sse2/; 118specialize qw/aom_dc_left_predictor_16x32 neon sse2/; 119specialize qw/aom_dc_left_predictor_16x64 neon sse2/; 120specialize qw/aom_dc_left_predictor_32x8 neon sse2/; 121specialize qw/aom_dc_left_predictor_32x16 neon sse2 avx2/; 122specialize qw/aom_dc_left_predictor_32x32 neon sse2 avx2/; 123specialize qw/aom_dc_left_predictor_32x64 neon sse2 avx2/; 124specialize qw/aom_dc_left_predictor_64x16 neon sse2 avx2/; 125specialize qw/aom_dc_left_predictor_64x32 neon sse2 avx2/; 126specialize qw/aom_dc_left_predictor_64x64 neon sse2 avx2/; 127 128specialize qw/aom_dc_128_predictor_4x4 neon sse2/; 129specialize qw/aom_dc_128_predictor_4x8 neon sse2/; 130specialize qw/aom_dc_128_predictor_4x16 neon sse2/; 131specialize qw/aom_dc_128_predictor_8x4 neon sse2/; 132specialize qw/aom_dc_128_predictor_8x8 neon sse2/; 133specialize qw/aom_dc_128_predictor_8x16 neon sse2/; 134specialize qw/aom_dc_128_predictor_8x32 neon sse2/; 135specialize qw/aom_dc_128_predictor_16x4 neon sse2/; 136specialize qw/aom_dc_128_predictor_16x8 neon sse2/; 137specialize qw/aom_dc_128_predictor_16x16 neon sse2/; 138specialize qw/aom_dc_128_predictor_16x32 neon sse2/; 139specialize qw/aom_dc_128_predictor_16x64 neon sse2/; 140specialize qw/aom_dc_128_predictor_32x8 neon sse2/; 141specialize qw/aom_dc_128_predictor_32x16 neon sse2 avx2/; 142specialize qw/aom_dc_128_predictor_32x32 neon sse2 avx2/; 143specialize qw/aom_dc_128_predictor_32x64 neon sse2 avx2/; 144specialize qw/aom_dc_128_predictor_64x16 neon sse2 avx2/; 145specialize qw/aom_dc_128_predictor_64x32 neon sse2 avx2/; 146specialize qw/aom_dc_128_predictor_64x64 neon sse2 avx2/; 147 148specialize qw/aom_v_predictor_4x4 neon sse2/; 149specialize qw/aom_v_predictor_4x8 neon sse2/; 150specialize qw/aom_v_predictor_4x16 neon sse2/; 151specialize qw/aom_v_predictor_8x4 neon sse2/; 152specialize qw/aom_v_predictor_8x8 neon sse2/; 153specialize qw/aom_v_predictor_8x16 neon sse2/; 154specialize qw/aom_v_predictor_8x32 neon sse2/; 155specialize qw/aom_v_predictor_16x4 neon sse2/; 156specialize qw/aom_v_predictor_16x8 neon sse2/; 157specialize qw/aom_v_predictor_16x16 neon sse2/; 158specialize qw/aom_v_predictor_16x32 neon sse2/; 159specialize qw/aom_v_predictor_16x64 neon sse2/; 160specialize qw/aom_v_predictor_32x8 neon sse2/; 161specialize qw/aom_v_predictor_32x16 neon sse2 avx2/; 162specialize qw/aom_v_predictor_32x32 neon sse2 avx2/; 163specialize qw/aom_v_predictor_32x64 neon sse2 avx2/; 164specialize qw/aom_v_predictor_64x16 neon sse2 avx2/; 165specialize qw/aom_v_predictor_64x32 neon sse2 avx2/; 166specialize qw/aom_v_predictor_64x64 neon sse2 avx2/; 167 168specialize qw/aom_h_predictor_4x4 neon sse2/; 169specialize qw/aom_h_predictor_4x8 neon sse2/; 170specialize qw/aom_h_predictor_4x16 neon sse2/; 171specialize qw/aom_h_predictor_8x4 neon sse2/; 172specialize qw/aom_h_predictor_8x8 neon sse2/; 173specialize qw/aom_h_predictor_8x16 neon sse2/; 174specialize qw/aom_h_predictor_8x32 neon sse2/; 175specialize qw/aom_h_predictor_16x4 neon sse2/; 176specialize qw/aom_h_predictor_16x8 neon sse2/; 177specialize qw/aom_h_predictor_16x16 neon sse2/; 178specialize qw/aom_h_predictor_16x32 neon sse2/; 179specialize qw/aom_h_predictor_16x64 neon sse2/; 180specialize qw/aom_h_predictor_32x8 neon sse2/; 181specialize qw/aom_h_predictor_32x16 neon sse2/; 182specialize qw/aom_h_predictor_32x32 neon sse2 avx2/; 183specialize qw/aom_h_predictor_32x64 neon sse2/; 184specialize qw/aom_h_predictor_64x16 neon sse2/; 185specialize qw/aom_h_predictor_64x32 neon sse2/; 186specialize qw/aom_h_predictor_64x64 neon sse2/; 187 188specialize qw/aom_paeth_predictor_4x4 ssse3 neon/; 189specialize qw/aom_paeth_predictor_4x8 ssse3 neon/; 190specialize qw/aom_paeth_predictor_4x16 ssse3 neon/; 191specialize qw/aom_paeth_predictor_8x4 ssse3 neon/; 192specialize qw/aom_paeth_predictor_8x8 ssse3 neon/; 193specialize qw/aom_paeth_predictor_8x16 ssse3 neon/; 194specialize qw/aom_paeth_predictor_8x32 ssse3 neon/; 195specialize qw/aom_paeth_predictor_16x4 ssse3 neon/; 196specialize qw/aom_paeth_predictor_16x8 ssse3 avx2 neon/; 197specialize qw/aom_paeth_predictor_16x16 ssse3 avx2 neon/; 198specialize qw/aom_paeth_predictor_16x32 ssse3 avx2 neon/; 199specialize qw/aom_paeth_predictor_16x64 ssse3 avx2 neon/; 200specialize qw/aom_paeth_predictor_32x8 ssse3 neon/; 201specialize qw/aom_paeth_predictor_32x16 ssse3 avx2 neon/; 202specialize qw/aom_paeth_predictor_32x32 ssse3 avx2 neon/; 203specialize qw/aom_paeth_predictor_32x64 ssse3 avx2 neon/; 204specialize qw/aom_paeth_predictor_64x16 ssse3 avx2 neon/; 205specialize qw/aom_paeth_predictor_64x32 ssse3 avx2 neon/; 206specialize qw/aom_paeth_predictor_64x64 ssse3 avx2 neon/; 207 208specialize qw/aom_smooth_predictor_4x4 neon ssse3/; 209specialize qw/aom_smooth_predictor_4x8 neon ssse3/; 210specialize qw/aom_smooth_predictor_4x16 neon ssse3/; 211specialize qw/aom_smooth_predictor_8x4 neon ssse3/; 212specialize qw/aom_smooth_predictor_8x8 neon ssse3/; 213specialize qw/aom_smooth_predictor_8x16 neon ssse3/; 214specialize qw/aom_smooth_predictor_8x32 neon ssse3/; 215specialize qw/aom_smooth_predictor_16x4 neon ssse3/; 216specialize qw/aom_smooth_predictor_16x8 neon ssse3/; 217specialize qw/aom_smooth_predictor_16x16 neon ssse3/; 218specialize qw/aom_smooth_predictor_16x32 neon ssse3/; 219specialize qw/aom_smooth_predictor_16x64 neon ssse3/; 220specialize qw/aom_smooth_predictor_32x8 neon ssse3/; 221specialize qw/aom_smooth_predictor_32x16 neon ssse3/; 222specialize qw/aom_smooth_predictor_32x32 neon ssse3/; 223specialize qw/aom_smooth_predictor_32x64 neon ssse3/; 224specialize qw/aom_smooth_predictor_64x16 neon ssse3/; 225specialize qw/aom_smooth_predictor_64x32 neon ssse3/; 226specialize qw/aom_smooth_predictor_64x64 neon ssse3/; 227 228specialize qw/aom_smooth_v_predictor_4x4 neon ssse3/; 229specialize qw/aom_smooth_v_predictor_4x8 neon ssse3/; 230specialize qw/aom_smooth_v_predictor_4x16 neon ssse3/; 231specialize qw/aom_smooth_v_predictor_8x4 neon ssse3/; 232specialize qw/aom_smooth_v_predictor_8x8 neon ssse3/; 233specialize qw/aom_smooth_v_predictor_8x16 neon ssse3/; 234specialize qw/aom_smooth_v_predictor_8x32 neon ssse3/; 235specialize qw/aom_smooth_v_predictor_16x4 neon ssse3/; 236specialize qw/aom_smooth_v_predictor_16x8 neon ssse3/; 237specialize qw/aom_smooth_v_predictor_16x16 neon ssse3/; 238specialize qw/aom_smooth_v_predictor_16x32 neon ssse3/; 239specialize qw/aom_smooth_v_predictor_16x64 neon ssse3/; 240specialize qw/aom_smooth_v_predictor_32x8 neon ssse3/; 241specialize qw/aom_smooth_v_predictor_32x16 neon ssse3/; 242specialize qw/aom_smooth_v_predictor_32x32 neon ssse3/; 243specialize qw/aom_smooth_v_predictor_32x64 neon ssse3/; 244specialize qw/aom_smooth_v_predictor_64x16 neon ssse3/; 245specialize qw/aom_smooth_v_predictor_64x32 neon ssse3/; 246specialize qw/aom_smooth_v_predictor_64x64 neon ssse3/; 247 248specialize qw/aom_smooth_h_predictor_4x4 neon ssse3/; 249specialize qw/aom_smooth_h_predictor_4x8 neon ssse3/; 250specialize qw/aom_smooth_h_predictor_4x16 neon ssse3/; 251specialize qw/aom_smooth_h_predictor_8x4 neon ssse3/; 252specialize qw/aom_smooth_h_predictor_8x8 neon ssse3/; 253specialize qw/aom_smooth_h_predictor_8x16 neon ssse3/; 254specialize qw/aom_smooth_h_predictor_8x32 neon ssse3/; 255specialize qw/aom_smooth_h_predictor_16x4 neon ssse3/; 256specialize qw/aom_smooth_h_predictor_16x8 neon ssse3/; 257specialize qw/aom_smooth_h_predictor_16x16 neon ssse3/; 258specialize qw/aom_smooth_h_predictor_16x32 neon ssse3/; 259specialize qw/aom_smooth_h_predictor_16x64 neon ssse3/; 260specialize qw/aom_smooth_h_predictor_32x8 neon ssse3/; 261specialize qw/aom_smooth_h_predictor_32x16 neon ssse3/; 262specialize qw/aom_smooth_h_predictor_32x32 neon ssse3/; 263specialize qw/aom_smooth_h_predictor_32x64 neon ssse3/; 264specialize qw/aom_smooth_h_predictor_64x16 neon ssse3/; 265specialize qw/aom_smooth_h_predictor_64x32 neon ssse3/; 266specialize qw/aom_smooth_h_predictor_64x64 neon ssse3/; 267 268# TODO(yunqingwang): optimize rectangular DC_PRED to replace division 269# by multiply and shift. 270specialize qw/aom_dc_predictor_4x4 neon sse2/; 271specialize qw/aom_dc_predictor_4x8 neon sse2/; 272specialize qw/aom_dc_predictor_4x16 neon sse2/; 273specialize qw/aom_dc_predictor_8x4 neon sse2/; 274specialize qw/aom_dc_predictor_8x8 neon sse2/; 275specialize qw/aom_dc_predictor_8x16 neon sse2/; 276specialize qw/aom_dc_predictor_8x32 neon sse2/; 277specialize qw/aom_dc_predictor_16x4 neon sse2/; 278specialize qw/aom_dc_predictor_16x8 neon sse2/; 279specialize qw/aom_dc_predictor_16x16 neon sse2/; 280specialize qw/aom_dc_predictor_16x32 neon sse2/; 281specialize qw/aom_dc_predictor_16x64 neon sse2/; 282specialize qw/aom_dc_predictor_32x8 neon sse2/; 283specialize qw/aom_dc_predictor_32x16 neon sse2 avx2/; 284specialize qw/aom_dc_predictor_32x32 neon sse2 avx2/; 285specialize qw/aom_dc_predictor_32x64 neon sse2 avx2/; 286specialize qw/aom_dc_predictor_64x64 neon sse2 avx2/; 287specialize qw/aom_dc_predictor_64x32 neon sse2 avx2/; 288specialize qw/aom_dc_predictor_64x16 neon sse2 avx2/; 289if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 290 specialize qw/aom_highbd_v_predictor_4x4 sse2 neon/; 291 specialize qw/aom_highbd_v_predictor_4x8 sse2 neon/; 292 specialize qw/aom_highbd_v_predictor_4x16 neon/; 293 specialize qw/aom_highbd_v_predictor_8x4 sse2 neon/; 294 specialize qw/aom_highbd_v_predictor_8x8 sse2 neon/; 295 specialize qw/aom_highbd_v_predictor_8x16 sse2 neon/; 296 specialize qw/aom_highbd_v_predictor_8x32 neon/; 297 specialize qw/aom_highbd_v_predictor_16x4 neon/; 298 specialize qw/aom_highbd_v_predictor_16x8 sse2 neon/; 299 specialize qw/aom_highbd_v_predictor_16x16 sse2 neon/; 300 specialize qw/aom_highbd_v_predictor_16x32 sse2 neon/; 301 specialize qw/aom_highbd_v_predictor_16x64 neon/; 302 specialize qw/aom_highbd_v_predictor_32x8 neon/; 303 specialize qw/aom_highbd_v_predictor_32x16 sse2 neon/; 304 specialize qw/aom_highbd_v_predictor_32x32 sse2 neon/; 305 specialize qw/aom_highbd_v_predictor_32x64 neon/; 306 specialize qw/aom_highbd_v_predictor_64x16 neon/; 307 specialize qw/aom_highbd_v_predictor_64x32 neon/; 308 specialize qw/aom_highbd_v_predictor_64x64 neon/; 309 310 # TODO(yunqingwang): optimize rectangular DC_PRED to replace division 311 # by multiply and shift. 312 specialize qw/aom_highbd_dc_predictor_4x4 sse2 neon/; 313 specialize qw/aom_highbd_dc_predictor_4x8 sse2 neon/; 314 specialize qw/aom_highbd_dc_predictor_4x16 neon/; 315 specialize qw/aom_highbd_dc_predictor_8x4 sse2 neon/; 316 specialize qw/aom_highbd_dc_predictor_8x8 sse2 neon/; 317 specialize qw/aom_highbd_dc_predictor_8x16 sse2 neon/; 318 specialize qw/aom_highbd_dc_predictor_8x32 neon/; 319 specialize qw/aom_highbd_dc_predictor_16x4 neon/; 320 specialize qw/aom_highbd_dc_predictor_16x8 sse2 neon/; 321 specialize qw/aom_highbd_dc_predictor_16x16 sse2 neon/; 322 specialize qw/aom_highbd_dc_predictor_16x32 sse2 neon/; 323 specialize qw/aom_highbd_dc_predictor_16x64 neon/; 324 specialize qw/aom_highbd_dc_predictor_32x8 neon/; 325 specialize qw/aom_highbd_dc_predictor_32x16 sse2 neon/; 326 specialize qw/aom_highbd_dc_predictor_32x32 sse2 neon/; 327 specialize qw/aom_highbd_dc_predictor_32x64 neon/; 328 specialize qw/aom_highbd_dc_predictor_64x16 neon/; 329 specialize qw/aom_highbd_dc_predictor_64x32 neon/; 330 specialize qw/aom_highbd_dc_predictor_64x64 neon/; 331 332 specialize qw/aom_highbd_h_predictor_4x4 sse2 neon/; 333 specialize qw/aom_highbd_h_predictor_4x8 sse2 neon/; 334 specialize qw/aom_highbd_h_predictor_4x16 neon/; 335 specialize qw/aom_highbd_h_predictor_8x4 sse2 neon/; 336 specialize qw/aom_highbd_h_predictor_8x8 sse2 neon/; 337 specialize qw/aom_highbd_h_predictor_8x16 sse2 neon/; 338 specialize qw/aom_highbd_h_predictor_8x32 neon/; 339 specialize qw/aom_highbd_h_predictor_16x4 neon/; 340 specialize qw/aom_highbd_h_predictor_16x8 sse2 neon/; 341 specialize qw/aom_highbd_h_predictor_16x16 sse2 neon/; 342 specialize qw/aom_highbd_h_predictor_16x32 sse2 neon/; 343 specialize qw/aom_highbd_h_predictor_16x64 neon/; 344 specialize qw/aom_highbd_h_predictor_32x8 neon/; 345 specialize qw/aom_highbd_h_predictor_32x16 sse2 neon/; 346 specialize qw/aom_highbd_h_predictor_32x32 sse2 neon/; 347 specialize qw/aom_highbd_h_predictor_32x64 neon/; 348 specialize qw/aom_highbd_h_predictor_64x16 neon/; 349 specialize qw/aom_highbd_h_predictor_64x32 neon/; 350 specialize qw/aom_highbd_h_predictor_64x64 neon/; 351 352 specialize qw/aom_highbd_dc_128_predictor_4x4 sse2 neon/; 353 specialize qw/aom_highbd_dc_128_predictor_4x8 sse2 neon/; 354 specialize qw/aom_highbd_dc_128_predictor_4x16 neon/; 355 specialize qw/aom_highbd_dc_128_predictor_8x4 sse2 neon/; 356 specialize qw/aom_highbd_dc_128_predictor_8x8 sse2 neon/; 357 specialize qw/aom_highbd_dc_128_predictor_8x16 sse2 neon/; 358 specialize qw/aom_highbd_dc_128_predictor_8x32 neon/; 359 specialize qw/aom_highbd_dc_128_predictor_16x4 neon/; 360 specialize qw/aom_highbd_dc_128_predictor_16x8 sse2 neon/; 361 specialize qw/aom_highbd_dc_128_predictor_16x16 sse2 neon/; 362 specialize qw/aom_highbd_dc_128_predictor_16x32 sse2 neon/; 363 specialize qw/aom_highbd_dc_128_predictor_16x64 neon/; 364 specialize qw/aom_highbd_dc_128_predictor_32x8 neon/; 365 specialize qw/aom_highbd_dc_128_predictor_32x16 sse2 neon/; 366 specialize qw/aom_highbd_dc_128_predictor_32x32 sse2 neon/; 367 specialize qw/aom_highbd_dc_128_predictor_32x64 neon/; 368 specialize qw/aom_highbd_dc_128_predictor_64x16 neon/; 369 specialize qw/aom_highbd_dc_128_predictor_64x32 neon/; 370 specialize qw/aom_highbd_dc_128_predictor_64x64 neon/; 371 372 specialize qw/aom_highbd_dc_left_predictor_4x4 sse2 neon/; 373 specialize qw/aom_highbd_dc_left_predictor_4x8 sse2 neon/; 374 specialize qw/aom_highbd_dc_left_predictor_4x16 neon/; 375 specialize qw/aom_highbd_dc_left_predictor_8x4 sse2 neon/; 376 specialize qw/aom_highbd_dc_left_predictor_8x8 sse2 neon/; 377 specialize qw/aom_highbd_dc_left_predictor_8x16 sse2 neon/; 378 specialize qw/aom_highbd_dc_left_predictor_8x32 neon/; 379 specialize qw/aom_highbd_dc_left_predictor_16x4 neon/; 380 specialize qw/aom_highbd_dc_left_predictor_16x8 sse2 neon/; 381 specialize qw/aom_highbd_dc_left_predictor_16x16 sse2 neon/; 382 specialize qw/aom_highbd_dc_left_predictor_16x32 sse2 neon/; 383 specialize qw/aom_highbd_dc_left_predictor_16x64 neon/; 384 specialize qw/aom_highbd_dc_left_predictor_32x8 neon/; 385 specialize qw/aom_highbd_dc_left_predictor_32x16 sse2 neon/; 386 specialize qw/aom_highbd_dc_left_predictor_32x32 sse2 neon/; 387 specialize qw/aom_highbd_dc_left_predictor_32x64 neon/; 388 specialize qw/aom_highbd_dc_left_predictor_64x16 neon/; 389 specialize qw/aom_highbd_dc_left_predictor_64x32 neon/; 390 specialize qw/aom_highbd_dc_left_predictor_64x64 neon/; 391 392 specialize qw/aom_highbd_dc_top_predictor_4x4 sse2 neon/; 393 specialize qw/aom_highbd_dc_top_predictor_4x8 sse2 neon/; 394 specialize qw/aom_highbd_dc_top_predictor_4x16 neon/; 395 specialize qw/aom_highbd_dc_top_predictor_8x4 sse2 neon/; 396 specialize qw/aom_highbd_dc_top_predictor_8x8 sse2 neon/; 397 specialize qw/aom_highbd_dc_top_predictor_8x16 sse2 neon/; 398 specialize qw/aom_highbd_dc_top_predictor_8x32 neon/; 399 specialize qw/aom_highbd_dc_top_predictor_16x4 neon/; 400 specialize qw/aom_highbd_dc_top_predictor_16x8 sse2 neon/; 401 specialize qw/aom_highbd_dc_top_predictor_16x16 sse2 neon/; 402 specialize qw/aom_highbd_dc_top_predictor_16x32 sse2 neon/; 403 specialize qw/aom_highbd_dc_top_predictor_16x64 neon/; 404 specialize qw/aom_highbd_dc_top_predictor_32x8 neon/; 405 specialize qw/aom_highbd_dc_top_predictor_32x16 sse2 neon/; 406 specialize qw/aom_highbd_dc_top_predictor_32x32 sse2 neon/; 407 specialize qw/aom_highbd_dc_top_predictor_32x64 neon/; 408 specialize qw/aom_highbd_dc_top_predictor_64x16 neon/; 409 specialize qw/aom_highbd_dc_top_predictor_64x32 neon/; 410 specialize qw/aom_highbd_dc_top_predictor_64x64 neon/; 411 412 specialize qw/aom_highbd_paeth_predictor_4x4 neon/; 413 specialize qw/aom_highbd_paeth_predictor_4x8 neon/; 414 specialize qw/aom_highbd_paeth_predictor_4x16 neon/; 415 specialize qw/aom_highbd_paeth_predictor_8x4 neon/; 416 specialize qw/aom_highbd_paeth_predictor_8x8 neon/; 417 specialize qw/aom_highbd_paeth_predictor_8x16 neon/; 418 specialize qw/aom_highbd_paeth_predictor_8x32 neon/; 419 specialize qw/aom_highbd_paeth_predictor_16x4 neon/; 420 specialize qw/aom_highbd_paeth_predictor_16x8 neon/; 421 specialize qw/aom_highbd_paeth_predictor_16x16 neon/; 422 specialize qw/aom_highbd_paeth_predictor_16x32 neon/; 423 specialize qw/aom_highbd_paeth_predictor_16x64 neon/; 424 specialize qw/aom_highbd_paeth_predictor_32x8 neon/; 425 specialize qw/aom_highbd_paeth_predictor_32x16 neon/; 426 specialize qw/aom_highbd_paeth_predictor_32x32 neon/; 427 specialize qw/aom_highbd_paeth_predictor_32x64 neon/; 428 specialize qw/aom_highbd_paeth_predictor_64x16 neon/; 429 specialize qw/aom_highbd_paeth_predictor_64x32 neon/; 430 specialize qw/aom_highbd_paeth_predictor_64x64 neon/; 431 432 specialize qw/aom_highbd_smooth_predictor_4x4 neon/; 433 specialize qw/aom_highbd_smooth_predictor_4x8 neon/; 434 specialize qw/aom_highbd_smooth_predictor_4x16 neon/; 435 specialize qw/aom_highbd_smooth_predictor_8x4 neon/; 436 specialize qw/aom_highbd_smooth_predictor_8x8 neon/; 437 specialize qw/aom_highbd_smooth_predictor_8x16 neon/; 438 specialize qw/aom_highbd_smooth_predictor_8x32 neon/; 439 specialize qw/aom_highbd_smooth_predictor_16x4 neon/; 440 specialize qw/aom_highbd_smooth_predictor_16x8 neon/; 441 specialize qw/aom_highbd_smooth_predictor_16x16 neon/; 442 specialize qw/aom_highbd_smooth_predictor_16x32 neon/; 443 specialize qw/aom_highbd_smooth_predictor_16x64 neon/; 444 specialize qw/aom_highbd_smooth_predictor_32x8 neon/; 445 specialize qw/aom_highbd_smooth_predictor_32x16 neon/; 446 specialize qw/aom_highbd_smooth_predictor_32x32 neon/; 447 specialize qw/aom_highbd_smooth_predictor_32x64 neon/; 448 specialize qw/aom_highbd_smooth_predictor_64x16 neon/; 449 specialize qw/aom_highbd_smooth_predictor_64x32 neon/; 450 specialize qw/aom_highbd_smooth_predictor_64x64 neon/; 451 452 specialize qw/aom_highbd_smooth_v_predictor_4x4 neon/; 453 specialize qw/aom_highbd_smooth_v_predictor_4x8 neon/; 454 specialize qw/aom_highbd_smooth_v_predictor_4x16 neon/; 455 specialize qw/aom_highbd_smooth_v_predictor_8x4 neon/; 456 specialize qw/aom_highbd_smooth_v_predictor_8x8 neon/; 457 specialize qw/aom_highbd_smooth_v_predictor_8x16 neon/; 458 specialize qw/aom_highbd_smooth_v_predictor_8x32 neon/; 459 specialize qw/aom_highbd_smooth_v_predictor_16x4 neon/; 460 specialize qw/aom_highbd_smooth_v_predictor_16x8 neon/; 461 specialize qw/aom_highbd_smooth_v_predictor_16x16 neon/; 462 specialize qw/aom_highbd_smooth_v_predictor_16x32 neon/; 463 specialize qw/aom_highbd_smooth_v_predictor_16x64 neon/; 464 specialize qw/aom_highbd_smooth_v_predictor_32x8 neon/; 465 specialize qw/aom_highbd_smooth_v_predictor_32x16 neon/; 466 specialize qw/aom_highbd_smooth_v_predictor_32x32 neon/; 467 specialize qw/aom_highbd_smooth_v_predictor_32x64 neon/; 468 specialize qw/aom_highbd_smooth_v_predictor_64x16 neon/; 469 specialize qw/aom_highbd_smooth_v_predictor_64x32 neon/; 470 specialize qw/aom_highbd_smooth_v_predictor_64x64 neon/; 471 472 specialize qw/aom_highbd_smooth_h_predictor_4x4 neon/; 473 specialize qw/aom_highbd_smooth_h_predictor_4x8 neon/; 474 specialize qw/aom_highbd_smooth_h_predictor_4x16 neon/; 475 specialize qw/aom_highbd_smooth_h_predictor_8x4 neon/; 476 specialize qw/aom_highbd_smooth_h_predictor_8x8 neon/; 477 specialize qw/aom_highbd_smooth_h_predictor_8x16 neon/; 478 specialize qw/aom_highbd_smooth_h_predictor_8x32 neon/; 479 specialize qw/aom_highbd_smooth_h_predictor_16x4 neon/; 480 specialize qw/aom_highbd_smooth_h_predictor_16x8 neon/; 481 specialize qw/aom_highbd_smooth_h_predictor_16x16 neon/; 482 specialize qw/aom_highbd_smooth_h_predictor_16x32 neon/; 483 specialize qw/aom_highbd_smooth_h_predictor_16x64 neon/; 484 specialize qw/aom_highbd_smooth_h_predictor_32x8 neon/; 485 specialize qw/aom_highbd_smooth_h_predictor_32x16 neon/; 486 specialize qw/aom_highbd_smooth_h_predictor_32x32 neon/; 487 specialize qw/aom_highbd_smooth_h_predictor_32x64 neon/; 488 specialize qw/aom_highbd_smooth_h_predictor_64x16 neon/; 489 specialize qw/aom_highbd_smooth_h_predictor_64x32 neon/; 490 specialize qw/aom_highbd_smooth_h_predictor_64x64 neon/; 491} 492# 493# Sub Pixel Filters 494# 495add_proto qw/void aom_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; 496add_proto qw/void aom_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h"; 497add_proto qw/void aom_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 498add_proto qw/void aom_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h"; 499 500specialize qw/aom_convolve_copy neon sse2 avx2/; 501specialize qw/aom_convolve8_horiz neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3"; 502specialize qw/aom_convolve8_vert neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3"; 503 504add_proto qw/void aom_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; 505specialize qw/aom_scaled_2d ssse3 neon/; 506 507if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 508 add_proto qw/void aom_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h"; 509 specialize qw/aom_highbd_convolve_copy sse2 avx2 neon/; 510 511 add_proto qw/void aom_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd"; 512 specialize qw/aom_highbd_convolve8_horiz sse2 avx2 neon sve/; 513 514 add_proto qw/void aom_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd"; 515 specialize qw/aom_highbd_convolve8_vert sse2 avx2 neon sve/; 516} 517 518# 519# Loopfilter 520# 521add_proto qw/void aom_lpf_vertical_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 522specialize qw/aom_lpf_vertical_14 sse2 neon/; 523 524add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 525specialize qw/aom_lpf_vertical_14_dual sse2 neon/; 526 527add_proto qw/void aom_lpf_vertical_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 528specialize qw/aom_lpf_vertical_14_quad avx2 sse2 neon/; 529 530add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 531specialize qw/aom_lpf_vertical_6 sse2 neon/; 532 533add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 534specialize qw/aom_lpf_vertical_8 sse2 neon/; 535 536add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 537specialize qw/aom_lpf_vertical_8_dual sse2 neon/; 538 539add_proto qw/void aom_lpf_vertical_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 540specialize qw/aom_lpf_vertical_8_quad sse2 neon/; 541 542add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 543specialize qw/aom_lpf_vertical_4 sse2 neon/; 544 545add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 546specialize qw/aom_lpf_vertical_4_dual sse2 neon/; 547 548add_proto qw/void aom_lpf_vertical_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 549specialize qw/aom_lpf_vertical_4_quad sse2 neon/; 550 551add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 552specialize qw/aom_lpf_horizontal_14 sse2 neon/; 553 554add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 555specialize qw/aom_lpf_horizontal_14_dual sse2 neon/; 556 557add_proto qw/void aom_lpf_horizontal_14_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 558specialize qw/aom_lpf_horizontal_14_quad sse2 avx2 neon/; 559 560add_proto qw/void aom_lpf_horizontal_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 561specialize qw/aom_lpf_horizontal_6 sse2 neon/; 562 563add_proto qw/void aom_lpf_horizontal_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 564specialize qw/aom_lpf_horizontal_6_dual sse2 neon/; 565 566add_proto qw/void aom_lpf_horizontal_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 567specialize qw/aom_lpf_horizontal_6_quad sse2 avx2 neon/; 568 569add_proto qw/void aom_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 570specialize qw/aom_lpf_horizontal_8 sse2 neon/; 571 572add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 573specialize qw/aom_lpf_horizontal_8_dual sse2 neon/; 574 575add_proto qw/void aom_lpf_horizontal_8_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 576specialize qw/aom_lpf_horizontal_8_quad sse2 avx2 neon/; 577 578add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh"; 579specialize qw/aom_lpf_horizontal_4 sse2 neon/; 580 581add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 582specialize qw/aom_lpf_horizontal_4_dual sse2 neon/; 583 584add_proto qw/void aom_lpf_horizontal_4_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 585specialize qw/aom_lpf_horizontal_4_quad sse2 neon/; 586 587add_proto qw/void aom_lpf_vertical_6_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1"; 588specialize qw/aom_lpf_vertical_6_dual sse2 neon/; 589 590add_proto qw/void aom_lpf_vertical_6_quad/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0"; 591specialize qw/aom_lpf_vertical_6_quad sse2 neon/; 592 593if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 594 add_proto qw/void aom_highbd_lpf_vertical_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 595 specialize qw/aom_highbd_lpf_vertical_14 neon sse2/; 596 597 add_proto qw/void aom_highbd_lpf_vertical_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 598 specialize qw/aom_highbd_lpf_vertical_14_dual neon sse2 avx2/; 599 600 add_proto qw/void aom_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 601 specialize qw/aom_highbd_lpf_vertical_8 neon sse2/; 602 603 add_proto qw/void aom_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 604 specialize qw/aom_highbd_lpf_vertical_8_dual neon sse2 avx2/; 605 606 add_proto qw/void aom_highbd_lpf_vertical_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 607 specialize qw/aom_highbd_lpf_vertical_6 neon sse2/; 608 609 add_proto qw/void aom_highbd_lpf_vertical_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 610 specialize qw/aom_highbd_lpf_vertical_6_dual neon sse2/; 611 612 add_proto qw/void aom_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 613 specialize qw/aom_highbd_lpf_vertical_4 neon sse2/; 614 615 add_proto qw/void aom_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 616 specialize qw/aom_highbd_lpf_vertical_4_dual neon sse2 avx2/; 617 618 add_proto qw/void aom_highbd_lpf_horizontal_14/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 619 specialize qw/aom_highbd_lpf_horizontal_14 neon sse2/; 620 621 add_proto qw/void aom_highbd_lpf_horizontal_14_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd"; 622 specialize qw/aom_highbd_lpf_horizontal_14_dual neon sse2 avx2/; 623 624 add_proto qw/void aom_highbd_lpf_horizontal_6/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 625 specialize qw/aom_highbd_lpf_horizontal_6 neon sse2/; 626 627 add_proto qw/void aom_highbd_lpf_horizontal_6_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 628 specialize qw/aom_highbd_lpf_horizontal_6_dual neon sse2/; 629 630 add_proto qw/void aom_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 631 specialize qw/aom_highbd_lpf_horizontal_8 neon sse2/; 632 633 add_proto qw/void aom_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 634 specialize qw/aom_highbd_lpf_horizontal_8_dual neon sse2 avx2/; 635 636 add_proto qw/void aom_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd"; 637 specialize qw/aom_highbd_lpf_horizontal_4 neon sse2/; 638 639 add_proto qw/void aom_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd"; 640 specialize qw/aom_highbd_lpf_horizontal_4_dual neon sse2 avx2/; 641} 642 643# 644# Encoder functions. 645# 646 647# 648# Forward transform 649# 650if (aom_config("CONFIG_AV1_ENCODER") eq "yes"){ 651 add_proto qw/void aom_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride"; 652 specialize qw/aom_fdct4x4 neon sse2/; 653 654 add_proto qw/void aom_fdct4x4_lp/, "const int16_t *input, int16_t *output, int stride"; 655 specialize qw/aom_fdct4x4_lp neon sse2/; 656 657 if (aom_config("CONFIG_INTERNAL_STATS") eq "yes"){ 658 # 8x8 DCT transform for psnr-hvs. Unlike other transforms isn't compatible 659 # with av1 scan orders, because it does two transposes. 660 add_proto qw/void aom_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 661 specialize qw/aom_fdct8x8 neon sse2/, "$ssse3_x86_64"; 662 # High bit depth 663 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 664 add_proto qw/void aom_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride"; 665 specialize qw/aom_highbd_fdct8x8 sse2/; 666 } 667 } 668 # FFT/IFFT (float) only used for denoising (and noise power spectral density estimation) 669 add_proto qw/void aom_fft2x2_float/, "const float *input, float *temp, float *output"; 670 671 add_proto qw/void aom_fft4x4_float/, "const float *input, float *temp, float *output"; 672 specialize qw/aom_fft4x4_float sse2/; 673 674 add_proto qw/void aom_fft8x8_float/, "const float *input, float *temp, float *output"; 675 specialize qw/aom_fft8x8_float avx2 sse2/; 676 677 add_proto qw/void aom_fft16x16_float/, "const float *input, float *temp, float *output"; 678 specialize qw/aom_fft16x16_float avx2 sse2/; 679 680 add_proto qw/void aom_fft32x32_float/, "const float *input, float *temp, float *output"; 681 specialize qw/aom_fft32x32_float avx2 sse2/; 682 683 add_proto qw/void aom_ifft2x2_float/, "const float *input, float *temp, float *output"; 684 685 add_proto qw/void aom_ifft4x4_float/, "const float *input, float *temp, float *output"; 686 specialize qw/aom_ifft4x4_float sse2/; 687 688 add_proto qw/void aom_ifft8x8_float/, "const float *input, float *temp, float *output"; 689 specialize qw/aom_ifft8x8_float avx2 sse2/; 690 691 add_proto qw/void aom_ifft16x16_float/, "const float *input, float *temp, float *output"; 692 specialize qw/aom_ifft16x16_float avx2 sse2/; 693 694 add_proto qw/void aom_ifft32x32_float/, "const float *input, float *temp, float *output"; 695 specialize qw/aom_ifft32x32_float avx2 sse2/; 696} # CONFIG_AV1_ENCODER 697 698# 699# Quantization 700# 701if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 702 add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 703 specialize qw/aom_quantize_b sse2 neon avx avx2/, "$ssse3_x86_64"; 704 705 add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 706 specialize qw/aom_quantize_b_32x32 neon avx avx2/, "$ssse3_x86_64"; 707 708 add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 709 specialize qw/aom_quantize_b_64x64 neon ssse3 avx2/; 710 711 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 712 add_proto qw/void aom_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 713 specialize qw/aom_quantize_b_adaptive sse2 avx2/; 714 715 add_proto qw/void aom_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 716 specialize qw/aom_quantize_b_32x32_adaptive sse2/; 717 718 add_proto qw/void aom_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 719 specialize qw/aom_quantize_b_64x64_adaptive sse2/; 720 } 721} # CONFIG_AV1_ENCODER 722 723if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 724 add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 725 specialize qw/aom_highbd_quantize_b sse2 avx2 neon/; 726 727 add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 728 specialize qw/aom_highbd_quantize_b_32x32 sse2 avx2 neon/; 729 730 add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 731 specialize qw/aom_highbd_quantize_b_64x64 sse2 avx2 neon/; 732 733 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 734 add_proto qw/void aom_highbd_quantize_b_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 735 specialize qw/aom_highbd_quantize_b_adaptive sse2 avx2 neon/; 736 737 add_proto qw/void aom_highbd_quantize_b_32x32_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 738 specialize qw/aom_highbd_quantize_b_32x32_adaptive sse2 avx2 neon/; 739 740 add_proto qw/void aom_highbd_quantize_b_64x64_adaptive/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; 741 specialize qw/aom_highbd_quantize_b_64x64_adaptive sse2 neon/; 742 } 743} # CONFIG_AV1_ENCODER 744 745# 746# Alpha blending with mask 747# 748add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params"; 749specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1 avx2 neon/; 750add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh"; 751add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 752add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; 753specialize "aom_blend_a64_mask", qw/sse4_1 neon avx2/; 754specialize "aom_blend_a64_hmask", qw/sse4_1 neon/; 755specialize "aom_blend_a64_vmask", qw/sse4_1 neon/; 756 757if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 758 add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd"; 759 add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 760 add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; 761 add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd"; 762 specialize "aom_highbd_blend_a64_mask", qw/sse4_1 neon/; 763 specialize "aom_highbd_blend_a64_hmask", qw/sse4_1 neon/; 764 specialize "aom_highbd_blend_a64_vmask", qw/sse4_1 neon/; 765 specialize "aom_highbd_blend_a64_d16_mask", qw/sse4_1 neon avx2/; 766} 767 768if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 769 # 770 # Block subtraction 771 # 772 add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 773 specialize qw/aom_subtract_block neon sse2 avx2/; 774 775 add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height"; 776 specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/; 777 778 add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum"; 779 specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/; 780 781 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 782 add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; 783 specialize qw/aom_highbd_subtract_block sse2 neon/; 784 785 add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height"; 786 specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/; 787 } 788 789 # 790 # Sum of Squares 791 # 792 add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height"; 793 specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/; 794 795 add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N"; 796 specialize qw/aom_sum_squares_i16 sse2 neon sve/; 797 798 add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height"; 799 specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/; 800 801 add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height"; 802 specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/; 803 804 # 805 # Single block SAD / Single block Avg SAD 806 # 807 foreach (@encoder_block_sizes) { 808 ($w, $h) = @$_; 809 add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 810 add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 811 add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 812 add_proto qw/unsigned int/, "aom_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param"; 813 } 814 815 add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum"; 816 specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/; 817 specialize qw/aom_sad128x128 avx2 sse2 neon neon_dotprod/; 818 specialize qw/aom_sad128x64 avx2 sse2 neon neon_dotprod/; 819 specialize qw/aom_sad64x128 avx2 sse2 neon neon_dotprod/; 820 specialize qw/aom_sad64x64 avx2 sse2 neon neon_dotprod/; 821 specialize qw/aom_sad64x32 avx2 sse2 neon neon_dotprod/; 822 specialize qw/aom_sad32x64 avx2 sse2 neon neon_dotprod/; 823 specialize qw/aom_sad32x32 avx2 sse2 neon neon_dotprod/; 824 specialize qw/aom_sad32x16 avx2 sse2 neon neon_dotprod/; 825 specialize qw/aom_sad16x32 sse2 neon neon_dotprod/; 826 specialize qw/aom_sad16x16 sse2 neon neon_dotprod/; 827 specialize qw/aom_sad16x8 sse2 neon neon_dotprod/; 828 specialize qw/aom_sad8x16 sse2 neon/; 829 specialize qw/aom_sad8x8 sse2 neon/; 830 specialize qw/aom_sad8x4 sse2 neon/; 831 specialize qw/aom_sad4x8 sse2 neon/; 832 specialize qw/aom_sad4x4 sse2 neon/; 833 834 specialize qw/aom_sad4x16 sse2 neon/; 835 specialize qw/aom_sad16x4 sse2 neon neon_dotprod/; 836 specialize qw/aom_sad8x32 sse2 neon/; 837 specialize qw/aom_sad32x8 sse2 neon neon_dotprod/; 838 specialize qw/aom_sad16x64 sse2 neon neon_dotprod/; 839 specialize qw/aom_sad64x16 sse2 neon neon_dotprod/; 840 841 specialize qw/aom_sad_skip_128x128 avx2 sse2 neon neon_dotprod/; 842 specialize qw/aom_sad_skip_128x64 avx2 sse2 neon neon_dotprod/; 843 specialize qw/aom_sad_skip_64x128 avx2 sse2 neon neon_dotprod/; 844 specialize qw/aom_sad_skip_64x64 avx2 sse2 neon neon_dotprod/; 845 specialize qw/aom_sad_skip_64x32 avx2 sse2 neon neon_dotprod/; 846 specialize qw/aom_sad_skip_32x64 avx2 sse2 neon neon_dotprod/; 847 specialize qw/aom_sad_skip_32x32 avx2 sse2 neon neon_dotprod/; 848 specialize qw/aom_sad_skip_32x16 avx2 sse2 neon neon_dotprod/; 849 specialize qw/aom_sad_skip_16x32 sse2 neon neon_dotprod/; 850 specialize qw/aom_sad_skip_16x16 sse2 neon neon_dotprod/; 851 specialize qw/aom_sad_skip_16x8 sse2 neon neon_dotprod/; 852 specialize qw/aom_sad_skip_8x16 sse2 neon/; 853 specialize qw/aom_sad_skip_8x8 sse2 neon/; 854 specialize qw/aom_sad_skip_8x4 neon/; 855 specialize qw/aom_sad_skip_4x8 sse2 neon/; 856 specialize qw/aom_sad_skip_4x4 neon/; 857 858 specialize qw/aom_sad_skip_4x16 sse2 neon/; 859 specialize qw/aom_sad_skip_16x4 neon neon_dotprod/; 860 specialize qw/aom_sad_skip_8x32 sse2 neon/; 861 specialize qw/aom_sad_skip_32x8 sse2 neon neon_dotprod/; 862 specialize qw/aom_sad_skip_16x64 sse2 neon neon_dotprod/; 863 specialize qw/aom_sad_skip_64x16 sse2 neon neon_dotprod/; 864 865 specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/; 866 specialize qw/aom_sad128x64_avg avx2 sse2 neon neon_dotprod/; 867 specialize qw/aom_sad64x128_avg avx2 sse2 neon neon_dotprod/; 868 specialize qw/aom_sad64x64_avg avx2 sse2 neon neon_dotprod/; 869 specialize qw/aom_sad64x32_avg avx2 sse2 neon neon_dotprod/; 870 specialize qw/aom_sad32x64_avg avx2 sse2 neon neon_dotprod/; 871 specialize qw/aom_sad32x32_avg avx2 sse2 neon neon_dotprod/; 872 specialize qw/aom_sad32x16_avg avx2 sse2 neon neon_dotprod/; 873 specialize qw/aom_sad16x32_avg sse2 neon neon_dotprod/; 874 specialize qw/aom_sad16x16_avg sse2 neon neon_dotprod/; 875 specialize qw/aom_sad16x8_avg sse2 neon neon_dotprod/; 876 specialize qw/aom_sad8x16_avg sse2 neon/; 877 specialize qw/aom_sad8x8_avg sse2 neon/; 878 specialize qw/aom_sad8x4_avg sse2 neon/; 879 specialize qw/aom_sad4x8_avg sse2 neon/; 880 specialize qw/aom_sad4x4_avg sse2 neon/; 881 882 specialize qw/aom_sad4x16_avg sse2 neon/; 883 specialize qw/aom_sad16x4_avg sse2 neon neon_dotprod/; 884 specialize qw/aom_sad8x32_avg sse2 neon/; 885 specialize qw/aom_sad32x8_avg sse2 neon neon_dotprod/; 886 specialize qw/aom_sad16x64_avg sse2 neon neon_dotprod/; 887 specialize qw/aom_sad64x16_avg sse2 neon neon_dotprod/; 888 889 specialize qw/aom_dist_wtd_sad128x128_avg sse2 neon neon_dotprod/; 890 specialize qw/aom_dist_wtd_sad128x64_avg sse2 neon neon_dotprod/; 891 specialize qw/aom_dist_wtd_sad64x128_avg sse2 neon neon_dotprod/; 892 specialize qw/aom_dist_wtd_sad64x64_avg sse2 neon neon_dotprod/; 893 specialize qw/aom_dist_wtd_sad64x32_avg sse2 neon neon_dotprod/; 894 specialize qw/aom_dist_wtd_sad32x64_avg sse2 neon neon_dotprod/; 895 specialize qw/aom_dist_wtd_sad32x32_avg sse2 neon neon_dotprod/; 896 specialize qw/aom_dist_wtd_sad32x16_avg sse2 neon neon_dotprod/; 897 specialize qw/aom_dist_wtd_sad16x32_avg sse2 neon neon_dotprod/; 898 specialize qw/aom_dist_wtd_sad16x16_avg sse2 neon neon_dotprod/; 899 specialize qw/aom_dist_wtd_sad16x8_avg sse2 neon neon_dotprod/; 900 specialize qw/aom_dist_wtd_sad8x16_avg sse2 neon/; 901 specialize qw/aom_dist_wtd_sad8x8_avg sse2 neon/; 902 specialize qw/aom_dist_wtd_sad8x4_avg sse2 neon/; 903 specialize qw/aom_dist_wtd_sad4x8_avg sse2 neon/; 904 specialize qw/aom_dist_wtd_sad4x4_avg sse2 neon/; 905 906 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 907 specialize qw/aom_dist_wtd_sad4x16_avg sse2 neon/; 908 specialize qw/aom_dist_wtd_sad16x4_avg sse2 neon neon_dotprod/; 909 specialize qw/aom_dist_wtd_sad8x32_avg sse2 neon/; 910 specialize qw/aom_dist_wtd_sad32x8_avg sse2 neon neon_dotprod/; 911 specialize qw/aom_dist_wtd_sad16x64_avg sse2 neon neon_dotprod/; 912 specialize qw/aom_dist_wtd_sad64x16_avg sse2 neon neon_dotprod/; 913 } 914 915 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 916 foreach (@encoder_block_sizes) { 917 ($w, $h) = @$_; 918 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 919 add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; 920 add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; 921 if ($w != 128 && $h != 128 && $w != 4) { 922 specialize "aom_highbd_sad${w}x${h}", qw/sse2/; 923 specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/; 924 } 925 add_proto qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param"; 926 } 927 specialize qw/aom_highbd_sad128x128 avx2 neon/; 928 specialize qw/aom_highbd_sad128x64 avx2 neon/; 929 specialize qw/aom_highbd_sad64x128 avx2 neon/; 930 specialize qw/aom_highbd_sad64x64 avx2 sse2 neon/; 931 specialize qw/aom_highbd_sad64x32 avx2 sse2 neon/; 932 specialize qw/aom_highbd_sad32x64 avx2 sse2 neon/; 933 specialize qw/aom_highbd_sad32x32 avx2 sse2 neon/; 934 specialize qw/aom_highbd_sad32x16 avx2 sse2 neon/; 935 specialize qw/aom_highbd_sad16x32 avx2 sse2 neon/; 936 specialize qw/aom_highbd_sad16x16 avx2 sse2 neon/; 937 specialize qw/aom_highbd_sad16x8 avx2 sse2 neon/; 938 specialize qw/aom_highbd_sad8x16 sse2 neon/; 939 specialize qw/aom_highbd_sad8x8 sse2 neon/; 940 specialize qw/aom_highbd_sad8x4 sse2 neon/; 941 specialize qw/aom_highbd_sad4x8 sse2 neon/; 942 specialize qw/aom_highbd_sad4x4 sse2 neon/; 943 944 specialize qw/aom_highbd_sad4x16 sse2 neon/; 945 specialize qw/aom_highbd_sad16x4 avx2 sse2 neon/; 946 specialize qw/aom_highbd_sad8x32 sse2 neon/; 947 specialize qw/aom_highbd_sad32x8 avx2 sse2 neon/; 948 specialize qw/aom_highbd_sad16x64 avx2 sse2 neon/; 949 specialize qw/aom_highbd_sad64x16 avx2 sse2 neon/; 950 951 specialize qw/aom_highbd_sad_skip_128x128 avx2 neon/; 952 specialize qw/aom_highbd_sad_skip_128x64 avx2 neon/; 953 specialize qw/aom_highbd_sad_skip_64x128 avx2 neon/; 954 specialize qw/aom_highbd_sad_skip_64x64 avx2 sse2 neon/; 955 specialize qw/aom_highbd_sad_skip_64x32 avx2 sse2 neon/; 956 specialize qw/aom_highbd_sad_skip_32x64 avx2 sse2 neon/; 957 specialize qw/aom_highbd_sad_skip_32x32 avx2 sse2 neon/; 958 specialize qw/aom_highbd_sad_skip_32x16 avx2 sse2 neon/; 959 specialize qw/aom_highbd_sad_skip_16x32 avx2 sse2 neon/; 960 specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2 neon/; 961 specialize qw/aom_highbd_sad_skip_16x8 avx2 sse2 neon/; 962 specialize qw/aom_highbd_sad_skip_16x4 neon/; 963 specialize qw/aom_highbd_sad_skip_8x16 sse2 neon/; 964 specialize qw/aom_highbd_sad_skip_8x4 neon/; 965 specialize qw/aom_highbd_sad_skip_8x8 sse2 neon/; 966 specialize qw/aom_highbd_sad_skip_4x8 sse2 neon/; 967 specialize qw/aom_highbd_sad_skip_4x4 neon/; 968 969 specialize qw/aom_highbd_sad_skip_4x16 sse2 neon/; 970 specialize qw/aom_highbd_sad_skip_8x32 sse2 neon/; 971 specialize qw/aom_highbd_sad_skip_32x8 avx2 sse2 neon/; 972 specialize qw/aom_highbd_sad_skip_16x64 avx2 sse2 neon/; 973 specialize qw/aom_highbd_sad_skip_64x16 avx2 sse2 neon/; 974 975 specialize qw/aom_highbd_sad128x128_avg avx2 neon/; 976 specialize qw/aom_highbd_sad128x64_avg avx2 neon/; 977 specialize qw/aom_highbd_sad64x128_avg avx2 neon/; 978 specialize qw/aom_highbd_sad64x64_avg avx2 sse2 neon/; 979 specialize qw/aom_highbd_sad64x32_avg avx2 sse2 neon/; 980 specialize qw/aom_highbd_sad32x64_avg avx2 sse2 neon/; 981 specialize qw/aom_highbd_sad32x32_avg avx2 sse2 neon/; 982 specialize qw/aom_highbd_sad32x16_avg avx2 sse2 neon/; 983 specialize qw/aom_highbd_sad16x32_avg avx2 sse2 neon/; 984 specialize qw/aom_highbd_sad16x16_avg avx2 sse2 neon/; 985 specialize qw/aom_highbd_sad16x8_avg avx2 sse2 neon/; 986 specialize qw/aom_highbd_sad8x16_avg neon/; 987 specialize qw/aom_highbd_sad8x8_avg neon/; 988 specialize qw/aom_highbd_sad8x4_avg sse2 neon/; 989 specialize qw/aom_highbd_sad4x8_avg sse2 neon/; 990 specialize qw/aom_highbd_sad4x4_avg sse2 neon/; 991 992 specialize qw/aom_highbd_sad4x16_avg sse2 neon/; 993 specialize qw/aom_highbd_sad8x32_avg sse2 neon/; 994 specialize qw/aom_highbd_sad16x4_avg avx2 sse2 neon/; 995 specialize qw/aom_highbd_sad16x64_avg avx2 sse2 neon/; 996 specialize qw/aom_highbd_sad32x8_avg avx2 sse2 neon/; 997 specialize qw/aom_highbd_sad64x16_avg avx2 sse2 neon/; 998 } 999 # 1000 # Masked SAD 1001 # 1002 foreach (@encoder_block_sizes) { 1003 ($w, $h) = @$_; 1004 add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask"; 1005 specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 1006 } 1007 1008 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1009 foreach (@encoder_block_sizes) { 1010 ($w, $h) = @$_; 1011 add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask"; 1012 specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/; 1013 } 1014 } 1015 1016 # 1017 # OBMC SAD 1018 # 1019 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1020 foreach (@encoder_block_sizes) { 1021 ($w, $h) = @$_; 1022 add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1023 if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1024 specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1025 } 1026 } 1027 1028 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1029 foreach (@encoder_block_sizes) { 1030 ($w, $h) = @$_; 1031 add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask"; 1032 if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) { 1033 specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/; 1034 } 1035 } 1036 } 1037 } 1038 1039 # 1040 # Multi-block SAD, comparing a reference to N independent blocks 1041 # 1042 foreach (@encoder_block_sizes) { 1043 ($w, $h) = @$_; 1044 add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1045 add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1046 add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1047 add_proto qw/void/, "aom_masked_sad${w}x${h}x4d", "const uint8_t *src, int src_stride, const uint8_t *ref[4], int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned sads[4]"; 1048 } 1049 1050 specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/; 1051 specialize qw/aom_sad128x64x4d avx2 sse2 neon neon_dotprod/; 1052 specialize qw/aom_sad64x128x4d avx2 sse2 neon neon_dotprod/; 1053 specialize qw/aom_sad64x64x4d avx2 sse2 neon neon_dotprod/; 1054 specialize qw/aom_sad64x32x4d avx2 sse2 neon neon_dotprod/; 1055 specialize qw/aom_sad32x64x4d avx2 sse2 neon neon_dotprod/; 1056 specialize qw/aom_sad32x32x4d avx2 sse2 neon neon_dotprod/; 1057 specialize qw/aom_sad32x16x4d avx2 sse2 neon neon_dotprod/; 1058 specialize qw/aom_sad16x32x4d avx2 sse2 neon neon_dotprod/; 1059 specialize qw/aom_sad16x16x4d avx2 sse2 neon neon_dotprod/; 1060 specialize qw/aom_sad16x8x4d avx2 sse2 neon neon_dotprod/; 1061 1062 specialize qw/aom_sad8x16x4d sse2 neon/; 1063 specialize qw/aom_sad8x8x4d sse2 neon/; 1064 specialize qw/aom_sad8x4x4d sse2 neon/; 1065 specialize qw/aom_sad4x8x4d sse2 neon/; 1066 specialize qw/aom_sad4x4x4d sse2 neon/; 1067 1068 specialize qw/aom_sad64x16x4d avx2 sse2 neon neon_dotprod/; 1069 specialize qw/aom_sad32x8x4d avx2 sse2 neon neon_dotprod/; 1070 specialize qw/aom_sad16x64x4d avx2 sse2 neon neon_dotprod/; 1071 specialize qw/aom_sad16x4x4d avx2 sse2 neon neon_dotprod/; 1072 specialize qw/aom_sad8x32x4d sse2 neon/; 1073 specialize qw/aom_sad4x16x4d sse2 neon/; 1074 1075 specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/; 1076 specialize qw/aom_sad_skip_128x64x4d avx2 sse2 neon neon_dotprod/; 1077 specialize qw/aom_sad_skip_64x128x4d avx2 sse2 neon neon_dotprod/; 1078 specialize qw/aom_sad_skip_64x64x4d avx2 sse2 neon neon_dotprod/; 1079 specialize qw/aom_sad_skip_64x32x4d avx2 sse2 neon neon_dotprod/; 1080 specialize qw/aom_sad_skip_64x16x4d avx2 sse2 neon neon_dotprod/; 1081 specialize qw/aom_sad_skip_32x64x4d avx2 sse2 neon neon_dotprod/; 1082 specialize qw/aom_sad_skip_32x32x4d avx2 sse2 neon neon_dotprod/; 1083 specialize qw/aom_sad_skip_32x16x4d avx2 sse2 neon neon_dotprod/; 1084 specialize qw/aom_sad_skip_32x8x4d avx2 sse2 neon neon_dotprod/; 1085 1086 specialize qw/aom_sad_skip_16x64x4d avx2 sse2 neon neon_dotprod/; 1087 specialize qw/aom_sad_skip_16x32x4d avx2 sse2 neon neon_dotprod/; 1088 specialize qw/aom_sad_skip_16x16x4d avx2 sse2 neon neon_dotprod/; 1089 specialize qw/aom_sad_skip_16x8x4d avx2 sse2 neon neon_dotprod/; 1090 specialize qw/aom_sad_skip_16x4x4d avx2 neon neon_dotprod/; 1091 specialize qw/aom_sad_skip_8x32x4d sse2 neon/; 1092 specialize qw/aom_sad_skip_8x16x4d sse2 neon/; 1093 specialize qw/aom_sad_skip_8x8x4d sse2 neon/; 1094 specialize qw/aom_sad_skip_8x4x4d neon/; 1095 specialize qw/aom_sad_skip_4x16x4d sse2 neon/; 1096 specialize qw/aom_sad_skip_4x8x4d sse2 neon/; 1097 specialize qw/aom_sad_skip_4x4x4d neon/; 1098 1099 specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/; 1100 specialize qw/aom_sad128x64x3d avx2 neon neon_dotprod/; 1101 specialize qw/aom_sad64x128x3d avx2 neon neon_dotprod/; 1102 specialize qw/aom_sad64x64x3d avx2 neon neon_dotprod/; 1103 specialize qw/aom_sad64x32x3d avx2 neon neon_dotprod/; 1104 specialize qw/aom_sad32x64x3d avx2 neon neon_dotprod/; 1105 specialize qw/aom_sad32x32x3d avx2 neon neon_dotprod/; 1106 specialize qw/aom_sad32x16x3d avx2 neon neon_dotprod/; 1107 specialize qw/aom_sad16x32x3d avx2 neon neon_dotprod/; 1108 specialize qw/aom_sad16x16x3d avx2 neon neon_dotprod/; 1109 specialize qw/aom_sad16x8x3d avx2 neon neon_dotprod/; 1110 specialize qw/aom_sad8x16x3d neon/; 1111 specialize qw/aom_sad8x8x3d neon/; 1112 specialize qw/aom_sad8x4x3d neon/; 1113 specialize qw/aom_sad4x8x3d neon/; 1114 specialize qw/aom_sad4x4x3d neon/; 1115 1116 specialize qw/aom_sad64x16x3d avx2 neon neon_dotprod/; 1117 specialize qw/aom_sad32x8x3d avx2 neon neon_dotprod/; 1118 specialize qw/aom_sad16x64x3d avx2 neon neon_dotprod/; 1119 specialize qw/aom_sad16x4x3d avx2 neon neon_dotprod/; 1120 specialize qw/aom_sad8x32x3d neon/; 1121 specialize qw/aom_sad4x16x3d neon/; 1122 1123 specialize qw/aom_masked_sad128x128x4d ssse3 neon/; 1124 specialize qw/aom_masked_sad128x64x4d ssse3 neon/; 1125 specialize qw/aom_masked_sad64x128x4d ssse3 neon/; 1126 specialize qw/aom_masked_sad64x64x4d ssse3 neon/; 1127 specialize qw/aom_masked_sad64x32x4d ssse3 neon/; 1128 specialize qw/aom_masked_sad64x16x4d ssse3 neon/; 1129 specialize qw/aom_masked_sad32x64x4d ssse3 neon/; 1130 specialize qw/aom_masked_sad32x32x4d ssse3 neon/; 1131 specialize qw/aom_masked_sad32x16x4d ssse3 neon/; 1132 specialize qw/aom_masked_sad32x8x4d ssse3 neon/; 1133 specialize qw/aom_masked_sad16x64x4d ssse3 neon/; 1134 specialize qw/aom_masked_sad16x32x4d ssse3 neon/; 1135 specialize qw/aom_masked_sad16x16x4d ssse3 neon/; 1136 specialize qw/aom_masked_sad16x8x4d ssse3 neon/; 1137 1138 specialize qw/aom_masked_sad8x16x4d ssse3 neon/; 1139 specialize qw/aom_masked_sad8x8x4d ssse3 neon/; 1140 specialize qw/aom_masked_sad8x4x4d ssse3 neon/; 1141 specialize qw/aom_masked_sad4x16x4d ssse3 neon/; 1142 specialize qw/aom_masked_sad4x8x4d ssse3 neon/; 1143 specialize qw/aom_masked_sad4x4x4d ssse3 neon/; 1144 1145 specialize qw/aom_masked_sad4x16x4d ssse3 neon/; 1146 specialize qw/aom_masked_sad16x4x4d ssse3 neon/; 1147 specialize qw/aom_masked_sad8x32x4d ssse3 neon/; 1148 specialize qw/aom_masked_sad32x8x4d ssse3 neon/; 1149 specialize qw/aom_masked_sad64x16x4d ssse3 neon/; 1150 # 1151 # Multi-block SAD, comparing a reference to N independent blocks 1152 # 1153 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1154 foreach (@encoder_block_sizes) { 1155 ($w, $h) = @$_; 1156 add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1157 add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1158 add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]"; 1159 if ($w != 128 && $h != 128) { 1160 specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/; 1161 } 1162 } 1163 specialize qw/aom_highbd_sad128x128x4d avx2 neon/; 1164 specialize qw/aom_highbd_sad128x64x4d avx2 neon/; 1165 specialize qw/aom_highbd_sad64x128x4d avx2 neon/; 1166 specialize qw/aom_highbd_sad64x64x4d sse2 avx2 neon/; 1167 specialize qw/aom_highbd_sad64x32x4d sse2 avx2 neon/; 1168 specialize qw/aom_highbd_sad32x64x4d sse2 avx2 neon/; 1169 specialize qw/aom_highbd_sad32x32x4d sse2 avx2 neon/; 1170 specialize qw/aom_highbd_sad32x16x4d sse2 avx2 neon/; 1171 specialize qw/aom_highbd_sad16x32x4d sse2 avx2 neon/; 1172 specialize qw/aom_highbd_sad16x16x4d sse2 avx2 neon/; 1173 specialize qw/aom_highbd_sad16x8x4d sse2 avx2 neon/; 1174 specialize qw/aom_highbd_sad8x16x4d sse2 neon/; 1175 specialize qw/aom_highbd_sad8x8x4d sse2 neon/; 1176 specialize qw/aom_highbd_sad8x4x4d sse2 neon/; 1177 specialize qw/aom_highbd_sad4x8x4d sse2 neon/; 1178 specialize qw/aom_highbd_sad4x4x4d sse2 neon/; 1179 1180 specialize qw/aom_highbd_sad4x16x4d sse2 neon/; 1181 specialize qw/aom_highbd_sad16x4x4d avx2 sse2 neon/; 1182 specialize qw/aom_highbd_sad8x32x4d sse2 neon/; 1183 specialize qw/aom_highbd_sad32x8x4d avx2 sse2 neon/; 1184 specialize qw/aom_highbd_sad16x64x4d avx2 sse2 neon/; 1185 specialize qw/aom_highbd_sad64x16x4d avx2 sse2 neon/; 1186 1187 specialize qw/aom_highbd_sad_skip_128x128x4d avx2 neon/; 1188 specialize qw/aom_highbd_sad_skip_128x64x4d avx2 neon/; 1189 specialize qw/aom_highbd_sad_skip_64x128x4d avx2 neon/; 1190 specialize qw/aom_highbd_sad_skip_64x64x4d avx2 sse2 neon/; 1191 specialize qw/aom_highbd_sad_skip_64x32x4d avx2 sse2 neon/; 1192 specialize qw/aom_highbd_sad_skip_32x64x4d avx2 sse2 neon/; 1193 specialize qw/aom_highbd_sad_skip_32x32x4d avx2 sse2 neon/; 1194 specialize qw/aom_highbd_sad_skip_32x16x4d avx2 sse2 neon/; 1195 specialize qw/aom_highbd_sad_skip_16x32x4d avx2 sse2 neon/; 1196 specialize qw/aom_highbd_sad_skip_16x16x4d avx2 sse2 neon/; 1197 specialize qw/aom_highbd_sad_skip_16x8x4d avx2 sse2 neon/; 1198 specialize qw/aom_highbd_sad_skip_16x4x4d neon/; 1199 specialize qw/aom_highbd_sad_skip_8x16x4d sse2 neon/; 1200 specialize qw/aom_highbd_sad_skip_8x8x4d sse2 neon/; 1201 specialize qw/aom_highbd_sad_skip_8x4x4d neon/; 1202 specialize qw/aom_highbd_sad_skip_4x8x4d sse2 neon/; 1203 specialize qw/aom_highbd_sad_skip_4x4x4d neon/; 1204 1205 specialize qw/aom_highbd_sad_skip_4x16x4d sse2 neon/; 1206 specialize qw/aom_highbd_sad_skip_8x32x4d sse2 neon/; 1207 specialize qw/aom_highbd_sad_skip_32x8x4d avx2 sse2 neon/; 1208 specialize qw/aom_highbd_sad_skip_16x64x4d avx2 sse2 neon/; 1209 specialize qw/aom_highbd_sad_skip_64x16x4d avx2 sse2 neon/; 1210 1211 specialize qw/aom_highbd_sad128x128x3d avx2 neon/; 1212 specialize qw/aom_highbd_sad128x64x3d avx2 neon/; 1213 specialize qw/aom_highbd_sad64x128x3d avx2 neon/; 1214 specialize qw/aom_highbd_sad64x64x3d avx2 neon/; 1215 specialize qw/aom_highbd_sad64x32x3d avx2 neon/; 1216 specialize qw/aom_highbd_sad32x64x3d avx2 neon/; 1217 specialize qw/aom_highbd_sad32x32x3d avx2 neon/; 1218 specialize qw/aom_highbd_sad32x16x3d avx2 neon/; 1219 specialize qw/aom_highbd_sad16x32x3d avx2 neon/; 1220 specialize qw/aom_highbd_sad16x16x3d avx2 neon/; 1221 specialize qw/aom_highbd_sad16x8x3d avx2 neon/; 1222 specialize qw/aom_highbd_sad8x16x3d neon/; 1223 specialize qw/aom_highbd_sad8x8x3d neon/; 1224 specialize qw/aom_highbd_sad8x4x3d neon/; 1225 specialize qw/aom_highbd_sad4x8x3d neon/; 1226 specialize qw/aom_highbd_sad4x4x3d neon/; 1227 1228 specialize qw/aom_highbd_sad64x16x3d avx2 neon/; 1229 specialize qw/aom_highbd_sad32x8x3d avx2 neon/; 1230 specialize qw/aom_highbd_sad16x64x3d avx2 neon/; 1231 specialize qw/aom_highbd_sad16x4x3d avx2 neon/; 1232 specialize qw/aom_highbd_sad8x32x3d neon/; 1233 specialize qw/aom_highbd_sad4x16x3d neon/; 1234 } 1235 # 1236 # Avg 1237 # 1238 add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p"; 1239 specialize qw/aom_avg_8x8 sse2 neon/; 1240 1241 add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p"; 1242 specialize qw/aom_avg_4x4 sse2 neon/; 1243 1244 add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg"; 1245 specialize qw/aom_avg_8x8_quad avx2 sse2 neon/; 1246 1247 add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1248 specialize qw/aom_minmax_8x8 sse2 neon/; 1249 1250 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1251 add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p"; 1252 specialize qw/aom_highbd_avg_8x8 neon/; 1253 add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p"; 1254 specialize qw/aom_highbd_avg_4x4 neon/; 1255 add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max"; 1256 specialize qw/aom_highbd_minmax_8x8 neon/; 1257 } 1258 1259 add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1260 specialize qw/aom_int_pro_row avx2 sse2 neon/; 1261 1262 add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor"; 1263 specialize qw/aom_int_pro_col avx2 sse2 neon/; 1264 1265 add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl"; 1266 specialize qw/aom_vector_var avx2 sse4_1 neon sve/; 1267 1268 # 1269 # hamadard transform and satd for implmenting temporal dependency model 1270 # 1271 add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1272 specialize qw/aom_hadamard_4x4 sse2 neon/; 1273 1274 add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1275 specialize qw/aom_hadamard_8x8 sse2 neon/; 1276 1277 add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1278 specialize qw/aom_hadamard_16x16 avx2 sse2 neon/; 1279 1280 add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1281 specialize qw/aom_hadamard_32x32 avx2 sse2 neon/; 1282 1283 add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; 1284 specialize qw/aom_hadamard_lp_8x8 sse2 neon/; 1285 1286 add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; 1287 specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/; 1288 1289 add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; 1290 specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/; 1291 1292 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1293 add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1294 specialize qw/aom_highbd_hadamard_8x8 avx2 neon/; 1295 1296 add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1297 specialize qw/aom_highbd_hadamard_16x16 avx2 neon/; 1298 1299 add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; 1300 specialize qw/aom_highbd_hadamard_32x32 avx2 neon/; 1301 } 1302 add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length"; 1303 specialize qw/aom_satd neon sse2 avx2/; 1304 1305 add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length"; 1306 specialize qw/aom_satd_lp sse2 avx2 neon/; 1307 1308 1309 # 1310 # Structured Similarity (SSIM) 1311 # 1312 add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; 1313 specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64"; 1314 1315 if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") { 1316 add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; 1317 specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64"; 1318 } 1319 1320 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1321 add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr"; 1322 } 1323} # CONFIG_AV1_ENCODER 1324 1325if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { 1326 1327 # 1328 # Specialty Variance 1329 # 1330 add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8"; 1331 specialize qw/aom_get_var_sse_sum_8x8_quad avx2 sse2 neon neon_dotprod/; 1332 1333 add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16"; 1334 specialize qw/aom_get_var_sse_sum_16x16_dual avx2 sse2 neon neon_dotprod/; 1335 1336 add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1337 add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1338 add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1339 add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1340 1341 specialize qw/aom_mse16x16 sse2 avx2 neon neon_dotprod/; 1342 specialize qw/aom_mse16x8 sse2 neon neon_dotprod/; 1343 specialize qw/aom_mse8x16 sse2 neon neon_dotprod/; 1344 specialize qw/aom_mse8x8 sse2 neon neon_dotprod/; 1345 1346 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1347 foreach $bd (8, 10, 12) { 1348 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1349 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1350 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1351 add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; 1352 1353 if ($bd eq 8) { 1354 specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/; 1355 specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/; 1356 specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/; 1357 specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/; 1358 } else { 1359 specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/; 1360 specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/; 1361 specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/; 1362 specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/; 1363 } 1364 1365 } 1366 } 1367 1368 # 1369 # 1370 # 1371 add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *"; 1372 specialize qw/aom_get_mb_ss sse2 neon/; 1373 1374 # 1375 # Variance / Subpixel Variance / Subpixel Avg Variance 1376 # 1377 add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h"; 1378 specialize qw/aom_mse_wxh_16bit sse2 avx2 neon/; 1379 1380 add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h"; 1381 specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/; 1382 1383 foreach (@encoder_block_sizes) { 1384 ($w, $h) = @$_; 1385 add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; 1386 add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1387 add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; 1388 add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param"; 1389 } 1390 specialize qw/aom_variance128x128 sse2 avx2 neon neon_dotprod/; 1391 specialize qw/aom_variance128x64 sse2 avx2 neon neon_dotprod/; 1392 specialize qw/aom_variance64x128 sse2 avx2 neon neon_dotprod/; 1393 specialize qw/aom_variance64x64 sse2 avx2 neon neon_dotprod/; 1394 specialize qw/aom_variance64x32 sse2 avx2 neon neon_dotprod/; 1395 specialize qw/aom_variance32x64 sse2 avx2 neon neon_dotprod/; 1396 specialize qw/aom_variance32x32 sse2 avx2 neon neon_dotprod/; 1397 specialize qw/aom_variance32x16 sse2 avx2 neon neon_dotprod/; 1398 specialize qw/aom_variance16x32 sse2 avx2 neon neon_dotprod/; 1399 specialize qw/aom_variance16x16 sse2 avx2 neon neon_dotprod/; 1400 specialize qw/aom_variance16x8 sse2 avx2 neon neon_dotprod/; 1401 specialize qw/aom_variance8x16 sse2 neon neon_dotprod/; 1402 specialize qw/aom_variance8x8 sse2 neon neon_dotprod/; 1403 specialize qw/aom_variance8x4 sse2 neon neon_dotprod/; 1404 specialize qw/aom_variance4x8 sse2 neon neon_dotprod/; 1405 specialize qw/aom_variance4x4 sse2 neon neon_dotprod/; 1406 1407 specialize qw/aom_sub_pixel_variance128x128 avx2 neon ssse3/; 1408 specialize qw/aom_sub_pixel_variance128x64 avx2 neon ssse3/; 1409 specialize qw/aom_sub_pixel_variance64x128 avx2 neon ssse3/; 1410 specialize qw/aom_sub_pixel_variance64x64 avx2 neon ssse3/; 1411 specialize qw/aom_sub_pixel_variance64x32 avx2 neon ssse3/; 1412 specialize qw/aom_sub_pixel_variance32x64 avx2 neon ssse3/; 1413 specialize qw/aom_sub_pixel_variance32x32 avx2 neon ssse3/; 1414 specialize qw/aom_sub_pixel_variance32x16 avx2 neon ssse3/; 1415 specialize qw/aom_sub_pixel_variance16x32 avx2 neon ssse3/; 1416 specialize qw/aom_sub_pixel_variance16x16 avx2 neon ssse3/; 1417 specialize qw/aom_sub_pixel_variance16x8 avx2 neon ssse3/; 1418 specialize qw/aom_sub_pixel_variance8x16 neon ssse3/; 1419 specialize qw/aom_sub_pixel_variance8x8 neon ssse3/; 1420 specialize qw/aom_sub_pixel_variance8x4 neon ssse3/; 1421 specialize qw/aom_sub_pixel_variance4x8 neon ssse3/; 1422 specialize qw/aom_sub_pixel_variance4x4 neon ssse3/; 1423 1424 specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/; 1425 specialize qw/aom_sub_pixel_avg_variance128x64 avx2 neon ssse3/; 1426 specialize qw/aom_sub_pixel_avg_variance64x128 avx2 neon ssse3/; 1427 specialize qw/aom_sub_pixel_avg_variance64x64 avx2 neon ssse3/; 1428 specialize qw/aom_sub_pixel_avg_variance64x32 avx2 neon ssse3/; 1429 specialize qw/aom_sub_pixel_avg_variance32x64 avx2 neon ssse3/; 1430 specialize qw/aom_sub_pixel_avg_variance32x32 avx2 neon ssse3/; 1431 specialize qw/aom_sub_pixel_avg_variance32x16 avx2 neon ssse3/; 1432 specialize qw/aom_sub_pixel_avg_variance16x32 neon ssse3/; 1433 specialize qw/aom_sub_pixel_avg_variance16x16 neon ssse3/; 1434 specialize qw/aom_sub_pixel_avg_variance16x8 neon ssse3/; 1435 specialize qw/aom_sub_pixel_avg_variance8x16 neon ssse3/; 1436 specialize qw/aom_sub_pixel_avg_variance8x8 neon ssse3/; 1437 specialize qw/aom_sub_pixel_avg_variance8x4 neon ssse3/; 1438 specialize qw/aom_sub_pixel_avg_variance4x8 neon ssse3/; 1439 specialize qw/aom_sub_pixel_avg_variance4x4 neon ssse3/; 1440 1441 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1442 specialize qw/aom_variance4x16 neon neon_dotprod sse2/; 1443 specialize qw/aom_variance16x4 neon neon_dotprod sse2 avx2/; 1444 specialize qw/aom_variance8x32 neon neon_dotprod sse2/; 1445 specialize qw/aom_variance32x8 neon neon_dotprod sse2 avx2/; 1446 specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/; 1447 specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/; 1448 1449 specialize qw/aom_sub_pixel_variance4x16 neon ssse3/; 1450 specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/; 1451 specialize qw/aom_sub_pixel_variance8x32 neon ssse3/; 1452 specialize qw/aom_sub_pixel_variance32x8 neon ssse3/; 1453 specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/; 1454 specialize qw/aom_sub_pixel_variance64x16 neon ssse3/; 1455 specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/; 1456 specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/; 1457 specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/; 1458 specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/; 1459 specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/; 1460 specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/; 1461 1462 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16 neon ssse3/; 1463 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4 neon ssse3/; 1464 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x32 neon ssse3/; 1465 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x8 neon ssse3/; 1466 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x64 neon ssse3/; 1467 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x16 neon ssse3/; 1468 } 1469 1470 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x64 neon ssse3/; 1471 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x32 neon ssse3/; 1472 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x64 neon ssse3/; 1473 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x32 neon ssse3/; 1474 specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x16 neon ssse3/; 1475 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x32 neon ssse3/; 1476 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x16 neon ssse3/; 1477 specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x8 neon ssse3/; 1478 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x16 neon ssse3/; 1479 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x8 neon ssse3/; 1480 specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x4 neon ssse3/; 1481 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x8 neon ssse3/; 1482 specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x4 neon ssse3/; 1483 1484 specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x128 neon ssse3/; 1485 specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x64 neon ssse3/; 1486 specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x128 neon ssse3/; 1487 1488 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1489 foreach $bd (8, 10, 12) { 1490 foreach (@encoder_block_sizes) { 1491 ($w, $h) = @$_; 1492 add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1493 add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse"; 1494 add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred"; 1495 add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param"; 1496 } 1497 } 1498 1499 specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/; 1500 specialize qw/aom_highbd_12_variance128x64 sse2 neon sve/; 1501 specialize qw/aom_highbd_12_variance64x128 sse2 neon sve/; 1502 specialize qw/aom_highbd_12_variance64x64 sse2 neon sve/; 1503 specialize qw/aom_highbd_12_variance64x32 sse2 neon sve/; 1504 specialize qw/aom_highbd_12_variance32x64 sse2 neon sve/; 1505 specialize qw/aom_highbd_12_variance32x32 sse2 neon sve/; 1506 specialize qw/aom_highbd_12_variance32x16 sse2 neon sve/; 1507 specialize qw/aom_highbd_12_variance16x32 sse2 neon sve/; 1508 specialize qw/aom_highbd_12_variance16x16 sse2 neon sve/; 1509 specialize qw/aom_highbd_12_variance16x8 sse2 neon sve/; 1510 specialize qw/aom_highbd_12_variance8x16 sse2 neon sve/; 1511 specialize qw/aom_highbd_12_variance8x8 sse2 neon sve/; 1512 specialize qw/aom_highbd_12_variance8x4 neon sve/; 1513 specialize qw/aom_highbd_12_variance4x8 neon sve/; 1514 specialize qw/aom_highbd_12_variance4x4 sse4_1 neon sve/; 1515 1516 specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/; 1517 specialize qw/aom_highbd_10_variance128x64 sse2 avx2 neon sve/; 1518 specialize qw/aom_highbd_10_variance64x128 sse2 avx2 neon sve/; 1519 specialize qw/aom_highbd_10_variance64x64 sse2 avx2 neon sve/; 1520 specialize qw/aom_highbd_10_variance64x32 sse2 avx2 neon sve/; 1521 specialize qw/aom_highbd_10_variance32x64 sse2 avx2 neon sve/; 1522 specialize qw/aom_highbd_10_variance32x32 sse2 avx2 neon sve/; 1523 specialize qw/aom_highbd_10_variance32x16 sse2 avx2 neon sve/; 1524 specialize qw/aom_highbd_10_variance16x32 sse2 avx2 neon sve/; 1525 specialize qw/aom_highbd_10_variance16x16 sse2 avx2 neon sve/; 1526 specialize qw/aom_highbd_10_variance16x8 sse2 avx2 neon sve/; 1527 specialize qw/aom_highbd_10_variance8x16 sse2 avx2 neon sve/; 1528 specialize qw/aom_highbd_10_variance8x8 sse2 avx2 neon sve/; 1529 specialize qw/aom_highbd_10_variance8x4 neon sve/; 1530 specialize qw/aom_highbd_10_variance4x8 neon sve/; 1531 specialize qw/aom_highbd_10_variance4x4 sse4_1 neon sve/; 1532 1533 specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/; 1534 specialize qw/aom_highbd_8_variance128x64 sse2 neon sve/; 1535 specialize qw/aom_highbd_8_variance64x128 sse2 neon sve/; 1536 specialize qw/aom_highbd_8_variance64x64 sse2 neon sve/; 1537 specialize qw/aom_highbd_8_variance64x32 sse2 neon sve/; 1538 specialize qw/aom_highbd_8_variance32x64 sse2 neon sve/; 1539 specialize qw/aom_highbd_8_variance32x32 sse2 neon sve/; 1540 specialize qw/aom_highbd_8_variance32x16 sse2 neon sve/; 1541 specialize qw/aom_highbd_8_variance16x32 sse2 neon sve/; 1542 specialize qw/aom_highbd_8_variance16x16 sse2 neon sve/; 1543 specialize qw/aom_highbd_8_variance16x8 sse2 neon sve/; 1544 specialize qw/aom_highbd_8_variance8x16 sse2 neon sve/; 1545 specialize qw/aom_highbd_8_variance8x8 sse2 neon sve/; 1546 specialize qw/aom_highbd_8_variance8x4 neon sve/; 1547 specialize qw/aom_highbd_8_variance4x8 neon sve/; 1548 specialize qw/aom_highbd_8_variance4x4 sse4_1 neon sve/; 1549 1550 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1551 foreach $bd (8, 10, 12) { 1552 my $avx2 = ($bd == 10) ? "avx2" : ""; 1553 specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/; 1554 specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/; 1555 specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/; 1556 specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/; 1557 specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/; 1558 specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/; 1559 } 1560 } 1561 1562 specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/; 1563 specialize qw/aom_highbd_12_sub_pixel_variance128x64 sse2 neon/; 1564 specialize qw/aom_highbd_12_sub_pixel_variance64x128 sse2 neon/; 1565 specialize qw/aom_highbd_12_sub_pixel_variance64x64 sse2 neon/; 1566 specialize qw/aom_highbd_12_sub_pixel_variance64x32 sse2 neon/; 1567 specialize qw/aom_highbd_12_sub_pixel_variance32x64 sse2 neon/; 1568 specialize qw/aom_highbd_12_sub_pixel_variance32x32 sse2 neon/; 1569 specialize qw/aom_highbd_12_sub_pixel_variance32x16 sse2 neon/; 1570 specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2 neon/; 1571 specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 neon/; 1572 specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 neon/; 1573 specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2 neon/; 1574 specialize qw/aom_highbd_12_sub_pixel_variance8x8 sse2 neon/; 1575 specialize qw/aom_highbd_12_sub_pixel_variance8x4 sse2 neon/; 1576 specialize qw/aom_highbd_12_sub_pixel_variance4x8 neon/; 1577 specialize qw/aom_highbd_12_sub_pixel_variance4x4 sse4_1 neon/; 1578 1579 specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/; 1580 specialize qw/aom_highbd_10_sub_pixel_variance128x64 sse2 avx2 neon/; 1581 specialize qw/aom_highbd_10_sub_pixel_variance64x128 sse2 avx2 neon/; 1582 specialize qw/aom_highbd_10_sub_pixel_variance64x64 sse2 avx2 neon/; 1583 specialize qw/aom_highbd_10_sub_pixel_variance64x32 sse2 avx2 neon/; 1584 specialize qw/aom_highbd_10_sub_pixel_variance32x64 sse2 avx2 neon/; 1585 specialize qw/aom_highbd_10_sub_pixel_variance32x32 sse2 avx2 neon/; 1586 specialize qw/aom_highbd_10_sub_pixel_variance32x16 sse2 avx2 neon/; 1587 specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2 avx2 neon/; 1588 specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2 neon/; 1589 specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2 neon/; 1590 specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2 avx2 neon/; 1591 specialize qw/aom_highbd_10_sub_pixel_variance8x8 sse2 avx2 neon/; 1592 specialize qw/aom_highbd_10_sub_pixel_variance8x4 sse2 neon/; 1593 specialize qw/aom_highbd_10_sub_pixel_variance4x8 neon/; 1594 specialize qw/aom_highbd_10_sub_pixel_variance4x4 sse4_1 neon/; 1595 1596 specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/; 1597 specialize qw/aom_highbd_8_sub_pixel_variance128x64 sse2 neon/; 1598 specialize qw/aom_highbd_8_sub_pixel_variance64x128 sse2 neon/; 1599 specialize qw/aom_highbd_8_sub_pixel_variance64x64 sse2 neon/; 1600 specialize qw/aom_highbd_8_sub_pixel_variance64x32 sse2 neon/; 1601 specialize qw/aom_highbd_8_sub_pixel_variance32x64 sse2 neon/; 1602 specialize qw/aom_highbd_8_sub_pixel_variance32x32 sse2 neon/; 1603 specialize qw/aom_highbd_8_sub_pixel_variance32x16 sse2 neon/; 1604 specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2 neon/; 1605 specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 neon/; 1606 specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 neon/; 1607 specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2 neon/; 1608 specialize qw/aom_highbd_8_sub_pixel_variance8x8 sse2 neon/; 1609 specialize qw/aom_highbd_8_sub_pixel_variance8x4 sse2 neon/; 1610 specialize qw/aom_highbd_8_sub_pixel_variance4x8 neon/; 1611 specialize qw/aom_highbd_8_sub_pixel_variance4x4 sse4_1 neon/; 1612 1613 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1614 foreach $bd (8, 10, 12) { 1615 specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/; 1616 specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/; 1617 specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/; 1618 specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/; 1619 specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/; 1620 specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/; 1621 } 1622 } 1623 1624 specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128 neon/; 1625 specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64 neon/; 1626 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128 neon/; 1627 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64 sse2 neon/; 1628 specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32 sse2 neon/; 1629 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64 sse2 neon/; 1630 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32 sse2 neon/; 1631 specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16 sse2 neon/; 1632 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2 neon/; 1633 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2 neon/; 1634 specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2 neon/; 1635 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2 neon/; 1636 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8 sse2 neon/; 1637 specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4 sse2 neon/; 1638 specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8 neon/; 1639 specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4 sse4_1 neon/; 1640 1641 specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128 neon/; 1642 specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64 neon/; 1643 specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128 neon/; 1644 specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64 sse2 neon/; 1645 specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32 sse2 neon/; 1646 specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64 sse2 neon/; 1647 specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32 sse2 neon/; 1648 specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16 sse2 neon/; 1649 specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2 neon/; 1650 specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2 neon/; 1651 specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2 neon/; 1652 specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2 neon/; 1653 specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8 sse2 neon/; 1654 specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4 sse2 neon/; 1655 specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8 neon/; 1656 specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4 sse4_1 neon/; 1657 1658 specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128 neon/; 1659 specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64 neon/; 1660 specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128 neon/; 1661 specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64 sse2 neon/; 1662 specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32 sse2 neon/; 1663 specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64 sse2 neon/; 1664 specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32 sse2 neon/; 1665 specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16 sse2 neon/; 1666 specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2 neon/; 1667 specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2 neon/; 1668 specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2 neon/; 1669 specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2 neon/; 1670 specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8 sse2 neon/; 1671 specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4 sse2 neon/; 1672 specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8 neon/; 1673 specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4 sse4_1 neon/; 1674 1675 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1676 foreach $bd (8, 10, 12) { 1677 specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/; 1678 specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/; 1679 specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/; 1680 specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/; 1681 specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/; 1682 specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/; 1683 } 1684 } 1685 1686 foreach $bd (8, 10, 12) { 1687 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x128", qw/neon/; 1688 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x64" , qw/neon/; 1689 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x128" , qw/neon/; 1690 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x64" , qw/neon/; 1691 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x32" , qw/neon/; 1692 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x64" , qw/neon/; 1693 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x32" , qw/neon/; 1694 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x16" , qw/neon/; 1695 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x32" , qw/neon/; 1696 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x16" , qw/neon/; 1697 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x8" , qw/neon/; 1698 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x16" , qw/neon/; 1699 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x8" , qw/neon/; 1700 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x4" , qw/neon/; 1701 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x8" , qw/neon/; 1702 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x4" , qw/neon/; 1703 } 1704 1705 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1706 foreach $bd (8, 10, 12) { 1707 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x16", qw/neon/; 1708 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x8" , qw/neon/; 1709 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x64", qw/neon/; 1710 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x4" , qw/neon/; 1711 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x32" , qw/neon/; 1712 specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x16" , qw/neon/; 1713 } 1714 } 1715 } 1716 # 1717 # Masked Variance / Masked Subpixel Variance 1718 # 1719 foreach (@encoder_block_sizes) { 1720 ($w, $h) = @$_; 1721 add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse"; 1722 specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/; 1723 } 1724 1725 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1726 foreach $bd ("_8_", "_10_", "_12_") { 1727 foreach (@encoder_block_sizes) { 1728 ($w, $h) = @$_; 1729 add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse"; 1730 specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/; 1731 } 1732 } 1733 } 1734 1735 # 1736 # OBMC Variance / OBMC Subpixel Variance 1737 # 1738 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1739 foreach (@encoder_block_sizes) { 1740 ($w, $h) = @$_; 1741 add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; 1742 add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; 1743 specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/; 1744 specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/; 1745 } 1746 1747 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1748 foreach $bd ("_8_", "_10_", "_12_") { 1749 foreach (@encoder_block_sizes) { 1750 ($w, $h) = @$_; 1751 add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; 1752 add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse"; 1753 specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/; 1754 specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/; 1755 } 1756 } 1757 } 1758 } 1759 1760 # 1761 # Comp Avg 1762 # 1763 add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; 1764 specialize qw/aom_comp_avg_pred avx2 neon/; 1765 1766 add_proto qw/void aom_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param"; 1767 specialize qw/aom_dist_wtd_comp_avg_pred ssse3 neon/; 1768 1769 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1770 add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride"; 1771 specialize qw/aom_highbd_comp_avg_pred neon/; 1772 1773 add_proto qw/void aom_highbd_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param"; 1774 specialize qw/aom_highbd_dist_wtd_comp_avg_pred sse2 neon/; 1775 1776 add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h"; 1777 specialize qw/aom_mse_wxh_16bit_highbd sse2 avx2 neon sve/; 1778 } 1779 1780 add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask"; 1781 specialize qw/aom_comp_mask_pred ssse3 avx2 neon/; 1782 1783 if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") { 1784 add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask"; 1785 specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/; 1786 } 1787 1788 # Flow estimation library 1789 if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") { 1790 add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev"; 1791 specialize qw/aom_compute_mean_stddev sse4_1 avx2/; 1792 1793 add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2"; 1794 specialize qw/aom_compute_correlation sse4_1 avx2/; 1795 1796 add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v"; 1797 specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon/; 1798 } 1799 1800} # CONFIG_AV1_ENCODER 1801 18021; 1803