• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
# Writes the "DSP" banner comment followed by the #include lines for the
# headers listed below, then a trailing blank line. The whole text is built
# up first and emitted with a single print call.
sub aom_dsp_forward_decls() {
  my @headers = (
    "aom/aom_integer.h",
    "aom_dsp/aom_dsp_common.h",
    "av1/common/blockd.h",
    "av1/common/enums.h",
  );

  my $text = "/*\n * DSP\n */\n\n";
  $text .= qq{#include "$_"\n} foreach (@headers);
  $text .= "\n";
  print $text;
}
# Hand the generator's name to forward_decls (defined outside this file).
forward_decls qw/aom_dsp_forward_decls/;
25
# Optimizations which depend on multiple features.
# $avx2_ssse3 is 'avx2' only when both HAVE_AVX2 and HAVE_SSSE3 are "yes";
# otherwise it is the empty string.
$avx2_ssse3 =
    (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes")
    ? 'avx2'
    : '';
31
# Functions that are 64 bit only.
# Each flag holds its instruction-set name when $opts{arch} is "x86_64" and
# the empty string for any other architecture.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64") ? ('mmx', 'sse2', 'ssse3', 'avx', 'avx2')
                              : ('') x 5;
41
@block_widths = (4, 8, 16, 32, 64, 128);

# Every [width, height] pair drawn from @block_widths in which neither side
# is more than twice the other, ordered by width and then by height.
@encoder_block_sizes = map {
  my $bw = $_;
  map { [$bw, $_] } grep { $bw <= 2 * $_ && $_ <= 2 * $bw } @block_widths;
} @block_widths;

# The 4:1 and 1:4 shapes are appended unless CONFIG_REALTIME_ONLY is "yes".
push @encoder_block_sizes,
     ([4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16])
  unless aom_config("CONFIG_REALTIME_ONLY") eq "yes";
59
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
# For each width: the square size first, then every height from @tx_dims
# whose longer side is exactly two or four times the shorter side.
foreach my $tx_w (@tx_dims) {
  push @tx_sizes, [$tx_w, $tx_w];
  foreach my $tx_h (@tx_dims) {
    next if $tx_w < 4 || $tx_h < 4;
    my ($long, $short) = ($tx_w > $tx_h) ? ($tx_w, $tx_h) : ($tx_h, $tx_w);
    push @tx_sizes, [$tx_w, $tx_h]
      if $long == 2 * $short || $long == 4 * $short;
  }
}
69
# Intra predictor families; one prototype per family is declared for every
# entry of @tx_sizes.
@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#

foreach my $tx_size (@tx_sizes) {
  # $w and $h stay file-level globals here, exactly as before.
  ($w, $h) = @{$tx_size};
  foreach my $pred (@pred_names) {
    add_proto("void", "aom_${pred}_predictor_${w}x${h}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left");

    # The 16-bit variant is only declared for high-bitdepth builds.
    next unless aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes";
    add_proto("void", "aom_highbd_${pred}_predictor_${w}x${h}",
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd");
  }
}
87
# Low-bitdepth intra predictor specializations. The loops below issue the
# same specialize calls, with the same architecture lists and in the same
# order, as one explicit line per function would.
{
  # Block sizes in registration order, split at the point where most
  # predictor families start listing avx2.
  my @narrow = qw/4x4 4x8 4x16 8x4 8x8 8x16 8x32 16x4 16x8 16x16 16x32 16x64 32x8/;
  my @wide   = qw/32x16 32x32 32x64 64x16 64x32 64x64/;

  # dc_top, dc_left, dc_128, v: neon + sse2 everywhere, avx2 on @wide only.
  foreach my $pred (qw/dc_top dc_left dc_128 v/) {
    specialize("aom_${pred}_predictor_$_", qw/neon sse2/)      foreach (@narrow);
    specialize("aom_${pred}_predictor_$_", qw/neon sse2 avx2/) foreach (@wide);
  }

  # h: neon + sse2 everywhere; 32x32 is the only size that also lists avx2.
  foreach my $size (@narrow, @wide) {
    my @isa = ($size eq '32x32') ? qw/neon sse2 avx2/ : qw/neon sse2/;
    specialize("aom_h_predictor_$size", @isa);
  }

  # paeth: ssse3 + neon everywhere; avx2 on 16x8..16x64 and on @wide.
  my %paeth_avx2 = map { $_ => 1 } (qw/16x8 16x16 16x32 16x64/, @wide);
  foreach my $size (@narrow, @wide) {
    specialize("aom_paeth_predictor_$size",
               'ssse3', ($paeth_avx2{$size} ? 'avx2' : ()), 'neon');
  }

  # smooth, smooth_v, smooth_h: neon + ssse3 for every size.
  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    specialize("aom_${pred}_predictor_$_", qw/neon ssse3/) foreach (@narrow, @wide);
  }

  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  specialize("aom_dc_predictor_$_", qw/neon sse2/) foreach (@narrow);
  # The 64-wide entries are registered in descending height order here.
  specialize("aom_dc_predictor_$_", qw/neon sse2 avx2/)
    foreach (qw/32x16 32x32 32x64 64x64 64x32 64x16/);
}
# High-bitdepth intra predictor specializations, registered only when
# CONFIG_AV1_HIGHBITDEPTH is "yes". The loops issue the same specialize
# calls, in the same order, as one explicit line per function would.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # All block sizes, in registration order.
  my @sizes = qw/4x4 4x8 4x16 8x4 8x8 8x16 8x32 16x4 16x8 16x16 16x32 16x64
                 32x8 32x16 32x32 32x64 64x16 64x32 64x64/;
  # Sizes that list sse2 in addition to neon.
  my %has_sse2 = map { $_ => 1 }
                 qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/;

  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift. (Applies to the "dc" family below.)
  foreach my $pred (qw/v dc h dc_128 dc_left dc_top/) {
    foreach my $size (@sizes) {
      my @isa = $has_sse2{$size} ? qw/sse2 neon/ : qw/neon/;
      specialize("aom_highbd_${pred}_predictor_$size", @isa);
    }
  }

  # paeth and the smooth families list neon only, for every size.
  foreach my $pred (qw/paeth smooth smooth_v smooth_h/) {
    specialize("aom_highbd_${pred}_predictor_$_", 'neon') foreach (@sizes);
  }
}
#
# Sub Pixel Filters
#
{
  # Argument-list fragments shared by the 8-bit convolve prototypes.
  my $buffers = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride";
  my $dims    = "int w, int h";
  # Used by aom_convolve8 and aom_scaled_2d.
  my $kernel_args = "$buffers, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, $dims";
  # Used by aom_convolve8_horiz and aom_convolve8_vert.
  my $split_args  = "$buffers, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, $dims";

  add_proto("void", "aom_convolve8", $kernel_args);
  add_proto("void", "aom_convolve_copy", "$buffers, $dims");
  add_proto("void", "aom_convolve8_$_", $split_args) foreach (qw/horiz vert/);

  specialize("aom_convolve_copy", qw/neon sse2 avx2/);
  # $avx2_ssse3 contributes 'avx2' or an empty string as the final argument.
  specialize("aom_convolve8_$_", qw/neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3")
    foreach (qw/horiz vert/);

  add_proto("void", "aom_scaled_2d", $kernel_args);
  specialize("aom_scaled_2d", qw/ssse3 neon/);
}
506
# High-bitdepth convolve prototypes, declared only when
# CONFIG_AV1_HIGHBITDEPTH is "yes".
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  add_proto("void", "aom_highbd_convolve_copy",
            "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h");
  specialize("aom_highbd_convolve_copy", qw/sse2 avx2 neon/);

  # NOTE(review): the horiz/vert prototypes take uint8_t pointers while the
  # copy prototype takes uint16_t pointers -- presumably intentional; confirm
  # against the implementations.
  foreach my $dir (qw/horiz vert/) {
    my $fn = "aom_highbd_convolve8_$dir";
    add_proto("void", $fn,
              "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bd");
    specialize($fn, qw/sse2 avx2 neon sve/);
  }
}
517
#
# Loopfilter
#
{
  # The three argument lists used by the 8-bit loop filter prototypes,
  # selected by the function-name suffix (none, _dual, _quad).
  my %lpf_args = (
    single => "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh",
    dual   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1",
    quad   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0",
  );

  # [name after "aom_lpf_", architectures passed to specialize], listed in
  # registration order.
  my @lpf_table = (
    ['vertical_14',        qw/sse2 neon/],
    ['vertical_14_dual',   qw/sse2 neon/],
    ['vertical_14_quad',   qw/avx2 sse2 neon/],
    ['vertical_6',         qw/sse2 neon/],
    ['vertical_8',         qw/sse2 neon/],
    ['vertical_8_dual',    qw/sse2 neon/],
    ['vertical_8_quad',    qw/sse2 neon/],
    ['vertical_4',         qw/sse2 neon/],
    ['vertical_4_dual',    qw/sse2 neon/],
    ['vertical_4_quad',    qw/sse2 neon/],
    ['horizontal_14',      qw/sse2 neon/],
    ['horizontal_14_dual', qw/sse2 neon/],
    ['horizontal_14_quad', qw/sse2 avx2 neon/],
    ['horizontal_6',       qw/sse2 neon/],
    ['horizontal_6_dual',  qw/sse2 neon/],
    ['horizontal_6_quad',  qw/sse2 avx2 neon/],
    ['horizontal_8',       qw/sse2 neon/],
    ['horizontal_8_dual',  qw/sse2 neon/],
    ['horizontal_8_quad',  qw/sse2 avx2 neon/],
    ['horizontal_4',       qw/sse2 neon/],
    ['horizontal_4_dual',  qw/sse2 neon/],
    ['horizontal_4_quad',  qw/sse2 neon/],
    ['vertical_6_dual',    qw/sse2 neon/],
    ['vertical_6_quad',    qw/sse2 neon/],
  );

  foreach my $entry (@lpf_table) {
    my ($suffix, @isa) = @{$entry};
    my $kind = ($suffix =~ /_dual$/) ? 'dual'
             : ($suffix =~ /_quad$/) ? 'quad'
             :                         'single';
    add_proto("void", "aom_lpf_$suffix", $lpf_args{$kind});
    specialize("aom_lpf_$suffix", @isa);
  }
}
592
# High-bitdepth loop-filter declarations, emitted only when
# CONFIG_AV1_HIGHBITDEPTH is "yes". Rows are
#   [ function name, C argument list, optimization targets... ]
# and are registered in table order.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Variant taking a single blimit/limit/thresh triple plus the bit depth.
  my $single_args = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  # "_dual" variant: two triples (suffixed 0 and 1) plus the bit depth.
  my $dual_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
  # aom_highbd_lpf_horizontal_14_dual spells its argument list without a space
  # before "int bd"; the text is kept exactly as it has always been emitted.
  my $dual_args_h14 = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";

  foreach my $row (
    [ 'aom_highbd_lpf_vertical_14',        $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_vertical_14_dual',   $dual_args,     qw(neon sse2 avx2) ],
    [ 'aom_highbd_lpf_vertical_8',         $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_vertical_8_dual',    $dual_args,     qw(neon sse2 avx2) ],
    [ 'aom_highbd_lpf_vertical_6',         $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_vertical_6_dual',    $dual_args,     qw(neon sse2) ],
    [ 'aom_highbd_lpf_vertical_4',         $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_vertical_4_dual',    $dual_args,     qw(neon sse2 avx2) ],
    [ 'aom_highbd_lpf_horizontal_14',      $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_horizontal_14_dual', $dual_args_h14, qw(neon sse2 avx2) ],
    [ 'aom_highbd_lpf_horizontal_6',       $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_horizontal_6_dual',  $dual_args,     qw(neon sse2) ],
    [ 'aom_highbd_lpf_horizontal_8',       $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_horizontal_8_dual',  $dual_args,     qw(neon sse2 avx2) ],
    [ 'aom_highbd_lpf_horizontal_4',       $single_args,   qw(neon sse2) ],
    [ 'aom_highbd_lpf_horizontal_4_dual',  $dual_args,     qw(neon sse2 avx2) ],
  ) {
    my ($fn, $args, @targets) = @$row;
    add_proto('void', $fn, $args);
    specialize($fn, @targets);
  }
}
642
643#
644# Encoder functions.
645#
646
647#
648# Forward transform
649#
# Forward DCT and float FFT/IFFT declarations; encoder builds only.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # 4x4 forward DCT writing tran_low_t coefficients.
  add_proto('void', 'aom_fdct4x4', "const int16_t *input, tran_low_t *output, int stride");
  specialize('aom_fdct4x4', qw(neon sse2));

  # Same transform, but the "_lp" flavour writes int16_t coefficients.
  add_proto('void', 'aom_fdct4x4_lp', "const int16_t *input, int16_t *output, int stride");
  specialize('aom_fdct4x4_lp', qw(neon sse2));

  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # The 8x8 DCT exists for psnr-hvs. It performs two transposes, so unlike
    # the other transforms its output is not compatible with av1 scan orders.
    add_proto('void', 'aom_fdct8x8', "const int16_t *input, tran_low_t *output, int stride");
    # $ssse3_x86_64 is 'ssse3' on x86_64 and the empty string elsewhere.
    specialize('aom_fdct8x8', qw(neon sse2), "$ssse3_x86_64");

    # High bit depth counterpart.
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      add_proto('void', 'aom_highbd_fdct8x8', "const int16_t *input, tran_low_t *output, int stride");
      specialize('aom_highbd_fdct8x8', qw(sse2));
    }
  }

  # Float FFT/IFFT, only used for denoising (and noise power spectral density
  # estimation). All forward sizes are declared first, then all inverse sizes.
  foreach my $xform (qw(fft ifft)) {
    foreach my $n (2, 4, 8, 16, 32) {
      my $fn = "aom_${xform}${n}x${n}_float";
      add_proto('void', $fn, "const float *input, float *temp, float *output");

      # The 2x2 transforms are declared without any specialization.
      next if $n == 2;

      # 4x4 lists sse2 only; every larger size lists avx2 and sse2.
      my @targets = ($n == 4) ? ('sse2') : ('avx2', 'sse2');
      specialize($fn, @targets);
    }
  }
}  # CONFIG_AV1_ENCODER
697
698#
699# Quantization
700#
# 8-bit quantizer declarations; encoder builds only.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # Every aom_quantize_b* entry point shares this argument list.
  my $quantize_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  # $ssse3_x86_64 is 'ssse3' on x86_64 and the empty string elsewhere.
  add_proto('void', 'aom_quantize_b', $quantize_args);
  specialize('aom_quantize_b', qw(sse2 neon avx avx2), "$ssse3_x86_64");

  add_proto('void', 'aom_quantize_b_32x32', $quantize_args);
  specialize('aom_quantize_b_32x32', qw(neon avx avx2), "$ssse3_x86_64");

  add_proto('void', 'aom_quantize_b_64x64', $quantize_args);
  specialize('aom_quantize_b_64x64', qw(neon ssse3 avx2));

  # The adaptive quantizers are left out of realtime-only builds.
  unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
    add_proto('void', 'aom_quantize_b_adaptive', $quantize_args);
    specialize('aom_quantize_b_adaptive', qw(sse2 avx2));

    add_proto('void', 'aom_quantize_b_32x32_adaptive', $quantize_args);
    specialize('aom_quantize_b_32x32_adaptive', qw(sse2));

    add_proto('void', 'aom_quantize_b_64x64_adaptive', $quantize_args);
    specialize('aom_quantize_b_64x64_adaptive', qw(sse2));
  }
}  # CONFIG_AV1_ENCODER
722
# High-bitdepth quantizer declarations; needs both the encoder and
# high-bitdepth support to be enabled.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Every aom_highbd_quantize_b* entry point shares this argument list.
  my $quantize_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  # The three fixed quantizers all list the same targets.
  foreach my $suffix ('', '_32x32', '_64x64') {
    my $fn = "aom_highbd_quantize_b${suffix}";
    add_proto('void', $fn, $quantize_args);
    specialize($fn, qw(sse2 avx2 neon));
  }

  # The adaptive quantizers are left out of realtime-only builds.
  unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
    add_proto('void', 'aom_highbd_quantize_b_adaptive', $quantize_args);
    specialize('aom_highbd_quantize_b_adaptive', qw(sse2 avx2 neon));

    add_proto('void', 'aom_highbd_quantize_b_32x32_adaptive', $quantize_args);
    specialize('aom_highbd_quantize_b_32x32_adaptive', qw(sse2 avx2 neon));

    # The 64x64 adaptive quantizer does not list avx2.
    add_proto('void', 'aom_highbd_quantize_b_64x64_adaptive', $quantize_args);
    specialize('aom_highbd_quantize_b_64x64_adaptive', qw(sse2 neon));
  }
}  # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH
744
745#
746# Alpha blending with mask
747#
# 8-bit alpha-blend declarations. All prototypes are registered before the
# plain/h/v mask specializations, matching the historical call order.
{
  # The hmask and vmask variants share one argument list (no mask stride,
  # no subsampling flags).
  my $hv_mask_args = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h";

  # Blend of two CONV_BUF_TYPE sources into an 8-bit destination.
  add_proto('void', 'aom_lowbd_blend_a64_d16_mask', "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params");
  specialize('aom_lowbd_blend_a64_d16_mask', qw(sse4_1 avx2 neon));

  add_proto('void', 'aom_blend_a64_mask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh");
  foreach my $dir (qw(h v)) {
    add_proto('void', "aom_blend_a64_${dir}mask", $hv_mask_args);
  }

  specialize('aom_blend_a64_mask', qw(sse4_1 neon avx2));
  foreach my $dir (qw(h v)) {
    specialize("aom_blend_a64_${dir}mask", qw(sse4_1 neon));
  }
}
756
# High-bitdepth alpha-blend declarations. All four prototypes are registered
# first, then the specializations, matching the historical call order.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # The hmask and vmask variants share one argument list.
  my $hv_mask_args = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd";

  add_proto('void', 'aom_highbd_blend_a64_mask', "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, int bd");
  foreach my $dir (qw(h v)) {
    add_proto('void', "aom_highbd_blend_a64_${dir}mask", $hv_mask_args);
  }
  add_proto('void', 'aom_highbd_blend_a64_d16_mask', "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd");

  # Plain, horizontal and vertical mask blends list the same targets.
  foreach my $dir ('', 'h', 'v') {
    specialize("aom_highbd_blend_a64_${dir}mask", qw(sse4_1 neon));
  }
  specialize('aom_highbd_blend_a64_d16_mask', qw(sse4_1 neon avx2));
}
767
768if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
769  #
770  # Block subtraction
771  #
772  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
773  specialize qw/aom_subtract_block neon sse2 avx2/;
774
775  add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
776  specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/;
777
778  add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum";
779  specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/;
780
781  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
782    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
783    specialize qw/aom_highbd_subtract_block sse2 neon/;
784
785    add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height";
786    specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/;
787  }
788
789  #
790  # Sum of Squares
791  #
792  add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
793  specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/;
794
795  add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
796  specialize qw/aom_sum_squares_i16 sse2 neon sve/;
797
798  add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height";
799  specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/;
800
801  add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height";
802  specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/;
803
804  #
805  # Single block SAD / Single block Avg SAD
806  #
807  foreach (@encoder_block_sizes) {
808    ($w, $h) = @$_;
809    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
810    add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
811    add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
812    add_proto qw/unsigned int/, "aom_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
813  }
814
815  add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum";
816  specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/;
817  specialize qw/aom_sad128x128    avx2 sse2 neon neon_dotprod/;
818  specialize qw/aom_sad128x64     avx2 sse2 neon neon_dotprod/;
819  specialize qw/aom_sad64x128     avx2 sse2 neon neon_dotprod/;
820  specialize qw/aom_sad64x64      avx2 sse2 neon neon_dotprod/;
821  specialize qw/aom_sad64x32      avx2 sse2 neon neon_dotprod/;
822  specialize qw/aom_sad32x64      avx2 sse2 neon neon_dotprod/;
823  specialize qw/aom_sad32x32      avx2 sse2 neon neon_dotprod/;
824  specialize qw/aom_sad32x16      avx2 sse2 neon neon_dotprod/;
825  specialize qw/aom_sad16x32           sse2 neon neon_dotprod/;
826  specialize qw/aom_sad16x16           sse2 neon neon_dotprod/;
827  specialize qw/aom_sad16x8            sse2 neon neon_dotprod/;
828  specialize qw/aom_sad8x16            sse2 neon/;
829  specialize qw/aom_sad8x8             sse2 neon/;
830  specialize qw/aom_sad8x4             sse2 neon/;
831  specialize qw/aom_sad4x8             sse2 neon/;
832  specialize qw/aom_sad4x4             sse2 neon/;
833
834  specialize qw/aom_sad4x16            sse2 neon/;
835  specialize qw/aom_sad16x4            sse2 neon neon_dotprod/;
836  specialize qw/aom_sad8x32            sse2 neon/;
837  specialize qw/aom_sad32x8            sse2 neon neon_dotprod/;
838  specialize qw/aom_sad16x64           sse2 neon neon_dotprod/;
839  specialize qw/aom_sad64x16           sse2 neon neon_dotprod/;
840
841  specialize qw/aom_sad_skip_128x128    avx2 sse2 neon neon_dotprod/;
842  specialize qw/aom_sad_skip_128x64     avx2 sse2 neon neon_dotprod/;
843  specialize qw/aom_sad_skip_64x128     avx2 sse2 neon neon_dotprod/;
844  specialize qw/aom_sad_skip_64x64      avx2 sse2 neon neon_dotprod/;
845  specialize qw/aom_sad_skip_64x32      avx2 sse2 neon neon_dotprod/;
846  specialize qw/aom_sad_skip_32x64      avx2 sse2 neon neon_dotprod/;
847  specialize qw/aom_sad_skip_32x32      avx2 sse2 neon neon_dotprod/;
848  specialize qw/aom_sad_skip_32x16      avx2 sse2 neon neon_dotprod/;
849  specialize qw/aom_sad_skip_16x32           sse2 neon neon_dotprod/;
850  specialize qw/aom_sad_skip_16x16           sse2 neon neon_dotprod/;
851  specialize qw/aom_sad_skip_16x8            sse2 neon neon_dotprod/;
852  specialize qw/aom_sad_skip_8x16            sse2 neon/;
853  specialize qw/aom_sad_skip_8x8             sse2 neon/;
854  specialize qw/aom_sad_skip_8x4                  neon/;
855  specialize qw/aom_sad_skip_4x8             sse2 neon/;
856  specialize qw/aom_sad_skip_4x4                  neon/;
857
858  specialize qw/aom_sad_skip_4x16            sse2 neon/;
859  specialize qw/aom_sad_skip_16x4                 neon neon_dotprod/;
860  specialize qw/aom_sad_skip_8x32            sse2 neon/;
861  specialize qw/aom_sad_skip_32x8            sse2 neon neon_dotprod/;
862  specialize qw/aom_sad_skip_16x64           sse2 neon neon_dotprod/;
863  specialize qw/aom_sad_skip_64x16           sse2 neon neon_dotprod/;
864
865  specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/;
866  specialize qw/aom_sad128x64_avg  avx2 sse2 neon neon_dotprod/;
867  specialize qw/aom_sad64x128_avg  avx2 sse2 neon neon_dotprod/;
868  specialize qw/aom_sad64x64_avg   avx2 sse2 neon neon_dotprod/;
869  specialize qw/aom_sad64x32_avg   avx2 sse2 neon neon_dotprod/;
870  specialize qw/aom_sad32x64_avg   avx2 sse2 neon neon_dotprod/;
871  specialize qw/aom_sad32x32_avg   avx2 sse2 neon neon_dotprod/;
872  specialize qw/aom_sad32x16_avg   avx2 sse2 neon neon_dotprod/;
873  specialize qw/aom_sad16x32_avg        sse2 neon neon_dotprod/;
874  specialize qw/aom_sad16x16_avg        sse2 neon neon_dotprod/;
875  specialize qw/aom_sad16x8_avg         sse2 neon neon_dotprod/;
876  specialize qw/aom_sad8x16_avg         sse2 neon/;
877  specialize qw/aom_sad8x8_avg          sse2 neon/;
878  specialize qw/aom_sad8x4_avg          sse2 neon/;
879  specialize qw/aom_sad4x8_avg          sse2 neon/;
880  specialize qw/aom_sad4x4_avg          sse2 neon/;
881
882  specialize qw/aom_sad4x16_avg         sse2 neon/;
883  specialize qw/aom_sad16x4_avg         sse2 neon neon_dotprod/;
884  specialize qw/aom_sad8x32_avg         sse2 neon/;
885  specialize qw/aom_sad32x8_avg         sse2 neon neon_dotprod/;
886  specialize qw/aom_sad16x64_avg        sse2 neon neon_dotprod/;
887  specialize qw/aom_sad64x16_avg        sse2 neon neon_dotprod/;
888
889  specialize qw/aom_dist_wtd_sad128x128_avg sse2 neon neon_dotprod/;
890  specialize qw/aom_dist_wtd_sad128x64_avg  sse2 neon neon_dotprod/;
891  specialize qw/aom_dist_wtd_sad64x128_avg  sse2 neon neon_dotprod/;
892  specialize qw/aom_dist_wtd_sad64x64_avg   sse2 neon neon_dotprod/;
893  specialize qw/aom_dist_wtd_sad64x32_avg   sse2 neon neon_dotprod/;
894  specialize qw/aom_dist_wtd_sad32x64_avg   sse2 neon neon_dotprod/;
895  specialize qw/aom_dist_wtd_sad32x32_avg   sse2 neon neon_dotprod/;
896  specialize qw/aom_dist_wtd_sad32x16_avg   sse2 neon neon_dotprod/;
897  specialize qw/aom_dist_wtd_sad16x32_avg   sse2 neon neon_dotprod/;
898  specialize qw/aom_dist_wtd_sad16x16_avg   sse2 neon neon_dotprod/;
899  specialize qw/aom_dist_wtd_sad16x8_avg    sse2 neon neon_dotprod/;
900  specialize qw/aom_dist_wtd_sad8x16_avg    sse2 neon/;
901  specialize qw/aom_dist_wtd_sad8x8_avg     sse2 neon/;
902  specialize qw/aom_dist_wtd_sad8x4_avg     sse2 neon/;
903  specialize qw/aom_dist_wtd_sad4x8_avg     sse2 neon/;
904  specialize qw/aom_dist_wtd_sad4x4_avg     sse2 neon/;
905
906  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
907    specialize qw/aom_dist_wtd_sad4x16_avg     sse2 neon/;
908    specialize qw/aom_dist_wtd_sad16x4_avg     sse2 neon neon_dotprod/;
909    specialize qw/aom_dist_wtd_sad8x32_avg     sse2 neon/;
910    specialize qw/aom_dist_wtd_sad32x8_avg     sse2 neon neon_dotprod/;
911    specialize qw/aom_dist_wtd_sad16x64_avg    sse2 neon neon_dotprod/;
912    specialize qw/aom_dist_wtd_sad64x16_avg    sse2 neon neon_dotprod/;
913  }
914
915  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
916    foreach (@encoder_block_sizes) {
917      ($w, $h) = @$_;
918      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
919      add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
920      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
921      if ($w != 128 && $h != 128 && $w != 4) {
922        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
923        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
924      }
925      add_proto qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
926    }
927    specialize qw/aom_highbd_sad128x128 avx2      neon/;
928    specialize qw/aom_highbd_sad128x64  avx2      neon/;
929    specialize qw/aom_highbd_sad64x128  avx2      neon/;
930    specialize qw/aom_highbd_sad64x64   avx2 sse2 neon/;
931    specialize qw/aom_highbd_sad64x32   avx2 sse2 neon/;
932    specialize qw/aom_highbd_sad32x64   avx2 sse2 neon/;
933    specialize qw/aom_highbd_sad32x32   avx2 sse2 neon/;
934    specialize qw/aom_highbd_sad32x16   avx2 sse2 neon/;
935    specialize qw/aom_highbd_sad16x32   avx2 sse2 neon/;
936    specialize qw/aom_highbd_sad16x16   avx2 sse2 neon/;
937    specialize qw/aom_highbd_sad16x8    avx2 sse2 neon/;
938    specialize qw/aom_highbd_sad8x16         sse2 neon/;
939    specialize qw/aom_highbd_sad8x8          sse2 neon/;
940    specialize qw/aom_highbd_sad8x4          sse2 neon/;
941    specialize qw/aom_highbd_sad4x8          sse2 neon/;
942    specialize qw/aom_highbd_sad4x4          sse2 neon/;
943
944    specialize qw/aom_highbd_sad4x16         sse2 neon/;
945    specialize qw/aom_highbd_sad16x4    avx2 sse2 neon/;
946    specialize qw/aom_highbd_sad8x32         sse2 neon/;
947    specialize qw/aom_highbd_sad32x8    avx2 sse2 neon/;
948    specialize qw/aom_highbd_sad16x64   avx2 sse2 neon/;
949    specialize qw/aom_highbd_sad64x16   avx2 sse2 neon/;
950
951    specialize qw/aom_highbd_sad_skip_128x128 avx2      neon/;
952    specialize qw/aom_highbd_sad_skip_128x64  avx2      neon/;
953    specialize qw/aom_highbd_sad_skip_64x128  avx2      neon/;
954    specialize qw/aom_highbd_sad_skip_64x64   avx2 sse2 neon/;
955    specialize qw/aom_highbd_sad_skip_64x32   avx2 sse2 neon/;
956    specialize qw/aom_highbd_sad_skip_32x64   avx2 sse2 neon/;
957    specialize qw/aom_highbd_sad_skip_32x32   avx2 sse2 neon/;
958    specialize qw/aom_highbd_sad_skip_32x16   avx2 sse2 neon/;
959    specialize qw/aom_highbd_sad_skip_16x32   avx2 sse2 neon/;
960    specialize qw/aom_highbd_sad_skip_16x16   avx2 sse2 neon/;
961    specialize qw/aom_highbd_sad_skip_16x8    avx2 sse2 neon/;
962    specialize qw/aom_highbd_sad_skip_16x4              neon/;
963    specialize qw/aom_highbd_sad_skip_8x16         sse2 neon/;
964    specialize qw/aom_highbd_sad_skip_8x4               neon/;
965    specialize qw/aom_highbd_sad_skip_8x8          sse2 neon/;
966    specialize qw/aom_highbd_sad_skip_4x8          sse2 neon/;
967    specialize qw/aom_highbd_sad_skip_4x4               neon/;
968
969    specialize qw/aom_highbd_sad_skip_4x16         sse2 neon/;
970    specialize qw/aom_highbd_sad_skip_8x32         sse2 neon/;
971    specialize qw/aom_highbd_sad_skip_32x8    avx2 sse2 neon/;
972    specialize qw/aom_highbd_sad_skip_16x64   avx2 sse2 neon/;
973    specialize qw/aom_highbd_sad_skip_64x16   avx2 sse2 neon/;
974
975    specialize qw/aom_highbd_sad128x128_avg avx2      neon/;
976    specialize qw/aom_highbd_sad128x64_avg  avx2      neon/;
977    specialize qw/aom_highbd_sad64x128_avg  avx2      neon/;
978    specialize qw/aom_highbd_sad64x64_avg   avx2 sse2 neon/;
979    specialize qw/aom_highbd_sad64x32_avg   avx2 sse2 neon/;
980    specialize qw/aom_highbd_sad32x64_avg   avx2 sse2 neon/;
981    specialize qw/aom_highbd_sad32x32_avg   avx2 sse2 neon/;
982    specialize qw/aom_highbd_sad32x16_avg   avx2 sse2 neon/;
983    specialize qw/aom_highbd_sad16x32_avg   avx2 sse2 neon/;
984    specialize qw/aom_highbd_sad16x16_avg   avx2 sse2 neon/;
985    specialize qw/aom_highbd_sad16x8_avg    avx2 sse2 neon/;
986    specialize qw/aom_highbd_sad8x16_avg              neon/;
987    specialize qw/aom_highbd_sad8x8_avg               neon/;
988    specialize qw/aom_highbd_sad8x4_avg          sse2 neon/;
989    specialize qw/aom_highbd_sad4x8_avg          sse2 neon/;
990    specialize qw/aom_highbd_sad4x4_avg          sse2 neon/;
991
992    specialize qw/aom_highbd_sad4x16_avg         sse2 neon/;
993    specialize qw/aom_highbd_sad8x32_avg         sse2 neon/;
994    specialize qw/aom_highbd_sad16x4_avg    avx2 sse2 neon/;
995    specialize qw/aom_highbd_sad16x64_avg   avx2 sse2 neon/;
996    specialize qw/aom_highbd_sad32x8_avg    avx2 sse2 neon/;
997    specialize qw/aom_highbd_sad64x16_avg   avx2 sse2 neon/;
998  }
999  #
1000  # Masked SAD
1001  #
1002  foreach (@encoder_block_sizes) {
1003    ($w, $h) = @$_;
1004    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
1005    specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
1006  }
1007
1008  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1009    foreach (@encoder_block_sizes) {
1010      ($w, $h) = @$_;
1011      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
1012      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
1013    }
1014  }
1015
1016  #
1017  # OBMC SAD
1018  #
1019  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1020    foreach (@encoder_block_sizes) {
1021      ($w, $h) = @$_;
1022      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
1023      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1024        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1025      }
1026    }
1027
1028    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1029      foreach (@encoder_block_sizes) {
1030        ($w, $h) = @$_;
1031        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
1032        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1033          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1034        }
1035      }
1036    }
1037  }
1038
1039  #
1040  # Multi-block SAD, comparing a reference to N independent blocks
1041  #
1042  foreach (@encoder_block_sizes) {
1043    ($w, $h) = @$_;
1044    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1045    add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1046    add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1047    add_proto qw/void/, "aom_masked_sad${w}x${h}x4d", "const uint8_t *src, int src_stride, const uint8_t *ref[4], int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned sads[4]";
1048  }
1049
1050  specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/;
1051  specialize qw/aom_sad128x64x4d  avx2 sse2 neon neon_dotprod/;
1052  specialize qw/aom_sad64x128x4d  avx2 sse2 neon neon_dotprod/;
1053  specialize qw/aom_sad64x64x4d   avx2 sse2 neon neon_dotprod/;
1054  specialize qw/aom_sad64x32x4d   avx2 sse2 neon neon_dotprod/;
1055  specialize qw/aom_sad32x64x4d   avx2 sse2 neon neon_dotprod/;
1056  specialize qw/aom_sad32x32x4d   avx2 sse2 neon neon_dotprod/;
1057  specialize qw/aom_sad32x16x4d   avx2 sse2 neon neon_dotprod/;
1058  specialize qw/aom_sad16x32x4d   avx2 sse2 neon neon_dotprod/;
1059  specialize qw/aom_sad16x16x4d   avx2 sse2 neon neon_dotprod/;
1060  specialize qw/aom_sad16x8x4d    avx2 sse2 neon neon_dotprod/;
1061
1062  specialize qw/aom_sad8x16x4d         sse2 neon/;
1063  specialize qw/aom_sad8x8x4d          sse2 neon/;
1064  specialize qw/aom_sad8x4x4d          sse2 neon/;
1065  specialize qw/aom_sad4x8x4d          sse2 neon/;
1066  specialize qw/aom_sad4x4x4d          sse2 neon/;
1067
1068  specialize qw/aom_sad64x16x4d   avx2 sse2 neon neon_dotprod/;
1069  specialize qw/aom_sad32x8x4d    avx2 sse2 neon neon_dotprod/;
1070  specialize qw/aom_sad16x64x4d   avx2 sse2 neon neon_dotprod/;
1071  specialize qw/aom_sad16x4x4d    avx2 sse2 neon neon_dotprod/;
1072  specialize qw/aom_sad8x32x4d         sse2 neon/;
1073  specialize qw/aom_sad4x16x4d         sse2 neon/;
1074
1075  specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/;
1076  specialize qw/aom_sad_skip_128x64x4d  avx2 sse2 neon neon_dotprod/;
1077  specialize qw/aom_sad_skip_64x128x4d  avx2 sse2 neon neon_dotprod/;
1078  specialize qw/aom_sad_skip_64x64x4d   avx2 sse2 neon neon_dotprod/;
1079  specialize qw/aom_sad_skip_64x32x4d   avx2 sse2 neon neon_dotprod/;
1080  specialize qw/aom_sad_skip_64x16x4d   avx2 sse2 neon neon_dotprod/;
1081  specialize qw/aom_sad_skip_32x64x4d   avx2 sse2 neon neon_dotprod/;
1082  specialize qw/aom_sad_skip_32x32x4d   avx2 sse2 neon neon_dotprod/;
1083  specialize qw/aom_sad_skip_32x16x4d   avx2 sse2 neon neon_dotprod/;
1084  specialize qw/aom_sad_skip_32x8x4d    avx2 sse2 neon neon_dotprod/;
1085
1086  specialize qw/aom_sad_skip_16x64x4d   avx2 sse2 neon neon_dotprod/;
1087  specialize qw/aom_sad_skip_16x32x4d   avx2 sse2 neon neon_dotprod/;
1088  specialize qw/aom_sad_skip_16x16x4d   avx2 sse2 neon neon_dotprod/;
1089  specialize qw/aom_sad_skip_16x8x4d    avx2 sse2 neon neon_dotprod/;
1090  specialize qw/aom_sad_skip_16x4x4d    avx2      neon neon_dotprod/;
1091  specialize qw/aom_sad_skip_8x32x4d         sse2 neon/;
1092  specialize qw/aom_sad_skip_8x16x4d         sse2 neon/;
1093  specialize qw/aom_sad_skip_8x8x4d          sse2 neon/;
1094  specialize qw/aom_sad_skip_8x4x4d               neon/;
1095  specialize qw/aom_sad_skip_4x16x4d         sse2 neon/;
1096  specialize qw/aom_sad_skip_4x8x4d          sse2 neon/;
1097  specialize qw/aom_sad_skip_4x4x4d               neon/;
1098
1099  specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/;
1100  specialize qw/aom_sad128x64x3d  avx2 neon neon_dotprod/;
1101  specialize qw/aom_sad64x128x3d  avx2 neon neon_dotprod/;
1102  specialize qw/aom_sad64x64x3d   avx2 neon neon_dotprod/;
1103  specialize qw/aom_sad64x32x3d   avx2 neon neon_dotprod/;
1104  specialize qw/aom_sad32x64x3d   avx2 neon neon_dotprod/;
1105  specialize qw/aom_sad32x32x3d   avx2 neon neon_dotprod/;
1106  specialize qw/aom_sad32x16x3d   avx2 neon neon_dotprod/;
1107  specialize qw/aom_sad16x32x3d   avx2 neon neon_dotprod/;
1108  specialize qw/aom_sad16x16x3d   avx2 neon neon_dotprod/;
1109  specialize qw/aom_sad16x8x3d    avx2 neon neon_dotprod/;
1110  specialize qw/aom_sad8x16x3d         neon/;
1111  specialize qw/aom_sad8x8x3d          neon/;
1112  specialize qw/aom_sad8x4x3d          neon/;
1113  specialize qw/aom_sad4x8x3d          neon/;
1114  specialize qw/aom_sad4x4x3d          neon/;
1115
1116  specialize qw/aom_sad64x16x3d   avx2 neon neon_dotprod/;
1117  specialize qw/aom_sad32x8x3d    avx2 neon neon_dotprod/;
1118  specialize qw/aom_sad16x64x3d   avx2 neon neon_dotprod/;
1119  specialize qw/aom_sad16x4x3d    avx2 neon neon_dotprod/;
1120  specialize qw/aom_sad8x32x3d         neon/;
1121  specialize qw/aom_sad4x16x3d         neon/;
1122
1123  specialize qw/aom_masked_sad128x128x4d  ssse3 neon/;
1124  specialize qw/aom_masked_sad128x64x4d   ssse3 neon/;
1125  specialize qw/aom_masked_sad64x128x4d   ssse3 neon/;
1126  specialize qw/aom_masked_sad64x64x4d    ssse3 neon/;
1127  specialize qw/aom_masked_sad64x32x4d    ssse3 neon/;
1128  specialize qw/aom_masked_sad64x16x4d    ssse3 neon/;
1129  specialize qw/aom_masked_sad32x64x4d    ssse3 neon/;
1130  specialize qw/aom_masked_sad32x32x4d    ssse3 neon/;
1131  specialize qw/aom_masked_sad32x16x4d    ssse3 neon/;
1132  specialize qw/aom_masked_sad32x8x4d     ssse3 neon/;
1133  specialize qw/aom_masked_sad16x64x4d    ssse3 neon/;
1134  specialize qw/aom_masked_sad16x32x4d    ssse3 neon/;
1135  specialize qw/aom_masked_sad16x16x4d    ssse3 neon/;
1136  specialize qw/aom_masked_sad16x8x4d     ssse3 neon/;
1137
1138  specialize qw/aom_masked_sad8x16x4d     ssse3 neon/;
1139  specialize qw/aom_masked_sad8x8x4d      ssse3 neon/;
1140  specialize qw/aom_masked_sad8x4x4d      ssse3 neon/;
1141  specialize qw/aom_masked_sad4x16x4d     ssse3 neon/;
1142  specialize qw/aom_masked_sad4x8x4d      ssse3 neon/;
1143  specialize qw/aom_masked_sad4x4x4d      ssse3 neon/;
1144
1145  specialize qw/aom_masked_sad4x16x4d     ssse3 neon/;
1146  specialize qw/aom_masked_sad16x4x4d     ssse3 neon/;
1147  specialize qw/aom_masked_sad8x32x4d     ssse3 neon/;
1148  specialize qw/aom_masked_sad32x8x4d     ssse3 neon/;
1149  specialize qw/aom_masked_sad64x16x4d    ssse3 neon/;
1150  #
1151  # Multi-block SAD, comparing a reference to N independent blocks
1152  #
1153  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1154    foreach (@encoder_block_sizes) {
1155      ($w, $h) = @$_;
1156      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1157      add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1158      add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1159      if ($w != 128 && $h != 128) {
1160        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
1161      }
1162    }
1163    specialize qw/aom_highbd_sad128x128x4d      avx2 neon/;
1164    specialize qw/aom_highbd_sad128x64x4d       avx2 neon/;
1165    specialize qw/aom_highbd_sad64x128x4d       avx2 neon/;
1166    specialize qw/aom_highbd_sad64x64x4d   sse2 avx2 neon/;
1167    specialize qw/aom_highbd_sad64x32x4d   sse2 avx2 neon/;
1168    specialize qw/aom_highbd_sad32x64x4d   sse2 avx2 neon/;
1169    specialize qw/aom_highbd_sad32x32x4d   sse2 avx2 neon/;
1170    specialize qw/aom_highbd_sad32x16x4d   sse2 avx2 neon/;
1171    specialize qw/aom_highbd_sad16x32x4d   sse2 avx2 neon/;
1172    specialize qw/aom_highbd_sad16x16x4d   sse2 avx2 neon/;
1173    specialize qw/aom_highbd_sad16x8x4d    sse2 avx2 neon/;
1174    specialize qw/aom_highbd_sad8x16x4d    sse2      neon/;
1175    specialize qw/aom_highbd_sad8x8x4d     sse2      neon/;
1176    specialize qw/aom_highbd_sad8x4x4d     sse2      neon/;
1177    specialize qw/aom_highbd_sad4x8x4d     sse2      neon/;
1178    specialize qw/aom_highbd_sad4x4x4d     sse2      neon/;
1179
1180    specialize qw/aom_highbd_sad4x16x4d         sse2 neon/;
1181    specialize qw/aom_highbd_sad16x4x4d    avx2 sse2 neon/;
1182    specialize qw/aom_highbd_sad8x32x4d         sse2 neon/;
1183    specialize qw/aom_highbd_sad32x8x4d    avx2 sse2 neon/;
1184    specialize qw/aom_highbd_sad16x64x4d   avx2 sse2 neon/;
1185    specialize qw/aom_highbd_sad64x16x4d   avx2 sse2 neon/;
1186
1187    specialize qw/aom_highbd_sad_skip_128x128x4d avx2      neon/;
1188    specialize qw/aom_highbd_sad_skip_128x64x4d  avx2      neon/;
1189    specialize qw/aom_highbd_sad_skip_64x128x4d  avx2      neon/;
1190    specialize qw/aom_highbd_sad_skip_64x64x4d   avx2 sse2 neon/;
1191    specialize qw/aom_highbd_sad_skip_64x32x4d   avx2 sse2 neon/;
1192    specialize qw/aom_highbd_sad_skip_32x64x4d   avx2 sse2 neon/;
1193    specialize qw/aom_highbd_sad_skip_32x32x4d   avx2 sse2 neon/;
1194    specialize qw/aom_highbd_sad_skip_32x16x4d   avx2 sse2 neon/;
1195    specialize qw/aom_highbd_sad_skip_16x32x4d   avx2 sse2 neon/;
1196    specialize qw/aom_highbd_sad_skip_16x16x4d   avx2 sse2 neon/;
1197    specialize qw/aom_highbd_sad_skip_16x8x4d    avx2 sse2 neon/;
1198    specialize qw/aom_highbd_sad_skip_16x4x4d              neon/;
1199    specialize qw/aom_highbd_sad_skip_8x16x4d         sse2 neon/;
1200    specialize qw/aom_highbd_sad_skip_8x8x4d          sse2 neon/;
1201    specialize qw/aom_highbd_sad_skip_8x4x4d               neon/;
1202    specialize qw/aom_highbd_sad_skip_4x8x4d          sse2 neon/;
1203    specialize qw/aom_highbd_sad_skip_4x4x4d               neon/;
1204
1205    specialize qw/aom_highbd_sad_skip_4x16x4d         sse2 neon/;
1206    specialize qw/aom_highbd_sad_skip_8x32x4d         sse2 neon/;
1207    specialize qw/aom_highbd_sad_skip_32x8x4d    avx2 sse2 neon/;
1208    specialize qw/aom_highbd_sad_skip_16x64x4d   avx2 sse2 neon/;
1209    specialize qw/aom_highbd_sad_skip_64x16x4d   avx2 sse2 neon/;
1210
1211    specialize qw/aom_highbd_sad128x128x3d avx2 neon/;
1212    specialize qw/aom_highbd_sad128x64x3d  avx2 neon/;
1213    specialize qw/aom_highbd_sad64x128x3d  avx2 neon/;
1214    specialize qw/aom_highbd_sad64x64x3d   avx2 neon/;
1215    specialize qw/aom_highbd_sad64x32x3d   avx2 neon/;
1216    specialize qw/aom_highbd_sad32x64x3d   avx2 neon/;
1217    specialize qw/aom_highbd_sad32x32x3d   avx2 neon/;
1218    specialize qw/aom_highbd_sad32x16x3d   avx2 neon/;
1219    specialize qw/aom_highbd_sad16x32x3d   avx2 neon/;
1220    specialize qw/aom_highbd_sad16x16x3d   avx2 neon/;
1221    specialize qw/aom_highbd_sad16x8x3d    avx2 neon/;
1222    specialize qw/aom_highbd_sad8x16x3d         neon/;
1223    specialize qw/aom_highbd_sad8x8x3d          neon/;
1224    specialize qw/aom_highbd_sad8x4x3d          neon/;
1225    specialize qw/aom_highbd_sad4x8x3d          neon/;
1226    specialize qw/aom_highbd_sad4x4x3d          neon/;
1227
1228    specialize qw/aom_highbd_sad64x16x3d   avx2 neon/;
1229    specialize qw/aom_highbd_sad32x8x3d    avx2 neon/;
1230    specialize qw/aom_highbd_sad16x64x3d   avx2 neon/;
1231    specialize qw/aom_highbd_sad16x4x3d    avx2 neon/;
1232    specialize qw/aom_highbd_sad8x32x3d         neon/;
1233    specialize qw/aom_highbd_sad4x16x3d         neon/;
1234  }
1235  #
1236  # Avg
1237  #
1238  add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p";
1239  specialize qw/aom_avg_8x8 sse2 neon/;
1240
1241  add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p";
1242  specialize qw/aom_avg_4x4 sse2 neon/;
1243
1244  add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
1245  specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;
1246
1247  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1248  specialize qw/aom_minmax_8x8 sse2 neon/;
1249
1250  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1251    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
1252    specialize qw/aom_highbd_avg_8x8 neon/;
1253    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
1254    specialize qw/aom_highbd_avg_4x4 neon/;
1255    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1256    specialize qw/aom_highbd_minmax_8x8 neon/;
1257  }
1258
1259  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1260  specialize qw/aom_int_pro_row avx2 sse2 neon/;
1261
1262  add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1263  specialize qw/aom_int_pro_col avx2 sse2 neon/;
1264
1265  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
1266  specialize qw/aom_vector_var avx2 sse4_1 neon sve/;
1267
1268  #
1269  # hamadard transform and satd for implmenting temporal dependency model
1270  #
1271  add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1272  specialize qw/aom_hadamard_4x4 sse2 neon/;
1273
1274  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1275  specialize qw/aom_hadamard_8x8 sse2 neon/;
1276
1277  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1278  specialize qw/aom_hadamard_16x16 avx2 sse2 neon/;
1279
1280  add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1281  specialize qw/aom_hadamard_32x32 avx2 sse2 neon/;
1282
1283  add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1284  specialize qw/aom_hadamard_lp_8x8 sse2 neon/;
1285
1286  add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1287  specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
1288
1289  add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1290  specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;
1291
1292  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1293    add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1294    specialize qw/aom_highbd_hadamard_8x8 avx2 neon/;
1295
1296    add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1297    specialize qw/aom_highbd_hadamard_16x16 avx2 neon/;
1298
1299    add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1300    specialize qw/aom_highbd_hadamard_32x32 avx2 neon/;
1301  }
1302  add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";
1303  specialize qw/aom_satd neon sse2 avx2/;
1304
1305  add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";
1306  specialize qw/aom_satd_lp sse2 avx2 neon/;
1307
1308
1309  #
1310  # Structured Similarity (SSIM)
1311  #
1312  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1313  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";
1314
1315  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
1316    add_proto qw/void aom_ssim_parms_16x16/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1317    specialize qw/aom_ssim_parms_16x16/, "$sse2_x86_64";
1318  }
1319
1320  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1321    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1322  }
1323}  # CONFIG_AV1_ENCODER
1324
1325if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
1326
1327  #
1328  # Specialty Variance
1329  #
1330  add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
1331  specialize qw/aom_get_var_sse_sum_8x8_quad        avx2 sse2 neon neon_dotprod/;
1332
1333  add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
1334  specialize qw/aom_get_var_sse_sum_16x16_dual        avx2 sse2 neon neon_dotprod/;
1335
1336  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1337  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1338  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1339  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1340
1341  specialize qw/aom_mse16x16          sse2 avx2 neon neon_dotprod/;
1342  specialize qw/aom_mse16x8           sse2      neon neon_dotprod/;
1343  specialize qw/aom_mse8x16           sse2      neon neon_dotprod/;
1344  specialize qw/aom_mse8x8            sse2      neon neon_dotprod/;
1345
1346  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1347    foreach $bd (8, 10, 12) {
1348      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1349      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1350      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1351      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1352
1353      if ($bd eq 8) {
1354        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/;
1355        specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
1356        specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
1357        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
1358      } else {
1359        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
1360        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
1361        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
1362        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
1363      }
1364
1365    }
1366  }
1367
1368  #
1369  #
1370  #
1371  add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";
1372  specialize qw/aom_get_mb_ss sse2 neon/;
1373
1374  #
1375  # Variance / Subpixel Variance / Subpixel Avg Variance
1376  #
1377  add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
1378  specialize qw/aom_mse_wxh_16bit  sse2 avx2 neon/;
1379
1380  add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
1381  specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;
1382
1383  foreach (@encoder_block_sizes) {
1384    ($w, $h) = @$_;
1385    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1386    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1387    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1388    add_proto qw/uint32_t/, "aom_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS *jcp_param";
1389  }
1390  specialize qw/aom_variance128x128   sse2 avx2 neon neon_dotprod/;
1391  specialize qw/aom_variance128x64    sse2 avx2 neon neon_dotprod/;
1392  specialize qw/aom_variance64x128    sse2 avx2 neon neon_dotprod/;
1393  specialize qw/aom_variance64x64     sse2 avx2 neon neon_dotprod/;
1394  specialize qw/aom_variance64x32     sse2 avx2 neon neon_dotprod/;
1395  specialize qw/aom_variance32x64     sse2 avx2 neon neon_dotprod/;
1396  specialize qw/aom_variance32x32     sse2 avx2 neon neon_dotprod/;
1397  specialize qw/aom_variance32x16     sse2 avx2 neon neon_dotprod/;
1398  specialize qw/aom_variance16x32     sse2 avx2 neon neon_dotprod/;
1399  specialize qw/aom_variance16x16     sse2 avx2 neon neon_dotprod/;
1400  specialize qw/aom_variance16x8      sse2 avx2 neon neon_dotprod/;
1401  specialize qw/aom_variance8x16      sse2      neon neon_dotprod/;
1402  specialize qw/aom_variance8x8       sse2      neon neon_dotprod/;
1403  specialize qw/aom_variance8x4       sse2      neon neon_dotprod/;
1404  specialize qw/aom_variance4x8       sse2      neon neon_dotprod/;
1405  specialize qw/aom_variance4x4       sse2      neon neon_dotprod/;
1406
1407  specialize qw/aom_sub_pixel_variance128x128   avx2 neon ssse3/;
1408  specialize qw/aom_sub_pixel_variance128x64    avx2 neon ssse3/;
1409  specialize qw/aom_sub_pixel_variance64x128    avx2 neon ssse3/;
1410  specialize qw/aom_sub_pixel_variance64x64     avx2 neon ssse3/;
1411  specialize qw/aom_sub_pixel_variance64x32     avx2 neon ssse3/;
1412  specialize qw/aom_sub_pixel_variance32x64     avx2 neon ssse3/;
1413  specialize qw/aom_sub_pixel_variance32x32     avx2 neon ssse3/;
1414  specialize qw/aom_sub_pixel_variance32x16     avx2 neon ssse3/;
1415  specialize qw/aom_sub_pixel_variance16x32     avx2 neon ssse3/;
1416  specialize qw/aom_sub_pixel_variance16x16     avx2 neon ssse3/;
1417  specialize qw/aom_sub_pixel_variance16x8      avx2 neon ssse3/;
1418  specialize qw/aom_sub_pixel_variance8x16           neon ssse3/;
1419  specialize qw/aom_sub_pixel_variance8x8            neon ssse3/;
1420  specialize qw/aom_sub_pixel_variance8x4            neon ssse3/;
1421  specialize qw/aom_sub_pixel_variance4x8            neon ssse3/;
1422  specialize qw/aom_sub_pixel_variance4x4            neon ssse3/;
1423
1424  specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
1425  specialize qw/aom_sub_pixel_avg_variance128x64  avx2 neon ssse3/;
1426  specialize qw/aom_sub_pixel_avg_variance64x128  avx2 neon ssse3/;
1427  specialize qw/aom_sub_pixel_avg_variance64x64   avx2 neon ssse3/;
1428  specialize qw/aom_sub_pixel_avg_variance64x32   avx2 neon ssse3/;
1429  specialize qw/aom_sub_pixel_avg_variance32x64   avx2 neon ssse3/;
1430  specialize qw/aom_sub_pixel_avg_variance32x32   avx2 neon ssse3/;
1431  specialize qw/aom_sub_pixel_avg_variance32x16   avx2 neon ssse3/;
1432  specialize qw/aom_sub_pixel_avg_variance16x32        neon ssse3/;
1433  specialize qw/aom_sub_pixel_avg_variance16x16        neon ssse3/;
1434  specialize qw/aom_sub_pixel_avg_variance16x8         neon ssse3/;
1435  specialize qw/aom_sub_pixel_avg_variance8x16         neon ssse3/;
1436  specialize qw/aom_sub_pixel_avg_variance8x8          neon ssse3/;
1437  specialize qw/aom_sub_pixel_avg_variance8x4          neon ssse3/;
1438  specialize qw/aom_sub_pixel_avg_variance4x8          neon ssse3/;
1439  specialize qw/aom_sub_pixel_avg_variance4x4          neon ssse3/;
1440
1441  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1442    specialize qw/aom_variance4x16  neon neon_dotprod sse2/;
1443    specialize qw/aom_variance16x4  neon neon_dotprod sse2 avx2/;
1444    specialize qw/aom_variance8x32  neon neon_dotprod sse2/;
1445    specialize qw/aom_variance32x8  neon neon_dotprod sse2 avx2/;
1446    specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
1447    specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;
1448
1449    specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
1450    specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;
1451    specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
1452    specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
1453    specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
1454    specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
1455    specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;
1456    specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
1457    specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
1458    specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
1459    specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
1460    specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
1461
1462    specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x16  neon ssse3/;
1463    specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x4  neon ssse3/;
1464    specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x32  neon ssse3/;
1465    specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x8  neon ssse3/;
1466    specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x64 neon ssse3/;
1467    specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x16 neon ssse3/;
1468  }
1469
1470  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x64 neon ssse3/;
1471  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x32 neon ssse3/;
1472  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x64 neon ssse3/;
1473  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x32 neon ssse3/;
1474  specialize qw/aom_dist_wtd_sub_pixel_avg_variance32x16 neon ssse3/;
1475  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x32 neon ssse3/;
1476  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x16 neon ssse3/;
1477  specialize qw/aom_dist_wtd_sub_pixel_avg_variance16x8  neon ssse3/;
1478  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x16  neon ssse3/;
1479  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x8   neon ssse3/;
1480  specialize qw/aom_dist_wtd_sub_pixel_avg_variance8x4   neon ssse3/;
1481  specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x8   neon ssse3/;
1482  specialize qw/aom_dist_wtd_sub_pixel_avg_variance4x4   neon ssse3/;
1483
1484  specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x128  neon ssse3/;
1485  specialize qw/aom_dist_wtd_sub_pixel_avg_variance128x64   neon ssse3/;
1486  specialize qw/aom_dist_wtd_sub_pixel_avg_variance64x128   neon ssse3/;
1487
1488  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1489    foreach $bd (8, 10, 12) {
1490      foreach (@encoder_block_sizes) {
1491        ($w, $h) = @$_;
1492        add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1493        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1494        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1495        add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
1496      }
1497    }
1498
1499    specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/;
1500    specialize qw/aom_highbd_12_variance128x64  sse2 neon sve/;
1501    specialize qw/aom_highbd_12_variance64x128  sse2 neon sve/;
1502    specialize qw/aom_highbd_12_variance64x64   sse2 neon sve/;
1503    specialize qw/aom_highbd_12_variance64x32   sse2 neon sve/;
1504    specialize qw/aom_highbd_12_variance32x64   sse2 neon sve/;
1505    specialize qw/aom_highbd_12_variance32x32   sse2 neon sve/;
1506    specialize qw/aom_highbd_12_variance32x16   sse2 neon sve/;
1507    specialize qw/aom_highbd_12_variance16x32   sse2 neon sve/;
1508    specialize qw/aom_highbd_12_variance16x16   sse2 neon sve/;
1509    specialize qw/aom_highbd_12_variance16x8    sse2 neon sve/;
1510    specialize qw/aom_highbd_12_variance8x16    sse2 neon sve/;
1511    specialize qw/aom_highbd_12_variance8x8     sse2 neon sve/;
1512    specialize qw/aom_highbd_12_variance8x4          neon sve/;
1513    specialize qw/aom_highbd_12_variance4x8          neon sve/;
1514    specialize qw/aom_highbd_12_variance4x4   sse4_1 neon sve/;
1515
1516    specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/;
1517    specialize qw/aom_highbd_10_variance128x64  sse2 avx2 neon sve/;
1518    specialize qw/aom_highbd_10_variance64x128  sse2 avx2 neon sve/;
1519    specialize qw/aom_highbd_10_variance64x64   sse2 avx2 neon sve/;
1520    specialize qw/aom_highbd_10_variance64x32   sse2 avx2 neon sve/;
1521    specialize qw/aom_highbd_10_variance32x64   sse2 avx2 neon sve/;
1522    specialize qw/aom_highbd_10_variance32x32   sse2 avx2 neon sve/;
1523    specialize qw/aom_highbd_10_variance32x16   sse2 avx2 neon sve/;
1524    specialize qw/aom_highbd_10_variance16x32   sse2 avx2 neon sve/;
1525    specialize qw/aom_highbd_10_variance16x16   sse2 avx2 neon sve/;
1526    specialize qw/aom_highbd_10_variance16x8    sse2 avx2 neon sve/;
1527    specialize qw/aom_highbd_10_variance8x16    sse2 avx2 neon sve/;
1528    specialize qw/aom_highbd_10_variance8x8     sse2 avx2 neon sve/;
1529    specialize qw/aom_highbd_10_variance8x4               neon sve/;
1530    specialize qw/aom_highbd_10_variance4x8               neon sve/;
1531    specialize qw/aom_highbd_10_variance4x4   sse4_1      neon sve/;
1532
1533    specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/;
1534    specialize qw/aom_highbd_8_variance128x64  sse2 neon sve/;
1535    specialize qw/aom_highbd_8_variance64x128  sse2 neon sve/;
1536    specialize qw/aom_highbd_8_variance64x64   sse2 neon sve/;
1537    specialize qw/aom_highbd_8_variance64x32   sse2 neon sve/;
1538    specialize qw/aom_highbd_8_variance32x64   sse2 neon sve/;
1539    specialize qw/aom_highbd_8_variance32x32   sse2 neon sve/;
1540    specialize qw/aom_highbd_8_variance32x16   sse2 neon sve/;
1541    specialize qw/aom_highbd_8_variance16x32   sse2 neon sve/;
1542    specialize qw/aom_highbd_8_variance16x16   sse2 neon sve/;
1543    specialize qw/aom_highbd_8_variance16x8    sse2 neon sve/;
1544    specialize qw/aom_highbd_8_variance8x16    sse2 neon sve/;
1545    specialize qw/aom_highbd_8_variance8x8     sse2 neon sve/;
1546    specialize qw/aom_highbd_8_variance8x4          neon sve/;
1547    specialize qw/aom_highbd_8_variance4x8          neon sve/;
1548    specialize qw/aom_highbd_8_variance4x4   sse4_1 neon sve/;
1549
1550    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
          # High-bitdepth variance for 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16.
          # These are the same six sizes that the top of the file appends to
          # @encoder_block_sizes under this same CONFIG_REALTIME_ONLY check.
1551      foreach $bd (8, 10, 12) {
            # "avx2" is passed only when $bd is 10; for 8 and 12 an empty string
            # is passed in its place.
            # NOTE(review): assumes specialize ignores an empty-string argument;
            # confirm against the helper's definition (outside this chunk).
1552        my $avx2 = ($bd == 10) ? "avx2" : "";
1553        specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/;
1554        specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/;
1555        specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/;
            # 16x4 and 4x16 list only neon and sve (no $avx2, no sse2).
1556        specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/;
1557        specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/;
1558        specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/;
1559      }
1560    }
1561
        # 12-bit high-bitdepth sub-pixel variance. Sizes 128x128 down to 8x4 list
        # sse2 and neon; 4x8 lists only neon; 4x4 lists sse4_1 and neon.
1562    specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/;
1563    specialize qw/aom_highbd_12_sub_pixel_variance128x64  sse2 neon/;
1564    specialize qw/aom_highbd_12_sub_pixel_variance64x128  sse2 neon/;
1565    specialize qw/aom_highbd_12_sub_pixel_variance64x64   sse2 neon/;
1566    specialize qw/aom_highbd_12_sub_pixel_variance64x32   sse2 neon/;
1567    specialize qw/aom_highbd_12_sub_pixel_variance32x64   sse2 neon/;
1568    specialize qw/aom_highbd_12_sub_pixel_variance32x32   sse2 neon/;
1569    specialize qw/aom_highbd_12_sub_pixel_variance32x16   sse2 neon/;
1570    specialize qw/aom_highbd_12_sub_pixel_variance16x32   sse2 neon/;
1571    specialize qw/aom_highbd_12_sub_pixel_variance16x16   sse2 neon/;
1572    specialize qw/aom_highbd_12_sub_pixel_variance16x8    sse2 neon/;
1573    specialize qw/aom_highbd_12_sub_pixel_variance8x16    sse2 neon/;
1574    specialize qw/aom_highbd_12_sub_pixel_variance8x8     sse2 neon/;
1575    specialize qw/aom_highbd_12_sub_pixel_variance8x4     sse2 neon/;
1576    specialize qw/aom_highbd_12_sub_pixel_variance4x8          neon/;
1577    specialize qw/aom_highbd_12_sub_pixel_variance4x4   sse4_1 neon/;
1578
        # 10-bit high-bitdepth sub-pixel variance. This is the only bit depth in
        # this family with avx2 entries: 128x128 down to 8x8 list sse2, avx2 and
        # neon. 8x4 lists sse2 and neon, 4x8 only neon, 4x4 sse4_1 and neon.
1579    specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/;
1580    specialize qw/aom_highbd_10_sub_pixel_variance128x64  sse2 avx2 neon/;
1581    specialize qw/aom_highbd_10_sub_pixel_variance64x128  sse2 avx2 neon/;
1582    specialize qw/aom_highbd_10_sub_pixel_variance64x64   sse2 avx2 neon/;
1583    specialize qw/aom_highbd_10_sub_pixel_variance64x32   sse2 avx2 neon/;
1584    specialize qw/aom_highbd_10_sub_pixel_variance32x64   sse2 avx2 neon/;
1585    specialize qw/aom_highbd_10_sub_pixel_variance32x32   sse2 avx2 neon/;
1586    specialize qw/aom_highbd_10_sub_pixel_variance32x16   sse2 avx2 neon/;
1587    specialize qw/aom_highbd_10_sub_pixel_variance16x32   sse2 avx2 neon/;
1588    specialize qw/aom_highbd_10_sub_pixel_variance16x16   sse2 avx2 neon/;
1589    specialize qw/aom_highbd_10_sub_pixel_variance16x8    sse2 avx2 neon/;
1590    specialize qw/aom_highbd_10_sub_pixel_variance8x16    sse2 avx2 neon/;
1591    specialize qw/aom_highbd_10_sub_pixel_variance8x8     sse2 avx2 neon/;
1592    specialize qw/aom_highbd_10_sub_pixel_variance8x4     sse2      neon/;
1593    specialize qw/aom_highbd_10_sub_pixel_variance4x8               neon/;
1594    specialize qw/aom_highbd_10_sub_pixel_variance4x4   sse4_1      neon/;
1595
        # 8-bit high-bitdepth sub-pixel variance. Same ISA lists as the 12-bit
        # group: sse2 and neon for 128x128 down to 8x4, neon only for 4x8, and
        # sse4_1 plus neon for 4x4.
1596    specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/;
1597    specialize qw/aom_highbd_8_sub_pixel_variance128x64  sse2 neon/;
1598    specialize qw/aom_highbd_8_sub_pixel_variance64x128  sse2 neon/;
1599    specialize qw/aom_highbd_8_sub_pixel_variance64x64   sse2 neon/;
1600    specialize qw/aom_highbd_8_sub_pixel_variance64x32   sse2 neon/;
1601    specialize qw/aom_highbd_8_sub_pixel_variance32x64   sse2 neon/;
1602    specialize qw/aom_highbd_8_sub_pixel_variance32x32   sse2 neon/;
1603    specialize qw/aom_highbd_8_sub_pixel_variance32x16   sse2 neon/;
1604    specialize qw/aom_highbd_8_sub_pixel_variance16x32   sse2 neon/;
1605    specialize qw/aom_highbd_8_sub_pixel_variance16x16   sse2 neon/;
1606    specialize qw/aom_highbd_8_sub_pixel_variance16x8    sse2 neon/;
1607    specialize qw/aom_highbd_8_sub_pixel_variance8x16    sse2 neon/;
1608    specialize qw/aom_highbd_8_sub_pixel_variance8x8     sse2 neon/;
1609    specialize qw/aom_highbd_8_sub_pixel_variance8x4     sse2 neon/;
1610    specialize qw/aom_highbd_8_sub_pixel_variance4x8          neon/;
1611    specialize qw/aom_highbd_8_sub_pixel_variance4x4   sse4_1 neon/;
1612
1613    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
          # High-bitdepth sub-pixel variance for 64x16, 32x8, 16x64, 16x4, 8x32
          # and 4x16, issued once per bit depth (8, 10, 12) and skipped when
          # CONFIG_REALTIME_ONLY is "yes". Unlike the square-ish 10-bit sizes,
          # no avx2 entry is listed here for any bit depth.
1614      foreach $bd (8, 10, 12) {
1615        specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/;
1616        specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/;
1617        specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/;
1618        specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/;
1619        specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/;
            # 4x16 is the only size in this loop that lists neon alone.
1620        specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/;
1621      }
1622    }
1623
        # 12-bit high-bitdepth sub-pixel average variance. The three sizes with a
        # 128 dimension list only neon; 64x64 down to 8x4 list sse2 and neon;
        # 4x8 lists only neon; 4x4 lists sse4_1 and neon.
1624    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128      neon/;
1625    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64       neon/;
1626    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128       neon/;
1627    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64   sse2 neon/;
1628    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32   sse2 neon/;
1629    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64   sse2 neon/;
1630    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32   sse2 neon/;
1631    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16   sse2 neon/;
1632    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32   sse2 neon/;
1633    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16   sse2 neon/;
1634    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8    sse2 neon/;
1635    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16    sse2 neon/;
1636    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8     sse2 neon/;
1637    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4     sse2 neon/;
1638    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8          neon/;
1639    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4   sse4_1 neon/;
1640
        # 10-bit high-bitdepth sub-pixel average variance. Same ISA lists as the
        # 12-bit group above; no avx2 entry is listed for this family.
1641    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128      neon/;
1642    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64       neon/;
1643    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128       neon/;
1644    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64   sse2 neon/;
1645    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32   sse2 neon/;
1646    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64   sse2 neon/;
1647    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32   sse2 neon/;
1648    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16   sse2 neon/;
1649    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32   sse2 neon/;
1650    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16   sse2 neon/;
1651    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8    sse2 neon/;
1652    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16    sse2 neon/;
1653    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8     sse2 neon/;
1654    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4     sse2 neon/;
1655    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8          neon/;
1656    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4   sse4_1 neon/;
1657
        # 8-bit high-bitdepth sub-pixel average variance. Same ISA lists as the
        # 12-bit and 10-bit groups above.
1658    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128      neon/;
1659    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64       neon/;
1660    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128       neon/;
1661    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64   sse2 neon/;
1662    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32   sse2 neon/;
1663    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64   sse2 neon/;
1664    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32   sse2 neon/;
1665    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16   sse2 neon/;
1666    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32   sse2 neon/;
1667    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16   sse2 neon/;
1668    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8    sse2 neon/;
1669    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16    sse2 neon/;
1670    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8     sse2 neon/;
1671    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4     sse2 neon/;
1672    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8          neon/;
1673    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4   sse4_1 neon/;
1674
1675    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
          # High-bitdepth sub-pixel average variance for 64x16, 32x8, 16x64,
          # 16x4, 8x32 and 4x16, issued once per bit depth (8, 10, 12) and
          # skipped when CONFIG_REALTIME_ONLY is "yes".
1676      foreach $bd (8, 10, 12) {
1677        specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/;
1678        specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/;
1679        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/;
1680        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/;
1681        specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/;
            # 4x16 is the only size in this loop that lists neon alone.
1682        specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/;
1683      }
1684    }
1685
1686    foreach $bd (8, 10, 12) {
          # High-bitdepth distance-weighted sub-pixel average variance, one call
          # per size from 128x128 down to 4x4 for each bit depth. Every entry
          # lists neon only; no x86 ISA appears for this family.
1687      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x128", qw/neon/;
1688      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance128x64" , qw/neon/;
1689      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x128" , qw/neon/;
1690      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x64"  , qw/neon/;
1691      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x32"  , qw/neon/;
1692      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x64"  , qw/neon/;
1693      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x32"  , qw/neon/;
1694      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x16"  , qw/neon/;
1695      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x32"  , qw/neon/;
1696      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x16"  , qw/neon/;
1697      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x8"   , qw/neon/;
1698      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x16"   , qw/neon/;
1699      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x8"    , qw/neon/;
1700      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x4"    , qw/neon/;
1701      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x8"    , qw/neon/;
1702      specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x4"    , qw/neon/;
1703    }
1704
1705    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
          # High-bitdepth distance-weighted sub-pixel average variance for
          # 64x16, 32x8, 16x64, 16x4, 8x32 and 4x16, issued once per bit depth
          # and skipped when CONFIG_REALTIME_ONLY is "yes". Neon is the only
          # ISA listed.
1706      foreach $bd (8, 10, 12) {
1707        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance64x16", qw/neon/;
1708        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance32x8" , qw/neon/;
1709        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x64", qw/neon/;
1710        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance16x4" , qw/neon/;
1711        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance8x32" , qw/neon/;
1712        specialize "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance4x16" , qw/neon/;
1713      }
1714    }
1715  }
1716  #
1717  # Masked Variance / Masked Subpixel Variance
1718  #
1719  foreach (@encoder_block_sizes) {
        # Each element of @encoder_block_sizes is a [width, height] pair (built
        # at the top of the file); unpack it into the globals $w and $h so the
        # size can be interpolated into the function name.
1720    ($w, $h) = @$_;
        # One masked sub-pixel variance prototype per block size: returns
        # unsigned int and takes source/reference buffers with strides, the x/y
        # offsets, a second predictor, a mask with its stride, an invert flag
        # and an SSE output pointer. ssse3 and neon are listed for every size.
1721    add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1722    specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
1723  }
1724
1725  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        # High-bitdepth masked sub-pixel variance, only when
        # CONFIG_AV1_HIGHBITDEPTH is "yes". Here $bd is a string that already
        # includes the surrounding underscores, so "aom_highbd${bd}masked_..."
        # expands to aom_highbd_8_masked_..., aom_highbd_10_masked_... and
        # aom_highbd_12_masked_....
1726    foreach $bd ("_8_", "_10_", "_12_") {
1727      foreach (@encoder_block_sizes) {
            # Unpack the [width, height] pair into the globals $w and $h.
1728        ($w, $h) = @$_;
            # Same argument list as the low-bitdepth masked prototype; buffers
            # are still declared as uint8_t pointers. ssse3 and neon are listed
            # for every bit depth and size.
1729        add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1730        specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
1731      }
1732    }
1733  }
1734
1735  #
1736  # OBMC Variance / OBMC Subpixel Variance
1737  #
1738  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
        # The whole OBMC section, low and high bitdepth, is skipped when
        # CONFIG_REALTIME_ONLY is "yes".
1739    foreach (@encoder_block_sizes) {
          # Unpack the [width, height] pair into the globals $w and $h.
1740      ($w, $h) = @$_;
          # Two prototypes per block size, both returning unsigned int: the
          # plain variant takes a prediction buffer with stride, int32_t wsrc
          # and mask pointers and an SSE output pointer; the sub-pixel variant
          # adds xoffset/yoffset.
1741      add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1742      add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
          # The plain variant lists sse4_1, avx2 and neon; the sub-pixel variant
          # lists sse4_1 and neon (no avx2).
1743      specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
1744      specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
1745    }
1746
1747    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
          # High-bitdepth OBMC variants. $bd carries its own underscores, so
          # the names expand to aom_highbd_8_obmc_..., aom_highbd_10_obmc_...
          # and aom_highbd_12_obmc_....
1748      foreach $bd ("_8_", "_10_", "_12_") {
1749        foreach (@encoder_block_sizes) {
1750          ($w, $h) = @$_;
1751          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1752          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
              # Narrower ISA lists than the low-bitdepth versions: sse4_1 and
              # neon for the plain variant, neon only for the sub-pixel variant.
1753          specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/;
1754          specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/;
1755        }
1756      }
1757    }
1758  }
1759
1760  #
1761  # Comp Avg
1762  #
      # aom_comp_avg_pred: void; takes an output comp_pred buffer, a pred
      # buffer, width/height, and a ref buffer with its stride. Lists avx2 and
      # neon.
1763  add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
1764  specialize qw/aom_comp_avg_pred avx2 neon/;
1765
      # aom_dist_wtd_comp_avg_pred: same arguments plus a trailing
      # DIST_WTD_COMP_PARAMS pointer. Lists ssse3 and neon.
1766  add_proto qw/void aom_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
1767  specialize qw/aom_dist_wtd_comp_avg_pred ssse3 neon/;
1768
1769  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        # High-bitdepth counterparts of the two comp-avg prototypes, only when
        # CONFIG_AV1_HIGHBITDEPTH is "yes". The buffer parameters carry an "8"
        # suffix (comp_pred8, pred8, ref8) but are still declared as uint8_t
        # pointers. The plain version lists neon only.
1770    add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
1771    specialize qw/aom_highbd_comp_avg_pred neon/;
1772
        # Distance-weighted version: adds the DIST_WTD_COMP_PARAMS pointer and
        # lists sse2 and neon.
1773    add_proto qw/void aom_highbd_dist_wtd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const DIST_WTD_COMP_PARAMS *jcp_param";
1774    specialize qw/aom_highbd_dist_wtd_comp_avg_pred sse2 neon/;
1775
        # aom_mse_wxh_16bit_highbd: returns uint64_t and takes uint16_t dst/src
        # buffers with strides plus w and h. Note the three-argument add_proto
        # form here (return type, name, argument list), unlike the two
        # prototypes above which put the name inside the qw// list. Lists sse2,
        # avx2, neon and sve.
1776    add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
1777    specialize qw/aom_mse_wxh_16bit_highbd   sse2 avx2 neon sve/;
1778  }
1779
      # aom_comp_mask_pred: void; takes an output comp_pred buffer, a pred
      # buffer, width/height, a ref buffer with stride, a mask with stride and
      # an invert_mask flag. Lists ssse3, avx2 and neon.
1780  add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
1781  specialize qw/aom_comp_mask_pred ssse3 avx2 neon/;
1782
1783  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        # High-bitdepth masked compound prediction, only when
        # CONFIG_AV1_HIGHBITDEPTH is "yes". Same argument order as
        # aom_comp_mask_pred; the pred and ref parameters are named pred8/ref8
        # while the output stays comp_pred. Lists sse2, avx2 and neon.
1784    add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
1785    specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/;
1786  }
1787
1788  # Flow estimation library
1789  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
        # The three flow-estimation prototypes below are skipped when
        # CONFIG_REALTIME_ONLY is "yes".
        #
        # aom_compute_mean_stddev: returns bool; takes a frame with stride and
        # an (x, y) position, and writes through the mean and one_over_stddev
        # double pointers. Lists sse4_1 and avx2 only.
1790    add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
1791    specialize qw/aom_compute_mean_stddev sse4_1 avx2/;
1792
        # aom_compute_correlation: returns double; takes, for each of two
        # frames, the buffer, stride, (x, y) position, mean and
        # one_over_stddev. Lists sse4_1 and avx2 only.
1793    add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
1794    specialize qw/aom_compute_correlation sse4_1 avx2/;
1795
        # aom_compute_flow_at_point: void; takes src and ref buffers, an (x, y)
        # position, width/height and a single stride, and writes through the u
        # and v double pointers. The only one of the three that also lists neon.
1796    add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
1797    specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon/;
1798  }
1799
1800}  # CONFIG_AV1_ENCODER
1801
18021;
1803