##
## Copyright (c) 2017, Alliance for Open Media. All rights reserved.
##
## This source code is subject to the terms of the BSD 2 Clause License and
## the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
## was not distributed with this source code in the LICENSE file, you can
## obtain it at www.aomedia.org/license/software. If the Alliance for Open
## Media Patent License 1.0 was not distributed with this source code in the
## PATENTS file, you can obtain it at www.aomedia.org/license/patent.
##
# Prints the C preamble for the DSP section: a banner comment followed by the
# #include lines and a trailing blank line.
sub aom_dsp_forward_decls() {
  # Non-interpolating heredoc: the text contains no Perl variables.
  print <<'DSP_FORWARD_DECLS';
/*
 * DSP
 */

#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"

DSP_FORWARD_DECLS
}
# Pass the sub's name to forward_decls (provided by the script that loads
# this file).
forward_decls('aom_dsp_forward_decls');
25
# optimizations which depend on multiple features:
# 'avx2' only when HAVE_AVX2 and HAVE_SSSE3 are both "yes", otherwise ''.
$avx2_ssse3 =
    (aom_config("HAVE_AVX2") eq "yes" && aom_config("HAVE_SSSE3") eq "yes")
    ? 'avx2'
    : '';

# functions that are 64 bit only:
# each variable holds its extension name when $opts{arch} is "x86_64" and the
# empty string for any other arch.
($mmx_x86_64, $sse2_x86_64, $ssse3_x86_64, $avx_x86_64, $avx2_x86_64) =
    ($opts{arch} eq "x86_64") ? qw/mmx sse2 ssse3 avx avx2/ : ('') x 5;
41
# Candidate block dimensions; the same list supplies widths and heights.
@block_widths = (4, 8, 16, 32, 64, 128);

# Collect every [width, height] pair whose sides differ by at most a factor
# of two. Width is the outer loop, height the inner one.
@encoder_block_sizes = ();
for my $bw (@block_widths) {
  for my $bh (@block_widths) {
    next if $bw > 2 * $bh || $bh > 2 * $bw;
    push @encoder_block_sizes, [$bw, $bh];
  }
}

# The 1:4 and 4:1 sizes are appended unless CONFIG_REALTIME_ONLY is "yes".
unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
  push @encoder_block_sizes,
       [4, 16], [16, 4], [8, 32], [32, 8], [16, 64], [64, 16];
}
59
# Transform dimensions and the list of [width, height] transform sizes built
# from them. For each width the square size comes first; then, for each
# height in turn, the 2:1 / 1:2 size followed by the 4:1 / 1:4 size.
@tx_dims = (4, 8, 16, 32, 64);
@tx_sizes = ();
foreach my $tw (@tx_dims) {
  push @tx_sizes, [$tw, $tw];
  foreach my $th (@tx_dims) {
    my $dims_ok    = ($tw >= 4 && $th >= 4);
    my $ratio_2to1 = ($tw == 2 * $th || $th == 2 * $tw);
    my $ratio_4to1 = ($tw == 4 * $th || $th == 4 * $tw);

    push @tx_sizes, [$tw, $th] if $dims_ok && $ratio_2to1;

    # 4:1 sizes are kept only when !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER.
    next if aom_config("CONFIG_REALTIME_ONLY") eq "yes" &&
            aom_config("CONFIG_AV1_DECODER") ne "yes";
    push @tx_sizes, [$tw, $th] if $dims_ok && $ratio_4to1;
  }
}
72
@pred_names = qw/dc dc_top dc_left dc_128 v h paeth smooth smooth_v smooth_h/;

#
# Intra prediction
#
# Declare one prototype per (transform size, predictor name) pair, plus the
# aom_highbd_ variant when CONFIG_AV1_HIGHBITDEPTH is "yes".
foreach my $tx_size (@tx_sizes) {
  # $w and $h stay file-level variables, as in the rest of this file.
  ($w, $h) = @$tx_size;
  my $dims = "${w}x${h}";
  foreach my $pred (@pred_names) {
    add_proto("void", "aom_${pred}_predictor_${dims}",
              "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left");
    next unless aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes";
    add_proto("void", "aom_highbd_${pred}_predictor_${dims}",
              "uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd");
  }
}
90
# Low bitdepth intra predictor specializations for the square and 2:1 sizes.
# The per-function lines are generated from size tables; the order of the
# specialize calls and of the architecture names is the same as when each
# function was listed explicitly.
{
  my @narrow_sizes = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32/;  # width < 32
  my @wide_sizes   = qw/32x16 32x32 32x64 64x32 64x64/;          # width >= 32
  my @all_sizes    = (@narrow_sizes, @wide_sizes);

  # dc_top, dc_left, dc_128 and v: avx2 is listed for the wide sizes only.
  foreach my $pred (qw/dc_top dc_left dc_128 v/) {
    specialize("aom_${pred}_predictor_$_", qw/neon sse2/)      foreach @narrow_sizes;
    specialize("aom_${pred}_predictor_$_", qw/neon sse2 avx2/) foreach @wide_sizes;
  }

  # h: avx2 is listed for 32x32 only.
  foreach my $size (@all_sizes) {
    my @archs = ($size eq '32x32') ? qw/neon sse2 avx2/ : qw/neon sse2/;
    specialize("aom_h_predictor_$size", @archs);
  }

  # paeth: avx2 is listed once the width reaches 16.
  foreach my $size (@all_sizes) {
    my ($width) = split /x/, $size;
    my @archs = ($width >= 16) ? qw/ssse3 avx2 neon/ : qw/ssse3 neon/;
    specialize("aom_paeth_predictor_$size", @archs);
  }

  # smooth, smooth_v and smooth_h: neon and ssse3 for every size.
  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    specialize("aom_${pred}_predictor_$_", qw/neon ssse3/) foreach @all_sizes;
  }

  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  # dc: same architectures as dc_top; 64x64 is registered before 64x32.
  specialize("aom_dc_predictor_$_", qw/neon sse2/) foreach @narrow_sizes;
  specialize("aom_dc_predictor_$_", qw/neon sse2 avx2/)
      foreach qw/32x16 32x32 32x64 64x64 64x32/;
}
232
233
# Low bitdepth specializations for the 4:1 / 1:4 predictor sizes. Skipped
# only when CONFIG_REALTIME_ONLY is "yes" and CONFIG_AV1_DECODER is not.
if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
    (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
  my @sizes_4to1 = qw/4x16 8x32 16x4 16x64 32x8 64x16/;

  # Shared by the dc* and v predictors: neon and sse2 everywhere, with avx2
  # added for 64x16.
  my $specialize_dc_like = sub {
    my ($pred) = @_;
    foreach my $size (@sizes_4to1) {
      my @archs = qw/neon sse2/;
      push @archs, 'avx2' if $size eq '64x16';
      specialize("aom_${pred}_predictor_$size", @archs);
    }
  };

  $specialize_dc_like->($_) foreach qw/dc_top dc_left dc_128 v/;

  # h: neon and sse2 for every size.
  specialize("aom_h_predictor_$_", qw/neon sse2/) foreach @sizes_4to1;

  # paeth: avx2 is listed for 16x64 and 64x16 only.
  foreach my $size (@sizes_4to1) {
    my $has_avx2 = ($size eq '16x64' || $size eq '64x16');
    my @archs = $has_avx2 ? qw/ssse3 avx2 neon/ : qw/ssse3 neon/;
    specialize("aom_paeth_predictor_$size", @archs);
  }

  # smooth, smooth_v and smooth_h: neon and ssse3 for every size.
  foreach my $pred (qw/smooth smooth_v smooth_h/) {
    specialize("aom_${pred}_predictor_$_", qw/neon ssse3/) foreach @sizes_4to1;
  }

  $specialize_dc_like->('dc');
}  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
305
# High bitdepth intra predictor specializations, generated from size tables.
# The order of the specialize calls and of the architecture names is the same
# as when each function was listed explicitly.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  my @sse2_neon_sizes = qw/4x4 4x8 8x4 8x8 8x16 16x8 16x16 16x32 32x16 32x32/;
  my @neon_only_sizes = qw/32x64 64x32 64x64/;
  my @sizes_4to1      = qw/4x16 8x32 16x4 16x64 32x8 64x16/;

  # Predictors with both sse2 and neon versions up to 32x32, neon only above.
  my @sse2_neon_preds = qw/v dc h dc_128 dc_left dc_top/;
  # Predictors with neon versions only.
  my @neon_only_preds = qw/paeth smooth smooth_v smooth_h/;

  # TODO(yunqingwang): optimize rectangular DC_PRED to replace division
  # by multiply and shift.
  foreach my $pred (@sse2_neon_preds) {
    my $stem = "aom_highbd_${pred}_predictor";
    specialize("${stem}_$_", qw/sse2 neon/) foreach @sse2_neon_sizes;
    specialize("${stem}_$_", 'neon')        foreach @neon_only_sizes;
  }

  foreach my $pred (@neon_only_preds) {
    my $stem = "aom_highbd_${pred}_predictor";
    specialize("${stem}_$_", 'neon') foreach (@sse2_neon_sizes, @neon_only_sizes);
  }

  # 4:1 / 1:4 sizes: neon only, for every predictor.
  if ((aom_config("CONFIG_REALTIME_ONLY") ne "yes") ||
      (aom_config("CONFIG_AV1_DECODER") eq "yes")) {
    foreach my $pred (@sse2_neon_preds, @neon_only_preds) {
      specialize("aom_highbd_${pred}_predictor_$_", 'neon') foreach @sizes_4to1;
    }
  }  # !CONFIG_REALTIME_ONLY || CONFIG_AV1_DECODER
}
#
# Sub Pixel Filters
#
{
  # Argument lists shared by several prototypes below.
  my $copy_args  = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int w, int h";
  my $conv8_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";

  # All three prototypes are declared before any of them is specialized.
  add_proto("void", "aom_convolve_copy", $copy_args);
  add_proto("void", "aom_convolve8_$_", $conv8_args) foreach (qw/horiz vert/);

  specialize("aom_convolve_copy", qw/neon sse2 avx2/);
  # $avx2_ssse3 is 'avx2' or '' (set near the top of this file).
  specialize("aom_convolve8_$_", qw/neon neon_dotprod neon_i8mm ssse3/, "$avx2_ssse3")
      foreach (qw/horiz vert/);

  add_proto("void", "aom_scaled_2d",
            "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h");
  specialize("aom_scaled_2d", qw/ssse3 neon neon_dotprod neon_i8mm/);

  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    add_proto("void", "aom_highbd_convolve_copy",
              "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, int w, int h");
    specialize("aom_highbd_convolve_copy", qw/sse2 avx2 neon/);

    # The highbd convolve8 prototypes keep uint8_t pointers and append "int bd".
    foreach my $dir (qw/horiz vert/) {
      add_proto("void", "aom_highbd_convolve8_$dir", "$conv8_args, int bd");
      specialize("aom_highbd_convolve8_$dir", qw/sse2 avx2 neon sve/);
    }
  }
}
546
#
# Loopfilter
#
{
  # The three argument lists used by the low bitdepth loop filters.
  my %lpf_args = (
    single => "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh",
    dual   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1",
    quad   => "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0",
  );

  # One row per function: name suffix, argument-list kind, then the
  # architectures handed to specialize. Rows are processed top to bottom, so
  # the row order is the declaration order.
  my @lpf_table = (
    [ 'vertical_14',        'single', qw/sse2 neon/      ],
    [ 'vertical_14_dual',   'dual',   qw/sse2 neon/      ],
    [ 'vertical_14_quad',   'quad',   qw/avx2 sse2 neon/ ],
    [ 'vertical_6',         'single', qw/sse2 neon/      ],
    [ 'vertical_8',         'single', qw/sse2 neon/      ],
    [ 'vertical_8_dual',    'dual',   qw/sse2 neon/      ],
    [ 'vertical_8_quad',    'quad',   qw/sse2 neon/      ],
    [ 'vertical_4',         'single', qw/sse2 neon/      ],
    [ 'vertical_4_dual',    'dual',   qw/sse2 neon/      ],
    [ 'vertical_4_quad',    'quad',   qw/sse2 neon/      ],
    [ 'horizontal_14',      'single', qw/sse2 neon/      ],
    [ 'horizontal_14_dual', 'dual',   qw/sse2 neon/      ],
    [ 'horizontal_14_quad', 'quad',   qw/sse2 avx2 neon/ ],
    [ 'horizontal_6',       'single', qw/sse2 neon/      ],
    [ 'horizontal_6_dual',  'dual',   qw/sse2 neon/      ],
    [ 'horizontal_6_quad',  'quad',   qw/sse2 avx2 neon/ ],
    [ 'horizontal_8',       'single', qw/sse2 neon/      ],
    [ 'horizontal_8_dual',  'dual',   qw/sse2 neon/      ],
    [ 'horizontal_8_quad',  'quad',   qw/sse2 avx2 neon/ ],
    [ 'horizontal_4',       'single', qw/sse2 neon/      ],
    [ 'horizontal_4_dual',  'dual',   qw/sse2 neon/      ],
    [ 'horizontal_4_quad',  'quad',   qw/sse2 neon/      ],
    [ 'vertical_6_dual',    'dual',   qw/sse2 neon/      ],
    [ 'vertical_6_quad',    'quad',   qw/sse2 neon/      ],
  );

  foreach my $row (@lpf_table) {
    my ($suffix, $kind, @archs) = @$row;
    add_proto("void", "aom_lpf_$suffix", $lpf_args{$kind});
    specialize("aom_lpf_$suffix", @archs);
  }
}
621
# High bit-depth loop filters; declared only when CONFIG_AV1_HIGHBITDEPTH is
# "yes". They operate on uint16_t samples and take a trailing bit-depth
# argument. Plain filters take one blimit/limit/thresh triple, _dual filters
# take two.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  my $hbd_lpf_args = "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
  my $hbd_lpf_dual_args = "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";

  # Vertical filters. avx2 exists for the 14/8/4 dual forms but not 6_dual.
  add_proto "void", "aom_highbd_lpf_vertical_14", $hbd_lpf_args;
  specialize "aom_highbd_lpf_vertical_14", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_vertical_14_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_vertical_14_dual", qw/neon sse2 avx2/;

  add_proto "void", "aom_highbd_lpf_vertical_8", $hbd_lpf_args;
  specialize "aom_highbd_lpf_vertical_8", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_vertical_8_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_vertical_8_dual", qw/neon sse2 avx2/;

  add_proto "void", "aom_highbd_lpf_vertical_6", $hbd_lpf_args;
  specialize "aom_highbd_lpf_vertical_6", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_vertical_6_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_vertical_6_dual", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_vertical_4", $hbd_lpf_args;
  specialize "aom_highbd_lpf_vertical_4", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_vertical_4_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_vertical_4_dual", qw/neon sse2 avx2/;

  # Horizontal filters, same pattern: avx2 for 14/8/4 dual, not for 6_dual.
  add_proto "void", "aom_highbd_lpf_horizontal_14", $hbd_lpf_args;
  specialize "aom_highbd_lpf_horizontal_14", qw/neon sse2/;

  # This argument string is written out in full because it differs from
  # $hbd_lpf_dual_args by one character (no space before "int bd"); it is
  # reproduced exactly as it has always been passed to add_proto.
  add_proto "void", "aom_highbd_lpf_horizontal_14_dual", "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1,int bd";
  specialize "aom_highbd_lpf_horizontal_14_dual", qw/neon sse2 avx2/;

  add_proto "void", "aom_highbd_lpf_horizontal_6", $hbd_lpf_args;
  specialize "aom_highbd_lpf_horizontal_6", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_horizontal_6_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_horizontal_6_dual", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_horizontal_8", $hbd_lpf_args;
  specialize "aom_highbd_lpf_horizontal_8", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_horizontal_8_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_horizontal_8_dual", qw/neon sse2 avx2/;

  add_proto "void", "aom_highbd_lpf_horizontal_4", $hbd_lpf_args;
  specialize "aom_highbd_lpf_horizontal_4", qw/neon sse2/;

  add_proto "void", "aom_highbd_lpf_horizontal_4_dual", $hbd_lpf_dual_args;
  specialize "aom_highbd_lpf_horizontal_4_dual", qw/neon sse2 avx2/;
}  # CONFIG_AV1_HIGHBITDEPTH
671
672#
673# Encoder functions.
674#
675
676#
677# Forward transform
678#
# Encoder-only forward transforms and the float FFT/IFFT helpers.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # Argument list shared by the DCTs that write tran_low_t coefficients.
  my $fdct_args = "const int16_t *input, tran_low_t *output, int stride";

  add_proto "void", "aom_fdct4x4", $fdct_args;
  specialize "aom_fdct4x4", qw/neon sse2/;

  # The _lp variant is declared with an int16_t output buffer instead.
  add_proto "void", "aom_fdct4x4_lp", "const int16_t *input, int16_t *output, int stride";
  specialize "aom_fdct4x4_lp", qw/neon sse2/;

  if (aom_config("CONFIG_INTERNAL_STATS") eq "yes") {
    # 8x8 DCT transform for psnr-hvs. Unlike the other transforms it is not
    # compatible with av1 scan orders, because it does two transposes.
    add_proto "void", "aom_fdct8x8", $fdct_args;
    # $ssse3_x86_64 is 'ssse3' on x86_64 targets and '' everywhere else.
    specialize "aom_fdct8x8", qw/neon sse2/, "$ssse3_x86_64";

    # High bit depth
    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
      add_proto "void", "aom_highbd_fdct8x8", $fdct_args;
      specialize "aom_highbd_fdct8x8", qw/sse2/;
    }
  }

  # FFT/IFFT (float) only used for denoising (and noise power spectral
  # density estimation). Forward and inverse transforms are declared with the
  # same argument list and the same specializations per size:
  #   2x2          - C only
  #   4x4          - sse2
  #   8x8 .. 32x32 - avx2 and sse2
  my $fft_args = "const float *input, float *temp, float *output";
  foreach my $xform (qw/fft ifft/) {
    add_proto "void", "aom_${xform}2x2_float", $fft_args;

    add_proto "void", "aom_${xform}4x4_float", $fft_args;
    specialize "aom_${xform}4x4_float", qw/sse2/;

    foreach my $dim (8, 16, 32) {
      my $fft_fn = "aom_${xform}${dim}x${dim}_float";
      add_proto "void", $fft_fn, $fft_args;
      specialize $fft_fn, qw/avx2 sse2/;
    }
  }
}  # CONFIG_AV1_ENCODER
726
727#
728# Quantization
729#
# Low bit-depth quantizers (encoder builds only).
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
  # Every quantizer in this group is declared with the same argument list.
  my $quantize_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  # $ssse3_x86_64 is 'ssse3' on x86_64 targets and '' everywhere else, so the
  # ssse3 versions of the first two quantizers are 64-bit only.
  add_proto "void", "aom_quantize_b", $quantize_args;
  specialize "aom_quantize_b", qw/sse2 neon avx avx2/, "$ssse3_x86_64";

  add_proto "void", "aom_quantize_b_32x32", $quantize_args;
  specialize "aom_quantize_b_32x32", qw/neon avx avx2/, "$ssse3_x86_64";

  # The 64x64 quantizer lists ssse3 unconditionally.
  add_proto "void", "aom_quantize_b_64x64", $quantize_args;
  specialize "aom_quantize_b_64x64", qw/neon ssse3 avx2/;

  # The adaptive quantizers are left out of realtime-only builds.
  unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
    add_proto "void", "aom_quantize_b_adaptive", $quantize_args;
    specialize "aom_quantize_b_adaptive", qw/sse2 avx2/;

    add_proto "void", "aom_quantize_b_32x32_adaptive", $quantize_args;
    specialize "aom_quantize_b_32x32_adaptive", qw/sse2/;

    add_proto "void", "aom_quantize_b_64x64_adaptive", $quantize_args;
    specialize "aom_quantize_b_64x64_adaptive", qw/sse2/;
  }
}  # CONFIG_AV1_ENCODER
751
# High bit-depth quantizers: need both an encoder build and high-bitdepth
# support.
if (aom_config("CONFIG_AV1_ENCODER") eq "yes" && aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Every quantizer in this group is declared with the same argument list.
  my $hbd_quantize_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

  # Base, 32x32 and 64x64 quantizers all have sse2, avx2 and neon versions.
  foreach my $size_suffix ("", "_32x32", "_64x64") {
    add_proto "void", "aom_highbd_quantize_b${size_suffix}", $hbd_quantize_args;
    specialize "aom_highbd_quantize_b${size_suffix}", qw/sse2 avx2 neon/;
  }

  # The adaptive quantizers are left out of realtime-only builds.
  unless (aom_config("CONFIG_REALTIME_ONLY") eq "yes") {
    add_proto "void", "aom_highbd_quantize_b_adaptive", $hbd_quantize_args;
    specialize "aom_highbd_quantize_b_adaptive", qw/sse2 avx2 neon/;

    add_proto "void", "aom_highbd_quantize_b_32x32_adaptive", $hbd_quantize_args;
    specialize "aom_highbd_quantize_b_32x32_adaptive", qw/sse2 avx2 neon/;

    # No avx2 version is listed for the 64x64 adaptive quantizer.
    add_proto "void", "aom_highbd_quantize_b_64x64_adaptive", $hbd_quantize_args;
    specialize "aom_highbd_quantize_b_64x64_adaptive", qw/sse2 neon/;
  }
}  # CONFIG_AV1_ENCODER && CONFIG_AV1_HIGHBITDEPTH
773
774#
775# Alpha blending with mask
776#
# Low bit-depth blend prototypes. The d16 variant takes CONV_BUF_TYPE sources
# plus a ConvolveParams pointer; the other three share everything up to and
# including the mask pointer, then differ only in the trailing arguments.
# The bare block keeps the helper variable private to this group.
{
  add_proto "void", "aom_lowbd_blend_a64_d16_mask", "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params";
  specialize "aom_lowbd_blend_a64_d16_mask", qw/sse4_1 avx2 neon/;

  my $blend_args = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask";

  # 2-D mask: carries a mask stride and the subw/subh arguments.
  add_proto "void", "aom_blend_a64_mask", "$blend_args, uint32_t mask_stride, int w, int h, int subw, int subh";
  # hmask / vmask: no mask stride, no subw/subh.
  add_proto "void", "aom_blend_a64_hmask", "$blend_args, int w, int h";
  add_proto "void", "aom_blend_a64_vmask", "$blend_args, int w, int h";

  specialize qw/aom_blend_a64_mask sse4_1 neon avx2/;
  specialize qw/aom_blend_a64_hmask sse4_1 neon/;
  specialize qw/aom_blend_a64_vmask sse4_1 neon/;
}
785
# High bit-depth blend prototypes; declared only when CONFIG_AV1_HIGHBITDEPTH
# is "yes". Each mirrors its low bit-depth counterpart with a trailing
# bit-depth argument.
if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
  # Leading arguments common to the mask / hmask / vmask prototypes.
  my $hbd_blend_args = "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask";

  add_proto "void", "aom_highbd_blend_a64_mask", "$hbd_blend_args, uint32_t mask_stride, int w, int h, int subw, int subh, int bd";
  add_proto "void", "aom_highbd_blend_a64_hmask", "$hbd_blend_args, int w, int h, int bd";
  add_proto "void", "aom_highbd_blend_a64_vmask", "$hbd_blend_args, int w, int h, int bd";
  # The d16 variant takes CONV_BUF_TYPE sources and a ConvolveParams pointer;
  # its bit depth is declared "const int bd".
  add_proto "void", "aom_highbd_blend_a64_d16_mask", "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd";

  # Only the d16 variant has an avx2 version.
  specialize qw/aom_highbd_blend_a64_mask sse4_1 neon/;
  specialize qw/aom_highbd_blend_a64_hmask sse4_1 neon/;
  specialize qw/aom_highbd_blend_a64_vmask sse4_1 neon/;
  specialize qw/aom_highbd_blend_a64_d16_mask sse4_1 neon avx2/;
}  # CONFIG_AV1_HIGHBITDEPTH
796
797if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
798  #
799  # Block subtraction
800  #
  # Prototypes for the subtraction and SSE helpers, each followed by its SIMD
  # specializations. aom_sse and aom_get_blk_sse_sum use the three-argument
  # add_proto form (return type, name, argument list); the others pack the
  # return type and name into one qw// list.
801  add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
802  specialize qw/aom_subtract_block neon sse2 avx2/;
803
804  add_proto qw/int64_t/, "aom_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
805  specialize qw/aom_sse sse4_1 avx2 neon neon_dotprod/;
806
807  add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum";
808  specialize qw/aom_get_blk_sse_sum sse2 avx2 neon sve/;
809
  # High-bitdepth counterparts, declared only when CONFIG_AV1_HIGHBITDEPTH is
  # "yes". Their pixel pointers are still prototyped as uint8_t *.
810  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
811    add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
812    specialize qw/aom_highbd_subtract_block sse2 neon/;
813
814    add_proto qw/int64_t/, "aom_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height";
815    specialize qw/aom_highbd_sse sse4_1 avx2 neon sve/;
816  }
817
818  #
819  # Sum of Squares
820  #
  # Sum-of-squares and 2-D variance helpers; every prototype here returns
  # uint64_t. aom_sum_squares_i16 takes a flat buffer and an element count,
  # the *_2d_* functions take a stride plus width/height.
821  add_proto qw/uint64_t aom_sum_squares_2d_i16/, "const int16_t *src, int stride, int width, int height";
822  specialize qw/aom_sum_squares_2d_i16 sse2 avx2 neon sve/;
823
824  add_proto qw/uint64_t aom_sum_squares_i16/, "const int16_t *src, uint32_t N";
825  specialize qw/aom_sum_squares_i16 sse2 neon sve/;
826
827  add_proto qw/uint64_t aom_var_2d_u8/, "uint8_t *src, int src_stride, int width, int height";
828  specialize qw/aom_var_2d_u8 sse2 avx2 neon neon_dotprod/;
829
  # The u16 variant exists only in high-bitdepth builds. Despite the suffix
  # it is prototyped with a uint8_t *src, like aom_var_2d_u8.
830  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
831    add_proto qw/uint64_t aom_var_2d_u16/, "uint8_t *src, int src_stride, int width, int height";
832    specialize qw/aom_var_2d_u16 sse2 avx2 neon sve/;
833  }
834
835  #
836  # Single block SAD / Single block Avg SAD
837  #
  # For every [w, h] pair in @encoder_block_sizes this declares:
  #   aom_sadWxH       - always
  #   aom_sad_skip_WxH - only when h >= 16
  #   aom_sadWxH_avg   - only when neither dimension is 4; takes an extra
  #                      second_pred argument
  # $w and $h are package globals that are overwritten on every iteration.
838  foreach (@encoder_block_sizes) {
839    ($w, $h) = @$_;
840    add_proto qw/unsigned int/, "aom_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
841    if ($h >= 16) {
842      add_proto qw/unsigned int/, "aom_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
843    }
844    if ($w != 4 && $h != 4) {
845      add_proto qw/unsigned int/, "aom_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
846    }
847  }
848
849  add_proto qw/uint64_t aom_sum_sse_2d_i16/, "const int16_t *src, int src_stride, int width, int height, int *sum";
850  specialize qw/aom_sum_sse_2d_i16 avx2 neon sse2 sve/;
  # SIMD specializations for the single-block SAD prototypes declared by the
  # @encoder_block_sizes loop. Widths of 32 and above list avx2; widths of 16
  # and above list neon_dotprod; every size lists sse2 and neon.
  #
  # These lines are unconditional: the 4x16, 16x4, 8x32, 32x8, 16x64 and 64x16
  # entries are specialized even in builds where CONFIG_REALTIME_ONLY is "yes"
  # and those sizes were never pushed onto @encoder_block_sizes.
851  specialize qw/aom_sad128x128    avx2 sse2 neon neon_dotprod/;
852  specialize qw/aom_sad128x64     avx2 sse2 neon neon_dotprod/;
853  specialize qw/aom_sad64x128     avx2 sse2 neon neon_dotprod/;
854  specialize qw/aom_sad64x64      avx2 sse2 neon neon_dotprod/;
855  specialize qw/aom_sad64x32      avx2 sse2 neon neon_dotprod/;
856  specialize qw/aom_sad32x64      avx2 sse2 neon neon_dotprod/;
857  specialize qw/aom_sad32x32      avx2 sse2 neon neon_dotprod/;
858  specialize qw/aom_sad32x16      avx2 sse2 neon neon_dotprod/;
859  specialize qw/aom_sad16x32           sse2 neon neon_dotprod/;
860  specialize qw/aom_sad16x16           sse2 neon neon_dotprod/;
861  specialize qw/aom_sad16x8            sse2 neon neon_dotprod/;
862  specialize qw/aom_sad8x16            sse2 neon/;
863  specialize qw/aom_sad8x8             sse2 neon/;
864  specialize qw/aom_sad8x4             sse2 neon/;
865  specialize qw/aom_sad4x8             sse2 neon/;
866  specialize qw/aom_sad4x4             sse2 neon/;
867
868  specialize qw/aom_sad4x16            sse2 neon/;
869  specialize qw/aom_sad16x4            sse2 neon neon_dotprod/;
870  specialize qw/aom_sad8x32            sse2 neon/;
871  specialize qw/aom_sad32x8            sse2 neon neon_dotprod/;
872  specialize qw/aom_sad16x64           sse2 neon neon_dotprod/;
873  specialize qw/aom_sad64x16           sse2 neon neon_dotprod/;
874
  # Skip variants. Prototypes exist only for sizes with h >= 16.
  # NOTE(review): aom_sad_skip_16x8 is specialized below although the
  # prototype loop never declares it (h = 8 fails the `$h >= 16` test). It
  # looks like a leftover entry - confirm and drop it if so.
875  specialize qw/aom_sad_skip_128x128    avx2 sse2 neon neon_dotprod/;
876  specialize qw/aom_sad_skip_128x64     avx2 sse2 neon neon_dotprod/;
877  specialize qw/aom_sad_skip_64x128     avx2 sse2 neon neon_dotprod/;
878  specialize qw/aom_sad_skip_64x64      avx2 sse2 neon neon_dotprod/;
879  specialize qw/aom_sad_skip_64x32      avx2 sse2 neon neon_dotprod/;
880  specialize qw/aom_sad_skip_32x64      avx2 sse2 neon neon_dotprod/;
881  specialize qw/aom_sad_skip_32x32      avx2 sse2 neon neon_dotprod/;
882  specialize qw/aom_sad_skip_32x16      avx2 sse2 neon neon_dotprod/;
883  specialize qw/aom_sad_skip_16x32           sse2 neon neon_dotprod/;
884  specialize qw/aom_sad_skip_16x16           sse2 neon neon_dotprod/;
885  specialize qw/aom_sad_skip_16x8            sse2 neon neon_dotprod/;
886  specialize qw/aom_sad_skip_8x16            sse2 neon/;
887
888  specialize qw/aom_sad_skip_4x16            sse2 neon/;
889  specialize qw/aom_sad_skip_8x32            sse2 neon/;
890  specialize qw/aom_sad_skip_16x64           sse2 neon neon_dotprod/;
891  specialize qw/aom_sad_skip_64x16           sse2 neon neon_dotprod/;
892
  # Averaging variants. Prototypes exist only when neither dimension is 4, so
  # no 4-wide or 4-high size appears here.
893  specialize qw/aom_sad128x128_avg avx2 sse2 neon neon_dotprod/;
894  specialize qw/aom_sad128x64_avg  avx2 sse2 neon neon_dotprod/;
895  specialize qw/aom_sad64x128_avg  avx2 sse2 neon neon_dotprod/;
896  specialize qw/aom_sad64x64_avg   avx2 sse2 neon neon_dotprod/;
897  specialize qw/aom_sad64x32_avg   avx2 sse2 neon neon_dotprod/;
898  specialize qw/aom_sad32x64_avg   avx2 sse2 neon neon_dotprod/;
899  specialize qw/aom_sad32x32_avg   avx2 sse2 neon neon_dotprod/;
900  specialize qw/aom_sad32x16_avg   avx2 sse2 neon neon_dotprod/;
901  specialize qw/aom_sad16x32_avg        sse2 neon neon_dotprod/;
902  specialize qw/aom_sad16x16_avg        sse2 neon neon_dotprod/;
903  specialize qw/aom_sad16x8_avg         sse2 neon neon_dotprod/;
904  specialize qw/aom_sad8x16_avg         sse2 neon/;
905  specialize qw/aom_sad8x8_avg          sse2 neon/;
906
907  specialize qw/aom_sad8x32_avg         sse2 neon/;
908  specialize qw/aom_sad32x8_avg         sse2 neon neon_dotprod/;
909  specialize qw/aom_sad16x64_avg        sse2 neon neon_dotprod/;
910  specialize qw/aom_sad64x16_avg        sse2 neon neon_dotprod/;
911
912  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
    # High-bitdepth single-block SAD. The prototype rules match the low
    # bit-depth loop: plain SAD for every size, the skip variant only when
    # h >= 16, the avg variant only when neither dimension is 4.
    #
    # The last `if` additionally specializes the plain and avg functions for
    # sse2 whenever neither dimension is 128 and the width is not 4.
    # NOTE(review): that guard has no `$h != 4` test, so for 8x4 and 16x4 the
    # _avg name is specialized even though no _avg prototype was declared.
    # Presumably harmless, since this file specializes undeclared names
    # elsewhere too - confirm against the rtcd generator.
913    foreach (@encoder_block_sizes) {
914      ($w, $h) = @$_;
915      add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
916      if ($h >= 16) {
917        add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
918      }
919      if ($w != 4 && $h != 4) {
920        add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
921      }
922      if ($w != 128 && $h != 128 && $w != 4) {
923        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
924        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
925      }
926    }
    # Explicit per-size lists. Many repeat the sse2 entry the loop already
    # registered. The 4-wide sizes, which the loop skips, get sse2 here.
    # Sizes with a 128 dimension never list sse2.
927    specialize qw/aom_highbd_sad128x128 avx2      neon/;
928    specialize qw/aom_highbd_sad128x64  avx2      neon/;
929    specialize qw/aom_highbd_sad64x128  avx2      neon/;
930    specialize qw/aom_highbd_sad64x64   avx2 sse2 neon/;
931    specialize qw/aom_highbd_sad64x32   avx2 sse2 neon/;
932    specialize qw/aom_highbd_sad32x64   avx2 sse2 neon/;
933    specialize qw/aom_highbd_sad32x32   avx2 sse2 neon/;
934    specialize qw/aom_highbd_sad32x16   avx2 sse2 neon/;
935    specialize qw/aom_highbd_sad16x32   avx2 sse2 neon/;
936    specialize qw/aom_highbd_sad16x16   avx2 sse2 neon/;
937    specialize qw/aom_highbd_sad16x8    avx2 sse2 neon/;
938    specialize qw/aom_highbd_sad8x16         sse2 neon/;
939    specialize qw/aom_highbd_sad8x8          sse2 neon/;
940    specialize qw/aom_highbd_sad8x4          sse2 neon/;
941    specialize qw/aom_highbd_sad4x8          sse2 neon/;
942    specialize qw/aom_highbd_sad4x4          sse2 neon/;
943
944    specialize qw/aom_highbd_sad4x16         sse2 neon/;
945    specialize qw/aom_highbd_sad16x4    avx2 sse2 neon/;
946    specialize qw/aom_highbd_sad8x32         sse2 neon/;
947    specialize qw/aom_highbd_sad32x8    avx2 sse2 neon/;
948    specialize qw/aom_highbd_sad16x64   avx2 sse2 neon/;
949    specialize qw/aom_highbd_sad64x16   avx2 sse2 neon/;
950
    # Skip variants (prototypes exist only for h >= 16).
951    specialize qw/aom_highbd_sad_skip_128x128 avx2      neon/;
952    specialize qw/aom_highbd_sad_skip_128x64  avx2      neon/;
953    specialize qw/aom_highbd_sad_skip_64x128  avx2      neon/;
954    specialize qw/aom_highbd_sad_skip_64x64   avx2 sse2 neon/;
955    specialize qw/aom_highbd_sad_skip_64x32   avx2 sse2 neon/;
956    specialize qw/aom_highbd_sad_skip_32x64   avx2 sse2 neon/;
957    specialize qw/aom_highbd_sad_skip_32x32   avx2 sse2 neon/;
958    specialize qw/aom_highbd_sad_skip_32x16   avx2 sse2 neon/;
959    specialize qw/aom_highbd_sad_skip_16x32   avx2 sse2 neon/;
960    specialize qw/aom_highbd_sad_skip_16x16   avx2 sse2 neon/;
961    specialize qw/aom_highbd_sad_skip_8x16         sse2 neon/;
962
963    specialize qw/aom_highbd_sad_skip_4x16         sse2 neon/;
964    specialize qw/aom_highbd_sad_skip_8x32         sse2 neon/;
965    specialize qw/aom_highbd_sad_skip_16x64   avx2 sse2 neon/;
966    specialize qw/aom_highbd_sad_skip_64x16   avx2 sse2 neon/;
967
    # Averaging variants. The 8x16 and 8x8 lines list only neon, but the loop
    # at the top of this block has already specialized both for sse2.
    # NOTE(review): this assumes repeated specialize calls accumulate rather
    # than replace - confirm against the rtcd generator.
968    specialize qw/aom_highbd_sad128x128_avg avx2      neon/;
969    specialize qw/aom_highbd_sad128x64_avg  avx2      neon/;
970    specialize qw/aom_highbd_sad64x128_avg  avx2      neon/;
971    specialize qw/aom_highbd_sad64x64_avg   avx2 sse2 neon/;
972    specialize qw/aom_highbd_sad64x32_avg   avx2 sse2 neon/;
973    specialize qw/aom_highbd_sad32x64_avg   avx2 sse2 neon/;
974    specialize qw/aom_highbd_sad32x32_avg   avx2 sse2 neon/;
975    specialize qw/aom_highbd_sad32x16_avg   avx2 sse2 neon/;
976    specialize qw/aom_highbd_sad16x32_avg   avx2 sse2 neon/;
977    specialize qw/aom_highbd_sad16x16_avg   avx2 sse2 neon/;
978    specialize qw/aom_highbd_sad16x8_avg    avx2 sse2 neon/;
979    specialize qw/aom_highbd_sad8x16_avg              neon/;
980    specialize qw/aom_highbd_sad8x8_avg               neon/;
981
982    specialize qw/aom_highbd_sad8x32_avg         sse2 neon/;
983    specialize qw/aom_highbd_sad16x64_avg   avx2 sse2 neon/;
984    specialize qw/aom_highbd_sad32x8_avg    avx2 sse2 neon/;
985    specialize qw/aom_highbd_sad64x16_avg   avx2 sse2 neon/;
986  }
987  #
988  # Masked SAD
989  #
  # One masked-SAD prototype per entry of @encoder_block_sizes. Each takes a
  # second predictor, a mask with its own stride and an invert_mask flag, and
  # every size is specialized for ssse3, avx2 and neon.
990  foreach (@encoder_block_sizes) {
991    ($w, $h) = @$_;
992    add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
993    specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
994  }
995
  # High-bitdepth masked SAD: same sizes and specializations, declared only
  # when CONFIG_AV1_HIGHBITDEPTH is "yes". The pixel pointers keep the
  # uint8_t * type; only the parameter names gain an 8 suffix.
996  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
997    foreach (@encoder_block_sizes) {
998      ($w, $h) = @$_;
999      add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
1000      specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2 neon/;
1001    }
1002  }
1003
1004  #
1005  # OBMC SAD
1006  #
  # OBMC SAD is left out entirely when CONFIG_REALTIME_ONLY is "yes".
  # Otherwise one prototype is declared per entry of @encoder_block_sizes,
  # taking int32_t wsrc and mask buffers, and it is specialized for sse4_1,
  # avx2 and neon unless the size is 128x32 or 32x128.
  # NOTE(review): the @encoder_block_sizes construction at the top of this
  # file produces neither 128x32 nor 32x128, so the exclusion appears never to
  # trigger - confirm whether it can be removed.
1007  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1008    foreach (@encoder_block_sizes) {
1009      ($w, $h) = @$_;
1010      add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
1011      if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1012        specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1013      }
1014    }
1015
    # High-bitdepth OBMC SAD: same argument list, same size exclusion, same
    # specializations; additionally gated on CONFIG_AV1_HIGHBITDEPTH.
1016    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1017      foreach (@encoder_block_sizes) {
1018        ($w, $h) = @$_;
1019        add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
1020        if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
1021          specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2 neon/;
1022        }
1023      }
1024    }
1025  }
1026
1027  #
1028  # Multi-block SAD, comparing a reference to N independent blocks
1029  #
  # For every [w, h] pair in @encoder_block_sizes this declares the x4d and
  # x3d multi-reference SADs, plus the skip x4d variant when h >= 16. There
  # is no skip x3d prototype.
  # NOTE(review): the x3d prototype is declared with the same 4-element
  # ref_ptr[4] / sad_array[4] parameters as x4d - confirm that is intentional.
1030  foreach (@encoder_block_sizes) {
1031    ($w, $h) = @$_;
1032    add_proto qw/void/, "aom_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1033    add_proto qw/void/, "aom_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1034    if ($h >= 16) {
1035      add_proto qw/void/, "aom_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1036    }
1037  }
1038
1039  specialize qw/aom_sad128x128x4d avx2 sse2 neon neon_dotprod/;
  # SIMD specializations for the multi-block SAD prototypes. For x4d, widths
  # of 16 and above list avx2, sse2, neon and neon_dotprod; widths of 8 and 4
  # list only sse2 and neon. The lines are unconditional, so the 64x16, 32x8,
  # 16x64, 16x4, 8x32 and 4x16 entries are specialized even when
  # CONFIG_REALTIME_ONLY is "yes" and those sizes have no prototype.
1040  specialize qw/aom_sad128x64x4d  avx2 sse2 neon neon_dotprod/;
1041  specialize qw/aom_sad64x128x4d  avx2 sse2 neon neon_dotprod/;
1042  specialize qw/aom_sad64x64x4d   avx2 sse2 neon neon_dotprod/;
1043  specialize qw/aom_sad64x32x4d   avx2 sse2 neon neon_dotprod/;
1044  specialize qw/aom_sad32x64x4d   avx2 sse2 neon neon_dotprod/;
1045  specialize qw/aom_sad32x32x4d   avx2 sse2 neon neon_dotprod/;
1046  specialize qw/aom_sad32x16x4d   avx2 sse2 neon neon_dotprod/;
1047  specialize qw/aom_sad16x32x4d   avx2 sse2 neon neon_dotprod/;
1048  specialize qw/aom_sad16x16x4d   avx2 sse2 neon neon_dotprod/;
1049  specialize qw/aom_sad16x8x4d    avx2 sse2 neon neon_dotprod/;
1050
1051  specialize qw/aom_sad8x16x4d         sse2 neon/;
1052  specialize qw/aom_sad8x8x4d          sse2 neon/;
1053  specialize qw/aom_sad8x4x4d          sse2 neon/;
1054  specialize qw/aom_sad4x8x4d          sse2 neon/;
1055  specialize qw/aom_sad4x4x4d          sse2 neon/;
1056
1057  specialize qw/aom_sad64x16x4d   avx2 sse2 neon neon_dotprod/;
1058  specialize qw/aom_sad32x8x4d    avx2 sse2 neon neon_dotprod/;
1059  specialize qw/aom_sad16x64x4d   avx2 sse2 neon neon_dotprod/;
1060  specialize qw/aom_sad16x4x4d    avx2 sse2 neon neon_dotprod/;
1061  specialize qw/aom_sad8x32x4d         sse2 neon/;
1062  specialize qw/aom_sad4x16x4d         sse2 neon/;
1063
  # Skip x4d variants. Prototypes exist only for sizes with h >= 16.
  # NOTE(review): aom_sad_skip_16x8x4d is specialized below although the
  # prototype loop never declares it (h = 8 fails the `$h >= 16` test). It
  # looks like a leftover entry - confirm and drop it if so.
1064  specialize qw/aom_sad_skip_128x128x4d avx2 sse2 neon neon_dotprod/;
1065  specialize qw/aom_sad_skip_128x64x4d  avx2 sse2 neon neon_dotprod/;
1066  specialize qw/aom_sad_skip_64x128x4d  avx2 sse2 neon neon_dotprod/;
1067  specialize qw/aom_sad_skip_64x64x4d   avx2 sse2 neon neon_dotprod/;
1068  specialize qw/aom_sad_skip_64x32x4d   avx2 sse2 neon neon_dotprod/;
1069  specialize qw/aom_sad_skip_64x16x4d   avx2 sse2 neon neon_dotprod/;
1070  specialize qw/aom_sad_skip_32x64x4d   avx2 sse2 neon neon_dotprod/;
1071  specialize qw/aom_sad_skip_32x32x4d   avx2 sse2 neon neon_dotprod/;
1072  specialize qw/aom_sad_skip_32x16x4d   avx2 sse2 neon neon_dotprod/;
1073
1074  specialize qw/aom_sad_skip_16x64x4d   avx2 sse2 neon neon_dotprod/;
1075  specialize qw/aom_sad_skip_16x32x4d   avx2 sse2 neon neon_dotprod/;
1076  specialize qw/aom_sad_skip_16x16x4d   avx2 sse2 neon neon_dotprod/;
1077  specialize qw/aom_sad_skip_16x8x4d    avx2 sse2 neon neon_dotprod/;
1078  specialize qw/aom_sad_skip_8x32x4d         sse2 neon/;
1079  specialize qw/aom_sad_skip_8x16x4d         sse2 neon/;
1080  specialize qw/aom_sad_skip_4x16x4d         sse2 neon/;
1081
  # x3d variants. No sse2 version is listed for any size: widths of 16 and
  # above list avx2, neon and neon_dotprod, narrower sizes list neon only.
1082  specialize qw/aom_sad128x128x3d avx2 neon neon_dotprod/;
1083  specialize qw/aom_sad128x64x3d  avx2 neon neon_dotprod/;
1084  specialize qw/aom_sad64x128x3d  avx2 neon neon_dotprod/;
1085  specialize qw/aom_sad64x64x3d   avx2 neon neon_dotprod/;
1086  specialize qw/aom_sad64x32x3d   avx2 neon neon_dotprod/;
1087  specialize qw/aom_sad32x64x3d   avx2 neon neon_dotprod/;
1088  specialize qw/aom_sad32x32x3d   avx2 neon neon_dotprod/;
1089  specialize qw/aom_sad32x16x3d   avx2 neon neon_dotprod/;
1090  specialize qw/aom_sad16x32x3d   avx2 neon neon_dotprod/;
1091  specialize qw/aom_sad16x16x3d   avx2 neon neon_dotprod/;
1092  specialize qw/aom_sad16x8x3d    avx2 neon neon_dotprod/;
1093  specialize qw/aom_sad8x16x3d         neon/;
1094  specialize qw/aom_sad8x8x3d          neon/;
1095  specialize qw/aom_sad8x4x3d          neon/;
1096  specialize qw/aom_sad4x8x3d          neon/;
1097  specialize qw/aom_sad4x4x3d          neon/;
1098
1099  specialize qw/aom_sad64x16x3d   avx2 neon neon_dotprod/;
1100  specialize qw/aom_sad32x8x3d    avx2 neon neon_dotprod/;
1101  specialize qw/aom_sad16x64x3d   avx2 neon neon_dotprod/;
1102  specialize qw/aom_sad16x4x3d    avx2 neon neon_dotprod/;
1103  specialize qw/aom_sad8x32x3d         neon/;
1104  specialize qw/aom_sad4x16x3d         neon/;
1105
1106  #
1107  # Multi-block SAD, comparing a reference to N independent blocks
1108  #
1109  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth multi-block SAD entries exist only when this config is "yes"
1110    foreach (@encoder_block_sizes) {  # each element is a [width, height] pair
1111      ($w, $h) = @$_;
          # The x3d prototype is given the same argument list as x4d, including the 4-entry ref_ptr/sad_array arrays.
1112      add_proto qw/void/, "aom_highbd_sad${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1113      add_proto qw/void/, "aom_highbd_sad${w}x${h}x3d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1114      if ($h >= 16) {  # the skip variant is declared only for heights of 16 and above
1115        add_proto qw/void/, "aom_highbd_sad_skip_${w}x${h}x4d", "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_ptr[4], int ref_stride, uint32_t sad_array[4]";
1116      }
1117      if ($w != 128 && $h != 128) {  # sse2 x4d is listed for every size that has no 128 dimension
1118        specialize "aom_highbd_sad${w}x${h}x4d", qw/sse2/;
1119      }
1120    }
        # Explicit per-size x4d lists. Where sse2 appears below it repeats what the loop above
        # already listed for that size -- presumably harmless; confirm against specialize's definition.
1121    specialize qw/aom_highbd_sad128x128x4d      avx2 neon/;
1122    specialize qw/aom_highbd_sad128x64x4d       avx2 neon/;
1123    specialize qw/aom_highbd_sad64x128x4d       avx2 neon/;
1124    specialize qw/aom_highbd_sad64x64x4d   sse2 avx2 neon/;
1125    specialize qw/aom_highbd_sad64x32x4d   sse2 avx2 neon/;
1126    specialize qw/aom_highbd_sad32x64x4d   sse2 avx2 neon/;
1127    specialize qw/aom_highbd_sad32x32x4d   sse2 avx2 neon/;
1128    specialize qw/aom_highbd_sad32x16x4d   sse2 avx2 neon/;
1129    specialize qw/aom_highbd_sad16x32x4d   sse2 avx2 neon/;
1130    specialize qw/aom_highbd_sad16x16x4d   sse2 avx2 neon/;
1131    specialize qw/aom_highbd_sad16x8x4d    sse2 avx2 neon/;
1132    specialize qw/aom_highbd_sad8x16x4d    sse2      neon/;
1133    specialize qw/aom_highbd_sad8x8x4d     sse2      neon/;
1134    specialize qw/aom_highbd_sad8x4x4d     sse2      neon/;
1135    specialize qw/aom_highbd_sad4x8x4d     sse2      neon/;
1136    specialize qw/aom_highbd_sad4x4x4d     sse2      neon/;
1137
        # NOTE(review): the six 1:4 / 4:1 sizes below are pushed onto @encoder_block_sizes only when
        # CONFIG_REALTIME_ONLY is not "yes", yet they are listed unconditionally here -- confirm that
        # specialize tolerates a name that has no prototype.
1138    specialize qw/aom_highbd_sad4x16x4d         sse2 neon/;
1139    specialize qw/aom_highbd_sad16x4x4d    avx2 sse2 neon/;
1140    specialize qw/aom_highbd_sad8x32x4d         sse2 neon/;
1141    specialize qw/aom_highbd_sad32x8x4d    avx2 sse2 neon/;
1142    specialize qw/aom_highbd_sad16x64x4d   avx2 sse2 neon/;
1143    specialize qw/aom_highbd_sad64x16x4d   avx2 sse2 neon/;
1144
        # skip x4d lists: sse2 is absent only for the three sizes with a 128 dimension;
        # avx2 is absent for the width-4 and width-8 sizes.
1145    specialize qw/aom_highbd_sad_skip_128x128x4d avx2      neon/;
1146    specialize qw/aom_highbd_sad_skip_128x64x4d  avx2      neon/;
1147    specialize qw/aom_highbd_sad_skip_64x128x4d  avx2      neon/;
1148    specialize qw/aom_highbd_sad_skip_64x64x4d   avx2 sse2 neon/;
1149    specialize qw/aom_highbd_sad_skip_64x32x4d   avx2 sse2 neon/;
1150    specialize qw/aom_highbd_sad_skip_32x64x4d   avx2 sse2 neon/;
1151    specialize qw/aom_highbd_sad_skip_32x32x4d   avx2 sse2 neon/;
1152    specialize qw/aom_highbd_sad_skip_32x16x4d   avx2 sse2 neon/;
1153    specialize qw/aom_highbd_sad_skip_16x32x4d   avx2 sse2 neon/;
1154    specialize qw/aom_highbd_sad_skip_16x16x4d   avx2 sse2 neon/;
1155    specialize qw/aom_highbd_sad_skip_8x16x4d         sse2 neon/;
1156
1157    specialize qw/aom_highbd_sad_skip_4x16x4d         sse2 neon/;
1158    specialize qw/aom_highbd_sad_skip_8x32x4d         sse2 neon/;
1159    specialize qw/aom_highbd_sad_skip_16x64x4d   avx2 sse2 neon/;
1160    specialize qw/aom_highbd_sad_skip_64x16x4d   avx2 sse2 neon/;
1161
        # x3d lists: neon for every size; avx2 only for widths of 16 and above; no sse2 entries.
1162    specialize qw/aom_highbd_sad128x128x3d avx2 neon/;
1163    specialize qw/aom_highbd_sad128x64x3d  avx2 neon/;
1164    specialize qw/aom_highbd_sad64x128x3d  avx2 neon/;
1165    specialize qw/aom_highbd_sad64x64x3d   avx2 neon/;
1166    specialize qw/aom_highbd_sad64x32x3d   avx2 neon/;
1167    specialize qw/aom_highbd_sad32x64x3d   avx2 neon/;
1168    specialize qw/aom_highbd_sad32x32x3d   avx2 neon/;
1169    specialize qw/aom_highbd_sad32x16x3d   avx2 neon/;
1170    specialize qw/aom_highbd_sad16x32x3d   avx2 neon/;
1171    specialize qw/aom_highbd_sad16x16x3d   avx2 neon/;
1172    specialize qw/aom_highbd_sad16x8x3d    avx2 neon/;
1173    specialize qw/aom_highbd_sad8x16x3d         neon/;
1174    specialize qw/aom_highbd_sad8x8x3d          neon/;
1175    specialize qw/aom_highbd_sad8x4x3d          neon/;
1176    specialize qw/aom_highbd_sad4x8x3d          neon/;
1177    specialize qw/aom_highbd_sad4x4x3d          neon/;
1178
1179    specialize qw/aom_highbd_sad64x16x3d   avx2 neon/;
1180    specialize qw/aom_highbd_sad32x8x3d    avx2 neon/;
1181    specialize qw/aom_highbd_sad16x64x3d   avx2 neon/;
1182    specialize qw/aom_highbd_sad16x4x3d    avx2 neon/;
1183    specialize qw/aom_highbd_sad8x32x3d         neon/;
1184    specialize qw/aom_highbd_sad4x16x3d         neon/;
1185  }
1186  #
1187  # Avg
1188  #
1189  add_proto qw/unsigned int aom_avg_8x8/, "const uint8_t *, int p";  # the prototype string leaves the pointer parameter unnamed
1190  specialize qw/aom_avg_8x8 sse2 neon/;
1191
1192  add_proto qw/unsigned int aom_avg_4x4/, "const uint8_t *, int p";
1193  specialize qw/aom_avg_4x4 sse2 neon/;
1194
      # Returns void and takes an int *avg output pointer, unlike the two entries above which return unsigned int.
      # presumably handles the four 8x8 blocks of the 16x16 area selected by x16_idx/y16_idx -- TODO confirm
1195  add_proto qw/void aom_avg_8x8_quad/, "const uint8_t *s, int p, int x16_idx, int y16_idx, int *avg";
1196  specialize qw/aom_avg_8x8_quad avx2 sse2 neon/;
1197
      # Takes two buffers (s with stride p, d with stride dp) and two int output pointers (min, max).
1198  add_proto qw/void aom_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1199  specialize qw/aom_minmax_8x8 sse2 neon/;
1200
1201  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth counterparts; neon is the only optimization listed
1202    add_proto qw/unsigned int aom_highbd_avg_8x8/, "const uint8_t *, int p";
1203    specialize qw/aom_highbd_avg_8x8 neon/;
1204    add_proto qw/unsigned int aom_highbd_avg_4x4/, "const uint8_t *, int p";
1205    specialize qw/aom_highbd_avg_4x4 neon/;
1206    add_proto qw/void aom_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
1207    specialize qw/aom_highbd_minmax_8x8 neon/;
1208  }
1209
      # aom_int_pro_row and aom_int_pro_col take the same parameters apart from the output buffer name (hbuf vs vbuf).
1210  add_proto qw/void aom_int_pro_row/, "int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1211  specialize qw/aom_int_pro_row avx2 sse2 neon/;
1212
1213  add_proto qw/void aom_int_pro_col/, "int16_t *vbuf, const uint8_t *ref, const int ref_stride, const int width, const int height, int norm_factor";
1214  specialize qw/aom_int_pro_col avx2 sse2 neon/;
1215
1216  add_proto qw/int aom_vector_var/, "const int16_t *ref, const int16_t *src, int bwl";
1217  specialize qw/aom_vector_var avx2 sse4_1 neon sve/;  # the only entry in this group that lists sse4_1 and sve
1218
1219  #
1220  # Hadamard transform and SATD for implementing the temporal dependency model
1221  #
1222  add_proto qw/void aom_hadamard_4x4/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";  # regular variants write tran_low_t coefficients
1223  specialize qw/aom_hadamard_4x4 sse2 neon/;
1224
1225  add_proto qw/void aom_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1226  specialize qw/aom_hadamard_8x8 sse2 neon/;
1227
1228  add_proto qw/void aom_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1229  specialize qw/aom_hadamard_16x16 avx2 sse2 neon/;
1230
1231  add_proto qw/void aom_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1232  specialize qw/aom_hadamard_32x32 avx2 sse2 neon/;
1233
      # The _lp variants differ from the ones above only in the output type: int16_t *coeff instead of tran_low_t *coeff.
1234  add_proto qw/void aom_hadamard_lp_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1235  specialize qw/aom_hadamard_lp_8x8 sse2 neon/;
1236
1237  add_proto qw/void aom_hadamard_lp_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1238  specialize qw/aom_hadamard_lp_16x16 sse2 avx2 neon/;
1239
1240  add_proto qw/void aom_hadamard_lp_8x8_dual/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
1241  specialize qw/aom_hadamard_lp_8x8_dual sse2 avx2 neon/;
1242
1243  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth variants: 8x8, 16x16 and 32x32 only, each with avx2 and neon
1244    add_proto qw/void aom_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1245    specialize qw/aom_highbd_hadamard_8x8 avx2 neon/;
1246
1247    add_proto qw/void aom_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1248    specialize qw/aom_highbd_hadamard_16x16 avx2 neon/;
1249
1250    add_proto qw/void aom_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
1251    specialize qw/aom_highbd_hadamard_32x32 avx2 neon/;
1252  }
      # aom_satd reads tran_low_t coefficients; aom_satd_lp reads int16_t coefficients. Both take a length and return int.
1253  add_proto qw/int aom_satd/, "const tran_low_t *coeff, int length";
1254  specialize qw/aom_satd neon sse2 avx2/;
1255
1256  add_proto qw/int aom_satd_lp/, "const int16_t *coeff, int length";
1257  specialize qw/aom_satd_lp sse2 avx2 neon/;
1258
1259
1260  #
1261  # Structured Similarity (SSIM)
1262  #
      # Takes two 8-bit buffers (s, r) with their strides and five uint32_t output pointers.
1263  add_proto qw/void aom_ssim_parms_8x8/, "const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1264  specialize qw/aom_ssim_parms_8x8/, "$sse2_x86_64";  # $sse2_x86_64 is 'sse2' when $opts{arch} is "x86_64" and '' otherwise, so other architectures pass an empty option here
1265
1266  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
        # Same parameter list with uint16_t sample pointers; no specialize call accompanies it in this section.
1267    add_proto qw/void aom_highbd_ssim_parms_8x8/, "const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr";
1268  }
1269}  # CONFIG_AV1_ENCODER
1270
1271if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
1272
1273  #
1274  # Specialty Variance
1275  #
      # Outputs: sse8x8, sum8x8, tot_sse, tot_sum and var8x8.
1276  add_proto qw/void aom_get_var_sse_sum_8x8_quad/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse8x8, int *sum8x8, unsigned int *tot_sse, int *tot_sum, uint32_t *var8x8";
1277  specialize qw/aom_get_var_sse_sum_8x8_quad        avx2 sse2 neon neon_dotprod/;
1278
      # Outputs: sse16x16, tot_sse, tot_sum and var16x16 (no per-block sum pointer, unlike the 8x8 quad entry).
1279  add_proto qw/void aom_get_var_sse_sum_16x16_dual/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse16x16, unsigned int *tot_sse, int *tot_sum, uint32_t *var16x16";
1280  specialize qw/aom_get_var_sse_sum_16x16_dual        avx2 sse2 neon neon_dotprod/;
1281
      # The four aom_mse sizes share one argument list.
1282  add_proto qw/unsigned int aom_mse16x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1283  add_proto qw/unsigned int aom_mse16x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1284  add_proto qw/unsigned int aom_mse8x16/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1285  add_proto qw/unsigned int aom_mse8x8/, "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1286
      # avx2 is listed for 16x16 only.
1287  specialize qw/aom_mse16x16          sse2 avx2 neon neon_dotprod/;
1288  specialize qw/aom_mse16x8           sse2      neon neon_dotprod/;
1289  specialize qw/aom_mse8x16           sse2      neon neon_dotprod/;
1290  specialize qw/aom_mse8x8            sse2      neon neon_dotprod/;
1291
1292  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {  # high-bitdepth MSE entries exist only when this config is "yes"
1293    foreach $bd (8, 10, 12) {  # one set of four sizes per bit depth
1294      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1295      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse16x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1296      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x16", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1297      add_proto qw/unsigned int/, "aom_highbd_${bd}_mse8x8", "const uint8_t *src_ptr, int  source_stride, const uint8_t *ref_ptr, int  recon_stride, unsigned int *sse";
1298
          # Per-depth lists: the 8-bit entries list neon_dotprod where 10- and 12-bit list sve;
          # sse2 appears for 16x16 and 8x8 only; avx2 appears for 10-bit 16x16 only.
1299      if ($bd eq 8) {  # eq compares as strings; the loop values stringify to "8", "10" and "12"
1300        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon neon_dotprod/;
1301        specialize "aom_highbd_${bd}_mse16x8", qw/neon neon_dotprod/;
1302        specialize "aom_highbd_${bd}_mse8x16", qw/neon neon_dotprod/;
1303        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon neon_dotprod/;
1304      } elsif ($bd eq 10) {
1305        specialize "aom_highbd_${bd}_mse16x16", qw/avx2 sse2 neon sve/;
1306        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
1307        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
1308        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
1309      } else {  # remaining loop value: 12
1310        specialize "aom_highbd_${bd}_mse16x16", qw/sse2 neon sve/;
1311        specialize "aom_highbd_${bd}_mse16x8", qw/neon sve/;
1312        specialize "aom_highbd_${bd}_mse8x16", qw/neon sve/;
1313        specialize "aom_highbd_${bd}_mse8x8", qw/sse2 neon sve/;
1314      }
1315
1316    }
1317  }
1318
1319  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # declared for every build except those with CONFIG_REALTIME_ONLY set to "yes"
1320    add_proto qw/unsigned int aom_get_mb_ss/, "const int16_t *";  # single unnamed int16_t pointer parameter
1321    specialize qw/aom_get_mb_ss sse2 neon/;
1322  }
1323
1324  #
1325  # Variance / Subpixel Variance / Subpixel Avg Variance
1326  #
      # Both entries return uint64_t and take an 8-bit dst with stride plus a 16-bit src.
1327  add_proto qw/uint64_t/, "aom_mse_wxh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
1328  specialize qw/aom_mse_wxh_16bit  sse2 avx2 neon/;
1329
      # Unlike aom_mse_wxh_16bit, this prototype has no sstride parameter.
1330  add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
1331  specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;
1332
1333  foreach (@encoder_block_sizes) {  # three prototypes per [width, height] pair
1334    ($w, $h) = @$_;
        # The sub_pixel variants add xoffset/yoffset; the sub_pixel_avg variant additionally takes second_pred.
1335    add_proto qw/unsigned int/, "aom_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
1336    add_proto qw/uint32_t/, "aom_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1337    add_proto qw/uint32_t/, "aom_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1338  }
      # aom_variance: avx2 is listed for widths of 16 and above only.
1339  specialize qw/aom_variance128x128   sse2 avx2 neon neon_dotprod/;
1340  specialize qw/aom_variance128x64    sse2 avx2 neon neon_dotprod/;
1341  specialize qw/aom_variance64x128    sse2 avx2 neon neon_dotprod/;
1342  specialize qw/aom_variance64x64     sse2 avx2 neon neon_dotprod/;
1343  specialize qw/aom_variance64x32     sse2 avx2 neon neon_dotprod/;
1344  specialize qw/aom_variance32x64     sse2 avx2 neon neon_dotprod/;
1345  specialize qw/aom_variance32x32     sse2 avx2 neon neon_dotprod/;
1346  specialize qw/aom_variance32x16     sse2 avx2 neon neon_dotprod/;
1347  specialize qw/aom_variance16x32     sse2 avx2 neon neon_dotprod/;
1348  specialize qw/aom_variance16x16     sse2 avx2 neon neon_dotprod/;
1349  specialize qw/aom_variance16x8      sse2 avx2 neon neon_dotprod/;
1350  specialize qw/aom_variance8x16      sse2      neon neon_dotprod/;
1351  specialize qw/aom_variance8x8       sse2      neon neon_dotprod/;
1352  specialize qw/aom_variance8x4       sse2      neon neon_dotprod/;
1353  specialize qw/aom_variance4x8       sse2      neon neon_dotprod/;
1354  specialize qw/aom_variance4x4       sse2      neon neon_dotprod/;
1355
      # aom_sub_pixel_variance: avx2 is listed for widths of 16 and above only.
1356  specialize qw/aom_sub_pixel_variance128x128   avx2 neon ssse3/;
1357  specialize qw/aom_sub_pixel_variance128x64    avx2 neon ssse3/;
1358  specialize qw/aom_sub_pixel_variance64x128    avx2 neon ssse3/;
1359  specialize qw/aom_sub_pixel_variance64x64     avx2 neon ssse3/;
1360  specialize qw/aom_sub_pixel_variance64x32     avx2 neon ssse3/;
1361  specialize qw/aom_sub_pixel_variance32x64     avx2 neon ssse3/;
1362  specialize qw/aom_sub_pixel_variance32x32     avx2 neon ssse3/;
1363  specialize qw/aom_sub_pixel_variance32x16     avx2 neon ssse3/;
1364  specialize qw/aom_sub_pixel_variance16x32     avx2 neon ssse3/;
1365  specialize qw/aom_sub_pixel_variance16x16     avx2 neon ssse3/;
1366  specialize qw/aom_sub_pixel_variance16x8      avx2 neon ssse3/;
1367  specialize qw/aom_sub_pixel_variance8x16           neon ssse3/;
1368  specialize qw/aom_sub_pixel_variance8x8            neon ssse3/;
1369  specialize qw/aom_sub_pixel_variance8x4            neon ssse3/;
1370  specialize qw/aom_sub_pixel_variance4x8            neon ssse3/;
1371  specialize qw/aom_sub_pixel_variance4x4            neon ssse3/;
1372
      # aom_sub_pixel_avg_variance: avx2 is listed for widths of 32 and above only (the width-16 sizes have none).
1373  specialize qw/aom_sub_pixel_avg_variance128x128 avx2 neon ssse3/;
1374  specialize qw/aom_sub_pixel_avg_variance128x64  avx2 neon ssse3/;
1375  specialize qw/aom_sub_pixel_avg_variance64x128  avx2 neon ssse3/;
1376  specialize qw/aom_sub_pixel_avg_variance64x64   avx2 neon ssse3/;
1377  specialize qw/aom_sub_pixel_avg_variance64x32   avx2 neon ssse3/;
1378  specialize qw/aom_sub_pixel_avg_variance32x64   avx2 neon ssse3/;
1379  specialize qw/aom_sub_pixel_avg_variance32x32   avx2 neon ssse3/;
1380  specialize qw/aom_sub_pixel_avg_variance32x16   avx2 neon ssse3/;
1381  specialize qw/aom_sub_pixel_avg_variance16x32        neon ssse3/;
1382  specialize qw/aom_sub_pixel_avg_variance16x16        neon ssse3/;
1383  specialize qw/aom_sub_pixel_avg_variance16x8         neon ssse3/;
1384  specialize qw/aom_sub_pixel_avg_variance8x16         neon ssse3/;
1385  specialize qw/aom_sub_pixel_avg_variance8x8          neon ssse3/;
1386  specialize qw/aom_sub_pixel_avg_variance8x4          neon ssse3/;
1387  specialize qw/aom_sub_pixel_avg_variance4x8          neon ssse3/;
1388  specialize qw/aom_sub_pixel_avg_variance4x4          neon ssse3/;
1389
1390  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # same condition under which the 1:4 / 4:1 sizes are added to @encoder_block_sizes at the top of the file
        # aom_variance: avx2 is listed for the width-16, -32 and -64 sizes, not for 4x16 or 8x32.
1391    specialize qw/aom_variance4x16  neon neon_dotprod sse2/;
1392    specialize qw/aom_variance16x4  neon neon_dotprod sse2 avx2/;
1393    specialize qw/aom_variance8x32  neon neon_dotprod sse2/;
1394    specialize qw/aom_variance32x8  neon neon_dotprod sse2 avx2/;
1395    specialize qw/aom_variance16x64 neon neon_dotprod sse2 avx2/;
1396    specialize qw/aom_variance64x16 neon neon_dotprod sse2 avx2/;
1397
        # aom_sub_pixel_variance: avx2 is listed for 16x4 and 16x64 only.
1398    specialize qw/aom_sub_pixel_variance4x16 neon ssse3/;
1399    specialize qw/aom_sub_pixel_variance16x4 neon avx2 ssse3/;
1400    specialize qw/aom_sub_pixel_variance8x32 neon ssse3/;
1401    specialize qw/aom_sub_pixel_variance32x8 neon ssse3/;
1402    specialize qw/aom_sub_pixel_variance16x64 neon avx2 ssse3/;
1403    specialize qw/aom_sub_pixel_variance64x16 neon ssse3/;
        # aom_sub_pixel_avg_variance: neon and ssse3 only.
1404    specialize qw/aom_sub_pixel_avg_variance4x16 neon ssse3/;
1405    specialize qw/aom_sub_pixel_avg_variance16x4 neon ssse3/;
1406    specialize qw/aom_sub_pixel_avg_variance8x32 neon ssse3/;
1407    specialize qw/aom_sub_pixel_avg_variance32x8 neon ssse3/;
1408    specialize qw/aom_sub_pixel_avg_variance16x64 neon ssse3/;
1409    specialize qw/aom_sub_pixel_avg_variance64x16 neon ssse3/;
1410  }
1411
1412  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1413    foreach $bd (8, 10, 12) {  # one full prototype set per bit depth
1414      foreach (@encoder_block_sizes) {  # each element is a [width, height] pair
1415        ($w, $h) = @$_;
            # The sub_pixel variants add xoffset/yoffset; the sub_pixel_avg variant additionally takes second_pred.
1416        add_proto qw/unsigned int/, "aom_highbd_${bd}_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1417        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse";
1418        add_proto qw/uint32_t/, "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "const uint8_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, const uint8_t *second_pred";
1419      }
1420    }
1421
1422    specialize qw/aom_highbd_12_variance128x128 sse2 neon sve/;  # 12-bit: sse2 down to 8x8; 8x4 and 4x8 have neon/sve only; 4x4 lists sse4_1
1423    specialize qw/aom_highbd_12_variance128x64  sse2 neon sve/;
1424    specialize qw/aom_highbd_12_variance64x128  sse2 neon sve/;
1425    specialize qw/aom_highbd_12_variance64x64   sse2 neon sve/;
1426    specialize qw/aom_highbd_12_variance64x32   sse2 neon sve/;
1427    specialize qw/aom_highbd_12_variance32x64   sse2 neon sve/;
1428    specialize qw/aom_highbd_12_variance32x32   sse2 neon sve/;
1429    specialize qw/aom_highbd_12_variance32x16   sse2 neon sve/;
1430    specialize qw/aom_highbd_12_variance16x32   sse2 neon sve/;
1431    specialize qw/aom_highbd_12_variance16x16   sse2 neon sve/;
1432    specialize qw/aom_highbd_12_variance16x8    sse2 neon sve/;
1433    specialize qw/aom_highbd_12_variance8x16    sse2 neon sve/;
1434    specialize qw/aom_highbd_12_variance8x8     sse2 neon sve/;
1435    specialize qw/aom_highbd_12_variance8x4          neon sve/;
1436    specialize qw/aom_highbd_12_variance4x8          neon sve/;
1437    specialize qw/aom_highbd_12_variance4x4   sse4_1 neon sve/;
1438
1439    specialize qw/aom_highbd_10_variance128x128 sse2 avx2 neon sve/;  # 10-bit: same pattern as 12-bit, plus avx2 wherever sse2 is listed
1440    specialize qw/aom_highbd_10_variance128x64  sse2 avx2 neon sve/;
1441    specialize qw/aom_highbd_10_variance64x128  sse2 avx2 neon sve/;
1442    specialize qw/aom_highbd_10_variance64x64   sse2 avx2 neon sve/;
1443    specialize qw/aom_highbd_10_variance64x32   sse2 avx2 neon sve/;
1444    specialize qw/aom_highbd_10_variance32x64   sse2 avx2 neon sve/;
1445    specialize qw/aom_highbd_10_variance32x32   sse2 avx2 neon sve/;
1446    specialize qw/aom_highbd_10_variance32x16   sse2 avx2 neon sve/;
1447    specialize qw/aom_highbd_10_variance16x32   sse2 avx2 neon sve/;
1448    specialize qw/aom_highbd_10_variance16x16   sse2 avx2 neon sve/;
1449    specialize qw/aom_highbd_10_variance16x8    sse2 avx2 neon sve/;
1450    specialize qw/aom_highbd_10_variance8x16    sse2 avx2 neon sve/;
1451    specialize qw/aom_highbd_10_variance8x8     sse2 avx2 neon sve/;
1452    specialize qw/aom_highbd_10_variance8x4               neon sve/;
1453    specialize qw/aom_highbd_10_variance4x8               neon sve/;
1454    specialize qw/aom_highbd_10_variance4x4   sse4_1      neon sve/;
1455
1456    specialize qw/aom_highbd_8_variance128x128 sse2 neon sve/;  # 8-bit: same lists as 12-bit
1457    specialize qw/aom_highbd_8_variance128x64  sse2 neon sve/;
1458    specialize qw/aom_highbd_8_variance64x128  sse2 neon sve/;
1459    specialize qw/aom_highbd_8_variance64x64   sse2 neon sve/;
1460    specialize qw/aom_highbd_8_variance64x32   sse2 neon sve/;
1461    specialize qw/aom_highbd_8_variance32x64   sse2 neon sve/;
1462    specialize qw/aom_highbd_8_variance32x32   sse2 neon sve/;
1463    specialize qw/aom_highbd_8_variance32x16   sse2 neon sve/;
1464    specialize qw/aom_highbd_8_variance16x32   sse2 neon sve/;
1465    specialize qw/aom_highbd_8_variance16x16   sse2 neon sve/;
1466    specialize qw/aom_highbd_8_variance16x8    sse2 neon sve/;
1467    specialize qw/aom_highbd_8_variance8x16    sse2 neon sve/;
1468    specialize qw/aom_highbd_8_variance8x8     sse2 neon sve/;
1469    specialize qw/aom_highbd_8_variance8x4          neon sve/;
1470    specialize qw/aom_highbd_8_variance4x8          neon sve/;
1471    specialize qw/aom_highbd_8_variance4x4   sse4_1 neon sve/;
1472
1473    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # same condition under which the 1:4 / 4:1 sizes are added to @encoder_block_sizes
1474      foreach $bd (8, 10, 12) {
1475        my $avx2 = ($bd == 10) ? "avx2" : "";  # "avx2" for the 10-bit entries only; 8- and 12-bit pass an empty string in its place
1476        specialize "aom_highbd_${bd}_variance64x16" , $avx2, qw/sse2 neon sve/;
1477        specialize "aom_highbd_${bd}_variance32x8" , $avx2, qw/sse2 neon sve/;
1478        specialize "aom_highbd_${bd}_variance16x64" , $avx2, qw/sse2 neon sve/;
1479        specialize "aom_highbd_${bd}_variance16x4" , qw/neon sve/;  # 16x4 and 4x16 list neon and sve only, at every bit depth
1480        specialize "aom_highbd_${bd}_variance8x32" , $avx2, qw/sse2 neon sve/;
1481        specialize "aom_highbd_${bd}_variance4x16" , qw/neon sve/;
1482      }
1483    }
1484
1485    specialize qw/aom_highbd_12_sub_pixel_variance128x128 sse2 neon/;  # 12-bit: sse2 down to 8x4; 4x8 has neon only; 4x4 lists sse4_1
1486    specialize qw/aom_highbd_12_sub_pixel_variance128x64  sse2 neon/;
1487    specialize qw/aom_highbd_12_sub_pixel_variance64x128  sse2 neon/;
1488    specialize qw/aom_highbd_12_sub_pixel_variance64x64   sse2 neon/;
1489    specialize qw/aom_highbd_12_sub_pixel_variance64x32   sse2 neon/;
1490    specialize qw/aom_highbd_12_sub_pixel_variance32x64   sse2 neon/;
1491    specialize qw/aom_highbd_12_sub_pixel_variance32x32   sse2 neon/;
1492    specialize qw/aom_highbd_12_sub_pixel_variance32x16   sse2 neon/;
1493    specialize qw/aom_highbd_12_sub_pixel_variance16x32   sse2 neon/;
1494    specialize qw/aom_highbd_12_sub_pixel_variance16x16   sse2 neon/;
1495    specialize qw/aom_highbd_12_sub_pixel_variance16x8    sse2 neon/;
1496    specialize qw/aom_highbd_12_sub_pixel_variance8x16    sse2 neon/;
1497    specialize qw/aom_highbd_12_sub_pixel_variance8x8     sse2 neon/;
1498    specialize qw/aom_highbd_12_sub_pixel_variance8x4     sse2 neon/;
1499    specialize qw/aom_highbd_12_sub_pixel_variance4x8          neon/;
1500    specialize qw/aom_highbd_12_sub_pixel_variance4x4   sse4_1 neon/;
1501
1502    specialize qw/aom_highbd_10_sub_pixel_variance128x128 sse2 avx2 neon/;  # 10-bit: as 12-bit, plus avx2 for every size from 128x128 down to 8x8 (not 8x4)
1503    specialize qw/aom_highbd_10_sub_pixel_variance128x64  sse2 avx2 neon/;
1504    specialize qw/aom_highbd_10_sub_pixel_variance64x128  sse2 avx2 neon/;
1505    specialize qw/aom_highbd_10_sub_pixel_variance64x64   sse2 avx2 neon/;
1506    specialize qw/aom_highbd_10_sub_pixel_variance64x32   sse2 avx2 neon/;
1507    specialize qw/aom_highbd_10_sub_pixel_variance32x64   sse2 avx2 neon/;
1508    specialize qw/aom_highbd_10_sub_pixel_variance32x32   sse2 avx2 neon/;
1509    specialize qw/aom_highbd_10_sub_pixel_variance32x16   sse2 avx2 neon/;
1510    specialize qw/aom_highbd_10_sub_pixel_variance16x32   sse2 avx2 neon/;
1511    specialize qw/aom_highbd_10_sub_pixel_variance16x16   sse2 avx2 neon/;
1512    specialize qw/aom_highbd_10_sub_pixel_variance16x8    sse2 avx2 neon/;
1513    specialize qw/aom_highbd_10_sub_pixel_variance8x16    sse2 avx2 neon/;
1514    specialize qw/aom_highbd_10_sub_pixel_variance8x8     sse2 avx2 neon/;
1515    specialize qw/aom_highbd_10_sub_pixel_variance8x4     sse2      neon/;
1516    specialize qw/aom_highbd_10_sub_pixel_variance4x8               neon/;
1517    specialize qw/aom_highbd_10_sub_pixel_variance4x4   sse4_1      neon/;
1518
1519    specialize qw/aom_highbd_8_sub_pixel_variance128x128 sse2 neon/;  # 8-bit: same lists as 12-bit
1520    specialize qw/aom_highbd_8_sub_pixel_variance128x64  sse2 neon/;
1521    specialize qw/aom_highbd_8_sub_pixel_variance64x128  sse2 neon/;
1522    specialize qw/aom_highbd_8_sub_pixel_variance64x64   sse2 neon/;
1523    specialize qw/aom_highbd_8_sub_pixel_variance64x32   sse2 neon/;
1524    specialize qw/aom_highbd_8_sub_pixel_variance32x64   sse2 neon/;
1525    specialize qw/aom_highbd_8_sub_pixel_variance32x32   sse2 neon/;
1526    specialize qw/aom_highbd_8_sub_pixel_variance32x16   sse2 neon/;
1527    specialize qw/aom_highbd_8_sub_pixel_variance16x32   sse2 neon/;
1528    specialize qw/aom_highbd_8_sub_pixel_variance16x16   sse2 neon/;
1529    specialize qw/aom_highbd_8_sub_pixel_variance16x8    sse2 neon/;
1530    specialize qw/aom_highbd_8_sub_pixel_variance8x16    sse2 neon/;
1531    specialize qw/aom_highbd_8_sub_pixel_variance8x8     sse2 neon/;
1532    specialize qw/aom_highbd_8_sub_pixel_variance8x4     sse2 neon/;
1533    specialize qw/aom_highbd_8_sub_pixel_variance4x8          neon/;
1534    specialize qw/aom_highbd_8_sub_pixel_variance4x4   sse4_1 neon/;
1535
1536    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {  # same condition under which the 1:4 / 4:1 sizes are added to @encoder_block_sizes
1537      foreach $bd (8, 10, 12) {  # identical lists at every bit depth; no avx2 entry here
1538        specialize "aom_highbd_${bd}_sub_pixel_variance64x16" , qw/sse2 neon/;
1539        specialize "aom_highbd_${bd}_sub_pixel_variance32x8" , qw/sse2 neon/;
1540        specialize "aom_highbd_${bd}_sub_pixel_variance16x64" , qw/sse2 neon/;
1541        specialize "aom_highbd_${bd}_sub_pixel_variance16x4" , qw/sse2 neon/;
1542        specialize "aom_highbd_${bd}_sub_pixel_variance8x32" , qw/sse2 neon/;
1543        specialize "aom_highbd_${bd}_sub_pixel_variance4x16" , qw/neon/;  # 4x16 is the only size here without sse2
1544      }
1545    }
1546
1547    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x128      neon/;  # 12-bit: sizes with a 128 dimension and 4x8 have neon only; 4x4 lists sse4_1; the rest list sse2
1548    specialize qw/aom_highbd_12_sub_pixel_avg_variance128x64       neon/;
1549    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x128       neon/;
1550    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x64   sse2 neon/;
1551    specialize qw/aom_highbd_12_sub_pixel_avg_variance64x32   sse2 neon/;
1552    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x64   sse2 neon/;
1553    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x32   sse2 neon/;
1554    specialize qw/aom_highbd_12_sub_pixel_avg_variance32x16   sse2 neon/;
1555    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32   sse2 neon/;
1556    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16   sse2 neon/;
1557    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8    sse2 neon/;
1558    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16    sse2 neon/;
1559    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x8     sse2 neon/;
1560    specialize qw/aom_highbd_12_sub_pixel_avg_variance8x4     sse2 neon/;
1561    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x8          neon/;
1562    specialize qw/aom_highbd_12_sub_pixel_avg_variance4x4   sse4_1 neon/;
1563
1564    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x128      neon/;  # 10-bit: same lists as 12-bit (no avx2 entries)
1565    specialize qw/aom_highbd_10_sub_pixel_avg_variance128x64       neon/;
1566    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x128       neon/;
1567    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x64   sse2 neon/;
1568    specialize qw/aom_highbd_10_sub_pixel_avg_variance64x32   sse2 neon/;
1569    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x64   sse2 neon/;
1570    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x32   sse2 neon/;
1571    specialize qw/aom_highbd_10_sub_pixel_avg_variance32x16   sse2 neon/;
1572    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32   sse2 neon/;
1573    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16   sse2 neon/;
1574    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8    sse2 neon/;
1575    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16    sse2 neon/;
1576    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x8     sse2 neon/;
1577    specialize qw/aom_highbd_10_sub_pixel_avg_variance8x4     sse2 neon/;
1578    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x8          neon/;
1579    specialize qw/aom_highbd_10_sub_pixel_avg_variance4x4   sse4_1 neon/;
1580
1581    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x128      neon/;  # 8-bit: same lists as 12-bit
1582    specialize qw/aom_highbd_8_sub_pixel_avg_variance128x64       neon/;
1583    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x128       neon/;
1584    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x64   sse2 neon/;
1585    specialize qw/aom_highbd_8_sub_pixel_avg_variance64x32   sse2 neon/;
1586    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x64   sse2 neon/;
1587    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x32   sse2 neon/;
1588    specialize qw/aom_highbd_8_sub_pixel_avg_variance32x16   sse2 neon/;
1589    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32   sse2 neon/;
1590    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16   sse2 neon/;
1591    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8    sse2 neon/;
1592    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16    sse2 neon/;
1593    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x8     sse2 neon/;
1594    specialize qw/aom_highbd_8_sub_pixel_avg_variance8x4     sse2 neon/;
1595    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x8          neon/;
1596    specialize qw/aom_highbd_8_sub_pixel_avg_variance4x4   sse4_1 neon/;
1597
1598    if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1599      foreach $bd (8, 10, 12) {
1600        specialize "aom_highbd_${bd}_sub_pixel_avg_variance64x16" , qw/sse2 neon/;
1601        specialize "aom_highbd_${bd}_sub_pixel_avg_variance32x8" , qw/sse2 neon/;
1602        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x64" , qw/sse2 neon/;
1603        specialize "aom_highbd_${bd}_sub_pixel_avg_variance16x4" , qw/sse2 neon/;
1604        specialize "aom_highbd_${bd}_sub_pixel_avg_variance8x32" , qw/sse2 neon/;
1605        specialize "aom_highbd_${bd}_sub_pixel_avg_variance4x16" , qw/neon/;
1606      }
1607    }
1608  }
1609  #
1610  # Masked Variance / Masked Subpixel Variance
1611  #
1612  foreach (@encoder_block_sizes) {
1613    ($w, $h) = @$_;
1614    add_proto qw/unsigned int/, "aom_masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1615    specialize "aom_masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
1616  }
1617
1618  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1619    foreach $bd ("_8_", "_10_", "_12_") {
1620      foreach (@encoder_block_sizes) {
1621        ($w, $h) = @$_;
1622        add_proto qw/unsigned int/, "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", "const uint8_t *src, int src_stride, int xoffset, int yoffset, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse";
1623        specialize "aom_highbd${bd}masked_sub_pixel_variance${w}x${h}", qw/ssse3 neon/;
1624      }
1625    }
1626  }
1627
1628  #
1629  # OBMC Variance / OBMC Subpixel Variance
1630  #
1631  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1632    foreach (@encoder_block_sizes) {
1633      ($w, $h) = @$_;
1634      add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1635      add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1636      specialize "aom_obmc_variance${w}x${h}", qw/sse4_1 avx2 neon/;
1637      specialize "aom_obmc_sub_pixel_variance${w}x${h}", qw/sse4_1 neon/;
1638    }
1639
1640    if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1641      foreach $bd ("_8_", "_10_", "_12_") {
1642        foreach (@encoder_block_sizes) {
1643          ($w, $h) = @$_;
1644          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1645          add_proto qw/unsigned int/, "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
1646          specialize "aom_highbd${bd}obmc_variance${w}x${h}", qw/sse4_1 neon/;
1647          specialize "aom_highbd${bd}obmc_sub_pixel_variance${w}x${h}", qw/neon/;
1648        }
1649      }
1650    }
1651  }
1652
1653  #
1654  # Comp Avg
1655  #
1656  add_proto qw/void aom_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
1657  specialize qw/aom_comp_avg_pred avx2 neon/;
1658
1659  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1660    add_proto qw/void aom_highbd_comp_avg_pred/, "uint8_t *comp_pred8, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
1661    specialize qw/aom_highbd_comp_avg_pred neon/;
1662
1663    add_proto qw/uint64_t/, "aom_mse_wxh_16bit_highbd", "uint16_t *dst, int dstride,uint16_t *src, int sstride, int w, int h";
1664    specialize qw/aom_mse_wxh_16bit_highbd   sse2 avx2 neon sve/;
1665  }
1666
1667  add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
1668  specialize qw/aom_comp_mask_pred ssse3 avx2 neon/;
1669
1670  if (aom_config("CONFIG_AV1_HIGHBITDEPTH") eq "yes") {
1671    add_proto qw/void aom_highbd_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
1672    specialize qw/aom_highbd_comp_mask_pred sse2 avx2 neon/;
1673  }
1674
1675  # Flow estimation library
1676  if (aom_config("CONFIG_REALTIME_ONLY") ne "yes") {
1677    add_proto qw/bool aom_compute_mean_stddev/, "const unsigned char *frame, int stride, int x, int y, double *mean, double *one_over_stddev";
1678    specialize qw/aom_compute_mean_stddev sse4_1 avx2/;
1679
1680    add_proto qw/double aom_compute_correlation/, "const unsigned char *frame1, int stride1, int x1, int y1, double mean1, double one_over_stddev1, const unsigned char *frame2, int stride2, int x2, int y2, double mean2, double one_over_stddev2";
1681    specialize qw/aom_compute_correlation sse4_1 avx2/;
1682
1683    add_proto qw/void aom_compute_flow_at_point/, "const uint8_t *src, const uint8_t *ref, int x, int y, int width, int height, int stride, double *u, double *v";
1684    specialize qw/aom_compute_flow_at_point sse4_1 avx2 neon sve/;
1685  }
1686
1687}  # CONFIG_AV1_ENCODER
1688
16891;
1690