/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_
#define AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_

/* Note: This implements the intrinsics in plain, unoptimised C.
   Intended for reference, porting or debugging. */
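/* Usage sketch (illustrative only, not part of this header): averaging two
   rows of 8 pixels with the plain-C intrinsics defined below. It assumes
   SIMD_INLINE and SIMD_CHECK are provided by the including header; they are
   not defined in this file. The helper name average_row_8 is hypothetical.

     static void average_row_8(uint8_t *dst, const uint8_t *a,
                               const uint8_t *b) {
       c_v64 va = c_v64_load_unaligned(a);  // load 8 bytes from each source
       c_v64 vb = c_v64_load_unaligned(b);
       // rounded per-byte average, (va + vb + 1) >> 1
       c_v64_store_unaligned(dst, c_v64_avg_u8(va, vb));
     }
*/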

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include "config/aom_config.h"

typedef union {
  uint8_t u8[8];
  uint16_t u16[4];
  uint32_t u32[2];
  uint64_t u64;
  int8_t s8[8];
  int16_t s16[4];
  int32_t s32[2];
  int64_t s64;
} c_v64;

SIMD_INLINE uint32_t c_v64_low_u32(c_v64 a) {
  return a.u32[!!CONFIG_BIG_ENDIAN];
}

SIMD_INLINE uint32_t c_v64_high_u32(c_v64 a) {
  return a.u32[!CONFIG_BIG_ENDIAN];
}

SIMD_INLINE int32_t c_v64_low_s32(c_v64 a) {
  return a.s32[!!CONFIG_BIG_ENDIAN];
}

SIMD_INLINE int32_t c_v64_high_s32(c_v64 a) {
  return a.s32[!CONFIG_BIG_ENDIAN];
}

SIMD_INLINE c_v64 c_v64_from_32(uint32_t x, uint32_t y) {
  c_v64 t;
  t.u32[!CONFIG_BIG_ENDIAN] = x;
  t.u32[!!CONFIG_BIG_ENDIAN] = y;
  return t;
}

SIMD_INLINE c_v64 c_v64_from_64(uint64_t x) {
  c_v64 t;
  t.u64 = x;
  return t;
}

SIMD_INLINE uint64_t c_v64_u64(c_v64 x) { return x.u64; }

SIMD_INLINE c_v64 c_v64_from_16(uint16_t a, uint16_t b, uint16_t c,
                                uint16_t d) {
  c_v64 t;
  if (CONFIG_BIG_ENDIAN) {
    t.u16[0] = a;
    t.u16[1] = b;
    t.u16[2] = c;
    t.u16[3] = d;
  } else {
    t.u16[3] = a;
    t.u16[2] = b;
    t.u16[1] = c;
    t.u16[0] = d;
  }
  return t;
}

SIMD_INLINE uint32_t c_u32_load_unaligned(const void *p) {
  uint32_t t;
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&t;
  int c;
  for (c = 0; c < 4; c++) q[c] = pp[c];
  return t;
}

SIMD_INLINE void c_u32_store_unaligned(void *p, uint32_t a) {
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&a;
  int c;
  for (c = 0; c < 4; c++) pp[c] = q[c];
}

SIMD_INLINE uint32_t c_u32_load_aligned(const void *p) {
  if (SIMD_CHECK && (uintptr_t)p & 3) {
    fprintf(stderr, "Error: Unaligned u32 load at %p\n", p);
    abort();
  }
  return c_u32_load_unaligned(p);
}

SIMD_INLINE void c_u32_store_aligned(void *p, uint32_t a) {
  if (SIMD_CHECK && (uintptr_t)p & 3) {
    fprintf(stderr, "Error: Unaligned u32 store at %p\n", p);
    abort();
  }
  c_u32_store_unaligned(p, a);
}

SIMD_INLINE c_v64 c_v64_load_unaligned(const void *p) {
  c_v64 t;
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&t;
  int c;
  for (c = 0; c < 8; c++) q[c] = pp[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_load_aligned(const void *p) {
  if (SIMD_CHECK && (uintptr_t)p & 7) {
    fprintf(stderr, "Error: Unaligned c_v64 load at %p\n", p);
    abort();
  }
  return c_v64_load_unaligned(p);
}

SIMD_INLINE void c_v64_store_unaligned(void *p, c_v64 a) {
  uint8_t *q = (uint8_t *)p;
  uint8_t *r = (uint8_t *)&a;
  int c;
  for (c = 0; c < 8; c++) q[c] = r[c];
}

SIMD_INLINE void c_v64_store_aligned(void *p, c_v64 a) {
  if (SIMD_CHECK && (uintptr_t)p & 7) {
    fprintf(stderr, "Error: Unaligned c_v64 store at %p\n", p);
    abort();
  }
  c_v64_store_unaligned(p, a);
}

SIMD_INLINE c_v64 c_v64_zero(void) {
  c_v64 t;
  t.u64 = 0;
  return t;
}

SIMD_INLINE c_v64 c_v64_dup_8(uint8_t x) {
  c_v64 t;
  t.u8[0] = t.u8[1] = t.u8[2] = t.u8[3] = t.u8[4] = t.u8[5] = t.u8[6] =
      t.u8[7] = x;
  return t;
}

SIMD_INLINE c_v64 c_v64_dup_16(uint16_t x) {
  c_v64 t;
  t.u16[0] = t.u16[1] = t.u16[2] = t.u16[3] = x;
  return t;
}

SIMD_INLINE c_v64 c_v64_dup_32(uint32_t x) {
  c_v64 t;
  t.u32[0] = t.u32[1] = x;
  return t;
}

SIMD_INLINE c_v64 c_v64_add_8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = (uint8_t)(a.u8[c] + b.u8[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_add_16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.u16[c] = (uint16_t)(a.u16[c] + b.u16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_sadd_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++)
    t.u8[c] = (int16_t)a.u8[c] + (int16_t)b.u8[c] > 255
                  ? 255
                  : (int16_t)a.u8[c] + (int16_t)b.u8[c] < 0
                        ? 0
                        : (int16_t)a.u8[c] + (int16_t)b.u8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_sadd_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++)
    t.s8[c] = (int16_t)a.s8[c] + (int16_t)b.s8[c] > 127
                  ? 127
                  : (int16_t)a.s8[c] + (int16_t)b.s8[c] < -128
                        ? -128
                        : (int16_t)a.s8[c] + (int16_t)b.s8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_sadd_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++)
    t.s16[c] = (int32_t)a.s16[c] + (int32_t)b.s16[c] > 32767
                   ? 32767
                   : (int32_t)a.s16[c] + (int32_t)b.s16[c] < -32768
                         ? -32768
                         : (int32_t)a.s16[c] + (int32_t)b.s16[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_add_32(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u32[0] = (uint32_t)((uint64_t)a.u32[0] + b.u32[0]);
  t.u32[1] = (uint32_t)((uint64_t)a.u32[1] + b.u32[1]);
  return t;
}

SIMD_INLINE c_v64 c_v64_sub_8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = (uint8_t)(a.u8[c] - b.u8[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_ssub_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] < b.u8[c] ? 0 : a.u8[c] - b.u8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_ssub_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) {
    int16_t d = (int16_t)a.s8[c] - (int16_t)b.s8[c];
    t.s8[c] = d > 127 ? 127 : (d < -128 ? -128 : d);
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_sub_16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.u16[c] = (uint16_t)(a.u16[c] - b.u16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_ssub_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++)
    t.s16[c] = (int32_t)a.s16[c] - (int32_t)b.s16[c] < -32768
                   ? -32768
                   : (int32_t)a.s16[c] - (int32_t)b.s16[c] > 32767
                         ? 32767
                         : (int32_t)a.s16[c] - (int32_t)b.s16[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_ssub_u16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++)
    t.u16[c] =
        (int32_t)a.u16[c] - (int32_t)b.u16[c] < 0 ? 0 : a.u16[c] - b.u16[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_sub_32(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u32[0] = (uint32_t)((int64_t)a.u32[0] - b.u32[0]);
  t.u32[1] = (uint32_t)((int64_t)a.u32[1] - b.u32[1]);
  return t;
}

SIMD_INLINE c_v64 c_v64_abs_s16(c_v64 a) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++)
    t.u16[c] = (uint16_t)((int16_t)a.u16[c] > 0 ? a.u16[c] : -a.u16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_abs_s8(c_v64 a) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++)
    t.u8[c] = (uint8_t)((int8_t)a.u8[c] > 0 ? a.u8[c] : -a.u8[c]);
  return t;
}

SIMD_INLINE c_v64 _c_v64_zip_8(c_v64 a, c_v64 b, int mode) {
  c_v64 t;
  if (mode) {
    t.u8[7] = a.u8[7];
    t.u8[6] = b.u8[7];
    t.u8[5] = a.u8[6];
    t.u8[4] = b.u8[6];
    t.u8[3] = a.u8[5];
    t.u8[2] = b.u8[5];
    t.u8[1] = a.u8[4];
    t.u8[0] = b.u8[4];
  } else {
    t.u8[7] = a.u8[3];
    t.u8[6] = b.u8[3];
    t.u8[5] = a.u8[2];
    t.u8[4] = b.u8[2];
    t.u8[3] = a.u8[1];
    t.u8[2] = b.u8[1];
    t.u8[1] = a.u8[0];
    t.u8[0] = b.u8[0];
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_ziplo_8(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_8(b, a, 1) : _c_v64_zip_8(a, b, 0);
}

SIMD_INLINE c_v64 c_v64_ziphi_8(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_8(b, a, 0) : _c_v64_zip_8(a, b, 1);
}

SIMD_INLINE c_v64 _c_v64_zip_16(c_v64 a, c_v64 b, int mode) {
  c_v64 t;
  if (mode) {
    t.u16[3] = a.u16[3];
    t.u16[2] = b.u16[3];
    t.u16[1] = a.u16[2];
    t.u16[0] = b.u16[2];
  } else {
    t.u16[3] = a.u16[1];
    t.u16[2] = b.u16[1];
    t.u16[1] = a.u16[0];
    t.u16[0] = b.u16[0];
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_ziplo_16(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_16(b, a, 1) : _c_v64_zip_16(a, b, 0);
}

SIMD_INLINE c_v64 c_v64_ziphi_16(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_16(b, a, 0) : _c_v64_zip_16(a, b, 1);
}

SIMD_INLINE c_v64 _c_v64_zip_32(c_v64 a, c_v64 b, int mode) {
  c_v64 t;
  if (mode) {
    t.u32[1] = a.u32[1];
    t.u32[0] = b.u32[1];
  } else {
    t.u32[1] = a.u32[0];
    t.u32[0] = b.u32[0];
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_ziplo_32(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_32(b, a, 1) : _c_v64_zip_32(a, b, 0);
}

SIMD_INLINE c_v64 c_v64_ziphi_32(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_zip_32(b, a, 0) : _c_v64_zip_32(a, b, 1);
}

SIMD_INLINE c_v64 _c_v64_unzip_8(c_v64 a, c_v64 b, int mode) {
  c_v64 t;
  if (mode) {
    t.u8[7] = b.u8[7];
    t.u8[6] = b.u8[5];
    t.u8[5] = b.u8[3];
    t.u8[4] = b.u8[1];
    t.u8[3] = a.u8[7];
    t.u8[2] = a.u8[5];
    t.u8[1] = a.u8[3];
    t.u8[0] = a.u8[1];
  } else {
    t.u8[7] = a.u8[6];
    t.u8[6] = a.u8[4];
    t.u8[5] = a.u8[2];
    t.u8[4] = a.u8[0];
    t.u8[3] = b.u8[6];
    t.u8[2] = b.u8[4];
    t.u8[1] = b.u8[2];
    t.u8[0] = b.u8[0];
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_unziplo_8(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_unzip_8(a, b, 1) : _c_v64_unzip_8(a, b, 0);
}

SIMD_INLINE c_v64 c_v64_unziphi_8(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_unzip_8(b, a, 0) : _c_v64_unzip_8(b, a, 1);
}

SIMD_INLINE c_v64 _c_v64_unzip_16(c_v64 a, c_v64 b, int mode) {
  c_v64 t;
  if (mode) {
    t.u16[3] = b.u16[3];
    t.u16[2] = b.u16[1];
    t.u16[1] = a.u16[3];
    t.u16[0] = a.u16[1];
  } else {
    t.u16[3] = a.u16[2];
    t.u16[2] = a.u16[0];
    t.u16[1] = b.u16[2];
    t.u16[0] = b.u16[0];
  }
  return t;
}

SIMD_INLINE c_v64 c_v64_unziplo_16(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_unzip_16(a, b, 1)
                           : _c_v64_unzip_16(a, b, 0);
}

SIMD_INLINE c_v64 c_v64_unziphi_16(c_v64 a, c_v64 b) {
  return CONFIG_BIG_ENDIAN ? _c_v64_unzip_16(b, a, 0)
                           : _c_v64_unzip_16(b, a, 1);
}

SIMD_INLINE c_v64 c_v64_unpacklo_u8_s16(c_v64 a) {
  c_v64 t;
  int endian = !!CONFIG_BIG_ENDIAN * 4;
  t.s16[3] = (int16_t)a.u8[3 + endian];
  t.s16[2] = (int16_t)a.u8[2 + endian];
  t.s16[1] = (int16_t)a.u8[1 + endian];
  t.s16[0] = (int16_t)a.u8[0 + endian];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpackhi_u8_s16(c_v64 a) {
  c_v64 t;
  int endian = !!CONFIG_BIG_ENDIAN * 4;
  t.s16[3] = (int16_t)a.u8[7 - endian];
  t.s16[2] = (int16_t)a.u8[6 - endian];
  t.s16[1] = (int16_t)a.u8[5 - endian];
  t.s16[0] = (int16_t)a.u8[4 - endian];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpacklo_s8_s16(c_v64 a) {
  c_v64 t;
  int endian = !!CONFIG_BIG_ENDIAN * 4;
  t.s16[3] = (int16_t)a.s8[3 + endian];
  t.s16[2] = (int16_t)a.s8[2 + endian];
  t.s16[1] = (int16_t)a.s8[1 + endian];
  t.s16[0] = (int16_t)a.s8[0 + endian];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpackhi_s8_s16(c_v64 a) {
  c_v64 t;
  int endian = !!CONFIG_BIG_ENDIAN * 4;
  t.s16[3] = (int16_t)a.s8[7 - endian];
  t.s16[2] = (int16_t)a.s8[6 - endian];
  t.s16[1] = (int16_t)a.s8[5 - endian];
  t.s16[0] = (int16_t)a.s8[4 - endian];
  return t;
}

SIMD_INLINE c_v64 c_v64_pack_s32_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  if (CONFIG_BIG_ENDIAN) {
    c_v64 u = a;
    a = b;
    b = u;
  }
  t.s16[3] = a.s32[1] > 32767 ? 32767 : a.s32[1] < -32768 ? -32768 : a.s32[1];
  t.s16[2] = a.s32[0] > 32767 ? 32767 : a.s32[0] < -32768 ? -32768 : a.s32[0];
  t.s16[1] = b.s32[1] > 32767 ? 32767 : b.s32[1] < -32768 ? -32768 : b.s32[1];
  t.s16[0] = b.s32[0] > 32767 ? 32767 : b.s32[0] < -32768 ? -32768 : b.s32[0];
  return t;
}

SIMD_INLINE c_v64 c_v64_pack_s32_u16(c_v64 a, c_v64 b) {
  c_v64 t;
  if (CONFIG_BIG_ENDIAN) {
    c_v64 u = a;
    a = b;
    b = u;
  }
  t.u16[3] = a.s32[1] > 65535 ? 65535 : a.s32[1] < 0 ? 0 : a.s32[1];
  t.u16[2] = a.s32[0] > 65535 ? 65535 : a.s32[0] < 0 ? 0 : a.s32[0];
  t.u16[1] = b.s32[1] > 65535 ? 65535 : b.s32[1] < 0 ? 0 : b.s32[1];
  t.u16[0] = b.s32[0] > 65535 ? 65535 : b.s32[0] < 0 ? 0 : b.s32[0];
  return t;
}

SIMD_INLINE c_v64 c_v64_pack_s16_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  if (CONFIG_BIG_ENDIAN) {
    c_v64 u = a;
    a = b;
    b = u;
  }
  t.u8[7] = a.s16[3] > 255 ? 255 : a.s16[3] < 0 ? 0 : a.s16[3];
  t.u8[6] = a.s16[2] > 255 ? 255 : a.s16[2] < 0 ? 0 : a.s16[2];
  t.u8[5] = a.s16[1] > 255 ? 255 : a.s16[1] < 0 ? 0 : a.s16[1];
  t.u8[4] = a.s16[0] > 255 ? 255 : a.s16[0] < 0 ? 0 : a.s16[0];
  t.u8[3] = b.s16[3] > 255 ? 255 : b.s16[3] < 0 ? 0 : b.s16[3];
  t.u8[2] = b.s16[2] > 255 ? 255 : b.s16[2] < 0 ? 0 : b.s16[2];
  t.u8[1] = b.s16[1] > 255 ? 255 : b.s16[1] < 0 ? 0 : b.s16[1];
  t.u8[0] = b.s16[0] > 255 ? 255 : b.s16[0] < 0 ? 0 : b.s16[0];
  return t;
}

SIMD_INLINE c_v64 c_v64_pack_s16_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  if (CONFIG_BIG_ENDIAN) {
    c_v64 u = a;
    a = b;
    b = u;
  }
  t.u8[7] = (uint8_t)(a.s16[3] > 127 ? 127 : a.s16[3] < -128 ? 128 : a.s16[3]);
  t.u8[6] = (uint8_t)(a.s16[2] > 127 ? 127 : a.s16[2] < -128 ? 128 : a.s16[2]);
  t.u8[5] = (uint8_t)(a.s16[1] > 127 ? 127 : a.s16[1] < -128 ? 128 : a.s16[1]);
  t.u8[4] = (uint8_t)(a.s16[0] > 127 ? 127 : a.s16[0] < -128 ? 128 : a.s16[0]);
  t.u8[3] = (uint8_t)(b.s16[3] > 127 ? 127 : b.s16[3] < -128 ? 128 : b.s16[3]);
  t.u8[2] = (uint8_t)(b.s16[2] > 127 ? 127 : b.s16[2] < -128 ? 128 : b.s16[2]);
  t.u8[1] = (uint8_t)(b.s16[1] > 127 ? 127 : b.s16[1] < -128 ? 128 : b.s16[1]);
  t.u8[0] = (uint8_t)(b.s16[0] > 127 ? 127 : b.s16[0] < -128 ? 128 : b.s16[0]);
  return t;
}

SIMD_INLINE c_v64 c_v64_unpacklo_u16_s32(c_v64 a) {
  c_v64 t;
  t.s32[1] = a.u16[1 + !!CONFIG_BIG_ENDIAN * 2];
  t.s32[0] = a.u16[0 + !!CONFIG_BIG_ENDIAN * 2];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpacklo_s16_s32(c_v64 a) {
  c_v64 t;
  t.s32[1] = a.s16[1 + !!CONFIG_BIG_ENDIAN * 2];
  t.s32[0] = a.s16[0 + !!CONFIG_BIG_ENDIAN * 2];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpackhi_u16_s32(c_v64 a) {
  c_v64 t;
  t.s32[1] = a.u16[3 - !!CONFIG_BIG_ENDIAN * 2];
  t.s32[0] = a.u16[2 - !!CONFIG_BIG_ENDIAN * 2];
  return t;
}

SIMD_INLINE c_v64 c_v64_unpackhi_s16_s32(c_v64 a) {
  c_v64 t;
  t.s32[1] = a.s16[3 - !!CONFIG_BIG_ENDIAN * 2];
  t.s32[0] = a.s16[2 - !!CONFIG_BIG_ENDIAN * 2];
  return t;
}

SIMD_INLINE c_v64 c_v64_shuffle_8(c_v64 a, c_v64 pattern) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) {
    if (SIMD_CHECK && (pattern.u8[c] & ~7)) {
      fprintf(stderr, "Error: Undefined v64_shuffle_8 index %d/%d\n",
              pattern.u8[c], c);
      abort();
    }
    t.u8[c] =
        a.u8[CONFIG_BIG_ENDIAN ? 7 - (pattern.u8[c] & 7) : pattern.u8[c] & 7];
  }
  return t;
}

SIMD_INLINE int64_t c_v64_dotp_su8(c_v64 a, c_v64 b) {
  return a.s8[7] * b.u8[7] + a.s8[6] * b.u8[6] + a.s8[5] * b.u8[5] +
         a.s8[4] * b.u8[4] + a.s8[3] * b.u8[3] + a.s8[2] * b.u8[2] +
         a.s8[1] * b.u8[1] + a.s8[0] * b.u8[0];
}

SIMD_INLINE int64_t c_v64_dotp_s16(c_v64 a, c_v64 b) {
  return (int64_t)(a.s16[3] * b.s16[3] + a.s16[2] * b.s16[2]) +
         (int64_t)(a.s16[1] * b.s16[1] + a.s16[0] * b.s16[0]);
}

SIMD_INLINE uint64_t c_v64_hadd_u8(c_v64 a) {
  return a.u8[7] + a.u8[6] + a.u8[5] + a.u8[4] + a.u8[3] + a.u8[2] + a.u8[1] +
         a.u8[0];
}

SIMD_INLINE int64_t c_v64_hadd_s16(c_v64 a) {
  return a.s16[3] + a.s16[2] + a.s16[1] + a.s16[0];
}

typedef struct {
  uint32_t val;
  int count;
} c_sad64_internal;

SIMD_INLINE c_sad64_internal c_v64_sad_u8_init(void) {
  c_sad64_internal t;
  t.val = t.count = 0;
  return t;
}

/* Implementation dependent return value. Result must be finalised with
   v64_sad_u8_sum(). The result for more than 32 v64_sad_u8() calls is
   undefined. */
SIMD_INLINE c_sad64_internal c_v64_sad_u8(c_sad64_internal s, c_v64 a,
                                          c_v64 b) {
  int c;
  for (c = 0; c < 8; c++)
    s.val += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
  s.count++;
  if (SIMD_CHECK && s.count > 32) {
    fprintf(stderr,
            "Error: sad called more than 32 times returning an undefined "
            "result\n");
    abort();
  }
  return s;
}

SIMD_INLINE uint32_t c_v64_sad_u8_sum(c_sad64_internal s) { return s.val; }
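
/* Usage sketch (illustrative only): the SAD accumulator above is used as an
   init/accumulate/finalise triple. The helper below, block_sad_8x16, is a
   hypothetical example, not part of this header; its 16 accumulation calls
   stay well under the 32-call limit noted above.

     static uint32_t block_sad_8x16(const uint8_t *a, int a_stride,
                                    const uint8_t *b, int b_stride) {
       c_sad64_internal s = c_v64_sad_u8_init();
       int r;
       for (r = 0; r < 16; r++)  // one 8-pixel row per call
         s = c_v64_sad_u8(s, c_v64_load_unaligned(a + r * a_stride),
                          c_v64_load_unaligned(b + r * b_stride));
       return c_v64_sad_u8_sum(s);  // finalise to get the actual SAD
     }
*/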

typedef uint32_t c_ssd64_internal;

/* Implementation dependent return value. Result must be finalised with
 * v64_ssd_u8_sum(). */
SIMD_INLINE c_ssd64_internal c_v64_ssd_u8_init(void) { return 0; }

SIMD_INLINE c_ssd64_internal c_v64_ssd_u8(c_ssd64_internal s, c_v64 a,
                                          c_v64 b) {
  int c;
  for (c = 0; c < 8; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
  return s;
}

SIMD_INLINE uint32_t c_v64_ssd_u8_sum(c_ssd64_internal s) { return s; }
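
/* Usage sketch (illustrative only): the SSD accumulator follows the same
   init/accumulate/finalise pattern as the SAD helpers above, e.g. for one
   pair of 8-byte blocks at hypothetical pointers a and b:

     c_ssd64_internal s = c_v64_ssd_u8_init();
     s = c_v64_ssd_u8(s, c_v64_load_unaligned(a), c_v64_load_unaligned(b));
     uint32_t ssd = c_v64_ssd_u8_sum(s);
*/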

SIMD_INLINE c_v64 c_v64_or(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u64 = a.u64 | b.u64;
  return t;
}

SIMD_INLINE c_v64 c_v64_xor(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u64 = a.u64 ^ b.u64;
  return t;
}

SIMD_INLINE c_v64 c_v64_and(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u64 = a.u64 & b.u64;
  return t;
}

SIMD_INLINE c_v64 c_v64_andn(c_v64 a, c_v64 b) {
  c_v64 t;
  t.u64 = a.u64 & ~b.u64;
  return t;
}

SIMD_INLINE c_v64 c_v64_mullo_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = (int16_t)(a.s16[c] * b.s16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_mulhi_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = (a.s16[c] * b.s16[c]) >> 16;
  return t;
}

SIMD_INLINE c_v64 c_v64_mullo_s32(c_v64 a, c_v64 b) {
  c_v64 t;
  t.s32[0] = (int32_t)((int64_t)a.s32[0] * b.s32[0]);
  t.s32[1] = (int32_t)((int64_t)a.s32[1] * b.s32[1]);
  return t;
}

SIMD_INLINE c_v64 c_v64_madd_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  t.s32[0] = a.s16[0] * b.s16[0] + a.s16[1] * b.s16[1];
  t.s32[1] = a.s16[2] * b.s16[2] + a.s16[3] * b.s16[3];
  return t;
}

SIMD_INLINE c_v64 c_v64_madd_us8(c_v64 a, c_v64 b) {
  c_v64 t;
  int32_t u;
  u = a.u8[0] * b.s8[0] + a.u8[1] * b.s8[1];
  t.s16[0] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
  u = a.u8[2] * b.s8[2] + a.u8[3] * b.s8[3];
  t.s16[1] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
  u = a.u8[4] * b.s8[4] + a.u8[5] * b.s8[5];
  t.s16[2] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
  u = a.u8[6] * b.s8[6] + a.u8[7] * b.s8[7];
  t.s16[3] = u > 32767 ? 32767 : u < -32768 ? -32768 : u;
  return t;
}

SIMD_INLINE c_v64 c_v64_avg_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = (a.u8[c] + b.u8[c] + 1) >> 1;
  return t;
}

SIMD_INLINE c_v64 c_v64_rdavg_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = (a.u8[c] + b.u8[c]) >> 1;
  return t;
}

SIMD_INLINE c_v64 c_v64_rdavg_u16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.u16[c] = (a.u16[c] + b.u16[c]) >> 1;
  return t;
}

SIMD_INLINE c_v64 c_v64_avg_u16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.u16[c] = (a.u16[c] + b.u16[c] + 1) >> 1;
  return t;
}

SIMD_INLINE c_v64 c_v64_min_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] > b.u8[c] ? b.u8[c] : a.u8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_max_u8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] > b.u8[c] ? a.u8[c] : b.u8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_min_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] > b.s8[c] ? b.s8[c] : a.s8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_max_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] > b.s8[c] ? a.s8[c] : b.s8[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_min_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] > b.s16[c] ? b.s16[c] : a.s16[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_max_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] > b.s16[c] ? a.s16[c] : b.s16[c];
  return t;
}

SIMD_INLINE c_v64 c_v64_cmpgt_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.s8[c] = -(a.s8[c] > b.s8[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_cmplt_s8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.s8[c] = -(a.s8[c] < b.s8[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_cmpeq_8(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 8; c++) t.s8[c] = -(a.u8[c] == b.u8[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_cmpgt_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = -(a.s16[c] > b.s16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_cmplt_s16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = -(a.s16[c] < b.s16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_cmpeq_16(c_v64 a, c_v64 b) {
  c_v64 t;
  int c;
  for (c = 0; c < 4; c++) t.s16[c] = -(a.u16[c] == b.u16[c]);
  return t;
}

SIMD_INLINE c_v64 c_v64_shl_8(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 7) {
    fprintf(stderr, "Error: Undefined u8 shift left %d\n", n);
    abort();
  }
  for (c = 0; c < 8; c++) t.s8[c] = (int8_t)(a.u8[c] << n);
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_u8(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 7) {
    fprintf(stderr, "Error: Undefined u8 shift right %d\n", n);
    abort();
  }
  for (c = 0; c < 8; c++) t.u8[c] = a.u8[c] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_s8(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 7) {
    fprintf(stderr, "Error: Undefined s8 shift right %d\n", n);
    abort();
  }
  for (c = 0; c < 8; c++) t.s8[c] = a.s8[c] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shl_16(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 15) {
    fprintf(stderr, "Error: Undefined u16 shift left %d\n", n);
    abort();
  }
  for (c = 0; c < 4; c++) t.u16[c] = (uint16_t)(a.u16[c] << n);
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_u16(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 15) {
    fprintf(stderr, "Error: Undefined u16 shift right %d\n", n);
    abort();
  }
  for (c = 0; c < 4; c++) t.u16[c] = a.u16[c] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_s16(c_v64 a, unsigned int n) {
  c_v64 t;
  int c;
  if (SIMD_CHECK && n > 15) {
    fprintf(stderr, "Error: undefined s16 shift right %d\n", n);
    abort();
  }
  for (c = 0; c < 4; c++) t.s16[c] = a.s16[c] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shl_32(c_v64 a, unsigned int n) {
  c_v64 t;
  if (SIMD_CHECK && n > 31) {
    fprintf(stderr, "Error: undefined u32 shift left %d\n", n);
    abort();
  }
  t.u32[1] = a.u32[1] << n;
  t.u32[0] = a.u32[0] << n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_u32(c_v64 a, unsigned int n) {
  c_v64 t;
  if (SIMD_CHECK && n > 31) {
    fprintf(stderr, "Error: undefined u32 shift right %d\n", n);
    abort();
  }
  t.u32[1] = a.u32[1] >> n;
  t.u32[0] = a.u32[0] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_s32(c_v64 a, unsigned int n) {
  c_v64 t;
  if (SIMD_CHECK && n > 31) {
    fprintf(stderr, "Error: undefined s32 shift right %d\n", n);
    abort();
  }
  t.s32[1] = a.s32[1] >> n;
  t.s32[0] = a.s32[0] >> n;
  return t;
}

SIMD_INLINE c_v64 c_v64_shr_n_byte(c_v64 x, unsigned int i) {
  c_v64 t;
  t.u64 = x.u64 >> i * 8;
  return t;
}

SIMD_INLINE c_v64 c_v64_shl_n_byte(c_v64 x, unsigned int i) {
  c_v64 t;
  t.u64 = x.u64 << i * 8;
  return t;
}

SIMD_INLINE c_v64 c_v64_align(c_v64 a, c_v64 b, unsigned int c) {
  if (SIMD_CHECK && c > 7) {
    fprintf(stderr, "Error: undefined alignment %d\n", c);
    abort();
  }
  return c ? c_v64_or(c_v64_shr_n_byte(b, c), c_v64_shl_n_byte(a, 8 - c)) : b;
}

SIMD_INLINE c_v64 c_v64_shl_n_8(c_v64 a, unsigned int c) {
  return c_v64_shl_8(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_u8(c_v64 a, unsigned int c) {
  return c_v64_shr_u8(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_s8(c_v64 a, unsigned int c) {
  return c_v64_shr_s8(a, c);
}

SIMD_INLINE c_v64 c_v64_shl_n_16(c_v64 a, unsigned int c) {
  return c_v64_shl_16(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_u16(c_v64 a, unsigned int c) {
  return c_v64_shr_u16(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_s16(c_v64 a, unsigned int c) {
  return c_v64_shr_s16(a, c);
}

SIMD_INLINE c_v64 c_v64_shl_n_32(c_v64 a, unsigned int c) {
  return c_v64_shl_32(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_u32(c_v64 a, unsigned int c) {
  return c_v64_shr_u32(a, c);
}

SIMD_INLINE c_v64 c_v64_shr_n_s32(c_v64 a, unsigned int c) {
  return c_v64_shr_s32(a, c);
}

#endif  // AOM_AOM_DSP_SIMD_V64_INTRINSICS_C_H_