/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

#ifndef AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_
#define AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_

#include <stdio.h>
#include <stdlib.h>

#include "config/aom_config.h"

#include "aom_dsp/simd/v64_intrinsics_c.h"
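/* The 128-bit reference vector type: a union exposing the same 16 bytes as
 * unsigned/signed lanes of 8, 16, 32 and 64 bits, and as two c_v64 halves
 * (v64[0] is the low half, v64[1] the high half).  Most operations below
 * simply apply the corresponding c_v64 operation to each half. */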
typedef union {
  uint8_t u8[16];
  uint16_t u16[8];
  uint32_t u32[4];
  uint64_t u64[2];
  int8_t s8[16];
  int16_t s16[8];
  int32_t s32[4];
  int64_t s64[2];
  c_v64 v64[2];
} c_v128;
SIMD_INLINE uint32_t c_v128_low_u32(c_v128 a) { return a.u32[0]; }

SIMD_INLINE c_v64 c_v128_low_v64(c_v128 a) { return a.v64[0]; }

SIMD_INLINE c_v64 c_v128_high_v64(c_v128 a) { return a.v64[1]; }

SIMD_INLINE c_v128 c_v128_from_64(uint64_t hi, uint64_t lo) {
  c_v128 t;
  t.u64[1] = hi;
  t.u64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_v64(c_v64 hi, c_v64 lo) {
  c_v128 t;
  t.v64[1] = hi;
  t.v64[0] = lo;
  return t;
}

SIMD_INLINE c_v128 c_v128_from_32(uint32_t a, uint32_t b, uint32_t c,
                                  uint32_t d) {
  c_v128 t;
  t.u32[3] = a;
  t.u32[2] = b;
  t.u32[1] = c;
  t.u32[0] = d;
  return t;
}
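/* Loads and stores.  The unaligned variants copy the 16 bytes one at a time;
 * the aligned variants additionally require a 16-byte aligned pointer and,
 * when SIMD_CHECK is enabled, report and abort on a misaligned access. */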
SIMD_INLINE c_v128 c_v128_load_unaligned(const void *p) {
  c_v128 t;
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&t;
  int c;
  for (c = 0; c < 16; c++) q[c] = pp[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_load_aligned(const void *p) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 load at %p\n", p);
    abort();
  }
  return c_v128_load_unaligned(p);
}

SIMD_INLINE void c_v128_store_unaligned(void *p, c_v128 a) {
  uint8_t *pp = (uint8_t *)p;
  uint8_t *q = (uint8_t *)&a;
  int c;
  for (c = 0; c < 16; c++) pp[c] = q[c];
}

SIMD_INLINE void c_v128_store_aligned(void *p, c_v128 a) {
  if (SIMD_CHECK && (uintptr_t)p & 15) {
    fprintf(stderr, "Error: unaligned v128 store at %p\n", p);
    abort();
  }
  c_v128_store_unaligned(p, a);
}
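/* Constant initialisers: an all-zero vector and per-lane broadcasts of an
 * 8-, 16-, 32- or 64-bit value. */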
SIMD_INLINE c_v128 c_v128_zero(void) {
  c_v128 t;
  t.u64[1] = t.u64[0] = 0;
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_8(uint8_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_8(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_16(uint16_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_16(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_32(uint32_t x) {
  c_v128 t;
  t.v64[1] = t.v64[0] = c_v64_dup_32(x);
  return t;
}

SIMD_INLINE c_v128 c_v128_dup_64(uint64_t x) {
  c_v128 t;
  t.u64[1] = t.u64[0] = x;
  return t;
}
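/* Dot products: multiply corresponding lanes and accumulate into a 64-bit
 * sum.  Note that c_v128_dotp_s32() truncates each product to 32 bits before
 * summing, as its comment indicates. */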
SIMD_INLINE int64_t c_v128_dotp_su8(c_v128 a, c_v128 b) {
  return c_v64_dotp_su8(a.v64[1], b.v64[1]) +
         c_v64_dotp_su8(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s16(c_v128 a, c_v128 b) {
  return c_v64_dotp_s16(a.v64[1], b.v64[1]) +
         c_v64_dotp_s16(a.v64[0], b.v64[0]);
}

SIMD_INLINE int64_t c_v128_dotp_s32(c_v128 a, c_v128 b) {
  // 32 bit products, 64 bit sum
  return (int64_t)(int32_t)((int64_t)a.s32[3] * b.s32[3]) +
         (int64_t)(int32_t)((int64_t)a.s32[2] * b.s32[2]) +
         (int64_t)(int32_t)((int64_t)a.s32[1] * b.s32[1]) +
         (int64_t)(int32_t)((int64_t)a.s32[0] * b.s32[0]);
}

SIMD_INLINE uint64_t c_v128_hadd_u8(c_v128 a) {
  return c_v64_hadd_u8(a.v64[1]) + c_v64_hadd_u8(a.v64[0]);
}
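/* Sum of absolute differences over the sixteen u8 lanes, accumulated via an
 * init/accumulate/sum triple.  A minimal usage sketch:
 *
 *   c_sad128_internal acc = c_v128_sad_u8_init();
 *   acc = c_v128_sad_u8(acc, a, b);         // at most 32 accumulations
 *   uint32_t sad = c_v128_sad_u8_sum(acc);  // finalise
 */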
typedef struct {
  uint32_t val;
  int count;
} c_sad128_internal;

SIMD_INLINE c_sad128_internal c_v128_sad_u8_init(void) {
  c_sad128_internal t;
  t.val = t.count = 0;
  return t;
}

/* Implementation dependent return value.  Result must be finalised with
 * v128_sad_u8_sum(). The result for more than 32 v128_sad_u8() calls is
 * undefined. */
SIMD_INLINE c_sad128_internal c_v128_sad_u8(c_sad128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++)
    s.val += a.u8[c] > b.u8[c] ? a.u8[c] - b.u8[c] : b.u8[c] - a.u8[c];
  s.count++;
  if (SIMD_CHECK && s.count > 32) {
    fprintf(stderr,
            "Error: sad called more than 32 times returning an undefined "
            "result\n");
    abort();
  }
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u8_sum(c_sad128_internal s) { return s.val; }
typedef uint32_t c_ssd128_internal;

SIMD_INLINE c_ssd128_internal c_v128_ssd_u8_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_ssd_u8_sum(). */
SIMD_INLINE c_ssd128_internal c_v128_ssd_u8(c_ssd128_internal s, c_v128 a,
                                            c_v128 b) {
  int c;
  for (c = 0; c < 16; c++) s += (a.u8[c] - b.u8[c]) * (a.u8[c] - b.u8[c]);
  return s;
}

SIMD_INLINE uint32_t c_v128_ssd_u8_sum(c_ssd128_internal s) { return s; }
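/* Bitwise logic and lane-wise add/subtract, each implemented by applying the
 * corresponding c_v64 operation to the low and high halves.  The sadd/ssub
 * variants saturate rather than wrap. */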
SIMD_INLINE c_v128 c_v128_or(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_or(a.v64[1], b.v64[1]),
                         c_v64_or(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_xor(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_xor(a.v64[1], b.v64[1]),
                         c_v64_xor(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_and(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_and(a.v64[1], b.v64[1]),
                         c_v64_and(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_andn(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_andn(a.v64[1], b.v64[1]),
                         c_v64_andn(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_8(a.v64[1], b.v64[1]),
                         c_v64_add_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_16(a.v64[1], b.v64[1]),
                         c_v64_add_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_u8(a.v64[1], b.v64[1]),
                         c_v64_sadd_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s8(a.v64[1], b.v64[1]),
                         c_v64_sadd_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sadd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sadd_s16(a.v64[1], b.v64[1]),
                         c_v64_sadd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_add_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_add_32(a.v64[1], b.v64[1]),
                         c_v64_add_32(a.v64[0], b.v64[0]));
}
SIMD_INLINE c_v128 c_v128_add_64(c_v128 a, c_v128 b) {
  // Two's complement overflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 > ~b.v64[1].u64 ? a.v64[1].u64 - ~b.v64[1].u64 - 1
                                   : a.v64[1].u64 + b.v64[1].u64,
      a.v64[0].u64 > ~b.v64[0].u64 ? a.v64[0].u64 - ~b.v64[0].u64 - 1
                                   : a.v64[0].u64 + b.v64[0].u64);
}

SIMD_INLINE c_v128 c_v128_padd_s16(c_v128 a) {
  c_v128 t;
  t.s32[0] = (int32_t)a.s16[0] + (int32_t)a.s16[1];
  t.s32[1] = (int32_t)a.s16[2] + (int32_t)a.s16[3];
  t.s32[2] = (int32_t)a.s16[4] + (int32_t)a.s16[5];
  t.s32[3] = (int32_t)a.s16[6] + (int32_t)a.s16[7];
  return t;
}

SIMD_INLINE c_v128 c_v128_padd_u8(c_v128 a) {
  c_v128 t;
  t.u16[0] = (uint16_t)a.u8[0] + (uint16_t)a.u8[1];
  t.u16[1] = (uint16_t)a.u8[2] + (uint16_t)a.u8[3];
  t.u16[2] = (uint16_t)a.u8[4] + (uint16_t)a.u8[5];
  t.u16[3] = (uint16_t)a.u8[6] + (uint16_t)a.u8[7];
  t.u16[4] = (uint16_t)a.u8[8] + (uint16_t)a.u8[9];
  t.u16[5] = (uint16_t)a.u8[10] + (uint16_t)a.u8[11];
  t.u16[6] = (uint16_t)a.u8[12] + (uint16_t)a.u8[13];
  t.u16[7] = (uint16_t)a.u8[14] + (uint16_t)a.u8[15];
  return t;
}

SIMD_INLINE c_v128 c_v128_sub_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_8(a.v64[1], b.v64[1]),
                         c_v64_sub_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u8(a.v64[1], b.v64[1]),
                         c_v64_ssub_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s8(a.v64[1], b.v64[1]),
                         c_v64_ssub_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_16(a.v64[1], b.v64[1]),
                         c_v64_sub_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_s16(a.v64[1], b.v64[1]),
                         c_v64_ssub_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ssub_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ssub_u16(a.v64[1], b.v64[1]),
                         c_v64_ssub_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_sub_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_sub_32(a.v64[1], b.v64[1]),
                         c_v64_sub_32(a.v64[0], b.v64[0]));
}
SIMD_INLINE c_v128 c_v128_sub_64(c_v128 a, c_v128 b) {
  // Two's complement underflow (silences sanitizers)
  return c_v128_from_64(
      a.v64[1].u64 < b.v64[1].u64 ? a.v64[1].u64 + ~b.v64[1].u64 + 1
                                  : a.v64[1].u64 - b.v64[1].u64,
      a.v64[0].u64 < b.v64[0].u64 ? a.v64[0].u64 + ~b.v64[0].u64 + 1
                                  : a.v64[0].u64 - b.v64[0].u64);
}

SIMD_INLINE c_v128 c_v128_abs_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s16(a.v64[1]), c_v64_abs_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_abs_s8(c_v128 a) {
  return c_v128_from_v64(c_v64_abs_s8(a.v64[1]), c_v64_abs_s8(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mul_s16(c_v64 a, c_v64 b) {
  c_v64 lo_bits = c_v64_mullo_s16(a, b);
  c_v64 hi_bits = c_v64_mulhi_s16(a, b);
  return c_v128_from_v64(c_v64_ziphi_16(hi_bits, lo_bits),
                         c_v64_ziplo_16(hi_bits, lo_bits));
}

SIMD_INLINE c_v128 c_v128_mullo_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s16(a.v64[1], b.v64[1]),
                         c_v64_mullo_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mulhi_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mulhi_s16(a.v64[1], b.v64[1]),
                         c_v64_mulhi_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_mullo_s32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_mullo_s32(a.v64[1], b.v64[1]),
                         c_v64_mullo_s32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_s16(a.v64[1], b.v64[1]),
                         c_v64_madd_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_madd_us8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_madd_us8(a.v64[1], b.v64[1]),
                         c_v64_madd_us8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u8(a.v64[1], b.v64[1]),
                         c_v64_avg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u8(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_rdavg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_rdavg_u16(a.v64[1], b.v64[1]),
                         c_v64_rdavg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_avg_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_avg_u16(a.v64[1], b.v64[1]),
                         c_v64_avg_u16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_u8(a.v64[1], b.v64[1]),
                         c_v64_min_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_u8(a.v64[1], b.v64[1]),
                         c_v64_max_u8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s8(a.v64[1], b.v64[1]),
                         c_v64_min_s8(a.v64[0], b.v64[0]));
}
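/* c_v128_movemask_8() gathers the sign bit of each of the 16 byte lanes into
 * bits 0..15 of the result; c_v128_blend_8() selects b.u8[i] where the
 * control byte c.s8[i] is negative and a.u8[i] otherwise. */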
SIMD_INLINE uint32_t c_v128_movemask_8(c_v128 a) {
  return ((a.s8[15] < 0) << 15) | ((a.s8[14] < 0) << 14) |
         ((a.s8[13] < 0) << 13) | ((a.s8[12] < 0) << 12) |
         ((a.s8[11] < 0) << 11) | ((a.s8[10] < 0) << 10) |
         ((a.s8[9] < 0) << 9) | ((a.s8[8] < 0) << 8) | ((a.s8[7] < 0) << 7) |
         ((a.s8[6] < 0) << 6) | ((a.s8[5] < 0) << 5) | ((a.s8[4] < 0) << 4) |
         ((a.s8[3] < 0) << 3) | ((a.s8[2] < 0) << 2) | ((a.s8[1] < 0) << 1) |
         ((a.s8[0] < 0) << 0);
}

SIMD_INLINE c_v128 c_v128_blend_8(c_v128 a, c_v128 b, c_v128 c) {
  c_v128 t;
  for (int i = 0; i < 16; i++) t.u8[i] = c.s8[i] < 0 ? b.u8[i] : a.u8[i];
  return t;
}

SIMD_INLINE c_v128 c_v128_max_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s8(a.v64[1], b.v64[1]),
                         c_v64_max_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_min_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_min_s16(a.v64[1], b.v64[1]),
                         c_v64_min_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_max_s16(a.v64[1], b.v64[1]),
                         c_v64_max_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_max_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? a.s32[c] : b.s32[c];
  return t;
}

SIMD_INLINE c_v128 c_v128_min_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = a.s32[c] > b.s32[c] ? b.s32[c] : a.s32[c];
  return t;
}
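/* Interleaves: ziplo/ziphi interleave the low/high halves of the two 128-bit
 * inputs, while the zip_* variants interleave two whole c_v64 values into
 * one c_v128. */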
SIMD_INLINE c_v128 c_v128_ziplo_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[0], b.v64[0]),
                         c_v64_ziplo_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a.v64[1], b.v64[1]),
                         c_v64_ziplo_8(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[0], b.v64[0]),
                         c_v64_ziplo_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a.v64[1], b.v64[1]),
                         c_v64_ziplo_16(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[0], b.v64[0]),
                         c_v64_ziplo_32(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_ziphi_32(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a.v64[1], b.v64[1]),
                         c_v64_ziplo_32(a.v64[1], b.v64[1]));
}

SIMD_INLINE c_v128 c_v128_ziplo_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[0], b.v64[0]);
}

SIMD_INLINE c_v128 c_v128_ziphi_64(c_v128 a, c_v128 b) {
  return c_v128_from_v64(a.v64[1], b.v64[1]);
}

SIMD_INLINE c_v128 c_v128_zip_8(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_8(a, b), c_v64_ziplo_8(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_16(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_16(a, b), c_v64_ziplo_16(a, b));
}

SIMD_INLINE c_v128 c_v128_zip_32(c_v64 a, c_v64 b) {
  return c_v128_from_v64(c_v64_ziphi_32(a, b), c_v64_ziplo_32(a, b));
}
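/* De-interleaves (unzip).  The mode argument of the _c_v128_unzip_* helpers
 * selects the big-endian lane order; the public unziplo/unziphi wrappers
 * pick the mode and argument order based on CONFIG_BIG_ENDIAN. */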
SIMD_INLINE c_v128 _c_v128_unzip_8(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u8[15] = b.u8[15];
    t.u8[14] = b.u8[13];
    t.u8[13] = b.u8[11];
    t.u8[12] = b.u8[9];
    t.u8[11] = b.u8[7];
    t.u8[10] = b.u8[5];
    t.u8[9] = b.u8[3];
    t.u8[8] = b.u8[1];
    t.u8[7] = a.u8[15];
    t.u8[6] = a.u8[13];
    t.u8[5] = a.u8[11];
    t.u8[4] = a.u8[9];
    t.u8[3] = a.u8[7];
    t.u8[2] = a.u8[5];
    t.u8[1] = a.u8[3];
    t.u8[0] = a.u8[1];
  } else {
    t.u8[15] = a.u8[14];
    t.u8[14] = a.u8[12];
    t.u8[13] = a.u8[10];
    t.u8[12] = a.u8[8];
    t.u8[11] = a.u8[6];
    t.u8[10] = a.u8[4];
    t.u8[9] = a.u8[2];
    t.u8[8] = a.u8[0];
    t.u8[7] = b.u8[14];
    t.u8[6] = b.u8[12];
    t.u8[5] = b.u8[10];
    t.u8[4] = b.u8[8];
    t.u8[3] = b.u8[6];
    t.u8[2] = b.u8[4];
    t.u8[1] = b.u8[2];
    t.u8[0] = b.u8[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(a, b, 1)
                           : _c_v128_unzip_8(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_8(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_8(b, a, 0)
                           : _c_v128_unzip_8(b, a, 1);
}

SIMD_INLINE c_v128 _c_v128_unzip_16(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u16[7] = b.u16[7];
    t.u16[6] = b.u16[5];
    t.u16[5] = b.u16[3];
    t.u16[4] = b.u16[1];
    t.u16[3] = a.u16[7];
    t.u16[2] = a.u16[5];
    t.u16[1] = a.u16[3];
    t.u16[0] = a.u16[1];
  } else {
    t.u16[7] = a.u16[6];
    t.u16[6] = a.u16[4];
    t.u16[5] = a.u16[2];
    t.u16[4] = a.u16[0];
    t.u16[3] = b.u16[6];
    t.u16[2] = b.u16[4];
    t.u16[1] = b.u16[2];
    t.u16[0] = b.u16[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(a, b, 1)
                           : _c_v128_unzip_16(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_16(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_16(b, a, 0)
                           : _c_v128_unzip_16(b, a, 1);
}

SIMD_INLINE c_v128 _c_v128_unzip_32(c_v128 a, c_v128 b, int mode) {
  c_v128 t;
  if (mode) {
    t.u32[3] = b.u32[3];
    t.u32[2] = b.u32[1];
    t.u32[1] = a.u32[3];
    t.u32[0] = a.u32[1];
  } else {
    t.u32[3] = a.u32[2];
    t.u32[2] = a.u32[0];
    t.u32[1] = b.u32[2];
    t.u32[0] = b.u32[0];
  }
  return t;
}

SIMD_INLINE c_v128 c_v128_unziplo_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(a, b, 1)
                           : _c_v128_unzip_32(a, b, 0);
}

SIMD_INLINE c_v128 c_v128_unziphi_32(c_v128 a, c_v128 b) {
  return CONFIG_BIG_ENDIAN ? _c_v128_unzip_32(b, a, 0)
                           : _c_v128_unzip_32(b, a, 1);
}
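/* Width conversions: unpack widens u8/s8 lanes to s16 and u16/s16 lanes to
 * s32, while pack narrows with saturation via the corresponding c_v64 pack
 * helpers. */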
SIMD_INLINE c_v128 c_v128_unpack_u8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a), c_v64_unpacklo_u8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[0]),
                         c_v64_unpacklo_u8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u8_s16(a.v64[1]),
                         c_v64_unpacklo_u8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpack_s8_s16(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a), c_v64_unpacklo_s8_s16(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[0]),
                         c_v64_unpacklo_s8_s16(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s8_s16(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s8_s16(a.v64[1]),
                         c_v64_unpacklo_s8_s16(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_s16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_s16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s32_u16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s32_u16(a.v64[1], a.v64[0]),
                         c_v64_pack_s32_u16(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_u8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_u8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_u8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_pack_s16_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_pack_s16_s8(a.v64[1], a.v64[0]),
                         c_v64_pack_s16_s8(b.v64[1], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpack_u16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a), c_v64_unpacklo_u16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpack_s16_s32(c_v64 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a), c_v64_unpacklo_s16_s32(a));
}

SIMD_INLINE c_v128 c_v128_unpacklo_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[0]),
                         c_v64_unpacklo_u16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpacklo_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[0]),
                         c_v64_unpacklo_s16_s32(a.v64[0]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_u16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_u16_s32(a.v64[1]),
                         c_v64_unpacklo_u16_s32(a.v64[1]));
}

SIMD_INLINE c_v128 c_v128_unpackhi_s16_s32(c_v128 a) {
  return c_v128_from_v64(c_v64_unpackhi_s16_s32(a.v64[1]),
                         c_v64_unpacklo_s16_s32(a.v64[1]));
}
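/* c_v128_shuffle_8() uses the low four bits of each pattern byte as a source
 * byte index into a (mirrored on big-endian targets).  The compare
 * operations return all-ones in lanes where the condition holds and zero
 * otherwise. */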
SIMD_INLINE c_v128 c_v128_shuffle_8(c_v128 a, c_v128 pattern) {
  c_v128 t;
  int c;
  for (c = 0; c < 16; c++)
    t.u8[c] = a.u8[CONFIG_BIG_ENDIAN ? 15 - (pattern.u8[c] & 15)
                                     : pattern.u8[c] & 15];

  return t;
}

SIMD_INLINE c_v128 c_v128_cmpgt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s8(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_8(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_8(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_8(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpgt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmpgt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmplt_s16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmplt_s16(a.v64[1], b.v64[1]),
                         c_v64_cmplt_s16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpeq_16(c_v128 a, c_v128 b) {
  return c_v128_from_v64(c_v64_cmpeq_16(a.v64[1], b.v64[1]),
                         c_v64_cmpeq_16(a.v64[0], b.v64[0]));
}

SIMD_INLINE c_v128 c_v128_cmpgt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] > b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmplt_s32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] < b.s32[c]);
  return t;
}

SIMD_INLINE c_v128 c_v128_cmpeq_32(c_v128 a, c_v128 b) {
  c_v128 t;
  int c;
  for (c = 0; c < 4; c++) t.s32[c] = -(a.s32[c] == b.s32[c]);
  return t;
}
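/* Whole-register shifts by n bytes, valid for 0 <= n < 16: bytes shifted in
 * are zero, and the two c_v64 halves are stitched together by or-ing in the
 * bytes that spill across the half boundary. */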
SIMD_INLINE c_v128 c_v128_shl_n_byte(c_v128 a, const unsigned int n) {
  if (n == 0) return a;
  if (n < 8)
    return c_v128_from_v64(c_v64_or(c_v64_shl_n_byte(a.v64[1], n),
                                    c_v64_shr_n_byte(a.v64[0], 8 - n)),
                           c_v64_shl_n_byte(a.v64[0], n));
  else
    return c_v128_from_v64(c_v64_shl_n_byte(a.v64[0], n - 8), c_v64_zero());
}

SIMD_INLINE c_v128 c_v128_shr_n_byte(c_v128 a, const unsigned int n) {
  if (n == 0) return a;
  if (n < 8)
    return c_v128_from_v64(c_v64_shr_n_byte(a.v64[1], n),
                           c_v64_or(c_v64_shr_n_byte(a.v64[0], n),
                                    c_v64_shl_n_byte(a.v64[1], 8 - n)));
  else
    return c_v128_from_v64(c_v64_zero(), c_v64_shr_n_byte(a.v64[1], n - 8));
}
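/* c_v128_align(a, b, c) extracts 16 bytes starting at byte offset c from the
 * 32-byte concatenation of a (high) and b (low), analogous to a byte
 * alignment/extract instruction; c must be at most 15. */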
SIMD_INLINE c_v128 c_v128_align(c_v128 a, c_v128 b, const unsigned int c) {
  if (SIMD_CHECK && c > 15) {
    fprintf(stderr, "Error: undefined alignment %d\n", c);
    abort();
  }
  return c ? c_v128_or(c_v128_shr_n_byte(b, c), c_v128_shl_n_byte(a, 16 - c))
           : b;
}

SIMD_INLINE c_v128 c_v128_shl_8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_8(a.v64[1], c), c_v64_shl_8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u8(a.v64[1], c), c_v64_shr_u8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s8(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s8(a.v64[1], c), c_v64_shr_s8(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_16(a.v64[1], c), c_v64_shl_16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u16(a.v64[1], c),
                         c_v64_shr_u16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s16(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s16(a.v64[1], c),
                         c_v64_shr_s16(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shl_32(a.v64[1], c), c_v64_shl_32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_u32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_u32(a.v64[1], c),
                         c_v64_shr_u32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shr_s32(c_v128 a, const unsigned int c) {
  return c_v128_from_v64(c_v64_shr_s32(a.v64[1], c),
                         c_v64_shr_s32(a.v64[0], c));
}

SIMD_INLINE c_v128 c_v128_shl_64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 <<= c;
  a.v64[0].u64 <<= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_u64(c_v128 a, const unsigned int c) {
  a.v64[1].u64 >>= c;
  a.v64[0].u64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}

SIMD_INLINE c_v128 c_v128_shr_s64(c_v128 a, const unsigned int c) {
  a.v64[1].s64 >>= c;
  a.v64[0].s64 >>= c;
  return c_v128_from_v64(a.v64[1], a.v64[0]);
}
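/* In this C reference implementation the shift-by-immediate (_n_) variants
 * simply forward to the shift-by-variable functions above; the distinction
 * exists for optimised targets where the count must be an immediate. */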
SIMD_INLINE c_v128 c_v128_shl_n_8(c_v128 a, const unsigned int n) {
  return c_v128_shl_8(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_16(c_v128 a, const unsigned int n) {
  return c_v128_shl_16(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_32(c_v128 a, const unsigned int n) {
  return c_v128_shl_32(a, n);
}

SIMD_INLINE c_v128 c_v128_shl_n_64(c_v128 a, const unsigned int n) {
  return c_v128_shl_64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u8(c_v128 a, const unsigned int n) {
  return c_v128_shr_u8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u16(c_v128 a, const unsigned int n) {
  return c_v128_shr_u16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u32(c_v128 a, const unsigned int n) {
  return c_v128_shr_u32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_u64(c_v128 a, const unsigned int n) {
  return c_v128_shr_u64(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s8(c_v128 a, const unsigned int n) {
  return c_v128_shr_s8(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s16(c_v128 a, const unsigned int n) {
  return c_v128_shr_s16(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s32(c_v128 a, const unsigned int n) {
  return c_v128_shr_s32(a, n);
}

SIMD_INLINE c_v128 c_v128_shr_n_s64(c_v128 a, const unsigned int n) {
  return c_v128_shr_s64(a, n);
}
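/* Accumulators over 16-bit lanes: sum of absolute differences of u16 lanes
 * and sum of squared differences of s16 lanes, following the same
 * init/accumulate/sum pattern as the u8 versions above. */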
typedef uint32_t c_sad128_internal_u16;

SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_sad_u16_sum(). */
SIMD_INLINE c_sad128_internal_u16 c_v128_sad_u16(c_sad128_internal_u16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += a.u16[c] > b.u16[c] ? a.u16[c] - b.u16[c] : b.u16[c] - a.u16[c];
  return s;
}

SIMD_INLINE uint32_t c_v128_sad_u16_sum(c_sad128_internal_u16 s) { return s; }

typedef uint64_t c_ssd128_internal_s16;

SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16_init(void) { return 0; }

/* Implementation dependent return value.  Result must be finalised with
 * v128_ssd_s16_sum(). */
SIMD_INLINE c_ssd128_internal_s16 c_v128_ssd_s16(c_ssd128_internal_s16 s,
                                                 c_v128 a, c_v128 b) {
  int c;
  for (c = 0; c < 8; c++)
    s += (int32_t)(int16_t)(a.s16[c] - b.s16[c]) *
         (int32_t)(int16_t)(a.s16[c] - b.s16[c]);
  return s;
}

SIMD_INLINE uint64_t c_v128_ssd_s16_sum(c_ssd128_internal_s16 s) { return s; }

#endif  // AOM_AOM_DSP_SIMD_V128_INTRINSICS_C_H_