• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #define SIMD_CHECK 1
13 #include "third_party/googletest/src/googletest/include/gtest/gtest.h"
14 #include "test/clear_system_state.h"
15 #include "test/register_state_check.h"
16 #include "aom_dsp/aom_simd_inline.h"
17 #include "aom_dsp/simd/v256_intrinsics_c.h"
18 
19 namespace SIMD_NAMESPACE {
20 
21 template <typename param_signature>
22 class TestIntrinsic : public ::testing::TestWithParam<param_signature> {
23  public:
~TestIntrinsic()24   virtual ~TestIntrinsic() {}
SetUp()25   virtual void SetUp() {
26     mask = ::testing::get<0>(this->GetParam());
27     maskwidth = ::testing::get<1>(this->GetParam());
28     name = ::testing::get<2>(this->GetParam());
29   }
30 
TearDown()31   virtual void TearDown() { libaom_test::ClearSystemState(); }
32 
33  protected:
34   uint32_t mask, maskwidth;
35   const char *name;
36 };
37 
// Create one typedef for each function signature.
//
// Every typedef produced by this macro aliases the same fixture
// instantiation (a tuple of mask, maskwidth and intrinsic name); only the
// typedef name, passed through ARCH_POSTFIX(), differs. The names follow a
// <return type>_<argument types> pattern that mirrors the template arguments
// used in the MY_TEST_P bodies further down in this file.
#define TYPEDEF_SIMD(name)                                                    \
  typedef TestIntrinsic< ::testing::tuple<uint32_t, uint32_t, const char *> > \
      ARCH_POSTFIX(name)

TYPEDEF_SIMD(V64_U8);
TYPEDEF_SIMD(V64_U16);
TYPEDEF_SIMD(V64_U32);
TYPEDEF_SIMD(V64_V64);
TYPEDEF_SIMD(U32_V64);
TYPEDEF_SIMD(S32_V64);
TYPEDEF_SIMD(U64_V64);
TYPEDEF_SIMD(S64_V64);
TYPEDEF_SIMD(V64_U32U32);
TYPEDEF_SIMD(V64_V64V64);
TYPEDEF_SIMD(S64_V64V64);
TYPEDEF_SIMD(V64_V64U32);
TYPEDEF_SIMD(U32_V64V64);
TYPEDEF_SIMD(V128_V64);
TYPEDEF_SIMD(V128_V128);
TYPEDEF_SIMD(U32_V128);
TYPEDEF_SIMD(U64_V128);
TYPEDEF_SIMD(V64_V128);
TYPEDEF_SIMD(V128_U8);
TYPEDEF_SIMD(V128_U16);
TYPEDEF_SIMD(V128_U32);
TYPEDEF_SIMD(V128_U64);
TYPEDEF_SIMD(V128_U64U64);
TYPEDEF_SIMD(V128_V64V64);
TYPEDEF_SIMD(V128_V128V128);
TYPEDEF_SIMD(V128_V128V128V128);
TYPEDEF_SIMD(S64_V128V128);
TYPEDEF_SIMD(V128_V128U32);
TYPEDEF_SIMD(U32_V128V128);
TYPEDEF_SIMD(U64_V128V128);
TYPEDEF_SIMD(V256_V128);
TYPEDEF_SIMD(V256_V256);
TYPEDEF_SIMD(U64_V256);
TYPEDEF_SIMD(V256_V128V128);
TYPEDEF_SIMD(V256_V256V256);
TYPEDEF_SIMD(V256_V256V256V256);
TYPEDEF_SIMD(U64_V256V256);
TYPEDEF_SIMD(S64_V256V256);
TYPEDEF_SIMD(V256_V256U32);
TYPEDEF_SIMD(U32_V256V256);
TYPEDEF_SIMD(V256_U8);
TYPEDEF_SIMD(V256_U16);
TYPEDEF_SIMD(V256_U32);
TYPEDEF_SIMD(V256_U64);
TYPEDEF_SIMD(U32_V256);
TYPEDEF_SIMD(V64_V256);
89 
// Google Test allows up to 50 tests per case, so split the largest.
// Each _PartN name is a further alias of the fixture type it is split from,
// which gives the additional parameters a test case name of their own.
typedef ARCH_POSTFIX(V64_V64) ARCH_POSTFIX(V64_V64_Part2);
typedef ARCH_POSTFIX(V64_V64V64) ARCH_POSTFIX(V64_V64V64_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part2);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part3);
typedef ARCH_POSTFIX(V128_V128) ARCH_POSTFIX(V128_V128_Part4);
typedef ARCH_POSTFIX(V128_V128V128) ARCH_POSTFIX(V128_V128V128_Part2);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part2);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part3);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part4);
typedef ARCH_POSTFIX(V256_V256) ARCH_POSTFIX(V256_V256_Part5);
typedef ARCH_POSTFIX(V256_V256V256) ARCH_POSTFIX(V256_V256V256_Part2);

102 
// These functions are machine tuned and located elsewhere; only their
// declarations appear here. Each one receives the iteration count, the mask
// and maskwidth test parameters, and the name of the intrinsic to test. The
// template arguments name the return type and the argument type(s) of the
// intrinsic's signature.

// Test driver for intrinsics taking one argument.
template <typename c_ret, typename c_arg>
void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                  const char *name);

// Test driver for intrinsics taking two arguments.
template <typename c_ret, typename c_arg1, typename c_arg2>
void TestSimd2Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name);

// Test driver for intrinsics taking three arguments.
template <typename c_ret, typename c_arg1, typename c_arg2, typename c_arg3>
void TestSimd3Args(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
                   const char *name);

115 
// Iteration count handed to every TestSimd1Arg/TestSimd2Args/TestSimd3Args
// call made by the tests in this file.
const int kIterations = 65536;

// Add a macro layer since TEST_P will quote the name so we need to
// expand it first with the prefix.
#define MY_TEST_P(name, test) TEST_P(name, test)
121 
// Test bodies for signatures built from c_v64 and scalar types. Each body
// forwards the fixture's mask, maskwidth and name to the TestSimd helper
// whose template arguments spell out the signature named by the test case
// (return type first, then the argument types).
MY_TEST_P(ARCH_POSTFIX(V64_U8), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U16), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32), TestIntrinsics) {
  TestSimd1Arg<c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V64), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64), TestIntrinsics) {
  TestSimd1Arg<int64_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S32_V64), TestIntrinsics) {
  TestSimd1Arg<int32_t, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_U32U32), TestIntrinsics) {
  TestSimd2Args<c_v64, uint32_t, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V64V64), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V64V64), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64U32), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, uint32_t>(kIterations, mask, maskwidth, name);
}

// Google Test allows up to 50 tests per case, so split the largest
MY_TEST_P(ARCH_POSTFIX(V64_V64_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V64V64_Part2), TestIntrinsics) {
  TestSimd2Args<c_v64, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}
182 
// Test bodies for signatures involving c_v128. Each body forwards the
// fixture's mask, maskwidth and name to the TestSimd helper whose template
// arguments spell out the signature named by the test case (return type
// first, then the argument types). The _PartN cases repeat the body of the
// case they are split from.
MY_TEST_P(ARCH_POSTFIX(U32_V128), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V128), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V128), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U8), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U16), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U32), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U64), TestIntrinsics) {
  TestSimd1Arg<c_v128, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128V128), TestIntrinsics) {
  TestSimd3Args<c_v128, c_v128, c_v128, c_v128>(kIterations, mask, maskwidth,
                                                name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V128V128), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V128V128), TestIntrinsics) {
  TestSimd2Args<uint64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V128V128), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_U64U64), TestIntrinsics) {
  TestSimd2Args<c_v128, uint64_t, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V64V64), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v64, c_v64>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128U32), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128V128_Part2), TestIntrinsics) {
  TestSimd2Args<c_v128, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part3), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V128_V128_Part4), TestIntrinsics) {
  TestSimd1Arg<c_v128, c_v128>(kIterations, mask, maskwidth, name);
}
267 
// Test bodies for signatures involving c_v256. Each body forwards the
// fixture's mask, maskwidth and name to the TestSimd helper whose template
// arguments spell out the signature named by the test case (return type
// first, then the argument types). The _PartN cases repeat the body of the
// case they are split from.
MY_TEST_P(ARCH_POSTFIX(U64_V256), TestIntrinsics) {
  TestSimd1Arg<uint64_t, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V128), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256V256), TestIntrinsics) {
  TestSimd3Args<c_v256, c_v256, c_v256, c_v256>(kIterations, mask, maskwidth,
                                                name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V128V128), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v128, c_v128>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V256V256), TestIntrinsics) {
  TestSimd2Args<uint32_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U64_V256V256), TestIntrinsics) {
  TestSimd2Args<uint64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(S64_V256V256), TestIntrinsics) {
  TestSimd2Args<int64_t, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256V256_Part2), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256U32), TestIntrinsics) {
  TestSimd2Args<c_v256, c_v256, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part2), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part3), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part4), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_V256_Part5), TestIntrinsics) {
  TestSimd1Arg<c_v256, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U8), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint8_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U16), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint16_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U32), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint32_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V256_U64), TestIntrinsics) {
  TestSimd1Arg<c_v256, uint64_t>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(U32_V256), TestIntrinsics) {
  TestSimd1Arg<uint32_t, c_v256>(kIterations, mask, maskwidth, name);
}

MY_TEST_P(ARCH_POSTFIX(V64_V256), TestIntrinsics) {
  TestSimd1Arg<c_v64, c_v256>(kIterations, mask, maskwidth, name);
}
352 
// Add a macro layer since INSTANTIATE_TEST_CASE_P will quote the name
// so we need to expand it first with the prefix.
// The variadic arguments are passed on unchanged as the argument list of
// ::testing::Values(), so each test parameter must be a separate macro
// argument.
#define INSTANTIATE(name, type, ...) \
  INSTANTIATE_TEST_CASE_P(name, type, ::testing::Values(__VA_ARGS__))

// Builds one test parameter tuple: (mask, maskwidth, "name"). The third
// element is the stringified first macro argument, cast to const char *.
#define SIMD_TUPLE(name, mask, maskwidth) \
  ::testing::make_tuple(mask, maskwidth, static_cast<const char *>(#name))
360 
361 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64V64),
362             (SIMD_TUPLE(v64_sad_u8, 0U, 0U), SIMD_TUPLE(v64_ssd_u8, 0U, 0U)));
363 
// Parameters for the V64_V64V64 test case: one tuple per intrinsic name,
// all with mask and maskwidth set to 0.
INSTANTIATE(
    ARCH, ARCH_POSTFIX(V64_V64V64), SIMD_TUPLE(v64_add_8, 0U, 0U),
    SIMD_TUPLE(v64_add_16, 0U, 0U), SIMD_TUPLE(v64_sadd_s16, 0U, 0U),
    SIMD_TUPLE(v64_add_32, 0U, 0U), SIMD_TUPLE(v64_sub_8, 0U, 0U),
    SIMD_TUPLE(v64_ssub_u8, 0U, 0U), SIMD_TUPLE(v64_ssub_s8, 0U, 0U),
    SIMD_TUPLE(v64_sub_16, 0U, 0U), SIMD_TUPLE(v64_ssub_s16, 0U, 0U),
    SIMD_TUPLE(v64_ssub_u16, 0U, 0U), SIMD_TUPLE(v64_sub_32, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_8, 0U, 0U), SIMD_TUPLE(v64_ziphi_8, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_16, 0U, 0U), SIMD_TUPLE(v64_ziphi_16, 0U, 0U),
    SIMD_TUPLE(v64_ziplo_32, 0U, 0U), SIMD_TUPLE(v64_ziphi_32, 0U, 0U),
    SIMD_TUPLE(v64_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v64_pack_s16_u8, 0U, 0U),
    SIMD_TUPLE(v64_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v64_unziphi_8, 0U, 0U),
    SIMD_TUPLE(v64_unziplo_8, 0U, 0U), SIMD_TUPLE(v64_unziphi_16, 0U, 0U),
    SIMD_TUPLE(v64_unziplo_16, 0U, 0U), SIMD_TUPLE(v64_or, 0U, 0U),
    SIMD_TUPLE(v64_xor, 0U, 0U), SIMD_TUPLE(v64_and, 0U, 0U),
    SIMD_TUPLE(v64_andn, 0U, 0U), SIMD_TUPLE(v64_mullo_s16, 0U, 0U),
    SIMD_TUPLE(v64_mulhi_s16, 0U, 0U), SIMD_TUPLE(v64_mullo_s32, 0U, 0U),
    SIMD_TUPLE(v64_madd_s16, 0U, 0U), SIMD_TUPLE(v64_madd_us8, 0U, 0U),
    SIMD_TUPLE(v64_avg_u8, 0U, 0U), SIMD_TUPLE(v64_rdavg_u8, 0U, 0U),
    SIMD_TUPLE(v64_avg_u16, 0U, 0U), SIMD_TUPLE(v64_min_u8, 0U, 0U),
    SIMD_TUPLE(v64_max_u8, 0U, 0U), SIMD_TUPLE(v64_min_s8, 0U, 0U),
    SIMD_TUPLE(v64_max_s8, 0U, 0U), SIMD_TUPLE(v64_min_s16, 0U, 0U),
    SIMD_TUPLE(v64_max_s16, 0U, 0U), SIMD_TUPLE(v64_cmpgt_s8, 0U, 0U),
    SIMD_TUPLE(v64_cmplt_s8, 0U, 0U), SIMD_TUPLE(v64_cmpeq_8, 0U, 0U),
    SIMD_TUPLE(v64_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v64_cmplt_s16, 0U, 0U),
    SIMD_TUPLE(v64_cmpeq_16, 0U, 0U));
390 
// Parameters for the V64_V64V64_Part2 test case. v64_shuffle_8 is the only
// entry with a non-zero mask/maskwidth (7, 8).
// NOTE(review): presumably the mask limits the generated shuffle indices;
// confirm against the TestSimd2Args definition.
INSTANTIATE(
    ARCH, ARCH_POSTFIX(V64_V64V64_Part2), SIMD_TUPLE(v64_shuffle_8, 7U, 8U),
    SIMD_TUPLE(v64_pack_s32_u16, 0U, 0U), SIMD_TUPLE(v64_rdavg_u16, 0U, 0U),
    SIMD_TUPLE(v64_sadd_s8, 0U, 0U), SIMD_TUPLE(v64_sadd_u8, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<1>, 0U, 0U), SIMD_TUPLE(imm_v64_align<2>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<3>, 0U, 0U), SIMD_TUPLE(imm_v64_align<4>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<5>, 0U, 0U), SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
    SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
399 
// Parameters for the V64_V64 test case: one tuple per intrinsic name, all
// with mask and maskwidth set to 0. The imm_* entries name one template
// instance per immediate value; the remaining immediates continue in the
// V64_V64_Part2 instantiation.
INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_s8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_s8_s16, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpacklo_s16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_u16_s32, 0U, 0U),
            SIMD_TUPLE(v64_unpackhi_s16_s32, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U));
450 
// Parameters for the V64_V64_Part2 test case: the 16 and 32 bit immediate
// shift instances that follow on from the V64_V64 instantiation. All use
// mask and maskwidth 0.
INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
            SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<2>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<6>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<10>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s16<14>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shl_n_32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_u32<28>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<1>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<4>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<8>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<12>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<16>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<20>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<24>, 0U, 0U),
            SIMD_TUPLE(imm_v64_shr_n_s32<28>, 0U, 0U));
495 
// Parameters for the V64_V64U32 test case (variable shifts). The mask is 7,
// 15 or 31 for the 8, 16 and 32 bit variants respectively, with maskwidth 32.
// NOTE(review): presumably the mask keeps the uint32_t shift count below the
// lane width; confirm against the TestSimd2Args definition.
INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64U32), SIMD_TUPLE(v64_shl_8, 7U, 32U),
            SIMD_TUPLE(v64_shr_u8, 7U, 32U), SIMD_TUPLE(v64_shr_s8, 7U, 32U),
            SIMD_TUPLE(v64_shl_16, 15U, 32U), SIMD_TUPLE(v64_shr_u16, 15U, 32U),
            SIMD_TUPLE(v64_shr_s16, 15U, 32U), SIMD_TUPLE(v64_shl_32, 31U, 32U),
            SIMD_TUPLE(v64_shr_u32, 31U, 32U),
            SIMD_TUPLE(v64_shr_s32, 31U, 32U));
502 
// Parameters for the remaining v64 test cases, i.e. those that return a
// scalar from a v64 argument or build a v64 from scalar arguments. All use
// mask and maskwidth 0.
INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V64), SIMD_TUPLE(v64_hadd_u8, 0U, 0U),
            SIMD_TUPLE(v64_u64, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64), SIMD_TUPLE(v64_hadd_s16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V64), SIMD_TUPLE(v64_low_u32, 0U, 0U),
            SIMD_TUPLE(v64_high_u32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S32_V64), SIMD_TUPLE(v64_low_s32, 0U, 0U),
            SIMD_TUPLE(v64_high_s32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V64V64), SIMD_TUPLE(v64_dotp_s16, 0U, 0U),
            SIMD_TUPLE(v64_dotp_su8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U8), SIMD_TUPLE(v64_dup_8, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U16), SIMD_TUPLE(v64_dup_16, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32), SIMD_TUPLE(v64_dup_32, 0U, 0U));

INSTANTIATE(ARCH, ARCH_POSTFIX(V64_U32U32), SIMD_TUPLE(v64_from_32, 0U, 0U));
524 
// Parameters for the v128 test cases that take two vectors and return a
// scalar (uint32_t or uint64_t). All use mask and maskwidth 0.
INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128V128), SIMD_TUPLE(v128_sad_u8, 0U, 0U),
            SIMD_TUPLE(v128_ssd_u8, 0U, 0U), SIMD_TUPLE(v128_sad_u16, 0U, 0U));
INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128V128), SIMD_TUPLE(v128_ssd_s16, 0U, 0U));
528 
// Parameters for the V128_V128V128 test case: one tuple per intrinsic name,
// all with mask and maskwidth set to 0. Further intrinsics with this
// signature are listed in the V128_V128V128_Part2 instantiation.
INSTANTIATE(
    ARCH, ARCH_POSTFIX(V128_V128V128), SIMD_TUPLE(v128_add_8, 0U, 0U),
    SIMD_TUPLE(v128_add_16, 0U, 0U), SIMD_TUPLE(v128_sadd_s16, 0U, 0U),
    SIMD_TUPLE(v128_add_32, 0U, 0U), SIMD_TUPLE(v128_sub_8, 0U, 0U),
    SIMD_TUPLE(v128_ssub_u8, 0U, 0U), SIMD_TUPLE(v128_ssub_s8, 0U, 0U),
    SIMD_TUPLE(v128_sub_16, 0U, 0U), SIMD_TUPLE(v128_ssub_s16, 0U, 0U),
    SIMD_TUPLE(v128_ssub_u16, 0U, 0U), SIMD_TUPLE(v128_sub_32, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_8, 0U, 0U), SIMD_TUPLE(v128_ziphi_8, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_16, 0U, 0U), SIMD_TUPLE(v128_ziphi_16, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_32, 0U, 0U), SIMD_TUPLE(v128_ziphi_32, 0U, 0U),
    SIMD_TUPLE(v128_ziplo_64, 0U, 0U), SIMD_TUPLE(v128_ziphi_64, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_8, 0U, 0U), SIMD_TUPLE(v128_unziplo_8, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_16, 0U, 0U), SIMD_TUPLE(v128_unziplo_16, 0U, 0U),
    SIMD_TUPLE(v128_unziphi_32, 0U, 0U), SIMD_TUPLE(v128_unziplo_32, 0U, 0U),
    SIMD_TUPLE(v128_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v128_pack_s16_u8, 0U, 0U),
    SIMD_TUPLE(v128_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v128_or, 0U, 0U),
    SIMD_TUPLE(v128_xor, 0U, 0U), SIMD_TUPLE(v128_and, 0U, 0U),
    SIMD_TUPLE(v128_andn, 0U, 0U), SIMD_TUPLE(v128_mullo_s16, 0U, 0U),
    SIMD_TUPLE(v128_mulhi_s16, 0U, 0U), SIMD_TUPLE(v128_mullo_s32, 0U, 0U),
    SIMD_TUPLE(v128_madd_s16, 0U, 0U), SIMD_TUPLE(v128_madd_us8, 0U, 0U),
    SIMD_TUPLE(v128_avg_u8, 0U, 0U), SIMD_TUPLE(v128_rdavg_u8, 0U, 0U),
    SIMD_TUPLE(v128_avg_u16, 0U, 0U), SIMD_TUPLE(v128_min_u8, 0U, 0U),
    SIMD_TUPLE(v128_max_u8, 0U, 0U), SIMD_TUPLE(v128_min_s8, 0U, 0U),
    SIMD_TUPLE(v128_max_s8, 0U, 0U), SIMD_TUPLE(v128_min_s16, 0U, 0U),
    SIMD_TUPLE(v128_max_s16, 0U, 0U), SIMD_TUPLE(v128_cmpgt_s8, 0U, 0U),
    SIMD_TUPLE(v128_cmplt_s8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_8, 0U, 0U),
    SIMD_TUPLE(v128_cmpgt_s16, 0U, 0U));
556 
// Parameters for the V128_V128V128_Part2 test case. v128_shuffle_8 is the
// only entry with a non-zero mask/maskwidth (15, 8).
// NOTE(review): presumably the mask limits the generated shuffle indices;
// confirm against the TestSimd2Args definition.
INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
            SIMD_TUPLE(v128_pack_s32_u16, 0U, 0U),
            SIMD_TUPLE(v128_rdavg_u16, 0U, 0U), SIMD_TUPLE(v128_add_64, 0U, 0U),
            SIMD_TUPLE(v128_sub_64, 0U, 0U), SIMD_TUPLE(v128_sadd_s8, 0U, 0U),
            SIMD_TUPLE(v128_sadd_u8, 0U, 0U), SIMD_TUPLE(v128_cmpeq_16, 0U, 0U),
            SIMD_TUPLE(v128_cmplt_s16, 0U, 0U),
            SIMD_TUPLE(v128_cmplt_s32, 0U, 0U),
            SIMD_TUPLE(v128_cmpeq_32, 0U, 0U),
            SIMD_TUPLE(v128_cmpgt_s32, 0U, 0U),
            SIMD_TUPLE(v128_shuffle_8, 15U, 8U),
            SIMD_TUPLE(v128_min_s32, 0U, 0U), SIMD_TUPLE(v128_max_s32, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
583 
// Parameters for the three-argument v128 test case; v128_blend_8 is its only
// entry.
INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128V128),
            SIMD_TUPLE(v128_blend_8, 0U, 0U));
586 
// Parameters for the V128_V128 test case: one tuple per intrinsic name, all
// with mask and maskwidth set to 0. The imm_* entries name one template
// instance per immediate value; the list continues in the V128_V128_PartN
// instantiations.
INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
            SIMD_TUPLE(v128_abs_s16, 0U, 0U), SIMD_TUPLE(v128_padd_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_s8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_u8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_s8_s16, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_u16_s32, 0U, 0U),
            SIMD_TUPLE(v128_unpackhi_s16_s32, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<8>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<9>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<10>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<11>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<12>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<13>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<14>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_byte<15>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<1>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<2>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<3>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<4>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<5>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<6>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
            SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U));
635 
// Parameter list for the V128_V128_Part2 fixture. Entries, in order:
//   imm_v128_shr_n_u8<2..7>, imm_v128_shr_n_s8<1..7>,
//   imm_v128_shl_n_16 / imm_v128_shr_n_u16 / imm_v128_shr_n_s16 with
//   template arguments 1, 2, 4, 6, 8, 10, 12, 14,
//   imm_v128_shl_n_32<1, 4, 8, ..., 28>, and imm_v128_shr_n_u32<1>, <4>.
// imm_v128_shr_n_u8<1> is the last entry of the preceding list, and the
// remaining imm_v128_shr_n_u32 entries follow in V128_V128_Part3.
// Every entry passes 0U, 0U as its two numeric arguments.
// NOTE(review): the _PartN split presumably keeps each list under a
// per-instantiation argument limit - confirm against the INSTANTIATE macro.
636 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
637             SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
638             SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
639             SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
640             SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
641             SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
642             SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
643             SIMD_TUPLE(imm_v128_shr_n_s8<1>, 0U, 0U),
644             SIMD_TUPLE(imm_v128_shr_n_s8<2>, 0U, 0U),
645             SIMD_TUPLE(imm_v128_shr_n_s8<3>, 0U, 0U),
646             SIMD_TUPLE(imm_v128_shr_n_s8<4>, 0U, 0U),
647             SIMD_TUPLE(imm_v128_shr_n_s8<5>, 0U, 0U),
648             SIMD_TUPLE(imm_v128_shr_n_s8<6>, 0U, 0U),
649             SIMD_TUPLE(imm_v128_shr_n_s8<7>, 0U, 0U),
650             SIMD_TUPLE(imm_v128_shl_n_16<1>, 0U, 0U),
651             SIMD_TUPLE(imm_v128_shl_n_16<2>, 0U, 0U),
652             SIMD_TUPLE(imm_v128_shl_n_16<4>, 0U, 0U),
653             SIMD_TUPLE(imm_v128_shl_n_16<6>, 0U, 0U),
654             SIMD_TUPLE(imm_v128_shl_n_16<8>, 0U, 0U),
655             SIMD_TUPLE(imm_v128_shl_n_16<10>, 0U, 0U),
656             SIMD_TUPLE(imm_v128_shl_n_16<12>, 0U, 0U),
657             SIMD_TUPLE(imm_v128_shl_n_16<14>, 0U, 0U),
658             SIMD_TUPLE(imm_v128_shr_n_u16<1>, 0U, 0U),
659             SIMD_TUPLE(imm_v128_shr_n_u16<2>, 0U, 0U),
660             SIMD_TUPLE(imm_v128_shr_n_u16<4>, 0U, 0U),
661             SIMD_TUPLE(imm_v128_shr_n_u16<6>, 0U, 0U),
662             SIMD_TUPLE(imm_v128_shr_n_u16<8>, 0U, 0U),
663             SIMD_TUPLE(imm_v128_shr_n_u16<10>, 0U, 0U),
664             SIMD_TUPLE(imm_v128_shr_n_u16<12>, 0U, 0U),
665             SIMD_TUPLE(imm_v128_shr_n_u16<14>, 0U, 0U),
666             SIMD_TUPLE(imm_v128_shr_n_s16<1>, 0U, 0U),
667             SIMD_TUPLE(imm_v128_shr_n_s16<2>, 0U, 0U),
668             SIMD_TUPLE(imm_v128_shr_n_s16<4>, 0U, 0U),
669             SIMD_TUPLE(imm_v128_shr_n_s16<6>, 0U, 0U),
670             SIMD_TUPLE(imm_v128_shr_n_s16<8>, 0U, 0U),
671             SIMD_TUPLE(imm_v128_shr_n_s16<10>, 0U, 0U),
672             SIMD_TUPLE(imm_v128_shr_n_s16<12>, 0U, 0U),
673             SIMD_TUPLE(imm_v128_shr_n_s16<14>, 0U, 0U),
674             SIMD_TUPLE(imm_v128_shl_n_32<1>, 0U, 0U),
675             SIMD_TUPLE(imm_v128_shl_n_32<4>, 0U, 0U),
676             SIMD_TUPLE(imm_v128_shl_n_32<8>, 0U, 0U),
677             SIMD_TUPLE(imm_v128_shl_n_32<12>, 0U, 0U),
678             SIMD_TUPLE(imm_v128_shl_n_32<16>, 0U, 0U),
679             SIMD_TUPLE(imm_v128_shl_n_32<20>, 0U, 0U),
680             SIMD_TUPLE(imm_v128_shl_n_32<24>, 0U, 0U),
681             SIMD_TUPLE(imm_v128_shl_n_32<28>, 0U, 0U),
682             SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
683             SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U));
684 
// Parameter list for the V128_V128_Part3 fixture. Entries, in order:
//   imm_v128_shr_n_u32<8, 12, 16, 20, 24, 28> (the <1> and <4> entries are at
//   the end of V128_V128_Part2) and imm_v128_shr_n_s32<1, 4, 8, ..., 28>.
// Every entry passes 0U, 0U as its two numeric arguments.
685 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
686             SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
687             SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
688             SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
689             SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
690             SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
691             SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
692             SIMD_TUPLE(imm_v128_shr_n_s32<1>, 0U, 0U),
693             SIMD_TUPLE(imm_v128_shr_n_s32<4>, 0U, 0U),
694             SIMD_TUPLE(imm_v128_shr_n_s32<8>, 0U, 0U),
695             SIMD_TUPLE(imm_v128_shr_n_s32<12>, 0U, 0U),
696             SIMD_TUPLE(imm_v128_shr_n_s32<16>, 0U, 0U),
697             SIMD_TUPLE(imm_v128_shr_n_s32<20>, 0U, 0U),
698             SIMD_TUPLE(imm_v128_shr_n_s32<24>, 0U, 0U),
699             SIMD_TUPLE(imm_v128_shr_n_s32<28>, 0U, 0U));
700 
// Parameter list for the V128_V128_Part4 fixture. Entries, in order:
//   imm_v128_shl_n_64, imm_v128_shr_n_u64 and imm_v128_shr_n_s64, each with
//   template arguments 1, 4, 8, 12, ..., 60, followed by v128_padd_u8.
// Every entry passes 0U, 0U as its two numeric arguments.
701 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part4),
702             SIMD_TUPLE(imm_v128_shl_n_64<1>, 0U, 0U),
703             SIMD_TUPLE(imm_v128_shl_n_64<4>, 0U, 0U),
704             SIMD_TUPLE(imm_v128_shl_n_64<8>, 0U, 0U),
705             SIMD_TUPLE(imm_v128_shl_n_64<12>, 0U, 0U),
706             SIMD_TUPLE(imm_v128_shl_n_64<16>, 0U, 0U),
707             SIMD_TUPLE(imm_v128_shl_n_64<20>, 0U, 0U),
708             SIMD_TUPLE(imm_v128_shl_n_64<24>, 0U, 0U),
709             SIMD_TUPLE(imm_v128_shl_n_64<28>, 0U, 0U),
710             SIMD_TUPLE(imm_v128_shl_n_64<32>, 0U, 0U),
711             SIMD_TUPLE(imm_v128_shl_n_64<36>, 0U, 0U),
712             SIMD_TUPLE(imm_v128_shl_n_64<40>, 0U, 0U),
713             SIMD_TUPLE(imm_v128_shl_n_64<44>, 0U, 0U),
714             SIMD_TUPLE(imm_v128_shl_n_64<48>, 0U, 0U),
715             SIMD_TUPLE(imm_v128_shl_n_64<52>, 0U, 0U),
716             SIMD_TUPLE(imm_v128_shl_n_64<56>, 0U, 0U),
717             SIMD_TUPLE(imm_v128_shl_n_64<60>, 0U, 0U),
718             SIMD_TUPLE(imm_v128_shr_n_u64<1>, 0U, 0U),
719             SIMD_TUPLE(imm_v128_shr_n_u64<4>, 0U, 0U),
720             SIMD_TUPLE(imm_v128_shr_n_u64<8>, 0U, 0U),
721             SIMD_TUPLE(imm_v128_shr_n_u64<12>, 0U, 0U),
722             SIMD_TUPLE(imm_v128_shr_n_u64<16>, 0U, 0U),
723             SIMD_TUPLE(imm_v128_shr_n_u64<20>, 0U, 0U),
724             SIMD_TUPLE(imm_v128_shr_n_u64<24>, 0U, 0U),
725             SIMD_TUPLE(imm_v128_shr_n_u64<28>, 0U, 0U),
726             SIMD_TUPLE(imm_v128_shr_n_u64<32>, 0U, 0U),
727             SIMD_TUPLE(imm_v128_shr_n_u64<36>, 0U, 0U),
728             SIMD_TUPLE(imm_v128_shr_n_u64<40>, 0U, 0U),
729             SIMD_TUPLE(imm_v128_shr_n_u64<44>, 0U, 0U),
730             SIMD_TUPLE(imm_v128_shr_n_u64<48>, 0U, 0U),
731             SIMD_TUPLE(imm_v128_shr_n_u64<52>, 0U, 0U),
732             SIMD_TUPLE(imm_v128_shr_n_u64<56>, 0U, 0U),
733             SIMD_TUPLE(imm_v128_shr_n_u64<60>, 0U, 0U),
734             SIMD_TUPLE(imm_v128_shr_n_s64<1>, 0U, 0U),
735             SIMD_TUPLE(imm_v128_shr_n_s64<4>, 0U, 0U),
736             SIMD_TUPLE(imm_v128_shr_n_s64<8>, 0U, 0U),
737             SIMD_TUPLE(imm_v128_shr_n_s64<12>, 0U, 0U),
738             SIMD_TUPLE(imm_v128_shr_n_s64<16>, 0U, 0U),
739             SIMD_TUPLE(imm_v128_shr_n_s64<20>, 0U, 0U),
740             SIMD_TUPLE(imm_v128_shr_n_s64<24>, 0U, 0U),
741             SIMD_TUPLE(imm_v128_shr_n_s64<28>, 0U, 0U),
742             SIMD_TUPLE(imm_v128_shr_n_s64<32>, 0U, 0U),
743             SIMD_TUPLE(imm_v128_shr_n_s64<36>, 0U, 0U),
744             SIMD_TUPLE(imm_v128_shr_n_s64<40>, 0U, 0U),
745             SIMD_TUPLE(imm_v128_shr_n_s64<44>, 0U, 0U),
746             SIMD_TUPLE(imm_v128_shr_n_s64<48>, 0U, 0U),
747             SIMD_TUPLE(imm_v128_shr_n_s64<52>, 0U, 0U),
748             SIMD_TUPLE(imm_v128_shr_n_s64<56>, 0U, 0U),
749             SIMD_TUPLE(imm_v128_shr_n_s64<60>, 0U, 0U),
750             SIMD_TUPLE(v128_padd_u8, 0U, 0U));
751 
// Parameter list for the V128_V64V64 fixture: v128_from_v64, v128_zip_8,
// v128_zip_16, v128_zip_32 and v128_mul_s16, each with 0U, 0U as its two
// numeric arguments.
752 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64V64), SIMD_TUPLE(v128_from_v64, 0U, 0U),
753             SIMD_TUPLE(v128_zip_8, 0U, 0U), SIMD_TUPLE(v128_zip_16, 0U, 0U),
754             SIMD_TUPLE(v128_zip_32, 0U, 0U), SIMD_TUPLE(v128_mul_s16, 0U, 0U));
755 
// Parameter list for the V128_U64U64 fixture: the single entry v128_from_64,
// with 0U, 0U as its two numeric arguments.
756 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64U64), SIMD_TUPLE(v128_from_64, 0U, 0U));
757 
// Parameter list for the V128_V64 fixture: the four v128_unpack_* entries
// (u8_s16, s8_s16, u16_s32, s16_s32), each with 0U, 0U as its two numeric
// arguments.
758 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V64),
759             SIMD_TUPLE(v128_unpack_u8_s16, 0U, 0U),
760             SIMD_TUPLE(v128_unpack_s8_s16, 0U, 0U),
761             SIMD_TUPLE(v128_unpack_u16_s32, 0U, 0U),
762             SIMD_TUPLE(v128_unpack_s16_s32, 0U, 0U));
763 
// Parameter list for the V128_V128U32 fixture: v128_shl_* / v128_shr_u* /
// v128_shr_s* for the 8, 16, 32 and 64 suffixes. Unlike most lists in this
// file the numeric arguments are non-zero: the first is 7U, 15U, 31U or 63U
// (matching the 8/16/32/64 suffix) and the second is always 32U.
// NOTE(review): these are presumably the mask and maskwidth values read by
// TestIntrinsic::SetUp - confirm against the SIMD_TUPLE definition.
764 INSTANTIATE(
765     ARCH, ARCH_POSTFIX(V128_V128U32), SIMD_TUPLE(v128_shl_8, 7U, 32U),
766     SIMD_TUPLE(v128_shr_u8, 7U, 32U), SIMD_TUPLE(v128_shr_s8, 7U, 32U),
767     SIMD_TUPLE(v128_shl_16, 15U, 32U), SIMD_TUPLE(v128_shr_u16, 15U, 32U),
768     SIMD_TUPLE(v128_shr_s16, 15U, 32U), SIMD_TUPLE(v128_shl_32, 31U, 32U),
769     SIMD_TUPLE(v128_shr_u32, 31U, 32U), SIMD_TUPLE(v128_shr_s32, 31U, 32U),
770     SIMD_TUPLE(v128_shl_64, 63U, 32U), SIMD_TUPLE(v128_shr_u64, 63U, 32U),
771     SIMD_TUPLE(v128_shr_s64, 63U, 32U));
772 
// Parameter list for the U32_V128 fixture: v128_low_u32 and v128_movemask_8,
// each with 0U, 0U as its two numeric arguments.
773 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V128), SIMD_TUPLE(v128_low_u32, 0U, 0U),
774             SIMD_TUPLE(v128_movemask_8, 0U, 0U));
775 
// Parameter list for the U64_V128 fixture: the single entry v128_hadd_u8,
// with 0U, 0U as its two numeric arguments.
776 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V128), SIMD_TUPLE(v128_hadd_u8, 0U, 0U));
777 
// Parameter list for the V64_V128 fixture: v128_low_v64 and v128_high_v64,
// each with 0U, 0U as its two numeric arguments.
778 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V128), SIMD_TUPLE(v128_low_v64, 0U, 0U),
779             SIMD_TUPLE(v128_high_v64, 0U, 0U));
780 
// Parameter list for the V128_U8 fixture: the single entry v128_dup_8, with
// 0U, 0U as its two numeric arguments.
781 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U8), SIMD_TUPLE(v128_dup_8, 0U, 0U));
782 
// Parameter list for the V128_U16 fixture: the single entry v128_dup_16, with
// 0U, 0U as its two numeric arguments.
783 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U16), SIMD_TUPLE(v128_dup_16, 0U, 0U));
784 
// Parameter list for the V128_U32 fixture: the single entry v128_dup_32, with
// 0U, 0U as its two numeric arguments.
785 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U32), SIMD_TUPLE(v128_dup_32, 0U, 0U));
786 
// Parameter list for the V128_U64 fixture: the single entry v128_dup_64, with
// 0U, 0U as its two numeric arguments.
787 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_U64), SIMD_TUPLE(v128_dup_64, 0U, 0U));
788 
// Parameter list for the S64_V128V128 fixture: v128_dotp_s16, v128_dotp_s32
// and v128_dotp_su8, each with 0U, 0U as its two numeric arguments.
789 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V128V128), SIMD_TUPLE(v128_dotp_s16, 0U, 0U),
790             SIMD_TUPLE(v128_dotp_s32, 0U, 0U),
791             SIMD_TUPLE(v128_dotp_su8, 0U, 0U));
792 
// Parameter list for the U32_V256V256 fixture: v256_sad_u8, v256_ssd_u8 and
// v256_sad_u16, each with 0U, 0U as its two numeric arguments.
793 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256V256), SIMD_TUPLE(v256_sad_u8, 0U, 0U),
794             SIMD_TUPLE(v256_ssd_u8, 0U, 0U), SIMD_TUPLE(v256_sad_u16, 0U, 0U));
795 
// Parameter list for the U64_V256 fixture: v256_hadd_u8 and v256_low_u64,
// each with 0U, 0U as its two numeric arguments.
796 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256), SIMD_TUPLE(v256_hadd_u8, 0U, 0U),
797             SIMD_TUPLE(v256_low_u64, 0U, 0U));
798 
// Parameter list for the S64_V256V256 fixture: v256_dotp_s16, v256_dotp_s32
// and v256_dotp_su8, each with 0U, 0U as its two numeric arguments.
799 INSTANTIATE(ARCH, ARCH_POSTFIX(S64_V256V256), SIMD_TUPLE(v256_dotp_s16, 0U, 0U),
800             SIMD_TUPLE(v256_dotp_s32, 0U, 0U),
801             SIMD_TUPLE(v256_dotp_su8, 0U, 0U));
802 
// Parameter list for the U64_V256V256 fixture: the single entry v256_ssd_s16,
// with 0U, 0U as its two numeric arguments.
803 INSTANTIATE(ARCH, ARCH_POSTFIX(U64_V256V256), SIMD_TUPLE(v256_ssd_s16, 0U, 0U));
804 
// Parameter list for the V256_V256V256 fixture. It names the v256 add/sub
// family (add, sadd, sub, ssub), zip/unzip, pack, bitwise or/xor/and/andn,
// mullo/mulhi/madd, avg/rdavg, min/max and the cmpgt_s8/cmplt_s8 entries.
// Every entry passes 0U, 0U as its two numeric arguments. The list continues
// in V256_V256V256_Part2.
805 INSTANTIATE(
806     ARCH, ARCH_POSTFIX(V256_V256V256), SIMD_TUPLE(v256_add_8, 0U, 0U),
807     SIMD_TUPLE(v256_add_16, 0U, 0U), SIMD_TUPLE(v256_sadd_s16, 0U, 0U),
808     SIMD_TUPLE(v256_add_32, 0U, 0U), SIMD_TUPLE(v256_sub_8, 0U, 0U),
809     SIMD_TUPLE(v256_ssub_u8, 0U, 0U), SIMD_TUPLE(v256_ssub_s8, 0U, 0U),
810     SIMD_TUPLE(v256_sub_16, 0U, 0U), SIMD_TUPLE(v256_ssub_s16, 0U, 0U),
811     SIMD_TUPLE(v256_ssub_u16, 0U, 0U), SIMD_TUPLE(v256_sub_32, 0U, 0U),
812     SIMD_TUPLE(v256_ziplo_8, 0U, 0U), SIMD_TUPLE(v256_ziphi_8, 0U, 0U),
813     SIMD_TUPLE(v256_ziplo_16, 0U, 0U), SIMD_TUPLE(v256_ziphi_16, 0U, 0U),
814     SIMD_TUPLE(v256_ziplo_32, 0U, 0U), SIMD_TUPLE(v256_ziphi_32, 0U, 0U),
815     SIMD_TUPLE(v256_ziplo_64, 0U, 0U), SIMD_TUPLE(v256_ziphi_64, 0U, 0U),
816     SIMD_TUPLE(v256_ziplo_128, 0U, 0U), SIMD_TUPLE(v256_ziphi_128, 0U, 0U),
817     SIMD_TUPLE(v256_unziphi_8, 0U, 0U), SIMD_TUPLE(v256_unziplo_8, 0U, 0U),
818     SIMD_TUPLE(v256_unziphi_16, 0U, 0U), SIMD_TUPLE(v256_unziplo_16, 0U, 0U),
819     SIMD_TUPLE(v256_unziphi_32, 0U, 0U), SIMD_TUPLE(v256_unziplo_32, 0U, 0U),
820     SIMD_TUPLE(v256_pack_s32_s16, 0U, 0U), SIMD_TUPLE(v256_pack_s16_u8, 0U, 0U),
821     SIMD_TUPLE(v256_pack_s16_s8, 0U, 0U), SIMD_TUPLE(v256_or, 0U, 0U),
822     SIMD_TUPLE(v256_xor, 0U, 0U), SIMD_TUPLE(v256_and, 0U, 0U),
823     SIMD_TUPLE(v256_andn, 0U, 0U), SIMD_TUPLE(v256_mullo_s16, 0U, 0U),
824     SIMD_TUPLE(v256_mulhi_s16, 0U, 0U), SIMD_TUPLE(v256_mullo_s32, 0U, 0U),
825     SIMD_TUPLE(v256_madd_s16, 0U, 0U), SIMD_TUPLE(v256_madd_us8, 0U, 0U),
826     SIMD_TUPLE(v256_avg_u8, 0U, 0U), SIMD_TUPLE(v256_rdavg_u8, 0U, 0U),
827     SIMD_TUPLE(v256_avg_u16, 0U, 0U), SIMD_TUPLE(v256_min_u8, 0U, 0U),
828     SIMD_TUPLE(v256_max_u8, 0U, 0U), SIMD_TUPLE(v256_min_s8, 0U, 0U),
829     SIMD_TUPLE(v256_max_s8, 0U, 0U), SIMD_TUPLE(v256_min_s16, 0U, 0U),
830     SIMD_TUPLE(v256_max_s16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s8, 0U, 0U),
831     SIMD_TUPLE(v256_cmplt_s8, 0U, 0U));
832 
// Parameter list for the V256_V256V256_Part2 fixture. It names the remaining
// compare entries (cmpeq_8, cmpgt/cmplt/cmpeq for 16 and 32), min/max_s32,
// add/sub_64, sadd_s8/sadd_u8, pack_s32_u16, rdavg_u16, unziphi/unziplo_64,
// the two shuffles, and imm_v256_align<1..31>. imm_v256_align<1> and <2> are
// interleaved with other entries before the contiguous <3..31> run.
// All entries pass 0U, 0U except v256_shuffle_8 (31U, 8U) and
// v256_pshuffle_8 (15U, 8U).
// NOTE(review): the non-zero pairs are presumably the mask and maskwidth
// values read by TestIntrinsic::SetUp - confirm against SIMD_TUPLE.
833 INSTANTIATE(
834     ARCH, ARCH_POSTFIX(V256_V256V256_Part2), SIMD_TUPLE(v256_cmpeq_8, 0U, 0U),
835     SIMD_TUPLE(v256_min_s32, 0U, 0U), SIMD_TUPLE(v256_max_s32, 0U, 0U),
836     SIMD_TUPLE(v256_add_64, 0U, 0U), SIMD_TUPLE(v256_sub_64, 0U, 0U),
837     SIMD_TUPLE(v256_cmpgt_s16, 0U, 0U), SIMD_TUPLE(v256_cmplt_s16, 0U, 0U),
838     SIMD_TUPLE(v256_cmpeq_16, 0U, 0U), SIMD_TUPLE(v256_cmpgt_s32, 0U, 0U),
839     SIMD_TUPLE(v256_cmplt_s32, 0U, 0U), SIMD_TUPLE(v256_cmpeq_32, 0U, 0U),
840     SIMD_TUPLE(v256_shuffle_8, 31U, 8U), SIMD_TUPLE(v256_pshuffle_8, 15U, 8U),
841     SIMD_TUPLE(imm_v256_align<1>, 0U, 0U), SIMD_TUPLE(v256_sadd_s8, 0U, 0U),
842     SIMD_TUPLE(v256_sadd_u8, 0U, 0U), SIMD_TUPLE(v256_pack_s32_u16, 0U, 0U),
843     SIMD_TUPLE(v256_rdavg_u16, 0U, 0U), SIMD_TUPLE(imm_v256_align<2>, 0U, 0U),
844     SIMD_TUPLE(v256_unziphi_64, 0U, 0U), SIMD_TUPLE(v256_unziplo_64, 0U, 0U),
845     SIMD_TUPLE(imm_v256_align<3>, 0U, 0U),
846     SIMD_TUPLE(imm_v256_align<4>, 0U, 0U),
847     SIMD_TUPLE(imm_v256_align<5>, 0U, 0U),
848     SIMD_TUPLE(imm_v256_align<6>, 0U, 0U),
849     SIMD_TUPLE(imm_v256_align<7>, 0U, 0U),
850     SIMD_TUPLE(imm_v256_align<8>, 0U, 0U),
851     SIMD_TUPLE(imm_v256_align<9>, 0U, 0U),
852     SIMD_TUPLE(imm_v256_align<10>, 0U, 0U),
853     SIMD_TUPLE(imm_v256_align<11>, 0U, 0U),
854     SIMD_TUPLE(imm_v256_align<12>, 0U, 0U),
855     SIMD_TUPLE(imm_v256_align<13>, 0U, 0U),
856     SIMD_TUPLE(imm_v256_align<14>, 0U, 0U),
857     SIMD_TUPLE(imm_v256_align<15>, 0U, 0U),
858     SIMD_TUPLE(imm_v256_align<16>, 0U, 0U),
859     SIMD_TUPLE(imm_v256_align<17>, 0U, 0U),
860     SIMD_TUPLE(imm_v256_align<18>, 0U, 0U),
861     SIMD_TUPLE(imm_v256_align<19>, 0U, 0U),
862     SIMD_TUPLE(imm_v256_align<20>, 0U, 0U),
863     SIMD_TUPLE(imm_v256_align<21>, 0U, 0U),
864     SIMD_TUPLE(imm_v256_align<22>, 0U, 0U),
865     SIMD_TUPLE(imm_v256_align<23>, 0U, 0U),
866     SIMD_TUPLE(imm_v256_align<24>, 0U, 0U),
867     SIMD_TUPLE(imm_v256_align<25>, 0U, 0U),
868     SIMD_TUPLE(imm_v256_align<26>, 0U, 0U),
869     SIMD_TUPLE(imm_v256_align<27>, 0U, 0U),
870     SIMD_TUPLE(imm_v256_align<28>, 0U, 0U),
871     SIMD_TUPLE(imm_v256_align<29>, 0U, 0U),
872     SIMD_TUPLE(imm_v256_align<30>, 0U, 0U),
873     SIMD_TUPLE(imm_v256_align<31>, 0U, 0U));
874 
// Parameter list for the V256_V128V128 fixture: v256_from_v128, v256_zip_8,
// v256_zip_16, v256_zip_32 and v256_mul_s16, each with 0U, 0U as its two
// numeric arguments.
875 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128V128),
876             SIMD_TUPLE(v256_from_v128, 0U, 0U), SIMD_TUPLE(v256_zip_8, 0U, 0U),
877             SIMD_TUPLE(v256_zip_16, 0U, 0U), SIMD_TUPLE(v256_zip_32, 0U, 0U),
878             SIMD_TUPLE(v256_mul_s16, 0U, 0U));
879 
// Parameter list for the V256_V128 fixture: the four v256_unpack_* entries
// (u8_s16, s8_s16, u16_s32, s16_s32), each with 0U, 0U as its two numeric
// arguments.
880 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V128),
881             SIMD_TUPLE(v256_unpack_u8_s16, 0U, 0U),
882             SIMD_TUPLE(v256_unpack_s8_s16, 0U, 0U),
883             SIMD_TUPLE(v256_unpack_u16_s32, 0U, 0U),
884             SIMD_TUPLE(v256_unpack_s16_s32, 0U, 0U));
885 
// Parameter list for the V256_V256U32 fixture: v256_shl_* / v256_shr_u* /
// v256_shr_s* for the 8, 16, 32 and 64 suffixes. The numeric arguments are
// non-zero here: the first is 7U, 15U, 31U or 63U (matching the 8/16/32/64
// suffix) and the second is always 32U.
// NOTE(review): these are presumably the mask and maskwidth values read by
// TestIntrinsic::SetUp - confirm against the SIMD_TUPLE definition.
886 INSTANTIATE(
887     ARCH, ARCH_POSTFIX(V256_V256U32), SIMD_TUPLE(v256_shl_8, 7U, 32U),
888     SIMD_TUPLE(v256_shr_u8, 7U, 32U), SIMD_TUPLE(v256_shr_s8, 7U, 32U),
889     SIMD_TUPLE(v256_shl_16, 15U, 32U), SIMD_TUPLE(v256_shr_u16, 15U, 32U),
890     SIMD_TUPLE(v256_shr_s16, 15U, 32U), SIMD_TUPLE(v256_shl_32, 31U, 32U),
891     SIMD_TUPLE(v256_shr_u32, 31U, 32U), SIMD_TUPLE(v256_shr_s32, 31U, 32U),
892     SIMD_TUPLE(v256_shl_64, 63U, 32U), SIMD_TUPLE(v256_shr_u64, 63U, 32U),
893     SIMD_TUPLE(v256_shr_s64, 63U, 32U));
894 
// Parameter list for the V256_V256 fixture. Entries, in order:
//   v256_abs_s8, v256_abs_s16, v256_padd_s16,
//   v256_unpacklo_* and v256_unpackhi_* (u8_s16, s8_s16, u16_s32, s16_s32),
//   imm_v256_shr_n_byte<1..31>, and imm_v256_shl_n_byte<1..8>.
// The remaining imm_v256_shl_n_byte entries follow in V256_V256_Part2.
// Every entry passes 0U, 0U as its two numeric arguments.
// NOTE(review): the _PartN split presumably keeps each list under a
// per-instantiation argument limit - confirm against the INSTANTIATE macro.
895 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256), SIMD_TUPLE(v256_abs_s8, 0U, 0U),
896             SIMD_TUPLE(v256_abs_s16, 0U, 0U), SIMD_TUPLE(v256_padd_s16, 0U, 0U),
897             SIMD_TUPLE(v256_unpacklo_u8_s16, 0U, 0U),
898             SIMD_TUPLE(v256_unpacklo_s8_s16, 0U, 0U),
899             SIMD_TUPLE(v256_unpacklo_u16_s32, 0U, 0U),
900             SIMD_TUPLE(v256_unpacklo_s16_s32, 0U, 0U),
901             SIMD_TUPLE(v256_unpackhi_u8_s16, 0U, 0U),
902             SIMD_TUPLE(v256_unpackhi_s8_s16, 0U, 0U),
903             SIMD_TUPLE(v256_unpackhi_u16_s32, 0U, 0U),
904             SIMD_TUPLE(v256_unpackhi_s16_s32, 0U, 0U),
905             SIMD_TUPLE(imm_v256_shr_n_byte<1>, 0U, 0U),
906             SIMD_TUPLE(imm_v256_shr_n_byte<2>, 0U, 0U),
907             SIMD_TUPLE(imm_v256_shr_n_byte<3>, 0U, 0U),
908             SIMD_TUPLE(imm_v256_shr_n_byte<4>, 0U, 0U),
909             SIMD_TUPLE(imm_v256_shr_n_byte<5>, 0U, 0U),
910             SIMD_TUPLE(imm_v256_shr_n_byte<6>, 0U, 0U),
911             SIMD_TUPLE(imm_v256_shr_n_byte<7>, 0U, 0U),
912             SIMD_TUPLE(imm_v256_shr_n_byte<8>, 0U, 0U),
913             SIMD_TUPLE(imm_v256_shr_n_byte<9>, 0U, 0U),
914             SIMD_TUPLE(imm_v256_shr_n_byte<10>, 0U, 0U),
915             SIMD_TUPLE(imm_v256_shr_n_byte<11>, 0U, 0U),
916             SIMD_TUPLE(imm_v256_shr_n_byte<12>, 0U, 0U),
917             SIMD_TUPLE(imm_v256_shr_n_byte<13>, 0U, 0U),
918             SIMD_TUPLE(imm_v256_shr_n_byte<14>, 0U, 0U),
919             SIMD_TUPLE(imm_v256_shr_n_byte<15>, 0U, 0U),
920             SIMD_TUPLE(imm_v256_shr_n_byte<16>, 0U, 0U),
921             SIMD_TUPLE(imm_v256_shr_n_byte<17>, 0U, 0U),
922             SIMD_TUPLE(imm_v256_shr_n_byte<18>, 0U, 0U),
923             SIMD_TUPLE(imm_v256_shr_n_byte<19>, 0U, 0U),
924             SIMD_TUPLE(imm_v256_shr_n_byte<20>, 0U, 0U),
925             SIMD_TUPLE(imm_v256_shr_n_byte<21>, 0U, 0U),
926             SIMD_TUPLE(imm_v256_shr_n_byte<22>, 0U, 0U),
927             SIMD_TUPLE(imm_v256_shr_n_byte<23>, 0U, 0U),
928             SIMD_TUPLE(imm_v256_shr_n_byte<24>, 0U, 0U),
929             SIMD_TUPLE(imm_v256_shr_n_byte<25>, 0U, 0U),
930             SIMD_TUPLE(imm_v256_shr_n_byte<26>, 0U, 0U),
931             SIMD_TUPLE(imm_v256_shr_n_byte<27>, 0U, 0U),
932             SIMD_TUPLE(imm_v256_shr_n_byte<28>, 0U, 0U),
933             SIMD_TUPLE(imm_v256_shr_n_byte<29>, 0U, 0U),
934             SIMD_TUPLE(imm_v256_shr_n_byte<30>, 0U, 0U),
935             SIMD_TUPLE(imm_v256_shr_n_byte<31>, 0U, 0U),
936             SIMD_TUPLE(imm_v256_shl_n_byte<1>, 0U, 0U),
937             SIMD_TUPLE(imm_v256_shl_n_byte<2>, 0U, 0U),
938             SIMD_TUPLE(imm_v256_shl_n_byte<3>, 0U, 0U),
939             SIMD_TUPLE(imm_v256_shl_n_byte<4>, 0U, 0U),
940             SIMD_TUPLE(imm_v256_shl_n_byte<5>, 0U, 0U),
941             SIMD_TUPLE(imm_v256_shl_n_byte<6>, 0U, 0U),
942             SIMD_TUPLE(imm_v256_shl_n_byte<7>, 0U, 0U),
943             SIMD_TUPLE(imm_v256_shl_n_byte<8>, 0U, 0U));
944 
// Parameter list for the V256_V256_Part2 fixture. Entries, in order:
//   imm_v256_shl_n_byte<9..31> (the <1..8> entries are at the end of the
//   V256_V256 list), imm_v256_shl_n_8<1..7>, imm_v256_shr_n_u8<1..7>,
//   imm_v256_shr_n_s8<1..7>, and imm_v256_shl_n_16<1, 2, 4, 6, 8, 10>.
// The remaining imm_v256_shl_n_16 entries follow in V256_V256_Part3.
// Every entry passes 0U, 0U as its two numeric arguments.
945 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part2),
946             SIMD_TUPLE(imm_v256_shl_n_byte<9>, 0U, 0U),
947             SIMD_TUPLE(imm_v256_shl_n_byte<10>, 0U, 0U),
948             SIMD_TUPLE(imm_v256_shl_n_byte<11>, 0U, 0U),
949             SIMD_TUPLE(imm_v256_shl_n_byte<12>, 0U, 0U),
950             SIMD_TUPLE(imm_v256_shl_n_byte<13>, 0U, 0U),
951             SIMD_TUPLE(imm_v256_shl_n_byte<14>, 0U, 0U),
952             SIMD_TUPLE(imm_v256_shl_n_byte<15>, 0U, 0U),
953             SIMD_TUPLE(imm_v256_shl_n_byte<16>, 0U, 0U),
954             SIMD_TUPLE(imm_v256_shl_n_byte<17>, 0U, 0U),
955             SIMD_TUPLE(imm_v256_shl_n_byte<18>, 0U, 0U),
956             SIMD_TUPLE(imm_v256_shl_n_byte<19>, 0U, 0U),
957             SIMD_TUPLE(imm_v256_shl_n_byte<20>, 0U, 0U),
958             SIMD_TUPLE(imm_v256_shl_n_byte<21>, 0U, 0U),
959             SIMD_TUPLE(imm_v256_shl_n_byte<22>, 0U, 0U),
960             SIMD_TUPLE(imm_v256_shl_n_byte<23>, 0U, 0U),
961             SIMD_TUPLE(imm_v256_shl_n_byte<24>, 0U, 0U),
962             SIMD_TUPLE(imm_v256_shl_n_byte<25>, 0U, 0U),
963             SIMD_TUPLE(imm_v256_shl_n_byte<26>, 0U, 0U),
964             SIMD_TUPLE(imm_v256_shl_n_byte<27>, 0U, 0U),
965             SIMD_TUPLE(imm_v256_shl_n_byte<28>, 0U, 0U),
966             SIMD_TUPLE(imm_v256_shl_n_byte<29>, 0U, 0U),
967             SIMD_TUPLE(imm_v256_shl_n_byte<30>, 0U, 0U),
968             SIMD_TUPLE(imm_v256_shl_n_byte<31>, 0U, 0U),
969             SIMD_TUPLE(imm_v256_shl_n_8<1>, 0U, 0U),
970             SIMD_TUPLE(imm_v256_shl_n_8<2>, 0U, 0U),
971             SIMD_TUPLE(imm_v256_shl_n_8<3>, 0U, 0U),
972             SIMD_TUPLE(imm_v256_shl_n_8<4>, 0U, 0U),
973             SIMD_TUPLE(imm_v256_shl_n_8<5>, 0U, 0U),
974             SIMD_TUPLE(imm_v256_shl_n_8<6>, 0U, 0U),
975             SIMD_TUPLE(imm_v256_shl_n_8<7>, 0U, 0U),
976             SIMD_TUPLE(imm_v256_shr_n_u8<1>, 0U, 0U),
977             SIMD_TUPLE(imm_v256_shr_n_u8<2>, 0U, 0U),
978             SIMD_TUPLE(imm_v256_shr_n_u8<3>, 0U, 0U),
979             SIMD_TUPLE(imm_v256_shr_n_u8<4>, 0U, 0U),
980             SIMD_TUPLE(imm_v256_shr_n_u8<5>, 0U, 0U),
981             SIMD_TUPLE(imm_v256_shr_n_u8<6>, 0U, 0U),
982             SIMD_TUPLE(imm_v256_shr_n_u8<7>, 0U, 0U),
983             SIMD_TUPLE(imm_v256_shr_n_s8<1>, 0U, 0U),
984             SIMD_TUPLE(imm_v256_shr_n_s8<2>, 0U, 0U),
985             SIMD_TUPLE(imm_v256_shr_n_s8<3>, 0U, 0U),
986             SIMD_TUPLE(imm_v256_shr_n_s8<4>, 0U, 0U),
987             SIMD_TUPLE(imm_v256_shr_n_s8<5>, 0U, 0U),
988             SIMD_TUPLE(imm_v256_shr_n_s8<6>, 0U, 0U),
989             SIMD_TUPLE(imm_v256_shr_n_s8<7>, 0U, 0U),
990             SIMD_TUPLE(imm_v256_shl_n_16<1>, 0U, 0U),
991             SIMD_TUPLE(imm_v256_shl_n_16<2>, 0U, 0U),
992             SIMD_TUPLE(imm_v256_shl_n_16<4>, 0U, 0U),
993             SIMD_TUPLE(imm_v256_shl_n_16<6>, 0U, 0U),
994             SIMD_TUPLE(imm_v256_shl_n_16<8>, 0U, 0U),
995             SIMD_TUPLE(imm_v256_shl_n_16<10>, 0U, 0U));
996 
// Parameter list for the V256_V256_Part3 fixture. Entries, in order:
//   imm_v256_shl_n_16<12>, <14> (earlier template arguments are at the end of
//   V256_V256_Part2),
//   imm_v256_shr_n_u16 and imm_v256_shr_n_s16 with template arguments
//   1, 2, 4, 6, 8, 10, 12, 14,
//   imm_v256_shl_n_32, imm_v256_shr_n_u32 and imm_v256_shr_n_s32 with
//   template arguments 1, 4, 8, 12, ..., 28.
// Every entry passes 0U, 0U as its two numeric arguments.
997 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part3),
998             SIMD_TUPLE(imm_v256_shl_n_16<12>, 0U, 0U),
999             SIMD_TUPLE(imm_v256_shl_n_16<14>, 0U, 0U),
1000             SIMD_TUPLE(imm_v256_shr_n_u16<1>, 0U, 0U),
1001             SIMD_TUPLE(imm_v256_shr_n_u16<2>, 0U, 0U),
1002             SIMD_TUPLE(imm_v256_shr_n_u16<4>, 0U, 0U),
1003             SIMD_TUPLE(imm_v256_shr_n_u16<6>, 0U, 0U),
1004             SIMD_TUPLE(imm_v256_shr_n_u16<8>, 0U, 0U),
1005             SIMD_TUPLE(imm_v256_shr_n_u16<10>, 0U, 0U),
1006             SIMD_TUPLE(imm_v256_shr_n_u16<12>, 0U, 0U),
1007             SIMD_TUPLE(imm_v256_shr_n_u16<14>, 0U, 0U),
1008             SIMD_TUPLE(imm_v256_shr_n_s16<1>, 0U, 0U),
1009             SIMD_TUPLE(imm_v256_shr_n_s16<2>, 0U, 0U),
1010             SIMD_TUPLE(imm_v256_shr_n_s16<4>, 0U, 0U),
1011             SIMD_TUPLE(imm_v256_shr_n_s16<6>, 0U, 0U),
1012             SIMD_TUPLE(imm_v256_shr_n_s16<8>, 0U, 0U),
1013             SIMD_TUPLE(imm_v256_shr_n_s16<10>, 0U, 0U),
1014             SIMD_TUPLE(imm_v256_shr_n_s16<12>, 0U, 0U),
1015             SIMD_TUPLE(imm_v256_shr_n_s16<14>, 0U, 0U),
1016             SIMD_TUPLE(imm_v256_shl_n_32<1>, 0U, 0U),
1017             SIMD_TUPLE(imm_v256_shl_n_32<4>, 0U, 0U),
1018             SIMD_TUPLE(imm_v256_shl_n_32<8>, 0U, 0U),
1019             SIMD_TUPLE(imm_v256_shl_n_32<12>, 0U, 0U),
1020             SIMD_TUPLE(imm_v256_shl_n_32<16>, 0U, 0U),
1021             SIMD_TUPLE(imm_v256_shl_n_32<20>, 0U, 0U),
1022             SIMD_TUPLE(imm_v256_shl_n_32<24>, 0U, 0U),
1023             SIMD_TUPLE(imm_v256_shl_n_32<28>, 0U, 0U),
1024             SIMD_TUPLE(imm_v256_shr_n_u32<1>, 0U, 0U),
1025             SIMD_TUPLE(imm_v256_shr_n_u32<4>, 0U, 0U),
1026             SIMD_TUPLE(imm_v256_shr_n_u32<8>, 0U, 0U),
1027             SIMD_TUPLE(imm_v256_shr_n_u32<12>, 0U, 0U),
1028             SIMD_TUPLE(imm_v256_shr_n_u32<16>, 0U, 0U),
1029             SIMD_TUPLE(imm_v256_shr_n_u32<20>, 0U, 0U),
1030             SIMD_TUPLE(imm_v256_shr_n_u32<24>, 0U, 0U),
1031             SIMD_TUPLE(imm_v256_shr_n_u32<28>, 0U, 0U),
1032             SIMD_TUPLE(imm_v256_shr_n_s32<1>, 0U, 0U),
1033             SIMD_TUPLE(imm_v256_shr_n_s32<4>, 0U, 0U),
1034             SIMD_TUPLE(imm_v256_shr_n_s32<8>, 0U, 0U),
1035             SIMD_TUPLE(imm_v256_shr_n_s32<12>, 0U, 0U),
1036             SIMD_TUPLE(imm_v256_shr_n_s32<16>, 0U, 0U),
1037             SIMD_TUPLE(imm_v256_shr_n_s32<20>, 0U, 0U),
1038             SIMD_TUPLE(imm_v256_shr_n_s32<24>, 0U, 0U),
1039             SIMD_TUPLE(imm_v256_shr_n_s32<28>, 0U, 0U));
1040 
// Parameter list for the V256_V256_Part4 fixture. Entries, in order:
//   imm_v256_shl_n_64, imm_v256_shr_n_u64 and imm_v256_shr_n_s64, each with
//   template arguments 1, 4, 8, 12, ..., 60, followed by v256_padd_u8.
// Every entry passes 0U, 0U as its two numeric arguments.
1041 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part4),
1042             SIMD_TUPLE(imm_v256_shl_n_64<1>, 0U, 0U),
1043             SIMD_TUPLE(imm_v256_shl_n_64<4>, 0U, 0U),
1044             SIMD_TUPLE(imm_v256_shl_n_64<8>, 0U, 0U),
1045             SIMD_TUPLE(imm_v256_shl_n_64<12>, 0U, 0U),
1046             SIMD_TUPLE(imm_v256_shl_n_64<16>, 0U, 0U),
1047             SIMD_TUPLE(imm_v256_shl_n_64<20>, 0U, 0U),
1048             SIMD_TUPLE(imm_v256_shl_n_64<24>, 0U, 0U),
1049             SIMD_TUPLE(imm_v256_shl_n_64<28>, 0U, 0U),
1050             SIMD_TUPLE(imm_v256_shl_n_64<32>, 0U, 0U),
1051             SIMD_TUPLE(imm_v256_shl_n_64<36>, 0U, 0U),
1052             SIMD_TUPLE(imm_v256_shl_n_64<40>, 0U, 0U),
1053             SIMD_TUPLE(imm_v256_shl_n_64<44>, 0U, 0U),
1054             SIMD_TUPLE(imm_v256_shl_n_64<48>, 0U, 0U),
1055             SIMD_TUPLE(imm_v256_shl_n_64<52>, 0U, 0U),
1056             SIMD_TUPLE(imm_v256_shl_n_64<56>, 0U, 0U),
1057             SIMD_TUPLE(imm_v256_shl_n_64<60>, 0U, 0U),
1058             SIMD_TUPLE(imm_v256_shr_n_u64<1>, 0U, 0U),
1059             SIMD_TUPLE(imm_v256_shr_n_u64<4>, 0U, 0U),
1060             SIMD_TUPLE(imm_v256_shr_n_u64<8>, 0U, 0U),
1061             SIMD_TUPLE(imm_v256_shr_n_u64<12>, 0U, 0U),
1062             SIMD_TUPLE(imm_v256_shr_n_u64<16>, 0U, 0U),
1063             SIMD_TUPLE(imm_v256_shr_n_u64<20>, 0U, 0U),
1064             SIMD_TUPLE(imm_v256_shr_n_u64<24>, 0U, 0U),
1065             SIMD_TUPLE(imm_v256_shr_n_u64<28>, 0U, 0U),
1066             SIMD_TUPLE(imm_v256_shr_n_u64<32>, 0U, 0U),
1067             SIMD_TUPLE(imm_v256_shr_n_u64<36>, 0U, 0U),
1068             SIMD_TUPLE(imm_v256_shr_n_u64<40>, 0U, 0U),
1069             SIMD_TUPLE(imm_v256_shr_n_u64<44>, 0U, 0U),
1070             SIMD_TUPLE(imm_v256_shr_n_u64<48>, 0U, 0U),
1071             SIMD_TUPLE(imm_v256_shr_n_u64<52>, 0U, 0U),
1072             SIMD_TUPLE(imm_v256_shr_n_u64<56>, 0U, 0U),
1073             SIMD_TUPLE(imm_v256_shr_n_u64<60>, 0U, 0U),
1074             SIMD_TUPLE(imm_v256_shr_n_s64<1>, 0U, 0U),
1075             SIMD_TUPLE(imm_v256_shr_n_s64<4>, 0U, 0U),
1076             SIMD_TUPLE(imm_v256_shr_n_s64<8>, 0U, 0U),
1077             SIMD_TUPLE(imm_v256_shr_n_s64<12>, 0U, 0U),
1078             SIMD_TUPLE(imm_v256_shr_n_s64<16>, 0U, 0U),
1079             SIMD_TUPLE(imm_v256_shr_n_s64<20>, 0U, 0U),
1080             SIMD_TUPLE(imm_v256_shr_n_s64<24>, 0U, 0U),
1081             SIMD_TUPLE(imm_v256_shr_n_s64<28>, 0U, 0U),
1082             SIMD_TUPLE(imm_v256_shr_n_s64<32>, 0U, 0U),
1083             SIMD_TUPLE(imm_v256_shr_n_s64<36>, 0U, 0U),
1084             SIMD_TUPLE(imm_v256_shr_n_s64<40>, 0U, 0U),
1085             SIMD_TUPLE(imm_v256_shr_n_s64<44>, 0U, 0U),
1086             SIMD_TUPLE(imm_v256_shr_n_s64<48>, 0U, 0U),
1087             SIMD_TUPLE(imm_v256_shr_n_s64<52>, 0U, 0U),
1088             SIMD_TUPLE(imm_v256_shr_n_s64<56>, 0U, 0U),
1089             SIMD_TUPLE(imm_v256_shr_n_s64<60>, 0U, 0U),
1090             SIMD_TUPLE(v256_padd_u8, 0U, 0U));
1091 
// Parameter list for the V256_V256_Part5 fixture: imm_v256_shr_n_word<1..15>
// followed by imm_v256_shl_n_word<1..15>. Every entry passes 0U, 0U as its
// two numeric arguments.
1092 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256_Part5),
1093             SIMD_TUPLE(imm_v256_shr_n_word<1>, 0U, 0U),
1094             SIMD_TUPLE(imm_v256_shr_n_word<2>, 0U, 0U),
1095             SIMD_TUPLE(imm_v256_shr_n_word<3>, 0U, 0U),
1096             SIMD_TUPLE(imm_v256_shr_n_word<4>, 0U, 0U),
1097             SIMD_TUPLE(imm_v256_shr_n_word<5>, 0U, 0U),
1098             SIMD_TUPLE(imm_v256_shr_n_word<6>, 0U, 0U),
1099             SIMD_TUPLE(imm_v256_shr_n_word<7>, 0U, 0U),
1100             SIMD_TUPLE(imm_v256_shr_n_word<8>, 0U, 0U),
1101             SIMD_TUPLE(imm_v256_shr_n_word<9>, 0U, 0U),
1102             SIMD_TUPLE(imm_v256_shr_n_word<10>, 0U, 0U),
1103             SIMD_TUPLE(imm_v256_shr_n_word<11>, 0U, 0U),
1104             SIMD_TUPLE(imm_v256_shr_n_word<12>, 0U, 0U),
1105             SIMD_TUPLE(imm_v256_shr_n_word<13>, 0U, 0U),
1106             SIMD_TUPLE(imm_v256_shr_n_word<14>, 0U, 0U),
1107             SIMD_TUPLE(imm_v256_shr_n_word<15>, 0U, 0U),
1108             SIMD_TUPLE(imm_v256_shl_n_word<1>, 0U, 0U),
1109             SIMD_TUPLE(imm_v256_shl_n_word<2>, 0U, 0U),
1110             SIMD_TUPLE(imm_v256_shl_n_word<3>, 0U, 0U),
1111             SIMD_TUPLE(imm_v256_shl_n_word<4>, 0U, 0U),
1112             SIMD_TUPLE(imm_v256_shl_n_word<5>, 0U, 0U),
1113             SIMD_TUPLE(imm_v256_shl_n_word<6>, 0U, 0U),
1114             SIMD_TUPLE(imm_v256_shl_n_word<7>, 0U, 0U),
1115             SIMD_TUPLE(imm_v256_shl_n_word<8>, 0U, 0U),
1116             SIMD_TUPLE(imm_v256_shl_n_word<9>, 0U, 0U),
1117             SIMD_TUPLE(imm_v256_shl_n_word<10>, 0U, 0U),
1118             SIMD_TUPLE(imm_v256_shl_n_word<11>, 0U, 0U),
1119             SIMD_TUPLE(imm_v256_shl_n_word<12>, 0U, 0U),
1120             SIMD_TUPLE(imm_v256_shl_n_word<13>, 0U, 0U),
1121             SIMD_TUPLE(imm_v256_shl_n_word<14>, 0U, 0U),
1122             SIMD_TUPLE(imm_v256_shl_n_word<15>, 0U, 0U));
1123 
// Parameter list for the V256_V256V256V256 fixture: v256_blend_8 with 0U, 0U
// and v256_wideshuffle_8 with 63U, 8U.
// NOTE(review): the 63U, 8U pair is presumably the mask and maskwidth read by
// TestIntrinsic::SetUp - confirm against the SIMD_TUPLE definition.
1124 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_V256V256V256),
1125             SIMD_TUPLE(v256_blend_8, 0U, 0U),
1126             SIMD_TUPLE(v256_wideshuffle_8, 63U, 8U));
1127 
// Parameter list for the V256_U8 fixture: the single entry v256_dup_8, with
// 0U, 0U as its two numeric arguments.
1128 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U8), SIMD_TUPLE(v256_dup_8, 0U, 0U));
1129 
// Parameter list for the V256_U16 fixture: the single entry v256_dup_16, with
// 0U, 0U as its two numeric arguments.
1130 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U16), SIMD_TUPLE(v256_dup_16, 0U, 0U));
1131 
// Parameter list for the V256_U32 fixture: the single entry v256_dup_32, with
// 0U, 0U as its two numeric arguments.
1132 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U32), SIMD_TUPLE(v256_dup_32, 0U, 0U));
1133 
// Parameter list for the V256_U64 fixture: the single entry v256_dup_64, with
// 0U, 0U as its two numeric arguments.
1134 INSTANTIATE(ARCH, ARCH_POSTFIX(V256_U64), SIMD_TUPLE(v256_dup_64, 0U, 0U));
1135 
// Parameter list for the U32_V256 fixture: v256_low_u32 and v256_movemask_8,
// each with 0U, 0U as its two numeric arguments.
1136 INSTANTIATE(ARCH, ARCH_POSTFIX(U32_V256), SIMD_TUPLE(v256_low_u32, 0U, 0U),
1137             SIMD_TUPLE(v256_movemask_8, 0U, 0U));
1138 
// Parameter list for the V64_V256 fixture: the single entry v256_low_v64,
// with 0U, 0U as its two numeric arguments.
1139 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V256), SIMD_TUPLE(v256_low_v64, 0U, 0U));
1140 
1141 }  // namespace SIMD_NAMESPACE
1142