1 /*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <cmath>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdexcept>
28 #include <vector>
29
30 #include <unknwn.h>
31 #include <directx/d3d12.h>
32 #include <dxgi1_4.h>
33 #include <gtest/gtest.h>
34 #include <wrl.h>
35 #include <dxguids/dxguids.h>
36
37 #include "compute_test.h"
38
39 using std::vector;
40
TEST_F(ComputeTest,runtime_memcpy)41 TEST_F(ComputeTest, runtime_memcpy)
42 {
43 struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
44 const char *kernel_source =
45 "struct shift { uchar val; uchar shift; ushort ret; };\n\
46 __kernel void main_test(__global struct shift *inout)\n\
47 {\n\
48 uint id = get_global_id(0);\n\
49 uint id2 = id + get_global_id(1);\n\
50 struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
51 lc[id] = inout[id];\n\
52 inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
53 }\n";
54
55 auto inout = ShaderArg<struct shift>({
56 { 0x10, 1, 0xffff },
57 { 0x20, 2, 0xffff },
58 { 0x30, 3, 0xffff },
59 { 0x40, 4, 0xffff },
60 },
61 SHADER_ARG_INOUT);
62 const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
63 run_shader(kernel_source, inout.size(), 1, 1, inout);
64 for (int i = 0; i < inout.size(); ++i)
65 EXPECT_EQ(inout[i].ret, expected[i]);
66 }
67
TEST_F(ComputeTest,two_global_arrays)68 TEST_F(ComputeTest, two_global_arrays)
69 {
70 const char *kernel_source =
71 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
72 {\n\
73 uint idx = get_global_id(0);\n\
74 g1[idx] -= g2[idx];\n\
75 }\n";
76 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
77 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
78 const uint32_t expected[] = {
79 9, 18, 27, 36
80 };
81
82 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
83 for (int i = 0; i < g1.size(); ++i)
84 EXPECT_EQ(g1[i], expected[i]);
85 }
86
87 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)88 TEST_F(ComputeTest, DISABLED_i64tof32)
89 {
90 const char *kernel_source =
91 "__kernel void main_test(__global long *out, __constant long *in)\n\
92 {\n\
93 __local float tmp[12];\n\
94 uint idx = get_global_id(0);\n\
95 tmp[idx] = in[idx];\n\
96 barrier(CLK_LOCAL_MEM_FENCE);\n\
97 out[idx] = tmp[idx + get_global_id(1)];\n\
98 }\n";
99 auto in = ShaderArg<int64_t>({ 0x100000000LL,
100 -0x100000000LL,
101 0x7fffffffffffffffLL,
102 0x4000004000000000LL,
103 0x4000003fffffffffLL,
104 0x4000004000000001LL,
105 -1,
106 -0x4000004000000000LL,
107 -0x4000003fffffffffLL,
108 -0x4000004000000001LL,
109 0,
110 INT64_MIN },
111 SHADER_ARG_INPUT);
112 auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
113 const int64_t expected[] = {
114 0x100000000LL,
115 -0x100000000LL,
116 0x7fffffffffffffffLL,
117 0x4000000000000000LL,
118 0x4000000000000000LL,
119 0x4000008000000000LL,
120 -1,
121 -0x4000000000000000LL,
122 -0x4000000000000000LL,
123 -0x4000008000000000LL,
124 0,
125 INT64_MIN,
126 };
127
128 run_shader(kernel_source, out.size(), 1, 1, out, in);
129 for (int i = 0; i < out.size(); ++i) {
130 EXPECT_EQ((int64_t)out[i], expected[i]);
131 }
132 }
TEST_F(ComputeTest,two_constant_arrays)133 TEST_F(ComputeTest, two_constant_arrays)
134 {
135 const char *kernel_source =
136 "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
137 {\n\
138 uint idx = get_global_id(0);\n\
139 g1[idx] -= c1[idx] + c2[idx];\n\
140 }\n";
141 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
142 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
143 auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
144 const uint32_t expected[] = {
145 4, 13, 22, 31
146 };
147
148 run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
149 for (int i = 0; i < g1.size(); ++i)
150 EXPECT_EQ(g1[i], expected[i]);
151 }
152
TEST_F(ComputeTest,null_constant_ptr)153 TEST_F(ComputeTest, null_constant_ptr)
154 {
155 const char *kernel_source =
156 "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
157 {\n\
158 __constant uint fallback[] = {2, 3, 4, 5};\n\
159 __constant uint *c = c1 ? c1 : fallback;\n\
160 uint idx = get_global_id(0);\n\
161 g1[idx] -= c[idx];\n\
162 }\n";
163 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
164 auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
165 const uint32_t expected1[] = {
166 9, 18, 27, 36
167 };
168
169 run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
170 for (int i = 0; i < g1.size(); ++i)
171 EXPECT_EQ(g1[i], expected1[i]);
172
173 const uint32_t expected2[] = {
174 8, 17, 26, 35
175 };
176
177 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
178 auto c2 = NullShaderArg();
179 run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
180 for (int i = 0; i < g1.size(); ++i)
181 EXPECT_EQ(g1[i], expected2[i]);
182 }
183
TEST_F(ComputeTest,null_global_ptr)184 TEST_F(ComputeTest, null_global_ptr)
185 {
186 const char *kernel_source =
187 "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
188 {\n\
189 __constant uint fallback[] = {2, 3, 4, 5};\n\
190 uint idx = get_global_id(0);\n\
191 g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
192 }\n";
193 auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
194 auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
195 const uint32_t expected1[] = {
196 9, 18, 27, 36
197 };
198
199 run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
200 for (int i = 0; i < g1.size(); ++i)
201 EXPECT_EQ(g1[i], expected1[i]);
202
203 const uint32_t expected2[] = {
204 8, 17, 26, 35
205 };
206
207 g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
208 auto g2null = NullShaderArg();
209 run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
210 for (int i = 0; i < g1.size(); ++i)
211 EXPECT_EQ(g1[i], expected2[i]);
212 }
213
TEST_F(ComputeTest,ret_constant_ptr)214 TEST_F(ComputeTest, ret_constant_ptr)
215 {
216 struct s { uint64_t ptr; uint32_t val; };
217 const char *kernel_source =
218 "struct s { __constant uint *ptr; uint val; };\n\
219 __kernel void main_test(__global struct s *out, __constant uint *in)\n\
220 {\n\
221 __constant uint foo[] = { 1, 2 };\n\
222 uint idx = get_global_id(0);\n\
223 if (idx == 0)\n\
224 out[idx].ptr = foo;\n\
225 else\n\
226 out[idx].ptr = in;\n\
227 out[idx].val = out[idx].ptr[idx];\n\
228 }\n";
229 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
230 auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
231 const uint32_t expected_val[] = {
232 1, 4
233 };
234 const uint64_t expected_ptr[] = {
235 2ull << 32, 1ull << 32
236 };
237
238 run_shader(kernel_source, out.size(), 1, 1, out, in);
239 for (int i = 0; i < out.size(); ++i) {
240 EXPECT_EQ(out[i].val, expected_val[i]);
241 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
242 }
243 }
244
TEST_F(ComputeTest,ret_global_ptr)245 TEST_F(ComputeTest, ret_global_ptr)
246 {
247 struct s { uint64_t ptr; uint32_t val; };
248 const char *kernel_source =
249 "struct s { __global uint *ptr; uint val; };\n\
250 __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
251 {\n\
252 uint idx = get_global_id(0);\n\
253 out[idx].ptr = idx ? in2 : in1;\n\
254 out[idx].val = out[idx].ptr[idx];\n\
255 }\n";
256 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
257 auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
258 auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
259 const uint32_t expected_val[] = {
260 1, 4
261 };
262 const uint64_t expected_ptr[] = {
263 1ull << 32, 2ull << 32
264 };
265
266 run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
267 for (int i = 0; i < out.size(); ++i) {
268 EXPECT_EQ(out[i].val, expected_val[i]);
269 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
270 }
271 }
272
TEST_F(ComputeTest,ret_local_ptr)273 TEST_F(ComputeTest, ret_local_ptr)
274 {
275 struct s { uint64_t ptr; };
276 const char *kernel_source =
277 "struct s { __local uint *ptr; };\n\
278 __kernel void main_test(__global struct s *out)\n\
279 {\n\
280 __local uint tmp[2];\n\
281 uint idx = get_global_id(0);\n\
282 tmp[idx] = idx;\n\
283 out[idx].ptr = &tmp[idx];\n\
284 }\n";
285 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
286 const uint64_t expected_ptr[] = {
287 0, 4,
288 };
289
290 run_shader(kernel_source, out.size(), 1, 1, out);
291 for (int i = 0; i < out.size(); ++i) {
292 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
293 }
294 }
295
TEST_F(ComputeTest,ret_private_ptr)296 TEST_F(ComputeTest, ret_private_ptr)
297 {
298 struct s { uint64_t ptr; uint32_t value; };
299 const char *kernel_source =
300 "struct s { __private uint *ptr; uint value; };\n\
301 __kernel void main_test(__global struct s *out)\n\
302 {\n\
303 uint tmp[2] = {1, 2};\n\
304 uint idx = get_global_id(0);\n\
305 out[idx].ptr = &tmp[idx];\n\
306 out[idx].value = *out[idx].ptr;\n\
307 }\n";
308 auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
309 const uint64_t expected_ptr[] = {
310 0, 4,
311 };
312 const uint32_t expected_value[] = {
313 1, 2
314 };
315
316 run_shader(kernel_source, out.size(), 1, 1, out);
317 for (int i = 0; i < out.size(); ++i) {
318 EXPECT_EQ(out[i].ptr, expected_ptr[i]);
319 }
320 }
321
TEST_F(ComputeTest,globals_8bit)322 TEST_F(ComputeTest, globals_8bit)
323 {
324 const char *kernel_source =
325 "__kernel void main_test(__global unsigned char *inout)\n\
326 {\n\
327 uint idx = get_global_id(0);\n\
328 inout[idx] = inout[idx] + 1;\n\
329 }\n";
330 auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
331 const uint8_t expected[] = {
332 101, 111, 121, 131
333 };
334 run_shader(kernel_source, inout.size(), 1, 1, inout);
335 for (int i = 0; i < inout.size(); ++i)
336 EXPECT_EQ(inout[i], expected[i]);
337 }
338
TEST_F(ComputeTest,globals_16bit)339 TEST_F(ComputeTest, globals_16bit)
340 {
341 const char *kernel_source =
342 "__kernel void main_test(__global unsigned short *inout)\n\
343 {\n\
344 uint idx = get_global_id(0);\n\
345 inout[idx] = inout[idx] + 1;\n\
346 }\n";
347 auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
348 const uint16_t expected[] = {
349 10001, 10011, 10021, 10031
350 };
351 run_shader(kernel_source, inout.size(), 1, 1, inout);
352 for (int i = 0; i < inout.size(); ++i)
353 EXPECT_EQ(inout[i], expected[i]);
354 }
355
TEST_F(ComputeTest,globals_64bit)356 TEST_F(ComputeTest, globals_64bit)
357 {
358 const char *kernel_source =
359 "__kernel void main_test(__global unsigned long *inout)\n\
360 {\n\
361 uint idx = get_global_id(0);\n\
362 inout[idx] = inout[idx] + 1;\n\
363 }\n";
364 uint64_t base = 1ull << 50;
365 auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
366 SHADER_ARG_INOUT);
367 const uint64_t expected[] = {
368 base + 1, base + 11, base + 21, base + 31
369 };
370 run_shader(kernel_source, inout.size(), 1, 1, inout);
371 for (int i = 0; i < inout.size(); ++i)
372 EXPECT_EQ(inout[i], expected[i]);
373 }
374
TEST_F(ComputeTest,built_ins_global_id)375 TEST_F(ComputeTest, built_ins_global_id)
376 {
377 const char *kernel_source =
378 "__kernel void main_test(__global uint *output)\n\
379 {\n\
380 output[get_global_id(0)] = get_global_id(0);\n\
381 }\n";
382 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
383 SHADER_ARG_OUTPUT);
384 const uint32_t expected[] = {
385 0, 1, 2, 3
386 };
387
388 run_shader(kernel_source, output.size(), 1, 1, output);
389 for (int i = 0; i < output.size(); ++i)
390 EXPECT_EQ(output[i], expected[i]);
391 }
392
TEST_F(ComputeTest,built_ins_global_id_rmw)393 TEST_F(ComputeTest, built_ins_global_id_rmw)
394 {
395 const char *kernel_source =
396 "__kernel void main_test(__global uint *output)\n\
397 {\n\
398 uint id = get_global_id(0);\n\
399 output[id] = output[id] * (id + 1);\n\
400 }\n";
401 auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
402 SHADER_ARG_INOUT);
403 const uint32_t expected[] = {
404 0x00000001, 0x20000002, 0x00060006, 0x1004080c
405 };
406 run_shader(kernel_source, inout.size(), 1, 1, inout);
407 for (int i = 0; i < inout.size(); ++i)
408 EXPECT_EQ(inout[i], expected[i]);
409 }
410
TEST_F(ComputeTest,types_float_basics)411 TEST_F(ComputeTest, types_float_basics)
412 {
413 const char *kernel_source =
414 "__kernel void main_test(__global uint *output)\n\
415 {\n\
416 output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
417 }\n";
418 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
419 SHADER_ARG_OUTPUT);
420 const uint32_t expected[] = {
421 1, 2, 3, 4
422 };
423 run_shader(kernel_source, output.size(), 1, 1, output);
424 for (int i = 0; i < output.size(); ++i)
425 EXPECT_EQ(output[i], expected[i]);
426 }
427
TEST_F(ComputeTest,DISABLED_types_double_basics)428 TEST_F(ComputeTest, DISABLED_types_double_basics)
429 {
430 /* Disabled because doubles are unsupported */
431 const char *kernel_source =
432 "__kernel void main_test(__global uint *output)\n\
433 {\n\
434 output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435 }\n";
436 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437 SHADER_ARG_OUTPUT);
438 const uint32_t expected[] = {
439 1, 2, 3, 4
440 };
441 run_shader(kernel_source, output.size(), 1, 1, output);
442 for (int i = 0; i < output.size(); ++i)
443 EXPECT_EQ(output[i], expected[i]);
444 }
445
TEST_F(ComputeTest,types_short_basics)446 TEST_F(ComputeTest, types_short_basics)
447 {
448 const char *kernel_source =
449 "__kernel void main_test(__global uint *output)\n\
450 {\n\
451 output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452 }\n";
453 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454 SHADER_ARG_OUTPUT);
455 const uint32_t expected[] = {
456 1, 2, 3, 4
457 };
458 run_shader(kernel_source, output.size(), 1, 1, output);
459 for (int i = 0; i < output.size(); ++i)
460 EXPECT_EQ(output[i], expected[i]);
461 }
462
TEST_F(ComputeTest,types_char_basics)463 TEST_F(ComputeTest, types_char_basics)
464 {
465 const char *kernel_source =
466 "__kernel void main_test(__global uint *output)\n\
467 {\n\
468 output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469 }\n";
470 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471 SHADER_ARG_OUTPUT);
472 const uint32_t expected[] = {
473 1, 2, 3, 4
474 };
475 run_shader(kernel_source, output.size(), 1, 1, output);
476 for (int i = 0; i < output.size(); ++i)
477 EXPECT_EQ(output[i], expected[i]);
478 }
479
TEST_F(ComputeTest,types_if_statement)480 TEST_F(ComputeTest, types_if_statement)
481 {
482 const char *kernel_source =
483 "__kernel void main_test(__global uint *output)\n\
484 {\n\
485 int idx = get_global_id(0);\n\
486 if (idx > 0)\n\
487 output[idx] = ~idx;\n\
488 else\n\
489 output[0] = 0xff;\n\
490 }\n";
491 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492 SHADER_ARG_OUTPUT);
493 const uint32_t expected[] = {
494 0xff, ~1u, ~2u, ~3u
495 };
496 run_shader(kernel_source, output.size(), 1, 1, output);
497 for (int i = 0; i < output.size(); ++i)
498 EXPECT_EQ(output[i], expected[i]);
499 }
500
TEST_F(ComputeTest,types_do_while_loop)501 TEST_F(ComputeTest, types_do_while_loop)
502 {
503 const char *kernel_source =
504 "__kernel void main_test(__global uint *output)\n\
505 {\n\
506 int value = 1;\n\
507 int i = 1, n = get_global_id(0);\n\
508 do {\n\
509 value *= i++;\n\
510 } while (i <= n);\n\
511 output[n] = value;\n\
512 }\n";
513 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514 SHADER_ARG_OUTPUT);
515 const uint32_t expected[] = {
516 1, 1, 1*2, 1*2*3, 1*2*3*4
517 };
518 run_shader(kernel_source, output.size(), 1, 1, output);
519 for (int i = 0; i < output.size(); ++i)
520 EXPECT_EQ(output[i], expected[i]);
521 }
522
TEST_F(ComputeTest,types_for_loop)523 TEST_F(ComputeTest, types_for_loop)
524 {
525 const char *kernel_source =
526 "__kernel void main_test(__global uint *output)\n\
527 {\n\
528 int value = 1;\n\
529 int n = get_global_id(0);\n\
530 for (int i = 1; i <= n; ++i)\n\
531 value *= i;\n\
532 output[n] = value;\n\
533 }\n";
534 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535 SHADER_ARG_OUTPUT);
536 const uint32_t expected[] = {
537 1, 1, 1*2, 1*2*3, 1*2*3*4
538 };
539 run_shader(kernel_source, output.size(), 1, 1, output);
540 for (int i = 0; i < output.size(); ++i)
541 EXPECT_EQ(output[i], expected[i]);
542 }
543
TEST_F(ComputeTest,complex_types_local_array_long)544 TEST_F(ComputeTest, complex_types_local_array_long)
545 {
546 const char *kernel_source =
547 "__kernel void main_test(__global ulong *inout)\n\
548 {\n\
549 ulong tmp[] = {\n\
550 get_global_id(1) + 0x00000000,\n\
551 get_global_id(1) + 0x10000001,\n\
552 get_global_id(1) + 0x20000020,\n\
553 get_global_id(1) + 0x30000300,\n\
554 };\n\
555 uint idx = get_global_id(0);\n\
556 inout[idx] = tmp[idx];\n\
557 }\n";
558 auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559 const uint64_t expected[] = {
560 0x00000000, 0x10000001, 0x20000020, 0x30000300,
561 };
562 run_shader(kernel_source, inout.size(), 1, 1, inout);
563 for (int i = 0; i < inout.size(); ++i)
564 EXPECT_EQ(inout[i], expected[i]);
565 }
566
TEST_F(ComputeTest,complex_types_local_array_short)567 TEST_F(ComputeTest, complex_types_local_array_short)
568 {
569 const char *kernel_source =
570 "__kernel void main_test(__global ushort *inout)\n\
571 {\n\
572 ushort tmp[] = {\n\
573 get_global_id(1) + 0x00,\n\
574 get_global_id(1) + 0x10,\n\
575 get_global_id(1) + 0x20,\n\
576 get_global_id(1) + 0x30,\n\
577 };\n\
578 uint idx = get_global_id(0);\n\
579 inout[idx] = tmp[idx];\n\
580 }\n";
581 auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582 const uint16_t expected[] = {
583 0x00, 0x10, 0x20, 0x30,
584 };
585 run_shader(kernel_source, inout.size(), 1, 1, inout);
586 for (int i = 0; i < inout.size(); ++i)
587 EXPECT_EQ(inout[i], expected[i]);
588 }
589
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)590 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591 {
592 const char *kernel_source =
593 "struct has_vecs { uchar c; ushort s; float2 f; };\n\
594 __kernel void main_test(__global uint *inout)\n\
595 {\n\
596 struct has_vecs tmp[] = {\n\
597 { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598 { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599 { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600 { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601 };\n\
602 uint idx = get_global_id(0);\n\
603 uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604 inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605 }\n";
606 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607 const uint16_t expected[] = { 101, 404, 909, 1616 };
608 run_shader(kernel_source, inout.size(), 1, 1, inout);
609 for (int i = 0; i < inout.size(); ++i)
610 EXPECT_EQ(inout[i], expected[i]);
611 }
612
TEST_F(ComputeTest,complex_types_local_array)613 TEST_F(ComputeTest, complex_types_local_array)
614 {
615 const char *kernel_source =
616 "__kernel void main_test(__global uint *inout)\n\
617 {\n\
618 uint tmp[] = {\n\
619 get_global_id(1) + 0x00,\n\
620 get_global_id(1) + 0x10,\n\
621 get_global_id(1) + 0x20,\n\
622 get_global_id(1) + 0x30,\n\
623 };\n\
624 uint idx = get_global_id(0);\n\
625 inout[idx] = tmp[idx];\n\
626 }\n";
627 auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628 const uint32_t expected[] = {
629 0x00, 0x10, 0x20, 0x30,
630 };
631 run_shader(kernel_source, inout.size(), 1, 1, inout);
632 for (int i = 0; i < inout.size(); ++i)
633 EXPECT_EQ(inout[i], expected[i]);
634 }
635
TEST_F(ComputeTest,complex_types_global_struct_array)636 TEST_F(ComputeTest, complex_types_global_struct_array)
637 {
638 struct two_vals { uint32_t add; uint32_t mul; };
639 const char *kernel_source =
640 "struct two_vals { uint add; uint mul; };\n\
641 __kernel void main_test(__global struct two_vals *in_out)\n\
642 {\n\
643 uint id = get_global_id(0);\n\
644 in_out[id].add = in_out[id].add + id;\n\
645 in_out[id].mul = in_out[id].mul * id;\n\
646 }\n";
647 auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648 SHADER_ARG_INOUT);
649 const struct two_vals expected[] = {
650 { 8 + 0, 8 * 0 },
651 { 16 + 1, 16 * 1 },
652 { 64 + 2, 64 * 2 },
653 { 65536 + 3, 65536 * 3 }
654 };
655 run_shader(kernel_source, inout.size(), 1, 1, inout);
656 for (int i = 0; i < inout.size(); ++i) {
657 EXPECT_EQ(inout[i].add, expected[i].add);
658 EXPECT_EQ(inout[i].mul, expected[i].mul);
659 }
660 }
661
TEST_F(ComputeTest,complex_types_global_uint2)662 TEST_F(ComputeTest, complex_types_global_uint2)
663 {
664 struct uint2 { uint32_t x; uint32_t y; };
665 const char *kernel_source =
666 "__kernel void main_test(__global uint2 *inout)\n\
667 {\n\
668 uint id = get_global_id(0);\n\
669 inout[id].x = inout[id].x + id;\n\
670 inout[id].y = inout[id].y * id;\n\
671 }\n";
672 auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673 SHADER_ARG_INOUT);
674 const struct uint2 expected[] = {
675 { 8 + 0, 8 * 0 },
676 { 16 + 1, 16 * 1 },
677 { 64 + 2, 64 * 2 },
678 { 65536 + 3, 65536 * 3 }
679 };
680 run_shader(kernel_source, inout.size(), 1, 1, inout);
681 for (int i = 0; i < inout.size(); ++i) {
682 EXPECT_EQ(inout[i].x, expected[i].x);
683 EXPECT_EQ(inout[i].y, expected[i].y);
684 }
685 }
686
TEST_F(ComputeTest,complex_types_global_ushort2)687 TEST_F(ComputeTest, complex_types_global_ushort2)
688 {
689 struct ushort2 { uint16_t x; uint16_t y; };
690 const char *kernel_source =
691 "__kernel void main_test(__global ushort2 *inout)\n\
692 {\n\
693 uint id = get_global_id(0);\n\
694 inout[id].x = inout[id].x + id;\n\
695 inout[id].y = inout[id].y * id;\n\
696 }\n";
697 auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698 { (uint16_t)65536, (uint16_t)65536 } },
699 SHADER_ARG_INOUT);
700 const struct ushort2 expected[] = {
701 { 8 + 0, 8 * 0 },
702 { 16 + 1, 16 * 1 },
703 { 64 + 2, 64 * 2 },
704 { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705 };
706 run_shader(kernel_source, inout.size(), 1, 1, inout);
707 for (int i = 0; i < inout.size(); ++i) {
708 EXPECT_EQ(inout[i].x, expected[i].x);
709 EXPECT_EQ(inout[i].y, expected[i].y);
710 }
711 }
712
TEST_F(ComputeTest,complex_types_global_uchar3)713 TEST_F(ComputeTest, complex_types_global_uchar3)
714 {
715 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716 const char *kernel_source =
717 "__kernel void main_test(__global uchar3 *inout)\n\
718 {\n\
719 uint id = get_global_id(0);\n\
720 inout[id].x = inout[id].x + id;\n\
721 inout[id].y = inout[id].y * id;\n\
722 inout[id].z = inout[id].y + inout[id].x;\n\
723 }\n";
724 auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725 SHADER_ARG_INOUT);
726 const struct uchar3 expected[] = {
727 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731 };
732 run_shader(kernel_source, inout.size(), 1, 1, inout);
733 for (int i = 0; i < inout.size(); ++i) {
734 EXPECT_EQ(inout[i].x, expected[i].x);
735 EXPECT_EQ(inout[i].y, expected[i].y);
736 EXPECT_EQ(inout[i].z, expected[i].z);
737 }
738 }
739
TEST_F(ComputeTest,complex_types_constant_uchar3)740 TEST_F(ComputeTest, complex_types_constant_uchar3)
741 {
742 struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743 const char *kernel_source =
744 "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745 {\n\
746 uint id = get_global_id(0);\n\
747 out[id].x = in[id].x + id;\n\
748 out[id].y = in[id].y * id;\n\
749 out[id].z = out[id].y + out[id].x;\n\
750 }\n";
751 auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752 SHADER_ARG_INPUT);
753 auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754 SHADER_ARG_OUTPUT);
755 const struct uchar3 expected[] = {
756 { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757 { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758 { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759 { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760 };
761 run_shader(kernel_source, out.size(), 1, 1, out, in);
762 for (int i = 0; i < out.size(); ++i) {
763 EXPECT_EQ(out[i].x, expected[i].x);
764 EXPECT_EQ(out[i].y, expected[i].y);
765 EXPECT_EQ(out[i].z, expected[i].z);
766 }
767 }
768
TEST_F(ComputeTest,complex_types_global_uint8)769 TEST_F(ComputeTest, complex_types_global_uint8)
770 {
771 struct uint8 {
772 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774 };
775 const char *kernel_source =
776 "__kernel void main_test(__global uint8 *inout)\n\
777 {\n\
778 uint id = get_global_id(0);\n\
779 inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780 }\n";
781 auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782 SHADER_ARG_INOUT);
783 const struct uint8 expected[] = {
784 { 2, 4, 6, 8, 10, 12, 14, 16 }
785 };
786 run_shader(kernel_source, inout.size(), 1, 1, inout);
787 for (int i = 0; i < inout.size(); ++i) {
788 EXPECT_EQ(inout[i].s0, expected[i].s0);
789 EXPECT_EQ(inout[i].s1, expected[i].s1);
790 EXPECT_EQ(inout[i].s2, expected[i].s2);
791 EXPECT_EQ(inout[i].s3, expected[i].s3);
792 EXPECT_EQ(inout[i].s4, expected[i].s4);
793 EXPECT_EQ(inout[i].s5, expected[i].s5);
794 EXPECT_EQ(inout[i].s6, expected[i].s6);
795 EXPECT_EQ(inout[i].s7, expected[i].s7);
796 }
797 }
798
TEST_F(ComputeTest,complex_types_local_ulong16)799 TEST_F(ComputeTest, complex_types_local_ulong16)
800 {
801 struct ulong16 {
802 uint64_t values[16];
803 };
804 const char *kernel_source =
805 R"(__kernel void main_test(__global ulong16 *inout)
806 {
807 __local ulong16 local_array[2];
808 uint id = get_global_id(0);
809 local_array[id] = inout[id];
810 barrier(CLK_LOCAL_MEM_FENCE);
811 inout[id] = local_array[0] * 2;
812 })";
813 auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814 SHADER_ARG_INOUT);
815 const struct ulong16 expected[] = {
816 { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817 };
818 run_shader(kernel_source, inout.size(), 1, 1, inout);
819 for (int i = 0; i < inout.size(); ++i) {
820 for (int j = 0; j < 16; ++j) {
821 EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822 }
823 }
824 }
825
TEST_F(ComputeTest,complex_types_constant_uint8)826 TEST_F(ComputeTest, complex_types_constant_uint8)
827 {
828 struct uint8 {
829 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831 };
832 const char *kernel_source =
833 "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834 {\n\
835 uint id = get_global_id(0);\n\
836 out[id].s01234567 = in[id].s01234567 * 2;\n\
837 }\n";
838 auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839 SHADER_ARG_INPUT);
840 auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841 SHADER_ARG_INOUT);
842 const struct uint8 expected[] = {
843 { 2, 4, 6, 8, 10, 12, 14, 16 }
844 };
845 run_shader(kernel_source, out.size(), 1, 1, out, in);
846 for (int i = 0; i < out.size(); ++i) {
847 EXPECT_EQ(out[i].s0, expected[i].s0);
848 EXPECT_EQ(out[i].s1, expected[i].s1);
849 EXPECT_EQ(out[i].s2, expected[i].s2);
850 EXPECT_EQ(out[i].s3, expected[i].s3);
851 EXPECT_EQ(out[i].s4, expected[i].s4);
852 EXPECT_EQ(out[i].s5, expected[i].s5);
853 EXPECT_EQ(out[i].s6, expected[i].s6);
854 EXPECT_EQ(out[i].s7, expected[i].s7);
855 }
856 }
857
TEST_F(ComputeTest,complex_types_const_array)858 TEST_F(ComputeTest, complex_types_const_array)
859 {
860 const char *kernel_source =
861 "__kernel void main_test(__global uint *output)\n\
862 {\n\
863 const uint foo[] = { 100, 101, 102, 103 };\n\
864 output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
865 }\n";
866 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
867 SHADER_ARG_OUTPUT);
868 const uint32_t expected[] = {
869 100, 101, 102, 103
870 };
871 run_shader(kernel_source, output.size(), 1, 1, output);
872 for (int i = 0; i < output.size(); ++i)
873 EXPECT_EQ(output[i], expected[i]);
874 }
875
TEST_F(ComputeTest,mem_access_load_store_ordering)876 TEST_F(ComputeTest, mem_access_load_store_ordering)
877 {
878 const char *kernel_source =
879 "__kernel void main_test(__global uint *output)\n\
880 {\n\
881 uint foo[4];\n\
882 foo[0] = 0x11111111;\n\
883 foo[1] = 0x22222222;\n\
884 foo[2] = 0x44444444;\n\
885 foo[3] = 0x88888888;\n\
886 foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
887 foo[0] += get_global_id(0); // foo[0] = tid\n\
888 foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
889 output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
890 }\n";
891 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
892 SHADER_ARG_OUTPUT);
893 const uint16_t expected[] = {
894 0, 1, 2, 3
895 };
896 run_shader(kernel_source, output.size(), 1, 1, output);
897 for (int i = 0; i < output.size(); ++i)
898 EXPECT_EQ(output[i], expected[i]);
899 }
900
TEST_F(ComputeTest,two_const_arrays)901 TEST_F(ComputeTest, two_const_arrays)
902 {
903 const char *kernel_source =
904 "__kernel void main_test(__global uint *output)\n\
905 {\n\
906 uint id = get_global_id(0);\n\
907 uint foo[4] = {100, 101, 102, 103};\n\
908 uint bar[4] = {1, 2, 3, 4};\n\
909 output[id] = foo[id] * bar[id];\n\
910 }\n";
911 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
912 SHADER_ARG_OUTPUT);
913 const uint32_t expected[] = {
914 100, 202, 306, 412
915 };
916 run_shader(kernel_source, output.size(), 1, 1, output);
917 for (int i = 0; i < output.size(); ++i)
918 EXPECT_EQ(output[i], expected[i]);
919 }
920
TEST_F(ComputeTest,imod_pos)921 TEST_F(ComputeTest, imod_pos)
922 {
923 const char *kernel_source =
924 "__kernel void main_test(__global int *inout)\n\
925 {\n\
926 inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
927 }\n";
928 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
929 SHADER_ARG_INOUT);
930 const int32_t expected[] = {
931 -1, 0, -2, -1, 0, 1, 2, 0, 1
932 };
933 run_shader(kernel_source, inout.size(), 1, 1, inout);
934 for (int i = 0; i < inout.size(); ++i)
935 EXPECT_EQ(inout[i], expected[i]);
936 }
937
TEST_F(ComputeTest,imod_neg)938 TEST_F(ComputeTest, imod_neg)
939 {
940 const char *kernel_source =
941 "__kernel void main_test(__global int *inout)\n\
942 {\n\
943 inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
944 }\n";
945 auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
946 SHADER_ARG_INOUT);
947 const int32_t expected[] = {
948 -1, 0, -2, -1, 0, 1, 2, 0, 1
949 };
950 run_shader(kernel_source, inout.size(), 1, 1, inout);
951 for (int i = 0; i < inout.size(); ++i)
952 EXPECT_EQ(inout[i], expected[i]);
953 }
954
TEST_F(ComputeTest,umod)955 TEST_F(ComputeTest, umod)
956 {
957 const char *kernel_source =
958 "__kernel void main_test(__global uint *inout)\n\
959 {\n\
960 inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
961 }\n";
962 auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
963 SHADER_ARG_INOUT);
964 const uint32_t expected[] = {
965 0xfffffffa, 0xfffffffb, 0, 1, 2
966 };
967 run_shader(kernel_source, inout.size(), 1, 1, inout);
968 for (int i = 0; i < inout.size(); ++i)
969 EXPECT_EQ(inout[i], expected[i]);
970 }
971
TEST_F(ComputeTest,rotate)972 TEST_F(ComputeTest, rotate)
973 {
974 const char *kernel_source =
975 "__kernel void main_test(__global uint *inout)\n\
976 {\n\
977 inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
978 }\n";
979 auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
980 SHADER_ARG_INOUT);
981 const uint32_t expected[] = {
982 0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
983 };
984 run_shader(kernel_source, inout.size(), 1, 1, inout);
985 for (int i = 0; i < inout.size(); ++i)
986 EXPECT_EQ(inout[i], expected[i]);
987 }
988
TEST_F(ComputeTest,popcount)989 TEST_F(ComputeTest, popcount)
990 {
991 const char *kernel_source =
992 "__kernel void main_test(__global uint *inout)\n\
993 {\n\
994 inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
995 }\n";
996 auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
997 SHADER_ARG_INOUT);
998 const uint32_t expected[] = {
999 0, 1, 2, 2, 4, 32
1000 };
1001 run_shader(kernel_source, inout.size(), 1, 1, inout);
1002 for (int i = 0; i < inout.size(); ++i)
1003 EXPECT_EQ(inout[i], expected[i]);
1004 }
1005
TEST_F(ComputeTest,hadd)1006 TEST_F(ComputeTest, hadd)
1007 {
1008 const char *kernel_source =
1009 "__kernel void main_test(__global uint *inout)\n\
1010 {\n\
1011 inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1012 }\n";
1013 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1014 SHADER_ARG_INOUT);
1015 const uint32_t expected[] = {
1016 (1u << 31) >> 1,
1017 ((1u << 31) + 1) >> 1,
1018 ((1u << 31) + 2) >> 1,
1019 ((1u << 31) + 3) >> 1,
1020 ((1ull << 31) + 0xfffffffc) >> 1,
1021 ((1ull << 31) + 0xfffffffd) >> 1,
1022 ((1ull << 31) + 0xfffffffe) >> 1,
1023 ((1ull << 31) + 0xffffffff) >> 1,
1024 };
1025 run_shader(kernel_source, inout.size(), 1, 1, inout);
1026 for (int i = 0; i < inout.size(); ++i)
1027 EXPECT_EQ(inout[i], expected[i]);
1028 }
1029
TEST_F(ComputeTest,rhadd)1030 TEST_F(ComputeTest, rhadd)
1031 {
1032 const char *kernel_source =
1033 "__kernel void main_test(__global uint *inout)\n\
1034 {\n\
1035 inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1036 }\n";
1037 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1038 SHADER_ARG_INOUT);
1039 const uint32_t expected[] = {
1040 ((1u << 31) + 1) >> 1,
1041 ((1u << 31) + 2) >> 1,
1042 ((1u << 31) + 3) >> 1,
1043 ((1u << 31) + 4) >> 1,
1044 ((1ull << 31) + 0xfffffffd) >> 1,
1045 ((1ull << 31) + 0xfffffffe) >> 1,
1046 ((1ull << 31) + 0xffffffff) >> 1,
1047 ((1ull << 31) + (1ull << 32)) >> 1,
1048 };
1049 run_shader(kernel_source, inout.size(), 1, 1, inout);
1050 for (int i = 0; i < inout.size(); ++i)
1051 EXPECT_EQ(inout[i], expected[i]);
1052 }
1053
TEST_F(ComputeTest,add_sat)1054 TEST_F(ComputeTest, add_sat)
1055 {
1056 const char *kernel_source =
1057 "__kernel void main_test(__global uint *inout)\n\
1058 {\n\
1059 inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1060 }\n";
1061 auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1062 SHADER_ARG_INOUT);
1063 const uint32_t expected[] = {
1064 0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1065 };
1066 run_shader(kernel_source, inout.size(), 1, 1, inout);
1067 for (int i = 0; i < inout.size(); ++i)
1068 EXPECT_EQ(inout[i], expected[i]);
1069 }
1070
TEST_F(ComputeTest,sub_sat)1071 TEST_F(ComputeTest, sub_sat)
1072 {
1073 const char *kernel_source =
1074 "__kernel void main_test(__global uint *inout)\n\
1075 {\n\
1076 inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1077 }\n";
1078 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1079 const uint32_t expected[] = {
1080 0, 0, 0, 1
1081 };
1082 run_shader(kernel_source, inout.size(), 1, 1, inout);
1083 for (int i = 0; i < inout.size(); ++i)
1084 EXPECT_EQ(inout[i], expected[i]);
1085 }
1086
TEST_F(ComputeTest,mul_hi)1087 TEST_F(ComputeTest, mul_hi)
1088 {
1089 const char *kernel_source =
1090 "__kernel void main_test(__global uint *inout)\n\
1091 {\n\
1092 inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1093 }\n";
1094 auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1095 const uint32_t expected[] = {
1096 0, 0, 1, 1, (1u << 30)
1097 };
1098 run_shader(kernel_source, inout.size(), 1, 1, inout);
1099 for (int i = 0; i < inout.size(); ++i)
1100 EXPECT_EQ(inout[i], expected[i]);
1101 }
1102
TEST_F(ComputeTest,ldexp_x)1103 TEST_F(ComputeTest, ldexp_x)
1104 {
1105 const char *kernel_source =
1106 "__kernel void main_test(__global float *inout)\n\
1107 {\n\
1108 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1109 }\n";
1110 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1111 const float expected[] = {
1112 ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1113 };
1114 run_shader(kernel_source, inout.size(), 1, 1, inout);
1115 for (int i = 0; i < inout.size(); ++i)
1116 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1117 }
1118
TEST_F(ComputeTest,ldexp_y)1119 TEST_F(ComputeTest, ldexp_y)
1120 {
1121 const char *kernel_source =
1122 "__kernel void main_test(__global float *inout)\n\
1123 {\n\
1124 inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1125 }\n";
1126 auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1127 const float expected[] = {
1128 ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1129 };
1130 run_shader(kernel_source, inout.size(), 1, 1, inout);
1131 for (int i = 0; i < inout.size(); ++i)
1132 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1133 }
1134
TEST_F(ComputeTest,frexp_ret)1135 TEST_F(ComputeTest, frexp_ret)
1136 {
1137 const char *kernel_source =
1138 "__kernel void main_test(__global float *inout)\n\
1139 {\n\
1140 int exp;\n\
1141 inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1142 }\n";
1143 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1144 const float expected[] = {
1145 0.0f, 0.5f, 0.5f, 0.75f
1146 };
1147 run_shader(kernel_source, inout.size(), 1, 1, inout);
1148 for (int i = 0; i < inout.size(); ++i)
1149 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1150 }
1151
TEST_F(ComputeTest,frexp_exp)1152 TEST_F(ComputeTest, frexp_exp)
1153 {
1154 const char *kernel_source =
1155 "__kernel void main_test(__global float *inout)\n\
1156 {\n\
1157 int exp;\n\
1158 frexp(inout[get_global_id(0)], &exp);\n\
1159 inout[get_global_id(0)] = (float)exp;\n\
1160 }\n";
1161 auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1162 const float expected[] = {
1163 0.0f, 0.0f, 1.0f, 2.0f
1164 };
1165 run_shader(kernel_source, inout.size(), 1, 1, inout);
1166 for (int i = 0; i < inout.size(); ++i)
1167 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1168 }
1169
TEST_F(ComputeTest,clz)1170 TEST_F(ComputeTest, clz)
1171 {
1172 const char *kernel_source =
1173 "__kernel void main_test(__global uint *inout)\n\
1174 {\n\
1175 inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1176 }\n";
1177 auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1178 const uint32_t expected[] = {
1179 32, 31, 16, 1, 0
1180 };
1181 run_shader(kernel_source, inout.size(), 1, 1, inout);
1182 for (int i = 0; i < inout.size(); ++i)
1183 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1184 }
1185
TEST_F(ComputeTest,sin)1186 TEST_F(ComputeTest, sin)
1187 {
1188 struct sin_vals { float in; float clc; float native; };
1189 const char *kernel_source =
1190 "struct sin_vals { float in; float clc; float native; };\n\
1191 __kernel void main_test(__global struct sin_vals *inout)\n\
1192 {\n\
1193 inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1194 inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1195 }\n";
1196 const vector<sin_vals> input = {
1197 { 0.0f, 0.0f, 0.0f },
1198 { 1.0f, 0.0f, 0.0f },
1199 { 2.0f, 0.0f, 0.0f },
1200 { 3.0f, 0.0f, 0.0f },
1201 };
1202 auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1203 const struct sin_vals expected[] = {
1204 { 0.0f, 0.0f, 0.0f },
1205 { 1.0f, sin(1.0f), sin(1.0f) },
1206 { 2.0f, sin(2.0f), sin(2.0f) },
1207 { 3.0f, sin(3.0f), sin(3.0f) },
1208 };
1209 run_shader(kernel_source, inout.size(), 1, 1, inout);
1210 for (int i = 0; i < inout.size(); ++i) {
1211 EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1212 EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1213 EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1214 }
1215 }
1216
TEST_F(ComputeTest,cosh)1217 TEST_F(ComputeTest, cosh)
1218 {
1219 const char *kernel_source =
1220 "__kernel void main_test(__global float *inout)\n\
1221 {\n\
1222 inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1223 }\n";
1224 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1225 const float expected[] = {
1226 cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1227 };
1228 run_shader(kernel_source, inout.size(), 1, 1, inout);
1229 for (int i = 0; i < inout.size(); ++i)
1230 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1231 }
1232
TEST_F(ComputeTest,exp)1233 TEST_F(ComputeTest, exp)
1234 {
1235 const char *kernel_source =
1236 "__kernel void main_test(__global float *inout)\n\
1237 {\n\
1238 inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1239 }\n";
1240 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1241 const float expected[] = {
1242 exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1243 };
1244 run_shader(kernel_source, inout.size(), 1, 1, inout);
1245 for (int i = 0; i < inout.size(); ++i)
1246 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1247 }
1248
TEST_F(ComputeTest,exp10)1249 TEST_F(ComputeTest, exp10)
1250 {
1251 const char *kernel_source =
1252 "__kernel void main_test(__global float *inout)\n\
1253 {\n\
1254 inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1255 }\n";
1256 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1257 const float expected[] = {
1258 pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1259 };
1260 run_shader(kernel_source, inout.size(), 1, 1, inout);
1261 for (int i = 0; i < inout.size(); ++i)
1262 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1263 }
1264
TEST_F(ComputeTest,exp2)1265 TEST_F(ComputeTest, exp2)
1266 {
1267 const char *kernel_source =
1268 "__kernel void main_test(__global float *inout)\n\
1269 {\n\
1270 inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1271 }\n";
1272 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1273 const float expected[] = {
1274 pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1275 };
1276 run_shader(kernel_source, inout.size(), 1, 1, inout);
1277 for (int i = 0; i < inout.size(); ++i)
1278 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1279 }
1280
TEST_F(ComputeTest,log)1281 TEST_F(ComputeTest, log)
1282 {
1283 const char *kernel_source =
1284 "__kernel void main_test(__global float *inout)\n\
1285 {\n\
1286 inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1287 }\n";
1288 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1289 const float expected[] = {
1290 log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1291 };
1292 run_shader(kernel_source, inout.size(), 1, 1, inout);
1293 for (int i = 0; i < inout.size(); ++i)
1294 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1295 }
1296
TEST_F(ComputeTest,log10)1297 TEST_F(ComputeTest, log10)
1298 {
1299 const char *kernel_source =
1300 "__kernel void main_test(__global float *inout)\n\
1301 {\n\
1302 inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1303 }\n";
1304 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1305 const float expected[] = {
1306 log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1307 };
1308 run_shader(kernel_source, inout.size(), 1, 1, inout);
1309 for (int i = 0; i < inout.size(); ++i)
1310 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1311 }
1312
TEST_F(ComputeTest,log2)1313 TEST_F(ComputeTest, log2)
1314 {
1315 const char *kernel_source =
1316 "__kernel void main_test(__global float *inout)\n\
1317 {\n\
1318 inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1319 }\n";
1320 auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1321 const float expected[] = {
1322 log(0.0f) / log(2.0f), log(1.0f) / log(2.0f), log(2.0f) / log(2.0f), log(3.0f) / log(2.0f)
1323 };
1324 run_shader(kernel_source, inout.size(), 1, 1, inout);
1325 for (int i = 0; i < inout.size(); ++i)
1326 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1327 }
1328
TEST_F(ComputeTest,rint)1329 TEST_F(ComputeTest, rint)
1330 {
1331 const char *kernel_source =
1332 "__kernel void main_test(__global float *inout)\n\
1333 {\n\
1334 inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1335 }\n";
1336
1337 auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1338 const float expected[] = {
1339 0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1340 };
1341 run_shader(kernel_source, inout.size(), 1, 1, inout);
1342 for (int i = 0; i < inout.size(); ++i)
1343 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1344 }
1345
TEST_F(ComputeTest,round)1346 TEST_F(ComputeTest, round)
1347 {
1348 const char *kernel_source =
1349 "__kernel void main_test(__global float *inout)\n\
1350 {\n\
1351 inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1352 }\n";
1353 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1354 SHADER_ARG_INOUT);
1355 const float expected[] = {
1356 0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1357 };
1358 run_shader(kernel_source, inout.size(), 1, 1, inout);
1359 for (int i = 0; i < inout.size(); ++i)
1360 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1361 }
1362
TEST_F(ComputeTest,arg_by_val)1363 TEST_F(ComputeTest, arg_by_val)
1364 {
1365 const char *kernel_source =
1366 "__kernel void main_test(__global float *inout, float mul)\n\
1367 {\n\
1368 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1369 }\n";
1370 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1371 SHADER_ARG_INOUT);
1372 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1373 const float expected[] = {
1374 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1375 };
1376 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1377 for (int i = 0; i < inout.size(); ++i)
1378 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1379 }
1380
TEST_F(ComputeTest,uint8_by_val)1381 TEST_F(ComputeTest, uint8_by_val)
1382 {
1383 struct uint8 {
1384 uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1385 uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1386 };
1387 const char *kernel_source =
1388 "__kernel void main_test(__global uint *out, uint8 val)\n\
1389 {\n\
1390 out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1391 val.s4 + val.s5 + val.s6 + val.s7;\n\
1392 }\n";
1393 auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1394 auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1395 const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1396 run_shader(kernel_source, out.size(), 1, 1, out, val);
1397 for (int i = 0; i < out.size(); ++i)
1398 EXPECT_EQ(out[i], expected[i]);
1399 }
1400
TEST_F(ComputeTest,link)1401 TEST_F(ComputeTest, link)
1402 {
1403 const char *foo_src =
1404 "float foo(float in)\n\
1405 {\n\
1406 return in * in;\n\
1407 }\n";
1408 const char *kernel_source =
1409 "float foo(float in);\n\
1410 __kernel void main_test(__global float *inout)\n\
1411 {\n\
1412 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1413 }\n";
1414 std::vector<const char *> srcs = { foo_src, kernel_source };
1415 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1416 const float expected[] = {
1417 4.0f,
1418 };
1419 run_shader(srcs, inout.size(), 1, 1, inout);
1420 for (int i = 0; i < inout.size(); ++i)
1421 EXPECT_EQ(inout[i], expected[i]);
1422 }
1423
TEST_F(ComputeTest,link_library)1424 TEST_F(ComputeTest, link_library)
1425 {
1426 const char *bar_src =
1427 "float bar(float in)\n\
1428 {\n\
1429 return in * 5;\n\
1430 }\n";
1431 const char *foo_src =
1432 "float bar(float in);\n\
1433 float foo(float in)\n\
1434 {\n\
1435 return in * bar(in);\n\
1436 }\n";
1437 const char *kernel_source =
1438 "float foo(float in);\n\
1439 __kernel void main_test(__global float *inout)\n\
1440 {\n\
1441 inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1442 }\n";
1443 std::vector<Shader> libraries = {
1444 compile({ bar_src, kernel_source }, {}, true),
1445 compile({ foo_src }, {}, true)
1446 };
1447 Shader exe = link(libraries);
1448 auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1449 const float expected[] = {
1450 20.0f,
1451 };
1452 run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1453 for (int i = 0; i < inout.size(); ++i)
1454 EXPECT_EQ(inout[i], expected[i]);
1455 }
1456
TEST_F(ComputeTest,localvar)1457 TEST_F(ComputeTest, localvar)
1458 {
1459 const char *kernel_source =
1460 "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1461 void main_test(__global float *inout)\n\
1462 {\n\
1463 __local float2 tmp[2];\n\
1464 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1465 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1466 barrier(CLK_LOCAL_MEM_FENCE);\n\
1467 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1468 }\n";
1469
1470 auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1471 const float expected[] = {
1472 9.0f, 5.0f
1473 };
1474 run_shader(kernel_source, inout.size(), 1, 1, inout);
1475 for (int i = 0; i < inout.size(); ++i)
1476 EXPECT_EQ(inout[i], expected[i]);
1477 }
1478
TEST_F(ComputeTest,localvar_uchar2)1479 TEST_F(ComputeTest, localvar_uchar2)
1480 {
1481 const char *kernel_source =
1482 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1483 __kernel void main_test(__global uchar *inout)\n\
1484 {\n\
1485 __local uchar2 tmp[2];\n\
1486 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1487 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1488 barrier(CLK_LOCAL_MEM_FENCE);\n\
1489 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1490 }\n";
1491
1492 auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1493 const uint8_t expected[] = { 9, 5 };
1494 run_shader(kernel_source, inout.size(), 1, 1, inout);
1495 for (int i = 0; i < inout.size(); ++i)
1496 EXPECT_EQ(inout[i], expected[i]);
1497 }
1498
TEST_F(ComputeTest,work_group_size_hint)1499 TEST_F(ComputeTest, work_group_size_hint)
1500 {
1501 const char *kernel_source =
1502 "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1503 __kernel void main_test(__global uint *output)\n\
1504 {\n\
1505 output[get_global_id(0)] = get_local_id(0);\n\
1506 }\n";
1507 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1508 SHADER_ARG_OUTPUT);
1509 const uint32_t expected[] = {
1510 0, 1, 2, 3
1511 };
1512 run_shader(kernel_source, output.size(), 1, 1, output);
1513 for (int i = 0; i < output.size(); ++i)
1514 EXPECT_EQ(output[i], expected[i]);
1515 }
1516
TEST_F(ComputeTest,reqd_work_group_size)1517 TEST_F(ComputeTest, reqd_work_group_size)
1518 {
1519 const char *kernel_source =
1520 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1521 __kernel void main_test(__global uint *output)\n\
1522 {\n\
1523 output[get_global_id(0)] = get_local_id(0);\n\
1524 }\n";
1525 auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1526 SHADER_ARG_OUTPUT);
1527 const uint32_t expected[] = {
1528 0, 1, 0, 1
1529 };
1530 run_shader(kernel_source, output.size(), 1, 1, output);
1531 for (int i = 0; i < output.size(); ++i)
1532 EXPECT_EQ(output[i], expected[i]);
1533 }
1534
TEST_F(ComputeTest,image)1535 TEST_F(ComputeTest, image)
1536 {
1537 const char* kernel_source =
1538 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1539 {\n\
1540 int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1541 write_imagef(output, coords, read_imagef(input, coords));\n\
1542 }\n";
1543 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1544 validate(shader);
1545 }
1546
TEST_F(ComputeTest,image_two_reads)1547 TEST_F(ComputeTest, image_two_reads)
1548 {
1549 const char* kernel_source =
1550 "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1551 {\n\
1552 if (is_float)\n\
1553 output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1554 else \n\
1555 output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1556 }\n";
1557 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1558 validate(shader);
1559 }
1560
TEST_F(ComputeTest,image_unused)1561 TEST_F(ComputeTest, image_unused)
1562 {
1563 const char* kernel_source =
1564 "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1565 {\n\
1566 }\n";
1567 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1568 validate(shader);
1569 }
1570
TEST_F(ComputeTest,image_read_write)1571 TEST_F(ComputeTest, image_read_write)
1572 {
1573 const char *kernel_source =
1574 R"(__kernel void main_test(read_write image2d_t image)
1575 {
1576 int2 coords = (int2)(get_global_id(0), get_global_id(1));
1577 write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1578 })";
1579 Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1580 validate(shader);
1581 }
1582
TEST_F(ComputeTest,sampler)1583 TEST_F(ComputeTest, sampler)
1584 {
1585 const char* kernel_source =
1586 "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1587 {\n\
1588 output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1589 }\n";
1590 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1591 validate(shader);
1592 }
1593
TEST_F(ComputeTest,image_dims)1594 TEST_F(ComputeTest, image_dims)
1595 {
1596 const char* kernel_source =
1597 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1598 {\n\
1599 output[get_global_id(0)] = get_image_width(roimage);\n\
1600 output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1601 }\n";
1602 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1603 validate(shader);
1604 }
1605
TEST_F(ComputeTest,image_format)1606 TEST_F(ComputeTest, image_format)
1607 {
1608 const char* kernel_source =
1609 "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1610 {\n\
1611 output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1612 output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1613 }\n";
1614 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1615 validate(shader);
1616 }
1617
TEST_F(ComputeTest,image1d_buffer_t)1618 TEST_F(ComputeTest, image1d_buffer_t)
1619 {
1620 const char* kernel_source =
1621 "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1622 {\n\
1623 write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1624 }\n";
1625 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1626 validate(shader);
1627 }
1628
TEST_F(ComputeTest,local_ptr)1629 TEST_F(ComputeTest, local_ptr)
1630 {
1631 struct uint2 { uint32_t x, y; };
1632 const char *kernel_source =
1633 "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1634 {\n\
1635 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1636 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1637 barrier(CLK_LOCAL_MEM_FENCE);\n\
1638 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1639 }\n";
1640 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1641 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1642 const uint8_t expected[] = { 9, 5 };
1643 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1644 for (int i = 0; i < inout.size(); ++i)
1645 EXPECT_EQ(inout[i], expected[i]);
1646 }
1647
TEST_F(ComputeTest,two_local_ptrs)1648 TEST_F(ComputeTest, two_local_ptrs)
1649 {
1650 struct uint2 { uint32_t x, y; };
1651 const char *kernel_source =
1652 "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1653 {\n\
1654 tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1655 tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1656 tmp2[get_local_id(0)] = get_global_id(0);\n\
1657 barrier(CLK_LOCAL_MEM_FENCE);\n\
1658 inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1659 }\n";
1660 auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1661 auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1662 auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1663 const uint8_t expected[] = { 9, 6 };
1664 run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1665 for (int i = 0; i < inout.size(); ++i)
1666 EXPECT_EQ(inout[i], expected[i]);
1667 }
1668
TEST_F(ComputeTest,int8_to_float)1669 TEST_F(ComputeTest, int8_to_float)
1670 {
1671 const char *kernel_source =
1672 "__kernel void main_test(__global char* in, __global float* out)\n\
1673 {\n\
1674 uint pos = get_global_id(0);\n\
1675 out[pos] = in[pos] / 100.0f;\n\
1676 }";
1677 auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1678 auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1679 const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1680 run_shader(kernel_source, in.size(), 1, 1, in, out);
1681 for (int i = 0; i < in.size(); ++i)
1682 EXPECT_FLOAT_EQ(out[i], expected[i]);
1683 }
1684
TEST_F(ComputeTest,vec_hint_float4)1685 TEST_F(ComputeTest, vec_hint_float4)
1686 {
1687 const char *kernel_source =
1688 "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1689 {\n\
1690 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1691 }";
1692 Shader shader = compile({ kernel_source });
1693 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1694 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1695 }
1696
TEST_F(ComputeTest,vec_hint_uchar2)1697 TEST_F(ComputeTest, vec_hint_uchar2)
1698 {
1699 const char *kernel_source =
1700 "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1701 {\n\
1702 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1703 }";
1704 Shader shader = compile({ kernel_source });
1705 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1706 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1707 }
1708
TEST_F(ComputeTest,vec_hint_none)1709 TEST_F(ComputeTest, vec_hint_none)
1710 {
1711 const char *kernel_source =
1712 "__kernel void main_test(__global float *inout)\n\
1713 {\n\
1714 inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1715 }";
1716 Shader shader = compile({ kernel_source });
1717 EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1718 }
1719
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1720 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1721 {
1722 /* This is a negative test case, it intentionally triggers a failure to validate the mechanism
1723 * is in place, so other tests will fail if they produce debug messages
1724 */
1725 const char *kernel_source =
1726 "__kernel void main_test(__global float *inout, float mul)\n\
1727 {\n\
1728 inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1729 }\n";
1730 auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1731 SHADER_ARG_INOUT);
1732 auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1733 const float expected[] = {
1734 0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1735 };
1736 ComPtr<ID3D12InfoQueue> info_queue;
1737 dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1738 if (!info_queue) {
1739 GTEST_SKIP() << "No info queue";
1740 return;
1741 }
1742
1743 info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1744 run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1745 for (int i = 0; i < inout.size(); ++i)
1746 EXPECT_FLOAT_EQ(inout[i], expected[i]);
1747 }
1748
TEST_F(ComputeTest,compiler_defines)1749 TEST_F(ComputeTest, compiler_defines)
1750 {
1751 const char *kernel_source =
1752 "__kernel void main_test(__global int* out)\n\
1753 {\n\
1754 out[0] = OUT_VAL0;\n\
1755 out[1] = __OPENCL_C_VERSION__;\n\
1756 }";
1757 auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1758 CompileArgs compile_args = { 1, 1, 1 };
1759 compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1760 std::vector<RawShaderArg *> raw_args = { &out };
1761 run_shader({ kernel_source }, compile_args, out);
1762 EXPECT_EQ(out[0], 5);
1763 EXPECT_EQ(out[1], 100);
1764 }
1765
TEST_F(ComputeTest,global_atomic_add)1766 TEST_F(ComputeTest, global_atomic_add)
1767 {
1768 const char *kernel_source =
1769 "__kernel void main_test(__global int *inout, __global int *old)\n\
1770 {\n\
1771 old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1772 }\n";
1773 auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1774 auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1775 const int32_t expected_inout[] = { 5, 7 };
1776 const int32_t expected_old[] = { 2, 4 };
1777 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1778 for (int i = 0; i < inout.size(); ++i) {
1779 EXPECT_EQ(inout[i], expected_inout[i]);
1780 EXPECT_EQ(old[i], expected_old[i]);
1781 }
1782 }
1783
TEST_F(ComputeTest,global_atomic_imin)1784 TEST_F(ComputeTest, global_atomic_imin)
1785 {
1786 const char *kernel_source =
1787 "__kernel void main_test(__global int *inout, __global int *old)\n\
1788 {\n\
1789 old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1790 }\n";
1791 auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1792 auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1793 const int32_t expected_inout[] = { 0, 1, -1 };
1794 const int32_t expected_old[] = { 0, 2, -1 };
1795 run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1796 for (int i = 0; i < inout.size(); ++i) {
1797 EXPECT_EQ(inout[i], expected_inout[i]);
1798 EXPECT_EQ(old[i], expected_old[i]);
1799 }
1800 }
1801
TEST_F(ComputeTest,global_atomic_and_or)1802 TEST_F(ComputeTest, global_atomic_and_or)
1803 {
1804 const char *kernel_source =
1805 "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1806 __kernel void main_test(__global int *inout)\n\
1807 {\n\
1808 atomic_and(inout, ~(1 << get_global_id(0)));\n\
1809 atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1810 }\n";
1811 auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1812 const int32_t expected[] = { 0x78 };
1813 run_shader(kernel_source, 3, 1, 1, inout);
1814 for (int i = 0; i < inout.size(); ++i)
1815 EXPECT_EQ(inout[i], expected[i]);
1816 }
1817
TEST_F(ComputeTest,global_atomic_cmpxchg)1818 TEST_F(ComputeTest, global_atomic_cmpxchg)
1819 {
1820 const char *kernel_source =
1821 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1822 __kernel void main_test(__global int *inout)\n\
1823 {\n\
1824 while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1825 ;\n\
1826 }\n";
1827 auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1828 const int32_t expected_inout[] = { 2 };
1829 run_shader(kernel_source, 2, 1, 1, inout);
1830 for (int i = 0; i < inout.size(); ++i)
1831 EXPECT_EQ(inout[i], expected_inout[i]);
1832 }
1833
TEST_F(ComputeTest,local_atomic_and_or)1834 TEST_F(ComputeTest, local_atomic_and_or)
1835 {
1836 const char *kernel_source =
1837 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1838 __kernel void main_test(__global ushort *inout)\n\
1839 {\n\
1840 __local ushort tmp;\n\
1841 atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1842 atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1843 barrier(CLK_LOCAL_MEM_FENCE);\n\
1844 inout[get_global_id(0)] = tmp;\n\
1845 }\n";
1846 auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1847 const uint16_t expected[] = { 0x402, 0x402 };
1848 run_shader(kernel_source, inout.size(), 1, 1, inout);
1849 for (int i = 0; i < inout.size(); ++i)
1850 EXPECT_EQ(inout[i], expected[i]);
1851 }
1852
TEST_F(ComputeTest,local_atomic_cmpxchg)1853 TEST_F(ComputeTest, local_atomic_cmpxchg)
1854 {
1855 const char *kernel_source =
1856 "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1857 __kernel void main_test(__global int *out)\n\
1858 {\n\
1859 __local uint tmp;\n\
1860 tmp = 0;\n\
1861 barrier(CLK_LOCAL_MEM_FENCE);\n\
1862 while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1863 ;\n\
1864 barrier(CLK_LOCAL_MEM_FENCE);\n\
1865 out[0] = tmp;\n\
1866 }\n";
1867
1868 auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1869 const uint16_t expected[] = { 2 };
1870 run_shader(kernel_source, 2, 1, 1, out);
1871 for (int i = 0; i < out.size(); ++i)
1872 EXPECT_EQ(out[i], expected[i]);
1873 }
1874
TEST_F(ComputeTest,constant_sampler)1875 TEST_F(ComputeTest, constant_sampler)
1876 {
1877 const char* kernel_source =
1878 "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1879 __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1880 {\n\
1881 int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1882 float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1883 write_imagef(output, coordsi, \n\
1884 read_imagef(input, sampler, coordsf) + \n\
1885 read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1886 }\n";
1887 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1888 validate(shader);
1889 EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1890 }
1891
TEST_F(ComputeTest,hi)1892 TEST_F(ComputeTest, hi)
1893 {
1894 const char *kernel_source = R"(
1895 __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1896 {
1897 int tid = get_global_id(0);
1898
1899 char2 tmp = srcA[tid].hi;
1900 dst[tid] = tmp;
1901 })";
1902 Shader shader = compile(std::vector<const char*>({ kernel_source }));
1903 validate(shader);
1904 }
1905
TEST_F(ComputeTest,system_values)1906 TEST_F(ComputeTest, system_values)
1907 {
1908 const char *kernel_source =
1909 "__kernel void main_test(__global uint* outputs)\n\
1910 {\n\
1911 outputs[0] = get_work_dim();\n\
1912 outputs[1] = get_global_size(0);\n\
1913 outputs[2] = get_local_size(0);\n\
1914 outputs[3] = get_num_groups(0);\n\
1915 outputs[4] = get_group_id(0);\n\
1916 outputs[5] = get_global_offset(0);\n\
1917 outputs[6] = get_global_id(0);\n\
1918 }\n";
1919 auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1920 const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1921 CompileArgs args = { 1, 1, 1 };
1922 Shader shader = compile({ kernel_source });
1923 run_shader(shader, args, out);
1924 for (int i = 0; i < out.size(); ++i)
1925 EXPECT_EQ(out[i], expected[i]);
1926
1927 args.work_props.work_dim = 2;
1928 args.work_props.global_offset_x = 100;
1929 args.work_props.group_id_offset_x = 2;
1930 args.work_props.group_count_total_x = 5;
1931 const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1932 run_shader(shader, args, out);
1933 for (int i = 0; i < out.size(); ++i)
1934 EXPECT_EQ(out[i], expected_withoffsets[i]);
1935 }
1936
TEST_F(ComputeTest,convert_round_sat)1937 TEST_F(ComputeTest, convert_round_sat)
1938 {
1939 const char *kernel_source =
1940 "__kernel void main_test(__global float *f, __global uchar *u)\n\
1941 {\n\
1942 uint idx = get_global_id(0);\n\
1943 u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1944 }\n";
1945 auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1946 auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1947 const uint8_t expected[] = {
1948 0, 2, 20, 255
1949 };
1950
1951 run_shader(kernel_source, f.size(), 1, 1, f, u);
1952 for (int i = 0; i < u.size(); ++i)
1953 EXPECT_EQ(u[i], expected[i]);
1954 }
1955
TEST_F(ComputeTest,convert_round_sat_vec)1956 TEST_F(ComputeTest, convert_round_sat_vec)
1957 {
1958 const char *kernel_source =
1959 "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1960 {\n\
1961 uint idx = get_global_id(0);\n\
1962 u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1963 }\n";
1964 auto f = ShaderArg<float>({
1965 -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1966 -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1967 0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1968 -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1969 }, SHADER_ARG_INPUT);
1970 auto u = ShaderArg<uint8_t>({
1971 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1972 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1973 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1974 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1975 }, SHADER_ARG_OUTPUT);
1976 const uint8_t expected[] = {
1977 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1978 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1979 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1980 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1981 };
1982
1983 run_shader(kernel_source, 4, 1, 1, f, u);
1984 for (int i = 0; i < u.size(); ++i)
1985 EXPECT_EQ(u[i], expected[i]);
1986 }
1987
TEST_F(ComputeTest,convert_char2_uchar2)1988 TEST_F(ComputeTest, convert_char2_uchar2)
1989 {
1990 const char *kernel_source =
1991 "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1992 {\n\
1993 size_t i = get_global_id(0);\n\
1994 dest[i] = convert_uchar2_sat( src[i] );\n\
1995 }\n";
1996
1997 auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1998 auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1999 const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
2000 run_shader(kernel_source, 4, 1, 1, c, u);
2001 for (int i = 0; i < u.size(); i++)
2002 EXPECT_EQ(u[i], expected[i]);
2003 }
2004
TEST_F(ComputeTest,async_copy)2005 TEST_F(ComputeTest, async_copy)
2006 {
2007 const char *kernel_source = R"(
2008 __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2009 {
2010 int i;
2011 for(i=0; i<copiesPerWorkItem; i++)
2012 localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2013 barrier( CLK_LOCAL_MEM_FENCE );
2014 event_t event;
2015 event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2016 wait_group_events( 1, &event );
2017 for(i=0; i<copiesPerWorkItem; i++)
2018 dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2019 })";
2020 Shader shader = compile({ kernel_source });
2021 validate(shader);
2022 }
2023
TEST_F(ComputeTest,packed_struct_global)2024 TEST_F(ComputeTest, packed_struct_global)
2025 {
2026 #pragma pack(push, 1)
2027 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2028 #pragma pack(pop)
2029
2030 const char *kernel_source =
2031 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2032 __kernel void main_test(__global struct s *inout, global uint *size)\n\
2033 {\n\
2034 uint idx = get_global_id(0);\n\
2035 inout[idx].uc = idx + 1;\n\
2036 inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2037 inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2038 *size = sizeof(struct s);\n\
2039 }\n";
2040 auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2041 auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2042 const struct s expected[] = {
2043 { 1, 0xfbfcfdff12345678, 0xa112 }
2044 };
2045
2046 run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2047 for (int i = 0; i < inout.size(); ++i) {
2048 EXPECT_EQ(inout[i].uc, expected[i].uc);
2049 EXPECT_EQ(inout[i].ul, expected[i].ul);
2050 EXPECT_EQ(inout[i].us, expected[i].us);
2051 }
2052 EXPECT_EQ(size, sizeof(struct s));
2053 }
2054
TEST_F(ComputeTest,packed_struct_arg)2055 TEST_F(ComputeTest, packed_struct_arg)
2056 {
2057 #pragma pack(push, 1)
2058 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2059 #pragma pack(pop)
2060
2061 const char *kernel_source =
2062 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2063 __kernel void main_test(__global struct s *out, struct s in)\n\
2064 {\n\
2065 uint idx = get_global_id(0);\n\
2066 out[idx].uc = in.uc + 0x12;\n\
2067 out[idx].ul = in.ul + 0x123456789abcdef;\n\
2068 out[idx].us = in.us + 0x1234;\n\
2069 }\n";
2070 auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2071 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2072 const struct s expected[] = {
2073 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2074 };
2075
2076 run_shader(kernel_source, out.size(), 1, 1, out, in);
2077 for (int i = 0; i < out.size(); ++i) {
2078 EXPECT_EQ(out[i].uc, expected[i].uc);
2079 EXPECT_EQ(out[i].ul, expected[i].ul);
2080 EXPECT_EQ(out[i].us, expected[i].us);
2081 }
2082 }
2083
TEST_F(ComputeTest,packed_struct_local)2084 TEST_F(ComputeTest, packed_struct_local)
2085 {
2086 #pragma pack(push, 1)
2087 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2088 #pragma pack(pop)
2089
2090 const char *kernel_source =
2091 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2092 __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2093 {\n\
2094 uint idx = get_global_id(0);\n\
2095 __local struct s tmp[2];\n\
2096 tmp[get_local_id(0)] = in[idx];\n\
2097 barrier(CLK_LOCAL_MEM_FENCE);\n\
2098 out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2099 }\n";
2100 auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2101 auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2102 const struct s expected[] = {
2103 { 0x12, 0x123456789abcdef, 0x1234 },
2104 { 1, 2, 3 },
2105 };
2106
2107 run_shader(kernel_source, out.size(), 1, 1, out, in);
2108 for (int i = 0; i < out.size(); ++i) {
2109 EXPECT_EQ(out[i].uc, expected[i].uc);
2110 EXPECT_EQ(out[i].ul, expected[i].ul);
2111 EXPECT_EQ(out[i].us, expected[i].us);
2112 }
2113 }
2114
TEST_F(ComputeTest,DISABLED_packed_struct_const)2115 TEST_F(ComputeTest, DISABLED_packed_struct_const)
2116 {
2117 #pragma pack(push, 1)
2118 struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2119 #pragma pack(pop)
2120
2121 const char *kernel_source =
2122 "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2123 __kernel void main_test(__global struct s *out, struct s in)\n\
2124 {\n\
2125 __constant struct s base[] = {\n\
2126 {0x12, 0x123456789abcdef, 0x1234},\n\
2127 {0x11, 0x123456789abcdee, 0x1233},\n\
2128 };\n\
2129 uint idx = get_global_id(0);\n\
2130 out[idx].uc = base[idx % 2].uc + in.uc;\n\
2131 out[idx].ul = base[idx % 2].ul + in.ul;\n\
2132 out[idx].us = base[idx % 2].us + in.us;\n\
2133 }\n";
2134 auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2135 auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2136 const struct s expected[] = {
2137 { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2138 { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2139 };
2140
2141 run_shader(kernel_source, out.size(), 1, 1, out, in);
2142 for (int i = 0; i < out.size(); ++i) {
2143 EXPECT_EQ(out[i].uc, expected[i].uc);
2144 EXPECT_EQ(out[i].ul, expected[i].ul);
2145 EXPECT_EQ(out[i].us, expected[i].us);
2146 }
2147 }
2148
TEST_F(ComputeTest,printf)2149 TEST_F(ComputeTest, printf)
2150 {
2151 const char *kernel_source = R"(
2152 __kernel void main_test(__global float *src, __global uint *dest)
2153 {
2154 *dest = printf("%s: %f", "Test", src[0]);
2155 })";
2156
2157 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2158 auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2159 run_shader(kernel_source, 1, 1, 1, src, dest);
2160 EXPECT_EQ(dest[0], 0);
2161 }
2162
TEST_F(ComputeTest,vload_half)2163 TEST_F(ComputeTest, vload_half)
2164 {
2165 const char *kernel_source = R"(
2166 __kernel void main_test(__global half *src, __global float4 *dest)
2167 {
2168 int offset = get_global_id(0);
2169 dest[offset] = vload_half4(offset, src);
2170 })";
2171 auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2172 0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2173 auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2174 FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2175 run_shader(kernel_source, 2, 1, 1, src, dest);
2176 for (unsigned i = 0; i < 8; ++i)
2177 EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2178 }
2179
TEST_F(ComputeTest,vstore_half)2180 TEST_F(ComputeTest, vstore_half)
2181 {
2182 const char *kernel_source = R"(
2183 __kernel void main_test(__global half *dst, __global float4 *src)
2184 {
2185 int offset = get_global_id(0);
2186 vstore_half4(src[offset], offset, dst);
2187 })";
2188 auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2189 0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2190 auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2191 5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2192 run_shader(kernel_source, 2, 1, 1, dest, src);
2193 const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2194 0x4500, 0x4600, 0x4700, 0x4800 };
2195 for (unsigned i = 0; i < 8; ++i)
2196 EXPECT_EQ(dest[i], expected[i]);
2197 }
2198
TEST_F(ComputeTest,inline_function)2199 TEST_F(ComputeTest, inline_function)
2200 {
2201 const char *kernel_source = R"(
2202 inline float helper(float foo)
2203 {
2204 return foo * 2;
2205 }
2206
2207 __kernel void main_test(__global float *dst, __global float *src)
2208 {
2209 *dst = helper(*src);
2210 })";
2211 auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2212 auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2213 run_shader(kernel_source, 1, 1, 1, dest, src);
2214 EXPECT_EQ(dest[0], 2.0f);
2215 }
2216
TEST_F(ComputeTest,unused_arg)2217 TEST_F(ComputeTest, unused_arg)
2218 {
2219 const char *kernel_source = R"(
2220 __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2221 {
2222 int i = get_global_id(0);
2223 dst[i] = src[i];
2224 })";
2225 auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2226 auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2227 auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2228 run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2229 for (int i = 0; i < 4; ++i)
2230 EXPECT_EQ(dest[i], i + 1);
2231 }
2232
TEST_F(ComputeTest,spec_constant)2233 TEST_F(ComputeTest, spec_constant)
2234 {
2235 const char *spirv_asm = R"(
2236 OpCapability Addresses
2237 OpCapability Kernel
2238 OpCapability Int64
2239 %1 = OpExtInstImport "OpenCL.std"
2240 OpMemoryModel Physical64 OpenCL
2241 OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2242 %4 = OpString "kernel_arg_type.main_test.uint*,"
2243 OpSource OpenCL_C 102000
2244 OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2245 OpName %output "output"
2246 OpName %entry "entry"
2247 OpName %output_addr "output.addr"
2248 OpName %id "id"
2249 OpName %call "call"
2250 OpName %conv "conv"
2251 OpName %idxprom "idxprom"
2252 OpName %arrayidx "arrayidx"
2253 OpName %add "add"
2254 OpName %mul "mul"
2255 OpName %idxprom1 "idxprom1"
2256 OpName %arrayidx2 "arrayidx2"
2257 OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2258 OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2259 OpDecorate %id Alignment 4
2260 OpDecorate %output_addr Alignment 8
2261 OpDecorate %uint_1 SpecId 1
2262 %ulong = OpTypeInt 64 0
2263 %uint = OpTypeInt 32 0
2264 %uint_1 = OpSpecConstant %uint 1
2265 %v3ulong = OpTypeVector %ulong 3
2266 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2267 %void = OpTypeVoid
2268 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2269 %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2270 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2271 %_ptr_Function_uint = OpTypePointer Function %uint
2272 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2273 %2 = OpFunction %void DontInline %24
2274 %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2275 %entry = OpLabel
2276 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2277 %id = OpVariable %_ptr_Function_uint Function
2278 OpStore %output_addr %output Aligned 8
2279 %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2280 %call = OpCompositeExtract %ulong %27 0
2281 %conv = OpUConvert %uint %call
2282 OpStore %id %conv Aligned 4
2283 %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2284 %29 = OpLoad %uint %id Aligned 4
2285 %idxprom = OpUConvert %ulong %29
2286 %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2287 %30 = OpLoad %uint %arrayidx Aligned 4
2288 %31 = OpLoad %uint %id Aligned 4
2289 %add = OpIAdd %uint %31 %uint_1
2290 %mul = OpIMul %uint %30 %add
2291 %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2292 %33 = OpLoad %uint %id Aligned 4
2293 %idxprom1 = OpUConvert %ulong %33
2294 %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2295 OpStore %arrayidx2 %mul Aligned 4
2296 OpReturn
2297 OpFunctionEnd)";
2298 Shader shader = assemble(spirv_asm);
2299 Shader spec_shader = specialize(shader, 1, 5);
2300
2301 auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2302 SHADER_ARG_INOUT);
2303 const uint32_t expected[] = {
2304 0x00000005, 0x60000006, 0x000e000e, 0x20081018
2305 };
2306 CompileArgs args = { (unsigned)inout.size(), 1, 1 };
2307 run_shader(spec_shader, args, inout);
2308 for (int i = 0; i < inout.size(); ++i)
2309 EXPECT_EQ(inout[i], expected[i]);
2310 }
2311
TEST_F(ComputeTest,arg_metadata)2312 TEST_F(ComputeTest, arg_metadata)
2313 {
2314 const char *kernel_source = R"(
2315 __kernel void main_test(
2316 __global int *undec_ptr,
2317 __global volatile int *vol_ptr,
2318 __global const int *const_ptr,
2319 __global int *restrict restr_ptr,
2320 __global const int *restrict const_restr_ptr,
2321 __constant int *const_ptr2)
2322 {
2323 })";
2324 Shader shader = compile({ kernel_source });
2325 EXPECT_EQ(shader.metadata->kernels[0].args[0].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2326 EXPECT_EQ(shader.metadata->kernels[0].args[0].type_qualifier, 0);
2327 EXPECT_EQ(shader.metadata->kernels[0].args[1].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2328 EXPECT_EQ(shader.metadata->kernels[0].args[1].type_qualifier, CLC_KERNEL_ARG_TYPE_VOLATILE);
2329 EXPECT_EQ(shader.metadata->kernels[0].args[2].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2330 EXPECT_EQ(shader.metadata->kernels[0].args[2].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2331 EXPECT_EQ(shader.metadata->kernels[0].args[3].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2332 EXPECT_EQ(shader.metadata->kernels[0].args[3].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT);
2333 EXPECT_EQ(shader.metadata->kernels[0].args[4].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2334 EXPECT_EQ(shader.metadata->kernels[0].args[4].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT | CLC_KERNEL_ARG_TYPE_CONST);
2335 EXPECT_EQ(shader.metadata->kernels[0].args[5].address_qualifier, CLC_KERNEL_ARG_ADDRESS_CONSTANT);
2336 EXPECT_EQ(shader.metadata->kernels[0].args[5].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2337 }
2338