• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <cmath>
25 #include <stdio.h>
26 #include <stdint.h>
27 #include <stdexcept>
28 #include <vector>
29 
30 #include <unknwn.h>
31 #include <directx/d3d12.h>
32 #include <dxgi1_4.h>
33 #include <gtest/gtest.h>
34 #include <wrl.h>
35 #include <dxguids/dxguids.h>
36 
37 #include "compute_test.h"
38 
39 using std::vector;
40 
TEST_F(ComputeTest,runtime_memcpy)41 TEST_F(ComputeTest, runtime_memcpy)
42 {
43    struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
44    const char *kernel_source =
45    "struct shift { uchar val; uchar shift; ushort ret; };\n\
46    __kernel void main_test(__global struct shift *inout)\n\
47    {\n\
48       uint id = get_global_id(0);\n\
49       uint id2 = id + get_global_id(1);\n\
50       struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
51       lc[id] = inout[id];\n\
52       inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
53    }\n";
54 
55    auto inout = ShaderArg<struct shift>({
56          { 0x10, 1, 0xffff },
57          { 0x20, 2, 0xffff },
58          { 0x30, 3, 0xffff },
59          { 0x40, 4, 0xffff },
60       },
61       SHADER_ARG_INOUT);
62    const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
63    run_shader(kernel_source, inout.size(), 1, 1, inout);
64    for (int i = 0; i < inout.size(); ++i)
65       EXPECT_EQ(inout[i].ret, expected[i]);
66 }
67 
TEST_F(ComputeTest,two_global_arrays)68 TEST_F(ComputeTest, two_global_arrays)
69 {
70    const char *kernel_source =
71    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
72    {\n\
73        uint idx = get_global_id(0);\n\
74        g1[idx] -= g2[idx];\n\
75    }\n";
76    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
77    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
78    const uint32_t expected[] = {
79       9, 18, 27, 36
80    };
81 
82    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
83    for (int i = 0; i < g1.size(); ++i)
84       EXPECT_EQ(g1[i], expected[i]);
85 }
86 
87 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
TEST_F(ComputeTest,DISABLED_i64tof32)88 TEST_F(ComputeTest, DISABLED_i64tof32)
89 {
90    const char *kernel_source =
91    "__kernel void main_test(__global long *out, __constant long *in)\n\
92    {\n\
93        __local float tmp[12];\n\
94        uint idx = get_global_id(0);\n\
95        tmp[idx] = in[idx];\n\
96        barrier(CLK_LOCAL_MEM_FENCE);\n\
97        out[idx] = tmp[idx + get_global_id(1)];\n\
98    }\n";
99    auto in = ShaderArg<int64_t>({ 0x100000000LL,
100                                   -0x100000000LL,
101                                   0x7fffffffffffffffLL,
102                                   0x4000004000000000LL,
103                                   0x4000003fffffffffLL,
104                                   0x4000004000000001LL,
105                                   -1,
106                                   -0x4000004000000000LL,
107                                   -0x4000003fffffffffLL,
108                                   -0x4000004000000001LL,
109                                   0,
110                                   INT64_MIN },
111                                 SHADER_ARG_INPUT);
112    auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
113    const int64_t expected[] = {
114       0x100000000LL,
115       -0x100000000LL,
116       0x7fffffffffffffffLL,
117       0x4000000000000000LL,
118       0x4000000000000000LL,
119       0x4000008000000000LL,
120       -1,
121       -0x4000000000000000LL,
122       -0x4000000000000000LL,
123       -0x4000008000000000LL,
124       0,
125       INT64_MIN,
126    };
127 
128    run_shader(kernel_source, out.size(), 1, 1, out, in);
129    for (int i = 0; i < out.size(); ++i) {
130       EXPECT_EQ((int64_t)out[i], expected[i]);
131    }
132 }
TEST_F(ComputeTest,two_constant_arrays)133 TEST_F(ComputeTest, two_constant_arrays)
134 {
135    const char *kernel_source =
136    "__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
137    {\n\
138        uint idx = get_global_id(0);\n\
139        g1[idx] -= c1[idx] + c2[idx];\n\
140    }\n";
141    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
142    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
143    auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
144    const uint32_t expected[] = {
145       4, 13, 22, 31
146    };
147 
148    run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
149    for (int i = 0; i < g1.size(); ++i)
150       EXPECT_EQ(g1[i], expected[i]);
151 }
152 
TEST_F(ComputeTest,null_constant_ptr)153 TEST_F(ComputeTest, null_constant_ptr)
154 {
155    const char *kernel_source =
156    "__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
157    {\n\
158        __constant uint fallback[] = {2, 3, 4, 5};\n\
159        __constant uint *c = c1 ? c1 : fallback;\n\
160        uint idx = get_global_id(0);\n\
161        g1[idx] -= c[idx];\n\
162    }\n";
163    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
164    auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
165    const uint32_t expected1[] = {
166       9, 18, 27, 36
167    };
168 
169    run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
170    for (int i = 0; i < g1.size(); ++i)
171       EXPECT_EQ(g1[i], expected1[i]);
172 
173    const uint32_t expected2[] = {
174       8, 17, 26, 35
175    };
176 
177    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
178    auto c2 = NullShaderArg();
179    run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
180    for (int i = 0; i < g1.size(); ++i)
181       EXPECT_EQ(g1[i], expected2[i]);
182 }
183 
TEST_F(ComputeTest,null_global_ptr)184 TEST_F(ComputeTest, null_global_ptr)
185 {
186    const char *kernel_source =
187    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
188    {\n\
189        __constant uint fallback[] = {2, 3, 4, 5};\n\
190        uint idx = get_global_id(0);\n\
191        g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
192    }\n";
193    auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
194    auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
195    const uint32_t expected1[] = {
196       9, 18, 27, 36
197    };
198 
199    run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
200    for (int i = 0; i < g1.size(); ++i)
201       EXPECT_EQ(g1[i], expected1[i]);
202 
203    const uint32_t expected2[] = {
204       8, 17, 26, 35
205    };
206 
207    g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
208    auto g2null = NullShaderArg();
209    run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
210    for (int i = 0; i < g1.size(); ++i)
211       EXPECT_EQ(g1[i], expected2[i]);
212 }
213 
TEST_F(ComputeTest,ret_constant_ptr)214 TEST_F(ComputeTest, ret_constant_ptr)
215 {
216    struct s { uint64_t ptr; uint32_t val; };
217    const char *kernel_source =
218    "struct s { __constant uint *ptr; uint val; };\n\
219    __kernel void main_test(__global struct s *out, __constant uint *in)\n\
220    {\n\
221        __constant uint foo[] = { 1, 2 };\n\
222        uint idx = get_global_id(0);\n\
223        if (idx == 0)\n\
224           out[idx].ptr = foo;\n\
225        else\n\
226           out[idx].ptr = in;\n\
227        out[idx].val = out[idx].ptr[idx];\n\
228    }\n";
229    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
230    auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
231    const uint32_t expected_val[] = {
232       1, 4
233    };
234    const uint64_t expected_ptr[] = {
235       2ull << 32, 1ull << 32
236    };
237 
238    run_shader(kernel_source, out.size(), 1, 1, out, in);
239    for (int i = 0; i < out.size(); ++i) {
240       EXPECT_EQ(out[i].val, expected_val[i]);
241       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
242    }
243 }
244 
TEST_F(ComputeTest,ret_global_ptr)245 TEST_F(ComputeTest, ret_global_ptr)
246 {
247    struct s { uint64_t ptr; uint32_t val; };
248    const char *kernel_source =
249    "struct s { __global uint *ptr; uint val; };\n\
250    __kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
251    {\n\
252        uint idx = get_global_id(0);\n\
253        out[idx].ptr = idx ? in2 : in1;\n\
254        out[idx].val = out[idx].ptr[idx];\n\
255    }\n";
256    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
257    auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
258    auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
259    const uint32_t expected_val[] = {
260       1, 4
261    };
262    const uint64_t expected_ptr[] = {
263       1ull << 32, 2ull << 32
264    };
265 
266    run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
267    for (int i = 0; i < out.size(); ++i) {
268       EXPECT_EQ(out[i].val, expected_val[i]);
269       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
270    }
271 }
272 
TEST_F(ComputeTest,ret_local_ptr)273 TEST_F(ComputeTest, ret_local_ptr)
274 {
275    struct s { uint64_t ptr; };
276    const char *kernel_source =
277    "struct s { __local uint *ptr; };\n\
278    __kernel void main_test(__global struct s *out)\n\
279    {\n\
280        __local uint tmp[2];\n\
281        uint idx = get_global_id(0);\n\
282        tmp[idx] = idx;\n\
283        out[idx].ptr = &tmp[idx];\n\
284    }\n";
285    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
286    const uint64_t expected_ptr[] = {
287       0, 4,
288    };
289 
290    run_shader(kernel_source, out.size(), 1, 1, out);
291    for (int i = 0; i < out.size(); ++i) {
292       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
293    }
294 }
295 
TEST_F(ComputeTest,ret_private_ptr)296 TEST_F(ComputeTest, ret_private_ptr)
297 {
298    struct s { uint64_t ptr; uint32_t value; };
299    const char *kernel_source =
300    "struct s { __private uint *ptr; uint value; };\n\
301    __kernel void main_test(__global struct s *out)\n\
302    {\n\
303        uint tmp[2] = {1, 2};\n\
304        uint idx = get_global_id(0);\n\
305        out[idx].ptr = &tmp[idx];\n\
306        out[idx].value = *out[idx].ptr;\n\
307    }\n";
308    auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
309    const uint64_t expected_ptr[] = {
310       0, 4,
311    };
312    const uint32_t expected_value[] = {
313       1, 2
314    };
315 
316    run_shader(kernel_source, out.size(), 1, 1, out);
317    for (int i = 0; i < out.size(); ++i) {
318       EXPECT_EQ(out[i].ptr, expected_ptr[i]);
319    }
320 }
321 
TEST_F(ComputeTest,globals_8bit)322 TEST_F(ComputeTest, globals_8bit)
323 {
324    const char *kernel_source =
325    "__kernel void main_test(__global unsigned char *inout)\n\
326    {\n\
327        uint idx = get_global_id(0);\n\
328        inout[idx] = inout[idx] + 1;\n\
329    }\n";
330    auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
331    const uint8_t expected[] = {
332       101, 111, 121, 131
333    };
334    run_shader(kernel_source, inout.size(), 1, 1, inout);
335    for (int i = 0; i < inout.size(); ++i)
336       EXPECT_EQ(inout[i], expected[i]);
337 }
338 
TEST_F(ComputeTest,globals_16bit)339 TEST_F(ComputeTest, globals_16bit)
340 {
341    const char *kernel_source =
342    "__kernel void main_test(__global unsigned short *inout)\n\
343    {\n\
344        uint idx = get_global_id(0);\n\
345        inout[idx] = inout[idx] + 1;\n\
346    }\n";
347    auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
348    const uint16_t expected[] = {
349       10001, 10011, 10021, 10031
350    };
351    run_shader(kernel_source, inout.size(), 1, 1, inout);
352    for (int i = 0; i < inout.size(); ++i)
353       EXPECT_EQ(inout[i], expected[i]);
354 }
355 
TEST_F(ComputeTest,globals_64bit)356 TEST_F(ComputeTest, globals_64bit)
357 {
358    const char *kernel_source =
359    "__kernel void main_test(__global unsigned long *inout)\n\
360    {\n\
361        uint idx = get_global_id(0);\n\
362        inout[idx] = inout[idx] + 1;\n\
363    }\n";
364    uint64_t base = 1ull << 50;
365    auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
366                                     SHADER_ARG_INOUT);
367    const uint64_t expected[] = {
368       base + 1, base + 11, base + 21, base + 31
369    };
370    run_shader(kernel_source, inout.size(), 1, 1, inout);
371    for (int i = 0; i < inout.size(); ++i)
372       EXPECT_EQ(inout[i], expected[i]);
373 }
374 
TEST_F(ComputeTest,built_ins_global_id)375 TEST_F(ComputeTest, built_ins_global_id)
376 {
377    const char *kernel_source =
378    "__kernel void main_test(__global uint *output)\n\
379    {\n\
380        output[get_global_id(0)] = get_global_id(0);\n\
381    }\n";
382    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
383                                      SHADER_ARG_OUTPUT);
384    const uint32_t expected[] = {
385       0, 1, 2, 3
386    };
387 
388    run_shader(kernel_source, output.size(), 1, 1, output);
389    for (int i = 0; i < output.size(); ++i)
390       EXPECT_EQ(output[i], expected[i]);
391 }
392 
TEST_F(ComputeTest,built_ins_global_id_rmw)393 TEST_F(ComputeTest, built_ins_global_id_rmw)
394 {
395    const char *kernel_source =
396    "__kernel void main_test(__global uint *output)\n\
397    {\n\
398        uint id = get_global_id(0);\n\
399        output[id] = output[id] * (id + 1);\n\
400    }\n";
401    auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
402                                     SHADER_ARG_INOUT);
403    const uint32_t expected[] = {
404       0x00000001, 0x20000002, 0x00060006, 0x1004080c
405    };
406    run_shader(kernel_source, inout.size(), 1, 1, inout);
407    for (int i = 0; i < inout.size(); ++i)
408       EXPECT_EQ(inout[i], expected[i]);
409 }
410 
TEST_F(ComputeTest,types_float_basics)411 TEST_F(ComputeTest, types_float_basics)
412 {
413    const char *kernel_source =
414    "__kernel void main_test(__global uint *output)\n\
415    {\n\
416        output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
417    }\n";
418    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
419                                      SHADER_ARG_OUTPUT);
420    const uint32_t expected[] = {
421       1, 2, 3, 4
422    };
423    run_shader(kernel_source, output.size(), 1, 1, output);
424    for (int i = 0; i < output.size(); ++i)
425       EXPECT_EQ(output[i], expected[i]);
426 }
427 
TEST_F(ComputeTest,DISABLED_types_double_basics)428 TEST_F(ComputeTest, DISABLED_types_double_basics)
429 {
430    /* Disabled because doubles are unsupported */
431    const char *kernel_source =
432    "__kernel void main_test(__global uint *output)\n\
433    {\n\
434        output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435    }\n";
436    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437                                      SHADER_ARG_OUTPUT);
438    const uint32_t expected[] = {
439       1, 2, 3, 4
440    };
441    run_shader(kernel_source, output.size(), 1, 1, output);
442    for (int i = 0; i < output.size(); ++i)
443       EXPECT_EQ(output[i], expected[i]);
444 }
445 
TEST_F(ComputeTest,types_short_basics)446 TEST_F(ComputeTest, types_short_basics)
447 {
448    const char *kernel_source =
449    "__kernel void main_test(__global uint *output)\n\
450    {\n\
451        output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452    }\n";
453    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454                                      SHADER_ARG_OUTPUT);
455    const uint32_t expected[] = {
456       1, 2, 3, 4
457    };
458    run_shader(kernel_source, output.size(), 1, 1, output);
459    for (int i = 0; i < output.size(); ++i)
460       EXPECT_EQ(output[i], expected[i]);
461 }
462 
TEST_F(ComputeTest,types_char_basics)463 TEST_F(ComputeTest, types_char_basics)
464 {
465    const char *kernel_source =
466    "__kernel void main_test(__global uint *output)\n\
467    {\n\
468        output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469    }\n";
470    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471                                      SHADER_ARG_OUTPUT);
472    const uint32_t expected[] = {
473       1, 2, 3, 4
474    };
475    run_shader(kernel_source, output.size(), 1, 1, output);
476    for (int i = 0; i < output.size(); ++i)
477       EXPECT_EQ(output[i], expected[i]);
478 }
479 
TEST_F(ComputeTest,types_if_statement)480 TEST_F(ComputeTest, types_if_statement)
481 {
482    const char *kernel_source =
483    "__kernel void main_test(__global uint *output)\n\
484    {\n\
485        int idx = get_global_id(0);\n\
486        if (idx > 0)\n\
487            output[idx] = ~idx;\n\
488        else\n\
489            output[0] = 0xff;\n\
490    }\n";
491    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492                                      SHADER_ARG_OUTPUT);
493    const uint32_t expected[] = {
494       0xff, ~1u, ~2u, ~3u
495    };
496    run_shader(kernel_source, output.size(), 1, 1, output);
497    for (int i = 0; i < output.size(); ++i)
498       EXPECT_EQ(output[i], expected[i]);
499 }
500 
TEST_F(ComputeTest,types_do_while_loop)501 TEST_F(ComputeTest, types_do_while_loop)
502 {
503    const char *kernel_source =
504    "__kernel void main_test(__global uint *output)\n\
505    {\n\
506        int value = 1;\n\
507        int i = 1, n = get_global_id(0);\n\
508        do {\n\
509           value *= i++;\n\
510        } while (i <= n);\n\
511        output[n] = value;\n\
512    }\n";
513    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514                                      SHADER_ARG_OUTPUT);
515    const uint32_t expected[] = {
516       1, 1, 1*2, 1*2*3, 1*2*3*4
517    };
518    run_shader(kernel_source, output.size(), 1, 1, output);
519    for (int i = 0; i < output.size(); ++i)
520       EXPECT_EQ(output[i], expected[i]);
521 }
522 
TEST_F(ComputeTest,types_for_loop)523 TEST_F(ComputeTest, types_for_loop)
524 {
525    const char *kernel_source =
526    "__kernel void main_test(__global uint *output)\n\
527    {\n\
528        int value = 1;\n\
529        int n = get_global_id(0);\n\
530        for (int i = 1; i <= n; ++i)\n\
531           value *= i;\n\
532        output[n] = value;\n\
533    }\n";
534    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535                                      SHADER_ARG_OUTPUT);
536    const uint32_t expected[] = {
537       1, 1, 1*2, 1*2*3, 1*2*3*4
538    };
539    run_shader(kernel_source, output.size(), 1, 1, output);
540    for (int i = 0; i < output.size(); ++i)
541       EXPECT_EQ(output[i], expected[i]);
542 }
543 
TEST_F(ComputeTest,complex_types_local_array_long)544 TEST_F(ComputeTest, complex_types_local_array_long)
545 {
546    const char *kernel_source =
547    "__kernel void main_test(__global ulong *inout)\n\
548    {\n\
549       ulong tmp[] = {\n\
550          get_global_id(1) + 0x00000000,\n\
551          get_global_id(1) + 0x10000001,\n\
552          get_global_id(1) + 0x20000020,\n\
553          get_global_id(1) + 0x30000300,\n\
554       };\n\
555       uint idx = get_global_id(0);\n\
556       inout[idx] = tmp[idx];\n\
557    }\n";
558    auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559    const uint64_t expected[] = {
560       0x00000000, 0x10000001, 0x20000020, 0x30000300,
561    };
562    run_shader(kernel_source, inout.size(), 1, 1, inout);
563    for (int i = 0; i < inout.size(); ++i)
564       EXPECT_EQ(inout[i], expected[i]);
565 }
566 
TEST_F(ComputeTest,complex_types_local_array_short)567 TEST_F(ComputeTest, complex_types_local_array_short)
568 {
569    const char *kernel_source =
570    "__kernel void main_test(__global ushort *inout)\n\
571    {\n\
572       ushort tmp[] = {\n\
573          get_global_id(1) + 0x00,\n\
574          get_global_id(1) + 0x10,\n\
575          get_global_id(1) + 0x20,\n\
576          get_global_id(1) + 0x30,\n\
577       };\n\
578       uint idx = get_global_id(0);\n\
579       inout[idx] = tmp[idx];\n\
580    }\n";
581    auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582    const uint16_t expected[] = {
583       0x00, 0x10, 0x20, 0x30,
584    };
585    run_shader(kernel_source, inout.size(), 1, 1, inout);
586    for (int i = 0; i < inout.size(); ++i)
587       EXPECT_EQ(inout[i], expected[i]);
588 }
589 
TEST_F(ComputeTest,complex_types_local_array_struct_vec_float_misaligned)590 TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591 {
592    const char *kernel_source =
593    "struct has_vecs { uchar c; ushort s; float2 f; };\n\
594    __kernel void main_test(__global uint *inout)\n\
595    {\n\
596       struct has_vecs tmp[] = {\n\
597          { 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598          { 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599          { 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600          { 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601       };\n\
602       uint idx = get_global_id(0);\n\
603       uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604       inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605    }\n";
606    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607    const uint16_t expected[] = { 101, 404, 909, 1616 };
608    run_shader(kernel_source, inout.size(), 1, 1, inout);
609    for (int i = 0; i < inout.size(); ++i)
610       EXPECT_EQ(inout[i], expected[i]);
611 }
612 
TEST_F(ComputeTest,complex_types_local_array)613 TEST_F(ComputeTest, complex_types_local_array)
614 {
615    const char *kernel_source =
616    "__kernel void main_test(__global uint *inout)\n\
617    {\n\
618       uint tmp[] = {\n\
619          get_global_id(1) + 0x00,\n\
620          get_global_id(1) + 0x10,\n\
621          get_global_id(1) + 0x20,\n\
622          get_global_id(1) + 0x30,\n\
623       };\n\
624       uint idx = get_global_id(0);\n\
625       inout[idx] = tmp[idx];\n\
626    }\n";
627    auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628    const uint32_t expected[] = {
629       0x00, 0x10, 0x20, 0x30,
630    };
631    run_shader(kernel_source, inout.size(), 1, 1, inout);
632    for (int i = 0; i < inout.size(); ++i)
633       EXPECT_EQ(inout[i], expected[i]);
634 }
635 
TEST_F(ComputeTest,complex_types_global_struct_array)636 TEST_F(ComputeTest, complex_types_global_struct_array)
637 {
638    struct two_vals { uint32_t add; uint32_t mul; };
639    const char *kernel_source =
640    "struct two_vals { uint add; uint mul; };\n\
641    __kernel void main_test(__global struct two_vals *in_out)\n\
642    {\n\
643       uint id = get_global_id(0);\n\
644       in_out[id].add = in_out[id].add + id;\n\
645       in_out[id].mul = in_out[id].mul * id;\n\
646    }\n";
647    auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648                                            SHADER_ARG_INOUT);
649    const struct two_vals expected[] = {
650       { 8 + 0, 8 * 0 },
651       { 16 + 1, 16 * 1 },
652       { 64 + 2, 64 * 2 },
653       { 65536 + 3, 65536 * 3 }
654    };
655    run_shader(kernel_source, inout.size(), 1, 1, inout);
656    for (int i = 0; i < inout.size(); ++i) {
657       EXPECT_EQ(inout[i].add, expected[i].add);
658       EXPECT_EQ(inout[i].mul, expected[i].mul);
659    }
660 }
661 
TEST_F(ComputeTest,complex_types_global_uint2)662 TEST_F(ComputeTest, complex_types_global_uint2)
663 {
664    struct uint2 { uint32_t x; uint32_t y; };
665    const char *kernel_source =
666    "__kernel void main_test(__global uint2 *inout)\n\
667    {\n\
668       uint id = get_global_id(0);\n\
669       inout[id].x = inout[id].x + id;\n\
670       inout[id].y = inout[id].y * id;\n\
671    }\n";
672    auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673                                         SHADER_ARG_INOUT);
674    const struct uint2 expected[] = {
675       { 8 + 0, 8 * 0 },
676       { 16 + 1, 16 * 1 },
677       { 64 + 2, 64 * 2 },
678       { 65536 + 3, 65536 * 3 }
679    };
680    run_shader(kernel_source, inout.size(), 1, 1, inout);
681    for (int i = 0; i < inout.size(); ++i) {
682       EXPECT_EQ(inout[i].x, expected[i].x);
683       EXPECT_EQ(inout[i].y, expected[i].y);
684    }
685 }
686 
TEST_F(ComputeTest,complex_types_global_ushort2)687 TEST_F(ComputeTest, complex_types_global_ushort2)
688 {
689    struct ushort2 { uint16_t x; uint16_t y; };
690    const char *kernel_source =
691    "__kernel void main_test(__global ushort2 *inout)\n\
692    {\n\
693       uint id = get_global_id(0);\n\
694       inout[id].x = inout[id].x + id;\n\
695       inout[id].y = inout[id].y * id;\n\
696    }\n";
697    auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698                                             { (uint16_t)65536, (uint16_t)65536 } },
699                                           SHADER_ARG_INOUT);
700    const struct ushort2 expected[] = {
701       { 8 + 0, 8 * 0 },
702       { 16 + 1, 16 * 1 },
703       { 64 + 2, 64 * 2 },
704       { (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705    };
706    run_shader(kernel_source, inout.size(), 1, 1, inout);
707    for (int i = 0; i < inout.size(); ++i) {
708       EXPECT_EQ(inout[i].x, expected[i].x);
709       EXPECT_EQ(inout[i].y, expected[i].y);
710    }
711 }
712 
TEST_F(ComputeTest,complex_types_global_uchar3)713 TEST_F(ComputeTest, complex_types_global_uchar3)
714 {
715    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716    const char *kernel_source =
717    "__kernel void main_test(__global uchar3 *inout)\n\
718    {\n\
719       uint id = get_global_id(0);\n\
720       inout[id].x = inout[id].x + id;\n\
721       inout[id].y = inout[id].y * id;\n\
722       inout[id].z = inout[id].y + inout[id].x;\n\
723    }\n";
724    auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725                                          SHADER_ARG_INOUT);
726    const struct uchar3 expected[] = {
727       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731    };
732    run_shader(kernel_source, inout.size(), 1, 1, inout);
733    for (int i = 0; i < inout.size(); ++i) {
734       EXPECT_EQ(inout[i].x, expected[i].x);
735       EXPECT_EQ(inout[i].y, expected[i].y);
736       EXPECT_EQ(inout[i].z, expected[i].z);
737    }
738 }
739 
TEST_F(ComputeTest,complex_types_constant_uchar3)740 TEST_F(ComputeTest, complex_types_constant_uchar3)
741 {
742    struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743    const char *kernel_source =
744    "__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745    {\n\
746       uint id = get_global_id(0);\n\
747       out[id].x = in[id].x + id;\n\
748       out[id].y = in[id].y * id;\n\
749       out[id].z = out[id].y + out[id].x;\n\
750    }\n";
751    auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752                                       SHADER_ARG_INPUT);
753    auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754                                       SHADER_ARG_OUTPUT);
755    const struct uchar3 expected[] = {
756       { 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757       { 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758       { 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759       { (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760    };
761    run_shader(kernel_source, out.size(), 1, 1, out, in);
762    for (int i = 0; i < out.size(); ++i) {
763       EXPECT_EQ(out[i].x, expected[i].x);
764       EXPECT_EQ(out[i].y, expected[i].y);
765       EXPECT_EQ(out[i].z, expected[i].z);
766    }
767 }
768 
TEST_F(ComputeTest,complex_types_global_uint8)769 TEST_F(ComputeTest, complex_types_global_uint8)
770 {
771    struct uint8 {
772       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774    };
775    const char *kernel_source =
776    "__kernel void main_test(__global uint8 *inout)\n\
777    {\n\
778       uint id = get_global_id(0);\n\
779       inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780    }\n";
781    auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782                                         SHADER_ARG_INOUT);
783    const struct uint8 expected[] = {
784       { 2, 4, 6, 8, 10, 12, 14, 16 }
785    };
786    run_shader(kernel_source, inout.size(), 1, 1, inout);
787    for (int i = 0; i < inout.size(); ++i) {
788       EXPECT_EQ(inout[i].s0, expected[i].s0);
789       EXPECT_EQ(inout[i].s1, expected[i].s1);
790       EXPECT_EQ(inout[i].s2, expected[i].s2);
791       EXPECT_EQ(inout[i].s3, expected[i].s3);
792       EXPECT_EQ(inout[i].s4, expected[i].s4);
793       EXPECT_EQ(inout[i].s5, expected[i].s5);
794       EXPECT_EQ(inout[i].s6, expected[i].s6);
795       EXPECT_EQ(inout[i].s7, expected[i].s7);
796    }
797 }
798 
TEST_F(ComputeTest,complex_types_local_ulong16)799 TEST_F(ComputeTest, complex_types_local_ulong16)
800 {
801    struct ulong16 {
802       uint64_t values[16];
803    };
804    const char *kernel_source =
805    R"(__kernel void main_test(__global ulong16 *inout)
806    {
807       __local ulong16 local_array[2];
808       uint id = get_global_id(0);
809       local_array[id] = inout[id];
810       barrier(CLK_LOCAL_MEM_FENCE);
811       inout[id] = local_array[0] * 2;
812    })";
813    auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814                                         SHADER_ARG_INOUT);
815    const struct ulong16 expected[] = {
816       { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817    };
818    run_shader(kernel_source, inout.size(), 1, 1, inout);
819    for (int i = 0; i < inout.size(); ++i) {
820       for (int j = 0; j < 16; ++j) {
821          EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822       }
823    }
824 }
825 
TEST_F(ComputeTest,complex_types_constant_uint8)826 TEST_F(ComputeTest, complex_types_constant_uint8)
827 {
828    struct uint8 {
829       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831    };
832    const char *kernel_source =
833    "__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834    {\n\
835       uint id = get_global_id(0);\n\
836       out[id].s01234567 = in[id].s01234567 * 2;\n\
837    }\n";
838    auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839                                      SHADER_ARG_INPUT);
840    auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841                                       SHADER_ARG_INOUT);
842    const struct uint8 expected[] = {
843       { 2, 4, 6, 8, 10, 12, 14, 16 }
844    };
845    run_shader(kernel_source, out.size(), 1, 1, out, in);
846    for (int i = 0; i < out.size(); ++i) {
847       EXPECT_EQ(out[i].s0, expected[i].s0);
848       EXPECT_EQ(out[i].s1, expected[i].s1);
849       EXPECT_EQ(out[i].s2, expected[i].s2);
850       EXPECT_EQ(out[i].s3, expected[i].s3);
851       EXPECT_EQ(out[i].s4, expected[i].s4);
852       EXPECT_EQ(out[i].s5, expected[i].s5);
853       EXPECT_EQ(out[i].s6, expected[i].s6);
854       EXPECT_EQ(out[i].s7, expected[i].s7);
855    }
856 }
857 
TEST_F(ComputeTest,complex_types_const_array)858 TEST_F(ComputeTest, complex_types_const_array)
859 {
860    const char *kernel_source =
861    "__kernel void main_test(__global uint *output)\n\
862    {\n\
863        const uint foo[] = { 100, 101, 102, 103 };\n\
864        output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
865    }\n";
866    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
867                                      SHADER_ARG_OUTPUT);
868    const uint32_t expected[] = {
869       100, 101, 102, 103
870    };
871    run_shader(kernel_source, output.size(), 1, 1, output);
872    for (int i = 0; i < output.size(); ++i)
873       EXPECT_EQ(output[i], expected[i]);
874 }
875 
TEST_F(ComputeTest,mem_access_load_store_ordering)876 TEST_F(ComputeTest, mem_access_load_store_ordering)
877 {
878    const char *kernel_source =
879    "__kernel void main_test(__global uint *output)\n\
880    {\n\
881        uint foo[4];\n\
882        foo[0] = 0x11111111;\n\
883        foo[1] = 0x22222222;\n\
884        foo[2] = 0x44444444;\n\
885        foo[3] = 0x88888888;\n\
886        foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
887        foo[0] += get_global_id(0); // foo[0] = tid\n\
888        foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
889        output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
890    }\n";
891    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
892                                      SHADER_ARG_OUTPUT);
893    const uint16_t expected[] = {
894       0, 1, 2, 3
895    };
896    run_shader(kernel_source, output.size(), 1, 1, output);
897    for (int i = 0; i < output.size(); ++i)
898       EXPECT_EQ(output[i], expected[i]);
899 }
900 
TEST_F(ComputeTest,two_const_arrays)901 TEST_F(ComputeTest, two_const_arrays)
902 {
903    const char *kernel_source =
904    "__kernel void main_test(__global uint *output)\n\
905    {\n\
906       uint id = get_global_id(0);\n\
907       uint foo[4] = {100, 101, 102, 103};\n\
908       uint bar[4] = {1, 2, 3, 4};\n\
909       output[id] = foo[id] * bar[id];\n\
910    }\n";
911    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
912                                      SHADER_ARG_OUTPUT);
913    const uint32_t expected[] = {
914       100, 202, 306, 412
915    };
916    run_shader(kernel_source, output.size(), 1, 1, output);
917    for (int i = 0; i < output.size(); ++i)
918       EXPECT_EQ(output[i], expected[i]);
919 }
920 
TEST_F(ComputeTest,imod_pos)921 TEST_F(ComputeTest, imod_pos)
922 {
923    const char *kernel_source =
924    "__kernel void main_test(__global int *inout)\n\
925    {\n\
926        inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
927    }\n";
928    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
929                                    SHADER_ARG_INOUT);
930    const int32_t expected[] = {
931       -1, 0, -2, -1,  0, 1, 2, 0, 1
932    };
933    run_shader(kernel_source, inout.size(), 1, 1, inout);
934    for (int i = 0; i < inout.size(); ++i)
935       EXPECT_EQ(inout[i], expected[i]);
936 }
937 
TEST_F(ComputeTest,imod_neg)938 TEST_F(ComputeTest, imod_neg)
939 {
940    const char *kernel_source =
941    "__kernel void main_test(__global int *inout)\n\
942    {\n\
943        inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
944    }\n";
945    auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
946                                    SHADER_ARG_INOUT);
947    const int32_t expected[] = {
948       -1, 0, -2, -1,  0, 1, 2, 0, 1
949    };
950    run_shader(kernel_source, inout.size(), 1, 1, inout);
951    for (int i = 0; i < inout.size(); ++i)
952       EXPECT_EQ(inout[i], expected[i]);
953 }
954 
TEST_F(ComputeTest,umod)955 TEST_F(ComputeTest, umod)
956 {
957    const char *kernel_source =
958    "__kernel void main_test(__global uint *inout)\n\
959    {\n\
960        inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
961    }\n";
962    auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
963                                     SHADER_ARG_INOUT);
964    const uint32_t expected[] = {
965       0xfffffffa, 0xfffffffb, 0, 1, 2
966    };
967    run_shader(kernel_source, inout.size(), 1, 1, inout);
968    for (int i = 0; i < inout.size(); ++i)
969       EXPECT_EQ(inout[i], expected[i]);
970 }
971 
TEST_F(ComputeTest,rotate)972 TEST_F(ComputeTest, rotate)
973 {
974    const char *kernel_source =
975    "__kernel void main_test(__global uint *inout)\n\
976    {\n\
977        inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
978    }\n";
979    auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
980                                     SHADER_ARG_INOUT);
981    const uint32_t expected[] = {
982       0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
983    };
984    run_shader(kernel_source, inout.size(), 1, 1, inout);
985    for (int i = 0; i < inout.size(); ++i)
986       EXPECT_EQ(inout[i], expected[i]);
987 }
988 
TEST_F(ComputeTest,popcount)989 TEST_F(ComputeTest, popcount)
990 {
991    const char *kernel_source =
992    "__kernel void main_test(__global uint *inout)\n\
993    {\n\
994        inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
995    }\n";
996    auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
997                                     SHADER_ARG_INOUT);
998    const uint32_t expected[] = {
999       0, 1, 2, 2, 4, 32
1000    };
1001    run_shader(kernel_source, inout.size(), 1, 1, inout);
1002    for (int i = 0; i < inout.size(); ++i)
1003       EXPECT_EQ(inout[i], expected[i]);
1004 }
1005 
TEST_F(ComputeTest,hadd)1006 TEST_F(ComputeTest, hadd)
1007 {
1008    const char *kernel_source =
1009    "__kernel void main_test(__global uint *inout)\n\
1010    {\n\
1011        inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1012    }\n";
1013    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1014                                     SHADER_ARG_INOUT);
1015    const uint32_t expected[] = {
1016       (1u << 31) >> 1,
1017       ((1u << 31) + 1) >> 1,
1018       ((1u << 31) + 2) >> 1,
1019       ((1u << 31) + 3) >> 1,
1020       ((1ull << 31) + 0xfffffffc) >> 1,
1021       ((1ull << 31) + 0xfffffffd) >> 1,
1022       ((1ull << 31) + 0xfffffffe) >> 1,
1023       ((1ull << 31) + 0xffffffff) >> 1,
1024    };
1025    run_shader(kernel_source, inout.size(), 1, 1, inout);
1026    for (int i = 0; i < inout.size(); ++i)
1027       EXPECT_EQ(inout[i], expected[i]);
1028 }
1029 
TEST_F(ComputeTest,rhadd)1030 TEST_F(ComputeTest, rhadd)
1031 {
1032    const char *kernel_source =
1033    "__kernel void main_test(__global uint *inout)\n\
1034    {\n\
1035        inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1036    }\n";
1037    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1038                                     SHADER_ARG_INOUT);
1039    const uint32_t expected[] = {
1040       ((1u << 31) + 1) >> 1,
1041       ((1u << 31) + 2) >> 1,
1042       ((1u << 31) + 3) >> 1,
1043       ((1u << 31) + 4) >> 1,
1044       ((1ull << 31) + 0xfffffffd) >> 1,
1045       ((1ull << 31) + 0xfffffffe) >> 1,
1046       ((1ull << 31) + 0xffffffff) >> 1,
1047       ((1ull << 31) + (1ull << 32)) >> 1,
1048    };
1049    run_shader(kernel_source, inout.size(), 1, 1, inout);
1050    for (int i = 0; i < inout.size(); ++i)
1051       EXPECT_EQ(inout[i], expected[i]);
1052 }
1053 
TEST_F(ComputeTest,add_sat)1054 TEST_F(ComputeTest, add_sat)
1055 {
1056    const char *kernel_source =
1057    "__kernel void main_test(__global uint *inout)\n\
1058    {\n\
1059        inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1060    }\n";
1061    auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1062                                     SHADER_ARG_INOUT);
1063    const uint32_t expected[] = {
1064       0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1065    };
1066    run_shader(kernel_source, inout.size(), 1, 1, inout);
1067    for (int i = 0; i < inout.size(); ++i)
1068       EXPECT_EQ(inout[i], expected[i]);
1069 }
1070 
TEST_F(ComputeTest,sub_sat)1071 TEST_F(ComputeTest, sub_sat)
1072 {
1073    const char *kernel_source =
1074    "__kernel void main_test(__global uint *inout)\n\
1075    {\n\
1076        inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1077    }\n";
1078    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1079    const uint32_t expected[] = {
1080       0, 0, 0, 1
1081    };
1082    run_shader(kernel_source, inout.size(), 1, 1, inout);
1083    for (int i = 0; i < inout.size(); ++i)
1084       EXPECT_EQ(inout[i], expected[i]);
1085 }
1086 
TEST_F(ComputeTest,mul_hi)1087 TEST_F(ComputeTest, mul_hi)
1088 {
1089    const char *kernel_source =
1090    "__kernel void main_test(__global uint *inout)\n\
1091    {\n\
1092        inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1093    }\n";
1094    auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1095    const uint32_t expected[] = {
1096       0, 0, 1, 1, (1u << 30)
1097    };
1098    run_shader(kernel_source, inout.size(), 1, 1, inout);
1099    for (int i = 0; i < inout.size(); ++i)
1100       EXPECT_EQ(inout[i], expected[i]);
1101 }
1102 
TEST_F(ComputeTest,ldexp_x)1103 TEST_F(ComputeTest, ldexp_x)
1104 {
1105    const char *kernel_source =
1106    "__kernel void main_test(__global float *inout)\n\
1107    {\n\
1108        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1109    }\n";
1110    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1111    const float expected[] = {
1112       ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1113    };
1114    run_shader(kernel_source, inout.size(), 1, 1, inout);
1115    for (int i = 0; i < inout.size(); ++i)
1116       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1117 }
1118 
TEST_F(ComputeTest,ldexp_y)1119 TEST_F(ComputeTest, ldexp_y)
1120 {
1121    const char *kernel_source =
1122    "__kernel void main_test(__global float *inout)\n\
1123    {\n\
1124        inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1125    }\n";
1126    auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1127    const float expected[] = {
1128       ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1129    };
1130    run_shader(kernel_source, inout.size(), 1, 1, inout);
1131    for (int i = 0; i < inout.size(); ++i)
1132       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1133 }
1134 
TEST_F(ComputeTest,frexp_ret)1135 TEST_F(ComputeTest, frexp_ret)
1136 {
1137    const char *kernel_source =
1138    "__kernel void main_test(__global float *inout)\n\
1139    {\n\
1140        int exp;\n\
1141        inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1142    }\n";
1143    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1144    const float expected[] = {
1145       0.0f, 0.5f, 0.5f, 0.75f
1146    };
1147    run_shader(kernel_source, inout.size(), 1, 1, inout);
1148    for (int i = 0; i < inout.size(); ++i)
1149       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1150 }
1151 
TEST_F(ComputeTest,frexp_exp)1152 TEST_F(ComputeTest, frexp_exp)
1153 {
1154    const char *kernel_source =
1155    "__kernel void main_test(__global float *inout)\n\
1156    {\n\
1157        int exp;\n\
1158        frexp(inout[get_global_id(0)], &exp);\n\
1159        inout[get_global_id(0)] = (float)exp;\n\
1160    }\n";
1161    auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1162    const float expected[] = {
1163       0.0f, 0.0f, 1.0f, 2.0f
1164    };
1165    run_shader(kernel_source, inout.size(), 1, 1, inout);
1166    for (int i = 0; i < inout.size(); ++i)
1167       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1168 }
1169 
TEST_F(ComputeTest,clz)1170 TEST_F(ComputeTest, clz)
1171 {
1172    const char *kernel_source =
1173    "__kernel void main_test(__global uint *inout)\n\
1174    {\n\
1175        inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1176    }\n";
1177    auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff,  (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1178    const uint32_t expected[] = {
1179       32, 31, 16, 1, 0
1180    };
1181    run_shader(kernel_source, inout.size(), 1, 1, inout);
1182    for (int i = 0; i < inout.size(); ++i)
1183       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1184 }
1185 
TEST_F(ComputeTest,sin)1186 TEST_F(ComputeTest, sin)
1187 {
1188    struct sin_vals { float in; float clc; float native; };
1189    const char *kernel_source =
1190    "struct sin_vals { float in; float clc; float native; };\n\
1191    __kernel void main_test(__global struct sin_vals *inout)\n\
1192    {\n\
1193        inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1194        inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1195    }\n";
1196    const vector<sin_vals> input = {
1197       { 0.0f, 0.0f, 0.0f },
1198       { 1.0f, 0.0f, 0.0f },
1199       { 2.0f, 0.0f, 0.0f },
1200       { 3.0f, 0.0f, 0.0f },
1201    };
1202    auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1203    const struct sin_vals expected[] = {
1204       { 0.0f, 0.0f,       0.0f       },
1205       { 1.0f, sin(1.0f), sin(1.0f) },
1206       { 2.0f, sin(2.0f), sin(2.0f) },
1207       { 3.0f, sin(3.0f), sin(3.0f) },
1208    };
1209    run_shader(kernel_source, inout.size(), 1, 1, inout);
1210    for (int i = 0; i < inout.size(); ++i) {
1211       EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1212       EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1213       EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1214    }
1215 }
1216 
TEST_F(ComputeTest,cosh)1217 TEST_F(ComputeTest, cosh)
1218 {
1219    const char *kernel_source =
1220    "__kernel void main_test(__global float *inout)\n\
1221    {\n\
1222        inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1223    }\n";
1224    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1225    const float expected[] = {
1226       cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1227    };
1228    run_shader(kernel_source, inout.size(), 1, 1, inout);
1229    for (int i = 0; i < inout.size(); ++i)
1230       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1231 }
1232 
TEST_F(ComputeTest,exp)1233 TEST_F(ComputeTest, exp)
1234 {
1235    const char *kernel_source =
1236    "__kernel void main_test(__global float *inout)\n\
1237    {\n\
1238        inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1239    }\n";
1240    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1241    const float expected[] = {
1242       exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1243    };
1244    run_shader(kernel_source, inout.size(), 1, 1, inout);
1245    for (int i = 0; i < inout.size(); ++i)
1246       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1247 }
1248 
TEST_F(ComputeTest,exp10)1249 TEST_F(ComputeTest, exp10)
1250 {
1251    const char *kernel_source =
1252    "__kernel void main_test(__global float *inout)\n\
1253    {\n\
1254        inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1255    }\n";
1256    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1257    const float expected[] = {
1258       pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1259    };
1260    run_shader(kernel_source, inout.size(), 1, 1, inout);
1261    for (int i = 0; i < inout.size(); ++i)
1262       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1263 }
1264 
TEST_F(ComputeTest,exp2)1265 TEST_F(ComputeTest, exp2)
1266 {
1267    const char *kernel_source =
1268    "__kernel void main_test(__global float *inout)\n\
1269    {\n\
1270        inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1271    }\n";
1272    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1273    const float expected[] = {
1274       pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1275    };
1276    run_shader(kernel_source, inout.size(), 1, 1, inout);
1277    for (int i = 0; i < inout.size(); ++i)
1278       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1279 }
1280 
TEST_F(ComputeTest,log)1281 TEST_F(ComputeTest, log)
1282 {
1283    const char *kernel_source =
1284    "__kernel void main_test(__global float *inout)\n\
1285    {\n\
1286        inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1287    }\n";
1288    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1289    const float expected[] = {
1290       log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1291    };
1292    run_shader(kernel_source, inout.size(), 1, 1, inout);
1293    for (int i = 0; i < inout.size(); ++i)
1294       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1295 }
1296 
TEST_F(ComputeTest,log10)1297 TEST_F(ComputeTest, log10)
1298 {
1299    const char *kernel_source =
1300    "__kernel void main_test(__global float *inout)\n\
1301    {\n\
1302        inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1303    }\n";
1304    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1305    const float expected[] = {
1306       log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1307    };
1308    run_shader(kernel_source, inout.size(), 1, 1, inout);
1309    for (int i = 0; i < inout.size(); ++i)
1310       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1311 }
1312 
TEST_F(ComputeTest,log2)1313 TEST_F(ComputeTest, log2)
1314 {
1315    const char *kernel_source =
1316    "__kernel void main_test(__global float *inout)\n\
1317    {\n\
1318        inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1319    }\n";
1320    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1321    const float expected[] = {
1322       log(0.0f) / log(2.0f), log(1.0f) / log(2.0f), log(2.0f) / log(2.0f), log(3.0f) / log(2.0f)
1323    };
1324    run_shader(kernel_source, inout.size(), 1, 1, inout);
1325    for (int i = 0; i < inout.size(); ++i)
1326       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1327 }
1328 
TEST_F(ComputeTest,rint)1329 TEST_F(ComputeTest, rint)
1330 {
1331    const char *kernel_source =
1332    "__kernel void main_test(__global float *inout)\n\
1333    {\n\
1334       inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1335    }\n";
1336 
1337    auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1338    const float expected[] = {
1339       0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1340    };
1341    run_shader(kernel_source, inout.size(), 1, 1, inout);
1342    for (int i = 0; i < inout.size(); ++i)
1343       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1344 }
1345 
TEST_F(ComputeTest,round)1346 TEST_F(ComputeTest, round)
1347 {
1348    const char *kernel_source =
1349    "__kernel void main_test(__global float *inout)\n\
1350    {\n\
1351        inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1352    }\n";
1353    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1354                                  SHADER_ARG_INOUT);
1355    const float expected[] = {
1356       0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1357    };
1358    run_shader(kernel_source, inout.size(), 1, 1, inout);
1359    for (int i = 0; i < inout.size(); ++i)
1360       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1361 }
1362 
TEST_F(ComputeTest,arg_by_val)1363 TEST_F(ComputeTest, arg_by_val)
1364 {
1365    const char *kernel_source =
1366    "__kernel void main_test(__global float *inout, float mul)\n\
1367    {\n\
1368        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1369    }\n";
1370    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1371                                  SHADER_ARG_INOUT);
1372    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1373    const float expected[] = {
1374       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1375    };
1376    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1377    for (int i = 0; i < inout.size(); ++i)
1378       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1379 }
1380 
TEST_F(ComputeTest,uint8_by_val)1381 TEST_F(ComputeTest, uint8_by_val)
1382 {
1383    struct uint8 {
1384       uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1385       uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1386    };
1387    const char *kernel_source =
1388    "__kernel void main_test(__global uint *out, uint8 val)\n\
1389    {\n\
1390        out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1391                                val.s4 + val.s5 + val.s6 + val.s7;\n\
1392    }\n";
1393    auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1394    auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1395    const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1396    run_shader(kernel_source, out.size(), 1, 1, out, val);
1397    for (int i = 0; i < out.size(); ++i)
1398       EXPECT_EQ(out[i], expected[i]);
1399 }
1400 
TEST_F(ComputeTest,link)1401 TEST_F(ComputeTest, link)
1402 {
1403    const char *foo_src =
1404    "float foo(float in)\n\
1405    {\n\
1406        return in * in;\n\
1407    }\n";
1408    const char *kernel_source =
1409    "float foo(float in);\n\
1410    __kernel void main_test(__global float *inout)\n\
1411    {\n\
1412        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1413    }\n";
1414    std::vector<const char *> srcs = { foo_src, kernel_source };
1415    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1416    const float expected[] = {
1417       4.0f,
1418    };
1419    run_shader(srcs, inout.size(), 1, 1, inout);
1420    for (int i = 0; i < inout.size(); ++i)
1421       EXPECT_EQ(inout[i], expected[i]);
1422 }
1423 
TEST_F(ComputeTest,link_library)1424 TEST_F(ComputeTest, link_library)
1425 {
1426    const char *bar_src =
1427    "float bar(float in)\n\
1428    {\n\
1429       return in * 5;\n\
1430    }\n";
1431    const char *foo_src =
1432    "float bar(float in);\n\
1433    float foo(float in)\n\
1434    {\n\
1435        return in * bar(in);\n\
1436    }\n";
1437    const char *kernel_source =
1438    "float foo(float in);\n\
1439    __kernel void main_test(__global float *inout)\n\
1440    {\n\
1441        inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1442    }\n";
1443    std::vector<Shader> libraries = {
1444       compile({ bar_src, kernel_source }, {}, true),
1445       compile({ foo_src }, {}, true)
1446    };
1447    Shader exe = link(libraries);
1448    auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1449    const float expected[] = {
1450       20.0f,
1451    };
1452    run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1453    for (int i = 0; i < inout.size(); ++i)
1454       EXPECT_EQ(inout[i], expected[i]);
1455 }
1456 
TEST_F(ComputeTest,localvar)1457 TEST_F(ComputeTest, localvar)
1458 {
1459    const char *kernel_source =
1460    "__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1461    void main_test(__global float *inout)\n\
1462    {\n\
1463       __local float2 tmp[2];\n\
1464       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1465       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1466       barrier(CLK_LOCAL_MEM_FENCE);\n\
1467       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1468    }\n";
1469 
1470    auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1471    const float expected[] = {
1472       9.0f, 5.0f
1473    };
1474    run_shader(kernel_source, inout.size(), 1, 1, inout);
1475    for (int i = 0; i < inout.size(); ++i)
1476       EXPECT_EQ(inout[i], expected[i]);
1477 }
1478 
TEST_F(ComputeTest,localvar_uchar2)1479 TEST_F(ComputeTest, localvar_uchar2)
1480 {
1481    const char *kernel_source =
1482    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1483    __kernel void main_test(__global uchar *inout)\n\
1484    {\n\
1485       __local uchar2 tmp[2];\n\
1486       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1487       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1488       barrier(CLK_LOCAL_MEM_FENCE);\n\
1489       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1490    }\n";
1491 
1492    auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1493    const uint8_t expected[] = { 9, 5 };
1494    run_shader(kernel_source, inout.size(), 1, 1, inout);
1495    for (int i = 0; i < inout.size(); ++i)
1496       EXPECT_EQ(inout[i], expected[i]);
1497 }
1498 
TEST_F(ComputeTest,work_group_size_hint)1499 TEST_F(ComputeTest, work_group_size_hint)
1500 {
1501    const char *kernel_source =
1502    "__attribute__((work_group_size_hint(2, 1, 1)))\n\
1503    __kernel void main_test(__global uint *output)\n\
1504    {\n\
1505        output[get_global_id(0)] = get_local_id(0);\n\
1506    }\n";
1507    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1508                                      SHADER_ARG_OUTPUT);
1509    const uint32_t expected[] = {
1510       0, 1, 2, 3
1511    };
1512    run_shader(kernel_source, output.size(), 1, 1, output);
1513    for (int i = 0; i < output.size(); ++i)
1514       EXPECT_EQ(output[i], expected[i]);
1515 }
1516 
TEST_F(ComputeTest,reqd_work_group_size)1517 TEST_F(ComputeTest, reqd_work_group_size)
1518 {
1519    const char *kernel_source =
1520    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1521    __kernel void main_test(__global uint *output)\n\
1522    {\n\
1523        output[get_global_id(0)] = get_local_id(0);\n\
1524    }\n";
1525    auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1526                                      SHADER_ARG_OUTPUT);
1527    const uint32_t expected[] = {
1528       0, 1, 0, 1
1529    };
1530    run_shader(kernel_source, output.size(), 1, 1, output);
1531    for (int i = 0; i < output.size(); ++i)
1532       EXPECT_EQ(output[i], expected[i]);
1533 }
1534 
TEST_F(ComputeTest,image)1535 TEST_F(ComputeTest, image)
1536 {
1537    const char* kernel_source =
1538    "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1539    {\n\
1540       int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1541       write_imagef(output, coords, read_imagef(input, coords));\n\
1542    }\n";
1543    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1544    validate(shader);
1545 }
1546 
TEST_F(ComputeTest,image_two_reads)1547 TEST_F(ComputeTest, image_two_reads)
1548 {
1549    const char* kernel_source =
1550    "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1551    {\n\
1552       if (is_float)\n\
1553          output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1554       else \n\
1555          output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1556    }\n";
1557    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1558    validate(shader);
1559 }
1560 
TEST_F(ComputeTest,image_unused)1561 TEST_F(ComputeTest, image_unused)
1562 {
1563    const char* kernel_source =
1564    "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1565    {\n\
1566    }\n";
1567    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1568    validate(shader);
1569 }
1570 
TEST_F(ComputeTest,image_read_write)1571 TEST_F(ComputeTest, image_read_write)
1572 {
1573    const char *kernel_source =
1574    R"(__kernel void main_test(read_write image2d_t image)
1575    {
1576       int2 coords = (int2)(get_global_id(0), get_global_id(1));
1577       write_imagef(image, coords, read_imagef(image, coords) + (float4)(1.0f, 1.0f, 1.0f, 1.0f));
1578    })";
1579    Shader shader = compile(std::vector<const char*>({ kernel_source }), { "-cl-std=cl3.0" });
1580    validate(shader);
1581 }
1582 
TEST_F(ComputeTest,sampler)1583 TEST_F(ComputeTest, sampler)
1584 {
1585    const char* kernel_source =
1586    "__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1587    {\n\
1588       output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1589    }\n";
1590    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1591    validate(shader);
1592 }
1593 
TEST_F(ComputeTest,image_dims)1594 TEST_F(ComputeTest, image_dims)
1595 {
1596    const char* kernel_source =
1597    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1598    {\n\
1599       output[get_global_id(0)] = get_image_width(roimage);\n\
1600       output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1601    }\n";
1602    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1603    validate(shader);
1604 }
1605 
TEST_F(ComputeTest,image_format)1606 TEST_F(ComputeTest, image_format)
1607 {
1608    const char* kernel_source =
1609    "__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1610    {\n\
1611       output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1612       output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1613    }\n";
1614    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1615    validate(shader);
1616 }
1617 
TEST_F(ComputeTest,image1d_buffer_t)1618 TEST_F(ComputeTest, image1d_buffer_t)
1619 {
1620    const char* kernel_source =
1621    "__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1622    {\n\
1623       write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1624    }\n";
1625    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1626    validate(shader);
1627 }
1628 
TEST_F(ComputeTest,local_ptr)1629 TEST_F(ComputeTest, local_ptr)
1630 {
1631    struct uint2 { uint32_t x, y; };
1632    const char *kernel_source =
1633    "__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1634    {\n\
1635       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1636       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1637       barrier(CLK_LOCAL_MEM_FENCE);\n\
1638       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1639    }\n";
1640    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1641    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1642    const uint8_t expected[] = { 9, 5 };
1643    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1644    for (int i = 0; i < inout.size(); ++i)
1645       EXPECT_EQ(inout[i], expected[i]);
1646 }
1647 
TEST_F(ComputeTest,two_local_ptrs)1648 TEST_F(ComputeTest, two_local_ptrs)
1649 {
1650    struct uint2 { uint32_t x, y; };
1651    const char *kernel_source =
1652    "__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1653    {\n\
1654       tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1655       tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1656       tmp2[get_local_id(0)] = get_global_id(0);\n\
1657       barrier(CLK_LOCAL_MEM_FENCE);\n\
1658       inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1659    }\n";
1660    auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1661    auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1662    auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1663    const uint8_t expected[] = { 9, 6 };
1664    run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1665    for (int i = 0; i < inout.size(); ++i)
1666       EXPECT_EQ(inout[i], expected[i]);
1667 }
1668 
TEST_F(ComputeTest,int8_to_float)1669 TEST_F(ComputeTest, int8_to_float)
1670 {
1671    const char *kernel_source =
1672    "__kernel void main_test(__global char* in, __global float* out)\n\
1673    {\n\
1674       uint pos = get_global_id(0);\n\
1675       out[pos] = in[pos] / 100.0f;\n\
1676    }";
1677    auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1678    auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1679    const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1680    run_shader(kernel_source, in.size(), 1, 1, in, out);
1681    for (int i = 0; i < in.size(); ++i)
1682       EXPECT_FLOAT_EQ(out[i], expected[i]);
1683 }
1684 
TEST_F(ComputeTest,vec_hint_float4)1685 TEST_F(ComputeTest, vec_hint_float4)
1686 {
1687    const char *kernel_source =
1688    "__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1689    {\n\
1690       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1691    }";
1692    Shader shader = compile({ kernel_source });
1693    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
1694    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1695 }
1696 
TEST_F(ComputeTest,vec_hint_uchar2)1697 TEST_F(ComputeTest, vec_hint_uchar2)
1698 {
1699    const char *kernel_source =
1700    "__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1701    {\n\
1702       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1703    }";
1704    Shader shader = compile({ kernel_source });
1705    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
1706    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1707 }
1708 
TEST_F(ComputeTest,vec_hint_none)1709 TEST_F(ComputeTest, vec_hint_none)
1710 {
1711    const char *kernel_source =
1712    "__kernel void main_test(__global float *inout)\n\
1713    {\n\
1714       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1715    }";
1716    Shader shader = compile({ kernel_source });
1717    EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
1718 }
1719 
TEST_F(ComputeTest,DISABLED_debug_layer_failure)1720 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1721 {
1722    /* This is a negative test case, it intentionally triggers a failure to validate the mechanism
1723     * is in place, so other tests will fail if they produce debug messages
1724     */
1725    const char *kernel_source =
1726    "__kernel void main_test(__global float *inout, float mul)\n\
1727    {\n\
1728        inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1729    }\n";
1730    auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1731                                  SHADER_ARG_INOUT);
1732    auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1733    const float expected[] = {
1734       0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1735    };
1736    ComPtr<ID3D12InfoQueue> info_queue;
1737    dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1738    if (!info_queue) {
1739       GTEST_SKIP() << "No info queue";
1740       return;
1741    }
1742 
1743    info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1744    run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1745    for (int i = 0; i < inout.size(); ++i)
1746       EXPECT_FLOAT_EQ(inout[i], expected[i]);
1747 }
1748 
TEST_F(ComputeTest,compiler_defines)1749 TEST_F(ComputeTest, compiler_defines)
1750 {
1751    const char *kernel_source =
1752       "__kernel void main_test(__global int* out)\n\
1753    {\n\
1754       out[0] = OUT_VAL0;\n\
1755       out[1] = __OPENCL_C_VERSION__;\n\
1756    }";
1757    auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1758    CompileArgs compile_args = { 1, 1, 1 };
1759    compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1760    std::vector<RawShaderArg *> raw_args = { &out };
1761    run_shader({ kernel_source }, compile_args, out);
1762    EXPECT_EQ(out[0], 5);
1763    EXPECT_EQ(out[1], 100);
1764 }
1765 
TEST_F(ComputeTest,global_atomic_add)1766 TEST_F(ComputeTest, global_atomic_add)
1767 {
1768    const char *kernel_source =
1769    "__kernel void main_test(__global int *inout, __global int *old)\n\
1770    {\n\
1771       old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1772    }\n";
1773    auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1774    auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1775    const int32_t expected_inout[] = { 5, 7 };
1776    const int32_t expected_old[] = { 2, 4 };
1777    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1778    for (int i = 0; i < inout.size(); ++i) {
1779       EXPECT_EQ(inout[i], expected_inout[i]);
1780       EXPECT_EQ(old[i], expected_old[i]);
1781    }
1782 }
1783 
TEST_F(ComputeTest,global_atomic_imin)1784 TEST_F(ComputeTest, global_atomic_imin)
1785 {
1786    const char *kernel_source =
1787    "__kernel void main_test(__global int *inout, __global int *old)\n\
1788    {\n\
1789       old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1790    }\n";
1791    auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1792    auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1793    const int32_t expected_inout[] = { 0, 1, -1 };
1794    const int32_t expected_old[] = { 0, 2, -1 };
1795    run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1796    for (int i = 0; i < inout.size(); ++i) {
1797       EXPECT_EQ(inout[i], expected_inout[i]);
1798       EXPECT_EQ(old[i], expected_old[i]);
1799    }
1800 }
1801 
TEST_F(ComputeTest,global_atomic_and_or)1802 TEST_F(ComputeTest, global_atomic_and_or)
1803 {
1804    const char *kernel_source =
1805    "__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1806    __kernel void main_test(__global int *inout)\n\
1807    {\n\
1808       atomic_and(inout, ~(1 << get_global_id(0)));\n\
1809       atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1810    }\n";
1811    auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1812    const int32_t expected[] = { 0x78 };
1813    run_shader(kernel_source, 3, 1, 1, inout);
1814    for (int i = 0; i < inout.size(); ++i)
1815       EXPECT_EQ(inout[i], expected[i]);
1816 }
1817 
TEST_F(ComputeTest,global_atomic_cmpxchg)1818 TEST_F(ComputeTest, global_atomic_cmpxchg)
1819 {
1820    const char *kernel_source =
1821    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1822    __kernel void main_test(__global int *inout)\n\
1823    {\n\
1824       while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1825          ;\n\
1826    }\n";
1827    auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1828    const int32_t expected_inout[] = { 2 };
1829    run_shader(kernel_source, 2, 1, 1, inout);
1830    for (int i = 0; i < inout.size(); ++i)
1831       EXPECT_EQ(inout[i], expected_inout[i]);
1832 }
1833 
TEST_F(ComputeTest,local_atomic_and_or)1834 TEST_F(ComputeTest, local_atomic_and_or)
1835 {
1836    const char *kernel_source =
1837    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1838    __kernel void main_test(__global ushort *inout)\n\
1839    {\n\
1840       __local ushort tmp;\n\
1841       atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1842       atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1843       barrier(CLK_LOCAL_MEM_FENCE);\n\
1844       inout[get_global_id(0)] = tmp;\n\
1845    }\n";
1846    auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1847    const uint16_t expected[] = { 0x402, 0x402 };
1848    run_shader(kernel_source, inout.size(), 1, 1, inout);
1849    for (int i = 0; i < inout.size(); ++i)
1850       EXPECT_EQ(inout[i], expected[i]);
1851 }
1852 
TEST_F(ComputeTest,local_atomic_cmpxchg)1853 TEST_F(ComputeTest, local_atomic_cmpxchg)
1854 {
1855    const char *kernel_source =
1856    "__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1857    __kernel void main_test(__global int *out)\n\
1858    {\n\
1859       __local uint tmp;\n\
1860       tmp = 0;\n\
1861       barrier(CLK_LOCAL_MEM_FENCE);\n\
1862       while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1863          ;\n\
1864       barrier(CLK_LOCAL_MEM_FENCE);\n\
1865       out[0] = tmp;\n\
1866    }\n";
1867 
1868    auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1869    const uint16_t expected[] = { 2 };
1870    run_shader(kernel_source, 2, 1, 1, out);
1871    for (int i = 0; i < out.size(); ++i)
1872       EXPECT_EQ(out[i], expected[i]);
1873 }
1874 
TEST_F(ComputeTest,constant_sampler)1875 TEST_F(ComputeTest, constant_sampler)
1876 {
1877    const char* kernel_source =
1878    "__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1879    __kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1880    {\n\
1881       int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1882       float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1883       write_imagef(output, coordsi, \n\
1884          read_imagef(input, sampler, coordsf) + \n\
1885          read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1886    }\n";
1887    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1888    validate(shader);
1889    EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1890 }
1891 
TEST_F(ComputeTest,hi)1892 TEST_F(ComputeTest, hi)
1893 {
1894    const char *kernel_source = R"(
1895    __kernel void main_test(__global char3 *srcA, __global char2 *dst)
1896    {
1897        int  tid = get_global_id(0);
1898 
1899        char2 tmp = srcA[tid].hi;
1900        dst[tid] = tmp;
1901    })";
1902    Shader shader = compile(std::vector<const char*>({ kernel_source }));
1903    validate(shader);
1904 }
1905 
TEST_F(ComputeTest,system_values)1906 TEST_F(ComputeTest, system_values)
1907 {
1908    const char *kernel_source =
1909    "__kernel void main_test(__global uint* outputs)\n\
1910    {\n\
1911       outputs[0] = get_work_dim();\n\
1912       outputs[1] = get_global_size(0);\n\
1913       outputs[2] = get_local_size(0);\n\
1914       outputs[3] = get_num_groups(0);\n\
1915       outputs[4] = get_group_id(0);\n\
1916       outputs[5] = get_global_offset(0);\n\
1917       outputs[6] = get_global_id(0);\n\
1918    }\n";
1919    auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1920    const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1921    CompileArgs args = { 1, 1, 1 };
1922    Shader shader = compile({ kernel_source });
1923    run_shader(shader, args, out);
1924    for (int i = 0; i < out.size(); ++i)
1925       EXPECT_EQ(out[i], expected[i]);
1926 
1927    args.work_props.work_dim = 2;
1928    args.work_props.global_offset_x = 100;
1929    args.work_props.group_id_offset_x = 2;
1930    args.work_props.group_count_total_x = 5;
1931    const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1932    run_shader(shader, args, out);
1933    for (int i = 0; i < out.size(); ++i)
1934       EXPECT_EQ(out[i], expected_withoffsets[i]);
1935 }
1936 
TEST_F(ComputeTest,convert_round_sat)1937 TEST_F(ComputeTest, convert_round_sat)
1938 {
1939    const char *kernel_source =
1940    "__kernel void main_test(__global float *f, __global uchar *u)\n\
1941    {\n\
1942        uint idx = get_global_id(0);\n\
1943        u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1944    }\n";
1945    auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1946    auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1947    const uint8_t expected[] = {
1948       0, 2, 20, 255
1949    };
1950 
1951    run_shader(kernel_source, f.size(), 1, 1, f, u);
1952    for (int i = 0; i < u.size(); ++i)
1953       EXPECT_EQ(u[i], expected[i]);
1954 }
1955 
TEST_F(ComputeTest,convert_round_sat_vec)1956 TEST_F(ComputeTest, convert_round_sat_vec)
1957 {
1958    const char *kernel_source =
1959    "__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1960    {\n\
1961        uint idx = get_global_id(0);\n\
1962        u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1963    }\n";
1964    auto f = ShaderArg<float>({
1965       -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1966       -0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1967        0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1968       -0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1969    }, SHADER_ARG_INPUT);
1970    auto u = ShaderArg<uint8_t>({
1971       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1972       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1973       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1974       255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1975    }, SHADER_ARG_OUTPUT);
1976    const uint8_t expected[] = {
1977       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1978       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1979       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1980       0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1981    };
1982 
1983    run_shader(kernel_source, 4, 1, 1, f, u);
1984    for (int i = 0; i < u.size(); ++i)
1985       EXPECT_EQ(u[i], expected[i]);
1986 }
1987 
TEST_F(ComputeTest,convert_char2_uchar2)1988 TEST_F(ComputeTest, convert_char2_uchar2)
1989 {
1990    const char *kernel_source =
1991    "__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1992    {\n\
1993       size_t i = get_global_id(0);\n\
1994       dest[i] = convert_uchar2_sat( src[i] );\n\
1995    }\n";
1996 
1997    auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1998    auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1999    const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
2000    run_shader(kernel_source, 4, 1, 1, c, u);
2001    for (int i = 0; i < u.size(); i++)
2002       EXPECT_EQ(u[i], expected[i]);
2003 }
2004 
TEST_F(ComputeTest,async_copy)2005 TEST_F(ComputeTest, async_copy)
2006 {
2007    const char *kernel_source = R"(
2008    __kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
2009    {
2010     int i;
2011     for(i=0; i<copiesPerWorkItem; i++)
2012         localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
2013        barrier( CLK_LOCAL_MEM_FENCE );
2014        event_t event;
2015        event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2016        wait_group_events( 1, &event );
2017     for(i=0; i<copiesPerWorkItem; i++)
2018      dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2019    })";
2020    Shader shader = compile({ kernel_source });
2021    validate(shader);
2022 }
2023 
TEST_F(ComputeTest,packed_struct_global)2024 TEST_F(ComputeTest, packed_struct_global)
2025 {
2026 #pragma pack(push, 1)
2027    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2028 #pragma pack(pop)
2029 
2030    const char *kernel_source =
2031    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2032    __kernel void main_test(__global struct s *inout, global uint *size)\n\
2033    {\n\
2034        uint idx = get_global_id(0);\n\
2035        inout[idx].uc = idx + 1;\n\
2036        inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2037        inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2038        *size = sizeof(struct s);\n\
2039    }\n";
2040    auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2041    auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2042    const struct s expected[] = {
2043       { 1, 0xfbfcfdff12345678, 0xa112 }
2044    };
2045 
2046    run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2047    for (int i = 0; i < inout.size(); ++i) {
2048       EXPECT_EQ(inout[i].uc, expected[i].uc);
2049       EXPECT_EQ(inout[i].ul, expected[i].ul);
2050       EXPECT_EQ(inout[i].us, expected[i].us);
2051    }
2052    EXPECT_EQ(size, sizeof(struct s));
2053 }
2054 
TEST_F(ComputeTest,packed_struct_arg)2055 TEST_F(ComputeTest, packed_struct_arg)
2056 {
2057 #pragma pack(push, 1)
2058    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2059 #pragma pack(pop)
2060 
2061    const char *kernel_source =
2062    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2063    __kernel void main_test(__global struct s *out, struct s in)\n\
2064    {\n\
2065        uint idx = get_global_id(0);\n\
2066        out[idx].uc = in.uc + 0x12;\n\
2067        out[idx].ul = in.ul + 0x123456789abcdef;\n\
2068        out[idx].us = in.us + 0x1234;\n\
2069    }\n";
2070    auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2071    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2072    const struct s expected[] = {
2073       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2074    };
2075 
2076    run_shader(kernel_source, out.size(), 1, 1, out, in);
2077    for (int i = 0; i < out.size(); ++i) {
2078       EXPECT_EQ(out[i].uc, expected[i].uc);
2079       EXPECT_EQ(out[i].ul, expected[i].ul);
2080       EXPECT_EQ(out[i].us, expected[i].us);
2081    }
2082 }
2083 
TEST_F(ComputeTest,packed_struct_local)2084 TEST_F(ComputeTest, packed_struct_local)
2085 {
2086 #pragma pack(push, 1)
2087    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2088 #pragma pack(pop)
2089 
2090    const char *kernel_source =
2091    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2092    __kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2093    {\n\
2094        uint idx = get_global_id(0);\n\
2095        __local struct s tmp[2];\n\
2096        tmp[get_local_id(0)] = in[idx];\n\
2097        barrier(CLK_LOCAL_MEM_FENCE);\n\
2098        out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2099    }\n";
2100    auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2101    auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2102    const struct s expected[] = {
2103       { 0x12, 0x123456789abcdef, 0x1234 },
2104       { 1, 2, 3 },
2105    };
2106 
2107    run_shader(kernel_source, out.size(), 1, 1, out, in);
2108    for (int i = 0; i < out.size(); ++i) {
2109       EXPECT_EQ(out[i].uc, expected[i].uc);
2110       EXPECT_EQ(out[i].ul, expected[i].ul);
2111       EXPECT_EQ(out[i].us, expected[i].us);
2112    }
2113 }
2114 
TEST_F(ComputeTest,DISABLED_packed_struct_const)2115 TEST_F(ComputeTest, DISABLED_packed_struct_const)
2116 {
2117 #pragma pack(push, 1)
2118    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2119 #pragma pack(pop)
2120 
2121    const char *kernel_source =
2122    "struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2123    __kernel void main_test(__global struct s *out, struct s in)\n\
2124    {\n\
2125        __constant struct s base[] = {\n\
2126           {0x12, 0x123456789abcdef, 0x1234},\n\
2127           {0x11, 0x123456789abcdee, 0x1233},\n\
2128        };\n\
2129        uint idx = get_global_id(0);\n\
2130        out[idx].uc = base[idx % 2].uc + in.uc;\n\
2131        out[idx].ul = base[idx % 2].ul + in.ul;\n\
2132        out[idx].us = base[idx % 2].us + in.us;\n\
2133    }\n";
2134    auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2135    auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2136    const struct s expected[] = {
2137       { 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2138       { 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2139    };
2140 
2141    run_shader(kernel_source, out.size(), 1, 1, out, in);
2142    for (int i = 0; i < out.size(); ++i) {
2143       EXPECT_EQ(out[i].uc, expected[i].uc);
2144       EXPECT_EQ(out[i].ul, expected[i].ul);
2145       EXPECT_EQ(out[i].us, expected[i].us);
2146    }
2147 }
2148 
TEST_F(ComputeTest,printf)2149 TEST_F(ComputeTest, printf)
2150 {
2151    const char *kernel_source = R"(
2152    __kernel void main_test(__global float *src, __global uint *dest)
2153    {
2154       *dest = printf("%s: %f", "Test", src[0]);
2155    })";
2156 
2157    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2158    auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2159    run_shader(kernel_source, 1, 1, 1, src, dest);
2160    EXPECT_EQ(dest[0], 0);
2161 }
2162 
TEST_F(ComputeTest,vload_half)2163 TEST_F(ComputeTest, vload_half)
2164 {
2165    const char *kernel_source = R"(
2166    __kernel void main_test(__global half *src, __global float4 *dest)
2167    {
2168       int offset = get_global_id(0);
2169       dest[offset] = vload_half4(offset, src);
2170    })";
2171    auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2172                                     0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2173    auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2174                                   FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2175    run_shader(kernel_source, 2, 1, 1, src, dest);
2176    for (unsigned i = 0; i < 8; ++i)
2177       EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2178 }
2179 
TEST_F(ComputeTest,vstore_half)2180 TEST_F(ComputeTest, vstore_half)
2181 {
2182    const char *kernel_source = R"(
2183    __kernel void main_test(__global half *dst, __global float4 *src)
2184    {
2185       int offset = get_global_id(0);
2186       vstore_half4(src[offset], offset, dst);
2187    })";
2188    auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2189                                    0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2190    auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2191                                   5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2192    run_shader(kernel_source, 2, 1, 1, dest, src);
2193    const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2194                                  0x4500, 0x4600, 0x4700, 0x4800 };
2195    for (unsigned i = 0; i < 8; ++i)
2196       EXPECT_EQ(dest[i], expected[i]);
2197 }
2198 
TEST_F(ComputeTest,inline_function)2199 TEST_F(ComputeTest, inline_function)
2200 {
2201    const char *kernel_source = R"(
2202    inline float helper(float foo)
2203    {
2204       return foo * 2;
2205    }
2206 
2207    __kernel void main_test(__global float *dst, __global float *src)
2208    {
2209       *dst = helper(*src);
2210    })";
2211    auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2212    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2213    run_shader(kernel_source, 1, 1, 1, dest, src);
2214    EXPECT_EQ(dest[0], 2.0f);
2215 }
2216 
TEST_F(ComputeTest,unused_arg)2217 TEST_F(ComputeTest, unused_arg)
2218 {
2219    const char *kernel_source = R"(
2220    __kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2221    {
2222       int i = get_global_id(0);
2223       dst[i] = src[i];
2224    })";
2225    auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2226    auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2227    auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2228    run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2229    for (int i = 0; i < 4; ++i)
2230       EXPECT_EQ(dest[i], i + 1);
2231 }
2232 
TEST_F(ComputeTest,spec_constant)2233 TEST_F(ComputeTest, spec_constant)
2234 {
2235    const char *spirv_asm = R"(
2236                OpCapability Addresses
2237                OpCapability Kernel
2238                OpCapability Int64
2239           %1 = OpExtInstImport "OpenCL.std"
2240                OpMemoryModel Physical64 OpenCL
2241                OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
2242           %4 = OpString "kernel_arg_type.main_test.uint*,"
2243                OpSource OpenCL_C 102000
2244                OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
2245                OpName %output "output"
2246                OpName %entry "entry"
2247                OpName %output_addr "output.addr"
2248                OpName %id "id"
2249                OpName %call "call"
2250                OpName %conv "conv"
2251                OpName %idxprom "idxprom"
2252                OpName %arrayidx "arrayidx"
2253                OpName %add "add"
2254                OpName %mul "mul"
2255                OpName %idxprom1 "idxprom1"
2256                OpName %arrayidx2 "arrayidx2"
2257                OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
2258                OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
2259                OpDecorate %id Alignment 4
2260                OpDecorate %output_addr Alignment 8
2261                OpDecorate %uint_1 SpecId 1
2262       %ulong = OpTypeInt 64 0
2263        %uint = OpTypeInt 32 0
2264      %uint_1 = OpSpecConstant %uint 1
2265     %v3ulong = OpTypeVector %ulong 3
2266 %_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
2267        %void = OpTypeVoid
2268 %_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
2269          %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
2270 %_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
2271 %_ptr_Function_uint = OpTypePointer Function %uint
2272 %__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
2273           %2 = OpFunction %void DontInline %24
2274      %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
2275       %entry = OpLabel
2276 %output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
2277          %id = OpVariable %_ptr_Function_uint Function
2278                OpStore %output_addr %output Aligned 8
2279          %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
2280        %call = OpCompositeExtract %ulong %27 0
2281        %conv = OpUConvert %uint %call
2282                OpStore %id %conv Aligned 4
2283          %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2284          %29 = OpLoad %uint %id Aligned 4
2285     %idxprom = OpUConvert %ulong %29
2286    %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
2287          %30 = OpLoad %uint %arrayidx Aligned 4
2288          %31 = OpLoad %uint %id Aligned 4
2289         %add = OpIAdd %uint %31 %uint_1
2290         %mul = OpIMul %uint %30 %add
2291          %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
2292          %33 = OpLoad %uint %id Aligned 4
2293    %idxprom1 = OpUConvert %ulong %33
2294   %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
2295                OpStore %arrayidx2 %mul Aligned 4
2296                OpReturn
2297                OpFunctionEnd)";
2298    Shader shader = assemble(spirv_asm);
2299    Shader spec_shader = specialize(shader, 1, 5);
2300 
2301    auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
2302       SHADER_ARG_INOUT);
2303    const uint32_t expected[] = {
2304       0x00000005, 0x60000006, 0x000e000e, 0x20081018
2305    };
2306    CompileArgs args = { (unsigned)inout.size(), 1, 1 };
2307    run_shader(spec_shader, args, inout);
2308    for (int i = 0; i < inout.size(); ++i)
2309       EXPECT_EQ(inout[i], expected[i]);
2310 }
2311 
TEST_F(ComputeTest,arg_metadata)2312 TEST_F(ComputeTest, arg_metadata)
2313 {
2314    const char *kernel_source = R"(
2315    __kernel void main_test(
2316       __global int *undec_ptr,
2317       __global volatile int *vol_ptr,
2318       __global const int *const_ptr,
2319       __global int *restrict restr_ptr,
2320       __global const int *restrict const_restr_ptr,
2321       __constant int *const_ptr2)
2322    {
2323    })";
2324    Shader shader = compile({ kernel_source });
2325    EXPECT_EQ(shader.metadata->kernels[0].args[0].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2326    EXPECT_EQ(shader.metadata->kernels[0].args[0].type_qualifier, 0);
2327    EXPECT_EQ(shader.metadata->kernels[0].args[1].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2328    EXPECT_EQ(shader.metadata->kernels[0].args[1].type_qualifier, CLC_KERNEL_ARG_TYPE_VOLATILE);
2329    EXPECT_EQ(shader.metadata->kernels[0].args[2].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2330    EXPECT_EQ(shader.metadata->kernels[0].args[2].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2331    EXPECT_EQ(shader.metadata->kernels[0].args[3].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2332    EXPECT_EQ(shader.metadata->kernels[0].args[3].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT);
2333    EXPECT_EQ(shader.metadata->kernels[0].args[4].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
2334    EXPECT_EQ(shader.metadata->kernels[0].args[4].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT | CLC_KERNEL_ARG_TYPE_CONST);
2335    EXPECT_EQ(shader.metadata->kernels[0].args[5].address_qualifier, CLC_KERNEL_ARG_ADDRESS_CONSTANT);
2336    EXPECT_EQ(shader.metadata->kernels[0].args[5].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
2337 }
2338