/*
 * Copyright © 2018 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Rob Clark (robdclark@gmail.com)
 */

#include "math.h"
#include "nir/nir_builtin_builder.h"

#include "util/u_printf.h"
#include "vtn_private.h"
#include "OpenCL.std.h"

typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
                                    uint32_t opcode,
                                    unsigned num_srcs, nir_ssa_def **srcs,
                                    struct vtn_type **src_types,
                                    const struct vtn_type *dest_type);

static int to_llvm_address_space(SpvStorageClass mode)
{
   switch (mode) {
   case SpvStorageClassPrivate:
   case SpvStorageClassFunction: return 0;
   case SpvStorageClassCrossWorkgroup: return 1;
   case SpvStorageClassUniform:
   case SpvStorageClassUniformConstant: return 2;
   case SpvStorageClassWorkgroup: return 3;
   case SpvStorageClassGeneric: return 4;
   default: return -1;
   }
}

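/* Mangle a CLC builtin name in the Itanium C++ ABI style clang uses when
 * libclc is built, so the symbol can be resolved in the clc shader.
 * Illustrative examples of what the code below produces:
 *
 *    fmin(float2, float2)        -> _Z4fminDv2_fS_
 *    frexp(float, global int *)  -> _Z5frexpfPU3AS1i
 *
 * "_Z" is followed by the name length and the name, then one encoding per
 * argument: "Dv<n>_" for an <n>-component vector, "P" for a pointer,
 * "U3AS<n>" for an address space, "K" for const, and "S_" to substitute a
 * repeated vector type.  The numbers from to_llvm_address_space() above
 * follow the SPIR target layout (0 private, 1 global, 2 constant, 3 local,
 * 4 generic).
 */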
static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   char local_name[256] = "";
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         *(args_str++) = 'P';
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for
          * substitution. In theory, we'd need to know which substitution
          * value this is. In practice, the functions we need from libclc
          * only support 1.
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}

static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* try and find in current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* if not found here find in clc shader and create a decl mirroring it */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}

static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}

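/* Common dispatch for extended instructions: collect the source operands
 * as NIR SSA values plus their vtn types, run the handler, and push the
 * result.  For an OpExtInst the words are w[1] = result type, w[2] =
 * result id and w[5...] = operands, which is why the call sites below
 * pass "w + 5, count - 5, w + 1".
 */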
static void
handle_instr(struct vtn_builder *b, uint32_t opcode,
             const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
{
   struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;

   nir_ssa_def *srcs[5] = { NULL };
   struct vtn_type *src_types[5] = { NULL };
   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
   for (unsigned i = 0; i < num_srcs; i++) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
      srcs[i] = ssa->def;
      src_types[i] = val->type;
   }

   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (result) {
      vtn_push_nir_ssa(b, w_dest[1], result);
   } else {
      vtn_assert(dest_type == NULL);
   }
}

static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* unsigned abs is a no-op */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}

static nir_ssa_def *
handle_alu(struct vtn_builder *b, uint32_t opcode,
           unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
           const struct vtn_type *dest_type)
{
   nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
                                    srcs[0], srcs[1], srcs[2], NULL);
   if (opcode == OpenCLstd_Popcount)
      ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
   return ret;
}

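/* Map from extended opcodes with no direct NIR equivalent to the libclc
 * function that implements them; mangle_and_find() resolves the mangled
 * name against b->options->clc_shader.
 */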
#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP
static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
{
   if (opcode >= ARRAY_SIZE(remap_table))
      return NULL;
   return remap_table[opcode].fn;
}

static struct vtn_type *
get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_type *ret = rzalloc(b, struct vtn_type);
   assert(glsl_type_is_vector_or_scalar(type));
   ret->type = type;
   ret->length = glsl_get_vector_elements(type);
   ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
   return ret;
}

static struct vtn_type *
get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
{
   struct vtn_type *ret = rzalloc(b, struct vtn_type);
   ret->type = nir_address_format_to_glsl_type(
      vtn_mode_to_address_format(
         b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
   ret->base_type = vtn_base_type_pointer;
   ret->storage_class = storage_class;
   ret->deref = t;
   return ret;
}

static struct vtn_type *
get_signed_type(struct vtn_builder *b, struct vtn_type *t)
{
   if (t->base_type == vtn_base_type_pointer) {
      return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
   }
   return get_vtn_type_for_glsl_type(
      b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
                          glsl_get_vector_elements(t->type)));
}

static nir_ssa_def *
handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
              int num_srcs,
              nir_ssa_def **srcs,
              struct vtn_type **src_types,
              const struct vtn_type *dest_type)
{
   const char *name = remap_clc_opcode(opcode);
   if (!name)
      return NULL;

   /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
    * which doesn't mangle correctly when the function expects int or pointer-to-int.
    * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
    */
   int signed_param = -1;
   switch (opcode) {
   case OpenCLstd_Frexp:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Ldexp:
      signed_param = 1;
      break;
   case OpenCLstd_Remquo:
      signed_param = 2;
      break;
   case OpenCLstd_SMad_sat: {
      /* All parameters need to be converted to signed */
      src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
      break;
   }
   default: break;
   }

   if (signed_param >= 0) {
      src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
   }

   nir_deref_instr *ret_deref = NULL;

   if (!call_mangled_function(b, name, 0, num_srcs, src_types,
                              dest_type, srcs, &ret_deref))
      return NULL;

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}

static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
      /* these are easier to implement directly in NIR */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different defs for upsample, just implement in nir */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}

static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy
       * functions. However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for
       * 3-component vector types behave as async_work_group_copy and
       * async_work_group_strided_copy respectively for 4-component
       * vector types.
       */
      for (unsigned i = 0; i < num_srcs; ++i) {
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}

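/* Shared implementation of the vloadn/vstoren family.  Per the OpenCL C
 * spec, vloadn(offset, p) reads n components starting at p + n * offset
 * (vstoren mirrors this), so the loop below scales the offset by the
 * component count -- or by 4 for the "aligned" half3 variants, whose
 * stride rounds up to a 4-component boundary.  The _half variants also
 * convert between half in memory and float/double values.
 */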
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
                                       (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}

static void
vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                        const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, true,
                        opcode == OpenCLstd_Vloada_halfn,
                        nir_rounding_mode_undef);
}

static void
vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                         const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, false,
                        opcode == OpenCLstd_Vstorea_halfn,
                        nir_rounding_mode_undef);
}

static void
vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                                const uint32_t *w, unsigned count)
{
   _handle_v_load_store(b, opcode, w, count, false,
                        opcode == OpenCLstd_Vstorea_halfn_r,
                        vtn_rounding_mode_to_nir(b, w[8]));
}

static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be a char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be a char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}

/* printf is special because there are no limits on args */
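/* The lowering: the format string (and any %s string arguments) is
 * appended to the shader's printf_info table at compile time, the
 * remaining arguments are packed into an ad-hoc local struct, and the
 * nir_printf intrinsic carries just the table index plus a pointer to
 * that struct for the runtime to decode.
 */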
static void
handle_printf(struct vtn_builder *b, uint32_t opcode,
              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
{
   if (!b->options->caps.printf) {
      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
      return;
   }

   /* Step 1. extract the format string */

   /*
    * info_idx is 1-based to match clover/llvm; the backend indexes the
    * info table at info_idx - 1.
    */
   b->shader->printf_info_count++;
   unsigned info_idx = b->shader->printf_info_count;

   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
                                     nir_printf_info, info_idx);
   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];

   info->strings = NULL;
   info->string_size = 0;

   vtn_add_printf_string(b, w_src[0], info);

   info->num_args = num_srcs - 1;
   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);

   /* Step 2, build an ad-hoc struct type out of the args */
   unsigned field_offset = 0;
   struct glsl_struct_field *fields =
      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
   for (unsigned i = 1; i < num_srcs; ++i) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_type *src_type = val->type;
      fields[i - 1].type = src_type->type;
      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
      field_offset = align(field_offset, 4);
      fields[i - 1].offset = field_offset;
      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
      field_offset += glsl_get_cl_size(src_type->type);
   }
   const struct glsl_type *struct_type =
      glsl_struct_type(fields, num_srcs - 1, "printf", true);

   /* Step 3, create a variable of that type and populate its fields */
   nir_variable *var = nir_local_variable_create(b->nb.impl, struct_type, NULL);
   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
   size_t fmt_pos = 0;
   for (unsigned i = 1; i < num_srcs; ++i) {
      nir_deref_instr *field_deref =
         nir_build_deref_struct(&b->nb, deref_var, i - 1);
      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
      /* extract strings */
      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
         nir_store_deref(&b->nb, field_deref,
                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
                         ~0 /* write_mask */);
      } else
         nir_store_deref(&b->nb, field_deref, field_src, ~0);
   }

   /* Lastly, the actual intrinsic */
   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
   vtn_push_nir_ssa(b, w_dest[1], ret);
}

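/* OpenCL round() rounds to the nearest integer with halfway cases going
 * away from zero, regardless of the current rounding mode; e.g.
 * round(2.5f) == 3.0f and round(-2.5f) == -3.0f.  Hence the sequence
 * below: keep trunc(x) and add sign(x) whenever the discarded fraction
 * is >= 0.5.
 */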
static nir_ssa_def *
handle_round(struct vtn_builder *b, uint32_t opcode,
             unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
             const struct vtn_type *dest_type)
{
   nir_ssa_def *src = srcs[0];
   nir_builder *nb = &b->nb;
   nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
   nir_ssa_def *truncated = nir_ftrunc(nb, src);
   nir_ssa_def *remainder = nir_fsub(nb, src, truncated);

   return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
                    nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
}

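/* shuffle(x, mask): result component i is x[mask[i]].  Only the low bits
 * of each mask element are significant per the CL spec, so the mask is
 * truncated with in_elems - 1 (shuffle sources always have a
 * power-of-two component count).  E.g. x = (float4)(a, b, c, d) with
 * mask = (uint2)(2, 1) yields (float2)(c, b).
 */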
static nir_ssa_def *
handle_shuffle(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input = srcs[0];
   struct nir_ssa_def *mask = srcs[1];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input->num_components;
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++)
      outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));

   return nir_vec(&b->nb, outres, out_elems);
}

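/* shuffle2(x, y, mask): like shuffle, but the mask indexes the virtual
 * concatenation of both inputs.  Each index is looked up in both vectors
 * modulo in_elems, and bcsel then picks the x result when the full index
 * falls in the first half and the y result otherwise.
 */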
static nir_ssa_def *
handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
                const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input0 = srcs[0];
   struct nir_ssa_def *input1 = srcs[1];
   struct nir_ssa_def *mask = srcs[2];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input0->num_components;
   unsigned total_mask = 2 * in_elems - 1;
   unsigned half_mask = in_elems - 1;
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++) {
      nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
      nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
      nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
      nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
      nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
      outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
   }
   return nir_vec(&b->nb, outres, out_elems);
}

bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}

bool
vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
                                   const uint32_t *w, unsigned count)
{
   switch (opcode) {
   case SpvOpGroupAsyncCopy:
      handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
      return true;
   case SpvOpGroupWaitEvents:
      handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
      return true;
   default:
      return false;
   }
   return true;
}