• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Red Hat
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark (robdclark@gmail.com)
25  */
26 
27 #include "math.h"
28 #include "nir/nir_builtin_builder.h"
29 
30 #include "util/u_printf.h"
31 #include "vtn_private.h"
32 #include "OpenCL.std.h"
33 
34 typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
35                                     uint32_t opcode,
36                                     unsigned num_srcs, nir_ssa_def **srcs,
37                                     struct vtn_type **src_types,
38                                     const struct vtn_type *dest_type);
39 
to_llvm_address_space(SpvStorageClass mode)40 static int to_llvm_address_space(SpvStorageClass mode)
41 {
42    switch (mode) {
43    case SpvStorageClassPrivate:
44    case SpvStorageClassFunction: return 0;
45    case SpvStorageClassCrossWorkgroup: return 1;
46    case SpvStorageClassUniform:
47    case SpvStorageClassUniformConstant: return 2;
48    case SpvStorageClassWorkgroup: return 3;
49    case SpvStorageClassGeneric: return 4;
50    default: return -1;
51    }
52 }
53 
54 
55 static void
vtn_opencl_mangle(const char * in_name,uint32_t const_mask,int ntypes,struct vtn_type ** src_types,char ** outstring)56 vtn_opencl_mangle(const char *in_name,
57                   uint32_t const_mask,
58                   int ntypes, struct vtn_type **src_types,
59                   char **outstring)
60 {
61    char local_name[256] = "";
62    char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);
63 
64    for (unsigned i = 0; i < ntypes; ++i) {
65       const struct glsl_type *type = src_types[i]->type;
66       enum vtn_base_type base_type = src_types[i]->base_type;
67       if (src_types[i]->base_type == vtn_base_type_pointer) {
68          *(args_str++) = 'P';
69          int address_space = to_llvm_address_space(src_types[i]->storage_class);
70          if (address_space > 0)
71             args_str += sprintf(args_str, "U3AS%d", address_space);
72 
73          type = src_types[i]->deref->type;
74          base_type = src_types[i]->deref->base_type;
75       }
76 
77       if (const_mask & (1 << i))
78          *(args_str++) = 'K';
79 
80       unsigned num_elements = glsl_get_components(type);
81       if (num_elements > 1) {
82          /* Vectors are not treated as built-ins for mangling, so check for substitution.
83           * In theory, we'd need to know which substitution value this is. In practice,
84           * the functions we need from libclc only support 1
85           */
86          bool substitution = false;
87          for (unsigned j = 0; j < i; ++j) {
88             const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
89                src_types[j]->deref->type : src_types[j]->type;
90             if (type == other_type) {
91                substitution = true;
92                break;
93             }
94          }
95 
96          if (substitution) {
97             args_str += sprintf(args_str, "S_");
98             continue;
99          } else
100             args_str += sprintf(args_str, "Dv%d_", num_elements);
101       }
102 
103       const char *suffix = NULL;
104       switch (base_type) {
105       case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
106       case vtn_base_type_event: suffix = "9ocl_event"; break;
107       default: {
108          const char *primitives[] = {
109             [GLSL_TYPE_UINT] = "j",
110             [GLSL_TYPE_INT] = "i",
111             [GLSL_TYPE_FLOAT] = "f",
112             [GLSL_TYPE_FLOAT16] = "Dh",
113             [GLSL_TYPE_DOUBLE] = "d",
114             [GLSL_TYPE_UINT8] = "h",
115             [GLSL_TYPE_INT8] = "c",
116             [GLSL_TYPE_UINT16] = "t",
117             [GLSL_TYPE_INT16] = "s",
118             [GLSL_TYPE_UINT64] = "m",
119             [GLSL_TYPE_INT64] = "l",
120             [GLSL_TYPE_BOOL] = "b",
121             [GLSL_TYPE_ERROR] = NULL,
122          };
123          enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
124          assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
125          suffix = primitives[glsl_base_type];
126          break;
127       }
128       }
129       args_str += sprintf(args_str, "%s", suffix);
130    }
131 
132    *outstring = strdup(local_name);
133 }
134 
mangle_and_find(struct vtn_builder * b,const char * name,uint32_t const_mask,uint32_t num_srcs,struct vtn_type ** src_types)135 static nir_function *mangle_and_find(struct vtn_builder *b,
136                                      const char *name,
137                                      uint32_t const_mask,
138                                      uint32_t num_srcs,
139                                      struct vtn_type **src_types)
140 {
141    char *mname;
142    nir_function *found = NULL;
143 
144    vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
145    /* try and find in current shader first. */
146    nir_foreach_function(funcs, b->shader) {
147       if (!strcmp(funcs->name, mname)) {
148          found = funcs;
149          break;
150       }
151    }
152    /* if not found here find in clc shader and create a decl mirroring it */
153    if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
154       nir_foreach_function(funcs, b->options->clc_shader) {
155          if (!strcmp(funcs->name, mname)) {
156             found = funcs;
157             break;
158          }
159       }
160       if (found) {
161          nir_function *decl = nir_function_create(b->shader, mname);
162          decl->num_params = found->num_params;
163          decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
164          for (unsigned i = 0; i < decl->num_params; i++) {
165             decl->params[i] = found->params[i];
166          }
167          found = decl;
168       }
169    }
170    if (!found)
171       vtn_fail("Can't find clc function %s\n", mname);
172    free(mname);
173    return found;
174 }
175 
call_mangled_function(struct vtn_builder * b,const char * name,uint32_t const_mask,uint32_t num_srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type,nir_ssa_def ** srcs,nir_deref_instr ** ret_deref_ptr)176 static bool call_mangled_function(struct vtn_builder *b,
177                                   const char *name,
178                                   uint32_t const_mask,
179                                   uint32_t num_srcs,
180                                   struct vtn_type **src_types,
181                                   const struct vtn_type *dest_type,
182                                   nir_ssa_def **srcs,
183                                   nir_deref_instr **ret_deref_ptr)
184 {
185    nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
186    if (!found)
187       return false;
188 
189    nir_call_instr *call = nir_call_instr_create(b->shader, found);
190 
191    nir_deref_instr *ret_deref = NULL;
192    uint32_t param_idx = 0;
193    if (dest_type) {
194       nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
195                                                         glsl_get_bare_type(dest_type->type),
196                                                         "return_tmp");
197       ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
198       call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
199    }
200 
201    for (unsigned i = 0; i < num_srcs; i++)
202       call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
203    nir_builder_instr_insert(&b->nb, &call->instr);
204 
205    *ret_deref_ptr = ret_deref;
206    return true;
207 }
208 
209 static void
handle_instr(struct vtn_builder * b,uint32_t opcode,const uint32_t * w_src,unsigned num_srcs,const uint32_t * w_dest,nir_handler handler)210 handle_instr(struct vtn_builder *b, uint32_t opcode,
211              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
212 {
213    struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;
214 
215    nir_ssa_def *srcs[5] = { NULL };
216    struct vtn_type *src_types[5] = { NULL };
217    vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
218    for (unsigned i = 0; i < num_srcs; i++) {
219       struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
220       struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
221       srcs[i] = ssa->def;
222       src_types[i] = val->type;
223    }
224 
225    nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
226    if (result) {
227       vtn_push_nir_ssa(b, w_dest[1], result);
228    } else {
229       vtn_assert(dest_type == NULL);
230    }
231 }
232 
233 static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode)234 nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
235                              enum OpenCLstd_Entrypoints opcode)
236 {
237    switch (opcode) {
238    case OpenCLstd_Fabs: return nir_op_fabs;
239    case OpenCLstd_SAbs: return nir_op_iabs;
240    case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
241    case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
242    case OpenCLstd_Ceil: return nir_op_fceil;
243    case OpenCLstd_Floor: return nir_op_ffloor;
244    case OpenCLstd_SHadd: return nir_op_ihadd;
245    case OpenCLstd_UHadd: return nir_op_uhadd;
246    case OpenCLstd_Fmax: return nir_op_fmax;
247    case OpenCLstd_SMax: return nir_op_imax;
248    case OpenCLstd_UMax: return nir_op_umax;
249    case OpenCLstd_Fmin: return nir_op_fmin;
250    case OpenCLstd_SMin: return nir_op_imin;
251    case OpenCLstd_UMin: return nir_op_umin;
252    case OpenCLstd_Mix: return nir_op_flrp;
253    case OpenCLstd_Native_cos: return nir_op_fcos;
254    case OpenCLstd_Native_divide: return nir_op_fdiv;
255    case OpenCLstd_Native_exp2: return nir_op_fexp2;
256    case OpenCLstd_Native_log2: return nir_op_flog2;
257    case OpenCLstd_Native_powr: return nir_op_fpow;
258    case OpenCLstd_Native_recip: return nir_op_frcp;
259    case OpenCLstd_Native_rsqrt: return nir_op_frsq;
260    case OpenCLstd_Native_sin: return nir_op_fsin;
261    case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
262    case OpenCLstd_SMul_hi: return nir_op_imul_high;
263    case OpenCLstd_UMul_hi: return nir_op_umul_high;
264    case OpenCLstd_Popcount: return nir_op_bit_count;
265    case OpenCLstd_SRhadd: return nir_op_irhadd;
266    case OpenCLstd_URhadd: return nir_op_urhadd;
267    case OpenCLstd_Rsqrt: return nir_op_frsq;
268    case OpenCLstd_Sign: return nir_op_fsign;
269    case OpenCLstd_Sqrt: return nir_op_fsqrt;
270    case OpenCLstd_SSub_sat: return nir_op_isub_sat;
271    case OpenCLstd_USub_sat: return nir_op_usub_sat;
272    case OpenCLstd_Trunc: return nir_op_ftrunc;
273    case OpenCLstd_Rint: return nir_op_fround_even;
274    case OpenCLstd_Half_divide: return nir_op_fdiv;
275    case OpenCLstd_Half_recip: return nir_op_frcp;
276    /* uhm... */
277    case OpenCLstd_UAbs: return nir_op_mov;
278    default:
279       vtn_fail("No NIR equivalent");
280    }
281 }
282 
283 static nir_ssa_def *
handle_alu(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)284 handle_alu(struct vtn_builder *b, uint32_t opcode,
285            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
286            const struct vtn_type *dest_type)
287 {
288    nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
289                                     srcs[0], srcs[1], srcs[2], NULL);
290    if (opcode == OpenCLstd_Popcount)
291       ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
292    return ret;
293 }
294 
295 #define REMAP(op, str) [OpenCLstd_##op] = { str }
296 static const struct {
297    const char *fn;
298 } remap_table[] = {
299    REMAP(Distance, "distance"),
300    REMAP(Fast_distance, "fast_distance"),
301    REMAP(Fast_length, "fast_length"),
302    REMAP(Fast_normalize, "fast_normalize"),
303    REMAP(Half_rsqrt, "half_rsqrt"),
304    REMAP(Half_sqrt, "half_sqrt"),
305    REMAP(Length, "length"),
306    REMAP(Normalize, "normalize"),
307    REMAP(Degrees, "degrees"),
308    REMAP(Radians, "radians"),
309    REMAP(Rotate, "rotate"),
310    REMAP(Smoothstep, "smoothstep"),
311    REMAP(Step, "step"),
312 
313    REMAP(Pow, "pow"),
314    REMAP(Pown, "pown"),
315    REMAP(Powr, "powr"),
316    REMAP(Rootn, "rootn"),
317    REMAP(Modf, "modf"),
318 
319    REMAP(Acos, "acos"),
320    REMAP(Acosh, "acosh"),
321    REMAP(Acospi, "acospi"),
322    REMAP(Asin, "asin"),
323    REMAP(Asinh, "asinh"),
324    REMAP(Asinpi, "asinpi"),
325    REMAP(Atan, "atan"),
326    REMAP(Atan2, "atan2"),
327    REMAP(Atanh, "atanh"),
328    REMAP(Atanpi, "atanpi"),
329    REMAP(Atan2pi, "atan2pi"),
330    REMAP(Cos, "cos"),
331    REMAP(Cosh, "cosh"),
332    REMAP(Cospi, "cospi"),
333    REMAP(Sin, "sin"),
334    REMAP(Sinh, "sinh"),
335    REMAP(Sinpi, "sinpi"),
336    REMAP(Tan, "tan"),
337    REMAP(Tanh, "tanh"),
338    REMAP(Tanpi, "tanpi"),
339    REMAP(Sincos, "sincos"),
340    REMAP(Fract, "fract"),
341    REMAP(Frexp, "frexp"),
342    REMAP(Fma, "fma"),
343    REMAP(Fmod, "fmod"),
344 
345    REMAP(Half_cos, "cos"),
346    REMAP(Half_exp, "exp"),
347    REMAP(Half_exp2, "exp2"),
348    REMAP(Half_exp10, "exp10"),
349    REMAP(Half_log, "log"),
350    REMAP(Half_log2, "log2"),
351    REMAP(Half_log10, "log10"),
352    REMAP(Half_powr, "powr"),
353    REMAP(Half_sin, "sin"),
354    REMAP(Half_tan, "tan"),
355 
356    REMAP(Remainder, "remainder"),
357    REMAP(Remquo, "remquo"),
358    REMAP(Hypot, "hypot"),
359    REMAP(Exp, "exp"),
360    REMAP(Exp2, "exp2"),
361    REMAP(Exp10, "exp10"),
362    REMAP(Expm1, "expm1"),
363    REMAP(Ldexp, "ldexp"),
364 
365    REMAP(Ilogb, "ilogb"),
366    REMAP(Log, "log"),
367    REMAP(Log2, "log2"),
368    REMAP(Log10, "log10"),
369    REMAP(Log1p, "log1p"),
370    REMAP(Logb, "logb"),
371 
372    REMAP(Cbrt, "cbrt"),
373    REMAP(Erfc, "erfc"),
374    REMAP(Erf, "erf"),
375 
376    REMAP(Lgamma, "lgamma"),
377    REMAP(Lgamma_r, "lgamma_r"),
378    REMAP(Tgamma, "tgamma"),
379 
380    REMAP(UMad_sat, "mad_sat"),
381    REMAP(SMad_sat, "mad_sat"),
382 
383    REMAP(Shuffle, "shuffle"),
384    REMAP(Shuffle2, "shuffle2"),
385 };
386 #undef REMAP
387 
remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)388 static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
389 {
390    if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
391       return NULL;
392    return remap_table[opcode].fn;
393 }
394 
395 static struct vtn_type *
get_vtn_type_for_glsl_type(struct vtn_builder * b,const struct glsl_type * type)396 get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
397 {
398    struct vtn_type *ret = rzalloc(b, struct vtn_type);
399    assert(glsl_type_is_vector_or_scalar(type));
400    ret->type = type;
401    ret->length = glsl_get_vector_elements(type);
402    ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
403    return ret;
404 }
405 
406 static struct vtn_type *
get_pointer_type(struct vtn_builder * b,struct vtn_type * t,SpvStorageClass storage_class)407 get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
408 {
409    struct vtn_type *ret = rzalloc(b, struct vtn_type);
410    ret->type = nir_address_format_to_glsl_type(
411             vtn_mode_to_address_format(
412                b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
413    ret->base_type = vtn_base_type_pointer;
414    ret->storage_class = storage_class;
415    ret->deref = t;
416    return ret;
417 }
418 
419 static struct vtn_type *
get_signed_type(struct vtn_builder * b,struct vtn_type * t)420 get_signed_type(struct vtn_builder *b, struct vtn_type *t)
421 {
422    if (t->base_type == vtn_base_type_pointer) {
423       return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
424    }
425    return get_vtn_type_for_glsl_type(
426       b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
427                           glsl_get_vector_elements(t->type)));
428 }
429 
430 static nir_ssa_def *
handle_clc_fn(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,int num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)431 handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
432               int num_srcs,
433               nir_ssa_def **srcs,
434               struct vtn_type **src_types,
435               const struct vtn_type *dest_type)
436 {
437    const char *name = remap_clc_opcode(opcode);
438    if (!name)
439        return NULL;
440 
441    /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
442     * which doesn't mangle correctly when the function expects int or pointer-to-int.
443     * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
444     */
445    int signed_param = -1;
446    switch (opcode) {
447    case OpenCLstd_Frexp:
448    case OpenCLstd_Lgamma_r:
449    case OpenCLstd_Pown:
450    case OpenCLstd_Rootn:
451    case OpenCLstd_Ldexp:
452       signed_param = 1;
453       break;
454    case OpenCLstd_Remquo:
455       signed_param = 2;
456       break;
457    case OpenCLstd_SMad_sat: {
458       /* All parameters need to be converted to signed */
459       src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
460       break;
461    }
462    default: break;
463    }
464 
465    if (signed_param >= 0) {
466       src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
467    }
468 
469    nir_deref_instr *ret_deref = NULL;
470 
471    if (!call_mangled_function(b, name, 0, num_srcs, src_types,
472                               dest_type, srcs, &ret_deref))
473       return NULL;
474 
475    return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
476 }
477 
478 static nir_ssa_def *
handle_special(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)479 handle_special(struct vtn_builder *b, uint32_t opcode,
480                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
481                const struct vtn_type *dest_type)
482 {
483    nir_builder *nb = &b->nb;
484    enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;
485 
486    switch (cl_opcode) {
487    case OpenCLstd_SAbs_diff:
488      /* these works easier in direct NIR */
489       return nir_iabs_diff(nb, srcs[0], srcs[1]);
490    case OpenCLstd_UAbs_diff:
491       return nir_uabs_diff(nb, srcs[0], srcs[1]);
492    case OpenCLstd_Bitselect:
493       return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
494    case OpenCLstd_SMad_hi:
495       return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
496    case OpenCLstd_UMad_hi:
497       return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
498    case OpenCLstd_SMul24:
499       return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
500    case OpenCLstd_UMul24:
501       return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
502    case OpenCLstd_SMad24:
503       return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
504    case OpenCLstd_UMad24:
505       return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
506    case OpenCLstd_FClamp:
507       return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
508    case OpenCLstd_SClamp:
509       return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
510    case OpenCLstd_UClamp:
511       return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
512    case OpenCLstd_Copysign:
513       return nir_copysign(nb, srcs[0], srcs[1]);
514    case OpenCLstd_Cross:
515       if (dest_type->length == 4)
516          return nir_cross4(nb, srcs[0], srcs[1]);
517       return nir_cross3(nb, srcs[0], srcs[1]);
518    case OpenCLstd_Fdim:
519       return nir_fdim(nb, srcs[0], srcs[1]);
520    case OpenCLstd_Fmod:
521       if (nb->shader->options->lower_fmod)
522          break;
523       return nir_fmod(nb, srcs[0], srcs[1]);
524    case OpenCLstd_Mad:
525       return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
526    case OpenCLstd_Maxmag:
527       return nir_maxmag(nb, srcs[0], srcs[1]);
528    case OpenCLstd_Minmag:
529       return nir_minmag(nb, srcs[0], srcs[1]);
530    case OpenCLstd_Nan:
531       return nir_nan(nb, srcs[0]);
532    case OpenCLstd_Nextafter:
533       return nir_nextafter(nb, srcs[0], srcs[1]);
534    case OpenCLstd_Normalize:
535       return nir_normalize(nb, srcs[0]);
536    case OpenCLstd_Clz:
537       return nir_clz_u(nb, srcs[0]);
538    case OpenCLstd_Ctz:
539       return nir_ctz_u(nb, srcs[0]);
540    case OpenCLstd_Select:
541       return nir_select(nb, srcs[0], srcs[1], srcs[2]);
542    case OpenCLstd_S_Upsample:
543    case OpenCLstd_U_Upsample:
544       /* SPIR-V and CL have different defs for upsample, just implement in nir */
545       return nir_upsample(nb, srcs[0], srcs[1]);
546    case OpenCLstd_Native_exp:
547       return nir_fexp(nb, srcs[0]);
548    case OpenCLstd_Native_exp10:
549       return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
550    case OpenCLstd_Native_log:
551       return nir_flog(nb, srcs[0]);
552    case OpenCLstd_Native_log10:
553       return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
554    case OpenCLstd_Native_tan:
555       return nir_ftan(nb, srcs[0]);
556    case OpenCLstd_Ldexp:
557       if (nb->shader->options->lower_ldexp)
558          break;
559       return nir_ldexp(nb, srcs[0], srcs[1]);
560    case OpenCLstd_Fma:
561       /* FIXME: the software implementation only supports fp32 for now. */
562       if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
563          break;
564       return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
565    default:
566       break;
567    }
568 
569    nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
570    if (!ret)
571       vtn_fail("No NIR equivalent");
572 
573    return ret;
574 }
575 
576 static nir_ssa_def *
handle_core(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)577 handle_core(struct vtn_builder *b, uint32_t opcode,
578             unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
579             const struct vtn_type *dest_type)
580 {
581    nir_deref_instr *ret_deref = NULL;
582 
583    switch ((SpvOp)opcode) {
584    case SpvOpGroupAsyncCopy: {
585       /* Libclc doesn't include 3-component overloads of the async copy functions.
586        * However, the CLC spec says:
587        * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
588        * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
589        * vector types
590        */
591       for (unsigned i = 0; i < num_srcs; ++i) {
592          if (src_types[i]->base_type == vtn_base_type_pointer &&
593              src_types[i]->deref->base_type == vtn_base_type_vector &&
594              src_types[i]->deref->length == 3) {
595             src_types[i] =
596                get_pointer_type(b,
597                                 get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
598                                 src_types[i]->storage_class);
599          }
600       }
601       if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
602          return NULL;
603       break;
604    }
605    case SpvOpGroupWaitEvents: {
606       src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
607       if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
608          return NULL;
609       break;
610    }
611    default:
612       return NULL;
613    }
614 
615    return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
616 }
617 
618 
619 static void
_handle_v_load_store(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count,bool load,bool vec_aligned,nir_rounding_mode rounding)620 _handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
621                      const uint32_t *w, unsigned count, bool load,
622                      bool vec_aligned, nir_rounding_mode rounding)
623 {
624    struct vtn_type *type;
625    if (load)
626       type = vtn_get_type(b, w[1]);
627    else
628       type = vtn_get_value_type(b, w[5]);
629    unsigned a = load ? 0 : 1;
630 
631    enum glsl_base_type base_type = glsl_get_base_type(type->type);
632    unsigned components = glsl_get_vector_elements(type->type);
633 
634    nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
635    struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);
636 
637    struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
638    nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];
639 
640    nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
641       (vec_aligned && components == 3) ? 4 : components);
642    nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);
643 
644    unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
645                                       glsl_get_bit_size(type->type) / 8;
646    enum glsl_base_type ptr_base_type =
647       glsl_get_base_type(p->pointer->type->type);
648    if (base_type != ptr_base_type) {
649       vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
650                   (base_type != GLSL_TYPE_FLOAT &&
651                    base_type != GLSL_TYPE_DOUBLE),
652                   "vload/vstore cannot do type conversion. "
653                   "vload/vstore_half can only convert from half to other "
654                   "floating-point types.");
655 
656       /* Above-computed alignment was for floats/doubles, not halves */
657       alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
658    }
659 
660    deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);
661 
662    for (int i = 0; i < components; i++) {
663       nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
664       nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);
665 
666       if (load) {
667          comps[i] = vtn_local_load(b, arr_deref, p->type->access);
668          ncomps[i] = comps[i]->def;
669          if (base_type != ptr_base_type) {
670             assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
671                    (base_type == GLSL_TYPE_FLOAT ||
672                     base_type == GLSL_TYPE_DOUBLE));
673             ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
674                                  glsl_base_type_get_bit_size(base_type));
675          }
676       } else {
677          struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
678          struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
679          ssa->def = nir_channel(&b->nb, val->def, i);
680          if (base_type != ptr_base_type) {
681             assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
682                    (base_type == GLSL_TYPE_FLOAT ||
683                     base_type == GLSL_TYPE_DOUBLE));
684             if (rounding == nir_rounding_mode_undef) {
685                ssa->def = nir_f2f16(&b->nb, ssa->def);
686             } else {
687                ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
688                                                 nir_type_float | ssa->def->bit_size,
689                                                 nir_type_float16,
690                                                 rounding, false);
691             }
692          }
693          vtn_local_store(b, ssa, arr_deref, p->type->access);
694       }
695    }
696    if (load) {
697       vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
698    }
699 }
700 
701 static void
vtn_handle_opencl_vload(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)702 vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
703                         const uint32_t *w, unsigned count)
704 {
705    _handle_v_load_store(b, opcode, w, count, true,
706                         opcode == OpenCLstd_Vloada_halfn,
707                         nir_rounding_mode_undef);
708 }
709 
710 static void
vtn_handle_opencl_vstore(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)711 vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
712                          const uint32_t *w, unsigned count)
713 {
714    _handle_v_load_store(b, opcode, w, count, false,
715                         opcode == OpenCLstd_Vstorea_halfn,
716                         nir_rounding_mode_undef);
717 }
718 
719 static void
vtn_handle_opencl_vstore_half_r(struct vtn_builder * b,enum OpenCLstd_Entrypoints opcode,const uint32_t * w,unsigned count)720 vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
721                                 const uint32_t *w, unsigned count)
722 {
723    _handle_v_load_store(b, opcode, w, count, false,
724                         opcode == OpenCLstd_Vstorea_halfn_r,
725                         vtn_rounding_mode_to_nir(b, w[8]));
726 }
727 
728 static unsigned
vtn_add_printf_string(struct vtn_builder * b,uint32_t id,nir_printf_info * info)729 vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
730 {
731    nir_deref_instr *deref = vtn_nir_deref(b, id);
732 
733    while (deref && deref->deref_type != nir_deref_type_var)
734       deref = nir_deref_instr_parent(deref);
735 
736    vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
737                "Printf string argument must be a pointer to a constant variable");
738    vtn_fail_if(deref->var->constant_initializer == NULL,
739                "Printf string argument must have an initializer");
740    vtn_fail_if(!glsl_type_is_array(deref->var->type),
741                "Printf string must be an char array");
742    const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
743    vtn_fail_if(char_type != glsl_uint8_t_type() &&
744                char_type != glsl_int8_t_type(),
745                "Printf string must be an char array");
746 
747    nir_constant *c = deref->var->constant_initializer;
748    assert(c->num_elements == glsl_get_length(deref->var->type));
749 
750    unsigned idx = info->string_size;
751    info->strings = reralloc_size(b->shader, info->strings,
752                                  idx + c->num_elements);
753    info->string_size += c->num_elements;
754 
755    char *str = &info->strings[idx];
756    bool found_null = false;
757    for (unsigned i = 0; i < c->num_elements; i++) {
758       memcpy((char *)str + i, c->elements[i]->values, 1);
759       if (str[i] == '\0')
760          found_null = true;
761    }
762    vtn_fail_if(!found_null, "Printf string must be null terminated");
763    return idx;
764 }
765 
766 /* printf is special because there are no limits on args */
767 static void
handle_printf(struct vtn_builder * b,uint32_t opcode,const uint32_t * w_src,unsigned num_srcs,const uint32_t * w_dest)768 handle_printf(struct vtn_builder *b, uint32_t opcode,
769               const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
770 {
771    if (!b->options->caps.printf) {
772       vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
773       return;
774    }
775 
776    /* Step 1. extract the format string */
777 
778    /*
779     * info_idx is 1-based to match clover/llvm
780     * the backend indexes the info table at info_idx - 1.
781     */
782    b->shader->printf_info_count++;
783    unsigned info_idx = b->shader->printf_info_count;
784 
785    b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
786                                      nir_printf_info, info_idx);
787    nir_printf_info *info = &b->shader->printf_info[info_idx - 1];
788 
789    info->strings = NULL;
790    info->string_size = 0;
791 
792    vtn_add_printf_string(b, w_src[0], info);
793 
794    info->num_args = num_srcs - 1;
795    info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);
796 
797    /* Step 2, build an ad-hoc struct type out of the args */
798    unsigned field_offset = 0;
799    struct glsl_struct_field *fields =
800       rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
801    for (unsigned i = 1; i < num_srcs; ++i) {
802       struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
803       struct vtn_type *src_type = val->type;
804       fields[i - 1].type = src_type->type;
805       fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
806       field_offset = align(field_offset, 4);
807       fields[i - 1].offset = field_offset;
808       info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
809       field_offset += glsl_get_cl_size(src_type->type);
810    }
811    const struct glsl_type *struct_type =
812       glsl_struct_type(fields, num_srcs - 1, "printf", true);
813 
814    /* Step 3, create a variable of that type and populate its fields */
815    nir_variable *var = nir_local_variable_create(b->nb.impl, struct_type, NULL);
816    nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
817    size_t fmt_pos = 0;
818    for (unsigned i = 1; i < num_srcs; ++i) {
819       nir_deref_instr *field_deref =
820          nir_build_deref_struct(&b->nb, deref_var, i - 1);
821       nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
822       /* extract strings */
823       fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
824       if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
825          unsigned idx = vtn_add_printf_string(b, w_src[i], info);
826          nir_store_deref(&b->nb, field_deref,
827                          nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
828                          ~0 /* write_mask */);
829       } else
830          nir_store_deref(&b->nb, field_deref, field_src, ~0);
831    }
832 
833    /* Lastly, the actual intrinsic */
834    nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
835    nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
836    vtn_push_nir_ssa(b, w_dest[1], ret);
837 }
838 
839 static nir_ssa_def *
handle_round(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)840 handle_round(struct vtn_builder *b, uint32_t opcode,
841              unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842              const struct vtn_type *dest_type)
843 {
844    nir_ssa_def *src = srcs[0];
845    nir_builder *nb = &b->nb;
846    nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847    nir_ssa_def *truncated = nir_ftrunc(nb, src);
848    nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849 
850    return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851                     nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852 }
853 
854 static nir_ssa_def *
handle_shuffle(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)855 handle_shuffle(struct vtn_builder *b, uint32_t opcode,
856                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
857                const struct vtn_type *dest_type)
858 {
859    struct nir_ssa_def *input = srcs[0];
860    struct nir_ssa_def *mask = srcs[1];
861 
862    unsigned out_elems = dest_type->length;
863    nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
864    unsigned in_elems = input->num_components;
865    if (mask->bit_size != 32)
866       mask = nir_u2u32(&b->nb, mask);
867    mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
868    for (unsigned i = 0; i < out_elems; i++)
869       outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));
870 
871    return nir_vec(&b->nb, outres, out_elems);
872 }
873 
874 static nir_ssa_def *
handle_shuffle2(struct vtn_builder * b,uint32_t opcode,unsigned num_srcs,nir_ssa_def ** srcs,struct vtn_type ** src_types,const struct vtn_type * dest_type)875 handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
876                 unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
877                 const struct vtn_type *dest_type)
878 {
879    struct nir_ssa_def *input0 = srcs[0];
880    struct nir_ssa_def *input1 = srcs[1];
881    struct nir_ssa_def *mask = srcs[2];
882 
883    unsigned out_elems = dest_type->length;
884    nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
885    unsigned in_elems = input0->num_components;
886    unsigned total_mask = 2 * in_elems - 1;
887    unsigned half_mask = in_elems - 1;
888    if (mask->bit_size != 32)
889       mask = nir_u2u32(&b->nb, mask);
890    mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
891    for (unsigned i = 0; i < out_elems; i++) {
892       nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
893       nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
894       nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
895       nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
896       nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
897       outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
898    }
899    return nir_vec(&b->nb, outres, out_elems);
900 }
901 
902 bool
vtn_handle_opencl_instruction(struct vtn_builder * b,SpvOp ext_opcode,const uint32_t * w,unsigned count)903 vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
904                               const uint32_t *w, unsigned count)
905 {
906    enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;
907 
908    switch (cl_opcode) {
909    case OpenCLstd_Fabs:
910    case OpenCLstd_SAbs:
911    case OpenCLstd_UAbs:
912    case OpenCLstd_SAdd_sat:
913    case OpenCLstd_UAdd_sat:
914    case OpenCLstd_Ceil:
915    case OpenCLstd_Floor:
916    case OpenCLstd_Fmax:
917    case OpenCLstd_SHadd:
918    case OpenCLstd_UHadd:
919    case OpenCLstd_SMax:
920    case OpenCLstd_UMax:
921    case OpenCLstd_Fmin:
922    case OpenCLstd_SMin:
923    case OpenCLstd_UMin:
924    case OpenCLstd_Mix:
925    case OpenCLstd_Native_cos:
926    case OpenCLstd_Native_divide:
927    case OpenCLstd_Native_exp2:
928    case OpenCLstd_Native_log2:
929    case OpenCLstd_Native_powr:
930    case OpenCLstd_Native_recip:
931    case OpenCLstd_Native_rsqrt:
932    case OpenCLstd_Native_sin:
933    case OpenCLstd_Native_sqrt:
934    case OpenCLstd_SMul_hi:
935    case OpenCLstd_UMul_hi:
936    case OpenCLstd_Popcount:
937    case OpenCLstd_SRhadd:
938    case OpenCLstd_URhadd:
939    case OpenCLstd_Rsqrt:
940    case OpenCLstd_Sign:
941    case OpenCLstd_Sqrt:
942    case OpenCLstd_SSub_sat:
943    case OpenCLstd_USub_sat:
944    case OpenCLstd_Trunc:
945    case OpenCLstd_Rint:
946    case OpenCLstd_Half_divide:
947    case OpenCLstd_Half_recip:
948       handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
949       return true;
950    case OpenCLstd_SAbs_diff:
951    case OpenCLstd_UAbs_diff:
952    case OpenCLstd_SMad_hi:
953    case OpenCLstd_UMad_hi:
954    case OpenCLstd_SMad24:
955    case OpenCLstd_UMad24:
956    case OpenCLstd_SMul24:
957    case OpenCLstd_UMul24:
958    case OpenCLstd_Bitselect:
959    case OpenCLstd_FClamp:
960    case OpenCLstd_SClamp:
961    case OpenCLstd_UClamp:
962    case OpenCLstd_Copysign:
963    case OpenCLstd_Cross:
964    case OpenCLstd_Degrees:
965    case OpenCLstd_Fdim:
966    case OpenCLstd_Fma:
967    case OpenCLstd_Distance:
968    case OpenCLstd_Fast_distance:
969    case OpenCLstd_Fast_length:
970    case OpenCLstd_Fast_normalize:
971    case OpenCLstd_Half_rsqrt:
972    case OpenCLstd_Half_sqrt:
973    case OpenCLstd_Length:
974    case OpenCLstd_Mad:
975    case OpenCLstd_Maxmag:
976    case OpenCLstd_Minmag:
977    case OpenCLstd_Nan:
978    case OpenCLstd_Nextafter:
979    case OpenCLstd_Normalize:
980    case OpenCLstd_Radians:
981    case OpenCLstd_Rotate:
982    case OpenCLstd_Select:
983    case OpenCLstd_Step:
984    case OpenCLstd_Smoothstep:
985    case OpenCLstd_S_Upsample:
986    case OpenCLstd_U_Upsample:
987    case OpenCLstd_Clz:
988    case OpenCLstd_Ctz:
989    case OpenCLstd_Native_exp:
990    case OpenCLstd_Native_exp10:
991    case OpenCLstd_Native_log:
992    case OpenCLstd_Native_log10:
993    case OpenCLstd_Acos:
994    case OpenCLstd_Acosh:
995    case OpenCLstd_Acospi:
996    case OpenCLstd_Asin:
997    case OpenCLstd_Asinh:
998    case OpenCLstd_Asinpi:
999    case OpenCLstd_Atan:
1000    case OpenCLstd_Atan2:
1001    case OpenCLstd_Atanh:
1002    case OpenCLstd_Atanpi:
1003    case OpenCLstd_Atan2pi:
1004    case OpenCLstd_Fract:
1005    case OpenCLstd_Frexp:
1006    case OpenCLstd_Exp:
1007    case OpenCLstd_Exp2:
1008    case OpenCLstd_Expm1:
1009    case OpenCLstd_Exp10:
1010    case OpenCLstd_Fmod:
1011    case OpenCLstd_Ilogb:
1012    case OpenCLstd_Log:
1013    case OpenCLstd_Log2:
1014    case OpenCLstd_Log10:
1015    case OpenCLstd_Log1p:
1016    case OpenCLstd_Logb:
1017    case OpenCLstd_Ldexp:
1018    case OpenCLstd_Cos:
1019    case OpenCLstd_Cosh:
1020    case OpenCLstd_Cospi:
1021    case OpenCLstd_Sin:
1022    case OpenCLstd_Sinh:
1023    case OpenCLstd_Sinpi:
1024    case OpenCLstd_Tan:
1025    case OpenCLstd_Tanh:
1026    case OpenCLstd_Tanpi:
1027    case OpenCLstd_Cbrt:
1028    case OpenCLstd_Erfc:
1029    case OpenCLstd_Erf:
1030    case OpenCLstd_Lgamma:
1031    case OpenCLstd_Lgamma_r:
1032    case OpenCLstd_Tgamma:
1033    case OpenCLstd_Pow:
1034    case OpenCLstd_Powr:
1035    case OpenCLstd_Pown:
1036    case OpenCLstd_Rootn:
1037    case OpenCLstd_Remainder:
1038    case OpenCLstd_Remquo:
1039    case OpenCLstd_Hypot:
1040    case OpenCLstd_Sincos:
1041    case OpenCLstd_Modf:
1042    case OpenCLstd_UMad_sat:
1043    case OpenCLstd_SMad_sat:
1044    case OpenCLstd_Native_tan:
1045    case OpenCLstd_Half_cos:
1046    case OpenCLstd_Half_exp:
1047    case OpenCLstd_Half_exp2:
1048    case OpenCLstd_Half_exp10:
1049    case OpenCLstd_Half_log:
1050    case OpenCLstd_Half_log2:
1051    case OpenCLstd_Half_log10:
1052    case OpenCLstd_Half_powr:
1053    case OpenCLstd_Half_sin:
1054    case OpenCLstd_Half_tan:
1055       handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
1056       return true;
1057    case OpenCLstd_Vloadn:
1058    case OpenCLstd_Vload_half:
1059    case OpenCLstd_Vload_halfn:
1060    case OpenCLstd_Vloada_halfn:
1061       vtn_handle_opencl_vload(b, cl_opcode, w, count);
1062       return true;
1063    case OpenCLstd_Vstoren:
1064    case OpenCLstd_Vstore_half:
1065    case OpenCLstd_Vstore_halfn:
1066    case OpenCLstd_Vstorea_halfn:
1067       vtn_handle_opencl_vstore(b, cl_opcode, w, count);
1068       return true;
1069    case OpenCLstd_Vstore_half_r:
1070    case OpenCLstd_Vstore_halfn_r:
1071    case OpenCLstd_Vstorea_halfn_r:
1072       vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
1073       return true;
1074    case OpenCLstd_Shuffle:
1075       handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
1076       return true;
1077    case OpenCLstd_Shuffle2:
1078       handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
1079       return true;
1080    case OpenCLstd_Round:
1081       handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
1082       return true;
1083    case OpenCLstd_Printf:
1084       handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
1085       return true;
1086    case OpenCLstd_Prefetch:
1087       /* TODO maybe add a nir instruction for this? */
1088       return true;
1089    default:
1090       vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
1091       return false;
1092    }
1093 }
1094 
1095 bool
vtn_handle_opencl_core_instruction(struct vtn_builder * b,SpvOp opcode,const uint32_t * w,unsigned count)1096 vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097                                    const uint32_t *w, unsigned count)
1098 {
1099    switch (opcode) {
1100    case SpvOpGroupAsyncCopy:
1101       handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102       return true;
1103    case SpvOpGroupWaitEvents:
1104       handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105       return true;
1106    default:
1107       return false;
1108    }
1109    return true;
1110 }
1111