• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import re
2from nir_opcodes import opcodes
3from nir_opcodes import type_has_size, type_size, type_sizes, type_base_type
4
def type_add_size(type_, size):
    """Return ``type_`` with ``size`` appended, unless it is already sized.

    A type for which ``type_has_size`` is true is returned unchanged;
    otherwise ``str(size)`` is concatenated onto the type name.
    """
    return type_ if type_has_size(type_) else type_ + str(size)
9
def op_bit_sizes(op):
    """Return the sorted bit sizes shared by all unsized types of ``op``.

    The output type is considered first, followed by every input type.
    Types that already carry a size are skipped.  If no type is unsized
    the result is ``None``; otherwise it is the sorted intersection of the
    ``type_sizes`` of each unsized type.
    """
    candidates = [op.output_type]
    candidates.extend(op.input_types)
    unsized = [ty for ty in candidates if not type_has_size(ty)]
    if not unsized:
        return None

    common = set(type_sizes(unsized[0]))
    for ty in unsized[1:]:
        common &= set(type_sizes(ty))
    return sorted(common)
23
def get_const_field(type_):
    """Return the name of the nir_const_value field used for ``type_``.

    * any 1-bit type maps to ``b``
    * other bool types map to ``i`` followed by the size
    * ``float16`` maps to ``u16``
    * everything else maps to the first letter of the base type followed
      by the size
    """
    size = type_size(type_)
    if size == 1:
        return 'b'

    base = type_base_type(type_)
    if base == 'bool':
        return 'i' + str(size)
    if type_ == "float16":
        return "u16"
    return base[0] + str(size)
33
34template = """\
35/*
36 * Copyright (C) 2014 Intel Corporation
37 *
38 * Permission is hereby granted, free of charge, to any person obtaining a
39 * copy of this software and associated documentation files (the "Software"),
40 * to deal in the Software without restriction, including without limitation
41 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
42 * and/or sell copies of the Software, and to permit persons to whom the
43 * Software is furnished to do so, subject to the following conditions:
44 *
45 * The above copyright notice and this permission notice (including the next
46 * paragraph) shall be included in all copies or substantial portions of the
47 * Software.
48 *
49 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
50 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
51 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
52 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
53 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
54 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
55 * IN THE SOFTWARE.
56 *
57 * Authors:
58 *    Jason Ekstrand (jason@jlekstrand.net)
59 */
60
61#include <math.h>
62#include "util/rounding.h" /* for _mesa_roundeven */
63#include "util/half_float.h"
64#include "util/double.h"
65#include "util/softfloat.h"
66#include "util/bigmath.h"
67#include "util/format/format_utils.h"
68#include "nir_constant_expressions.h"
69
70/**
 * @brief Checks if the provided value is a denorm and flushes it to zero.
72 */
73static void
74constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
75{
76    switch(bit_size) {
77    case 64:
78        if (0 == (value->u64 & 0x7ff0000000000000))
79            value->u64 &= 0x8000000000000000;
80        break;
81    case 32:
82        if (0 == (value->u32 & 0x7f800000))
83            value->u32 &= 0x80000000;
84        break;
85    case 16:
86        if (0 == (value->u16 & 0x7c00))
87            value->u16 &= 0x8000;
88    }
89}
90
91/**
92 * Evaluate one component of packSnorm4x8.
93 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* Section 8.4 of the GLSL 4.30 spec gives the conversion of component
     * c of v to fixed point for packSnorm4x8 as:
     *
     *      round(clamp(c, -1, +1) * 127.0)
     *
     * The rounded value goes through int before being narrowed, because
     * casting a negative float to a uint is undefined.
     */
   const float scaled = CLAMP(x, -1.0f, +1.0f) * 127.0f;
   const int fixed = (int) _mesa_roundevenf(scaled);

   return (uint8_t) fixed;
}
112
113/**
114 * Evaluate one component of packSnorm2x16.
115 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of
     * component c of v to fixed point for packSnorm2x16 as:
     *
     *      round(clamp(c, -1, +1) * 32767.0)
     *
     * The rounded value goes through int before being narrowed, because
     * casting a negative float to a uint is undefined.
     */
   const float scaled = CLAMP(x, -1.0f, +1.0f) * 32767.0f;
   const int fixed = (int) _mesa_roundevenf(scaled);

   return (uint16_t) fixed;
}
134
135/**
136 * Evaluate one component of unpackSnorm4x8.
137 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* Section 8.4 of the GLSL 4.30 spec gives the conversion of the
     * unpacked fixed-point value f to floating point for unpackSnorm4x8 as:
     *
     *       clamp(f / 127.0, -1, +1)
     */
   const int8_t fixed = (int8_t) u;
   const float scaled = fixed / 127.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
152
153/**
154 * Evaluate one component of unpackSnorm2x16.
155 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of the
     * unpacked fixed-point value f to floating point for unpackSnorm2x16
     * as:
     *
     *       clamp(f / 32767.0, -1, +1)
     */
   const int16_t fixed = (int16_t) u;
   const float scaled = fixed / 32767.0f;

   return CLAMP(scaled, -1.0f, +1.0f);
}
170
171/**
 * Evaluate one component of packUnorm4x8.
173 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* Section 8.4 of the GLSL 4.30 spec gives the conversion of component
     * c of v to fixed point for packUnorm4x8 as:
     *
     *       round(clamp(c, 0, +1) * 255.0)
     */
   const float scaled = CLAMP(x, 0.0f, 1.0f) * 255.0f;
   const int fixed = (int) _mesa_roundevenf(scaled);

   return (uint8_t) fixed;
}
189
190/**
 * Evaluate one component of packUnorm2x16.
192 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of
     * component c of v to fixed point for packUnorm2x16 as:
     *
     *       round(clamp(c, 0, +1) * 65535.0)
     */
   const float scaled = CLAMP(x, 0.0f, 1.0f) * 65535.0f;
   const int fixed = (int) _mesa_roundevenf(scaled);

   return (uint16_t) fixed;
}
208
209/**
210 * Evaluate one component of unpackUnorm4x8.
211 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* Section 8.4 of the GLSL 4.30 spec gives the conversion of the
     * unpacked fixed-point value f to floating point for unpackUnorm4x8 as:
     *
     *       f / 255.0
     */
   const float fixed = (float) u;

   return fixed / 255.0f;
}
226
227/**
228 * Evaluate one component of unpackUnorm2x16.
229 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* Section 8.4 of the GLSL ES 3.00 spec gives the conversion of the
     * unpacked fixed-point value f to floating point for unpackUnorm2x16
     * as:
     *
     *       f / 65535.0
     */
   const float fixed = (float) u;

   return fixed / 65535.0f;
}
244
245/**
246 * Evaluate one component of packHalf2x16.
247 */
static uint16_t
pack_half_1x16(float x)
{
   /* The conversion itself is delegated to _mesa_float_to_half(). */
   const uint16_t half_bits = _mesa_float_to_half(x);

   return half_bits;
}
253
254/**
255 * Evaluate one component of unpackHalf2x16.
256 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   /* When every bit under the 0x7c00 mask is clear, only the 0x8000 bit of
    * the input is kept before it is handed to _mesa_half_to_float().
    */
   const uint16_t half_bits = (u & 0x7c00) ? u : (u & 0x8000);

   return _mesa_half_to_float(half_bits);
}
264
265/**
266 * Evaluate one component of unpackHalf2x16.
267 */
static float
unpack_half_1x16(uint16_t u)
{
   /* Plain conversion; the input bits are passed through unmodified. */
   const float unpacked = _mesa_half_to_float(u);

   return unpacked;
}
273
/* Some typed vector structures to make things like src0.y work.
 *
 * The typedefs below provide a C type for each sized type name that is not
 * already a standard C type:
 *  - 1-bit integers are held in 8-bit integers,
 *  - float16 is held in a C float (16-bit sources are converted with
 *    _mesa_half_to_float() when they are loaded),
 *  - float32 and float64 are float and double,
 *  - every bool size is held in a C bool.
 *
 * One vec struct is then declared for every size of every base type, with
 * sixteen components named x, y, z, w and e through p.
 */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for type in ["float", "int", "uint", "bool"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
   ${type}${width}_t e;
   ${type}${width}_t f;
   ${type}${width}_t g;
   ${type}${width}_t h;
   ${type}${width}_t i;
   ${type}${width}_t j;
   ${type}${width}_t k;
   ${type}${width}_t l;
   ${type}${width}_t m;
   ${type}${width}_t n;
   ${type}${width}_t o;
   ${type}${width}_t p;
};
% endfor
% endfor
307
<%def name="evaluate_op(op, bit_size, execution_mode)">
   ## Emit the C statements that evaluate one opcode at one bit size.
   ##
   ##   op             - opcode description; this def reads its output_type,
   ##                    input_types, num_inputs, input_sizes, output_size,
   ##                    const_expr and name attributes.
   ##   bit_size       - size appended (via type_add_size) to every type of
   ##                    the opcode that does not already carry one.
   ##   execution_mode - not expanded by the template; the emitted C refers
   ##                    to a C variable of the same name.
   ##
   ## The emitted C reads sources from _src[input][component] and writes
   ## results to _dst_val[component].
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a struct variable srcN whose
   ## first input_sizes[N] components are filled in with the
   ## appropriately-typed values; the remaining components (up to 16) are
   ## zero-initialized.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         ## Flush denormal float results to zero when the execution mode
         ## asks for it; fquantize2f16 is excluded from this step.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               ## The loop variable of the enclosing C loop is _i; there is
               ## no variable named i in the emitted function.
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements and assign the result of the
      ## const_expr to its x, y, z and w components, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to the x, y, z and w components.  This way
         ## expressions which write the same value to all components don't
         ## need to explicitly write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         ## Flush denormal float results to zero when the execution mode
         ## asks for it; fquantize2f16 is excluded from this step.
         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>
457
## Emit one static evaluate_NAME() C function per opcode, in sorted name
## order.
##
## When op_bit_sizes(op) returns a list, the function switches on the C
## parameter bit_size and expands evaluate_op once per listed size (the
## template loop variable bit_size shadows nothing in C; it only selects
## the case label and the types used in that expansion).  When it returns
## None, the C parameter is marked UNUSED and evaluate_op is expanded once
## with a bit size of 0.
% for name, op in sorted(opcodes.items()):
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", off) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% if op.name == "fsat":
#if defined(_MSC_VER) && (defined(_M_ARM64) || defined(_M_ARM64EC))
#pragma optimize("", on) /* Temporary work-around for MSVC compiler bug, present in VS2019 16.9.2 */
#endif
% endif
% endfor
493
/**
 * Dispatch on the opcode to the matching evaluate_NAME() function.
 *
 * dest, num_components, bit_width, src and float_controls_execution_mode
 * are forwarded unchanged.  An opcode without a case label hits
 * unreachable().
 */
void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""
510
511from mako.template import Template
512
# Names made available to the template while it renders: the opcode table
# plus the type helpers it calls.
_render_context = {
    "opcodes": opcodes,
    "type_sizes": type_sizes,
    "type_base_type": type_base_type,
    "type_size": type_size,
    "type_has_size": type_has_size,
    "type_add_size": type_add_size,
    "op_bit_sizes": op_bit_sizes,
    "get_const_field": get_const_field,
}

# Render the template and write the resulting text to stdout.
print(Template(template).render(**_render_context))
520