from __future__ import print_function

import re
from nir_opcodes import opcodes
from nir_opcodes import type_has_size, type_size, type_sizes, type_base_type


def type_add_size(type_, size):
    """Return ``type_`` with ``size`` appended unless it is already sized.

    A type for which ``type_has_size`` is true is returned unchanged;
    otherwise ``str(size)`` is concatenated onto the type name.
    """
    if type_has_size(type_):
        return type_
    return type_ + str(size)


def op_bit_sizes(op):
    """Return the sorted bit sizes common to all unsized types of ``op``.

    Every unsized type among ``op.output_type`` and ``op.input_types``
    contributes the set given by ``type_sizes``; the result is the
    intersection of those sets as a sorted list.  ``None`` is returned when
    the output type and every input type already carry a size.
    """
    sizes = None
    if not type_has_size(op.output_type):
        sizes = set(type_sizes(op.output_type))

    for input_type in op.input_types:
        if not type_has_size(input_type):
            if sizes is None:
                sizes = set(type_sizes(input_type))
            else:
                sizes = sizes.intersection(set(type_sizes(input_type)))

    return sorted(sizes) if sizes is not None else None


def get_const_field(type_):
    """Return the nir_const_value member name used to access ``type_``.

    The template uses the result as ``_src[j][k].<field>`` and
    ``_dst_val[k].<field>``.
    """
    if type_size(type_) == 1:
        return 'b'
    elif type_base_type(type_) == 'bool':
        return 'i' + str(type_size(type_))
    elif type_ == "float16":
        # Half floats are stored as raw 16-bit patterns; the template
        # converts them explicitly with the _mesa_half/float16 helpers.
        return "u16"
    else:
        return type_base_type(type_)[0] + str(type_size(type_))


# Mako template for the generated C file.  This is a regular (non-raw)
# string literal, so any backslash that must reach the C output has to be
# doubled.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "util/double.h"
#include "util/softfloat.h"
#include "util/bigmath.h"
#include "nir_constant_expressions.h"

#define MAX_UINT_FOR_SIZE(bits) (UINT64_MAX >> (64 - (bits)))

/**
 * \\brief Checks if the provided value is a denorm and flushes it to zero.
 */
static void
constant_denorm_flush_to_zero(nir_const_value *value, unsigned bit_size)
{
    switch(bit_size) {
    case 64:
        if (0 == (value->u64 & 0x7ff0000000000000))
            value->u64 &= 0x8000000000000000;
        break;
    case 32:
        if (0 == (value->u32 & 0x7f800000))
            value->u32 &= 0x80000000;
        break;
    case 16:
        if (0 == (value->u16 & 0x7c00))
            value->u16 &= 0x8000;
    }
}

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packSnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packSnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
     *
     * We must first cast the float to an int, because casting a negative
     * float to a uint is undefined.
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackSnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
     */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackSnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
     */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    packUnorm4x8
     *    ------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
     */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    packUnorm2x16
     *    -------------
     *    The conversion for component c of v to fixed point is done as
     *    follows:
     *
     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
     */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
    /* From section 8.4 of the GLSL 4.30 spec:
     *
     *    unpackUnorm4x8
     *    --------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm4x8: f / 255.0
     */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
    /* From section 8.4 of the GLSL ES 3.00 spec:
     *
     *    unpackUnorm2x16
     *    ---------------
     *    The conversion for unpacked fixed-point value f to floating point is
     *    done as follows:
     *
     *       unpackUnorm2x16: f / 65535.0
     */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16_flush_to_zero(uint16_t u)
{
   if (0 == (u & 0x7c00))
      u &= 0x8000;
   return _mesa_half_to_float(u);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef int8_t int1_t;
typedef uint8_t uint1_t;
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool1_t;
typedef bool bool8_t;
typedef bool bool16_t;
typedef bool bool32_t;
typedef bool bool64_t;
% for type in ["float", "int", "uint", "bool"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
   ${type}${width}_t e;
   ${type}${width}_t f;
   ${type}${width}_t g;
   ${type}${width}_t h;
   ${type}${width}_t i;
   ${type}${width}_t j;
   ${type}${width}_t k;
   ${type}${width}_t l;
   ${type}${width}_t m;
   ${type}${width}_t n;
   ${type}${width}_t o;
   ${type}${width}_t p;
};
% endfor
% endfor

<%def name="evaluate_op(op, bit_size, execution_mode)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "int1":
             /* 1-bit integers use a 0/-1 convention */
             -(int1_t)_src[${j}][${k}].b,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}][${k}].u16),
         % else:
            _src[${j}][${k}].${get_const_field(input_types[j])},
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 16):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "int1":
               /* 1-bit integers use a 0/-1 convention */
               const int1_t src${j} = -(int1_t)_src[${j}][_i].b;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}][_i].u16);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}][_i].${get_const_field(input_types[j])};
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it.  If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[_i].b = dst & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[_i].${get_const_field(output_type)} = -(int)dst;
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtz(dst);
            } else {
               _dst_val[_i].u16 = _mesa_float_to_float16_rtne(dst);
            }
         % else:
            _dst_val[_i].${get_const_field(output_type)} = dst;
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[_i], bit_size);
               }
            %endif
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components.  This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "int1" or output_type == "uint1":
            /* 1-bit integers get truncated */
            _dst_val[${k}].b = dst.${"xyzwefghijklmnop"[k]} & 1;
         % elif output_type.startswith("bool"):
            ## Sanitize the C value to a proper NIR 0/-1 bool
            _dst_val[${k}].${get_const_field(output_type)} = -(int)dst.${"xyzwefghijklmnop"[k]};
         % elif output_type == "float16":
            if (nir_is_rounding_mode_rtz(execution_mode, 16)) {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtz(dst.${"xyzwefghijklmnop"[k]});
            } else {
               _dst_val[${k}].u16 = _mesa_float_to_float16_rtne(dst.${"xyzwefghijklmnop"[k]});
            }
         % else:
            _dst_val[${k}].${get_const_field(output_type)} = dst.${"xyzwefghijklmnop"[k]};
         % endif

         % if op.name != "fquantize2f16" and type_base_type(output_type) == "float":
            % if type_has_size(output_type):
               if (nir_is_denorm_flush_to_zero(execution_mode, ${type_size(output_type)})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], ${type_size(output_type)});
               }
            % else:
               if (nir_is_denorm_flush_to_zero(execution_mode, ${bit_size})) {
                  constant_denorm_flush_to_zero(&_dst_val[${k}], bit_size);
               }
            % endif
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
static void
evaluate_${name}(nir_const_value *_dst_val,
                 UNUSED unsigned num_components,
                 ${"UNUSED" if op_bit_sizes(op) is None else ""} unsigned bit_size,
                 UNUSED nir_const_value **_src,
                 UNUSED unsigned execution_mode)
{
   % if op_bit_sizes(op) is not None:
      switch (bit_size) {
      % for bit_size in op_bit_sizes(op):
      case ${bit_size}: {
         ${evaluate_op(op, bit_size, execution_mode)}
         break;
      }
      % endfor

      default:
         unreachable("unknown bit width");
      }
   % else:
      ${evaluate_op(op, 0, execution_mode)}
   % endif
}
% endfor

void
nir_eval_const_opcode(nir_op op, nir_const_value *dest,
                      unsigned num_components, unsigned bit_width,
                      nir_const_value **src,
                      unsigned float_controls_execution_mode)
{
   switch (op) {
% for name in sorted(opcodes.keys()):
   case nir_op_${name}:
      evaluate_${name}(dest, num_components, bit_width, src, float_controls_execution_mode);
      return;
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""

from mako.template import Template

# Render the template with the opcode table and helper functions and write
# the generated C source to stdout.
print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                type_base_type=type_base_type,
                                type_size=type_size,
                                type_has_size=type_has_size,
                                type_add_size=type_add_size,
                                op_bit_sizes=op_bit_sizes,
                                get_const_field=get_const_field))