"""Print C source for NIR constant-expression evaluation.

The Mako template below is rendered against the ``opcodes`` table imported
from ``nir_opcodes`` and the result is written to stdout.  The rendered file
defines one ``evaluate_<name>()`` function per opcode plus the
``nir_eval_const_opcode()`` dispatcher.

The helper functions operate on type-name strings such as ``"float"``
(unsized) or ``"uint32"`` (sized: base name followed by a bit width).
"""

import re

# Splits a sized type name ("float32") into its base name and bit width.
type_split_re = re.compile(r'(?P<type>[a-z]+)(?P<bits>\d+)')


def type_has_size(type_):
    """Return True if the type name ends in a digit, i.e. carries a width."""
    return type_[-1:].isdigit()


def type_size(type_):
    """Return the bit width encoded in a sized type name as an int.

    The caller must pass a sized type name (see type_has_size()).
    """
    assert type_has_size(type_)
    return int(type_split_re.match(type_).group('bits'))


def type_sizes(type_):
    """Return the list of bit widths that the given type name may take.

    A sized type yields only its own width; unsized "float" yields
    [16, 32, 64]; every other unsized name yields [8, 16, 32, 64].
    """
    if type_has_size(type_):
        return [type_size(type_)]
    if type_ == 'float':
        return [16, 32, 64]
    return [8, 16, 32, 64]


def type_add_size(type_, size):
    """Append ``size`` to an unsized type name; sized names pass through."""
    if type_has_size(type_):
        return type_
    return type_ + str(size)


def op_bit_sizes(op):
    """Return the sorted bit widths valid for every unsized type of ``op``.

    ``op`` must provide ``output_type`` and ``input_types``.  The result is
    the intersection of type_sizes() over all unsized types among them, or
    None when the output and all inputs are already sized.
    """
    sizes = None
    for type_ in [op.output_type] + list(op.input_types):
        if type_has_size(type_):
            continue
        candidates = set(type_sizes(type_))
        sizes = candidates if sizes is None else sizes & candidates

    return sorted(sizes) if sizes is not None else None


def get_const_field(type_):
    """Return the nir_const_value field name used for a sized type name.

    "bool32" maps to "u32" and "float16" to "u16"; any other name maps to
    the first letter of its base name followed by its width (for example
    "float32" -> "f32").

    Raises ValueError if ``type_`` is not a base name followed by digits.
    """
    if type_ == "bool32":
        return "u32"
    if type_ == "float16":
        return "u16"

    m = type_split_re.match(type_)
    if not m:
        raise ValueError("unrecognized type name: " + str(type_))
    return m.group('type')[0] + m.group('bits')


# The leading backslash suppresses the newline after the opening quotes so
# that the rendered output starts directly with the license comment.
template = """\
/*
 * Copyright (C) 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 */

#include <math.h>
#include "main/core.h"
#include "util/rounding.h" /* for _mesa_roundeven */
#include "util/half_float.h"
#include "nir_constant_expressions.h"

/**
 * Evaluate one component of packSnorm4x8.
 */
static uint8_t
pack_snorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packSnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f);
}

/**
 * Evaluate one component of packSnorm2x16.
 */
static uint16_t
pack_snorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packSnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
    *
    * We must first cast the float to an int, because casting a negative
    * float to a uint is undefined.
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
}

/**
 * Evaluate one component of unpackSnorm4x8.
 */
static float
unpack_snorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackSnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
    */
   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component of unpackSnorm2x16.
 */
static float
unpack_snorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackSnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
    */
   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
}

/**
 * Evaluate one component packUnorm4x8.
 */
static uint8_t
pack_unorm_1x8(float x)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    packUnorm4x8
    *    ------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
    */
   return (uint8_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
}

/**
 * Evaluate one component packUnorm2x16.
 */
static uint16_t
pack_unorm_1x16(float x)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    packUnorm2x16
    *    -------------
    *    The conversion for component c of v to fixed point is done as
    *    follows:
    *
    *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
    */
   return (uint16_t) (int)
          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
}

/**
 * Evaluate one component of unpackUnorm4x8.
 */
static float
unpack_unorm_1x8(uint8_t u)
{
   /* From section 8.4 of the GLSL 4.30 spec:
    *
    *    unpackUnorm4x8
    *    --------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackUnorm4x8: f / 255.0
    */
   return (float) u / 255.0f;
}

/**
 * Evaluate one component of unpackUnorm2x16.
 */
static float
unpack_unorm_1x16(uint16_t u)
{
   /* From section 8.4 of the GLSL ES 3.00 spec:
    *
    *    unpackUnorm2x16
    *    ---------------
    *    The conversion for unpacked fixed-point value f to floating point is
    *    done as follows:
    *
    *       unpackUnorm2x16: f / 65535.0
    */
   return (float) u / 65535.0f;
}

/**
 * Evaluate one component of packHalf2x16.
 */
static uint16_t
pack_half_1x16(float x)
{
   return _mesa_float_to_half(x);
}

/**
 * Evaluate one component of unpackHalf2x16.
 */
static float
unpack_half_1x16(uint16_t u)
{
   return _mesa_half_to_float(u);
}

/* Some typed vector structures to make things like src0.y work */
typedef float float16_t;
typedef float float32_t;
typedef double float64_t;
typedef bool bool32_t;
% for type in ["float", "int", "uint"]:
% for width in type_sizes(type):
struct ${type}${width}_vec {
   ${type}${width}_t x;
   ${type}${width}_t y;
   ${type}${width}_t z;
   ${type}${width}_t w;
};
% endfor
% endfor

struct bool32_vec {
   bool x;
   bool y;
   bool z;
   bool w;
};

<%def name="evaluate_op(op, bit_size)">
   <%
   output_type = type_add_size(op.output_type, bit_size)
   input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
   %>

   ## For each non-per-component input, create a variable srcN that
   ## contains x, y, z, and w elements which are filled in with the
   ## appropriately-typed values.
   % for j in range(op.num_inputs):
      % if op.input_sizes[j] == 0:
         <% continue %>
      % elif "src" + str(j) not in op.const_expr:
         ## Avoid unused variable warnings
         <% continue %>
      %endif

      const struct ${input_types[j]}_vec src${j} = {
      % for k in range(op.input_sizes[j]):
         % if input_types[j] == "bool32":
            _src[${j}].u32[${k}] != 0,
         % elif input_types[j] == "float16":
            _mesa_half_to_float(_src[${j}].u16[${k}]),
         % else:
            _src[${j}].${get_const_field(input_types[j])}[${k}],
         % endif
      % endfor
      % for k in range(op.input_sizes[j], 4):
         0,
      % endfor
      };
   % endfor

   % if op.output_size == 0:
      ## For per-component instructions, we need to iterate over the
      ## components and apply the constant expression one component
      ## at a time.
      for (unsigned _i = 0; _i < num_components; _i++) {
         ## For each per-component input, create a variable srcN that
         ## contains the value of the current (_i'th) component.
         % for j in range(op.num_inputs):
            % if op.input_sizes[j] != 0:
               <% continue %>
            % elif "src" + str(j) not in op.const_expr:
               ## Avoid unused variable warnings
               <% continue %>
            % elif input_types[j] == "bool32":
               const bool src${j} = _src[${j}].u32[_i] != 0;
            % elif input_types[j] == "float16":
               const float src${j} =
                  _mesa_half_to_float(_src[${j}].u16[_i]);
            % else:
               const ${input_types[j]}_t src${j} =
                  _src[${j}].${get_const_field(input_types[j])}[_i];
            % endif
         % endfor

         ## Create an appropriately-typed variable dst and assign the
         ## result of the const_expr to it. If const_expr already contains
         ## writes to dst, just include const_expr directly.
         % if "dst" in op.const_expr:
            ${output_type}_t dst;

            ${op.const_expr}
         % else:
            ${output_type}_t dst = ${op.const_expr};
         % endif

         ## Store the current component of the actual destination to the
         ## value of dst.
         % if output_type == "bool32":
            ## Sanitize the C value to a proper NIR bool
            _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
         % elif output_type == "float16":
            _dst_val.u16[_i] = _mesa_float_to_half(dst);
         % else:
            _dst_val.${get_const_field(output_type)}[_i] = dst;
         % endif
      }
   % else:
      ## In the non-per-component case, create a struct dst with
      ## appropriately-typed elements x, y, z, and w and assign the result
      ## of the const_expr to all components of dst, or include the
      ## const_expr directly if it writes to dst already.
      struct ${output_type}_vec dst;

      % if "dst" in op.const_expr:
         ${op.const_expr}
      % else:
         ## Splat the value to all components. This way expressions which
         ## write the same value to all components don't need to explicitly
         ## write to dest. One such example is fnoise which has a
         ## const_expr of 0.0f.
         dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
      % endif

      ## For each component in the destination, copy the value of dst to
      ## the actual destination.
      % for k in range(op.output_size):
         % if output_type == "bool32":
            ## Sanitize the C value to a proper NIR bool
            _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
         % elif output_type == "float16":
            _dst_val.u16[${k}] = _mesa_float_to_half(dst.${"xyzw"[k]});
         % else:
            _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
         % endif
      % endfor
   % endif
</%def>

% for name, op in sorted(opcodes.items()):
static nir_const_value
evaluate_${name}(MAYBE_UNUSED unsigned num_components, unsigned bit_size,
                 MAYBE_UNUSED nir_const_value *_src)
{
   nir_const_value _dst_val = { {0, } };

   % if op_bit_sizes(op) is not None:
   switch (bit_size) {
   % for bit_size in op_bit_sizes(op):
   case ${bit_size}: {
      ${evaluate_op(op, bit_size)}
      break;
   }
   % endfor

   default:
      unreachable("unknown bit width");
   }
   % else:
      ${evaluate_op(op, 0)}
   % endif

   return _dst_val;
}
% endfor

nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
                      unsigned bit_width, nir_const_value *src)
{
   switch (op) {
% for name in sorted(opcodes):
   case nir_op_${name}:
      return evaluate_${name}(num_components, bit_width, src);
% endfor
   default:
      unreachable("shouldn't get here");
   }
}"""


def main():
    """Render the template against the opcode table and print it to stdout."""
    # Imported here rather than at module level so that the helper functions
    # above can be imported without nir_opcodes or Mako being available.
    from nir_opcodes import opcodes
    from mako.template import Template

    # A parenthesized single-argument print behaves identically as a
    # Python 2 print statement and as a Python 3 function call.
    print(Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
                                    type_has_size=type_has_size,
                                    type_add_size=type_add_size,
                                    op_bit_sizes=op_bit_sizes,
                                    get_const_field=get_const_field))


if __name__ == "__main__":
    main()