1 /* -*- c++ -*- */ 2 /* 3 * Copyright © 2010-2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 #pragma once 26 27 #include "elk_ir_vec4.h" 28 #include "elk_ir_allocator.h" 29 30 namespace elk { 31 /** 32 * Toolbox to assemble a VEC4 IR program out of individual instructions. 33 * 34 * This object is meant to have an interface consistent with 35 * elk::fs_builder. They cannot be fully interchangeable because 36 * elk::fs_builder generates scalar code while elk::vec4_builder generates 37 * vector code. 38 */ 39 class vec4_builder { 40 public: 41 /** Type used in this IR to represent a source of an instruction. */ 42 typedef elk::src_reg src_reg; 43 44 /** Type used in this IR to represent the destination of an instruction. */ 45 typedef elk::dst_reg dst_reg; 46 47 /** Type used in this IR to represent an instruction. */ 48 typedef vec4_instruction instruction; 49 50 /** 51 * Construct a vec4_builder that inserts instructions into \p shader. 52 */ 53 vec4_builder(elk_backend_shader *shader, unsigned dispatch_width = 8) : shader(shader)54 shader(shader), block(NULL), cursor(NULL), 55 _dispatch_width(dispatch_width), _group(0), 56 force_writemask_all(false), 57 annotation() 58 { 59 } 60 61 /** 62 * Construct a vec4_builder that inserts instructions into \p shader 63 * before instruction \p inst in basic block \p block. The default 64 * execution controls and debug annotation are initialized from the 65 * instruction passed as argument. 66 */ vec4_builder(elk_backend_shader * shader,elk_bblock_t * block,instruction * inst)67 vec4_builder(elk_backend_shader *shader, elk_bblock_t *block, instruction *inst) : 68 shader(shader), block(block), cursor(inst), 69 _dispatch_width(inst->exec_size), _group(inst->group), 70 force_writemask_all(inst->force_writemask_all) 71 { 72 annotation.str = inst->annotation; 73 annotation.ir = inst->ir; 74 } 75 76 /** 77 * Construct a vec4_builder that inserts instructions before \p cursor 78 * in basic block \p block, inheriting other code generation parameters 79 * from this. 80 */ 81 vec4_builder at(elk_bblock_t * block,exec_node * cursor)82 at(elk_bblock_t *block, exec_node *cursor) const 83 { 84 vec4_builder bld = *this; 85 bld.block = block; 86 bld.cursor = cursor; 87 return bld; 88 } 89 90 /** 91 * Construct a vec4_builder appending instructions at the end of the 92 * instruction list of the shader, inheriting other code generation 93 * parameters from this. 94 */ 95 vec4_builder at_end()96 at_end() const 97 { 98 return at(NULL, (exec_node *)&shader->instructions.tail_sentinel); 99 } 100 101 /** 102 * Construct a builder specifying the default SIMD width and group of 103 * channel enable signals, inheriting other code generation parameters 104 * from this. 105 * 106 * \p n gives the default SIMD width, \p i gives the slot group used for 107 * predication and control flow masking in multiples of \p n channels. 108 */ 109 vec4_builder group(unsigned n,unsigned i)110 group(unsigned n, unsigned i) const 111 { 112 assert(force_writemask_all || 113 (n <= dispatch_width() && i < dispatch_width() / n)); 114 vec4_builder bld = *this; 115 bld._dispatch_width = n; 116 bld._group += i * n; 117 return bld; 118 } 119 120 /** 121 * Construct a builder with per-channel control flow execution masking 122 * disabled if \p b is true. If control flow execution masking is 123 * already disabled this has no effect. 124 */ 125 vec4_builder 126 exec_all(bool b = true) const 127 { 128 vec4_builder bld = *this; 129 if (b) 130 bld.force_writemask_all = true; 131 return bld; 132 } 133 134 /** 135 * Construct a builder with the given debug annotation info. 136 */ 137 vec4_builder 138 annotate(const char *str, const void *ir = NULL) const 139 { 140 vec4_builder bld = *this; 141 bld.annotation.str = str; 142 bld.annotation.ir = ir; 143 return bld; 144 } 145 146 /** 147 * Get the SIMD width in use. 148 */ 149 unsigned dispatch_width()150 dispatch_width() const 151 { 152 return _dispatch_width; 153 } 154 155 /** 156 * Get the channel group in use. 157 */ 158 unsigned group()159 group() const 160 { 161 return _group; 162 } 163 164 /** 165 * Allocate a virtual register of natural vector size (four for this IR) 166 * and SIMD width. \p n gives the amount of space to allocate in 167 * dispatch_width units (which is just enough space for four logical 168 * components in this IR). 169 */ 170 dst_reg 171 vgrf(enum elk_reg_type type, unsigned n = 1) const 172 { 173 assert(dispatch_width() <= 32); 174 175 if (n > 0) 176 return retype(dst_reg(VGRF, shader->alloc.allocate( 177 n * DIV_ROUND_UP(type_sz(type), 4))), 178 type); 179 else 180 return retype(null_reg_ud(), type); 181 } 182 183 /** 184 * Create a null register of floating type. 185 */ 186 dst_reg null_reg_f()187 null_reg_f() const 188 { 189 return dst_reg(retype(elk_null_vec(dispatch_width()), 190 ELK_REGISTER_TYPE_F)); 191 } 192 193 /** 194 * Create a null register of signed integer type. 195 */ 196 dst_reg null_reg_d()197 null_reg_d() const 198 { 199 return dst_reg(retype(elk_null_vec(dispatch_width()), 200 ELK_REGISTER_TYPE_D)); 201 } 202 203 /** 204 * Create a null register of unsigned integer type. 205 */ 206 dst_reg null_reg_ud()207 null_reg_ud() const 208 { 209 return dst_reg(retype(elk_null_vec(dispatch_width()), 210 ELK_REGISTER_TYPE_UD)); 211 } 212 213 /** 214 * Insert an instruction into the program. 215 */ 216 instruction * emit(const instruction & inst)217 emit(const instruction &inst) const 218 { 219 return emit(new(shader->mem_ctx) instruction(inst)); 220 } 221 222 /** 223 * Create and insert a nullary control instruction into the program. 224 */ 225 instruction * emit(enum elk_opcode opcode)226 emit(enum elk_opcode opcode) const 227 { 228 return emit(instruction(opcode)); 229 } 230 231 /** 232 * Create and insert a nullary instruction into the program. 233 */ 234 instruction * emit(enum elk_opcode opcode,const dst_reg & dst)235 emit(enum elk_opcode opcode, const dst_reg &dst) const 236 { 237 return emit(instruction(opcode, dst)); 238 } 239 240 /** 241 * Create and insert a unary instruction into the program. 242 */ 243 instruction * emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0)244 emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0) const 245 { 246 switch (opcode) { 247 case ELK_SHADER_OPCODE_RCP: 248 case ELK_SHADER_OPCODE_RSQ: 249 case ELK_SHADER_OPCODE_SQRT: 250 case ELK_SHADER_OPCODE_EXP2: 251 case ELK_SHADER_OPCODE_LOG2: 252 case ELK_SHADER_OPCODE_SIN: 253 case ELK_SHADER_OPCODE_COS: 254 return fix_math_instruction( 255 emit(instruction(opcode, dst, 256 fix_math_operand(src0)))); 257 258 default: 259 return emit(instruction(opcode, dst, src0)); 260 } 261 } 262 263 /** 264 * Create and insert a binary instruction into the program. 265 */ 266 instruction * emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0,const src_reg & src1)267 emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0, 268 const src_reg &src1) const 269 { 270 switch (opcode) { 271 case ELK_SHADER_OPCODE_POW: 272 case ELK_SHADER_OPCODE_INT_QUOTIENT: 273 case ELK_SHADER_OPCODE_INT_REMAINDER: 274 return fix_math_instruction( 275 emit(instruction(opcode, dst, 276 fix_math_operand(src0), 277 fix_math_operand(src1)))); 278 279 default: 280 return emit(instruction(opcode, dst, src0, src1)); 281 } 282 } 283 284 /** 285 * Create and insert a ternary instruction into the program. 286 */ 287 instruction * emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0,const src_reg & src1,const src_reg & src2)288 emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0, 289 const src_reg &src1, const src_reg &src2) const 290 { 291 switch (opcode) { 292 case ELK_OPCODE_BFE: 293 case ELK_OPCODE_BFI2: 294 case ELK_OPCODE_MAD: 295 case ELK_OPCODE_LRP: 296 return emit(instruction(opcode, dst, 297 fix_3src_operand(src0), 298 fix_3src_operand(src1), 299 fix_3src_operand(src2))); 300 301 default: 302 return emit(instruction(opcode, dst, src0, src1, src2)); 303 } 304 } 305 306 /** 307 * Insert a preallocated instruction into the program. 308 */ 309 instruction * emit(instruction * inst)310 emit(instruction *inst) const 311 { 312 inst->exec_size = dispatch_width(); 313 inst->group = group(); 314 inst->force_writemask_all = force_writemask_all; 315 inst->size_written = inst->exec_size * type_sz(inst->dst.type); 316 inst->annotation = annotation.str; 317 inst->ir = annotation.ir; 318 319 if (block) 320 static_cast<instruction *>(cursor)->insert_before(block, inst); 321 else 322 cursor->insert_before(inst); 323 324 return inst; 325 } 326 327 /** 328 * Select \p src0 if the comparison of both sources with the given 329 * conditional mod evaluates to true, otherwise select \p src1. 330 * 331 * Generally useful to get the minimum or maximum of two values. 332 */ 333 instruction * emit_minmax(const dst_reg & dst,const src_reg & src0,const src_reg & src1,elk_conditional_mod mod)334 emit_minmax(const dst_reg &dst, const src_reg &src0, 335 const src_reg &src1, elk_conditional_mod mod) const 336 { 337 assert(mod == ELK_CONDITIONAL_GE || mod == ELK_CONDITIONAL_L); 338 339 return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), 340 fix_unsigned_negate(src1))); 341 } 342 343 /** 344 * Copy any live channel from \p src to the first channel of the result. 345 */ 346 src_reg emit_uniformize(const src_reg & src)347 emit_uniformize(const src_reg &src) const 348 { 349 const vec4_builder ubld = exec_all(); 350 const dst_reg chan_index = 351 writemask(vgrf(ELK_REGISTER_TYPE_UD), WRITEMASK_X); 352 const dst_reg dst = vgrf(src.type); 353 354 ubld.emit(ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); 355 ubld.emit(ELK_SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index)); 356 357 return src_reg(dst); 358 } 359 360 /** 361 * Assorted arithmetic ops. 362 * @{ 363 */ 364 #define ALU1(op) \ 365 instruction * \ 366 op(const dst_reg &dst, const src_reg &src0) const \ 367 { \ 368 return emit(ELK_OPCODE_##op, dst, src0); \ 369 } 370 371 #define ALU2(op) \ 372 instruction * \ 373 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 374 { \ 375 return emit(ELK_OPCODE_##op, dst, src0, src1); \ 376 } 377 378 #define ALU2_ACC(op) \ 379 instruction * \ 380 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \ 381 { \ 382 instruction *inst = emit(ELK_OPCODE_##op, dst, src0, src1); \ 383 inst->writes_accumulator = true; \ 384 return inst; \ 385 } 386 387 #define ALU3(op) \ 388 instruction * \ 389 op(const dst_reg &dst, const src_reg &src0, const src_reg &src1, \ 390 const src_reg &src2) const \ 391 { \ 392 return emit(ELK_OPCODE_##op, dst, src0, src1, src2); \ 393 } 394 395 ALU2(ADD) ALU2_ACC(ADDC)396 ALU2_ACC(ADDC) 397 ALU2(AND) 398 ALU2(ASR) 399 ALU2(AVG) 400 ALU3(BFE) 401 ALU2(BFI1) 402 ALU3(BFI2) 403 ALU1(BFREV) 404 ALU1(CBIT) 405 ALU3(CSEL) 406 ALU1(DIM) 407 ALU2(DP2) 408 ALU2(DP3) 409 ALU2(DP4) 410 ALU2(DPH) 411 ALU1(F16TO32) 412 ALU1(F32TO16) 413 ALU1(FBH) 414 ALU1(FBL) 415 ALU1(FRC) 416 ALU2(LINE) 417 ALU1(LZD) 418 ALU2(MAC) 419 ALU2_ACC(MACH) 420 ALU3(MAD) 421 ALU1(MOV) 422 ALU2(MUL) 423 ALU1(NOT) 424 ALU2(OR) 425 ALU2(PLN) 426 ALU1(RNDD) 427 ALU1(RNDE) 428 ALU1(RNDU) 429 ALU1(RNDZ) 430 ALU2(SAD2) 431 ALU2_ACC(SADA2) 432 ALU2(SEL) 433 ALU2(SHL) 434 ALU2(SHR) 435 ALU2_ACC(SUBB) 436 ALU2(XOR) 437 438 #undef ALU3 439 #undef ALU2_ACC 440 #undef ALU2 441 #undef ALU1 442 /** @} */ 443 444 /** 445 * CMP: Sets the low bit of the destination channels with the result 446 * of the comparison, while the upper bits are undefined, and updates 447 * the flag register with the packed 16 bits of the result. 448 */ 449 instruction * 450 CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1, 451 elk_conditional_mod condition) const 452 { 453 /* Take the instruction: 454 * 455 * CMP null<d> src0<f> src1<f> 456 * 457 * Original gfx4 does type conversion to the destination type 458 * before comparison, producing garbage results for floating 459 * point comparisons. 460 * 461 * The destination type doesn't matter on newer generations, 462 * so we set the type to match src0 so we can compact the 463 * instruction. 464 */ 465 return set_condmod(condition, 466 emit(ELK_OPCODE_CMP, retype(dst, src0.type), 467 fix_unsigned_negate(src0), 468 fix_unsigned_negate(src1))); 469 } 470 471 /** 472 * CMPN: Behaves like CMP, but produces true if src1 is NaN. 473 */ 474 instruction * CMPN(const dst_reg & dst,const src_reg & src0,const src_reg & src1,elk_conditional_mod condition)475 CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1, 476 elk_conditional_mod condition) const 477 { 478 /* Take the instruction: 479 * 480 * CMPN null<d> src0<f> src1<f> 481 * 482 * Original gfx4 does type conversion to the destination type 483 * before comparison, producing garbage results for floating 484 * point comparisons. 485 * 486 * The destination type doesn't matter on newer generations, 487 * so we set the type to match src0 so we can compact the 488 * instruction. 489 */ 490 return set_condmod(condition, 491 emit(ELK_OPCODE_CMPN, retype(dst, src0.type), 492 fix_unsigned_negate(src0), 493 fix_unsigned_negate(src1))); 494 } 495 496 /** 497 * Gfx4 predicated IF. 498 */ 499 instruction * IF(elk_predicate predicate)500 IF(elk_predicate predicate) const 501 { 502 return set_predicate(predicate, emit(ELK_OPCODE_IF)); 503 } 504 505 /** 506 * Gfx6 IF with embedded comparison. 507 */ 508 instruction * IF(const src_reg & src0,const src_reg & src1,elk_conditional_mod condition)509 IF(const src_reg &src0, const src_reg &src1, 510 elk_conditional_mod condition) const 511 { 512 assert(shader->devinfo->ver == 6); 513 return set_condmod(condition, 514 emit(ELK_OPCODE_IF, 515 null_reg_d(), 516 fix_unsigned_negate(src0), 517 fix_unsigned_negate(src1))); 518 } 519 520 /** 521 * Emit a linear interpolation instruction. 522 */ 523 instruction * LRP(const dst_reg & dst,const src_reg & x,const src_reg & y,const src_reg & a)524 LRP(const dst_reg &dst, const src_reg &x, const src_reg &y, 525 const src_reg &a) const 526 { 527 /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so 528 * we need to reorder the operands. 529 */ 530 assert(shader->devinfo->ver >= 6); 531 return emit(ELK_OPCODE_LRP, dst, a, y, x); 532 } 533 534 elk_backend_shader *shader; 535 536 protected: 537 /** 538 * Workaround for negation of UD registers. See comment in 539 * elk_fs_generator::generate_code() for the details. 540 */ 541 src_reg fix_unsigned_negate(const src_reg & src)542 fix_unsigned_negate(const src_reg &src) const 543 { 544 if (src.type == ELK_REGISTER_TYPE_UD && src.negate) { 545 dst_reg temp = vgrf(ELK_REGISTER_TYPE_UD); 546 MOV(temp, src); 547 return src_reg(temp); 548 } else { 549 return src; 550 } 551 } 552 553 /** 554 * Workaround for register access modes not supported by the ternary 555 * instruction encoding. 556 */ 557 src_reg fix_3src_operand(const src_reg & src)558 fix_3src_operand(const src_reg &src) const 559 { 560 /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be 561 * able to use vertical stride of zero to replicate the vec4 uniform, like 562 * 563 * g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7] 564 * 565 * But you can't, since vertical stride is always four in three-source 566 * instructions. Instead, insert a MOV instruction to do the replication so 567 * that the three-source instruction can consume it. 568 */ 569 570 /* The MOV is only needed if the source is a uniform or immediate. */ 571 if (src.file != UNIFORM && src.file != IMM) 572 return src; 573 574 if (src.file == UNIFORM && elk_is_single_value_swizzle(src.swizzle)) 575 return src; 576 577 const dst_reg expanded = vgrf(src.type); 578 emit(ELK_VEC4_OPCODE_UNPACK_UNIFORM, expanded, src); 579 return src_reg(expanded); 580 } 581 582 /** 583 * Workaround for register access modes not supported by the math 584 * instruction. 585 */ 586 src_reg fix_math_operand(const src_reg & src)587 fix_math_operand(const src_reg &src) const 588 { 589 /* The gfx6 math instruction ignores the source modifiers -- 590 * swizzle, abs, negate, and at least some parts of the register 591 * region description. 592 * 593 * Rather than trying to enumerate all these cases, *always* expand the 594 * operand to a temp GRF for gfx6. 595 * 596 * For gfx7, keep the operand as-is, except if immediate, which gfx7 still 597 * can't use. 598 */ 599 if (shader->devinfo->ver == 6 || 600 (shader->devinfo->ver == 7 && src.file == IMM)) { 601 const dst_reg tmp = vgrf(src.type); 602 MOV(tmp, src); 603 return src_reg(tmp); 604 } else { 605 return src; 606 } 607 } 608 609 /** 610 * Workaround other weirdness of the math instruction. 611 */ 612 instruction * fix_math_instruction(instruction * inst)613 fix_math_instruction(instruction *inst) const 614 { 615 if (shader->devinfo->ver == 6 && 616 inst->dst.writemask != WRITEMASK_XYZW) { 617 const dst_reg tmp = vgrf(inst->dst.type); 618 MOV(inst->dst, src_reg(tmp)); 619 inst->dst = tmp; 620 621 } else if (shader->devinfo->ver < 6) { 622 const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2); 623 inst->base_mrf = 1; 624 inst->mlen = sources; 625 } 626 627 return inst; 628 } 629 630 elk_bblock_t *block; 631 exec_node *cursor; 632 633 unsigned _dispatch_width; 634 unsigned _group; 635 bool force_writemask_all; 636 637 /** Debug annotation info. */ 638 struct { 639 const char *str; 640 const void *ir; 641 } annotation; 642 }; 643 } 644