1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #ifndef __NV50_IR_H__ 24 #define __NV50_IR_H__ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <stdint.h> 29 #include <deque> 30 #include <list> 31 #include <vector> 32 33 #include "codegen/unordered_set.h" 34 #include "codegen/nv50_ir_util.h" 35 #include "codegen/nv50_ir_graph.h" 36 37 #include "codegen/nv50_ir_driver.h" 38 39 namespace nv50_ir { 40 41 enum operation 42 { 43 OP_NOP = 0, 44 OP_PHI, 45 OP_UNION, // unify a new definition and several source values 46 OP_SPLIT, // $r0d -> { $r0, $r1 } ($r0d and $r0/$r1 will be coalesced) 47 OP_MERGE, // opposite of split, e.g. combine 2 32 bit into a 64 bit value 48 OP_CONSTRAINT, // copy values into consecutive registers 49 OP_MOV, // simple copy, no modifiers allowed 50 OP_LOAD, 51 OP_STORE, 52 OP_ADD, // NOTE: add u64 + u32 is legal for targets w/o 64-bit integer adds 53 OP_SUB, 54 OP_MUL, 55 OP_DIV, 56 OP_MOD, 57 OP_MAD, 58 OP_FMA, 59 OP_SAD, // abs(src0 - src1) + src2 60 OP_SHLADD, 61 OP_ABS, 62 OP_NEG, 63 OP_NOT, 64 OP_AND, 65 OP_OR, 66 OP_XOR, 67 OP_SHL, 68 OP_SHR, 69 OP_MAX, 70 OP_MIN, 71 OP_SAT, // CLAMP(f32, 0.0, 1.0) 72 OP_CEIL, 73 OP_FLOOR, 74 OP_TRUNC, 75 OP_CVT, 76 OP_SET_AND, // dst = (src0 CMP src1) & src2 77 OP_SET_OR, 78 OP_SET_XOR, 79 OP_SET, 80 OP_SELP, // dst = src2 ? src0 : src1 81 OP_SLCT, // dst = (src2 CMP 0) ? src0 : src1 82 OP_RCP, 83 OP_RSQ, 84 OP_LG2, 85 OP_SIN, 86 OP_COS, 87 OP_EX2, 88 OP_EXP, // exponential (base M_E) 89 OP_LOG, // natural logarithm 90 OP_PRESIN, 91 OP_PREEX2, 92 OP_SQRT, 93 OP_POW, 94 OP_BRA, 95 OP_CALL, 96 OP_RET, 97 OP_CONT, 98 OP_BREAK, 99 OP_PRERET, 100 OP_PRECONT, 101 OP_PREBREAK, 102 OP_BRKPT, // breakpoint (not related to loops) 103 OP_JOINAT, // push control flow convergence point 104 OP_JOIN, // converge 105 OP_DISCARD, 106 OP_EXIT, 107 OP_MEMBAR, // memory barrier (mfence, lfence, sfence) 108 OP_VFETCH, // indirection 0 in attribute space, indirection 1 is vertex base 109 OP_PFETCH, // fetch base address of vertex src0 (immediate) [+ src1] 110 OP_AFETCH, // fetch base address of shader input (a[%r1+0x10]) 111 OP_EXPORT, 112 OP_LINTERP, 113 OP_PINTERP, 114 OP_EMIT, // emit vertex 115 OP_RESTART, // restart primitive 116 OP_TEX, 117 OP_TXB, // texture bias 118 OP_TXL, // texure lod 119 OP_TXF, // texel fetch 120 OP_TXQ, // texture size query 121 OP_TXD, // texture derivatives 122 OP_TXG, // texture gather 123 OP_TXLQ, // texture query lod 124 OP_TEXCSAA, // texture op for coverage sampling 125 OP_TEXPREP, // turn cube map array into 2d array coordinates 126 OP_SULDB, // surface load (raw) 127 OP_SULDP, // surface load (formatted) 128 OP_SUSTB, // surface store (raw) 129 OP_SUSTP, // surface store (formatted) 130 OP_SUREDB, 131 OP_SUREDP, // surface reduction (atomic op) 132 OP_SULEA, // surface load effective address 133 OP_SUBFM, // surface bitfield manipulation 134 OP_SUCLAMP, // clamp surface coordinates 135 OP_SUEAU, // surface effective address 136 OP_SUQ, // surface query 137 OP_MADSP, // special integer multiply-add 138 OP_TEXBAR, // texture dependency barrier 139 OP_DFDX, 140 OP_DFDY, 141 OP_RDSV, // read system value 142 OP_WRSV, // write system value 143 OP_PIXLD, // get info about raster object or surfaces 144 OP_QUADOP, 145 OP_QUADON, 146 OP_QUADPOP, 147 OP_POPCNT, // bitcount(src0 & src1) 148 OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] 149 OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK 150 OP_BFIND, // find highest/lowest set bit 151 OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) 152 OP_ATOM, 153 OP_BAR, // execution barrier, sources = { id, thread count, predicate } 154 OP_VADD, // byte/word vector operations 155 OP_VAVG, 156 OP_VMIN, 157 OP_VMAX, 158 OP_VSAD, 159 OP_VSET, 160 OP_VSHR, 161 OP_VSHL, 162 OP_VSEL, 163 OP_CCTL, // cache control 164 OP_SHFL, // warp shuffle 165 OP_VOTE, 166 OP_BUFQ, // buffer query 167 OP_LAST 168 }; 169 170 // various instruction-specific modifier definitions Instruction::subOp 171 // MOV_FINAL marks a MOV originating from an EXPORT (used for placing TEXBARs) 172 #define NV50_IR_SUBOP_MUL_HIGH 1 173 #define NV50_IR_SUBOP_EMIT_RESTART 1 174 #define NV50_IR_SUBOP_LDC_IL 1 175 #define NV50_IR_SUBOP_LDC_IS 2 176 #define NV50_IR_SUBOP_LDC_ISL 3 177 #define NV50_IR_SUBOP_SHIFT_WRAP 1 178 #define NV50_IR_SUBOP_EMU_PRERET 1 179 #define NV50_IR_SUBOP_TEXBAR(n) n 180 #define NV50_IR_SUBOP_MOV_FINAL 1 181 #define NV50_IR_SUBOP_EXTBF_REV 1 182 #define NV50_IR_SUBOP_BFIND_SAMT 1 183 #define NV50_IR_SUBOP_RCPRSQ_64H 1 184 #define NV50_IR_SUBOP_PERMT_F4E 1 185 #define NV50_IR_SUBOP_PERMT_B4E 2 186 #define NV50_IR_SUBOP_PERMT_RC8 3 187 #define NV50_IR_SUBOP_PERMT_ECL 4 188 #define NV50_IR_SUBOP_PERMT_ECR 5 189 #define NV50_IR_SUBOP_PERMT_RC16 6 190 #define NV50_IR_SUBOP_BAR_SYNC 0 191 #define NV50_IR_SUBOP_BAR_ARRIVE 1 192 #define NV50_IR_SUBOP_BAR_RED_AND 2 193 #define NV50_IR_SUBOP_BAR_RED_OR 3 194 #define NV50_IR_SUBOP_BAR_RED_POPC 4 195 #define NV50_IR_SUBOP_MEMBAR_L 1 196 #define NV50_IR_SUBOP_MEMBAR_S 2 197 #define NV50_IR_SUBOP_MEMBAR_M 3 198 #define NV50_IR_SUBOP_MEMBAR_CTA (0 << 2) 199 #define NV50_IR_SUBOP_MEMBAR_GL (1 << 2) 200 #define NV50_IR_SUBOP_MEMBAR_SYS (2 << 2) 201 #define NV50_IR_SUBOP_MEMBAR_DIR(m) ((m) & 0x3) 202 #define NV50_IR_SUBOP_MEMBAR_SCOPE(m) ((m) & ~0x3) 203 #define NV50_IR_SUBOP_MEMBAR(d,s) \ 204 (NV50_IR_SUBOP_MEMBAR_##d | NV50_IR_SUBOP_MEMBAR_##s) 205 #define NV50_IR_SUBOP_ATOM_ADD 0 206 #define NV50_IR_SUBOP_ATOM_MIN 1 207 #define NV50_IR_SUBOP_ATOM_MAX 2 208 #define NV50_IR_SUBOP_ATOM_INC 3 209 #define NV50_IR_SUBOP_ATOM_DEC 4 210 #define NV50_IR_SUBOP_ATOM_AND 5 211 #define NV50_IR_SUBOP_ATOM_OR 6 212 #define NV50_IR_SUBOP_ATOM_XOR 7 213 #define NV50_IR_SUBOP_ATOM_CAS 8 214 #define NV50_IR_SUBOP_ATOM_EXCH 9 215 #define NV50_IR_SUBOP_CCTL_IV 5 216 #define NV50_IR_SUBOP_CCTL_IVALL 6 217 #define NV50_IR_SUBOP_SUST_IGN 0 218 #define NV50_IR_SUBOP_SUST_TRAP 1 219 #define NV50_IR_SUBOP_SUST_SDCL 3 220 #define NV50_IR_SUBOP_SULD_ZERO 0 221 #define NV50_IR_SUBOP_SULD_TRAP 1 222 #define NV50_IR_SUBOP_SULD_SDCL 3 223 #define NV50_IR_SUBOP_SUBFM_3D 1 224 #define NV50_IR_SUBOP_SUCLAMP_2D 0x10 225 #define NV50_IR_SUBOP_SUCLAMP_SD(r, d) (( 0 + (r)) | ((d == 2) ? 0x10 : 0)) 226 #define NV50_IR_SUBOP_SUCLAMP_PL(r, d) (( 5 + (r)) | ((d == 2) ? 0x10 : 0)) 227 #define NV50_IR_SUBOP_SUCLAMP_BL(r, d) ((10 + (r)) | ((d == 2) ? 0x10 : 0)) 228 #define NV50_IR_SUBOP_PIXLD_COUNT 0 229 #define NV50_IR_SUBOP_PIXLD_COVMASK 1 230 #define NV50_IR_SUBOP_PIXLD_COVERED 2 231 #define NV50_IR_SUBOP_PIXLD_OFFSET 3 232 #define NV50_IR_SUBOP_PIXLD_CENT_OFFSET 4 233 #define NV50_IR_SUBOP_PIXLD_SAMPLEID 5 234 #define NV50_IR_SUBOP_SHFL_IDX 0 235 #define NV50_IR_SUBOP_SHFL_UP 1 236 #define NV50_IR_SUBOP_SHFL_DOWN 2 237 #define NV50_IR_SUBOP_SHFL_BFLY 3 238 #define NV50_IR_SUBOP_LOAD_LOCKED 1 239 #define NV50_IR_SUBOP_STORE_UNLOCKED 2 240 #define NV50_IR_SUBOP_MADSP_SD 0xffff 241 // Yes, we could represent those with DataType. 242 // Or put the type into operation and have a couple 1000 values in that enum. 243 // This will have to do for now. 244 // The bitfields are supposed to correspond to nve4 ISA. 245 #define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a)) 246 #define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000) 247 #define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000) 248 #define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000) 249 #define NV50_IR_SUBOP_Vn(n) ((n) >> 14) 250 #define NV50_IR_SUBOP_VOTE_ALL 0 251 #define NV50_IR_SUBOP_VOTE_ANY 1 252 #define NV50_IR_SUBOP_VOTE_UNI 2 253 254 enum DataType 255 { 256 TYPE_NONE, 257 TYPE_U8, 258 TYPE_S8, 259 TYPE_U16, 260 TYPE_S16, 261 TYPE_U32, 262 TYPE_S32, 263 TYPE_U64, // 64 bit operations are only lowered after register allocation 264 TYPE_S64, 265 TYPE_F16, 266 TYPE_F32, 267 TYPE_F64, 268 TYPE_B96, 269 TYPE_B128 270 }; 271 272 enum CondCode 273 { 274 CC_FL = 0, 275 CC_NEVER = CC_FL, // when used with FILE_FLAGS 276 CC_LT = 1, 277 CC_EQ = 2, 278 CC_NOT_P = CC_EQ, // when used with FILE_PREDICATE 279 CC_LE = 3, 280 CC_GT = 4, 281 CC_NE = 5, 282 CC_P = CC_NE, 283 CC_GE = 6, 284 CC_TR = 7, 285 CC_ALWAYS = CC_TR, 286 CC_U = 8, 287 CC_LTU = 9, 288 CC_EQU = 10, 289 CC_LEU = 11, 290 CC_GTU = 12, 291 CC_NEU = 13, 292 CC_GEU = 14, 293 CC_NO = 0x10, 294 CC_NC = 0x11, 295 CC_NS = 0x12, 296 CC_NA = 0x13, 297 CC_A = 0x14, 298 CC_S = 0x15, 299 CC_C = 0x16, 300 CC_O = 0x17 301 }; 302 303 enum RoundMode 304 { 305 ROUND_N, // nearest 306 ROUND_M, // towards -inf 307 ROUND_Z, // towards 0 308 ROUND_P, // towards +inf 309 ROUND_NI, // nearest integer 310 ROUND_MI, // to integer towards -inf 311 ROUND_ZI, // to integer towards 0 312 ROUND_PI, // to integer towards +inf 313 }; 314 315 enum CacheMode 316 { 317 CACHE_CA, // cache at all levels 318 CACHE_WB = CACHE_CA, // cache write back 319 CACHE_CG, // cache at global level 320 CACHE_CS, // cache streaming 321 CACHE_CV, // cache as volatile 322 CACHE_WT = CACHE_CV // cache write-through 323 }; 324 325 enum DataFile 326 { 327 FILE_NULL = 0, 328 FILE_GPR, 329 FILE_PREDICATE, // boolean predicate 330 FILE_FLAGS, // zero/sign/carry/overflow bits 331 FILE_ADDRESS, 332 LAST_REGISTER_FILE = FILE_ADDRESS, 333 FILE_IMMEDIATE, 334 FILE_MEMORY_CONST, 335 FILE_SHADER_INPUT, 336 FILE_SHADER_OUTPUT, 337 FILE_MEMORY_BUFFER, 338 FILE_MEMORY_GLOBAL, 339 FILE_MEMORY_SHARED, 340 FILE_MEMORY_LOCAL, 341 FILE_SYSTEM_VALUE, 342 DATA_FILE_COUNT 343 }; 344 345 enum TexTarget 346 { 347 TEX_TARGET_1D, 348 TEX_TARGET_2D, 349 TEX_TARGET_2D_MS, 350 TEX_TARGET_3D, 351 TEX_TARGET_CUBE, 352 TEX_TARGET_1D_SHADOW, 353 TEX_TARGET_2D_SHADOW, 354 TEX_TARGET_CUBE_SHADOW, 355 TEX_TARGET_1D_ARRAY, 356 TEX_TARGET_2D_ARRAY, 357 TEX_TARGET_2D_MS_ARRAY, 358 TEX_TARGET_CUBE_ARRAY, 359 TEX_TARGET_1D_ARRAY_SHADOW, 360 TEX_TARGET_2D_ARRAY_SHADOW, 361 TEX_TARGET_RECT, 362 TEX_TARGET_RECT_SHADOW, 363 TEX_TARGET_CUBE_ARRAY_SHADOW, 364 TEX_TARGET_BUFFER, 365 TEX_TARGET_COUNT 366 }; 367 368 enum ImgFormat 369 { 370 FMT_NONE, 371 372 FMT_RGBA32F, 373 FMT_RGBA16F, 374 FMT_RG32F, 375 FMT_RG16F, 376 FMT_R11G11B10F, 377 FMT_R32F, 378 FMT_R16F, 379 380 FMT_RGBA32UI, 381 FMT_RGBA16UI, 382 FMT_RGB10A2UI, 383 FMT_RGBA8UI, 384 FMT_RG32UI, 385 FMT_RG16UI, 386 FMT_RG8UI, 387 FMT_R32UI, 388 FMT_R16UI, 389 FMT_R8UI, 390 391 FMT_RGBA32I, 392 FMT_RGBA16I, 393 FMT_RGBA8I, 394 FMT_RG32I, 395 FMT_RG16I, 396 FMT_RG8I, 397 FMT_R32I, 398 FMT_R16I, 399 FMT_R8I, 400 401 FMT_RGBA16, 402 FMT_RGB10A2, 403 FMT_RGBA8, 404 FMT_RG16, 405 FMT_RG8, 406 FMT_R16, 407 FMT_R8, 408 409 FMT_RGBA16_SNORM, 410 FMT_RGBA8_SNORM, 411 FMT_RG16_SNORM, 412 FMT_RG8_SNORM, 413 FMT_R16_SNORM, 414 FMT_R8_SNORM, 415 416 FMT_BGRA8, 417 418 IMG_FORMAT_COUNT, 419 }; 420 421 enum ImgType { 422 UINT, 423 SINT, 424 UNORM, 425 SNORM, 426 FLOAT, 427 }; 428 429 enum SVSemantic 430 { 431 SV_POSITION, // WPOS 432 SV_VERTEX_ID, 433 SV_INSTANCE_ID, 434 SV_INVOCATION_ID, 435 SV_PRIMITIVE_ID, 436 SV_VERTEX_COUNT, // gl_PatchVerticesIn 437 SV_LAYER, 438 SV_VIEWPORT_INDEX, 439 SV_YDIR, 440 SV_FACE, 441 SV_POINT_SIZE, 442 SV_POINT_COORD, 443 SV_CLIP_DISTANCE, 444 SV_SAMPLE_INDEX, 445 SV_SAMPLE_POS, 446 SV_SAMPLE_MASK, 447 SV_TESS_OUTER, 448 SV_TESS_INNER, 449 SV_TESS_COORD, 450 SV_TID, 451 SV_CTAID, 452 SV_NTID, 453 SV_GRIDID, 454 SV_NCTAID, 455 SV_LANEID, 456 SV_PHYSID, 457 SV_NPHYSID, 458 SV_CLOCK, 459 SV_LBASE, 460 SV_SBASE, 461 SV_VERTEX_STRIDE, 462 SV_INVOCATION_INFO, 463 SV_THREAD_KILL, 464 SV_BASEVERTEX, 465 SV_BASEINSTANCE, 466 SV_DRAWID, 467 SV_WORK_DIM, 468 SV_UNDEFINED, 469 SV_LAST 470 }; 471 472 class Program; 473 class Function; 474 class BasicBlock; 475 476 class Target; 477 478 class Instruction; 479 class CmpInstruction; 480 class TexInstruction; 481 class FlowInstruction; 482 483 class Value; 484 class LValue; 485 class Symbol; 486 class ImmediateValue; 487 488 struct Storage 489 { 490 DataFile file; 491 int8_t fileIndex; // signed, may be indirect for CONST[] 492 uint8_t size; // this should match the Instruction type's size 493 DataType type; // mainly for pretty printing 494 union { 495 uint64_t u64; // immediate values 496 uint32_t u32; 497 uint16_t u16; 498 uint8_t u8; 499 int64_t s64; 500 int32_t s32; 501 int16_t s16; 502 int8_t s8; 503 float f32; 504 double f64; 505 int32_t offset; // offset from 0 (base of address space) 506 int32_t id; // register id (< 0 if virtual/unassigned, in units <= 4) 507 struct { 508 SVSemantic sv; 509 int index; 510 } sv; 511 } data; 512 }; 513 514 // precedence: NOT after SAT after NEG after ABS 515 #define NV50_IR_MOD_ABS (1 << 0) 516 #define NV50_IR_MOD_NEG (1 << 1) 517 #define NV50_IR_MOD_SAT (1 << 2) 518 #define NV50_IR_MOD_NOT (1 << 3) 519 #define NV50_IR_MOD_NEG_ABS (NV50_IR_MOD_NEG | NV50_IR_MOD_ABS) 520 521 #define NV50_IR_INTERP_MODE_MASK 0x3 522 #define NV50_IR_INTERP_LINEAR (0 << 0) 523 #define NV50_IR_INTERP_PERSPECTIVE (1 << 0) 524 #define NV50_IR_INTERP_FLAT (2 << 0) 525 #define NV50_IR_INTERP_SC (3 << 0) // what exactly is that ? 526 #define NV50_IR_INTERP_SAMPLE_MASK 0xc 527 #define NV50_IR_INTERP_DEFAULT (0 << 2) 528 #define NV50_IR_INTERP_CENTROID (1 << 2) 529 #define NV50_IR_INTERP_OFFSET (2 << 2) 530 #define NV50_IR_INTERP_SAMPLEID (3 << 2) 531 532 // do we really want this to be a class ? 533 class Modifier 534 { 535 public: Modifier()536 Modifier() : bits(0) { } Modifier(unsigned int m)537 Modifier(unsigned int m) : bits(m) { } 538 Modifier(operation op); 539 540 // @return new Modifier applying a after b (asserts if unrepresentable) 541 Modifier operator*(const Modifier) const; 542 Modifier operator*=(const Modifier m) { *this = *this * m; return *this; } 543 Modifier operator==(const Modifier m) const { return m.bits == bits; } 544 Modifier operator!=(const Modifier m) const { return m.bits != bits; } 545 546 inline Modifier operator&(const Modifier m) const { return bits & m.bits; } 547 inline Modifier operator|(const Modifier m) const { return bits | m.bits; } 548 inline Modifier operator^(const Modifier m) const { return bits ^ m.bits; } 549 550 operation getOp() const; 551 neg()552 inline int neg() const { return (bits & NV50_IR_MOD_NEG) ? 1 : 0; } abs()553 inline int abs() const { return (bits & NV50_IR_MOD_ABS) ? 1 : 0; } 554 555 inline operator bool() const { return bits ? true : false; } 556 557 void applyTo(ImmediateValue &imm) const; 558 559 int print(char *buf, size_t size) const; 560 561 private: 562 uint8_t bits; 563 }; 564 565 class ValueRef 566 { 567 public: 568 ValueRef(Value * = NULL); 569 ValueRef(const ValueRef&); 570 ~ValueRef(); 571 exists()572 inline bool exists() const { return value != NULL; } 573 574 void set(Value *); 575 void set(const ValueRef&); get()576 inline Value *get() const { return value; } 577 inline Value *rep() const; 578 getInsn()579 inline Instruction *getInsn() const { return insn; } setInsn(Instruction * inst)580 inline void setInsn(Instruction *inst) { insn = inst; } 581 isIndirect(int dim)582 inline bool isIndirect(int dim) const { return indirect[dim] >= 0; } 583 inline const ValueRef *getIndirect(int dim) const; 584 585 inline DataFile getFile() const; 586 inline unsigned getSize() const; 587 588 // SSA: return eventual (traverse MOVs) literal value, if it exists 589 bool getImmediate(ImmediateValue&) const; 590 591 public: 592 Modifier mod; 593 int8_t indirect[2]; // >= 0 if relative to lvalue in insn->src(indirect[i]) 594 uint8_t swizzle; 595 596 bool usedAsPtr; // for printing 597 598 private: 599 Value *value; 600 Instruction *insn; 601 }; 602 603 class ValueDef 604 { 605 public: 606 ValueDef(Value * = NULL); 607 ValueDef(const ValueDef&); 608 ~ValueDef(); 609 exists()610 inline bool exists() const { return value != NULL; } 611 get()612 inline Value *get() const { return value; } 613 inline Value *rep() const; 614 void set(Value *); 615 bool mayReplace(const ValueRef &); 616 void replace(const ValueRef &, bool doSet); // replace all uses of the old value 617 getInsn()618 inline Instruction *getInsn() const { return insn; } setInsn(Instruction * inst)619 inline void setInsn(Instruction *inst) { insn = inst; } 620 621 inline DataFile getFile() const; 622 inline unsigned getSize() const; 623 624 inline void setSSA(LValue *); 625 inline const LValue *preSSA() const; 626 627 private: 628 Value *value; // should make this LValue * ... 629 LValue *origin; // pre SSA value 630 Instruction *insn; 631 }; 632 633 class Value 634 { 635 public: 636 Value(); ~Value()637 virtual ~Value() { } 638 639 virtual Value *clone(ClonePolicy<Function>&) const = 0; 640 641 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const = 0; 642 643 virtual bool equals(const Value *, bool strict = false) const; 644 virtual bool interfers(const Value *) const; isUniform()645 virtual bool isUniform() const { return true; } 646 rep()647 inline Value *rep() const { return join; } 648 649 inline Instruction *getUniqueInsn() const; 650 inline Instruction *getInsn() const; // use when uniqueness is certain 651 refCount()652 inline int refCount() { return uses.size(); } 653 654 inline LValue *asLValue(); 655 inline Symbol *asSym(); 656 inline ImmediateValue *asImm(); 657 inline const Symbol *asSym() const; 658 inline const ImmediateValue *asImm() const; 659 inFile(DataFile f)660 inline bool inFile(DataFile f) { return reg.file == f; } 661 662 static inline Value *get(Iterator&); 663 664 unordered_set<ValueRef *> uses; 665 std::list<ValueDef *> defs; 666 typedef unordered_set<ValueRef *>::iterator UseIterator; 667 typedef unordered_set<ValueRef *>::const_iterator UseCIterator; 668 typedef std::list<ValueDef *>::iterator DefIterator; 669 typedef std::list<ValueDef *>::const_iterator DefCIterator; 670 671 int id; 672 Storage reg; 673 674 // TODO: these should be in LValue: 675 Interval livei; 676 Value *join; 677 }; 678 679 class LValue : public Value 680 { 681 public: 682 LValue(Function *, DataFile file); 683 LValue(Function *, LValue *); ~LValue()684 ~LValue() { } 685 686 virtual bool isUniform() const; 687 688 virtual LValue *clone(ClonePolicy<Function>&) const; 689 690 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; 691 692 public: 693 unsigned compMask : 8; // compound/component mask 694 unsigned compound : 1; // used by RA, value involved in split/merge 695 unsigned ssa : 1; 696 unsigned fixedReg : 1; // set & used by RA, earlier just use (id < 0) 697 unsigned noSpill : 1; // do not spill (e.g. if spill temporary already) 698 }; 699 700 class Symbol : public Value 701 { 702 public: 703 Symbol(Program *, DataFile file = FILE_MEMORY_CONST, ubyte fileIdx = 0); ~Symbol()704 ~Symbol() { } 705 706 virtual Symbol *clone(ClonePolicy<Function>&) const; 707 708 virtual bool equals(const Value *that, bool strict) const; 709 710 virtual bool isUniform() const; 711 712 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; 713 714 // print with indirect values 715 int print(char *, size_t, Value *, Value *, DataType ty = TYPE_NONE) const; 716 717 inline void setFile(DataFile file, ubyte fileIndex = 0) 718 { 719 reg.file = file; 720 reg.fileIndex = fileIndex; 721 } 722 723 inline void setOffset(int32_t offset); 724 inline void setAddress(Symbol *base, int32_t offset); 725 inline void setSV(SVSemantic sv, uint32_t idx = 0); 726 getBase()727 inline const Symbol *getBase() const { return baseSym; } 728 729 private: 730 Symbol *baseSym; // array base for Symbols representing array elements 731 }; 732 733 class ImmediateValue : public Value 734 { 735 public: ImmediateValue()736 ImmediateValue() { } 737 ImmediateValue(Program *, uint32_t); 738 ImmediateValue(Program *, float); 739 ImmediateValue(Program *, double); 740 // NOTE: not added to program with 741 ImmediateValue(const ImmediateValue *, DataType ty); ~ImmediateValue()742 ~ImmediateValue() { }; 743 744 virtual ImmediateValue *clone(ClonePolicy<Function>&) const; 745 746 virtual bool equals(const Value *that, bool strict) const; 747 748 // these only work if 'type' is valid (we mostly use untyped literals): 749 bool isInteger(const int ival) const; // ival is cast to this' type 750 bool isNegative() const; 751 bool isPow2() const; 752 753 void applyLog2(); 754 755 // for constant folding: 756 ImmediateValue operator+(const ImmediateValue&) const; 757 ImmediateValue operator-(const ImmediateValue&) const; 758 ImmediateValue operator*(const ImmediateValue&) const; 759 ImmediateValue operator/(const ImmediateValue&) const; 760 761 ImmediateValue& operator=(const ImmediateValue&); // only sets value ! 762 763 bool compare(CondCode cc, float fval) const; 764 765 virtual int print(char *, size_t, DataType ty = TYPE_NONE) const; 766 }; 767 768 class Instruction 769 { 770 public: 771 Instruction(); 772 Instruction(Function *, operation, DataType); 773 virtual ~Instruction(); 774 775 virtual Instruction *clone(ClonePolicy<Function>&, 776 Instruction * = NULL) const; 777 778 void setDef(int i, Value *); 779 void setSrc(int s, Value *); 780 void setSrc(int s, const ValueRef&); 781 void swapSources(int a, int b); 782 void moveSources(int s, int delta); 783 bool setIndirect(int s, int dim, Value *); 784 src(int s)785 inline ValueRef& src(int s) { return srcs[s]; } def(int s)786 inline ValueDef& def(int s) { return defs[s]; } src(int s)787 inline const ValueRef& src(int s) const { return srcs[s]; } def(int s)788 inline const ValueDef& def(int s) const { return defs[s]; } 789 getDef(int d)790 inline Value *getDef(int d) const { return defs[d].get(); } getSrc(int s)791 inline Value *getSrc(int s) const { return srcs[s].get(); } 792 inline Value *getIndirect(int s, int dim) const; 793 defExists(unsigned d)794 inline bool defExists(unsigned d) const 795 { 796 return d < defs.size() && defs[d].exists(); 797 } srcExists(unsigned s)798 inline bool srcExists(unsigned s) const 799 { 800 return s < srcs.size() && srcs[s].exists(); 801 } 802 803 inline bool constrainedDefs() const; 804 805 bool setPredicate(CondCode ccode, Value *); 806 inline Value *getPredicate() const; 807 bool writesPredicate() const; isPredicated()808 inline bool isPredicated() const { return predSrc >= 0; } 809 810 inline void setFlagsSrc(int s, Value *); 811 inline void setFlagsDef(int d, Value *); usesFlags()812 inline bool usesFlags() const { return flagsSrc >= 0; } 813 defCount()814 unsigned int defCount() const { return defs.size(); }; 815 unsigned int defCount(unsigned int mask, bool singleFile = false) const; srcCount()816 unsigned int srcCount() const { return srcs.size(); }; 817 unsigned int srcCount(unsigned int mask, bool singleFile = false) const; 818 819 // save & remove / set indirect[0,1] and predicate source 820 void takeExtraSources(int s, Value *[3]); 821 void putExtraSources(int s, Value *[3]); 822 setType(DataType type)823 inline void setType(DataType type) { dType = sType = type; } 824 setType(DataType dtype,DataType stype)825 inline void setType(DataType dtype, DataType stype) 826 { 827 dType = dtype; 828 sType = stype; 829 } 830 isPseudo()831 inline bool isPseudo() const { return op < OP_MOV; } 832 bool isDead() const; 833 bool isNop() const; 834 bool isCommutationLegal(const Instruction *) const; // must be adjacent ! 835 bool isActionEqual(const Instruction *) const; 836 bool isResultEqual(const Instruction *) const; 837 838 // check whether the defs interfere with srcs and defs of another instruction 839 bool canCommuteDefDef(const Instruction *) const; 840 bool canCommuteDefSrc(const Instruction *) const; 841 842 void print() const; 843 844 inline CmpInstruction *asCmp(); 845 inline TexInstruction *asTex(); 846 inline FlowInstruction *asFlow(); 847 inline const TexInstruction *asTex() const; 848 inline const CmpInstruction *asCmp() const; 849 inline const FlowInstruction *asFlow() const; 850 851 public: 852 Instruction *next; 853 Instruction *prev; 854 int id; 855 int serial; // CFG order 856 857 operation op; 858 DataType dType; // destination or defining type 859 DataType sType; // source or secondary type 860 CondCode cc; 861 RoundMode rnd; 862 CacheMode cache; 863 864 uint16_t subOp; // quadop, 1 for mul-high, etc. 865 866 unsigned encSize : 4; // encoding size in bytes 867 unsigned saturate : 1; // to [0.0f, 1.0f] 868 unsigned join : 1; // converge control flow (use OP_JOIN until end) 869 unsigned fixed : 1; // prevent dead code elimination 870 unsigned terminator : 1; // end of basic block 871 unsigned ftz : 1; // flush denormal to zero 872 unsigned dnz : 1; // denormals, NaN are zero 873 unsigned ipa : 4; // interpolation mode 874 unsigned lanes : 4; 875 unsigned perPatch : 1; 876 unsigned exit : 1; // terminate program after insn 877 unsigned mask : 4; // for vector ops 878 879 int8_t postFactor; // MUL/DIV(if < 0) by 1 << postFactor 880 881 int8_t predSrc; 882 int8_t flagsDef; 883 int8_t flagsSrc; 884 885 uint32_t sched; // scheduling data (NOTE: maybe move to separate storage) 886 887 BasicBlock *bb; 888 889 protected: 890 std::deque<ValueDef> defs; // no gaps ! 891 std::deque<ValueRef> srcs; // no gaps ! 892 893 // instruction specific methods: 894 // (don't want to subclass, would need more constructors and memory pools) 895 public: setInterpolate(unsigned int mode)896 inline void setInterpolate(unsigned int mode) { ipa = mode; } 897 getInterpMode()898 unsigned int getInterpMode() const { return ipa & 0x3; } getSampleMode()899 unsigned int getSampleMode() const { return ipa & 0xc; } 900 901 private: 902 void init(); 903 }; 904 905 enum TexQuery 906 { 907 TXQ_DIMS, /* x, y, z, levels */ 908 TXQ_TYPE, /* ?, ?, samples, ? */ 909 TXQ_SAMPLE_POSITION, 910 TXQ_FILTER, 911 TXQ_LOD, 912 TXQ_WRAP, 913 TXQ_BORDER_COLOUR 914 }; 915 916 class TexInstruction : public Instruction 917 { 918 public: 919 class Target 920 { 921 public: target(targ)922 Target(TexTarget targ = TEX_TARGET_2D) : target(targ) { } 923 getName()924 const char *getName() const { return descTable[target].name; } getArgCount()925 unsigned int getArgCount() const { return descTable[target].argc; } getDim()926 unsigned int getDim() const { return descTable[target].dim; } isArray()927 int isArray() const { return descTable[target].array ? 1 : 0; } isCube()928 int isCube() const { return descTable[target].cube ? 1 : 0; } isShadow()929 int isShadow() const { return descTable[target].shadow ? 1 : 0; } isMS()930 int isMS() const { 931 return target == TEX_TARGET_2D_MS || target == TEX_TARGET_2D_MS_ARRAY; } clearMS()932 void clearMS() { 933 if (isMS()) { 934 if (isArray()) 935 target = TEX_TARGET_2D_ARRAY; 936 else 937 target = TEX_TARGET_2D; 938 } 939 } 940 941 Target& operator=(TexTarget targ) 942 { 943 assert(targ < TEX_TARGET_COUNT); 944 target = targ; 945 return *this; 946 } 947 948 inline bool operator==(TexTarget targ) const { return target == targ; } 949 inline bool operator!=(TexTarget targ) const { return target != targ; } 950 getEnum()951 enum TexTarget getEnum() const { return target; } 952 953 private: 954 struct Desc 955 { 956 char name[19]; 957 uint8_t dim; 958 uint8_t argc; 959 bool array; 960 bool cube; 961 bool shadow; 962 }; 963 964 static const struct Desc descTable[TEX_TARGET_COUNT]; 965 966 private: 967 enum TexTarget target; 968 }; 969 970 public: 971 struct ImgFormatDesc 972 { 973 char name[19]; 974 uint8_t components; 975 uint8_t bits[4]; 976 ImgType type; 977 bool bgra; 978 }; 979 980 static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT]; 981 982 public: 983 TexInstruction(Function *, operation); 984 virtual ~TexInstruction(); 985 986 virtual TexInstruction *clone(ClonePolicy<Function>&, 987 Instruction * = NULL) const; 988 setTexture(Target targ,uint8_t r,uint8_t s)989 inline void setTexture(Target targ, uint8_t r, uint8_t s) 990 { 991 tex.r = r; 992 tex.s = s; 993 tex.target = targ; 994 } 995 996 void setIndirectR(Value *); 997 void setIndirectS(Value *); 998 inline Value *getIndirectR() const; 999 inline Value *getIndirectS() const; 1000 1001 public: 1002 struct { 1003 Target target; 1004 1005 uint16_t r; 1006 uint16_t s; 1007 int8_t rIndirectSrc; 1008 int8_t sIndirectSrc; 1009 1010 uint8_t mask; 1011 uint8_t gatherComp; 1012 1013 bool liveOnly; // only execute on live pixels of a quad (optimization) 1014 bool levelZero; 1015 bool derivAll; 1016 1017 int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets 1018 int8_t offset[3]; // only used on nv50 1019 1020 enum TexQuery query; 1021 const struct ImgFormatDesc *format; 1022 } tex; 1023 1024 ValueRef dPdx[3]; 1025 ValueRef dPdy[3]; 1026 ValueRef offset[4][3]; 1027 }; 1028 1029 class CmpInstruction : public Instruction 1030 { 1031 public: 1032 CmpInstruction(Function *, operation); 1033 1034 virtual CmpInstruction *clone(ClonePolicy<Function>&, 1035 Instruction * = NULL) const; 1036 setCondition(CondCode cond)1037 void setCondition(CondCode cond) { setCond = cond; } getCondition()1038 CondCode getCondition() const { return setCond; } 1039 1040 public: 1041 CondCode setCond; 1042 }; 1043 1044 class FlowInstruction : public Instruction 1045 { 1046 public: 1047 FlowInstruction(Function *, operation, void *target); 1048 1049 virtual FlowInstruction *clone(ClonePolicy<Function>&, 1050 Instruction * = NULL) const; 1051 1052 public: 1053 unsigned allWarp : 1; 1054 unsigned absolute : 1; 1055 unsigned limit : 1; 1056 unsigned builtin : 1; // true for calls to emulation code 1057 unsigned indirect : 1; // target in src(0) 1058 1059 union { 1060 BasicBlock *bb; 1061 int builtin; 1062 Function *fn; 1063 } target; 1064 }; 1065 1066 class BasicBlock 1067 { 1068 public: 1069 BasicBlock(Function *); 1070 ~BasicBlock(); 1071 1072 BasicBlock *clone(ClonePolicy<Function>&) const; 1073 getId()1074 inline int getId() const { return id; } getInsnCount()1075 inline unsigned int getInsnCount() const { return numInsns; } isTerminated()1076 inline bool isTerminated() const { return exit && exit->terminator; } 1077 1078 bool dominatedBy(BasicBlock *bb); 1079 inline bool reachableBy(const BasicBlock *by, const BasicBlock *term); 1080 1081 // returns mask of conditional out blocks 1082 // e.g. 3 for IF { .. } ELSE { .. } ENDIF, 1 for IF { .. } ENDIF 1083 unsigned int initiatesSimpleConditional() const; 1084 1085 public: getFunction()1086 Function *getFunction() const { return func; } getProgram()1087 Program *getProgram() const { return program; } 1088 getEntry()1089 Instruction *getEntry() const { return entry; } // first non-phi instruction getPhi()1090 Instruction *getPhi() const { return phi; } getFirst()1091 Instruction *getFirst() const { return phi ? phi : entry; } getExit()1092 Instruction *getExit() const { return exit; } 1093 1094 void insertHead(Instruction *); 1095 void insertTail(Instruction *); 1096 void insertBefore(Instruction *, Instruction *); 1097 void insertAfter(Instruction *, Instruction *); 1098 void remove(Instruction *); 1099 void permuteAdjacent(Instruction *, Instruction *); 1100 1101 BasicBlock *idom() const; 1102 1103 // NOTE: currently does not rebuild the dominator tree 1104 BasicBlock *splitBefore(Instruction *, bool attach = true); 1105 BasicBlock *splitAfter(Instruction *, bool attach = true); 1106 getDF()1107 DLList& getDF() { return df; } iterDF()1108 DLList::Iterator iterDF() { return df.iterator(); } 1109 1110 static inline BasicBlock *get(Iterator&); 1111 static inline BasicBlock *get(Graph::Node *); 1112 1113 public: 1114 Graph::Node cfg; // first edge is branch *taken* (the ELSE branch) 1115 Graph::Node dom; 1116 1117 BitSet liveSet; 1118 BitSet defSet; 1119 1120 uint32_t binPos; 1121 uint32_t binSize; 1122 1123 Instruction *joinAt; // for quick reference 1124 1125 bool explicitCont; // loop headers: true if loop contains continue stmts 1126 1127 private: 1128 int id; 1129 DLList df; 1130 1131 Instruction *phi; 1132 Instruction *entry; 1133 Instruction *exit; 1134 1135 unsigned int numInsns; 1136 1137 private: 1138 Function *func; 1139 Program *program; 1140 1141 void splitCommon(Instruction *, BasicBlock *, bool attach); 1142 }; 1143 1144 class Function 1145 { 1146 public: 1147 Function(Program *, const char *name, uint32_t label); 1148 ~Function(); 1149 1150 static inline Function *get(Graph::Node *node); 1151 getProgram()1152 inline Program *getProgram() const { return prog; } getName()1153 inline const char *getName() const { return name; } getId()1154 inline int getId() const { return id; } getLabel()1155 inline uint32_t getLabel() const { return label; } 1156 1157 void print(); 1158 void printLiveIntervals() const; 1159 void printCFGraph(const char *filePath); 1160 1161 bool setEntry(BasicBlock *); 1162 bool setExit(BasicBlock *); 1163 1164 unsigned int orderInstructions(ArrayList&); 1165 add(BasicBlock * bb,int & id)1166 inline void add(BasicBlock *bb, int& id) { allBBlocks.insert(bb, id); } add(Instruction * insn,int & id)1167 inline void add(Instruction *insn, int& id) { allInsns.insert(insn, id); } add(LValue * lval,int & id)1168 inline void add(LValue *lval, int& id) { allLValues.insert(lval, id); } 1169 1170 inline LValue *getLValue(int id); 1171 1172 void buildLiveSets(); 1173 void buildDefSets(); 1174 bool convertToSSA(); 1175 1176 public: 1177 std::deque<ValueDef> ins; 1178 std::deque<ValueRef> outs; 1179 std::deque<Value *> clobbers; 1180 1181 Graph cfg; 1182 Graph::Node *cfgExit; 1183 Graph *domTree; 1184 Graph::Node call; // node in the call graph 1185 1186 BasicBlock **bbArray; // BBs in emission order 1187 int bbCount; 1188 1189 unsigned int loopNestingBound; 1190 int regClobberMax; 1191 1192 uint32_t binPos; 1193 uint32_t binSize; 1194 1195 Value *stackPtr; 1196 1197 uint32_t tlsBase; // base address for l[] space (if no stack pointer is used) 1198 uint32_t tlsSize; 1199 1200 ArrayList allBBlocks; 1201 ArrayList allInsns; 1202 ArrayList allLValues; 1203 1204 private: 1205 void buildLiveSetsPreSSA(BasicBlock *, const int sequence); 1206 void buildDefSetsPreSSA(BasicBlock *bb, const int seq); 1207 1208 private: 1209 uint32_t label; 1210 int id; 1211 const char *const name; 1212 Program *prog; 1213 }; 1214 1215 enum CGStage 1216 { 1217 CG_STAGE_PRE_SSA, 1218 CG_STAGE_SSA, // expected directly before register allocation 1219 CG_STAGE_POST_RA 1220 }; 1221 1222 class Program 1223 { 1224 public: 1225 enum Type 1226 { 1227 TYPE_VERTEX, 1228 TYPE_TESSELLATION_CONTROL, 1229 TYPE_TESSELLATION_EVAL, 1230 TYPE_GEOMETRY, 1231 TYPE_FRAGMENT, 1232 TYPE_COMPUTE 1233 }; 1234 1235 Program(Type type, Target *targ); 1236 ~Program(); 1237 1238 void print(); 1239 getType()1240 Type getType() const { return progType; } 1241 add(Function * fn,int & id)1242 inline void add(Function *fn, int& id) { allFuncs.insert(fn, id); } del(Function * fn,int & id)1243 inline void del(Function *fn, int& id) { allFuncs.remove(id); } add(Value * rval,int & id)1244 inline void add(Value *rval, int& id) { allRValues.insert(rval, id); } 1245 1246 bool makeFromTGSI(struct nv50_ir_prog_info *); 1247 bool makeFromSM4(struct nv50_ir_prog_info *); 1248 bool convertToSSA(); 1249 bool optimizeSSA(int level); 1250 bool optimizePostRA(int level); 1251 bool registerAllocation(); 1252 bool emitBinary(struct nv50_ir_prog_info *); 1253 getTarget()1254 const Target *getTarget() const { return target; } 1255 1256 private: 1257 void emitSymbolTable(struct nv50_ir_prog_info *); 1258 1259 Type progType; 1260 Target *target; 1261 1262 public: 1263 Function *main; 1264 Graph calls; 1265 1266 ArrayList allFuncs; 1267 ArrayList allRValues; 1268 1269 uint32_t *code; 1270 uint32_t binSize; 1271 uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL 1272 1273 int maxGPR; 1274 1275 MemoryPool mem_Instruction; 1276 MemoryPool mem_CmpInstruction; 1277 MemoryPool mem_TexInstruction; 1278 MemoryPool mem_FlowInstruction; 1279 MemoryPool mem_LValue; 1280 MemoryPool mem_Symbol; 1281 MemoryPool mem_ImmediateValue; 1282 1283 uint32_t dbgFlags; 1284 uint8_t optLevel; 1285 1286 void *targetPriv; // e.g. to carry information between passes 1287 1288 const struct nv50_ir_prog_info *driver; // for driver configuration 1289 1290 void releaseInstruction(Instruction *); 1291 void releaseValue(Value *); 1292 }; 1293 1294 // TODO: add const version 1295 class Pass 1296 { 1297 public: 1298 bool run(Program *, bool ordered = false, bool skipPhi = false); 1299 bool run(Function *, bool ordered = false, bool skipPhi = false); 1300 1301 private: 1302 // return false to continue with next entity on next higher level visit(Function *)1303 virtual bool visit(Function *) { return true; } visit(BasicBlock *)1304 virtual bool visit(BasicBlock *) { return true; } visit(Instruction *)1305 virtual bool visit(Instruction *) { return false; } 1306 1307 bool doRun(Program *, bool ordered, bool skipPhi); 1308 bool doRun(Function *, bool ordered, bool skipPhi); 1309 1310 protected: 1311 bool err; 1312 Function *func; 1313 Program *prog; 1314 }; 1315 1316 // ============================================================================= 1317 1318 #include "codegen/nv50_ir_inlines.h" 1319 1320 } // namespace nv50_ir 1321 1322 #endif // __NV50_IR_H__ 1323