1 /* 2 * Copyright © 2016 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /** 25 * @file qpu_instr.h 26 * 27 * Definitions of the unpacked form of QPU instructions. Assembly and 28 * disassembly will use this for talking about instructions, with qpu_encode.c 29 * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU 30 * instruction. 31 */ 32 33 #ifndef QPU_INSTR_H 34 #define QPU_INSTR_H 35 36 #include <stdbool.h> 37 #include <stdint.h> 38 #include "util/macros.h" 39 40 struct v3d_device_info; 41 42 struct v3d_qpu_sig { 43 bool thrsw:1; 44 bool ldunif:1; 45 bool ldunifa:1; 46 bool ldunifrf:1; 47 bool ldunifarf:1; 48 bool ldtmu:1; 49 bool ldvary:1; 50 bool ldvpm:1; 51 bool ldtlb:1; 52 bool ldtlbu:1; 53 bool small_imm:1; 54 bool ucb:1; 55 bool rotate:1; 56 bool wrtmuc:1; 57 }; 58 59 enum v3d_qpu_cond { 60 V3D_QPU_COND_NONE, 61 V3D_QPU_COND_IFA, 62 V3D_QPU_COND_IFB, 63 V3D_QPU_COND_IFNA, 64 V3D_QPU_COND_IFNB, 65 }; 66 67 enum v3d_qpu_pf { 68 V3D_QPU_PF_NONE, 69 V3D_QPU_PF_PUSHZ, 70 V3D_QPU_PF_PUSHN, 71 V3D_QPU_PF_PUSHC, 72 }; 73 74 enum v3d_qpu_uf { 75 V3D_QPU_UF_NONE, 76 V3D_QPU_UF_ANDZ, 77 V3D_QPU_UF_ANDNZ, 78 V3D_QPU_UF_NORNZ, 79 V3D_QPU_UF_NORZ, 80 V3D_QPU_UF_ANDN, 81 V3D_QPU_UF_ANDNN, 82 V3D_QPU_UF_NORNN, 83 V3D_QPU_UF_NORN, 84 V3D_QPU_UF_ANDC, 85 V3D_QPU_UF_ANDNC, 86 V3D_QPU_UF_NORNC, 87 V3D_QPU_UF_NORC, 88 }; 89 90 enum v3d_qpu_waddr { 91 V3D_QPU_WADDR_R0 = 0, 92 V3D_QPU_WADDR_R1 = 1, 93 V3D_QPU_WADDR_R2 = 2, 94 V3D_QPU_WADDR_R3 = 3, 95 V3D_QPU_WADDR_R4 = 4, 96 V3D_QPU_WADDR_R5 = 5, 97 V3D_QPU_WADDR_NOP = 6, 98 V3D_QPU_WADDR_TLB = 7, 99 V3D_QPU_WADDR_TLBU = 8, 100 V3D_QPU_WADDR_TMU = 9, /* V3D 3.x */ 101 V3D_QPU_WADDR_UNIFA = 9, /* V3D 4.x */ 102 V3D_QPU_WADDR_TMUL = 10, 103 V3D_QPU_WADDR_TMUD = 11, 104 V3D_QPU_WADDR_TMUA = 12, 105 V3D_QPU_WADDR_TMUAU = 13, 106 V3D_QPU_WADDR_VPM = 14, 107 V3D_QPU_WADDR_VPMU = 15, 108 V3D_QPU_WADDR_SYNC = 16, 109 V3D_QPU_WADDR_SYNCU = 17, 110 V3D_QPU_WADDR_SYNCB = 18, 111 V3D_QPU_WADDR_RECIP = 19, 112 V3D_QPU_WADDR_RSQRT = 20, 113 V3D_QPU_WADDR_EXP = 21, 114 V3D_QPU_WADDR_LOG = 22, 115 V3D_QPU_WADDR_SIN = 23, 116 V3D_QPU_WADDR_RSQRT2 = 24, 117 V3D_QPU_WADDR_TMUC = 32, 118 V3D_QPU_WADDR_TMUS = 33, 119 V3D_QPU_WADDR_TMUT = 34, 120 V3D_QPU_WADDR_TMUR = 35, 121 V3D_QPU_WADDR_TMUI = 36, 122 V3D_QPU_WADDR_TMUB = 37, 123 V3D_QPU_WADDR_TMUDREF = 38, 124 V3D_QPU_WADDR_TMUOFF = 39, 125 V3D_QPU_WADDR_TMUSCM = 40, 126 V3D_QPU_WADDR_TMUSF = 41, 127 V3D_QPU_WADDR_TMUSLOD = 42, 128 V3D_QPU_WADDR_TMUHS = 43, 129 V3D_QPU_WADDR_TMUHSCM = 44, 130 V3D_QPU_WADDR_TMUHSF = 45, 131 V3D_QPU_WADDR_TMUHSLOD = 46, 132 V3D_QPU_WADDR_R5REP = 55, 133 }; 134 135 struct v3d_qpu_flags { 136 enum v3d_qpu_cond ac, mc; 137 enum v3d_qpu_pf apf, mpf; 138 enum v3d_qpu_uf auf, muf; 139 }; 140 141 enum v3d_qpu_add_op { 142 V3D_QPU_A_FADD, 143 V3D_QPU_A_FADDNF, 144 V3D_QPU_A_VFPACK, 145 V3D_QPU_A_ADD, 146 V3D_QPU_A_SUB, 147 V3D_QPU_A_FSUB, 148 V3D_QPU_A_MIN, 149 V3D_QPU_A_MAX, 150 V3D_QPU_A_UMIN, 151 V3D_QPU_A_UMAX, 152 V3D_QPU_A_SHL, 153 V3D_QPU_A_SHR, 154 V3D_QPU_A_ASR, 155 V3D_QPU_A_ROR, 156 V3D_QPU_A_FMIN, 157 V3D_QPU_A_FMAX, 158 V3D_QPU_A_VFMIN, 159 V3D_QPU_A_AND, 160 V3D_QPU_A_OR, 161 V3D_QPU_A_XOR, 162 V3D_QPU_A_VADD, 163 V3D_QPU_A_VSUB, 164 V3D_QPU_A_NOT, 165 V3D_QPU_A_NEG, 166 V3D_QPU_A_FLAPUSH, 167 V3D_QPU_A_FLBPUSH, 168 V3D_QPU_A_FLPOP, 169 V3D_QPU_A_RECIP, 170 V3D_QPU_A_SETMSF, 171 V3D_QPU_A_SETREVF, 172 V3D_QPU_A_NOP, 173 V3D_QPU_A_TIDX, 174 V3D_QPU_A_EIDX, 175 V3D_QPU_A_LR, 176 V3D_QPU_A_VFLA, 177 V3D_QPU_A_VFLNA, 178 V3D_QPU_A_VFLB, 179 V3D_QPU_A_VFLNB, 180 V3D_QPU_A_FXCD, 181 V3D_QPU_A_XCD, 182 V3D_QPU_A_FYCD, 183 V3D_QPU_A_YCD, 184 V3D_QPU_A_MSF, 185 V3D_QPU_A_REVF, 186 V3D_QPU_A_VDWWT, 187 V3D_QPU_A_IID, 188 V3D_QPU_A_SAMPID, 189 V3D_QPU_A_BARRIERID, 190 V3D_QPU_A_TMUWT, 191 V3D_QPU_A_VPMSETUP, 192 V3D_QPU_A_VPMWT, 193 V3D_QPU_A_FLAFIRST, 194 V3D_QPU_A_FLNAFIRST, 195 V3D_QPU_A_LDVPMV_IN, 196 V3D_QPU_A_LDVPMV_OUT, 197 V3D_QPU_A_LDVPMD_IN, 198 V3D_QPU_A_LDVPMD_OUT, 199 V3D_QPU_A_LDVPMP, 200 V3D_QPU_A_RSQRT, 201 V3D_QPU_A_EXP, 202 V3D_QPU_A_LOG, 203 V3D_QPU_A_SIN, 204 V3D_QPU_A_RSQRT2, 205 V3D_QPU_A_LDVPMG_IN, 206 V3D_QPU_A_LDVPMG_OUT, 207 V3D_QPU_A_FCMP, 208 V3D_QPU_A_VFMAX, 209 V3D_QPU_A_FROUND, 210 V3D_QPU_A_FTOIN, 211 V3D_QPU_A_FTRUNC, 212 V3D_QPU_A_FTOIZ, 213 V3D_QPU_A_FFLOOR, 214 V3D_QPU_A_FTOUZ, 215 V3D_QPU_A_FCEIL, 216 V3D_QPU_A_FTOC, 217 V3D_QPU_A_FDX, 218 V3D_QPU_A_FDY, 219 V3D_QPU_A_STVPMV, 220 V3D_QPU_A_STVPMD, 221 V3D_QPU_A_STVPMP, 222 V3D_QPU_A_ITOF, 223 V3D_QPU_A_CLZ, 224 V3D_QPU_A_UTOF, 225 }; 226 227 enum v3d_qpu_mul_op { 228 V3D_QPU_M_ADD, 229 V3D_QPU_M_SUB, 230 V3D_QPU_M_UMUL24, 231 V3D_QPU_M_VFMUL, 232 V3D_QPU_M_SMUL24, 233 V3D_QPU_M_MULTOP, 234 V3D_QPU_M_FMOV, 235 V3D_QPU_M_MOV, 236 V3D_QPU_M_NOP, 237 V3D_QPU_M_FMUL, 238 }; 239 240 enum v3d_qpu_output_pack { 241 V3D_QPU_PACK_NONE, 242 /** 243 * Convert to 16-bit float, put in low 16 bits of destination leaving 244 * high unmodified. 245 */ 246 V3D_QPU_PACK_L, 247 /** 248 * Convert to 16-bit float, put in high 16 bits of destination leaving 249 * low unmodified. 250 */ 251 V3D_QPU_PACK_H, 252 }; 253 254 enum v3d_qpu_input_unpack { 255 /** 256 * No-op input unpacking. Note that this enum's value doesn't match 257 * the packed QPU instruction value of the field (we use 0 so that the 258 * default on new instruction creation is no-op). 259 */ 260 V3D_QPU_UNPACK_NONE, 261 /** Absolute value. Only available for some operations. */ 262 V3D_QPU_UNPACK_ABS, 263 /** Convert low 16 bits from 16-bit float to 32-bit float. */ 264 V3D_QPU_UNPACK_L, 265 /** Convert high 16 bits from 16-bit float to 32-bit float. */ 266 V3D_QPU_UNPACK_H, 267 268 /** Convert to 16f and replicate it to the high bits. */ 269 V3D_QPU_UNPACK_REPLICATE_32F_16, 270 271 /** Replicate low 16 bits to high */ 272 V3D_QPU_UNPACK_REPLICATE_L_16, 273 274 /** Replicate high 16 bits to low */ 275 V3D_QPU_UNPACK_REPLICATE_H_16, 276 277 /** Swap high and low 16 bits */ 278 V3D_QPU_UNPACK_SWAP_16, 279 }; 280 281 enum v3d_qpu_mux { 282 V3D_QPU_MUX_R0, 283 V3D_QPU_MUX_R1, 284 V3D_QPU_MUX_R2, 285 V3D_QPU_MUX_R3, 286 V3D_QPU_MUX_R4, 287 V3D_QPU_MUX_R5, 288 V3D_QPU_MUX_A, 289 V3D_QPU_MUX_B, 290 }; 291 292 struct v3d_qpu_alu_instr { 293 struct { 294 enum v3d_qpu_add_op op; 295 enum v3d_qpu_mux a, b; 296 uint8_t waddr; 297 bool magic_write; 298 enum v3d_qpu_output_pack output_pack; 299 enum v3d_qpu_input_unpack a_unpack; 300 enum v3d_qpu_input_unpack b_unpack; 301 } add; 302 303 struct { 304 enum v3d_qpu_mul_op op; 305 enum v3d_qpu_mux a, b; 306 uint8_t waddr; 307 bool magic_write; 308 enum v3d_qpu_output_pack output_pack; 309 enum v3d_qpu_input_unpack a_unpack; 310 enum v3d_qpu_input_unpack b_unpack; 311 } mul; 312 }; 313 314 enum v3d_qpu_branch_cond { 315 V3D_QPU_BRANCH_COND_ALWAYS, 316 V3D_QPU_BRANCH_COND_A0, 317 V3D_QPU_BRANCH_COND_NA0, 318 V3D_QPU_BRANCH_COND_ALLA, 319 V3D_QPU_BRANCH_COND_ANYNA, 320 V3D_QPU_BRANCH_COND_ANYA, 321 V3D_QPU_BRANCH_COND_ALLNA, 322 }; 323 324 enum v3d_qpu_msfign { 325 /** Ignore multisample flags when determining branch condition. */ 326 V3D_QPU_MSFIGN_NONE, 327 /** 328 * If no multisample flags are set in the lane (a pixel in the FS, a 329 * vertex in the VS), ignore the lane's condition when computing the 330 * branch condition. 331 */ 332 V3D_QPU_MSFIGN_P, 333 /** 334 * If no multisample flags are set in a 2x2 quad in the FS, ignore the 335 * quad's a/b conditions. 336 */ 337 V3D_QPU_MSFIGN_Q, 338 }; 339 340 enum v3d_qpu_branch_dest { 341 V3D_QPU_BRANCH_DEST_ABS, 342 V3D_QPU_BRANCH_DEST_REL, 343 V3D_QPU_BRANCH_DEST_LINK_REG, 344 V3D_QPU_BRANCH_DEST_REGFILE, 345 }; 346 347 struct v3d_qpu_branch_instr { 348 enum v3d_qpu_branch_cond cond; 349 enum v3d_qpu_msfign msfign; 350 351 /** Selects how to compute the new IP if the branch is taken. */ 352 enum v3d_qpu_branch_dest bdi; 353 354 /** 355 * Selects how to compute the new uniforms pointer if the branch is 356 * taken. (ABS/REL implicitly load a uniform and use that) 357 */ 358 enum v3d_qpu_branch_dest bdu; 359 360 /** 361 * If set, then udest determines how the uniform stream will branch, 362 * otherwise the uniform stream is left as is. 363 */ 364 bool ub; 365 366 uint8_t raddr_a; 367 368 uint32_t offset; 369 }; 370 371 enum v3d_qpu_instr_type { 372 V3D_QPU_INSTR_TYPE_ALU, 373 V3D_QPU_INSTR_TYPE_BRANCH, 374 }; 375 376 struct v3d_qpu_instr { 377 enum v3d_qpu_instr_type type; 378 379 struct v3d_qpu_sig sig; 380 uint8_t sig_addr; 381 bool sig_magic; /* If the signal writes to a magic address */ 382 uint8_t raddr_a; 383 uint8_t raddr_b; 384 struct v3d_qpu_flags flags; 385 386 union { 387 struct v3d_qpu_alu_instr alu; 388 struct v3d_qpu_branch_instr branch; 389 }; 390 }; 391 392 const char *v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo, 393 enum v3d_qpu_waddr waddr); 394 const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op); 395 const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op); 396 const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond); 397 const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf); 398 const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf); 399 const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack); 400 const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack); 401 const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond); 402 const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign); 403 404 enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST; 405 406 bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); 407 bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); 408 int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op); 409 int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op); 410 411 bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, 412 const struct v3d_qpu_sig *sig, 413 uint32_t *packed_sig); 414 bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, 415 uint32_t packed_sig, 416 struct v3d_qpu_sig *sig); 417 418 bool 419 v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, 420 const struct v3d_qpu_flags *cond, 421 uint32_t *packed_cond); 422 bool 423 v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, 424 uint32_t packed_cond, 425 struct v3d_qpu_flags *cond); 426 427 bool 428 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo, 429 uint32_t value, 430 uint32_t *packed_small_immediate); 431 432 bool 433 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo, 434 uint32_t packed_small_immediate, 435 uint32_t *small_immediate); 436 437 bool 438 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo, 439 const struct v3d_qpu_instr *instr, 440 uint64_t *packed_instr); 441 bool 442 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, 443 uint64_t packed_instr, 444 struct v3d_qpu_instr *instr); 445 446 bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 447 bool v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo, 448 enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 449 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 450 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 451 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 452 bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 453 bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 454 bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 455 bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 456 bool v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo, 457 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 458 bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo, 459 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 460 bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, 461 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 462 bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, 463 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 464 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, 465 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 466 bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo, 467 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 468 bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 469 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); 470 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 471 bool v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 472 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 473 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 474 bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 475 bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 476 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 477 bool v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo, 478 const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 479 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, 480 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; 481 bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 482 bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 483 484 bool v3d_qpu_is_nop(struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 485 #endif 486