/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file qpu_instr.h
 *
 * Definitions of the unpacked form of QPU instructions.  Assembly and
 * disassembly will use this for talking about instructions, with qpu_encode.c
 * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU
 * instruction.
 */

#ifndef QPU_INSTR_H
#define QPU_INSTR_H

#include <stdbool.h>
#include <stdint.h>
#include "util/macros.h"

/* Only ever passed by pointer in this header, so a forward declaration is
 * enough.
 */
struct v3d_device_info;

/**
 * Unpacked signal bits of an instruction: one single-bit flag per signal.
 * Converted to and from the packed field by v3d_qpu_sig_pack() and
 * v3d_qpu_sig_unpack().
 */
struct v3d_qpu_sig {
        bool thrsw:1;
        bool ldunif:1;
        bool ldunifa:1;
        bool ldunifrf:1;
        bool ldunifarf:1;
        bool ldtmu:1;
        bool ldvary:1;
        bool ldvpm:1;
        bool ldtlb:1;
        bool ldtlbu:1;
        bool small_imm:1;
        bool ucb:1;
        bool rotate:1;
        bool wrtmuc:1;
};

/**
 * Condition selector stored in v3d_qpu_flags.ac / .mc.  NONE is 0, so a
 * zero-initialized v3d_qpu_flags carries no condition.
 */
enum v3d_qpu_cond {
        V3D_QPU_COND_NONE,
        V3D_QPU_COND_IFA,
        V3D_QPU_COND_IFB,
        V3D_QPU_COND_IFNA,
        V3D_QPU_COND_IFNB,
};

/** "pf" selector stored in v3d_qpu_flags.apf / .mpf.  NONE is 0. */
enum v3d_qpu_pf {
        V3D_QPU_PF_NONE,
        V3D_QPU_PF_PUSHZ,
        V3D_QPU_PF_PUSHN,
        V3D_QPU_PF_PUSHC,
};

/** "uf" selector stored in v3d_qpu_flags.auf / .muf.  NONE is 0. */
enum v3d_qpu_uf {
        V3D_QPU_UF_NONE,
        V3D_QPU_UF_ANDZ,
        V3D_QPU_UF_ANDNZ,
        V3D_QPU_UF_NORNZ,
        V3D_QPU_UF_NORZ,
        V3D_QPU_UF_ANDN,
        V3D_QPU_UF_ANDNN,
        V3D_QPU_UF_NORNN,
        V3D_QPU_UF_NORN,
        V3D_QPU_UF_ANDC,
        V3D_QPU_UF_ANDNC,
        V3D_QPU_UF_NORNC,
        V3D_QPU_UF_NORC,
};

/**
 * Write addresses, with explicitly assigned values.  The numbering is sparse:
 * 25-31 and 47-54 have no name here.
 */
enum v3d_qpu_waddr {
        V3D_QPU_WADDR_R0 = 0,
        V3D_QPU_WADDR_R1 = 1,
        V3D_QPU_WADDR_R2 = 2,
        V3D_QPU_WADDR_R3 = 3,
        V3D_QPU_WADDR_R4 = 4,
        V3D_QPU_WADDR_R5 = 5,
        /* 6 is reserved, but note 3.2.2.8: "Result Writes" */
        V3D_QPU_WADDR_NOP = 6,
        V3D_QPU_WADDR_TLB = 7,
        V3D_QPU_WADDR_TLBU = 8,
        V3D_QPU_WADDR_TMU = 9,
        V3D_QPU_WADDR_TMUL = 10,
        V3D_QPU_WADDR_TMUD = 11,
        V3D_QPU_WADDR_TMUA = 12,
        V3D_QPU_WADDR_TMUAU = 13,
        V3D_QPU_WADDR_VPM = 14,
        V3D_QPU_WADDR_VPMU = 15,
        V3D_QPU_WADDR_SYNC = 16,
        V3D_QPU_WADDR_SYNCU = 17,
        V3D_QPU_WADDR_SYNCB = 18,
        V3D_QPU_WADDR_RECIP = 19,
        V3D_QPU_WADDR_RSQRT = 20,
        V3D_QPU_WADDR_EXP = 21,
        V3D_QPU_WADDR_LOG = 22,
        V3D_QPU_WADDR_SIN = 23,
        V3D_QPU_WADDR_RSQRT2 = 24,
        V3D_QPU_WADDR_TMUC = 32,
        V3D_QPU_WADDR_TMUS = 33,
        V3D_QPU_WADDR_TMUT = 34,
        V3D_QPU_WADDR_TMUR = 35,
        V3D_QPU_WADDR_TMUI = 36,
        V3D_QPU_WADDR_TMUB = 37,
        V3D_QPU_WADDR_TMUDREF = 38,
        V3D_QPU_WADDR_TMUOFF = 39,
        V3D_QPU_WADDR_TMUSCM = 40,
        V3D_QPU_WADDR_TMUSF = 41,
        V3D_QPU_WADDR_TMUSLOD = 42,
        V3D_QPU_WADDR_TMUHS = 43,
        V3D_QPU_WADDR_TMUHSCM = 44,
        V3D_QPU_WADDR_TMUHSF = 45,
        V3D_QPU_WADDR_TMUHSLOD = 46,
        V3D_QPU_WADDR_R5REP = 55,
};

/**
 * Unpacked flags state of an instruction: one cond, pf and uf selector in
 * each of two sets (a* and m*).  Converted by v3d_qpu_flags_pack() and
 * v3d_qpu_flags_unpack().
 *
 * NOTE(review): the a/m prefixes presumably correspond to the add and mul
 * halves of struct v3d_qpu_alu_instr -- confirm.
 */
struct v3d_qpu_flags {
        enum v3d_qpu_cond ac, mc;
        enum v3d_qpu_pf apf, mpf;
        enum v3d_qpu_uf auf, muf;
};

/**
 * Opcodes for v3d_qpu_alu_instr.add.op.  The values are implicit (0, 1, 2...),
 * so inserting or reordering entries renumbers every later one.
 */
enum v3d_qpu_add_op {
        V3D_QPU_A_FADD,
        V3D_QPU_A_FADDNF,
        V3D_QPU_A_VFPACK,
        V3D_QPU_A_ADD,
        V3D_QPU_A_SUB,
        V3D_QPU_A_FSUB,
        V3D_QPU_A_MIN,
        V3D_QPU_A_MAX,
        V3D_QPU_A_UMIN,
        V3D_QPU_A_UMAX,
        V3D_QPU_A_SHL,
        V3D_QPU_A_SHR,
        V3D_QPU_A_ASR,
        V3D_QPU_A_ROR,
        V3D_QPU_A_FMIN,
        V3D_QPU_A_FMAX,
        V3D_QPU_A_VFMIN,
        V3D_QPU_A_AND,
        V3D_QPU_A_OR,
        V3D_QPU_A_XOR,
        V3D_QPU_A_VADD,
        V3D_QPU_A_VSUB,
        V3D_QPU_A_NOT,
        V3D_QPU_A_NEG,
        V3D_QPU_A_FLAPUSH,
        V3D_QPU_A_FLBPUSH,
        V3D_QPU_A_FLPOP,
        V3D_QPU_A_RECIP,
        V3D_QPU_A_SETMSF,
        V3D_QPU_A_SETREVF,
        V3D_QPU_A_NOP,
        V3D_QPU_A_TIDX,
        V3D_QPU_A_EIDX,
        V3D_QPU_A_LR,
        V3D_QPU_A_VFLA,
        V3D_QPU_A_VFLNA,
        V3D_QPU_A_VFLB,
        V3D_QPU_A_VFLNB,
        V3D_QPU_A_FXCD,
        V3D_QPU_A_XCD,
        V3D_QPU_A_FYCD,
        V3D_QPU_A_YCD,
        V3D_QPU_A_MSF,
        V3D_QPU_A_REVF,
        V3D_QPU_A_VDWWT,
        V3D_QPU_A_IID,
        V3D_QPU_A_SAMPID,
        V3D_QPU_A_BARRIERID,
        V3D_QPU_A_TMUWT,
        V3D_QPU_A_VPMSETUP,
        V3D_QPU_A_VPMWT,
        V3D_QPU_A_LDVPMV_IN,
        V3D_QPU_A_LDVPMV_OUT,
        V3D_QPU_A_LDVPMD_IN,
        V3D_QPU_A_LDVPMD_OUT,
        V3D_QPU_A_LDVPMP,
        V3D_QPU_A_RSQRT,
        V3D_QPU_A_EXP,
        V3D_QPU_A_LOG,
        V3D_QPU_A_SIN,
        V3D_QPU_A_RSQRT2,
        V3D_QPU_A_LDVPMG_IN,
        V3D_QPU_A_LDVPMG_OUT,
        V3D_QPU_A_FCMP,
        V3D_QPU_A_VFMAX,
        V3D_QPU_A_FROUND,
        V3D_QPU_A_FTOIN,
        V3D_QPU_A_FTRUNC,
        V3D_QPU_A_FTOIZ,
        V3D_QPU_A_FFLOOR,
        V3D_QPU_A_FTOUZ,
        V3D_QPU_A_FCEIL,
        V3D_QPU_A_FTOC,
        V3D_QPU_A_FDX,
        V3D_QPU_A_FDY,
        V3D_QPU_A_STVPMV,
        V3D_QPU_A_STVPMD,
        V3D_QPU_A_STVPMP,
        V3D_QPU_A_ITOF,
        V3D_QPU_A_CLZ,
        V3D_QPU_A_UTOF,
};

/**
 * Opcodes for v3d_qpu_alu_instr.mul.op.  Values are implicit, as for
 * enum v3d_qpu_add_op.
 */
enum v3d_qpu_mul_op {
        V3D_QPU_M_ADD,
        V3D_QPU_M_SUB,
        V3D_QPU_M_UMUL24,
        V3D_QPU_M_VFMUL,
        V3D_QPU_M_SMUL24,
        V3D_QPU_M_MULTOP,
        V3D_QPU_M_FMOV,
        V3D_QPU_M_MOV,
        V3D_QPU_M_NOP,
        V3D_QPU_M_FMUL,
};

/** Output packing mode of an ALU result.  NONE is 0. */
enum v3d_qpu_output_pack {
        V3D_QPU_PACK_NONE,
        /**
         * Convert to 16-bit float, put in low 16 bits of destination leaving
         * high unmodified.
         */
        V3D_QPU_PACK_L,
        /**
         * Convert to 16-bit float, put in high 16 bits of destination leaving
         * low unmodified.
         */
        V3D_QPU_PACK_H,
};

/** Input unpacking mode of an ALU source operand. */
enum v3d_qpu_input_unpack {
        /**
         * No-op input unpacking.  Note that this enum's value doesn't match
         * the packed QPU instruction value of the field (we use 0 so that the
         * default on new instruction creation is no-op).
         */
        V3D_QPU_UNPACK_NONE,
        /** Absolute value.  Only available for some operations. */
        V3D_QPU_UNPACK_ABS,
        /** Convert low 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_L,
        /** Convert high 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_H,

        /** Convert to 16f and replicate it to the high bits. */
        V3D_QPU_UNPACK_REPLICATE_32F_16,

        /** Replicate low 16 bits to high */
        V3D_QPU_UNPACK_REPLICATE_L_16,

        /** Replicate high 16 bits to low */
        V3D_QPU_UNPACK_REPLICATE_H_16,

        /** Swap high and low 16 bits */
        V3D_QPU_UNPACK_SWAP_16,
};

/**
 * Source selector for the a/b operands of an ALU op: R0-R5, or A / B.
 *
 * NOTE(review): A and B presumably select the values addressed by
 * v3d_qpu_instr.raddr_a / raddr_b -- confirm.
 */
enum v3d_qpu_mux {
        V3D_QPU_MUX_R0,
        V3D_QPU_MUX_R1,
        V3D_QPU_MUX_R2,
        V3D_QPU_MUX_R3,
        V3D_QPU_MUX_R4,
        V3D_QPU_MUX_R5,
        V3D_QPU_MUX_A,
        V3D_QPU_MUX_B,
};

/**
 * ALU form of an instruction: an add operation and a mul operation, each
 * with its own opcode, two source muxes, write address, and pack/unpack
 * modes.  The two halves have the same fields and differ only in the opcode
 * enum.
 */
struct v3d_qpu_alu_instr {
        struct {
                enum v3d_qpu_add_op op;
                enum v3d_qpu_mux a, b;
                uint8_t waddr;
                /* NOTE(review): presumably marks waddr as an
                 * enum v3d_qpu_waddr value rather than a plain register
                 * index -- confirm.
                 */
                bool magic_write;
                enum v3d_qpu_output_pack output_pack;
                enum v3d_qpu_input_unpack a_unpack;
                enum v3d_qpu_input_unpack b_unpack;
        } add;

        struct {
                enum v3d_qpu_mul_op op;
                enum v3d_qpu_mux a, b;
                uint8_t waddr;
                bool magic_write;
                enum v3d_qpu_output_pack output_pack;
                enum v3d_qpu_input_unpack a_unpack;
                enum v3d_qpu_input_unpack b_unpack;
        } mul;
};

/** Condition under which a branch is taken.  ALWAYS is 0. */
enum v3d_qpu_branch_cond {
        V3D_QPU_BRANCH_COND_ALWAYS,
        V3D_QPU_BRANCH_COND_A0,
        V3D_QPU_BRANCH_COND_NA0,
        V3D_QPU_BRANCH_COND_ALLA,
        V3D_QPU_BRANCH_COND_ANYNA,
        V3D_QPU_BRANCH_COND_ANYA,
        V3D_QPU_BRANCH_COND_ALLNA,
};

/** How multisample flags take part in evaluating a branch condition. */
enum v3d_qpu_msfign {
        /** Ignore multisample flags when determining branch condition. */
        V3D_QPU_MSFIGN_NONE,
        /**
         * If no multisample flags are set in the lane (a pixel in the FS, a
         * vertex in the VS), ignore the lane's condition when computing the
         * branch condition.
         */
        V3D_QPU_MSFIGN_P,
        /**
         * If no multisample flags are set in a 2x2 quad in the FS, ignore the
         * quad's a/b conditions.
         */
        V3D_QPU_MSFIGN_Q,
};

/**
 * Ways of computing a branch target.  Used for both the instruction pointer
 * (v3d_qpu_branch_instr.bdi) and the uniforms pointer (.bdu).
 */
enum v3d_qpu_branch_dest {
        V3D_QPU_BRANCH_DEST_ABS,
        V3D_QPU_BRANCH_DEST_REL,
        V3D_QPU_BRANCH_DEST_LINK_REG,
        V3D_QPU_BRANCH_DEST_REGFILE,
};

/** Branch form of an instruction. */
struct v3d_qpu_branch_instr {
        enum v3d_qpu_branch_cond cond;
        enum v3d_qpu_msfign msfign;

        /** Selects how to compute the new IP if the branch is taken. */
        enum v3d_qpu_branch_dest bdi;

        /**
         * Selects how to compute the new uniforms pointer if the branch is
         * taken.  (ABS/REL implicitly load a uniform and use that)
         */
        enum v3d_qpu_branch_dest bdu;

        /**
         * If set, then bdu determines how the uniform stream will branch,
         * otherwise the uniform stream is left as is.
         */
        bool ub;

        /* NOTE(review): presumably the register index used by the
         * V3D_QPU_BRANCH_DEST_REGFILE mode -- confirm.
         */
        uint8_t raddr_a;

        /* NOTE(review): presumably the target value for the ABS/REL modes
         * -- confirm units and signedness.
         */
        uint32_t offset;
};

/** Discriminator for the anonymous union in struct v3d_qpu_instr. */
enum v3d_qpu_instr_type {
        V3D_QPU_INSTR_TYPE_ALU,
        V3D_QPU_INSTR_TYPE_BRANCH,
};

/**
 * Unpacked form of one QPU instruction, converted to and from the 64-bit
 * encoding by v3d_qpu_instr_pack() and v3d_qpu_instr_unpack().
 *
 * ALU is type 0, so a zero-initialized instruction is of ALU type.
 */
struct v3d_qpu_instr {
        /* NOTE(review): presumably selects which union member below is
         * valid -- confirm.
         */
        enum v3d_qpu_instr_type type;

        struct v3d_qpu_sig sig;
        uint8_t sig_addr;
        bool sig_magic; /* If the signal writes to a magic address */
        uint8_t raddr_a;
        uint8_t raddr_b;
        struct v3d_qpu_flags flags;

        union {
                struct v3d_qpu_alu_instr alu;
                struct v3d_qpu_branch_instr branch;
        };
};

/* String lookups for the enums above.  What they return for values outside
 * the enum cannot be told from this header.
 */
const char *v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr);
const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op);
const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op);
const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond);
const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf);
const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf);
const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack);
const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);

/* Takes its argument by value, so the result can depend only on it. */
enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;

403 bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); 404 bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); 405 int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op); 406 int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op); 407 408 bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, 409 const struct v3d_qpu_sig *sig, 410 uint32_t *packed_sig); 411 bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, 412 uint32_t packed_sig, 413 struct v3d_qpu_sig *sig); 414 415 bool 416 v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, 417 const struct v3d_qpu_flags *cond, 418 uint32_t *packed_cond); 419 bool 420 v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, 421 uint32_t packed_cond, 422 struct v3d_qpu_flags *cond); 423 424 bool 425 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo, 426 uint32_t value, 427 uint32_t *packed_small_immediate); 428 429 bool 430 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo, 431 uint32_t packed_small_immediate, 432 uint32_t *small_immediate); 433 434 bool 435 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo, 436 const struct v3d_qpu_instr *instr, 437 uint64_t *packed_instr); 438 bool 439 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, 440 uint64_t packed_instr, 441 struct v3d_qpu_instr *instr); 442 443 bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 444 bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 445 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 446 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 447 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 448 bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; 449 bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 450 bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 451 bool v3d_qpu_uses_sfu(const struct 
v3d_qpu_instr *inst) ATTRIBUTE_CONST; 452 bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 453 bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 454 bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo, 455 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 456 bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo, 457 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 458 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo, 459 const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; 460 bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 461 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); 462 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 463 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 464 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 465 bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 466 bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 467 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 468 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo, 469 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST; 470 bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 471 bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST; 472 473 #endif 474