/* Author(s):
 *   Connor Abbott
 *   Alyssa Rosenzweig
 *
 * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
 * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
 * Copyright (C) 2019-2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Definitions of the Midgard instruction encoding: tags, opcodes, modifiers
 * and the packed bit-field layouts of the ALU, branch, load/store and texture
 * words.
 *
 * The structures below rely on __attribute__((__packed__)) and on bit-fields
 * of enum, bool and uint64_t type, i.e. on GCC/Clang-compatible bit-field
 * layout. Field order and widths define the encoding; do not reorder. */

#ifndef __midgard_h__
#define __midgard_h__

#include <stdint.h>
#include <stdbool.h>

/* Debug bit flags. NOTE(review): presumably tested against midgard_debug;
 * confirm against the users of these macros. */
#define MIDGARD_DBG_MSGS                0x0001
#define MIDGARD_DBG_SHADERS             0x0002
#define MIDGARD_DBG_SHADERDB            0x0004

/* Declared here, defined elsewhere */
extern int midgard_debug;

/* Instruction word categories: ALU, load/store, texture, or unknown */
typedef enum {
        midgard_word_type_alu,
        midgard_word_type_load_store,
        midgard_word_type_texture,
        midgard_word_type_unknown
} midgard_word_type;

/* Identifiers for the ALU units (vmul, sadd, smul, vadd, lut) */
typedef enum {
        midgard_alu_vmul,
        midgard_alu_sadd,
        midgard_alu_smul,
        midgard_alu_vadd,
        midgard_alu_lut
} midgard_alu;

/* Tag values. All values fit in 4 bits, matching the 4-bit `dest_tag`,
 * `type` and `next_type` fields used by the words below. */
enum {
        TAG_INVALID = 0x0,
        TAG_BREAK = 0x1,
        TAG_TEXTURE_4_VTX = 0x2,
        TAG_TEXTURE_4 = 0x3,
        TAG_TEXTURE_4_BARRIER = 0x4,
        TAG_LOAD_STORE_4 = 0x5,
        TAG_UNKNOWN_1 = 0x6,
        TAG_UNKNOWN_2 = 0x7,
        TAG_ALU_4 = 0x8,
        TAG_ALU_8 = 0x9,
        TAG_ALU_12 = 0xA,
        TAG_ALU_16 = 0xB,
        TAG_ALU_4_WRITEOUT = 0xC,
        TAG_ALU_8_WRITEOUT = 0xD,
        TAG_ALU_12_WRITEOUT = 0xE,
        TAG_ALU_16_WRITEOUT = 0xF
};

/*
 * ALU words
 */

/* ALU opcodes. Every value fits in the 8-bit `op` field of
 * midgard_vector_alu / midgard_scalar_alu. */
typedef enum {
        midgard_alu_op_fadd       = 0x10,
        midgard_alu_op_fmul       = 0x14,

        midgard_alu_op_fmin       = 0x28,
        midgard_alu_op_fmax       = 0x2C,

        midgard_alu_op_fmov       = 0x30, /* fmov_rte */
        midgard_alu_op_fmov_rtz   = 0x31,
        midgard_alu_op_fmov_rtn   = 0x32,
        midgard_alu_op_fmov_rtp   = 0x33,
        midgard_alu_op_froundeven = 0x34,
        midgard_alu_op_ftrunc     = 0x35,
        midgard_alu_op_ffloor     = 0x36,
        midgard_alu_op_fceil      = 0x37,
        midgard_alu_op_ffma       = 0x38,
        midgard_alu_op_fdot3      = 0x3C,
        midgard_alu_op_fdot3r     = 0x3D,
        midgard_alu_op_fdot4      = 0x3E,
        midgard_alu_op_freduce    = 0x3F,

        midgard_alu_op_iadd       = 0x40,
        midgard_alu_op_ishladd    = 0x41, /* a + (b<<1) */
        midgard_alu_op_isub       = 0x46,
        midgard_alu_op_iaddsat    = 0x48,
        midgard_alu_op_uaddsat    = 0x49,
        midgard_alu_op_isubsat    = 0x4E,
        midgard_alu_op_usubsat    = 0x4F,

        midgard_alu_op_imul       = 0x58,

        midgard_alu_op_imin       = 0x60,
        midgard_alu_op_umin       = 0x61,
        midgard_alu_op_imax       = 0x62,
        midgard_alu_op_umax       = 0x63,
        midgard_alu_op_ihadd      = 0x64,
        midgard_alu_op_uhadd      = 0x65,
        midgard_alu_op_irhadd     = 0x66,
        midgard_alu_op_urhadd     = 0x67,
        midgard_alu_op_iasr       = 0x68,
        midgard_alu_op_ilsr       = 0x69,
        midgard_alu_op_ishl       = 0x6E,

        midgard_alu_op_iand       = 0x70,
        midgard_alu_op_ior        = 0x71,
        midgard_alu_op_inand      = 0x72, /* ~(a & b), for inot let a = b */
        midgard_alu_op_inor       = 0x73, /* ~(a | b) */
        midgard_alu_op_iandnot    = 0x74, /* (a & ~b), used for not/b2f */
        midgard_alu_op_iornot     = 0x75, /* (a | ~b) */
        midgard_alu_op_ixor       = 0x76,
        midgard_alu_op_inxor      = 0x77, /* ~(a ^ b) */
        midgard_alu_op_iclz       = 0x78, /* Number of zeroes on left */
        midgard_alu_op_ibitcount8 = 0x7A, /* Counts bits in 8-bit increments */
        midgard_alu_op_imov       = 0x7B,
        midgard_alu_op_iabsdiff   = 0x7C,
        midgard_alu_op_uabsdiff   = 0x7D,
        midgard_alu_op_ichoose    = 0x7E, /* vector, component number - dupe for shuffle() */

        midgard_alu_op_feq        = 0x80,
        midgard_alu_op_fne        = 0x81,
        midgard_alu_op_flt        = 0x82,
        midgard_alu_op_fle        = 0x83,
        midgard_alu_op_fball_eq   = 0x88,
        midgard_alu_op_fball_neq  = 0x89,
        midgard_alu_op_fball_lt   = 0x8A, /* all(lessThan(.., ..)) */
        midgard_alu_op_fball_lte  = 0x8B, /* all(lessThanEqual(.., ..)) */

        midgard_alu_op_fbany_eq   = 0x90,
        midgard_alu_op_fbany_neq  = 0x91,
        midgard_alu_op_fbany_lt   = 0x92, /* any(lessThan(.., ..)) */
        midgard_alu_op_fbany_lte  = 0x93, /* any(lessThanEqual(.., ..)) */

        midgard_alu_op_f2i_rte    = 0x98,
        midgard_alu_op_f2i_rtz    = 0x99,
        midgard_alu_op_f2i_rtn    = 0x9A,
        midgard_alu_op_f2i_rtp    = 0x9B,
        midgard_alu_op_f2u_rte    = 0x9C,
        midgard_alu_op_f2u_rtz    = 0x9D,
        midgard_alu_op_f2u_rtn    = 0x9E,
        midgard_alu_op_f2u_rtp    = 0x9F,

        midgard_alu_op_ieq        = 0xA0,
        midgard_alu_op_ine        = 0xA1,
        midgard_alu_op_ult        = 0xA2,
        midgard_alu_op_ule        = 0xA3,
        midgard_alu_op_ilt        = 0xA4,
        midgard_alu_op_ile        = 0xA5,
        midgard_alu_op_iball_eq   = 0xA8,
        midgard_alu_op_iball_neq  = 0xA9,
        midgard_alu_op_uball_lt   = 0xAA,
        midgard_alu_op_uball_lte  = 0xAB,
        midgard_alu_op_iball_lt   = 0xAC,
        midgard_alu_op_iball_lte  = 0xAD,

        midgard_alu_op_ibany_eq   = 0xB0,
        midgard_alu_op_ibany_neq  = 0xB1,
        midgard_alu_op_ubany_lt   = 0xB2,
        midgard_alu_op_ubany_lte  = 0xB3,
        midgard_alu_op_ibany_lt   = 0xB4, /* any(lessThan(.., ..)) */
        midgard_alu_op_ibany_lte  = 0xB5, /* any(lessThanEqual(.., ..)) */
        midgard_alu_op_i2f_rte    = 0xB8,
        midgard_alu_op_i2f_rtz    = 0xB9,
        midgard_alu_op_i2f_rtn    = 0xBA,
        midgard_alu_op_i2f_rtp    = 0xBB,
        midgard_alu_op_u2f_rte    = 0xBC,
        midgard_alu_op_u2f_rtz    = 0xBD,
        midgard_alu_op_u2f_rtn    = 0xBE,
        midgard_alu_op_u2f_rtp    = 0xBF,

        midgard_alu_op_icsel_v    = 0xC0, /* condition code r31 */
        midgard_alu_op_icsel      = 0xC1, /* condition code r31.w */
        midgard_alu_op_fcsel_v    = 0xC4,
        midgard_alu_op_fcsel      = 0xC5,
        midgard_alu_op_fround     = 0xC6,

        midgard_alu_op_fatan_pt2  = 0xE8,
        midgard_alu_op_fpow_pt1   = 0xEC,
        midgard_alu_op_fpown_pt1  = 0xED,
        midgard_alu_op_fpowr_pt1  = 0xEE,

        midgard_alu_op_frcp       = 0xF0,
        midgard_alu_op_frsqrt     = 0xF2,
        midgard_alu_op_fsqrt      = 0xF3,
        midgard_alu_op_fexp2      = 0xF4,
        midgard_alu_op_flog2      = 0xF5,
        midgard_alu_op_fsin       = 0xF6,
        midgard_alu_op_fcos       = 0xF7,
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;

/* Output modifiers for float results (2-bit) */
typedef enum {
        midgard_outmod_none       = 0,
        midgard_outmod_pos        = 1, /* max(x, 0.0) */
        midgard_outmod_sat_signed = 2, /* clamp(x, -1.0, 1.0) */
        midgard_outmod_sat        = 3  /* clamp(x, 0.0, 1.0) */
} midgard_outmod_float;

/* Output modifiers for integer results (2-bit) */
typedef enum {
        midgard_outmod_int_saturate  = 0,
        midgard_outmod_uint_saturate = 1,
        midgard_outmod_int_wrap      = 2,
        midgard_outmod_int_high      = 3, /* Overflowed portion */
} midgard_outmod_int;

/* Register mode selector (2-bit); the names give the width in bits */
typedef enum {
        midgard_reg_mode_8  = 0,
        midgard_reg_mode_16 = 1,
        midgard_reg_mode_32 = 2,
        midgard_reg_mode_64 = 3
} midgard_reg_mode;

/* Destination override selector (2-bit): lower half, upper half, or none */
typedef enum {
        midgard_dest_override_lower = 0,
        midgard_dest_override_upper = 1,
        midgard_dest_override_none  = 2
} midgard_dest_override;

/* Integer source modifiers; stored in the 2-bit `mod` field of an ALU source
 * for integer ops */
typedef enum {
        midgard_int_sign_extend = 0,
        midgard_int_zero_extend = 1,
        midgard_int_normal      = 2,
        midgard_int_shift       = 3
} midgard_int_mod;

/* Float source modifier bits; stored in the 2-bit `mod` field of an ALU
 * source for float ops */
#define MIDGARD_FLOAT_MOD_ABS (1 << 0)
#define MIDGARD_FLOAT_MOD_NEG (1 << 1)

/* Source descriptor for a vector ALU op. 13 bits in total, the same width as
 * the src1/src2 fields of midgard_vector_alu. */
typedef struct
__attribute__((__packed__))
{
        /* Either midgard_int_mod or from midgard_float_mod_*, depending on the
         * type of op */
        unsigned mod : 2;

        /* replicate lower half if dest = half, or low/high half selection if
         * dest = full
         */
        bool rep_low     : 1;
        bool rep_high    : 1; /* unused if dest = full */
        bool half        : 1; /* only matters if dest = full */
        unsigned swizzle : 8;
}
midgard_vector_alu_src;

/* Vector ALU instruction, 48 bits in total. src1/src2 are 13 bits each
 * (NOTE(review): presumably each holds a midgard_vector_alu_src, which has
 * the same width; confirm against users). */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op               :  8;
        midgard_reg_mode reg_mode       :  2;
        unsigned src1                   : 13;
        unsigned src2                   : 13;
        midgard_dest_override dest_override : 2;
        unsigned outmod                 :  2;
        unsigned mask                   :  8;
}
midgard_vector_alu;

/* Source descriptor for a scalar ALU op, 6 bits in total (the width of
 * midgard_scalar_alu.src1) */
typedef struct
__attribute__((__packed__))
{
        unsigned mod       : 2;
        bool full          : 1; /* 0 = half, 1 = full */
        unsigned component : 3;
}
midgard_scalar_alu_src;

/* Scalar ALU instruction, 32 bits in total. Note that src1 is 6 bits wide
 * whereas src2 is 11 bits wide. */
typedef struct
__attribute__((__packed__))
{
        midgard_alu_op op         :  8;
        unsigned src1             :  6;
        unsigned src2             : 11;
        unsigned unknown          :  1;
        unsigned outmod           :  2;
        bool output_full          :  1;
        unsigned output_component :  3;
}
midgard_scalar_alu;

/* Register selection word, 16 bits in total: three 5-bit register indices
 * plus a flag (src2_imm; NOTE(review): by its name, marks src2 as an
 * immediate; confirm). */
typedef struct
__attribute__((__packed__))
{
        unsigned src1_reg : 5;
        unsigned src2_reg : 5;
        unsigned out_reg  : 5;
        bool src2_imm     : 1;
}
midgard_reg_info;

/* In addition to conditional branches and jumps (unconditional branches),
 * Midgard implements a bit of fixed function functionality used in fragment
 * shaders via specially crafted branches. These have special branch opcodes,
 * which perform a fixed-function operation and/or use the results of a
 * fixed-function operation as the branch condition.  */

typedef enum {
        /* Regular branches */
        midgard_jmp_writeout_op_branch_uncond = 1,
        midgard_jmp_writeout_op_branch_cond = 2,

        /* In a fragment shader, execute a discard_if instruction, with the
         * corresponding condition code. Terminates the shader, so generally
         * set the branch target to out of the shader */
        midgard_jmp_writeout_op_discard = 4,

        /* Branch if the tilebuffer is not yet ready. At the beginning of a
         * fragment shader that reads from the tile buffer, for instance via
         * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage, this branch
         * operation should be used as a loop. An instruction like
         * "br.tilebuffer.always -1" does the trick, corresponding to
         * "while(!is_tilebuffer_ready);" */
        midgard_jmp_writeout_op_tilebuffer_pending = 6,

        /* In a fragment shader, try to write out the value pushed to r0 to the
         * tilebuffer, subject to unknown state in r1.z and r1.w. If this
         * succeeds, the shader terminates. If it fails, it branches to the
         * specified branch target. Generally, this should be used in a loop to
         * itself, acting as "do { write(r0); } while(!write_successful);" */
        midgard_jmp_writeout_op_writeout = 7,
} midgard_jmp_writeout_op;

/* 2-bit condition codes used by midgard_branch_cond */
typedef enum {
        midgard_condition_write0 = 0,

        /* These condition codes denote a conditional branch on FALSE and on
         * TRUE respectively */
        midgard_condition_false = 1,
        midgard_condition_true = 2,

        /* This condition code always branches. For a pure branch, the
         * unconditional branch coding should be used instead, but for
         * fixed-function branch opcodes, this is still useful */
        midgard_condition_always = 3,
} midgard_condition;

/* Compact unconditional branch, 16 bits in total */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_uncond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;

        /* Explicitly signed: the signedness of a plain `int` bit-field is
         * implementation-defined in C, and branch offsets can be negative
         * (e.g. the "-1" loop-to-self described above). This also matches
         * midgard_branch_extended. */
        signed offset : 7;
}
midgard_branch_uncond;

/* Compact conditional branch, 16 bits in total */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */

        /* Explicitly signed, for the same reason as in
         * midgard_branch_uncond */
        signed offset : 7;
        midgard_condition cond : 2;
}
midgard_branch_cond;

/* Extended branch, 48 bits in total, with a 23-bit signed offset and a 16-bit
 * condition lookup table */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == branch_cond */
        unsigned dest_tag : 4; /* tag of branch destination */
        unsigned unknown : 2;
        signed offset : 23;

        /* Extended branches permit inputting up to 4 conditions loaded into
         * r31 (two in r31.w and two in r31.x). In the most general case, we
         * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a
         * single 1-bit branch criteria. Note that f has 2^4 = 16 possible
         * inputs, each mapping to 1-bit of output (so there are 2^(2^4) such
         * functions), and we can trivially construct a Godel numbering of f
         * as a (2^4)=16-bit integer. This 16-bit integer serves as a lookup
         * table to compute f, subject to some swaps for ordering.
         *
         * Interestingly, the standard 2-bit condition codes are also a LUT
         * with the same format (2^1-bit), but it's usually easier to use
         * enums. */

        unsigned cond : 16;
}
midgard_branch_extended;

/* Writeout word, 16 bits in total */
typedef struct
__attribute__((__packed__))
{
        midgard_jmp_writeout_op op : 3; /* == writeout */
        unsigned unknown : 13;
}
midgard_writeout;

/*
 * Load/store words
 */

/* Load/store opcodes. Every value fits in the 8-bit `op` field of
 * midgard_load_store_word. */
typedef enum {
        midgard_op_ld_st_noop   = 0x03,

        /* Unpack a colour from a native format to fp16 */
        midgard_op_unpack_colour = 0x05,

        /* Packs a colour from fp16 to a native format */
        midgard_op_pack_colour   = 0x09,

        /* Likewise packs from fp32 */
        midgard_op_pack_colour_32 = 0x0A,

        /* Unclear why this is on the L/S unit, but moves fp32 cube map
         * coordinates in r27 to its cube map texture coordinate destination
         * (e.g r29). */

        midgard_op_ld_cubemap_coords = 0x0E,

        /* Loads a global/local/group ID, depending on arguments */
        midgard_op_ld_compute_id = 0x10,

        /* The L/S unit can do perspective division a clock faster than the ALU
         * if you're lucky. Put the vec4 in r27, and call with 0x24 as the
         * unknown state; the output will be <x/w, y/w, z/w, 1>. Replace w with
         * z for the z version */
        midgard_op_ldst_perspective_division_z = 0x12,
        midgard_op_ldst_perspective_division_w = 0x13,

        /* val in r27.y, address embedded, outputs result to argument. Invert
         * val for sub. Let val = +-1 for inc/dec. */
        midgard_op_atomic_add = 0x40,
        midgard_op_atomic_add64 = 0x41,

        midgard_op_atomic_and = 0x44,
        midgard_op_atomic_and64 = 0x45,
        midgard_op_atomic_or = 0x48,
        midgard_op_atomic_or64 = 0x49,
        midgard_op_atomic_xor = 0x4C,
        midgard_op_atomic_xor64 = 0x4D,

        midgard_op_atomic_imin = 0x50,
        midgard_op_atomic_imin64 = 0x51,
        midgard_op_atomic_umin = 0x54,
        midgard_op_atomic_umin64 = 0x55,
        midgard_op_atomic_imax = 0x58,
        midgard_op_atomic_imax64 = 0x59,
        midgard_op_atomic_umax = 0x5C,
        midgard_op_atomic_umax64 = 0x5D,

        midgard_op_atomic_xchg = 0x60,
        midgard_op_atomic_xchg64 = 0x61,

        midgard_op_atomic_cmpxchg = 0x64,
        midgard_op_atomic_cmpxchg64 = 0x65,

        /* Used for compute shader's __global arguments, __local variables (or
         * for register spilling) */

        midgard_op_ld_uchar = 0x80, /* zero extends */
        midgard_op_ld_char = 0x81, /* sign extends */
        midgard_op_ld_ushort = 0x84, /* zero extends */
        midgard_op_ld_short = 0x85, /* sign extends */
        midgard_op_ld_char4 = 0x88, /* short2, int, float */
        midgard_op_ld_short4 = 0x8C, /* int2, float2, long */
        midgard_op_ld_int4 = 0x90, /* float4, long2 */

        midgard_op_ld_attr_32 = 0x94,
        midgard_op_ld_attr_16 = 0x95,
        midgard_op_ld_attr_32u = 0x96,
        midgard_op_ld_attr_32i = 0x97,
        midgard_op_ld_vary_32 = 0x98,
        midgard_op_ld_vary_16 = 0x99,
        midgard_op_ld_vary_32u = 0x9A,
        midgard_op_ld_vary_32i = 0x9B,

        /* Old version of midgard_op_ld_color_buffer_as_fp16, for T720 */
        midgard_op_ld_color_buffer_as_fp32_old = 0x9C,
        midgard_op_ld_color_buffer_as_fp16_old = 0x9D,
        midgard_op_ld_color_buffer_32u_old = 0x9E,

        /* The distinction between these ops is the alignment requirement /
         * accompanying shift. Thus, the offset to ld_ubo_int4 is in 16-byte
         * units and can load 128-bit. The offset to ld_ubo_short4 is in 8-byte
         * units; ld_ubo_char4 in 4-byte units. ld_ubo_char/ld_ubo_char2 are
         * purely theoretical (never seen in the wild) since int8/int16/fp16
         * UBOs don't really exist. The ops are still listed to maintain
         * symmetry with generic I/O ops. */

        midgard_op_ld_ubo_char   = 0xA0, /* theoretical */
        midgard_op_ld_ubo_char2  = 0xA4, /* theoretical */
        midgard_op_ld_ubo_char4  = 0xA8,
        midgard_op_ld_ubo_short4 = 0xAC,
        midgard_op_ld_ubo_int4   = 0xB0,

        /* New-style blending ops. Works on T760/T860 */
        midgard_op_ld_color_buffer_as_fp32 = 0xB8,
        midgard_op_ld_color_buffer_as_fp16 = 0xB9,
        midgard_op_ld_color_buffer_32u = 0xBA,

        midgard_op_st_char = 0xC0,
        midgard_op_st_char2 = 0xC4, /* short */
        midgard_op_st_char4 = 0xC8, /* short2, int, float */
        midgard_op_st_short4 = 0xCC, /* int2, float2, long */
        midgard_op_st_int4 = 0xD0, /* float4, long2 */

        midgard_op_st_vary_32 = 0xD4,
        midgard_op_st_vary_16 = 0xD5,
        midgard_op_st_vary_32u = 0xD6,
        midgard_op_st_vary_32i = 0xD7,

        /* Value to st in r27, location r26.w as short2 */
        midgard_op_st_image_f = 0xD8,
        midgard_op_st_image_ui = 0xDA,
        midgard_op_st_image_i = 0xDB,
} midgard_load_store_op;

/* Interpolation qualifier (2-bit field of midgard_varying_parameter) */
typedef enum {
        midgard_interp_sample = 0,
        midgard_interp_centroid = 1,
        midgard_interp_default = 2
} midgard_interpolation;

/* Varying modifier (2-bit field of midgard_varying_parameter) */
typedef enum {
        midgard_varying_mod_none = 0,

        /* Other values unknown */

        /* Take the would-be result and divide all components by its z/w
         * (perspective division baked in with the load) */
        midgard_varying_mod_perspective_z = 2,
        midgard_varying_mod_perspective_w = 3,
} midgard_varying_modifier;

/* Varying parameters, 10 bits in total (the width of
 * midgard_load_store_word.varying_parameters) */
typedef struct
__attribute__((__packed__))
{
        unsigned zero0 : 1; /* Always zero */

        midgard_varying_modifier modifier : 2;

        unsigned zero1: 1; /* Always zero */

        /* Varying qualifiers, zero if not a varying */
        unsigned flat    : 1;
        unsigned is_varying : 1; /* Always one for varying, but maybe something else? */
        midgard_interpolation interpolation : 2;

        unsigned zero2 : 2; /* Always zero */
}
midgard_varying_parameter;

/* 8-bit register/etc selector for load/store ops */
typedef struct
__attribute__((__packed__))
{
        /* Indexes into the register */
        unsigned component : 2;

        /* Register select between r26/r27 */
        unsigned select : 1;

        unsigned unknown : 2;

        /* Like any good Arm instruction set, load/store arguments can be
         * implicitly left-shifted... but only the second argument. Zero for no
         * shifting, up to <<7 possible though. This is useful for indexing.
         *
         * For the first argument, it's unknown what these bits mean */
        unsigned shift : 3;
}
midgard_ldst_register_select;

/* A single load/store instruction, 60 bits in total (the width of
 * midgard_load_store.word1 / word2) */
typedef struct
__attribute__((__packed__))
{
        midgard_load_store_op op : 8;
        unsigned reg     : 5;
        unsigned mask    : 4;
        unsigned swizzle : 8;

        /* Load/store ops can take two additional registers as arguments, but
         * these are limited to load/store registers with only a few supported
         * mask/swizzle combinations. The tradeoff is these are much more
         * compact, requiring 8-bits each rather than 17-bits for a full
         * reg/mask/swizzle. Usually (?) encoded as
         * midgard_ldst_register_select. */
        unsigned arg_1   : 8;
        unsigned arg_2   : 8;

        unsigned varying_parameters : 10;

        unsigned address : 9;
}
midgard_load_store_word;

/* Load/store bundle, 128 bits in total: two 4-bit tags followed by two 60-bit
 * words (NOTE(review): presumably each a midgard_load_store_word, which has
 * the same width; confirm against users). */
typedef struct
__attribute__((__packed__))
{
        unsigned type      : 4;
        unsigned next_type : 4;
        uint64_t word1     : 60;
        uint64_t word2     : 60;
}
midgard_load_store;

/* 8-bit register selector used in texture ops to select a bias/LOD/gradient
 * register, shoved into the `bias` field */

typedef struct
__attribute__((__packed__))
{
        /* 32-bit register, clear for half-register */
        unsigned full : 1;

        /* Register select between r28/r29 */
        unsigned select : 1;

        /* For a half-register, selects the upper half */
        unsigned upper : 1;

        /* Indexes into the register */
        unsigned component : 2;

        /* Padding to make this 8-bit */
        unsigned zero : 3;
}
midgard_tex_register_select;

/* Texture pipeline results are in r28-r29 */
#define REG_TEX_BASE 28

/* Texture opcodes; stored in the 4-bit `op` field of midgard_texture_word */
enum mali_texture_op {
        TEXTURE_OP_NORMAL = 1,  /* texture */
        TEXTURE_OP_LOD = 2,     /* textureLod */
        TEXTURE_OP_TEXEL_FETCH = 4,
        TEXTURE_OP_BARRIER = 11,
        TEXTURE_OP_DERIVATIVE = 13
};

/* Sampler types; stored in the 2-bit `sampler_type` field of
 * midgard_texture_word */
enum mali_sampler_type {
        MALI_SAMPLER_UNK        = 0x0,
        MALI_SAMPLER_FLOAT      = 0x1, /* sampler */
        MALI_SAMPLER_UNSIGNED   = 0x2, /* usampler */
        MALI_SAMPLER_SIGNED     = 0x3, /* isampler */
};

/* Texture modes */
enum mali_texture_mode {
        TEXTURE_NORMAL = 1,
        TEXTURE_SHADOW = 5,
        TEXTURE_GATHER_SHADOW = 6,
        TEXTURE_GATHER_X = 8,
        TEXTURE_GATHER_Y = 9,
        TEXTURE_GATHER_Z = 10,
        TEXTURE_GATHER_W = 11,
};

/* Derivative direction. NOTE(review): presumably placed in the `mode` field
 * for TEXTURE_OP_DERIVATIVE; confirm against users. */
enum mali_derivative_mode {
        TEXTURE_DFDX = 0,
        TEXTURE_DFDY = 1,
};

/* Texture instruction word, 128 bits in total */
typedef struct
__attribute__((__packed__))
{
        unsigned type      : 4;
        unsigned next_type : 4;

        enum mali_texture_op op  : 4;
        unsigned mode : 4;

        /* A little obscure, but last is set for the last texture operation in
         * a shader. cont appears to just be last's opposite (?). Yeah, I know,
         * kind of funky.. BiOpen thinks it could do with memory hinting, or
         * tile locking? */

        unsigned cont  : 1;
        unsigned last  : 1;

        unsigned format : 2;

        /* Are sampler_handle/texture_handle respectively set by registers? If
         * true, the lower 8-bits of the respective field is a register word.
         * If false, they are an immediate */

        unsigned sampler_register : 1;
        unsigned texture_register : 1;

        /* Is a register used to specify the
         * LOD/bias/offset? If set, use the `bias` field as
         * a register index. If clear, use the `bias` field
         * as an immediate. */
        unsigned lod_register : 1;

        /* Is a register used to specify an offset? If set, use the
         * offset_reg_* fields to encode this, duplicated for each of the
         * components. If clear, there is implicitly always an immediate
         * offset specified in offset_imm_*. (Neither offset_reg_* nor
         * offset_imm_* exists as a field of this struct; both encodings
         * share the single 12-bit `offset` field below.) */
        unsigned offset_register : 1;

        unsigned in_reg_full  : 1;
        unsigned in_reg_select : 1;
        unsigned in_reg_upper  : 1;
        unsigned in_reg_swizzle : 8;

        unsigned unknown8  : 2;

        unsigned out_full  : 1;

        enum mali_sampler_type sampler_type : 2;

        unsigned out_reg_select : 1;
        unsigned out_upper : 1;

        unsigned mask : 4;

        /* Intriguingly, textures can take an outmod just like alu ops. Int
         * outmods are not supported as far as I can tell, so this is only
         * meaningful for float samplers */
        midgard_outmod_float outmod  : 2;

        unsigned swizzle  : 8;

        /* These indicate how many bundles after this texture op may be
         * executed in parallel with this op. We may execute only ALU and
         * ld/st in parallel (not other textures), and obviously there cannot
         * be any dependency (the blob appears to forbid even accessing other
         * channels of a given texture register). */

        unsigned out_of_order   : 2;
        unsigned unknown4  : 10;

        /* In immediate mode, each offset field is an immediate range [0, 7].
         *
         * In register mode, offset_x becomes a register (full, select, upper)
         * triplet, followed by a vec3 swizzle which is splattered across
         * offset_y/offset_z in a genuinely bizarre way.
         *
         * For texel fetches in immediate mode, the range is the full [-8, 7],
         * but for normal texturing the top bit must be zero and a register
         * used instead. It's not clear where this limitation is from.
         *
         * union {
         *      struct {
         *              signed offset_x  : 4;
         *              signed offset_y  : 4;
         *              signed offset_z  : 4;
         *      } immediate;
         *      struct {
         *              bool full        : 1;
         *              bool select      : 1;
         *              bool upper       : 1;
         *              unsigned swizzle : 8;
         *              unsigned zero    : 1;
         *      } register;
         * }
         */

        unsigned offset : 12;

        /* In immediate bias mode, for a normal texture op, this is
         * texture bias, computed as int(2^8 * frac(biasf)), with
         * bias_int = floor(bias). For a textureLod, it's that, but
         * s/bias/lod. For a texel fetch, this is the LOD as-is.
         *
         * In register mode, this is a midgard_tex_register_select
         * structure and bias_int is zero */

        unsigned bias : 8;
        signed bias_int  : 8;

        /* If sampler/texture_register is set, the bottom 8-bits are
         * midgard_tex_register_select and the top 8-bits are zero. If they are
         * clear, they are immediate texture indices */

        unsigned sampler_handle : 16;
        unsigned texture_handle : 16;
}
midgard_texture_word;

/* Technically barriers are texture instructions but it's less work to add them
 * as an explicitly zeroed special case, since most fields are forced to go to
 * zero. 128 bits in total, like midgard_texture_word. */

typedef struct
__attribute__((__packed__))
{
        unsigned type      : 4;
        unsigned next_type : 4;

        /* op = TEXTURE_OP_BARRIER */
        unsigned op  : 6;
        unsigned zero1    : 2;

        /* Since helper invocations don't make any sense, these are forced to one */
        unsigned cont  : 1;
        unsigned last  : 1;
        unsigned zero2 : 14;

        unsigned zero3 : 24;
        unsigned out_of_order : 4;
        unsigned zero4 : 4;

        uint64_t zero5;
} midgard_texture_barrier_word;

/* 16 bytes of constant data, viewable at any of the listed element widths.
 * Note that the f16 view is stored as raw uint16_t values. */
typedef union midgard_constants {
        double f64[2];
        uint64_t u64[2];
        int64_t i64[2];
        float f32[4];
        uint32_t u32[4];
        int32_t i32[4];
        uint16_t f16[8];
        uint16_t u16[8];
        int16_t i16[8];
        uint8_t u8[16];
        int8_t i8[16];
}
midgard_constants;

/* Rounding modes. The values follow the same rte/rtz/rtn/rtp order as the
 * low two bits of the fmov/f2i/f2u/i2f/u2f opcode groups above. */
enum midgard_roundmode {
        MIDGARD_RTE = 0x0, /* round to even */
        MIDGARD_RTZ = 0x1, /* round to zero */
        MIDGARD_RTN = 0x2, /* round to negative */
        MIDGARD_RTP = 0x3, /* round to positive */
};

#endif