; Copyright (c) 2020 Valve Corporation
;
; Permission is hereby granted, free of charge, to any person obtaining a
; copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice (including the next
; paragraph) shall be included in all copies or substantial portions of the
; Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
; SOFTWARE.
;
;
; This file is the source for a simple mock firmware used to regression test
; the afuc assembler/disassembler. Note, it won't actually work if you try to
; load it on the GPU! First this is assembled, compared to the reference
; binary, then disassembled and compared to the reference disassembly. We do
; this to avoid having to host the actual firmware, especially the disassembled
; version, in Mesa.
; -----------------------------------------------------------------------------
; Mock firmware body.
;
; Layout conventions used below:
;   * one statement per line; labels flush-left, instructions indented
;   * every packet handler finishes with the same `waitin` / `mov $01, $data`
;     pair
;   * $00 is the base register of every control-register access in this file
;     (presumably the always-zero register -- confirm against the afuc docs)
;
; The instruction sequence must not be reordered or altered: the assembled
; output is compared against a reference binary.
; -----------------------------------------------------------------------------

; Two bracketed words precede the first label (presumably literal dwords that
; are emitted verbatim -- confirm against the assembler).  CP_ME_INIT below
; expects #loc02 to evaluate to 0x0002.
[01000001]
[01000000]

loc02:
; packet table loading:
        mov $01, 0x0830 ; CP_SQE_INSTR_BASE
        mov $02, 0x0002
        cwrite $01, [$00 + @REG_READ_ADDR]
        cwrite $02, [$00 + @REG_READ_DWORDS]
; move hi/lo of SQE fw addrs to registers:
        mov $01, $regdata
        mov $02, $regdata
; skip first dword
        add $01, $01, 0x0004
        addhi $02, $02, 0x0000
        mov $03, 0x0001
        cwrite $01, [$00 + @MEM_READ_ADDR]
        cwrite $02, [$00 + @MEM_READ_ADDR+0x1]
        cwrite $03, [$00 + @MEM_READ_DWORDS]
; read 2nd dword of fw, and add offset (minus 4 because we skipped first dword)
; to base address of sqe fw
        rot $04, $memdata, 0x0008
        ushr $04, $04, 0x0006
        sub $04, $04, 0x0004
        add $01, $01, $04
        addhi $02, $02, 0x0000

; load packet table:
; $rem is set to 0x0080 and written to MEM_READ_DWORDS; the (rep) cwrite then
; streams $memdata into PACKET_TABLE_WRITE, starting at write address $00.
        mov $rem, 0x0080
        cwrite $01, [$00 + @MEM_READ_ADDR]
        cwrite $02, [$00 + @MEM_READ_ADDR+0x1]
        cwrite $02, [$00 + @LOAD_STORE_HI]
        cwrite $rem, [$00 + @MEM_READ_DWORDS]
        cwrite $00, [$00 + @PACKET_TABLE_WRITE_ADDR]
        (rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE]

; Build 0xdeadbeef in $03 (0xbeef | 0xdead << 16) and write it to register
; 0x883 through the REG_WRITE_ADDR / REG_WRITE pair.
        mov $02, 0x883
        mov $03, 0xbeef
        mov $04, 0xdead << 16
        or $03, $03, $04
        cwrite $02, [$00 + @REG_WRITE_ADDR]
        cwrite $03, [$00 + @REG_WRITE]
        waitin
        mov $01, $data

CP_ME_INIT:
; test label-as-immediate feature
        mov $02, #loc02 ; should be 0x0002
        waitin
        mov $01, $data

CP_MEM_WRITE:
; test $addr + (rep) + (xmovN) with ALU
        mov $addr, 0xa0 << 24
        mov $02, 4
        (xmov1)add $data, $02, $data
        mov $addr, 0xa204 << 16
        (rep)(xmov3)mov $data, $data
        waitin
        mov $01, $data

CP_SCRATCH_WRITE:
; test (rep) + flags + non-zero offset with cwrite
; TODO: 0x4 flag is actually pre-increment addressing, handle it as such
        mov $02, 0xff
        (rep)cwrite $data, [$02 + 0x001]!
        waitin
        mov $01, $data

CP_SET_DRAW_STATE:
; test (sds)
        (rep)(sds2) cwrite $data, [$00 + @DRAW_STATE_SET_HDR]
        waitin
        mov $01, $data

CP_SET_BIN_DATA5:
; test SQE registers
; %SP is read into $02 and written straight back, then a (rep) swrite with the
; `!` suffix is exercised using base $02 = 7 and offset 1.
        sread $02, [$00 + %SP]
        swrite $02, [$00 + %SP]
        mov $02, 7
        (rep)swrite $data, [$02 + 1]!
        waitin
        mov $01, $data

CP_SET_SECURE_MODE:
; test setsecure
; setsecure names #setsecure_success as its target; otherwise execution falls
; through into the `err` self-loop.  `err` is also the target of the
; preemptleave in UNKN15.
        mov $02, $data
        setsecure $02, #setsecure_success
err:
        jump #err
        nop
setsecure_success:
        waitin
        mov $01, $data

euclid:
; Euclid's algorithm in afuc: https://en.wikipedia.org/wiki/Euclidean_algorithm
; Since afuc doesn't do modulo, we implement the subtraction-based version.
;
; Demonstrates/tests comparisons and conditional branches. This also
; demonstrates the common trick of branching in a delay slot. Note that if a
; branch is taken and its delay slot includes another branch, the second
; branch cannot also be taken, which is why the last branch in the sequence
; cannot be unconditional.
;
; Inputs are in $02 and $03, and output is in $02.
; $04 is used as scratch for the comparison result.
        cmp $04, $02, $03
        breq $04, b0, #euclid_exit
        brne $04, b1, #euclid_gt
        breq $04, b2, #euclid
; $03 -= $02 (sits directly after the last branch of the chain)
        sub $03, $03, $02
euclid_gt:
        jump #euclid
; $02 -= $03 (sits directly after the jump back to the top)
        sub $02, $02, $03
euclid_exit:
        ret
        nop

CP_REG_RMW:
; Test various ALU instructions, and read/write $regdata
; Also exercises `call` (into euclid above); the subhi that follows the call
; is presumably its delay slot -- confirm against the afuc docs.
        cwrite $data, [$00 + @REG_READ_ADDR]
        add $02, $regdata, 0x42
        addhi $03, $00, $regdata
        sub $02, $02, $regdata
        call #euclid
        subhi $03, $03, $regdata
        and $02, $02, $regdata
        or $02, $02, 0x1
        xor $02, $02, 0x1
        not $02, $02
        shl $02, $02, $regdata
        ushr $02, $02, $regdata
        ishr $02, $02, $regdata
        rot $02, $02, $regdata
        min $02, $02, $regdata
        max $02, $02, $regdata
        mul8 $02, $02, $regdata
        msb $02, $02
        mov $usraddr, $data
        mov $data, $02
        waitin
        mov $01, $data

CP_MEMCPY:
; implement CP_MEMCPY using load/store instructions
; Five values are read from $data into $02..$06.  As used below:
;   $02 = base register of the load      $03 = LOAD_STORE_HI value for the load
;   $04 = base register of the store     $05 = LOAD_STORE_HI value for the store
;   $06 = value tested against 0 to leave the loop
; NOTE(review): nothing inside the loop modifies $06 in this mock.
        mov $02, $data
        mov $03, $data
        mov $04, $data
        mov $05, $data
        mov $06, $data
cpy_header:
        breq $06, 0, #cpy_exit
        cwrite $03, [$00 + @LOAD_STORE_HI]
        load $07, [$02 + 0x004]!
        cwrite $05, [$00 + @LOAD_STORE_HI]
        jump #cpy_header
; the store follows the jump (presumably executed in its delay slot)
        store $07, [$04 + 0x004]!
cpy_exit:
        waitin
        mov $01, $data

CP_MEM_TO_MEM:
; implement CP_MEMCPY using mem read control regs
; tests @FOO+0x1 for 64-bit control regs, and reading/writing $rem
        cwrite $data, [$00 + @MEM_READ_ADDR]
        cwrite $data, [$00 + @MEM_READ_ADDR+1]
        mov $02, $data
        cwrite $data, [$00 + @LOAD_STORE_HI]
        mov $rem, $data
        cwrite $rem, [$00 + @MEM_READ_DWORDS]
        (rep)store $memdata, [$02 + 0x004]!
        waitin
        mov $01, $data

UNKN15:
; test preemptleave + iret + conditional branch w/ immed
; Control register 0x101 is read into $02; if it differs from 0x0001 the code
; branches to exit_iret, otherwise it continues into preemptleave.
        cread $02, [$00 + 0x101]
        brne $02, 0x0001, #exit_iret
        nop
        preemptleave #err
        nop
        nop
        nop
        waitin
        mov $01, $data
exit_iret:
        iret
        nop

; Every label below marks the same address: they all share the single
; `waitin` / `mov $01, $data` handler at the end of the file.
UNKN0:
UNKN1:
UNKN2:
UNKN3:
PKT4:
UNKN5:
UNKN6:
UNKN7:
UNKN8:
UNKN9:
UNKN10:
UNKN11:
UNKN12:
UNKN13:
UNKN14:
CP_NOP:
CP_RECORD_PFP_TIMESTAMP:
CP_WAIT_MEM_WRITES:
CP_WAIT_FOR_ME:
CP_WAIT_MEM_GTE:
UNKN21:
UNKN22:
UNKN23:
UNKN24:
CP_DRAW_PRED_ENABLE_GLOBAL:
CP_DRAW_PRED_ENABLE_LOCAL:
UNKN27:
CP_PREEMPT_ENABLE:
CP_SKIP_IB2_ENABLE_GLOBAL:
CP_PREEMPT_TOKEN:
UNKN31:
UNKN32:
CP_DRAW_INDX:
CP_SKIP_IB2_ENABLE_LOCAL:
CP_DRAW_AUTO:
CP_SET_STATE:
CP_WAIT_FOR_IDLE:
CP_IM_LOAD:
CP_DRAW_INDIRECT:
CP_DRAW_INDX_INDIRECT:
CP_DRAW_INDIRECT_MULTI:
CP_IM_LOAD_IMMEDIATE:
CP_BLIT:
CP_SET_CONSTANT:
CP_SET_BIN_DATA5_OFFSET:
UNKN48:
CP_RUN_OPENCL:
CP_LOAD_STATE6_GEOM:
CP_EXEC_CS:
CP_LOAD_STATE6_FRAG:
CP_SET_SUBDRAW_SIZE:
CP_LOAD_STATE6:
CP_INDIRECT_BUFFER_PFD:
CP_DRAW_INDX_OFFSET:
CP_REG_TEST:
CP_COND_INDIRECT_BUFFER_PFE:
CP_INVALIDATE_STATE:
CP_WAIT_REG_MEM:
CP_REG_TO_MEM:
CP_INDIRECT_BUFFER:
CP_INTERRUPT:
CP_EXEC_CS_INDIRECT:
CP_MEM_TO_REG:
CP_COND_EXEC:
CP_COND_WRITE5:
CP_EVENT_WRITE:
CP_COND_REG_EXEC:
UNKN73:
CP_REG_TO_SCRATCH:
CP_SET_DRAW_INIT_FLAGS:
CP_SCRATCH_TO_REG:
CP_DRAW_PRED_SET:
CP_MEM_WRITE_CNTR:
CP_START_BIN:
CP_END_BIN:
CP_WAIT_REG_EQ:
CP_SMMU_TABLE_UPDATE:
UNKN84:
CP_SET_CTXSWITCH_IB:
CP_SET_PSEUDO_REG:
CP_INDIRECT_BUFFER_CHAIN:
CP_EVENT_WRITE_SHD:
CP_EVENT_WRITE_CFL:
UNKN90:
CP_EVENT_WRITE_ZPD:
CP_CONTEXT_REG_BUNCH:
CP_WAIT_IB_PFD_COMPLETE:
CP_CONTEXT_UPDATE:
CP_SET_PROTECTED_MODE:
UNKN96:
UNKN97:
UNKN98:
CP_SET_MODE:
CP_SET_VISIBILITY_OVERRIDE:
CP_SET_MARKER:
UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN107:
UNKN108:
CP_REG_WRITE:
UNKN110:
CP_BOOTSTRAP_UCODE:
CP_WAIT_TWO_REGS:
CP_TEST_TWO_MEMS:
CP_REG_TO_MEM_OFFSET_REG:
CP_REG_TO_MEM_OFFSET_MEM:
UNKN118:
UNKN119:
CP_REG_WR_NO_CTXT:
UNKN121:
UNKN122:
UNKN123:
UNKN124:
UNKN125:
UNKN126:
UNKN127:
        waitin
        mov $01, $data