; Copyright (c) 2020 Valve Corporation
;
; Permission is hereby granted, free of charge, to any person obtaining a
; copy of this software and associated documentation files (the "Software"),
; to deal in the Software without restriction, including without limitation
; the rights to use, copy, modify, merge, publish, distribute, sublicense,
; and/or sell copies of the Software, and to permit persons to whom the
; Software is furnished to do so, subject to the following conditions:
;
; The above copyright notice and this permission notice (including the next
; paragraph) shall be included in all copies or substantial portions of the
; Software.
;
; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
; SOFTWARE.
;
;
; This file is the source for a simple mock firmware used to regression test
; the afuc assembler/disassembler. Note, it won't actually work if you try to
; load it on the GPU! First this is assembled, compared to the reference
; binary, then disassembled and compared to the reference disassembly. We do
; this to avoid having to host the actual firmware, especially the disassembled
; version, in Mesa.
; ---------------------------------------------------------------------------
; Start-up code, followed by the first four packet handlers.
;
; Two raw bracketed words open the image. The CP_ME_INIT handler further down
; expects #loc02 to evaluate to 0x0002, i.e. loc02 is the third slot.
; ---------------------------------------------------------------------------
[01000001]
[01000000]
loc02:
; --- packet table loading --------------------------------------------------
; Request a 2-dword register read starting at 0x0830.
        mov     $01, 0x0830             ; CP_SQE_INSTR_BASE
        mov     $02, 0x0002
        cwrite  $01, [$00 + @REG_READ_ADDR], 0x0
        cwrite  $02, [$00 + @REG_READ_DWORDS], 0x0
; Fetch the two halves of the SQE firmware address from $regdata.
; $01 is treated as the low half (add) and $02 as the high half (addhi).
        mov     $01, $regdata
        mov     $02, $regdata
; Step over the first firmware dword (+4 bytes on the 64-bit address) and
; request a single dword from memory at the resulting address.
        add     $01, $01, 0x0004
        addhi   $02, $02, 0x0000
        mov     $03, 0x0001
        cwrite  $01, [$00 + @MEM_READ_ADDR], 0x0
        cwrite  $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
        cwrite  $03, [$00 + @MEM_READ_DWORDS], 0x0
; The dword just read is the 2nd dword of the firmware. Turn it into an
; offset (rot by 8, then ushr by 6), subtract the 4 bytes already skipped,
; and add the result to the SQE firmware base address.
        rot     $04, $memdata, 0x0008
        ushr    $04, $04, 0x0006
        sub     $04, $04, 0x0004
        add     $01, $01, $04
        addhi   $02, $02, 0x0000

; --- load packet table -----------------------------------------------------
; Request 0x80 dwords from the computed address, reset the packet table write
; address to zero ($00), then stream $memdata into the table with (rep).
        mov     $rem, 0x0080
        cwrite  $01, [$00 + @MEM_READ_ADDR], 0x0
        cwrite  $02, [$00 + @MEM_READ_ADDR+0x1], 0x0
        cwrite  $02, [$00 + @LOAD_STORE_HI], 0x0
        cwrite  $rem, [$00 + @MEM_READ_DWORDS], 0x0
        cwrite  $00, [$00 + @PACKET_TABLE_WRITE_ADDR], 0x0
        (rep)cwrite $memdata, [$00 + @PACKET_TABLE_WRITE], 0x0

; Build (0xdead << 16) | 0xbeef in $03 and write it to 0x883 through the
; REG_WRITE_ADDR / REG_WRITE control registers.
        mov     $02, 0x883
        mov     $03, 0xbeef
        mov     $04, 0xdead << 16
        or      $03, $03, $04
        cwrite  $02, [$00 + @REG_WRITE_ADDR], 0x0
        cwrite  $03, [$00 + @REG_WRITE], 0x0
; Every section in this block finishes with the same waitin / mov pair.
        waitin
        mov     $01, $data

CP_ME_INIT:
; Exercises the label-as-immediate operand form.
        mov     $02, #loc02             ; should be 0x0002
        waitin
        mov     $01, $data

CP_MEM_WRITE:
; Exercises $addr together with the (rep) and (xmovN) modifiers on ALU
; instructions.
        mov     $addr, 0xa0 << 24
        mov     $02, 4
        (xmov1)add $data, $02, $data
        mov     $addr, 0xa204 << 16
        (rep)(xmov3)mov $data, $data
        waitin
        mov     $01, $data

CP_SCRATCH_WRITE:
; Exercises cwrite with (rep), a non-zero flags field and a non-zero offset.
; TODO: 0x4 flag is actually pre-increment addressing, handle it as such
        mov     $02, 0xff
        (rep)cwrite $data, [$02 + 0x001], 0x4
        waitin
        mov     $01, $data

CP_SET_SECURE_MODE:
; Exercises setsecure, which takes a register and a target label.
        mov     $02, $data
        setsecure $02, #setsecure_success
; err jumps to itself, so execution that reaches it never leaves.
; It is also the target of preemptleave in the UNKN15 handler.
err:
        jump    #err
        nop
setsecure_success:
        waitin
        mov     $01, $data
; ---------------------------------------------------------------------------
; euclid - greatest common divisor by repeated subtraction.
;
; Reference: https://en.wikipedia.org/wiki/Euclidean_algorithm
; afuc has no modulo instruction, so the subtraction-based variant is used.
;
; In:   $02, $03
; Out:  $02
; Uses: $04 (receives the cmp result)
;
; Besides being a helper for CP_REG_RMW, this routine exercises comparisons
; and conditional branches, including the common trick of placing a branch in
; the delay slot of another branch. If a branch is taken and its delay slot
; holds another branch, that second branch cannot be taken as well - which is
; why the last branch in the chain below must not be unconditional.
; ---------------------------------------------------------------------------
euclid:
        cmp     $04, $02, $03
; Three chained branches, each testing one bit (b0, b1, b2) of the cmp result.
        breq    $04, b0, #euclid_exit
        brne    $04, b1, #euclid_gt
        breq    $04, b2, #euclid
        sub     $03, $03, $02
euclid_gt:
        jump    #euclid
        sub     $02, $02, $03
euclid_exit:
        ret
        nop

CP_REG_RMW:
; Exercises a range of ALU instructions as well as reading and writing
; $regdata. The call to euclid sits in the middle of the sequence.
        cwrite  $data, [$00 + @REG_READ_ADDR], 0x0
        add     $02, $regdata, 0x42
        addhi   $03, $00, $regdata
        sub     $02, $02, $regdata
        call    #euclid
        subhi   $03, $03, $regdata
        and     $02, $02, $regdata
        or      $02, $02, 0x1
        xor     $02, $02, 0x1
        not     $02, $02
        shl     $02, $02, $regdata
        ushr    $02, $02, $regdata
        ishr    $02, $02, $regdata
        rot     $02, $02, $regdata
        min     $02, $02, $regdata
        max     $02, $02, $regdata
        mul8    $02, $02, $regdata
        msb     $02, $02
        mov     $usraddr, $data
        mov     $data, $02
        waitin
        mov     $01, $data

CP_MEMCPY:
; CP_MEMCPY built from the load/store instructions.
; Five consecutive $data reads fill $02..$06:
;   $02 / $04  base registers for the load and the store
;   $03 / $05  written to @LOAD_STORE_HI before the load / the store
;   $06        compared against 0 to leave the loop
; NOTE(review): nothing inside the visible loop writes $06. That is harmless
; for a mock image that is only assembled and disassembled, but would need
; confirming if this were ever meant to run.
        mov     $02, $data
        mov     $03, $data
        mov     $04, $data
        mov     $05, $data
        mov     $06, $data
cpy_header:
        breq    $06, 0, #cpy_exit
        cwrite  $03, [$00 + @LOAD_STORE_HI], 0x0
        load    $07, [$02 + 0x004], 0x4
        cwrite  $05, [$00 + @LOAD_STORE_HI], 0x0
        jump    #cpy_header
        store   $07, [$04 + 0x004], 0x4
cpy_exit:
        waitin
        mov     $01, $data

CP_MEM_TO_MEM:
; CP_MEMCPY variant built on the memory-read control registers.
; Exercises the @FOO+0x1 syntax for 64-bit control registers and
; reading/writing $rem.
        cwrite  $data, [$00 + @MEM_READ_ADDR], 0x0
        cwrite  $data, [$00 + @MEM_READ_ADDR+1], 0x0
        mov     $02, $data
        cwrite  $data, [$00 + @LOAD_STORE_HI], 0x0
        mov     $rem, $data
        cwrite  $rem, [$00 + @MEM_READ_DWORDS], 0x0
        (rep)store $memdata, [$02 + 0x004], 0x4
        waitin
        mov     $01, $data

UNKN15:
; Exercises preemptleave, iret, and the conditional branch form that takes an
; immediate operand. The err label used by preemptleave is defined in the
; CP_SET_SECURE_MODE handler.
        cread   $02, [$00 + 0x101], 0x0
        brne    $02, 0x0001, #exit_iret
        nop
        preemptleave #err
        nop
        nop
        nop
        waitin
        mov     $01, $data
exit_iret:
        iret
        nop

; ---------------------------------------------------------------------------
; Every remaining packet label is placed on the same address and shares the
; single waitin / mov handler at the bottom.
; ---------------------------------------------------------------------------
UNKN0:
UNKN1:
UNKN2:
UNKN3:
PKT4:
UNKN5:
UNKN6:
UNKN7:
UNKN8:
UNKN9:
UNKN10:
UNKN11:
UNKN12:
UNKN13:
UNKN14:
CP_NOP:
CP_RECORD_PFP_TIMESTAMP:
CP_WAIT_MEM_WRITES:
CP_WAIT_FOR_ME:
CP_WAIT_MEM_GTE:
UNKN21:
UNKN22:
UNKN23:
UNKN24:
CP_DRAW_PRED_ENABLE_GLOBAL:
CP_DRAW_PRED_ENABLE_LOCAL:
UNKN27:
CP_PREEMPT_ENABLE:
CP_SKIP_IB2_ENABLE_GLOBAL:
CP_PREEMPT_TOKEN:
UNKN31:
UNKN32:
CP_DRAW_INDX:
CP_SKIP_IB2_ENABLE_LOCAL:
CP_DRAW_AUTO:
CP_SET_STATE:
CP_WAIT_FOR_IDLE:
CP_IM_LOAD:
CP_DRAW_INDIRECT:
CP_DRAW_INDX_INDIRECT:
CP_DRAW_INDIRECT_MULTI:
CP_IM_LOAD_IMMEDIATE:
CP_BLIT:
CP_SET_CONSTANT:
CP_SET_BIN_DATA5_OFFSET:
CP_SET_BIN_DATA5:
UNKN48:
CP_RUN_OPENCL:
CP_LOAD_STATE6_GEOM:
CP_EXEC_CS:
CP_LOAD_STATE6_FRAG:
CP_SET_SUBDRAW_SIZE:
CP_LOAD_STATE6:
CP_INDIRECT_BUFFER_PFD:
CP_DRAW_INDX_OFFSET:
CP_REG_TEST:
CP_COND_INDIRECT_BUFFER_PFE:
CP_INVALIDATE_STATE:
CP_WAIT_REG_MEM:
CP_REG_TO_MEM:
CP_INDIRECT_BUFFER:
CP_INTERRUPT:
CP_EXEC_CS_INDIRECT:
CP_MEM_TO_REG:
CP_SET_DRAW_STATE:
CP_COND_EXEC:
CP_COND_WRITE5:
CP_EVENT_WRITE:
CP_COND_REG_EXEC:
UNKN73:
CP_REG_TO_SCRATCH:
CP_SET_DRAW_INIT_FLAGS:
CP_SCRATCH_TO_REG:
CP_DRAW_PRED_SET:
CP_MEM_WRITE_CNTR:
CP_START_BIN:
CP_END_BIN:
CP_WAIT_REG_EQ:
CP_SMMU_TABLE_UPDATE:
UNKN84:
CP_SET_CTXSWITCH_IB:
CP_SET_PSEUDO_REG:
CP_INDIRECT_BUFFER_CHAIN:
CP_EVENT_WRITE_SHD:
CP_EVENT_WRITE_CFL:
UNKN90:
CP_EVENT_WRITE_ZPD:
CP_CONTEXT_REG_BUNCH:
CP_WAIT_IB_PFD_COMPLETE:
CP_CONTEXT_UPDATE:
CP_SET_PROTECTED_MODE:
UNKN96:
UNKN97:
UNKN98:
CP_SET_MODE:
CP_SET_VISIBILITY_OVERRIDE:
CP_SET_MARKER:
UNKN103:
UNKN104:
UNKN105:
UNKN106:
UNKN107:
UNKN108:
CP_REG_WRITE:
UNKN110:
CP_BOOTSTRAP_UCODE:
CP_WAIT_TWO_REGS:
CP_TEST_TWO_MEMS:
CP_REG_TO_MEM_OFFSET_REG:
CP_REG_TO_MEM_OFFSET_MEM:
UNKN118:
UNKN119:
CP_REG_WR_NO_CTXT:
UNKN121:
UNKN122:
UNKN123:
UNKN124:
UNKN125:
UNKN126:
UNKN127:
        waitin
        mov     $01, $data