• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef _AFUC_H_
25 #define _AFUC_H_
26 
27 #include <stdbool.h>
28 
29 #include "util/macros.h"
30 
31 /*
32 TODO kernel debugfs to inject packet into rb for easier experimentation.  It
33 should trigger reloading pfp/me and resetting gpu..
34 
35 Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs,
36 should be restricted to CAP_ADMIN and probably compile option too (default=n).
37 if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from
38 RB.
39  */
40 
/* Opcodes use a variable-length encoding inside the 6-bit instruction
 * header.  Opcodes less than 0x30 are encoded as 5 bits followed by the
 * (rep) flag.  Opcodes >= 0x30 (ie. the top two bits are '11') are encoded
 * as 6 bits.  See afuc_get_opc().
 *
 * Several enumerators intentionally share a value: the a5xx and a6xx
 * cwrite/cread opcodes overlap, and OPC_MSB overlaps OPC_STORE6.
 */
typedef enum {
   OPC_NOP = 0x00,

   /* ALU operations taking one src register and an immediate: */
   OPC_ADD   = 0x01, /* add immediate */
   OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */
   OPC_SUB   = 0x03, /* subtract immediate */
   OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */
   OPC_AND   = 0x05, /* AND immediate */
   OPC_OR    = 0x06, /* OR immediate */
   OPC_XOR   = 0x07, /* XOR immediate */
   OPC_NOT   = 0x08, /* bitwise not of immed (src1 ignored) */
   OPC_SHL   = 0x09, /* shift-left immediate */
   OPC_USHR  = 0x0a, /* unsigned shift right by immediate */
   OPC_ISHR  = 0x0b, /* signed shift right by immediate */
   OPC_ROT   = 0x0c, /* rotate left (left shift with wrap-around) */
   OPC_MUL8  = 0x0d, /* 8bit multiply by immediate */
   OPC_MIN   = 0x0e,
   OPC_MAX   = 0x0f,
   OPC_CMP   = 0x10, /* compare src to immed */
   OPC_MOVI  = 0x11, /* move immediate */

   OPC_ALU = 0x13, /* ALU instruction with two src registers */

   /* Return the most-significant bit of src2, or 0 if src2 == 0 (the
    * same as if src2 == 1).  src1 is ignored.  Note that this overlaps
    * with STORE6, so it can only be used with the two-source encoding.
    */
   OPC_MSB = 0x14,

   /* These seem to have something to do with setting some external
    * state..  they don't seem to map *directly* to registers, but I guess
    * that is where things end up.  For example, this sequence in the
    * CP_INDIRECT_BUFFER handler:
    *
    *     mov $02, $data   ; low 32b of IB target address
    *     mov $03, $data   ; high 32b of IB target
    *     mov $04, $data   ; IB size in dwords
    *     breq $04, 0x0, #l23 (#69, 04a2)
    *     and $05, $18, 0x0003
    *     shl $05, $05, 0x0002
    *     cwrite $02, [$05 + 0x0b0], 0x8
    *     cwrite $03, [$05 + 0x0b1], 0x8
    *     cwrite $04, [$05 + 0x0b2], 0x8
    *
    * Note that CP_IB1/2_BASE_LO/HI/BUFSZ are in 0x0b1f->0x0b21 (IB1) and
    * 0x0b22->0x0b24 (IB2).  Presumably $05 ends up w/ a different value
    * for RB->IB1 vs IB1->IB2.
    */
   OPC_CWRITE5 = 0x15,
   OPC_CREAD5  = 0x16,

   /* A6xx shuffled around the cwrite/cread opcodes and added new opcodes
    * that let you read/write directly to memory (and bypass the IOMMU?).
    */
   OPC_STORE6  = 0x14,
   OPC_CWRITE6 = 0x15,
   OPC_LOAD6   = 0x16,
   OPC_CREAD6  = 0x17,

   /* Opcodes using the full 6-bit encoding (no (rep) flag): */
   OPC_BRNEI         = 0x30, /* relative branch (if $src != immed) */
   OPC_BREQI         = 0x31, /* relative branch (if $src == immed) */
   OPC_BRNEB         = 0x32, /* relative branch (if bit not set) */
   OPC_BREQB         = 0x33, /* relative branch (if bit is set) */
   OPC_RET           = 0x34, /* return */
   OPC_CALL          = 0x35, /* "function" call */
   OPC_WIN           = 0x36, /* wait for input (ie. wait for WPTR to advance) */
   OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */
   OPC_SETSECURE     = 0x3b, /* switch secure mode on/off */
} afuc_opc;
114 
/**
 * Special GPR registers.
 *
 * Notes (applicable to a6xx, double check a5xx):
 *
 *   0x1d:
 *      $addr:    writes configure the GPU reg address to read/write
 *                (does not respect CP_PROTECT)
 *      $memdata: reads from the FIFO filled based on MEM_READ_DWORDS/
 *                MEM_READ_ADDR
 *   0x1e: (note the different mnemonic for src vs dst)
 *      $usraddr: writes configure the GPU reg address to read/write,
 *                respecting CP_PROTECT
 *      $regdata: reads from the FIFO filled based on REG_READ_DWORDS/
 *                REG_READ_ADDR
 *   0x1f:
 *      $data:    reads from the pm4 input stream
 *      $data:    writes to the stream configured by a write to $addr
 *                or $usraddr
 */
typedef enum {
   REG_REM = 0x1c,

   /* 0x1d: */
   REG_MEMDATA = 0x1d, /* when used as src */
   REG_ADDR    = 0x1d, /* when used as dst */

   /* 0x1e: */
   REG_REGDATA = 0x1e, /* when used as src */
   REG_USRADDR = 0x1e, /* when used as dst */

   /* 0x1f: same mnemonic for src and dst */
   REG_DATA = 0x1f,
} afuc_reg;
143 
/* One instruction word, exposed as a union of overlapping bitfield views.
 * The bitfields of every view add up to 32 bits, and each named view ends
 * with a 6-bit 'hdr' field.  The trailing anonymous struct exposes that same
 * position as 'opc_r', which is what afuc_get_opc()/afuc_set_opc() access.
 *
 * NOTE(review): the physical bit positions depend on the compiler's
 * bitfield allocation order; presumably the last-declared field lands in
 * the most-significant bits on the supported toolchains -- confirm.
 */
typedef union PACKED {
   /* addi, subi, andi, ori, xori, etc: */
   struct PACKED {
      uint32_t uimm : 16;
      uint32_t dst : 5;
      uint32_t src : 5;
      uint32_t hdr : 6;
   } alui;
   /* Same widths as 'alui', but with a 'shift' field in place of 'src'.
    * NOTE(review): presumably the view used for OPC_MOVI -- confirm.
    */
   struct PACKED {
      uint32_t uimm : 16;
      uint32_t dst : 5;
      uint32_t shift : 5;
      uint32_t hdr : 6;
   } movi;
   /* Two-source-register form; 'alu' is a 5-bit sub-opcode field.
    * NOTE(review): presumably selected by OPC_ALU in the header -- confirm.
    */
   struct PACKED {
      uint32_t alu : 5;
      uint32_t pad : 4;
      uint32_t xmov : 2; /* execute eXtra mov's based on $rem */
      uint32_t dst : 5;
      uint32_t src2 : 5;
      uint32_t src1 : 5;
      uint32_t hdr : 6;
   } alu;
   /* cread/cwrite (and, per the flags note below, load/store) form: */
   struct PACKED {
      uint32_t uimm : 12;
      /* TODO this needs to be confirmed:
       *
       * flags:
       *   0x4 - post-increment src2 by uimm (need to confirm this is also
       *         true for load/cread).  TBD whether, when used in conjunction
       *         with @LOAD_STORE_HI, 32b rollover works properly.
       *
       * other values tbd, also need to confirm if different bits can be
       * set together (I don't see examples of this in existing fw)
       */
      uint32_t flags : 4;
      uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */
      uint32_t src2 : 5; /* read or write address is src2+uimm */
      uint32_t hdr : 6;
   } control;
   /* Branch form; 'ioff' is the only signed field in the union. */
   struct PACKED {
      int32_t ioff : 16; /* relative offset */
      uint32_t bit_or_imm : 5;
      uint32_t src : 5;
      uint32_t hdr : 6;
   } br;
   /* Call form: */
   struct PACKED {
      uint32_t uoff : 26; /* absolute (unsigned) offset */
      uint32_t hdr : 6;
   } call;
   /* Return form: */
   struct PACKED {
      uint32_t pad : 25;
      uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
      uint32_t hdr : 6;
   } ret;
   /* Wait-for-input form; carries no operands, only the header. */
   struct PACKED {
      uint32_t pad : 26;
      uint32_t hdr : 6;
   } waitin;
   /* Anonymous view: raw 6-bit header holding the encoded opcode and,
    * for the 5-bit opcode encoding, the (rep) flag.
    */
   struct PACKED {
      uint32_t pad : 26;
      uint32_t opc_r : 6;
   };

} afuc_instr;
209 
210 static inline void
afuc_get_opc(afuc_instr * ai,afuc_opc * opc,bool * rep)211 afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
212 {
213    if (ai->opc_r < 0x30) {
214       *opc = ai->opc_r >> 1;
215       *rep = ai->opc_r & 0x1;
216    } else {
217       *opc = ai->opc_r;
218       *rep = false;
219    }
220 }
221 
222 static inline void
afuc_set_opc(afuc_instr * ai,afuc_opc opc,bool rep)223 afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
224 {
225    if (opc < 0x30) {
226       ai->opc_r = opc << 1;
227       ai->opc_r |= !!rep;
228    } else {
229       ai->opc_r = opc;
230    }
231 }
232 
/* Printing helpers.  They are only declared in this header; the
 * definitions live elsewhere.
 *
 * NOTE(review): presumably print_src()/print_dst() print the mnemonic for
 * GPR number 'reg' when used as a source / destination operand (the
 * special registers have different src vs dst names), and
 * print_control_reg()/print_pipe_reg() print the name of the control /
 * pipe register with the given id -- confirm against the implementations.
 */
void print_src(unsigned reg);
void print_dst(unsigned reg);
void print_control_reg(uint32_t id);
void print_pipe_reg(uint32_t id);
237 
238 #endif /* _AFUC_H_ */
239