• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef _AFUC_H_
25 #define _AFUC_H_
26 
27 /*
28 TODO kernel debugfs to inject packet into rb for easier experimentation.  It
29 should trigger reloading pfp/me and resetting gpu..
30 
31 Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs,
32 should be restricted to CAP_ADMIN and probably compile option too (default=n).
33 if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from
34 RB.
35  */
36 
/* Element count of a real array object (not valid on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))

/* GCC-style attribute asking for a layout without padding. */
#define PACKED __attribute__((__packed__))
39 
/* Opcode numbering for afuc instructions.
 *
 * The 6-bit opcode field of an instruction is variable length: when
 * the field is below 0x30, its upper 5 bits are the opcode and the
 * lowest bit is the (rep) flag.  When the field is 0x30 or above
 * (ie. its top two bits are '11'), all 6 bits are the opcode.
 * See afuc_get_opc().
 *
 * Several values are deliberately shared between names (eg. the *5
 * and *6 variants, and OPC_MSB vs. OPC_STORE6).
 */
typedef enum {
	OPC_NOP = 0x00,

	/* ALU operations taking an immediate operand: */
	OPC_ADD = 0x01,    /* add immediate */
	OPC_ADDHI = 0x02,  /* add immediate (hi 32b of 64b) */
	OPC_SUB = 0x03,    /* subtract immediate */
	OPC_SUBHI = 0x04,  /* subtract immediate (hi 32b of 64b) */
	OPC_AND = 0x05,    /* AND immediate */
	OPC_OR = 0x06,     /* OR immediate */
	OPC_XOR = 0x07,    /* XOR immediate */
	OPC_NOT = 0x08,    /* bitwise not of immed (src1 ignored) */
	OPC_SHL = 0x09,    /* shift-left immediate */
	OPC_USHR = 0x0a,   /* unsigned shift right by immediate */
	OPC_ISHR = 0x0b,   /* signed shift right by immediate */
	OPC_ROT = 0x0c,    /* rotate left (left shift with wrap-around) */
	OPC_MUL8 = 0x0d,   /* 8bit multiply by immediate */
	OPC_MIN = 0x0e,
	OPC_MAX = 0x0f,
	OPC_CMP = 0x10,    /* compare src to immed */
	OPC_MOVI = 0x11,   /* move immediate */

	OPC_ALU = 0x13,    /* ALU instruction with two src registers */

	/* Yields the most-significant bit of src2; src2 == 0 gives 0 (the
	 * same result as src2 == 1).  src1 is ignored.  The value collides
	 * with OPC_STORE6, so this is only usable with the two-source
	 * encoding.
	 */
	OPC_MSB = 0x14,

	/* These appear to set some external state.  They do not seem to
	 * map *directly* to registers, although that is presumably where
	 * things end up.  For example, this sequence is found in the
	 * CP_INDIRECT_BUFFER handler:
	 *
	 *     mov $02, $data   ; low 32b of IB target address
	 *     mov $03, $data   ; high 32b of IB target
	 *     mov $04, $data   ; IB size in dwords
	 *     breq $04, 0x0, #l23 (#69, 04a2)
	 *     and $05, $18, 0x0003
	 *     shl $05, $05, 0x0002
	 *     cwrite $02, [$05 + 0x0b0], 0x8
	 *     cwrite $03, [$05 + 0x0b1], 0x8
	 *     cwrite $04, [$05 + 0x0b2], 0x8
	 *
	 * Note that CP_IB1/2_BASE_LO/HI/BUFSZ are at 0x0b1f->0x0b21 (IB1)
	 * and 0x0b22->0x0b24 (IB2).  Presumably $05 ends up with a
	 * different value for RB->IB1 vs IB1->IB2.
	 */
	OPC_CWRITE5 = 0x15,
	OPC_CREAD5 = 0x16,

	/* A6xx shuffled around the cwrite/cread opcodes and added new
	 * opcodes that let you read/write directly to memory (and bypass
	 * the IOMMU?).
	 */
	OPC_STORE6 = 0x14,
	OPC_CWRITE6 = 0x15,
	OPC_LOAD6 = 0x16,
	OPC_CREAD6 = 0x17,

	/* 6-bit opcodes (no (rep) flag): */
	OPC_BRNEI = 0x30,          /* relative branch (if $src != immed) */
	OPC_BREQI = 0x31,          /* relative branch (if $src == immed) */
	OPC_BRNEB = 0x32,          /* relative branch (if bit not set) */
	OPC_BREQB = 0x33,          /* relative branch (if bit is set) */
	OPC_RET = 0x34,            /* return */
	OPC_CALL = 0x35,           /* "function" call */
	OPC_WIN = 0x36,            /* wait for input (ie. wait for WPTR to advance) */
	OPC_PREEMPTLEAVE6 = 0x38,  /* try to leave preemption */
	OPC_SETSECURE = 0x3b,      /* switch secure mode on/off */
} afuc_opc;
114 
115 
116 typedef union PACKED {
117 	/* addi, subi, andi, ori, xori, etc: */
118 	struct PACKED {
119 		uint32_t uimm    : 16;
120 		uint32_t dst     : 5;
121 		uint32_t src     : 5;
122 		uint32_t hdr     : 6;
123 	} alui;
124 	struct PACKED {
125 		uint32_t uimm    : 16;
126 		uint32_t dst     : 5;
127 		uint32_t shift   : 5;
128 		uint32_t hdr     : 6;
129 	} movi;
130 	struct PACKED {
131 		uint32_t alu     : 5;
132 		uint32_t pad     : 4;
133 		uint32_t xmov    : 2; /* execute eXtra mov's based on $rem */
134 		uint32_t dst     : 5;
135 		uint32_t src2    : 5;
136 		uint32_t src1    : 5;
137 		uint32_t hdr     : 6;
138 	} alu;
139 	struct PACKED {
140 		uint32_t uimm    : 12;
141 		uint32_t flags   : 4;
142 		uint32_t src1    : 5;     /* dst (cread) or src (cwrite) register */
143 		uint32_t src2    : 5;     /* read or write address is src2+uimm */
144 		uint32_t hdr     : 6;
145 	} control;
146 	struct PACKED {
147 		int32_t  ioff    : 16;    /* relative offset */
148 		uint32_t bit_or_imm : 5;
149 		uint32_t src     : 5;
150 		uint32_t hdr     : 6;
151 	} br;
152 	struct PACKED {
153 		uint32_t uoff    : 26;    /* absolute (unsigned) offset */
154 		uint32_t hdr     : 6;
155 	} call;
156 	struct PACKED {
157 		uint32_t pad       : 25;
158 		uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */
159 		uint32_t hdr       : 6;
160 	} ret;
161 	struct PACKED {
162 		uint32_t pad     : 26;
163 		uint32_t hdr     : 6;
164 	} waitin;
165 	struct PACKED {
166 		uint32_t pad     : 26;
167 		uint32_t opc_r   : 6;
168 	};
169 
170 } afuc_instr;
171 
172 static inline void
afuc_get_opc(afuc_instr * ai,afuc_opc * opc,bool * rep)173 afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)
174 {
175 	if (ai->opc_r < 0x30) {
176 		*opc = ai->opc_r >> 1;
177 		*rep = ai->opc_r & 0x1;
178 	} else {
179 		*opc = ai->opc_r;
180 		*rep = false;
181 	}
182 }
183 
184 static inline void
afuc_set_opc(afuc_instr * ai,afuc_opc opc,bool rep)185 afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)
186 {
187 	if (opc < 0x30) {
188 		ai->opc_r = opc << 1;
189 		ai->opc_r |= !!rep;
190 	} else {
191 		ai->opc_r = opc;
192 	}
193 }
194 
195 #endif /* _AFUC_H_ */
196