1 /*
2 * Copyright © 2014 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file
26 *
27 * Validates the QPU instruction sequence after register allocation and
28 * scheduling.
29 */
30
31 #include <assert.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
36
/**
 * Running state carried from one instruction to the next while walking the
 * program in order.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instruction stream is being checked. */
        struct v3d_compile *c;

        /* Instruction validated just before the current one, or NULL while
         * the first instruction is being checked.
         */
        const struct v3d_qpu_instr *last;

        /* Index of the current instruction, counted across all blocks. */
        int ip;

        /* ip recorded for the most recent SFU magic write and the most
         * recent branch, used for the "too soon after" checks.
         */
        int last_sfu_write;
        int last_branch_ip;

        /* ip of the most recent THRSW that was not the second half of a
         * back-to-back pair, and how many such THRSWs have been seen.
         */
        int last_thrsw_ip;
        int thrsw_count;

        /* True once the last-THRSW signal has been seen, or from the start
         * when validation begins in single-segment mode.
         */
        bool last_thrsw_found;

        /* True once the THRSW following the last THRSW has been seen. */
        bool thrend_found;
};
55
56 static void
fail_instr(struct v3d_qpu_validate_state * state,const char * msg)57 fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58 {
59 struct v3d_compile *c = state->c;
60
61 fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62
63 int dump_ip = 0;
64 vir_for_each_inst_inorder(inst, c) {
65 v3d_qpu_dump(c->devinfo, &inst->qpu);
66
67 if (dump_ip++ == state->ip)
68 fprintf(stderr, " *** ERROR ***");
69
70 fprintf(stderr, "\n");
71 }
72
73 fprintf(stderr, "\n");
74 abort();
75 }
76
77 static bool
in_branch_delay_slots(struct v3d_qpu_validate_state * state)78 in_branch_delay_slots(struct v3d_qpu_validate_state *state)
79 {
80 return (state->ip - state->last_branch_ip) < 3;
81 }
82
83 static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state * state)84 in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85 {
86 return (state->ip - state->last_thrsw_ip) < 3;
87 }
88
89 static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr * inst,bool (* predicate)(enum v3d_qpu_waddr waddr))90 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91 bool (*predicate)(enum v3d_qpu_waddr waddr))
92 {
93 if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94 return false;
95
96 if (inst->alu.add.op != V3D_QPU_A_NOP &&
97 inst->alu.add.magic_write &&
98 predicate(inst->alu.add.waddr))
99 return true;
100
101 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102 inst->alu.mul.magic_write &&
103 predicate(inst->alu.mul.waddr))
104 return true;
105
106 return false;
107 }
108
109 static void
qpu_validate_inst(struct v3d_qpu_validate_state * state,struct qinst * qinst)110 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111 {
112 const struct v3d_device_info *devinfo = state->c->devinfo;
113 const struct v3d_qpu_instr *inst = &qinst->qpu;
114
115 if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116 return;
117
118 /* LDVARY writes r5 two instructions later and LDUNIF writes
119 * r5 one instruction later, which is illegal to have
120 * together.
121 */
122 if (state->last && state->last->sig.ldvary &&
123 (inst->sig.ldunif || inst->sig.ldunifa)) {
124 fail_instr(state, "LDUNIF after a LDVARY");
125 }
126
127 /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
128 *
129 * FIXME: This would not check correctly for V3D 4.2 versions lower
130 * than V3D 4.2.14, but that is not a real issue because the simulator
131 * will still catch this, and we are not really targetting any such
132 * versions anyway.
133 */
134 if (state->c->devinfo->ver < 42) {
135 bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
136 state->last->sig.ldunifrf));
137 bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
138 state->last->sig.ldunifarf));
139 bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
140 bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
141 if ((last_reads_ldunif && reads_ldunifa) ||
142 (last_reads_ldunifa && reads_ldunif)) {
143 fail_instr(state,
144 "LDUNIF and LDUNIFA can't be next to each other");
145 }
146 }
147
148 int tmu_writes = 0;
149 int sfu_writes = 0;
150 int vpm_writes = 0;
151 int tlb_writes = 0;
152 int tsy_writes = 0;
153
154 if (inst->alu.add.op != V3D_QPU_A_NOP) {
155 if (inst->alu.add.magic_write) {
156 if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
157 inst->alu.add.waddr)) {
158 tmu_writes++;
159 }
160 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
161 sfu_writes++;
162 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
163 vpm_writes++;
164 if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
165 tlb_writes++;
166 if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
167 tsy_writes++;
168 }
169 }
170
171 if (inst->alu.mul.op != V3D_QPU_M_NOP) {
172 if (inst->alu.mul.magic_write) {
173 if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
174 inst->alu.mul.waddr)) {
175 tmu_writes++;
176 }
177 if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
178 sfu_writes++;
179 if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
180 vpm_writes++;
181 if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
182 tlb_writes++;
183 if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
184 tsy_writes++;
185 }
186 }
187
188 if (in_thrsw_delay_slots(state)) {
189 /* There's no way you want to start SFU during the THRSW delay
190 * slots, since the result would land in the other thread.
191 */
192 if (sfu_writes) {
193 fail_instr(state,
194 "SFU write started during THRSW delay slots ");
195 }
196
197 if (inst->sig.ldvary)
198 fail_instr(state, "LDVARY during THRSW delay slots");
199 }
200
201 (void)qpu_magic_waddr_matches; /* XXX */
202
203 /* SFU r4 results come back two instructions later. No doing
204 * r4 read/writes or other SFU lookups until it's done.
205 */
206 if (state->ip - state->last_sfu_write < 2) {
207 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
208 fail_instr(state, "R4 read too soon after SFU");
209
210 if (v3d_qpu_writes_r4(devinfo, inst))
211 fail_instr(state, "R4 write too soon after SFU");
212
213 if (sfu_writes)
214 fail_instr(state, "SFU write too soon after SFU");
215 }
216
217 /* XXX: The docs say VPM can happen with the others, but the simulator
218 * disagrees.
219 */
220 if (tmu_writes +
221 sfu_writes +
222 vpm_writes +
223 tlb_writes +
224 tsy_writes +
225 inst->sig.ldtmu +
226 inst->sig.ldtlb +
227 inst->sig.ldvpm +
228 inst->sig.ldtlbu > 1) {
229 fail_instr(state,
230 "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
231 }
232
233 if (sfu_writes)
234 state->last_sfu_write = state->ip;
235
236 if (inst->sig.thrsw) {
237 if (in_branch_delay_slots(state))
238 fail_instr(state, "THRSW in a branch delay slot.");
239
240 if (state->last_thrsw_found)
241 state->thrend_found = true;
242
243 if (state->last_thrsw_ip == state->ip - 1) {
244 /* If it's the second THRSW in a row, then it's just a
245 * last-thrsw signal.
246 */
247 if (state->last_thrsw_found)
248 fail_instr(state, "Two last-THRSW signals");
249 state->last_thrsw_found = true;
250 } else {
251 if (in_thrsw_delay_slots(state)) {
252 fail_instr(state,
253 "THRSW too close to another THRSW.");
254 }
255 state->thrsw_count++;
256 state->last_thrsw_ip = state->ip;
257 }
258 }
259
260 if (state->thrend_found &&
261 state->last_thrsw_ip - state->ip <= 2 &&
262 inst->type == V3D_QPU_INSTR_TYPE_ALU) {
263 if ((inst->alu.add.op != V3D_QPU_A_NOP &&
264 !inst->alu.add.magic_write)) {
265 fail_instr(state, "RF write after THREND");
266 }
267
268 if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
269 !inst->alu.mul.magic_write)) {
270 fail_instr(state, "RF write after THREND");
271 }
272
273 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
274 !inst->sig_magic) {
275 fail_instr(state, "RF write after THREND");
276 }
277
278 /* GFXH-1625: No TMUWT in the last instruction */
279 if (state->last_thrsw_ip - state->ip == 2 &&
280 inst->alu.add.op == V3D_QPU_A_TMUWT)
281 fail_instr(state, "TMUWT in last instruction");
282 }
283
284 if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
285 if (in_branch_delay_slots(state))
286 fail_instr(state, "branch in a branch delay slot.");
287 if (in_thrsw_delay_slots(state))
288 fail_instr(state, "branch in a THRSW delay slot.");
289 state->last_branch_ip = state->ip;
290 }
291 }
292
293 static void
qpu_validate_block(struct v3d_qpu_validate_state * state,struct qblock * block)294 qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
295 {
296 vir_for_each_inst(qinst, block) {
297 qpu_validate_inst(state, qinst);
298
299 state->last = &qinst->qpu;
300 state->ip++;
301 }
302 }
303
304 /**
305 * Checks for the instruction restrictions from page 37 ("Summary of
306 * Instruction Restrictions").
307 */
308 void
qpu_validate(struct v3d_compile * c)309 qpu_validate(struct v3d_compile *c)
310 {
311 /* We don't want to do validation in release builds, but we want to
312 * keep compiling the validation code to make sure it doesn't get
313 * broken.
314 */
315 #ifndef DEBUG
316 return;
317 #endif
318
319 struct v3d_qpu_validate_state state = {
320 .c = c,
321 .last_sfu_write = -10,
322 .last_thrsw_ip = -10,
323 .last_branch_ip = -10,
324 .ip = 0,
325
326 .last_thrsw_found = !c->last_thrsw,
327 };
328
329 vir_for_each_block(block, c) {
330 qpu_validate_block(&state, block);
331 }
332
333 if (state.thrsw_count > 1 && !state.last_thrsw_found) {
334 fail_instr(&state,
335 "thread switch found without last-THRSW in program");
336 }
337
338 if (!state.thrend_found)
339 fail_instr(&state, "No program-end THRSW found");
340 }
341