• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * @file
26  *
27  * Validates the QPU instruction sequence after register allocation and
28  * scheduling.
29  */
30 
31 #include <assert.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include "v3d_compiler.h"
35 #include "qpu/qpu_disasm.h"
36 
/**
 * Running state carried from one instruction to the next while a compiled
 * program is being validated.
 */
struct v3d_qpu_validate_state {
        /* Compile whose instructions are being checked. */
        struct v3d_compile *c;

        /* QPU encoding of the previously validated instruction, or NULL
         * before the first one has been processed.
         */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated.  It keeps
         * counting across blocks.
         */
        int ip;

        /* ip of the most recent instruction that wrote an SFU magic
         * register.
         */
        int last_sfu_write;

        /* ip of the most recent branch instruction. */
        int last_branch_ip;

        /* ip of the most recent THRSW, ignoring a THRSW that immediately
         * follows another one (that second one is only the last-THRSW
         * signal).
         */
        int last_thrsw_ip;

        /* Lowest ip of an instruction flagged as a TLB Z write. */
        int first_tlb_z_write;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW */
        bool thrend_found;

        /* Number of THRSWs seen, not counting last-THRSW signals. */
        int thrsw_count;
};
56 
57 static void
fail_instr(struct v3d_qpu_validate_state * state,const char * msg)58 fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
59 {
60         struct v3d_compile *c = state->c;
61 
62         fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
63 
64         int dump_ip = 0;
65         vir_for_each_inst_inorder(inst, c) {
66                 v3d_qpu_dump(c->devinfo, &inst->qpu);
67 
68                 if (dump_ip++ == state->ip)
69                         fprintf(stderr, " *** ERROR ***");
70 
71                 fprintf(stderr, "\n");
72         }
73 
74         fprintf(stderr, "\n");
75         abort();
76 }
77 
78 static bool
in_branch_delay_slots(struct v3d_qpu_validate_state * state)79 in_branch_delay_slots(struct v3d_qpu_validate_state *state)
80 {
81         return (state->ip - state->last_branch_ip) < 3;
82 }
83 
84 static bool
in_thrsw_delay_slots(struct v3d_qpu_validate_state * state)85 in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
86 {
87         return (state->ip - state->last_thrsw_ip) < 3;
88 }
89 
90 static bool
qpu_magic_waddr_matches(const struct v3d_qpu_instr * inst,bool (* predicate)(enum v3d_qpu_waddr waddr))91 qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
92                         bool (*predicate)(enum v3d_qpu_waddr waddr))
93 {
94         if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
95                 return false;
96 
97         if (inst->alu.add.op != V3D_QPU_A_NOP &&
98             inst->alu.add.magic_write &&
99             predicate(inst->alu.add.waddr))
100                 return true;
101 
102         if (inst->alu.mul.op != V3D_QPU_M_NOP &&
103             inst->alu.mul.magic_write &&
104             predicate(inst->alu.mul.waddr))
105                 return true;
106 
107         return false;
108 }
109 
110 static void
qpu_validate_inst(struct v3d_qpu_validate_state * state,struct qinst * qinst)111 qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
112 {
113         const struct v3d_device_info *devinfo = state->c->devinfo;
114 
115         if (qinst->is_tlb_z_write && state->ip < state->first_tlb_z_write)
116                 state->first_tlb_z_write = state->ip;
117 
118         const struct v3d_qpu_instr *inst = &qinst->qpu;
119 
120         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH &&
121             state->first_tlb_z_write >= 0 &&
122             state->ip > state->first_tlb_z_write &&
123             inst->branch.msfign != V3D_QPU_MSFIGN_NONE &&
124             inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS &&
125             inst->branch.cond != V3D_QPU_BRANCH_COND_A0 &&
126             inst->branch.cond != V3D_QPU_BRANCH_COND_NA0) {
127                 fail_instr(state, "Implicit branch MSF read after TLB Z write");
128         }
129 
130         if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
131                 return;
132 
133         if (inst->alu.add.op == V3D_QPU_A_SETMSF &&
134             state->first_tlb_z_write >= 0 &&
135             state->ip > state->first_tlb_z_write) {
136                 fail_instr(state, "SETMSF after TLB Z write");
137         }
138 
139         if (state->first_tlb_z_write >= 0 &&
140             state->ip > state->first_tlb_z_write &&
141             inst->alu.add.op == V3D_QPU_A_MSF) {
142                 fail_instr(state, "MSF read after TLB Z write");
143         }
144 
145         if (devinfo->ver < 71) {
146                 if (inst->sig.small_imm_a || inst->sig.small_imm_c ||
147                     inst->sig.small_imm_d) {
148                         fail_instr(state, "small imm a/c/d added after V3D 7.1");
149                 }
150         } else {
151                 if ((inst->sig.small_imm_a || inst->sig.small_imm_b) &&
152                     !vir_is_add(qinst)) {
153                         fail_instr(state, "small imm a/b used but no ADD inst");
154                 }
155                 if ((inst->sig.small_imm_c || inst->sig.small_imm_d) &&
156                     !vir_is_mul(qinst)) {
157                         fail_instr(state, "small imm c/d used but no MUL inst");
158                 }
159                 if (inst->sig.small_imm_a + inst->sig.small_imm_b +
160                     inst->sig.small_imm_c + inst->sig.small_imm_d > 1) {
161                         fail_instr(state, "only one small immediate can be "
162                                    "enabled per instruction");
163                 }
164         }
165 
166         /* LDVARY writes r5 two instructions later and LDUNIF writes
167          * r5 one instruction later, which is illegal to have
168          * together.
169          */
170         if (state->last && state->last->sig.ldvary &&
171             (inst->sig.ldunif || inst->sig.ldunifa)) {
172                 fail_instr(state, "LDUNIF after a LDVARY");
173         }
174 
175         /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
176          *
177          * FIXME: This would not check correctly for V3D 4.2 versions lower
178          * than V3D 4.2.14, but that is not a real issue because the simulator
179          * will still catch this, and we are not really targeting any such
180          * versions anyway.
181          */
182         if (state->c->devinfo->ver < 42) {
183                 bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
184                                                           state->last->sig.ldunifrf));
185                 bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
186                                                            state->last->sig.ldunifarf));
187                 bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
188                 bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
189                 if ((last_reads_ldunif && reads_ldunifa) ||
190                     (last_reads_ldunifa && reads_ldunif)) {
191                         fail_instr(state,
192                                    "LDUNIF and LDUNIFA can't be next to each other");
193                 }
194         }
195 
196         int tmu_writes = 0;
197         int sfu_writes = 0;
198         int vpm_writes = 0;
199         int tlb_writes = 0;
200         int tsy_writes = 0;
201 
202         if (inst->alu.add.op != V3D_QPU_A_NOP) {
203                 if (inst->alu.add.magic_write) {
204                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
205                                                        inst->alu.add.waddr)) {
206                                 tmu_writes++;
207                         }
208                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
209                                 sfu_writes++;
210                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
211                                 vpm_writes++;
212                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
213                                 tlb_writes++;
214                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
215                                 tsy_writes++;
216                 }
217         }
218 
219         if (inst->alu.mul.op != V3D_QPU_M_NOP) {
220                 if (inst->alu.mul.magic_write) {
221                         if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
222                                                        inst->alu.mul.waddr)) {
223                                 tmu_writes++;
224                         }
225                         if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
226                                 sfu_writes++;
227                         if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
228                                 vpm_writes++;
229                         if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
230                                 tlb_writes++;
231                         if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
232                                 tsy_writes++;
233                 }
234         }
235 
236         if (in_thrsw_delay_slots(state)) {
237                 /* There's no way you want to start SFU during the THRSW delay
238                  * slots, since the result would land in the other thread.
239                  */
240                 if (sfu_writes) {
241                         fail_instr(state,
242                                    "SFU write started during THRSW delay slots ");
243                 }
244 
245                 if (inst->sig.ldvary) {
246                         if (devinfo->ver == 42)
247                                 fail_instr(state, "LDVARY during THRSW delay slots");
248                         if (devinfo->ver >= 71 &&
249                             state->ip - state->last_thrsw_ip == 2) {
250                                 fail_instr(state, "LDVARY in 2nd THRSW delay slot");
251                         }
252                 }
253         }
254 
255         (void)qpu_magic_waddr_matches; /* XXX */
256 
257         /* SFU r4 results come back two instructions later.  No doing
258          * r4 read/writes or other SFU lookups until it's done.
259          */
260         if (state->ip - state->last_sfu_write < 2) {
261                 if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
262                         fail_instr(state, "R4 read too soon after SFU");
263 
264                 if (v3d_qpu_writes_r4(devinfo, inst))
265                         fail_instr(state, "R4 write too soon after SFU");
266 
267                 if (sfu_writes)
268                         fail_instr(state, "SFU write too soon after SFU");
269         }
270 
271         /* XXX: The docs say VPM can happen with the others, but the simulator
272          * disagrees.
273          */
274         if (tmu_writes +
275             sfu_writes +
276             vpm_writes +
277             tlb_writes +
278             tsy_writes +
279             (devinfo->ver == 42 ? inst->sig.ldtmu : 0) +
280             inst->sig.ldtlb +
281             inst->sig.ldvpm +
282             inst->sig.ldtlbu > 1) {
283                 fail_instr(state,
284                            "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
285         }
286 
287         if (sfu_writes)
288                 state->last_sfu_write = state->ip;
289 
290         if (inst->sig.thrsw) {
291                 if (in_branch_delay_slots(state))
292                         fail_instr(state, "THRSW in a branch delay slot.");
293 
294                 if (state->last_thrsw_found)
295                         state->thrend_found = true;
296 
297                 if (state->last_thrsw_ip == state->ip - 1) {
298                         /* If it's the second THRSW in a row, then it's just a
299                          * last-thrsw signal.
300                          */
301                         if (state->last_thrsw_found)
302                                 fail_instr(state, "Two last-THRSW signals");
303                         state->last_thrsw_found = true;
304                 } else {
305                         if (in_thrsw_delay_slots(state)) {
306                                 fail_instr(state,
307                                            "THRSW too close to another THRSW.");
308                         }
309                         state->thrsw_count++;
310                         state->last_thrsw_ip = state->ip;
311                 }
312         }
313 
314         if (state->thrend_found &&
315             state->last_thrsw_ip - state->ip <= 2 &&
316             inst->type == V3D_QPU_INSTR_TYPE_ALU) {
317                 if ((inst->alu.add.op != V3D_QPU_A_NOP &&
318                      !inst->alu.add.magic_write)) {
319                         if (devinfo->ver == 42) {
320                                 fail_instr(state, "RF write after THREND");
321                         } else if (devinfo->ver >= 71) {
322                                 if (state->last_thrsw_ip - state->ip == 0) {
323                                         fail_instr(state,
324                                                    "ADD RF write at THREND");
325                                 }
326                                 if (inst->alu.add.waddr == 2 ||
327                                     inst->alu.add.waddr == 3) {
328                                         fail_instr(state,
329                                                    "RF2-3 write after THREND");
330                                 }
331                         }
332                 }
333 
334                 if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
335                      !inst->alu.mul.magic_write)) {
336                         if (devinfo->ver == 42) {
337                                 fail_instr(state, "RF write after THREND");
338                         } else if (devinfo->ver >= 71) {
339                                 if (state->last_thrsw_ip - state->ip == 0) {
340                                         fail_instr(state,
341                                                    "MUL RF write at THREND");
342                                 }
343 
344                                 if (inst->alu.mul.waddr == 2 ||
345                                     inst->alu.mul.waddr == 3) {
346                                         fail_instr(state,
347                                                    "RF2-3 write after THREND");
348                                 }
349                         }
350                 }
351 
352                 if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
353                     !inst->sig_magic) {
354                         if (devinfo->ver == 42) {
355                                 fail_instr(state, "RF write after THREND");
356                         } else if (devinfo->ver >= 71 &&
357                                    (inst->sig_addr == 2 ||
358                                     inst->sig_addr == 3)) {
359                                 fail_instr(state, "RF2-3 write after THREND");
360                         }
361                 }
362 
363                 /* GFXH-1625: No TMUWT in the last instruction */
364                 if (state->last_thrsw_ip - state->ip == 2 &&
365                     inst->alu.add.op == V3D_QPU_A_TMUWT)
366                         fail_instr(state, "TMUWT in last instruction");
367         }
368 
369         if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
370                 if (in_branch_delay_slots(state))
371                         fail_instr(state, "branch in a branch delay slot.");
372                 if (in_thrsw_delay_slots(state))
373                         fail_instr(state, "branch in a THRSW delay slot.");
374                 state->last_branch_ip = state->ip;
375         }
376 }
377 
378 static void
qpu_validate_block(struct v3d_qpu_validate_state * state,struct qblock * block)379 qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
380 {
381         vir_for_each_inst(qinst, block) {
382                 qpu_validate_inst(state, qinst);
383 
384                 state->last = &qinst->qpu;
385                 state->ip++;
386         }
387 }
388 
389 /**
390  * Checks for the instruction restrictions from page 37 ("Summary of
391  * Instruction Restrictions").
392  */
393 void
qpu_validate(struct v3d_compile * c)394 qpu_validate(struct v3d_compile *c)
395 {
396         /* We don't want to do validation in release builds, but we want to
397          * keep compiling the validation code to make sure it doesn't get
398          * broken.
399          */
400 #ifndef DEBUG
401         return;
402 #endif
403 
404         struct v3d_qpu_validate_state state = {
405                 .c = c,
406                 .last_sfu_write = -10,
407                 .last_thrsw_ip = -10,
408                 .last_branch_ip = -10,
409                 .first_tlb_z_write = INT_MAX,
410                 .ip = 0,
411 
412                 .last_thrsw_found = !c->last_thrsw,
413         };
414 
415         vir_for_each_block(block, c) {
416                 qpu_validate_block(&state, block);
417         }
418 
419         if (state.thrsw_count > 1 && !state.last_thrsw_found) {
420                 fail_instr(&state,
421                            "thread switch found without last-THRSW in program");
422         }
423 
424         if (!state.thrend_found)
425                 fail_instr(&state, "No program-end THRSW found");
426 }
427