1 /*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <stdio.h>
25
26 #include "ir3.h"
27 #include "ir3_assembler.h"
28 #include "ir3_shader.h"
29
/*
 * A test for delay-slot calculation.  Each test specifies ir3 assembly
 * for one or more instructions, the last of which consumes the values
 * produced by the earlier ones, along with the expected number of delay
 * slots that would be needed before that last instruction.  Any source
 * registers in the last instruction which are not written by a previous
 * instruction are not counted.
 */
38
39 #define TEST(n, ...) { # __VA_ARGS__, n }
40
41 static const struct test {
42 const char *asmstr;
43 unsigned expected_delay;
44 } tests[] = {
45 TEST(6,
46 add.f r0.x, r2.x, r2.y
47 rsq r0.x, r0.x
48 ),
49 TEST(3,
50 mov.f32f32 r0.x, c0.x
51 mov.f32f32 r0.y, c0.y
52 add.f r0.x, r0.x, r0.y
53 ),
54 TEST(2,
55 mov.f32f32 r0.x, c0.x
56 mov.f32f32 r0.y, c0.y
57 mov.f32f32 r0.z, c0.z
58 mad.f32 r0.x, r0.x, r0.y, r0.z
59 ),
60 TEST(2,
61 mov.f32f32 r0.x, c0.x
62 mov.f32f32 r0.y, c0.y
63 (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
64 ),
65 TEST(2,
66 (rpt1)mov.f32f32 r0.x, c0.x
67 (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
68 ),
69 TEST(3,
70 mov.f32f32 r0.y, c0.y
71 mov.f32f32 r0.x, c0.x
72 (rpt1)add.f r0.x, (r)r0.x, (r)c0.x
73 ),
74 TEST(1,
75 (rpt2)mov.f32f32 r0.x, (r)c0.x
76 add.f r0.x, r0.x, c0.x
77 ),
78 TEST(2,
79 (rpt2)mov.f32f32 r0.x, (r)c0.x
80 add.f r0.x, r0.x, r0.y
81 ),
82 TEST(1,
83 (rpt2)mov.f32f32 r0.x, (r)c0.x
84 (rpt2)add.f r0.x, (r)r0.x, c0.x
85 ),
86 };
87
88 static struct ir3_shader *
parse_asm(struct ir3_compiler * c,const char * asmstr)89 parse_asm(struct ir3_compiler *c, const char *asmstr)
90 {
91 struct ir3_kernel_info info = {};
92 FILE *in = fmemopen((void *)asmstr, strlen(asmstr), "r");
93 struct ir3_shader *shader = ir3_parse_asm(c, &info, in);
94
95 fclose(in);
96
97 return shader;
98 }
99
100 static unsigned
regn(struct ir3_register * reg)101 regn(struct ir3_register *reg)
102 {
103 unsigned regn = reg->num;
104 if (reg->flags & IR3_REG_HALF)
105 regn += MAX_REG;
106 return regn;
107 }
108
109 /**
110 * Super-cheezy into-ssa pass, doesn't handle flow control or anything
111 * hard. Just enough to figure out the SSA srcs of the last instruction.
112 *
113 * Note that this is not clever enough to know how many src/dst there are
114 * for various tex/mem instructions. But the rules for tex consuming alu
115 * are the same as sfu consuming alu.
116 */
117 static void
regs_to_ssa(struct ir3 * ir)118 regs_to_ssa(struct ir3 *ir)
119 {
120 struct ir3_instruction *regfile[2 * MAX_REG] = {};
121 struct ir3_block *block =
122 list_first_entry(&ir->block_list, struct ir3_block, node);
123
124 foreach_instr_safe (instr, &block->instr_list) {
125 foreach_src (reg, instr) {
126 if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
127 continue;
128
129 struct ir3_instruction *src = regfile[regn(reg)];
130
131 if (!src)
132 continue;
133
134 if (reg->flags & IR3_REG_R) {
135 unsigned nsrc = 1 + instr->repeat;
136 unsigned flags = src->regs[0]->flags & IR3_REG_HALF;
137 struct ir3_instruction *collect =
138 ir3_instr_create2(block, OPC_META_COLLECT, 1 + nsrc);
139 __ssa_dst(collect)->flags |= flags;
140 for (unsigned i = 0; i < nsrc; i++)
141 __ssa_src(collect, regfile[regn(reg) + i], flags);
142
143 ir3_instr_move_before(collect, instr);
144
145 src = collect;
146 }
147
148 reg->instr = src;
149 reg->flags |= IR3_REG_SSA;
150 }
151
152 if (instr->repeat) {
153 unsigned ndst = 1 + instr->repeat;
154 unsigned flags = instr->regs[0]->flags & IR3_REG_HALF;
155
156 for (unsigned i = 0; i < ndst; i++) {
157 struct ir3_instruction *split =
158 ir3_instr_create(block, OPC_META_SPLIT);
159 __ssa_dst(split)->flags |= flags;
160 __ssa_src(split, instr, flags);
161 split->split.off = i;
162
163 ir3_instr_move_after(split, instr);
164
165 regfile[regn(instr->regs[0]) + i] = split;
166 }
167 } else {
168 regfile[regn(instr->regs[0])] = instr;
169 }
170 }
171 }
172
173
174 int
main(int argc,char ** argv)175 main(int argc, char **argv)
176 {
177 struct ir3_compiler *c;
178 int result = 0;
179
180 c = ir3_compiler_create(NULL, 630);
181
182 for (int i = 0; i < ARRAY_SIZE(tests); i++) {
183 const struct test *test = &tests[i];
184 struct ir3_shader *shader = parse_asm(c, test->asmstr);
185 struct ir3 *ir = shader->variants->ir;
186
187 regs_to_ssa(ir);
188
189 ir3_debug_print(ir, "AFTER REGS->SSA");
190
191 struct ir3_block *block =
192 list_first_entry(&ir->block_list, struct ir3_block, node);
193 struct ir3_instruction *last = NULL;
194
195 foreach_instr_rev (instr, &block->instr_list) {
196 if (is_meta(instr))
197 continue;
198 last = instr;
199 break;
200 }
201
202 /* The delay calc is expecting the instr to not yet be added to the
203 * block, so remove it from the block so that it doesn't get counted
204 * in the distance from assigner:
205 */
206 list_delinit(&last->node);
207
208 unsigned n = ir3_delay_calc(block, last, false, false);
209
210 if (n != test->expected_delay) {
211 printf("%d: FAIL: Expected delay %u, but got %u, for:\n%s\n",
212 i, test->expected_delay, n, test->asmstr);
213 result = -1;
214 } else {
215 printf("%d: PASS\n", i);
216 }
217
218 ir3_shader_destroy(shader);
219 }
220
221 return result;
222 }
223