1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "aco_ir.h"
26
27 #include "util/memstream.h"
28 #include "util/ralloc.h"
29
30 #include <array>
31 #include <map>
32 #include <set>
33 #include <vector>
34
35 namespace aco {
36
37 static void
aco_log(Program * program,enum aco_compiler_debug_level level,const char * prefix,const char * file,unsigned line,const char * fmt,va_list args)38 aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
39 unsigned line, const char* fmt, va_list args)
40 {
41 char* msg;
42
43 if (program->debug.shorten_messages) {
44 msg = ralloc_vasprintf(NULL, fmt, args);
45 } else {
46 msg = ralloc_strdup(NULL, prefix);
47 ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
48 ralloc_asprintf_append(&msg, " ");
49 ralloc_vasprintf_append(&msg, fmt, args);
50 }
51
52 if (program->debug.func)
53 program->debug.func(program->debug.private_data, level, msg);
54
55 fprintf(program->debug.output, "%s\n", msg);
56
57 ralloc_free(msg);
58 }
59
60 void
_aco_perfwarn(Program * program,const char * file,unsigned line,const char * fmt,...)61 _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
62 {
63 va_list args;
64
65 va_start(args, fmt);
66 aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
67 va_end(args);
68 }
69
70 void
_aco_err(Program * program,const char * file,unsigned line,const char * fmt,...)71 _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
72 {
73 va_list args;
74
75 va_start(args, fmt);
76 aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
77 va_end(args);
78 }
79
80 bool
validate_ir(Program * program)81 validate_ir(Program* program)
82 {
83 bool is_valid = true;
84 auto check = [&program, &is_valid](bool success, const char* msg,
85 aco::Instruction* instr) -> void
86 {
87 if (!success) {
88 char* out;
89 size_t outsize;
90 struct u_memstream mem;
91 u_memstream_open(&mem, &out, &outsize);
92 FILE* const memf = u_memstream_get(&mem);
93
94 fprintf(memf, "%s: ", msg);
95 aco_print_instr(program->gfx_level, instr, memf);
96 u_memstream_close(&mem);
97
98 aco_err(program, "%s", out);
99 free(out);
100
101 is_valid = false;
102 }
103 };
104
105 for (Block& block : program->blocks) {
106 for (aco_ptr<Instruction>& instr : block.instructions) {
107
108 unsigned pck_defs = instr_info.definitions[(int)instr->opcode];
109 unsigned pck_ops = instr_info.operands[(int)instr->opcode];
110
111 if (pck_defs != 0) {
112 /* Before GFX10 v_cmpx also writes VCC. */
113 if (instr->isVOPC() && program->gfx_level < GFX10 && pck_defs == exec_hi)
114 pck_defs = vcc | (exec_hi << 8);
115
116 for (unsigned i = 0; i < 4; i++) {
117 uint32_t def = (pck_defs >> (i * 8)) & 0xff;
118 if (def == 0) {
119 check(i == instr->definitions.size(), "Too many definitions", instr.get());
120 break;
121 } else {
122 check(i < instr->definitions.size(), "Too few definitions", instr.get());
123 if (i >= instr->definitions.size())
124 break;
125 }
126
127 if (def == m0) {
128 check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == m0,
129 "Definition needs m0", instr.get());
130 } else if (def == scc) {
131 check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == scc,
132 "Definition needs scc", instr.get());
133 } else if (def == exec_hi) {
134 RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
135 check(instr->definitions[i].isFixed() &&
136 instr->definitions[i].physReg() == exec &&
137 instr->definitions[i].regClass() == rc,
138 "Definition needs exec", instr.get());
139 } else if (def == exec_lo) {
140 check(instr->definitions[i].isFixed() &&
141 instr->definitions[i].physReg() == exec_lo &&
142 instr->definitions[i].regClass() == s1,
143 "Definition needs exec_lo", instr.get());
144 } else if (def == vcc) {
145 check(instr->definitions[i].regClass() == program->lane_mask,
146 "Definition has to be lane mask", instr.get());
147 check(!instr->definitions[i].isFixed() ||
148 instr->definitions[i].physReg() == vcc || instr->isVOP3() ||
149 instr->isSDWA(),
150 "Definition has to be vcc", instr.get());
151 } else {
152 check(instr->definitions[i].size() == def, "Definition has wrong size",
153 instr.get());
154 }
155 }
156 }
157
158 if (pck_ops != 0) {
159 for (unsigned i = 0; i < 4; i++) {
160 uint32_t op = (pck_ops >> (i * 8)) & 0xff;
161 if (op == 0) {
162 check(i == instr->operands.size(), "Too many operands", instr.get());
163 break;
164 } else {
165 check(i < instr->operands.size(), "Too few operands", instr.get());
166 if (i >= instr->operands.size())
167 break;
168 }
169
170 if (op == m0) {
171 check(instr->operands[i].isFixed() && instr->operands[i].physReg() == m0,
172 "Operand needs m0", instr.get());
173 } else if (op == scc) {
174 check(instr->operands[i].isFixed() && instr->operands[i].physReg() == scc,
175 "Operand needs scc", instr.get());
176 } else if (op == exec_hi) {
177 RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
178 check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec &&
179 instr->operands[i].hasRegClass() && instr->operands[i].regClass() == rc,
180 "Operand needs exec", instr.get());
181 } else if (op == exec_lo) {
182 check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec_lo &&
183 instr->operands[i].hasRegClass() && instr->operands[i].regClass() == s1,
184 "Operand needs exec_lo", instr.get());
185 } else if (op == vcc) {
186 check(instr->operands[i].hasRegClass() &&
187 instr->operands[i].regClass() == program->lane_mask,
188 "Operand has to be lane mask", instr.get());
189 check(!instr->operands[i].isFixed() || instr->operands[i].physReg() == vcc ||
190 instr->isVOP3(),
191 "Operand has to be vcc", instr.get());
192 } else {
193 check(instr->operands[i].size() == op ||
194 (instr->operands[i].isFixed() && instr->operands[i].physReg() >= 128 &&
195 instr->operands[i].physReg() < 256),
196 "Operand has wrong size", instr.get());
197 }
198 }
199 }
200
201 /* check base format */
202 Format base_format = instr->format;
203 base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
204 base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
205 base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
206 if ((uint32_t)base_format & (uint32_t)Format::VOP1)
207 base_format = Format::VOP1;
208 else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
209 base_format = Format::VOP2;
210 else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
211 base_format = Format::VOPC;
212 else if (base_format == Format::VINTRP) {
213 if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
214 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
215 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
216 instr->opcode == aco_opcode::v_interp_p2_f16) {
217 /* v_interp_*_fp16 are considered VINTRP by the compiler but
218 * they are emitted as VOP3.
219 */
220 base_format = Format::VOP3;
221 } else {
222 base_format = Format::VINTRP;
223 }
224 }
225 check(base_format == instr_info.format[(int)instr->opcode],
226 "Wrong base format for instruction", instr.get());
227
228 /* check VOP3 modifiers */
229 if (instr->isVOP3() && withoutDPP(instr->format) != Format::VOP3) {
230 check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
231 base_format == Format::VOPC || base_format == Format::VINTRP,
232 "Format cannot have VOP3/VOP3B applied", instr.get());
233 }
234
235 if (instr->isDPP()) {
236 check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
237 base_format == Format::VOPC || base_format == Format::VOP3 ||
238 base_format == Format::VOP3P,
239 "Format cannot have DPP applied", instr.get());
240 check((!instr->isVOP3() && !instr->isVOP3P()) || program->gfx_level >= GFX11,
241 "VOP3+DPP is GFX11+ only", instr.get());
242
243 bool fi =
244 instr->isDPP8() ? instr->dpp8().fetch_inactive : instr->dpp16().fetch_inactive;
245 check(!fi || program->gfx_level >= GFX10, "DPP Fetch-Inactive is GFX10+ only",
246 instr.get());
247 }
248
249 /* check SDWA */
250 if (instr->isSDWA()) {
251 check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
252 base_format == Format::VOPC,
253 "Format cannot have SDWA applied", instr.get());
254
255 check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
256 check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
257
258 SDWA_instruction& sdwa = instr->sdwa();
259 check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
260 instr.get());
261 if (base_format == Format::VOPC) {
262 check(sdwa.clamp == false || program->gfx_level == GFX8,
263 "SDWA VOPC clamp only supported on GFX8", instr.get());
264 check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
265 program->gfx_level >= GFX9,
266 "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
267 } else {
268 const Definition& def = instr->definitions[0];
269 check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
270 instr.get());
271 check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
272 "SDWA definition selection size must be at most definition size", instr.get());
273 check(
274 sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
275 "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
276 check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
277 instr.get());
278 check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
279 "SDWA dst_sel size must be definition size for subdword definitions",
280 instr.get());
281 check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
282 "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
283 }
284
285 for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
286 const Operand& op = instr->operands[i];
287 check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
288 check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
289 "SDWA operand selection size must be at most operand size", instr.get());
290 check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
291 "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
292 check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
293 instr.get());
294 }
295 if (instr->operands.size() >= 3) {
296 check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
297 "3rd operand must be fixed to vcc with SDWA", instr.get());
298 }
299 if (instr->definitions.size() >= 2) {
300 check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
301 "2nd definition must be fixed to vcc with SDWA", instr.get());
302 }
303
304 const bool sdwa_opcodes =
305 instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
306 instr->opcode != aco_opcode::v_fmamk_f32 &&
307 instr->opcode != aco_opcode::v_fmaak_f32 &&
308 instr->opcode != aco_opcode::v_fmamk_f16 &&
309 instr->opcode != aco_opcode::v_fmaak_f16 &&
310 instr->opcode != aco_opcode::v_madmk_f32 &&
311 instr->opcode != aco_opcode::v_madak_f32 &&
312 instr->opcode != aco_opcode::v_madmk_f16 &&
313 instr->opcode != aco_opcode::v_madak_f16 &&
314 instr->opcode != aco_opcode::v_readfirstlane_b32 &&
315 instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
316
317 const bool feature_mac =
318 program->gfx_level == GFX8 &&
319 (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
320
321 check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
322 }
323
324 /* check opsel */
325 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
326 instr->opcode == aco_opcode::v_permlanex16_b32) {
327 check(instr->valu().opsel <= 0x3, "Unexpected opsel for permlane", instr.get());
328 } else if (instr->isVOP3() || instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) {
329 VALU_instruction& valu = instr->valu();
330 check(valu.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
331 instr.get());
332 check(valu.opsel == 0 || instr->format == Format::VOP3 || program->gfx_level >= GFX11,
333 "Opsel is only supported for VOP3 before GFX11", instr.get());
334
335 for (unsigned i = 0; i < 3; i++) {
336 if (i >= instr->operands.size() ||
337 (!instr->isVOP3() && !instr->operands[i].isOfType(RegType::vgpr)) ||
338 (instr->operands[i].hasRegClass() &&
339 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
340 check(!valu.opsel[i], "Unexpected opsel for operand", instr.get());
341 }
342 if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
343 check(!valu.opsel[3], "Unexpected opsel for sub-dword definition", instr.get());
344 } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
345 instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
346 instr->opcode == aco_opcode::v_fma_mix_f32) {
347 check(instr->definitions[0].regClass() ==
348 (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
349 "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
350 } else if (instr->isVOP3P()) {
351 VALU_instruction& vop3p = instr->valu();
352 for (unsigned i = 0; i < instr->operands.size(); i++) {
353 if (instr->operands[i].hasRegClass() &&
354 instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
355 check(!vop3p.opsel_lo[i] && !vop3p.opsel_hi[i],
356 "Unexpected opsel for subdword operand", instr.get());
357 }
358 check(instr->definitions[0].regClass() == v1 ||
359 instr_info.classes[(int)instr->opcode] == instr_class::wmma,
360 "VOP3P must have v1 definition", instr.get());
361 }
362
363 /* check for undefs */
364 for (unsigned i = 0; i < instr->operands.size(); i++) {
365 if (instr->operands[i].isUndefined()) {
366 bool flat = instr->isFlatLike();
367 bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
368 instr->opcode == aco_opcode::p_create_vector ||
369 instr->opcode == aco_opcode::p_jump_to_epilog ||
370 instr->opcode == aco_opcode::p_dual_src_export_gfx11 ||
371 instr->opcode == aco_opcode::p_end_with_regs ||
372 (instr->opcode == aco_opcode::p_interp_gfx11 && i == 0) ||
373 (instr->opcode == aco_opcode::p_bpermute_permlane && i == 0) ||
374 (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
375 ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
376 (instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
377 (instr->opcode == aco_opcode::p_init_scratch && i == 0);
378 check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
379 } else {
380 check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
381 instr->operands[i].isConstant(),
382 "Uninitialized Operand", instr.get());
383 }
384 }
385
386 /* check subdword definitions */
387 for (unsigned i = 0; i < instr->definitions.size(); i++) {
388 if (instr->definitions[i].regClass().is_subdword())
389 check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
390 "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
391 instr.get());
392 }
393
394 if ((instr->isSALU() && instr->opcode != aco_opcode::p_constaddr_addlo &&
395 instr->opcode != aco_opcode::p_resumeaddr_addlo) ||
396 instr->isVALU()) {
397 /* check literals */
398 Operand literal(s1);
399 for (unsigned i = 0; i < instr->operands.size(); i++) {
400 Operand op = instr->operands[i];
401 if (!op.isLiteral())
402 continue;
403
404 check(!instr->isDPP() && !instr->isSDWA() &&
405 (!instr->isVOP3() || program->gfx_level >= GFX10) &&
406 (!instr->isVOP3P() || program->gfx_level >= GFX10),
407 "Literal applied on wrong instruction format", instr.get());
408
409 check(literal.isUndefined() || (literal.size() == op.size() &&
410 literal.constantValue() == op.constantValue()),
411 "Only 1 Literal allowed", instr.get());
412 literal = op;
413 check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
414 "Wrong source position for Literal argument", instr.get());
415 }
416
417 /* check num sgprs for VALU */
418 if (instr->isVALU()) {
419 bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
420 instr->opcode == aco_opcode::v_lshrrev_b64 ||
421 instr->opcode == aco_opcode::v_ashrrev_i64;
422 unsigned const_bus_limit = 1;
423 if (program->gfx_level >= GFX10 && !is_shift64)
424 const_bus_limit = 2;
425
426 uint32_t scalar_mask =
427 instr->isVOP3() || instr->isVOP3P() || instr->isVINTERP_INREG() ? 0x7 : 0x5;
428 if (instr->isSDWA())
429 scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
430 else if (instr->isDPP())
431 scalar_mask = 0x4;
432
433 if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
434 instr->opcode == aco_opcode::v_readlane_b32 ||
435 instr->opcode == aco_opcode::v_readlane_b32_e64) {
436 check(instr->definitions[0].regClass().type() == RegType::sgpr,
437 "Wrong Definition type for VALU instruction", instr.get());
438 } else {
439 check(instr->definitions[0].regClass().type() == RegType::vgpr,
440 "Wrong Definition type for VALU instruction", instr.get());
441 }
442
443 unsigned num_sgprs = 0;
444 unsigned sgpr[] = {0, 0};
445 for (unsigned i = 0; i < instr->operands.size(); i++) {
446 Operand op = instr->operands[i];
447 if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
448 instr->opcode == aco_opcode::v_readlane_b32 ||
449 instr->opcode == aco_opcode::v_readlane_b32_e64) {
450 check(i != 1 || op.isOfType(RegType::sgpr) || op.isConstant(),
451 "Must be a SGPR or a constant", instr.get());
452 check(i == 1 || (op.isOfType(RegType::vgpr) && op.bytes() <= 4),
453 "Wrong Operand type for VALU instruction", instr.get());
454 continue;
455 }
456 if (instr->opcode == aco_opcode::v_permlane16_b32 ||
457 instr->opcode == aco_opcode::v_permlanex16_b32 ||
458 instr->opcode == aco_opcode::v_permlane64_b32) {
459 check(i != 0 || op.isOfType(RegType::vgpr),
460 "Operand 0 of v_permlane must be VGPR", instr.get());
461 check(i == 0 || op.isOfType(RegType::sgpr) || op.isConstant(),
462 "Lane select operands of v_permlane must be SGPR or constant",
463 instr.get());
464 }
465
466 if (instr->opcode == aco_opcode::v_writelane_b32 ||
467 instr->opcode == aco_opcode::v_writelane_b32_e64) {
468 check(i != 2 || (op.isOfType(RegType::vgpr) && op.bytes() <= 4),
469 "Wrong Operand type for VALU instruction", instr.get());
470 check(i == 2 || op.isOfType(RegType::sgpr) || op.isConstant(),
471 "Must be a SGPR or a constant", instr.get());
472 continue;
473 }
474 if (op.isOfType(RegType::sgpr)) {
475 check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
476 instr.get());
477
478 if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
479 if (num_sgprs < 2)
480 sgpr[num_sgprs++] = op.tempId();
481 }
482 }
483
484 if (op.isConstant() && !op.isLiteral())
485 check(scalar_mask & (1 << i), "Wrong source position for constant argument",
486 instr.get());
487 }
488 check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
489 "Too many SGPRs/literals", instr.get());
490
491 /* Validate modifiers. */
492 check(!instr->valu().opsel || instr->isVOP3() || instr->isVOP1() ||
493 instr->isVOP2() || instr->isVOPC() || instr->isVINTERP_INREG(),
494 "OPSEL set for unsupported instruction format", instr.get());
495 check(!instr->valu().opsel_lo || instr->isVOP3P(),
496 "OPSEL_LO set for unsupported instruction format", instr.get());
497 check(!instr->valu().opsel_hi || instr->isVOP3P(),
498 "OPSEL_HI set for unsupported instruction format", instr.get());
499 check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
500 "OMOD set for unsupported instruction format", instr.get());
501 check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
502 instr->isSDWA() || instr->isVINTERP_INREG(),
503 "CLAMP set for unsupported instruction format", instr.get());
504
505 for (bool abs : instr->valu().abs) {
506 check(!abs || instr->isVOP3() || instr->isVOP3P() || instr->isSDWA() ||
507 instr->isDPP16(),
508 "ABS/NEG_HI set for unsupported instruction format", instr.get());
509 }
510 for (bool neg : instr->valu().neg) {
511 check(!neg || instr->isVOP3() || instr->isVOP3P() || instr->isSDWA() ||
512 instr->isDPP16() || instr->isVINTERP_INREG(),
513 "NEG/NEG_LO set for unsupported instruction format", instr.get());
514 }
515 }
516
517 if (instr->isSOP1() || instr->isSOP2()) {
518 if (!instr->definitions.empty())
519 check(instr->definitions[0].regClass().type() == RegType::sgpr,
520 "Wrong Definition type for SALU instruction", instr.get());
521 for (const Operand& op : instr->operands) {
522 check(op.isConstant() || op.isOfType(RegType::sgpr),
523 "Wrong Operand type for SALU instruction", instr.get());
524 }
525 }
526 }
527
528 switch (instr->format) {
529 case Format::PSEUDO: {
530 if (instr->opcode == aco_opcode::p_create_vector) {
531 unsigned size = 0;
532 for (const Operand& op : instr->operands) {
533 check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
534 size += op.bytes();
535 }
536 check(size == instr->definitions[0].bytes(),
537 "Definition size does not match operand sizes", instr.get());
538 if (instr->definitions[0].regClass().type() == RegType::sgpr) {
539 for (const Operand& op : instr->operands) {
540 check(op.isConstant() || op.regClass().type() == RegType::sgpr,
541 "Wrong Operand type for scalar vector", instr.get());
542 }
543 }
544 } else if (instr->opcode == aco_opcode::p_extract_vector) {
545 check(!instr->operands[0].isConstant() && instr->operands[1].isConstant(),
546 "Wrong Operand types", instr.get());
547 check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
548 instr->operands[0].bytes(),
549 "Index out of range", instr.get());
550 check(instr->definitions[0].regClass().type() == RegType::vgpr ||
551 instr->operands[0].regClass().type() == RegType::sgpr,
552 "Cannot extract SGPR value from VGPR vector", instr.get());
553 check(program->gfx_level >= GFX9 ||
554 !instr->definitions[0].regClass().is_subdword() ||
555 instr->operands[0].regClass().type() == RegType::vgpr,
556 "Cannot extract subdword from SGPR before GFX9+", instr.get());
557 } else if (instr->opcode == aco_opcode::p_split_vector) {
558 check(!instr->operands[0].isConstant(), "Operand must not be constant", instr.get());
559 unsigned size = 0;
560 for (const Definition& def : instr->definitions) {
561 size += def.bytes();
562 }
563 check(size == instr->operands[0].bytes(),
564 "Operand size does not match definition sizes", instr.get());
565 if (instr->operands[0].isOfType(RegType::vgpr)) {
566 for (const Definition& def : instr->definitions)
567 check(def.regClass().type() == RegType::vgpr,
568 "Wrong Definition type for VGPR split_vector", instr.get());
569 } else {
570 for (const Definition& def : instr->definitions)
571 check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
572 "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
573 }
574 } else if (instr->opcode == aco_opcode::p_parallelcopy) {
575 check(instr->definitions.size() == instr->operands.size(),
576 "Number of Operands does not match number of Definitions", instr.get());
577 for (unsigned i = 0; i < instr->operands.size(); i++) {
578 check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
579 "Operand and Definition size must match", instr.get());
580 if (instr->operands[i].hasRegClass()) {
581 check((instr->definitions[i].regClass().type() ==
582 instr->operands[i].regClass().type()) ||
583 (instr->definitions[i].regClass().type() == RegType::vgpr &&
584 instr->operands[i].regClass().type() == RegType::sgpr),
585 "Operand and Definition types do not match", instr.get());
586 check(instr->definitions[i].regClass().is_linear_vgpr() ==
587 instr->operands[i].regClass().is_linear_vgpr(),
588 "Operand and Definition types do not match", instr.get());
589 } else {
590 check(!instr->definitions[i].regClass().is_linear_vgpr(),
591 "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
592 instr.get());
593 }
594 }
595 } else if (instr->opcode == aco_opcode::p_phi) {
596 check(instr->operands.size() == block.logical_preds.size(),
597 "Number of Operands does not match number of predecessors", instr.get());
598 check(instr->definitions[0].regClass().type() == RegType::vgpr,
599 "Logical Phi Definition must be vgpr", instr.get());
600 for (const Operand& op : instr->operands)
601 check(instr->definitions[0].size() == op.size(),
602 "Operand sizes must match Definition size", instr.get());
603 } else if (instr->opcode == aco_opcode::p_linear_phi) {
604 for (const Operand& op : instr->operands) {
605 check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
606 instr.get());
607 check(instr->definitions[0].size() == op.size(),
608 "Operand sizes must match Definition size", instr.get());
609 }
610 check(instr->operands.size() == block.linear_preds.size(),
611 "Number of Operands does not match number of predecessors", instr.get());
612 } else if (instr->opcode == aco_opcode::p_extract ||
613 instr->opcode == aco_opcode::p_insert) {
614 check(!instr->operands[0].isConstant(), "Data operand must not be constant",
615 instr.get());
616 check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
617 if (instr->opcode == aco_opcode::p_extract)
618 check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
619 instr.get());
620
621 check(instr->definitions[0].regClass().type() != RegType::sgpr ||
622 instr->operands[0].regClass().type() == RegType::sgpr,
623 "Can't extract/insert VGPR to SGPR", instr.get());
624
625 if (instr->opcode == aco_opcode::p_insert)
626 check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
627 "Sizes of p_insert data operand and definition must match", instr.get());
628
629 if (instr->definitions[0].regClass().type() == RegType::sgpr)
630 check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
631 instr->definitions[1].physReg() == scc,
632 "SGPR extract/insert needs an SCC definition", instr.get());
633
634 unsigned data_bits = instr->operands[0].bytes() * 8u;
635 unsigned op_bits = instr->operands[2].constantValue();
636
637 if (instr->opcode == aco_opcode::p_insert) {
638 check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
639 check(op_bits < data_bits, "Size must be smaller than source", instr.get());
640 } else if (instr->opcode == aco_opcode::p_extract) {
641 check(op_bits == 8 || op_bits == 16 || op_bits == 32,
642 "Size must be 8 or 16 or 32", instr.get());
643 check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
644 instr.get());
645 }
646
647 unsigned comp = data_bits / MAX2(op_bits, 1);
648 check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
649 instr.get());
650 } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
651 check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
652 instr.get());
653 check(instr->operands.size() > 0 && instr->operands[0].isOfType(RegType::sgpr) &&
654 instr->operands[0].size() == 2,
655 "First operand of p_jump_to_epilog must be a SGPR", instr.get());
656 for (unsigned i = 1; i < instr->operands.size(); i++) {
657 check(instr->operands[i].isOfType(RegType::vgpr) ||
658 instr->operands[i].isOfType(RegType::sgpr) ||
659 instr->operands[i].isUndefined(),
660 "Other operands of p_jump_to_epilog must be VGPRs, SGPRs or undef",
661 instr.get());
662 }
663 } else if (instr->opcode == aco_opcode::p_dual_src_export_gfx11) {
664 check(instr->definitions.size() == 6,
665 "p_dual_src_export_gfx11 must have 6 definitions", instr.get());
666 check(instr->definitions[2].regClass() == program->lane_mask,
667 "Third definition of p_dual_src_export_gfx11 must be a lane mask",
668 instr.get());
669 check(instr->definitions[3].regClass() == program->lane_mask,
670 "Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
671 instr.get());
672 check(instr->definitions[4].physReg() == vcc,
673 "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
674 check(instr->definitions[5].physReg() == scc,
675 "Sixth definition of p_dual_src_export_gfx11 must be scc", instr.get());
676 check(instr->operands.size() == 8, "p_dual_src_export_gfx11 must have 8 operands",
677 instr.get());
678 for (unsigned i = 0; i < instr->operands.size(); i++) {
679 check(
680 instr->operands[i].isOfType(RegType::vgpr) || instr->operands[i].isUndefined(),
681 "Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get());
682 }
683 } else if (instr->opcode == aco_opcode::p_start_linear_vgpr) {
684 check(instr->definitions.size() == 1, "Must have one definition", instr.get());
685 check(instr->operands.size() <= 1, "Must have one or zero operands", instr.get());
686 if (!instr->definitions.empty())
687 check(instr->definitions[0].regClass().is_linear_vgpr(),
688 "Definition must be linear VGPR", instr.get());
689 if (!instr->definitions.empty() && !instr->operands.empty())
690 check(instr->definitions[0].bytes() == instr->operands[0].bytes(),
691 "Operand size must match definition", instr.get());
692 }
693 break;
694 }
695 case Format::PSEUDO_REDUCTION: {
696 for (const Operand& op : instr->operands)
697 check(op.regClass().type() == RegType::vgpr,
698 "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
699 instr.get());
700
701 if (instr->opcode == aco_opcode::p_reduce &&
702 instr->reduction().cluster_size == program->wave_size)
703 check(instr->definitions[0].regClass().type() == RegType::sgpr ||
704 program->wave_size == 32,
705 "The result of unclustered reductions must go into an SGPR.", instr.get());
706 else
707 check(instr->definitions[0].regClass().type() == RegType::vgpr,
708 "The result of scans and clustered reductions must go into a VGPR.",
709 instr.get());
710
711 break;
712 }
713 case Format::SMEM: {
714 if (instr->operands.size() >= 1)
715 check(instr->operands[0].isOfType(RegType::sgpr), "SMEM operands must be sgpr",
716 instr.get());
717 if (instr->operands.size() >= 2)
718 check(instr->operands[1].isConstant() || instr->operands[1].isOfType(RegType::sgpr),
719 "SMEM offset must be constant or sgpr", instr.get());
720 if (!instr->definitions.empty())
721 check(instr->definitions[0].regClass().type() == RegType::sgpr,
722 "SMEM result must be sgpr", instr.get());
723 break;
724 }
725 case Format::MTBUF:
726 case Format::MUBUF: {
727 check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
728 instr.get());
729 check(instr->operands[1].isOfType(RegType::vgpr),
730 "VADDR must be in vgpr for VMEM instructions", instr.get());
731 check(instr->operands[0].isOfType(RegType::sgpr), "VMEM resource constant must be sgpr",
732 instr.get());
733 check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
734 "VMEM write data must be vgpr", instr.get());
735
736 const bool d16 =
737 instr->opcode ==
738 aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
739 instr->opcode == aco_opcode::buffer_load_ubyte ||
740 instr->opcode == aco_opcode::buffer_load_sbyte ||
741 instr->opcode == aco_opcode::buffer_load_ushort ||
742 instr->opcode == aco_opcode::buffer_load_sshort ||
743 instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
744 instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
745 instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
746 instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
747 instr->opcode == aco_opcode::buffer_load_short_d16 ||
748 instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
749 instr->opcode == aco_opcode::buffer_load_format_d16_x ||
750 instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
751 instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
752 instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
753 instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
754 instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
755 instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
756 instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
757 instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
758 if (instr->definitions.size()) {
759 check(instr->definitions[0].regClass().type() == RegType::vgpr,
760 "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
761 check(d16 || !instr->definitions[0].regClass().is_subdword(),
762 "Only D16 opcodes can load subdword values.", instr.get());
763 check(instr->definitions[0].bytes() <= 8 || !d16,
764 "D16 opcodes can only load up to 8 bytes.", instr.get());
765 }
766 break;
767 }
768 case Format::MIMG: {
769 check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
770 instr.get());
771 check(instr->operands[0].hasRegClass() &&
772 (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
773 "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
774 if (instr->operands[1].hasRegClass())
775 check(instr->operands[1].regClass() == s4,
776 "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
777 if (!instr->operands[2].isUndefined()) {
778 bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
779 instr->opcode == aco_opcode::image_atomic_fcmpswap;
780 check(instr->definitions.empty() ||
781 (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
782 is_cmpswap),
783 "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
784 "TFE/LWE loads",
785 instr.get());
786 }
787
788 if (instr->mimg().strict_wqm) {
789 check(instr->operands[3].hasRegClass() &&
790 instr->operands[3].regClass().is_linear_vgpr(),
791 "MIMG operands[3] must be temp linear VGPR.", instr.get());
792
793 unsigned total_size = 0;
794 for (unsigned i = 4; i < instr->operands.size(); i++) {
795 check(instr->operands[i].hasRegClass() && instr->operands[i].regClass() == v1,
796 "MIMG operands[4+] (VADDR) must be v1", instr.get());
797 total_size += instr->operands[i].bytes();
798 }
799 check(total_size <= instr->operands[3].bytes(),
800 "MIMG operands[4+] must fit within operands[3].", instr.get());
801 } else {
802 check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
803 "NSA is only supported on GFX10+", instr.get());
804 for (unsigned i = 3; i < instr->operands.size(); i++) {
805 check(instr->operands[i].hasRegClass() &&
806 instr->operands[i].regClass().type() == RegType::vgpr,
807 "MIMG operands[3+] (VADDR) must be VGPR", instr.get());
808 if (instr->operands.size() > 4) {
809 if (program->gfx_level < GFX11) {
810 check(instr->operands[i].regClass() == v1,
811 "GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
812 } else {
813 if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
814 instr->opcode != aco_opcode::image_bvh64_intersect_ray && i < 7) {
815 check(instr->operands[i].regClass() == v1,
816 "first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());
817 }
818 }
819 }
820 }
821 }
822
823 if (instr->definitions.size()) {
824 check(instr->definitions[0].regClass().type() == RegType::vgpr,
825 "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
826 check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
827 "Only D16 MIMG instructions can load subdword values.", instr.get());
828 check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
829 "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
830 }
831 break;
832 }
833 case Format::DS: {
834 for (const Operand& op : instr->operands) {
835 check(op.isOfType(RegType::vgpr) || op.physReg() == m0 || op.isUndefined(),
836 "Only VGPRs are valid DS instruction operands", instr.get());
837 }
838 if (!instr->definitions.empty())
839 check(instr->definitions[0].regClass().type() == RegType::vgpr,
840 "DS instruction must return VGPR", instr.get());
841 break;
842 }
843 case Format::EXP: {
844 for (unsigned i = 0; i < 4; i++)
845 check(instr->operands[i].isOfType(RegType::vgpr),
846 "Only VGPRs are valid Export arguments", instr.get());
847 break;
848 }
849 case Format::FLAT:
850 check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
851 instr.get());
852 FALLTHROUGH;
853 case Format::GLOBAL:
854 check(instr->operands[0].isOfType(RegType::vgpr), "FLAT/GLOBAL address must be vgpr",
855 instr.get());
856 FALLTHROUGH;
857 case Format::SCRATCH: {
858 check(instr->operands[0].isOfType(RegType::vgpr),
859 "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
860 check(instr->operands[1].isOfType(RegType::sgpr),
861 "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
862 if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
863 check(!instr->operands[0].isUndefined() || !instr->operands[1].isUndefined(),
864 "SCRATCH must have either SADDR or ADDR operand", instr.get());
865 if (!instr->definitions.empty())
866 check(instr->definitions[0].regClass().type() == RegType::vgpr,
867 "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
868 else
869 check(instr->operands[2].isOfType(RegType::vgpr),
870 "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
871 break;
872 }
873 case Format::LDSDIR: {
874 check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
875 "LDSDIR must have an v1 definition", instr.get());
876 check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
877 if (!instr->operands.empty()) {
878 check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
879 instr.get());
880 check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
881 "LDSDIR must have an operand fixed to m0", instr.get());
882 }
883 break;
884 }
885 default: break;
886 }
887 }
888 }
889
890 return is_valid;
891 }
892
893 bool
validate_cfg(Program * program)894 validate_cfg(Program* program)
895 {
896 if (!(debug_flags & DEBUG_VALIDATE_IR))
897 return true;
898
899 bool is_valid = true;
900 auto check_block = [&program, &is_valid](bool success, const char* msg,
901 aco::Block* block) -> void
902 {
903 if (!success) {
904 aco_err(program, "%s: BB%u", msg, block->index);
905 is_valid = false;
906 }
907 };
908
909 /* validate CFG */
910 for (unsigned i = 0; i < program->blocks.size(); i++) {
911 Block& block = program->blocks[i];
912 check_block(block.index == i, "block.index must match actual index", &block);
913
914 /* predecessors/successors should be sorted */
915 for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
916 check_block(block.linear_preds[j] < block.linear_preds[j + 1],
917 "linear predecessors must be sorted", &block);
918 for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
919 check_block(block.logical_preds[j] < block.logical_preds[j + 1],
920 "logical predecessors must be sorted", &block);
921 for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
922 check_block(block.linear_succs[j] < block.linear_succs[j + 1],
923 "linear successors must be sorted", &block);
924 for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
925 check_block(block.logical_succs[j] < block.logical_succs[j + 1],
926 "logical successors must be sorted", &block);
927
928 /* critical edges are not allowed */
929 if (block.linear_preds.size() > 1) {
930 for (unsigned pred : block.linear_preds)
931 check_block(program->blocks[pred].linear_succs.size() == 1,
932 "linear critical edges are not allowed", &program->blocks[pred]);
933 for (unsigned pred : block.logical_preds)
934 check_block(program->blocks[pred].logical_succs.size() == 1,
935 "logical critical edges are not allowed", &program->blocks[pred]);
936 }
937 }
938
939 return is_valid;
940 }
941
942 /* RA validation */
943 namespace {
944
945 struct Location {
Locationaco::__anon191fc7c00311::Location946 Location() : block(NULL), instr(NULL) {}
947
948 Block* block;
949 Instruction* instr; // NULL if it's the block's live-in
950 };
951
952 struct Assignment {
953 Location defloc;
954 Location firstloc;
955 PhysReg reg;
956 bool valid;
957 };
958
959 bool
ra_fail(Program * program,Location loc,Location loc2,const char * fmt,...)960 ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
961 {
962 va_list args;
963 va_start(args, fmt);
964 char msg[1024];
965 vsprintf(msg, fmt, args);
966 va_end(args);
967
968 char* out;
969 size_t outsize;
970 struct u_memstream mem;
971 u_memstream_open(&mem, &out, &outsize);
972 FILE* const memf = u_memstream_get(&mem);
973
974 fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
975 if (loc.instr) {
976 aco_print_instr(program->gfx_level, loc.instr, memf);
977 fprintf(memf, "\n%s", msg);
978 } else {
979 fprintf(memf, "%s", msg);
980 }
981 if (loc2.block) {
982 fprintf(memf, " in BB%d:\n", loc2.block->index);
983 aco_print_instr(program->gfx_level, loc2.instr, memf);
984 }
985 fprintf(memf, "\n\n");
986 u_memstream_close(&mem);
987
988 aco_err(program, "%s", out);
989 free(out);
990
991 return true;
992 }
993
994 bool
validate_subdword_operand(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr,unsigned index)995 validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
996 unsigned index)
997 {
998 Operand op = instr->operands[index];
999 unsigned byte = op.physReg().byte();
1000
1001 if (instr->opcode == aco_opcode::p_as_uniform)
1002 return byte == 0;
1003 if (instr->isPseudo() && gfx_level >= GFX8)
1004 return true;
1005 if (instr->isSDWA())
1006 return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
1007 byte % instr->sdwa().sel[index].size() == 0;
1008 if (instr->isVOP3P()) {
1009 bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
1010 instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
1011 instr->opcode == aco_opcode::v_fma_mix_f32;
1012 return instr->valu().opsel_lo[index] == (byte >> 1) &&
1013 instr->valu().opsel_hi[index] == (fma_mix || (byte >> 1));
1014 }
1015 if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
1016 return true;
1017
1018 switch (instr->opcode) {
1019 case aco_opcode::v_cvt_f32_ubyte1:
1020 if (byte == 1)
1021 return true;
1022 break;
1023 case aco_opcode::v_cvt_f32_ubyte2:
1024 if (byte == 2)
1025 return true;
1026 break;
1027 case aco_opcode::v_cvt_f32_ubyte3:
1028 if (byte == 3)
1029 return true;
1030 break;
1031 case aco_opcode::ds_write_b8_d16_hi:
1032 case aco_opcode::ds_write_b16_d16_hi:
1033 if (byte == 2 && index == 1)
1034 return true;
1035 break;
1036 case aco_opcode::buffer_store_byte_d16_hi:
1037 case aco_opcode::buffer_store_short_d16_hi:
1038 case aco_opcode::buffer_store_format_d16_hi_x:
1039 if (byte == 2 && index == 3)
1040 return true;
1041 break;
1042 case aco_opcode::flat_store_byte_d16_hi:
1043 case aco_opcode::flat_store_short_d16_hi:
1044 case aco_opcode::scratch_store_byte_d16_hi:
1045 case aco_opcode::scratch_store_short_d16_hi:
1046 case aco_opcode::global_store_byte_d16_hi:
1047 case aco_opcode::global_store_short_d16_hi:
1048 if (byte == 2 && index == 2)
1049 return true;
1050 break;
1051 default: break;
1052 }
1053
1054 return byte == 0;
1055 }
1056
1057 bool
validate_subdword_definition(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr)1058 validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
1059 {
1060 Definition def = instr->definitions[0];
1061 unsigned byte = def.physReg().byte();
1062
1063 if (instr->isPseudo() && gfx_level >= GFX8)
1064 return true;
1065 if (instr->isSDWA())
1066 return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
1067 byte % instr->sdwa().dst_sel.size() == 0;
1068 if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
1069 return true;
1070
1071 switch (instr->opcode) {
1072 case aco_opcode::v_fma_mixhi_f16:
1073 case aco_opcode::buffer_load_ubyte_d16_hi:
1074 case aco_opcode::buffer_load_sbyte_d16_hi:
1075 case aco_opcode::buffer_load_short_d16_hi:
1076 case aco_opcode::buffer_load_format_d16_hi_x:
1077 case aco_opcode::flat_load_ubyte_d16_hi:
1078 case aco_opcode::flat_load_short_d16_hi:
1079 case aco_opcode::scratch_load_ubyte_d16_hi:
1080 case aco_opcode::scratch_load_short_d16_hi:
1081 case aco_opcode::global_load_ubyte_d16_hi:
1082 case aco_opcode::global_load_short_d16_hi:
1083 case aco_opcode::ds_read_u8_d16_hi:
1084 case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
1085 default: break;
1086 }
1087
1088 return byte == 0;
1089 }
1090
1091 unsigned
get_subdword_bytes_written(Program * program,const aco_ptr<Instruction> & instr,unsigned index)1092 get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
1093 {
1094 amd_gfx_level gfx_level = program->gfx_level;
1095 Definition def = instr->definitions[index];
1096
1097 if (instr->isPseudo())
1098 return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
1099 if (instr->isVALU()) {
1100 assert(def.bytes() <= 2);
1101 if (instr->isSDWA())
1102 return instr->sdwa().dst_sel.size();
1103
1104 if (instr_is_16bit(gfx_level, instr->opcode))
1105 return 2;
1106
1107 return 4;
1108 }
1109
1110 if (instr->isMIMG()) {
1111 assert(instr->mimg().d16);
1112 return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
1113 }
1114
1115 switch (instr->opcode) {
1116 case aco_opcode::buffer_load_ubyte_d16:
1117 case aco_opcode::buffer_load_sbyte_d16:
1118 case aco_opcode::buffer_load_short_d16:
1119 case aco_opcode::buffer_load_format_d16_x:
1120 case aco_opcode::tbuffer_load_format_d16_x:
1121 case aco_opcode::flat_load_ubyte_d16:
1122 case aco_opcode::flat_load_short_d16:
1123 case aco_opcode::scratch_load_ubyte_d16:
1124 case aco_opcode::scratch_load_short_d16:
1125 case aco_opcode::global_load_ubyte_d16:
1126 case aco_opcode::global_load_short_d16:
1127 case aco_opcode::ds_read_u8_d16:
1128 case aco_opcode::ds_read_u16_d16:
1129 case aco_opcode::buffer_load_ubyte_d16_hi:
1130 case aco_opcode::buffer_load_sbyte_d16_hi:
1131 case aco_opcode::buffer_load_short_d16_hi:
1132 case aco_opcode::buffer_load_format_d16_hi_x:
1133 case aco_opcode::flat_load_ubyte_d16_hi:
1134 case aco_opcode::flat_load_short_d16_hi:
1135 case aco_opcode::scratch_load_ubyte_d16_hi:
1136 case aco_opcode::scratch_load_short_d16_hi:
1137 case aco_opcode::global_load_ubyte_d16_hi:
1138 case aco_opcode::global_load_short_d16_hi:
1139 case aco_opcode::ds_read_u8_d16_hi:
1140 case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
1141 case aco_opcode::buffer_load_format_d16_xyz:
1142 case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
1143 default: return def.size() * 4;
1144 }
1145 }
1146
1147 bool
validate_instr_defs(Program * program,std::array<unsigned,2048> & regs,const std::vector<Assignment> & assignments,const Location & loc,aco_ptr<Instruction> & instr)1148 validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
1149 const std::vector<Assignment>& assignments, const Location& loc,
1150 aco_ptr<Instruction>& instr)
1151 {
1152 bool err = false;
1153
1154 for (unsigned i = 0; i < instr->definitions.size(); i++) {
1155 Definition& def = instr->definitions[i];
1156 if (!def.isTemp())
1157 continue;
1158 Temp tmp = def.getTemp();
1159 PhysReg reg = assignments[tmp.id()].reg;
1160 for (unsigned j = 0; j < tmp.bytes(); j++) {
1161 if (regs[reg.reg_b + j])
1162 err |=
1163 ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
1164 "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
1165 tmp.id(), regs[reg.reg_b + j]);
1166 regs[reg.reg_b + j] = tmp.id();
1167 }
1168 if (def.regClass().is_subdword() && def.bytes() < 4) {
1169 unsigned written = get_subdword_bytes_written(program, instr, i);
1170 /* If written=4, the instruction still might write the upper half. In that case, it's
1171 * the lower half that isn't preserved */
1172 for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
1173 unsigned written_reg = reg.reg() * 4u + j;
1174 if (regs[written_reg] && regs[written_reg] != def.tempId())
1175 err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
1176 "Assignment of element %d of %%%d overwrites the full register "
1177 "taken by %%%d from instruction",
1178 i, tmp.id(), regs[written_reg]);
1179 }
1180 }
1181 }
1182
1183 for (const Definition& def : instr->definitions) {
1184 if (!def.isTemp())
1185 continue;
1186 if (def.isKill()) {
1187 for (unsigned j = 0; j < def.getTemp().bytes(); j++)
1188 regs[def.physReg().reg_b + j] = 0;
1189 }
1190 }
1191
1192 return err;
1193 }
1194
1195 } /* end namespace */
1196
1197 bool
validate_ra(Program * program)1198 validate_ra(Program* program)
1199 {
1200 if (!(debug_flags & DEBUG_VALIDATE_RA))
1201 return false;
1202
1203 bool err = false;
1204 aco::live live_vars = aco::live_var_analysis(program);
1205 std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
1206 uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
1207
1208 std::vector<Assignment> assignments(program->peekAllocationId());
1209 for (Block& block : program->blocks) {
1210 Location loc;
1211 loc.block = █
1212 for (aco_ptr<Instruction>& instr : block.instructions) {
1213 if (instr->opcode == aco_opcode::p_phi) {
1214 for (unsigned i = 0; i < instr->operands.size(); i++) {
1215 if (instr->operands[i].isTemp() &&
1216 instr->operands[i].getTemp().type() == RegType::sgpr &&
1217 instr->operands[i].isFirstKill())
1218 phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
1219 }
1220 }
1221
1222 loc.instr = instr.get();
1223 for (unsigned i = 0; i < instr->operands.size(); i++) {
1224 Operand& op = instr->operands[i];
1225 if (!op.isTemp())
1226 continue;
1227 if (!op.isFixed())
1228 err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
1229 if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
1230 err |=
1231 ra_fail(program, loc, assignments[op.tempId()].firstloc,
1232 "Operand %d has an inconsistent register assignment with instruction", i);
1233 if ((op.getTemp().type() == RegType::vgpr &&
1234 op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
1235 (op.getTemp().type() == RegType::sgpr &&
1236 op.physReg() + op.size() > program->config->num_sgprs &&
1237 op.physReg() < sgpr_limit))
1238 err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
1239 "Operand %d has an out-of-bounds register assignment", i);
1240 if (op.physReg() == vcc && !program->needs_vcc)
1241 err |= ra_fail(program, loc, Location(),
1242 "Operand %d fixed to vcc but needs_vcc=false", i);
1243 if (op.regClass().is_subdword() &&
1244 !validate_subdword_operand(program->gfx_level, instr, i))
1245 err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
1246 if (!assignments[op.tempId()].firstloc.block)
1247 assignments[op.tempId()].firstloc = loc;
1248 if (!assignments[op.tempId()].defloc.block) {
1249 assignments[op.tempId()].reg = op.physReg();
1250 assignments[op.tempId()].valid = true;
1251 }
1252 }
1253
1254 for (unsigned i = 0; i < instr->definitions.size(); i++) {
1255 Definition& def = instr->definitions[i];
1256 if (!def.isTemp())
1257 continue;
1258 if (!def.isFixed())
1259 err |=
1260 ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
1261 if (assignments[def.tempId()].defloc.block)
1262 err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
1263 "Temporary %%%d also defined by instruction", def.tempId());
1264 if ((def.getTemp().type() == RegType::vgpr &&
1265 def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
1266 (def.getTemp().type() == RegType::sgpr &&
1267 def.physReg() + def.size() > program->config->num_sgprs &&
1268 def.physReg() < sgpr_limit))
1269 err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
1270 "Definition %d has an out-of-bounds register assignment", i);
1271 if (def.physReg() == vcc && !program->needs_vcc)
1272 err |= ra_fail(program, loc, Location(),
1273 "Definition %d fixed to vcc but needs_vcc=false", i);
1274 if (def.regClass().is_subdword() &&
1275 !validate_subdword_definition(program->gfx_level, instr))
1276 err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
1277 if (!assignments[def.tempId()].firstloc.block)
1278 assignments[def.tempId()].firstloc = loc;
1279 assignments[def.tempId()].defloc = loc;
1280 assignments[def.tempId()].reg = def.physReg();
1281 assignments[def.tempId()].valid = true;
1282 }
1283 }
1284 }
1285
1286 for (Block& block : program->blocks) {
1287 Location loc;
1288 loc.block = █
1289
1290 std::array<unsigned, 2048> regs; /* register file in bytes */
1291 regs.fill(0);
1292
1293 IDSet live = live_vars.live_out[block.index];
1294 /* remove killed p_phi sgpr operands */
1295 for (Temp tmp : phi_sgpr_ops[block.index])
1296 live.erase(tmp.id());
1297
1298 /* check live out */
1299 for (unsigned id : live) {
1300 Temp tmp(id, program->temp_rc[id]);
1301 PhysReg reg = assignments[id].reg;
1302 for (unsigned i = 0; i < tmp.bytes(); i++) {
1303 if (regs[reg.reg_b + i]) {
1304 err |= ra_fail(program, loc, Location(),
1305 "Assignment of element %d of %%%d already taken by %%%d in live-out",
1306 i, id, regs[reg.reg_b + i]);
1307 }
1308 regs[reg.reg_b + i] = id;
1309 }
1310 }
1311 regs.fill(0);
1312
1313 for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
1314 aco_ptr<Instruction>& instr = *it;
1315
1316 /* check killed p_phi sgpr operands */
1317 if (instr->opcode == aco_opcode::p_logical_end) {
1318 for (Temp tmp : phi_sgpr_ops[block.index]) {
1319 PhysReg reg = assignments[tmp.id()].reg;
1320 for (unsigned i = 0; i < tmp.bytes(); i++) {
1321 if (regs[reg.reg_b + i])
1322 err |= ra_fail(
1323 program, loc, Location(),
1324 "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
1325 tmp.id(), regs[reg.reg_b + i]);
1326 }
1327 live.insert(tmp.id());
1328 }
1329 }
1330
1331 for (const Definition& def : instr->definitions) {
1332 if (!def.isTemp())
1333 continue;
1334 live.erase(def.tempId());
1335 }
1336
1337 /* don't count phi operands as live-in, since they are actually
1338 * killed when they are copied at the predecessor */
1339 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1340 for (const Operand& op : instr->operands) {
1341 if (!op.isTemp())
1342 continue;
1343 live.insert(op.tempId());
1344 }
1345 }
1346 }
1347
1348 for (unsigned id : live) {
1349 Temp tmp(id, program->temp_rc[id]);
1350 PhysReg reg = assignments[id].reg;
1351 for (unsigned i = 0; i < tmp.bytes(); i++)
1352 regs[reg.reg_b + i] = id;
1353 }
1354
1355 for (aco_ptr<Instruction>& instr : block.instructions) {
1356 loc.instr = instr.get();
1357
1358 /* remove killed p_phi operands from regs */
1359 if (instr->opcode == aco_opcode::p_logical_end) {
1360 for (Temp tmp : phi_sgpr_ops[block.index]) {
1361 PhysReg reg = assignments[tmp.id()].reg;
1362 for (unsigned i = 0; i < tmp.bytes(); i++)
1363 regs[reg.reg_b + i] = 0;
1364 }
1365 }
1366
1367 if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1368 for (const Operand& op : instr->operands) {
1369 if (!op.isTemp())
1370 continue;
1371 if (op.isFirstKillBeforeDef()) {
1372 for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1373 regs[op.physReg().reg_b + j] = 0;
1374 }
1375 }
1376 }
1377
1378 if (!instr->isBranch() || block.linear_succs.size() != 1)
1379 err |= validate_instr_defs(program, regs, assignments, loc, instr);
1380
1381 if (!is_phi(instr)) {
1382 for (const Operand& op : instr->operands) {
1383 if (!op.isTemp())
1384 continue;
1385 if (op.isLateKill() && op.isFirstKill()) {
1386 for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1387 regs[op.physReg().reg_b + j] = 0;
1388 }
1389 }
1390 } else if (block.linear_preds.size() != 1 ||
1391 program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
1392 for (unsigned pred : block.linear_preds) {
1393 aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
1394 assert(br->isBranch());
1395 err |= validate_instr_defs(program, regs, assignments, loc, br);
1396 }
1397 }
1398 }
1399 }
1400
1401 return err;
1402 }
1403 } // namespace aco
1404