/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "aco_ir.h"

#include "util/memstream.h"

#include <array>
#include <map>
#include <set>
#include <vector>

namespace aco {

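/* Format a compiler diagnostic and forward it to the debug callback (if any)
 * as well as to the configured output stream. With shorten_messages set, only
 * the raw message is emitted; otherwise it is prefixed with the severity and
 * the source location that reported it. */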
static void
aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
        const char* file, unsigned line, const char* fmt, va_list args)
{
   char* msg;

   if (program->debug.shorten_messages) {
      msg = ralloc_vasprintf(NULL, fmt, args);
   } else {
      msg = ralloc_strdup(NULL, prefix);
      ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
      ralloc_asprintf_append(&msg, " ");
      ralloc_vasprintf_append(&msg, fmt, args);
   }

   if (program->debug.func)
      program->debug.func(program->debug.private_data, level, msg);

   fprintf(program->debug.output, "%s\n", msg);

   ralloc_free(msg);
}

void
_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
   va_end(args);
}

void
_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
{
   va_list args;

   va_start(args, fmt);
   aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
   va_end(args);
}

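/* Validate the structural correctness of the IR: instruction formats and
 * modifiers, operand/definition register classes and sizes, and the shape of
 * the CFG. Returns true if no problem was found; every violation is reported
 * through aco_err(). */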
bool
validate_ir(Program* program)
{
   bool is_valid = true;
   auto check = [&program, &is_valid](bool success, const char* msg,
                                      aco::Instruction* instr) -> void
   {
      if (!success) {
         char* out;
         size_t outsize;
         struct u_memstream mem;
         u_memstream_open(&mem, &out, &outsize);
         FILE* const memf = u_memstream_get(&mem);

         fprintf(memf, "%s: ", msg);
         aco_print_instr(instr, memf);
         u_memstream_close(&mem);

         aco_err(program, "%s", out);
         free(out);

         is_valid = false;
      }
   };

   auto check_block = [&program, &is_valid](bool success, const char* msg,
                                            aco::Block* block) -> void
   {
      if (!success) {
         aco_err(program, "%s: BB%u", msg, block->index);
         is_valid = false;
      }
   };

   for (Block& block : program->blocks) {
      for (aco_ptr<Instruction>& instr : block.instructions) {

         /* check base format */
         Format base_format = instr->format;
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
         base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
         if ((uint32_t)base_format & (uint32_t)Format::VOP1)
            base_format = Format::VOP1;
         else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
            base_format = Format::VOP2;
         else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
            base_format = Format::VOPC;
         else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
            if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
                instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
                instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
                instr->opcode == aco_opcode::v_interp_p2_f16) {
               /* v_interp_*_fp16 are considered VINTRP by the compiler but
                * they are emitted as VOP3.
                */
               base_format = Format::VOP3;
            } else {
               base_format = Format::VINTRP;
            }
         }
         check(base_format == instr_info.format[(int)instr->opcode],
               "Wrong base format for instruction", instr.get());

         /* check VOP3 modifiers */
         if (instr->isVOP3() && instr->format != Format::VOP3) {
            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
                     base_format == Format::VOPC || base_format == Format::VINTRP,
                  "Format cannot have VOP3/VOP3B applied", instr.get());
         }

         /* check SDWA */
         if (instr->isSDWA()) {
            check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
                     base_format == Format::VOPC,
                  "Format cannot have SDWA applied", instr.get());

            check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
            check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());

            SDWA_instruction& sdwa = instr->sdwa();
            check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
                  instr.get());
            if (base_format == Format::VOPC) {
               check(sdwa.clamp == false || program->gfx_level == GFX8,
                     "SDWA VOPC clamp only supported on GFX8", instr.get());
               check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
                        program->gfx_level >= GFX9,
                     "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
            } else {
               const Definition& def = instr->definitions[0];
               check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
                     instr.get());
               check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
                     "SDWA definition selection size must be at most definition size", instr.get());
               check(
                  sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
                  "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
               check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
                     instr.get());
               check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
                     "SDWA dst_sel size must be definition size for subdword definitions",
                     instr.get());
               check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
                     "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
            }

            for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
               const Operand& op = instr->operands[i];
               check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
               check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
                     "SDWA operand selection size must be at most operand size", instr.get());
               check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
                     "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
               check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
                     instr.get());
            }
            if (instr->operands.size() >= 3) {
               check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
                     "3rd operand must be fixed to vcc with SDWA", instr.get());
            }
            if (instr->definitions.size() >= 2) {
               check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
                     "2nd definition must be fixed to vcc with SDWA", instr.get());
            }

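            /* SDWA is not available for every VALU opcode: the list below
             * excludes the opcodes with an inline literal (*madmk/*madak/
             * *fmamk/*fmaak), the FMAC variants, v_readfirstlane_b32,
             * v_clrexcp and v_swap_b32. */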
            const bool sdwa_opcodes =
               instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
               instr->opcode != aco_opcode::v_fmamk_f32 &&
               instr->opcode != aco_opcode::v_fmaak_f32 &&
               instr->opcode != aco_opcode::v_fmamk_f16 &&
               instr->opcode != aco_opcode::v_fmaak_f16 &&
               instr->opcode != aco_opcode::v_madmk_f32 &&
               instr->opcode != aco_opcode::v_madak_f32 &&
               instr->opcode != aco_opcode::v_madmk_f16 &&
               instr->opcode != aco_opcode::v_madak_f16 &&
               instr->opcode != aco_opcode::v_readfirstlane_b32 &&
               instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;

            const bool feature_mac =
               program->gfx_level == GFX8 &&
               (instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16);

            check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
         }

         /* check opsel */
         if (instr->isVOP3()) {
            VOP3_instruction& vop3 = instr->vop3();
            check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
                  instr.get());

            for (unsigned i = 0; i < 3; i++) {
               if (i >= instr->operands.size() ||
                   (instr->operands[i].hasRegClass() &&
                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
                  check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
            }
            if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
               check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
                     instr.get());
         } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
                    instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
                    instr->opcode == aco_opcode::v_fma_mix_f32) {
            check(instr->definitions[0].regClass() ==
                     (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
                  "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
         } else if (instr->isVOP3P()) {
            VOP3P_instruction& vop3p = instr->vop3p();
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               if (instr->operands[i].hasRegClass() &&
                   instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
                  check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
                        "Unexpected opsel for subdword operand", instr.get());
            }
            check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
                  instr.get());
         }

         /* check for undefs */
         for (unsigned i = 0; i < instr->operands.size(); i++) {
            if (instr->operands[i].isUndefined()) {
               bool flat = instr->isFlatLike();
               bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
                                   instr->opcode == aco_opcode::p_create_vector ||
                                   instr->opcode == aco_opcode::p_jump_to_epilog ||
                                   (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
                                   ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
                                   (instr->isScratch() && i == 0);
               check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
            } else {
               check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
                        instr->operands[i].isConstant(),
                     "Uninitialized Operand", instr.get());
            }
         }

         /* check subdword definitions */
         for (unsigned i = 0; i < instr->definitions.size(); i++) {
            if (instr->definitions[i].regClass().is_subdword())
               check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
                     "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
                     instr.get());
         }

         if (instr->isSALU() || instr->isVALU()) {
            /* check literals */
            Operand literal(s1);
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               Operand op = instr->operands[i];
               if (!op.isLiteral())
                  continue;

               check(!instr->isDPP() && !instr->isSDWA() &&
                        (!instr->isVOP3() || program->gfx_level >= GFX10) &&
                        (!instr->isVOP3P() || program->gfx_level >= GFX10),
                     "Literal applied on wrong instruction format", instr.get());

               check(literal.isUndefined() || (literal.size() == op.size() &&
                                               literal.constantValue() == op.constantValue()),
                     "Only 1 Literal allowed", instr.get());
               literal = op;
               check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
                     "Wrong source position for Literal argument", instr.get());
            }

            /* check num sgprs for VALU */
            if (instr->isVALU()) {
               bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
                                 instr->opcode == aco_opcode::v_lshrrev_b64 ||
                                 instr->opcode == aco_opcode::v_ashrrev_i64;
               unsigned const_bus_limit = 1;
               if (program->gfx_level >= GFX10 && !is_shift64)
                  const_bus_limit = 2;

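               /* Bitmask of source positions that may read an SGPR or a
                * non-literal constant: bit i set allows operand i on the
                * constant bus. */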
               uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
               if (instr->isSDWA())
                  scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
               else if (instr->isDPP())
                  scalar_mask = 0x4;

               if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
                   instr->opcode == aco_opcode::v_readlane_b32 ||
                   instr->opcode == aco_opcode::v_readlane_b32_e64) {
                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                        "Wrong Definition type for VALU instruction", instr.get());
               } else {
                  check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                        "Wrong Definition type for VALU instruction", instr.get());
               }

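               /* Count the distinct SGPR temporaries read by this instruction;
                * together with a literal they must stay within the constant
                * bus limit checked at the end of the loop. */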
               unsigned num_sgprs = 0;
               unsigned sgpr[] = {0, 0};
               for (unsigned i = 0; i < instr->operands.size(); i++) {
                  Operand op = instr->operands[i];
                  if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
                      instr->opcode == aco_opcode::v_readlane_b32 ||
                      instr->opcode == aco_opcode::v_readlane_b32_e64) {
                     check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Must be a SGPR or a constant", instr.get());
                     check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
                                      op.bytes() <= 4),
                           "Wrong Operand type for VALU instruction", instr.get());
                     continue;
                  }
                  if (instr->opcode == aco_opcode::v_permlane16_b32 ||
                      instr->opcode == aco_opcode::v_permlanex16_b32) {
                     check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
                           "Operand 0 of v_permlane must be VGPR", instr.get());
                     check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Lane select operands of v_permlane must be SGPR or constant",
                           instr.get());
                  }

                  if (instr->opcode == aco_opcode::v_writelane_b32 ||
                      instr->opcode == aco_opcode::v_writelane_b32_e64) {
                     check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
                                      op.bytes() <= 4),
                           "Wrong Operand type for VALU instruction", instr.get());
                     check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
                              op.isConstant(),
                           "Must be a SGPR or a constant", instr.get());
                     continue;
                  }
                  if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
                     check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
                           instr.get());

                     if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
                        if (num_sgprs < 2)
                           sgpr[num_sgprs++] = op.tempId();
                     }
                  }

                  if (op.isConstant() && !op.isLiteral())
                     check(scalar_mask & (1 << i), "Wrong source position for constant argument",
                           instr.get());
               }
               check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
                     "Too many SGPRs/literals", instr.get());
            }

            if (instr->isSOP1() || instr->isSOP2()) {
               if (!instr->definitions.empty())
                  check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                        "Wrong Definition type for SALU instruction", instr.get());
               for (const Operand& op : instr->operands) {
                  check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
                        "Wrong Operand type for SALU instruction", instr.get());
               }
            }
         }

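         /* format-specific checks */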
         switch (instr->format) {
         case Format::PSEUDO: {
            if (instr->opcode == aco_opcode::p_create_vector) {
               unsigned size = 0;
               for (const Operand& op : instr->operands) {
                  check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
                  size += op.bytes();
               }
               check(size == instr->definitions[0].bytes(),
                     "Definition size does not match operand sizes", instr.get());
               if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
                  for (const Operand& op : instr->operands) {
                     check(op.isConstant() || op.regClass().type() == RegType::sgpr,
                           "Wrong Operand type for scalar vector", instr.get());
                  }
               }
            } else if (instr->opcode == aco_opcode::p_extract_vector) {
               check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
                     "Wrong Operand types", instr.get());
               check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
                        instr->operands[0].bytes(),
                     "Index out of range", instr.get());
               check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
                        instr->operands[0].regClass().type() == RegType::sgpr,
                     "Cannot extract SGPR value from VGPR vector", instr.get());
               check(program->gfx_level >= GFX9 ||
                        !instr->definitions[0].regClass().is_subdword() ||
                        instr->operands[0].regClass().type() == RegType::vgpr,
                     "Cannot extract subdword from SGPR before GFX9+", instr.get());
            } else if (instr->opcode == aco_opcode::p_split_vector) {
               check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
               unsigned size = 0;
               for (const Definition& def : instr->definitions) {
                  size += def.bytes();
               }
               check(size == instr->operands[0].bytes(),
                     "Operand size does not match definition sizes", instr.get());
               if (instr->operands[0].getTemp().type() == RegType::vgpr) {
                  for (const Definition& def : instr->definitions)
                     check(def.regClass().type() == RegType::vgpr,
                           "Wrong Definition type for VGPR split_vector", instr.get());
               } else {
                  for (const Definition& def : instr->definitions)
                     check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
                           "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
               }
            } else if (instr->opcode == aco_opcode::p_parallelcopy) {
               check(instr->definitions.size() == instr->operands.size(),
                     "Number of Operands does not match number of Definitions", instr.get());
               for (unsigned i = 0; i < instr->operands.size(); i++) {
                  check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
                        "Operand and Definition size must match", instr.get());
                  if (instr->operands[i].isTemp()) {
                     check((instr->definitions[i].getTemp().type() ==
                            instr->operands[i].regClass().type()) ||
                              (instr->definitions[i].getTemp().type() == RegType::vgpr &&
                               instr->operands[i].regClass().type() == RegType::sgpr),
                           "Operand and Definition types do not match", instr.get());
                     check(instr->definitions[i].regClass().is_linear_vgpr() ==
                              instr->operands[i].regClass().is_linear_vgpr(),
                           "Operand and Definition types do not match", instr.get());
                  } else {
                     check(!instr->definitions[i].regClass().is_linear_vgpr(),
                           "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
                           instr.get());
                  }
               }
            } else if (instr->opcode == aco_opcode::p_phi) {
               check(instr->operands.size() == block.logical_preds.size(),
                     "Number of Operands does not match number of predecessors", instr.get());
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "Logical Phi Definition must be vgpr", instr.get());
               for (const Operand& op : instr->operands)
                  check(instr->definitions[0].size() == op.size(),
                        "Operand sizes must match Definition size", instr.get());
            } else if (instr->opcode == aco_opcode::p_linear_phi) {
               for (const Operand& op : instr->operands) {
                  check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
                        instr.get());
                  check(instr->definitions[0].size() == op.size(),
                        "Operand sizes must match Definition size", instr.get());
               }
               check(instr->operands.size() == block.linear_preds.size(),
                     "Number of Operands does not match number of predecessors", instr.get());
            } else if (instr->opcode == aco_opcode::p_extract ||
                       instr->opcode == aco_opcode::p_insert) {
               check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
               check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
               if (instr->opcode == aco_opcode::p_extract)
                  check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
                        instr.get());

               check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
                        instr->operands[0].getTemp().type() == RegType::sgpr,
                     "Can't extract/insert VGPR to SGPR", instr.get());

               if (instr->opcode == aco_opcode::p_insert)
                  check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
                        "Sizes of p_insert data operand and definition must match", instr.get());

               if (instr->definitions[0].getTemp().type() == RegType::sgpr)
                  check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
                           instr->definitions[1].physReg() == scc,
                        "SGPR extract/insert needs an SCC definition", instr.get());

               unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
               unsigned op_bits = instr->operands[2].constantValue();

               if (instr->opcode == aco_opcode::p_insert) {
                  check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
                  check(op_bits < data_bits, "Size must be smaller than source", instr.get());
               } else if (instr->opcode == aco_opcode::p_extract) {
                  check(op_bits == 8 || op_bits == 16 || op_bits == 32,
                        "Size must be 8 or 16 or 32", instr.get());
                  check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
                        instr.get());
               }

               unsigned comp = data_bits / MAX2(op_bits, 1);
               check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
                     instr.get());
            } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
               check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
                     instr.get());
               check(instr->operands.size() > 0 &&
                        instr->operands[0].getTemp().type() == RegType::sgpr &&
                        instr->operands[0].getTemp().size() == 2,
                     "First operand of p_jump_to_epilog must be a SGPR", instr.get());
               for (unsigned i = 1; i < instr->operands.size(); i++) {
                  check(instr->operands[i].getTemp().type() == RegType::vgpr ||
                           instr->operands[i].isUndefined(),
                        "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
               }
            }
            break;
         }
         case Format::PSEUDO_REDUCTION: {
            for (const Operand& op : instr->operands)
               check(op.regClass().type() == RegType::vgpr,
                     "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
                     instr.get());

            if (instr->opcode == aco_opcode::p_reduce &&
                instr->reduction().cluster_size == program->wave_size)
               check(instr->definitions[0].regClass().type() == RegType::sgpr ||
                        program->wave_size == 32,
                     "The result of unclustered reductions must go into an SGPR.", instr.get());
            else
               check(instr->definitions[0].regClass().type() == RegType::vgpr,
                     "The result of scans and clustered reductions must go into a VGPR.",
                     instr.get());

            break;
         }
         case Format::SMEM: {
            if (instr->operands.size() >= 1)
               check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
                        (instr->operands[0].isTemp() &&
                         instr->operands[0].regClass().type() == RegType::sgpr),
                     "SMEM operands must be sgpr", instr.get());
            if (instr->operands.size() >= 2)
               check(instr->operands[1].isConstant() ||
                        (instr->operands[1].isTemp() &&
                         instr->operands[1].regClass().type() == RegType::sgpr),
                     "SMEM offset must be constant or sgpr", instr.get());
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::sgpr,
                     "SMEM result must be sgpr", instr.get());
            break;
         }
         case Format::MTBUF:
         case Format::MUBUF: {
            check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
                  instr.get());
            check(instr->operands[1].hasRegClass() &&
                     instr->operands[1].regClass().type() == RegType::vgpr,
                  "VADDR must be in vgpr for VMEM instructions", instr.get());
            check(
               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
               "VMEM resource constant must be sgpr", instr.get());
            check(instr->operands.size() < 4 ||
                     (instr->operands[3].isTemp() &&
                      instr->operands[3].regClass().type() == RegType::vgpr),
                  "VMEM write data must be vgpr", instr.get());

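            /* Opcodes whose destination may be a subdword (D16) value; the
             * non-D16 dword/byte/short loads are included because they are
             * used to spill subdword variables (see the FIXME below). */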
            const bool d16 =
               instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
               instr->opcode == aco_opcode::buffer_load_ubyte ||
               instr->opcode == aco_opcode::buffer_load_sbyte ||
               instr->opcode == aco_opcode::buffer_load_ushort ||
               instr->opcode == aco_opcode::buffer_load_sshort ||
               instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
               instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
               instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
               instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
               instr->opcode == aco_opcode::buffer_load_short_d16 ||
               instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
               instr->opcode == aco_opcode::buffer_load_format_d16_x ||
               instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
               instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
               instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
               instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
               instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
               instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
               instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
               instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
            if (instr->definitions.size()) {
               check(instr->definitions[0].isTemp() &&
                        instr->definitions[0].regClass().type() == RegType::vgpr,
                     "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
               check(d16 || !instr->definitions[0].regClass().is_subdword(),
                     "Only D16 opcodes can load subdword values.", instr.get());
               check(instr->definitions[0].bytes() <= 8 || !d16,
                     "D16 opcodes can only load up to 8 bytes.", instr.get());
            }
            break;
         }
         case Format::MIMG: {
            check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
                  instr.get());
            check(instr->operands[0].hasRegClass() &&
                     (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
                  "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
            if (instr->operands[1].hasRegClass())
               check(instr->operands[1].regClass() == s4,
                     "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
            if (!instr->operands[2].isUndefined()) {
               bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
                                 instr->opcode == aco_opcode::image_atomic_fcmpswap;
               check(instr->definitions.empty() ||
                        (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
                         is_cmpswap),
                     "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
                     "TFE/LWE loads",
                     instr.get());
            }
            check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
                  "NSA is only supported on GFX10+", instr.get());
            for (unsigned i = 3; i < instr->operands.size(); i++) {
               if (instr->operands.size() == 4) {
                  check(instr->operands[i].hasRegClass() &&
                           instr->operands[i].regClass().type() == RegType::vgpr,
                        "MIMG operands[3] (VADDR) must be VGPR", instr.get());
               } else {
                  check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
                        instr.get());
               }
            }

            if (instr->definitions.size()) {
               check(instr->definitions[0].isTemp() &&
                        instr->definitions[0].regClass().type() == RegType::vgpr,
                     "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
               check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
                     "Only D16 MIMG instructions can load subdword values.", instr.get());
               check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
                     "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
            }
            break;
         }
         case Format::DS: {
            for (const Operand& op : instr->operands) {
               check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
                     "Only VGPRs are valid DS instruction operands", instr.get());
            }
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "DS instruction must return VGPR", instr.get());
            break;
         }
         case Format::EXP: {
            for (unsigned i = 0; i < 4; i++)
               check(instr->operands[i].hasRegClass() &&
                        instr->operands[i].regClass().type() == RegType::vgpr,
                     "Only VGPRs are valid Export arguments", instr.get());
            break;
         }
         case Format::FLAT:
            check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
                  instr.get());
            FALLTHROUGH;
         case Format::GLOBAL:
            check(
               instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
               "FLAT/GLOBAL address must be vgpr", instr.get());
            FALLTHROUGH;
         case Format::SCRATCH: {
            check(instr->operands[0].hasRegClass() &&
                     instr->operands[0].regClass().type() == RegType::vgpr,
                  "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
            check(instr->operands[1].hasRegClass() &&
                     instr->operands[1].regClass().type() == RegType::sgpr,
                  "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
            if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
               check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
                     "SCRATCH must have either SADDR or ADDR operand", instr.get());
            if (!instr->definitions.empty())
               check(instr->definitions[0].getTemp().type() == RegType::vgpr,
                     "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
            else
               check(instr->operands[2].regClass().type() == RegType::vgpr,
                     "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
            break;
         }
         default: break;
         }
      }
   }

   /* validate CFG */
   for (unsigned i = 0; i < program->blocks.size(); i++) {
      Block& block = program->blocks[i];
      check_block(block.index == i, "block.index must match actual index", &block);

      /* predecessors/successors should be sorted */
      for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
         check_block(block.linear_preds[j] < block.linear_preds[j + 1],
                     "linear predecessors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
         check_block(block.logical_preds[j] < block.logical_preds[j + 1],
                     "logical predecessors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
         check_block(block.linear_succs[j] < block.linear_succs[j + 1],
                     "linear successors must be sorted", &block);
      for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
         check_block(block.logical_succs[j] < block.logical_succs[j + 1],
                     "logical successors must be sorted", &block);

      /* critical edges are not allowed */
      if (block.linear_preds.size() > 1) {
         for (unsigned pred : block.linear_preds)
            check_block(program->blocks[pred].linear_succs.size() == 1,
                        "linear critical edges are not allowed", &program->blocks[pred]);
         for (unsigned pred : block.logical_preds)
            check_block(program->blocks[pred].logical_succs.size() == 1,
                        "logical critical edges are not allowed", &program->blocks[pred]);
      }
   }

   return is_valid;
}

/* RA validation */
namespace {

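/* Helpers for validate_ra(): Location identifies a point in the program
 * (a block and optionally an instruction), and Assignment records where a
 * temporary was first seen/defined and which physical register it was
 * assigned. */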
struct Location {
   Location() : block(NULL), instr(NULL) {}

   Block* block;
   Instruction* instr; // NULL if it's the block's live-in
};

struct Assignment {
   Location defloc;
   Location firstloc;
   PhysReg reg;
   bool valid;
};

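/* Report a register-allocation validation error for the instruction at loc
 * (and, if given, the related location loc2) and return true so callers can
 * accumulate the error state with |=. */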
bool
ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   char msg[1024];
   vsnprintf(msg, sizeof(msg), fmt, args);
   va_end(args);

   char* out;
   size_t outsize;
   struct u_memstream mem;
   u_memstream_open(&mem, &out, &outsize);
   FILE* const memf = u_memstream_get(&mem);

   fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
   if (loc.instr) {
      aco_print_instr(loc.instr, memf);
      fprintf(memf, "\n%s", msg);
   } else {
      fprintf(memf, "%s", msg);
   }
   if (loc2.block) {
      fprintf(memf, " in BB%d:\n", loc2.block->index);
      aco_print_instr(loc2.instr, memf);
   }
   fprintf(memf, "\n\n");
   u_memstream_close(&mem);

   aco_err(program, "%s", out);
   free(out);

   return true;
}

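/* Check that the register byte offset of a subdword operand is one the
 * instruction can actually read (via SDWA/opsel selections or an opcode that
 * implicitly accesses a higher byte); plain operands must start at byte 0. */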
bool
validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
                          unsigned index)
{
   Operand op = instr->operands[index];
   unsigned byte = op.physReg().byte();

   if (instr->opcode == aco_opcode::p_as_uniform)
      return byte == 0;
   if (instr->isPseudo() && gfx_level >= GFX8)
      return true;
   if (instr->isSDWA())
      return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
             byte % instr->sdwa().sel[index].size() == 0;
   if (instr->isVOP3P()) {
      bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
                     instr->opcode == aco_opcode::v_fma_mix_f32;
      return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
             ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
   }
   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
      return true;

   switch (instr->opcode) {
   case aco_opcode::v_cvt_f32_ubyte1:
      if (byte == 1)
         return true;
      break;
   case aco_opcode::v_cvt_f32_ubyte2:
      if (byte == 2)
         return true;
      break;
   case aco_opcode::v_cvt_f32_ubyte3:
      if (byte == 3)
         return true;
      break;
   case aco_opcode::ds_write_b8_d16_hi:
   case aco_opcode::ds_write_b16_d16_hi:
      if (byte == 2 && index == 1)
         return true;
      break;
   case aco_opcode::buffer_store_byte_d16_hi:
   case aco_opcode::buffer_store_short_d16_hi:
   case aco_opcode::buffer_store_format_d16_hi_x:
      if (byte == 2 && index == 3)
         return true;
      break;
   case aco_opcode::flat_store_byte_d16_hi:
   case aco_opcode::flat_store_short_d16_hi:
   case aco_opcode::scratch_store_byte_d16_hi:
   case aco_opcode::scratch_store_short_d16_hi:
   case aco_opcode::global_store_byte_d16_hi:
   case aco_opcode::global_store_short_d16_hi:
      if (byte == 2 && index == 2)
         return true;
      break;
   default: break;
   }

   return byte == 0;
}

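/* Same as above, but for the (first) definition: the destination byte offset
 * must be expressible through SDWA dst_sel, opsel or a *_d16_hi opcode. */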
bool
validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
{
   Definition def = instr->definitions[0];
   unsigned byte = def.physReg().byte();

   if (instr->isPseudo() && gfx_level >= GFX8)
      return true;
   if (instr->isSDWA())
      return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
             byte % instr->sdwa().dst_sel.size() == 0;
   if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
      return true;

   switch (instr->opcode) {
   case aco_opcode::v_fma_mixhi_f16:
   case aco_opcode::buffer_load_ubyte_d16_hi:
   case aco_opcode::buffer_load_sbyte_d16_hi:
   case aco_opcode::buffer_load_short_d16_hi:
   case aco_opcode::buffer_load_format_d16_hi_x:
   case aco_opcode::flat_load_ubyte_d16_hi:
   case aco_opcode::flat_load_short_d16_hi:
   case aco_opcode::scratch_load_ubyte_d16_hi:
   case aco_opcode::scratch_load_short_d16_hi:
   case aco_opcode::global_load_ubyte_d16_hi:
   case aco_opcode::global_load_short_d16_hi:
   case aco_opcode::ds_read_u8_d16_hi:
   case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
   default: break;
   }

   return byte == 0;
}

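/* Return how many bytes of the destination register an instruction actually
 * writes (and thus clobbers). On chips with SRAM ECC, D16 loads write the
 * whole dword even though only part of it holds the result. */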
unsigned
get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
{
   amd_gfx_level gfx_level = program->gfx_level;
   Definition def = instr->definitions[index];

   if (instr->isPseudo())
      return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
   if (instr->isVALU()) {
      assert(def.bytes() <= 2);
      if (instr->isSDWA())
         return instr->sdwa().dst_sel.size();

      if (instr_is_16bit(gfx_level, instr->opcode))
         return 2;

      return 4;
   }

   if (instr->isMIMG()) {
      assert(instr->mimg().d16);
      return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
   }

   switch (instr->opcode) {
   case aco_opcode::buffer_load_ubyte_d16:
   case aco_opcode::buffer_load_sbyte_d16:
   case aco_opcode::buffer_load_short_d16:
   case aco_opcode::buffer_load_format_d16_x:
   case aco_opcode::tbuffer_load_format_d16_x:
   case aco_opcode::flat_load_ubyte_d16:
   case aco_opcode::flat_load_short_d16:
   case aco_opcode::scratch_load_ubyte_d16:
   case aco_opcode::scratch_load_short_d16:
   case aco_opcode::global_load_ubyte_d16:
   case aco_opcode::global_load_short_d16:
   case aco_opcode::ds_read_u8_d16:
   case aco_opcode::ds_read_u16_d16:
   case aco_opcode::buffer_load_ubyte_d16_hi:
   case aco_opcode::buffer_load_sbyte_d16_hi:
   case aco_opcode::buffer_load_short_d16_hi:
   case aco_opcode::buffer_load_format_d16_hi_x:
   case aco_opcode::flat_load_ubyte_d16_hi:
   case aco_opcode::flat_load_short_d16_hi:
   case aco_opcode::scratch_load_ubyte_d16_hi:
   case aco_opcode::scratch_load_short_d16_hi:
   case aco_opcode::global_load_ubyte_d16_hi:
   case aco_opcode::global_load_short_d16_hi:
   case aco_opcode::ds_read_u8_d16_hi:
   case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
   case aco_opcode::buffer_load_format_d16_xyz:
   case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
   default: return def.size() * 4;
   }
}

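/* Check the definitions of one instruction against the byte-granular register
 * file in 'regs': report overlaps with still-live temporaries, mark the newly
 * defined bytes, and clear the bytes of definitions that are immediately dead
 * (isKill). Returns true if an error was reported. */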
bool
validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
                    const std::vector<Assignment>& assignments, const Location& loc,
                    aco_ptr<Instruction>& instr)
{
   bool err = false;

   for (unsigned i = 0; i < instr->definitions.size(); i++) {
      Definition& def = instr->definitions[i];
      if (!def.isTemp())
         continue;
      Temp tmp = def.getTemp();
      PhysReg reg = assignments[tmp.id()].reg;
      for (unsigned j = 0; j < tmp.bytes(); j++) {
         if (regs[reg.reg_b + j])
            err |=
               ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
                       "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
                       tmp.id(), regs[reg.reg_b + j]);
         regs[reg.reg_b + j] = tmp.id();
      }
      if (def.regClass().is_subdword() && def.bytes() < 4) {
         unsigned written = get_subdword_bytes_written(program, instr, i);
         /* If written=4, the instruction still might write the upper half. In that case, it's
          * the lower half that isn't preserved */
         for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
            unsigned written_reg = reg.reg() * 4u + j;
            if (regs[written_reg] && regs[written_reg] != def.tempId())
               err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
                              "Assignment of element %d of %%%d overwrites the full register "
                              "taken by %%%d from instruction",
                              i, tmp.id(), regs[written_reg]);
         }
      }
   }

   for (const Definition& def : instr->definitions) {
      if (!def.isTemp())
         continue;
      if (def.isKill()) {
         for (unsigned j = 0; j < def.getTemp().bytes(); j++)
            regs[def.physReg().reg_b + j] = 0;
      }
   }

   return err;
}

} /* end namespace */

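/* Validate the result of register allocation: every temporary must have a
 * consistent, in-bounds physical register and no live value may be
 * overwritten. Only runs when the corresponding debug flag is set (presumably
 * ACO_DEBUG=validatera); returns true if an error was found. */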
bool
validate_ra(Program* program)
{
   if (!(debug_flags & DEBUG_VALIDATE_RA))
      return false;

   bool err = false;
   aco::live live_vars = aco::live_var_analysis(program);
   std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
   uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);

   std::vector<Assignment> assignments(program->peekAllocationId());
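
   /* First pass: record the register assigned to every temporary and check
    * that each operand/definition is fixed, consistent with previous uses and
    * within the addressable register range. */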
   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;
      for (aco_ptr<Instruction>& instr : block.instructions) {
         if (instr->opcode == aco_opcode::p_phi) {
            for (unsigned i = 0; i < instr->operands.size(); i++) {
               if (instr->operands[i].isTemp() &&
                   instr->operands[i].getTemp().type() == RegType::sgpr &&
                   instr->operands[i].isFirstKill())
                  phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
            }
         }

         loc.instr = instr.get();
         for (unsigned i = 0; i < instr->operands.size(); i++) {
            Operand& op = instr->operands[i];
            if (!op.isTemp())
               continue;
            if (!op.isFixed())
               err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
            if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
               err |=
                  ra_fail(program, loc, assignments[op.tempId()].firstloc,
                          "Operand %d has an inconsistent register assignment with instruction", i);
            if ((op.getTemp().type() == RegType::vgpr &&
                 op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (op.getTemp().type() == RegType::sgpr &&
                 op.physReg() + op.size() > program->config->num_sgprs &&
                 op.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
                              "Operand %d has an out-of-bounds register assignment", i);
            if (op.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Operand %d fixed to vcc but needs_vcc=false", i);
            if (op.regClass().is_subdword() &&
                !validate_subdword_operand(program->gfx_level, instr, i))
               err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
            if (!assignments[op.tempId()].firstloc.block)
               assignments[op.tempId()].firstloc = loc;
            if (!assignments[op.tempId()].defloc.block) {
               assignments[op.tempId()].reg = op.physReg();
               assignments[op.tempId()].valid = true;
            }
         }

         for (unsigned i = 0; i < instr->definitions.size(); i++) {
            Definition& def = instr->definitions[i];
            if (!def.isTemp())
               continue;
            if (!def.isFixed())
               err |=
                  ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
            if (assignments[def.tempId()].defloc.block)
               err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
                              "Temporary %%%d also defined by instruction", def.tempId());
            if ((def.getTemp().type() == RegType::vgpr &&
                 def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
                (def.getTemp().type() == RegType::sgpr &&
                 def.physReg() + def.size() > program->config->num_sgprs &&
                 def.physReg() < sgpr_limit))
               err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
                              "Definition %d has an out-of-bounds register assignment", i);
            if (def.physReg() == vcc && !program->needs_vcc)
               err |= ra_fail(program, loc, Location(),
                              "Definition %d fixed to vcc but needs_vcc=false", i);
            if (def.regClass().is_subdword() &&
                !validate_subdword_definition(program->gfx_level, instr))
               err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
            if (!assignments[def.tempId()].firstloc.block)
               assignments[def.tempId()].firstloc = loc;
            assignments[def.tempId()].defloc = loc;
            assignments[def.tempId()].reg = def.physReg();
            assignments[def.tempId()].valid = true;
         }
      }
   }

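   /* Second pass: simulate the register file per block at byte granularity
    * and make sure no definition overwrites a value that is still live. */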
   for (Block& block : program->blocks) {
      Location loc;
      loc.block = &block;

      std::array<unsigned, 2048> regs; /* register file in bytes */
      regs.fill(0);

      IDSet live = live_vars.live_out[block.index];
      /* remove killed p_phi sgpr operands */
      for (Temp tmp : phi_sgpr_ops[block.index])
         live.erase(tmp.id());

      /* check live out */
      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++) {
            if (regs[reg.reg_b + i]) {
               err |= ra_fail(program, loc, Location(),
                              "Assignment of element %d of %%%d already taken by %%%d in live-out",
                              i, id, regs[reg.reg_b + i]);
            }
            regs[reg.reg_b + i] = id;
         }
      }
      regs.fill(0);

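      /* Walk the block backwards to turn the live-out set into the block's
       * live-in set: definitions end a live range, operands start one (phi
       * operands are excluded because they are copied and killed in the
       * predecessor). */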
      for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
         aco_ptr<Instruction>& instr = *it;

         /* check killed p_phi sgpr operands */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++) {
                  if (regs[reg.reg_b + i])
                     err |= ra_fail(
                        program, loc, Location(),
                        "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
                        tmp.id(), regs[reg.reg_b + i]);
               }
               live.insert(tmp.id());
            }
         }

         for (const Definition& def : instr->definitions) {
            if (!def.isTemp())
               continue;
            live.erase(def.tempId());
         }

         /* don't count phi operands as live-in, since they are actually
          * killed when they are copied at the predecessor */
         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               live.insert(op.tempId());
            }
         }
      }

      for (unsigned id : live) {
         Temp tmp(id, program->temp_rc[id]);
         PhysReg reg = assignments[id].reg;
         for (unsigned i = 0; i < tmp.bytes(); i++)
            regs[reg.reg_b + i] = id;
      }

      for (aco_ptr<Instruction>& instr : block.instructions) {
         loc.instr = instr.get();

         /* remove killed p_phi operands from regs */
         if (instr->opcode == aco_opcode::p_logical_end) {
            for (Temp tmp : phi_sgpr_ops[block.index]) {
               PhysReg reg = assignments[tmp.id()].reg;
               for (unsigned i = 0; i < tmp.bytes(); i++)
                  regs[reg.reg_b + i] = 0;
            }
         }

         if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isFirstKillBeforeDef()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         }

         if (!instr->isBranch() || block.linear_succs.size() != 1)
            err |= validate_instr_defs(program, regs, assignments, loc, instr);

         if (!is_phi(instr)) {
            for (const Operand& op : instr->operands) {
               if (!op.isTemp())
                  continue;
               if (op.isLateKill() && op.isFirstKill()) {
                  for (unsigned j = 0; j < op.getTemp().bytes(); j++)
                     regs[op.physReg().reg_b + j] = 0;
               }
            }
         } else if (block.linear_preds.size() != 1 ||
                    program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
            for (unsigned pred : block.linear_preds) {
               aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
               assert(br->isBranch());
               err |= validate_instr_defs(program, regs, assignments, loc, br);
            }
         }
      }
   }

   return err;
}
} // namespace aco