• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "aco_ir.h"
26 
27 #include "util/memstream.h"
28 
29 #include <array>
30 #include <map>
31 #include <set>
32 #include <vector>
33 
34 namespace aco {
35 
36 static void
aco_log(Program * program,enum aco_compiler_debug_level level,const char * prefix,const char * file,unsigned line,const char * fmt,va_list args)37 aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
38         const char* file, unsigned line, const char* fmt, va_list args)
39 {
40    char* msg;
41 
42    if (program->debug.shorten_messages) {
43       msg = ralloc_vasprintf(NULL, fmt, args);
44    } else {
45       msg = ralloc_strdup(NULL, prefix);
46       ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
47       ralloc_asprintf_append(&msg, "    ");
48       ralloc_vasprintf_append(&msg, fmt, args);
49    }
50 
51    if (program->debug.func)
52       program->debug.func(program->debug.private_data, level, msg);
53 
54    fprintf(program->debug.output, "%s\n", msg);
55 
56    ralloc_free(msg);
57 }
58 
59 void
_aco_perfwarn(Program * program,const char * file,unsigned line,const char * fmt,...)60 _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
61 {
62    va_list args;
63 
64    va_start(args, fmt);
65    aco_log(program, ACO_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
66    va_end(args);
67 }
68 
69 void
_aco_err(Program * program,const char * file,unsigned line,const char * fmt,...)70 _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
71 {
72    va_list args;
73 
74    va_start(args, fmt);
75    aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
76    va_end(args);
77 }
78 
79 bool
validate_ir(Program * program)80 validate_ir(Program* program)
81 {
82    bool is_valid = true;
83    auto check = [&program, &is_valid](bool success, const char* msg,
84                                       aco::Instruction* instr) -> void
85    {
86       if (!success) {
87          char* out;
88          size_t outsize;
89          struct u_memstream mem;
90          u_memstream_open(&mem, &out, &outsize);
91          FILE* const memf = u_memstream_get(&mem);
92 
93          fprintf(memf, "%s: ", msg);
94          aco_print_instr(instr, memf);
95          u_memstream_close(&mem);
96 
97          aco_err(program, "%s", out);
98          free(out);
99 
100          is_valid = false;
101       }
102    };
103 
104    auto check_block = [&program, &is_valid](bool success, const char* msg,
105                                             aco::Block* block) -> void
106    {
107       if (!success) {
108          aco_err(program, "%s: BB%u", msg, block->index);
109          is_valid = false;
110       }
111    };
112 
113    for (Block& block : program->blocks) {
114       for (aco_ptr<Instruction>& instr : block.instructions) {
115 
116          /* check base format */
117          Format base_format = instr->format;
118          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
119          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
120          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
121          if ((uint32_t)base_format & (uint32_t)Format::VOP1)
122             base_format = Format::VOP1;
123          else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
124             base_format = Format::VOP2;
125          else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
126             base_format = Format::VOPC;
127          else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
128             if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
129                 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
130                 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
131                 instr->opcode == aco_opcode::v_interp_p2_f16) {
132                /* v_interp_*_fp16 are considered VINTRP by the compiler but
133                 * they are emitted as VOP3.
134                 */
135                base_format = Format::VOP3;
136             } else {
137                base_format = Format::VINTRP;
138             }
139          }
140          check(base_format == instr_info.format[(int)instr->opcode],
141                "Wrong base format for instruction", instr.get());
142 
143          /* check VOP3 modifiers */
144          if (instr->isVOP3() && instr->format != Format::VOP3) {
145             check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
146                      base_format == Format::VOPC || base_format == Format::VINTRP,
147                   "Format cannot have VOP3/VOP3B applied", instr.get());
148          }
149 
150          /* check SDWA */
151          if (instr->isSDWA()) {
152             check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
153                      base_format == Format::VOPC,
154                   "Format cannot have SDWA applied", instr.get());
155 
156             check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
157             check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
158 
159             SDWA_instruction& sdwa = instr->sdwa();
160             check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
161                   instr.get());
162             if (base_format == Format::VOPC) {
163                check(sdwa.clamp == false || program->gfx_level == GFX8,
164                      "SDWA VOPC clamp only supported on GFX8", instr.get());
165                check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
166                         program->gfx_level >= GFX9,
167                      "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
168             } else {
169                const Definition& def = instr->definitions[0];
170                check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
171                      instr.get());
172                check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
173                      "SDWA definition selection size must be at most definition size", instr.get());
174                check(
175                   sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
176                   "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
177                check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
178                      instr.get());
179                check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
180                      "SDWA dst_sel size must be definition size for subdword definitions",
181                      instr.get());
182                check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
183                      "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
184             }
185 
186             for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
187                const Operand& op = instr->operands[i];
188                check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
189                check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
190                      "SDWA operand selection size must be at most operand size", instr.get());
191                check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
192                      "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
193                check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
194                      instr.get());
195             }
196             if (instr->operands.size() >= 3) {
197                check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
198                      "3rd operand must be fixed to vcc with SDWA", instr.get());
199             }
200             if (instr->definitions.size() >= 2) {
201                check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
202                      "2nd definition must be fixed to vcc with SDWA", instr.get());
203             }
204 
205             const bool sdwa_opcodes =
206                instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
207                instr->opcode != aco_opcode::v_fmamk_f32 &&
208                instr->opcode != aco_opcode::v_fmaak_f32 &&
209                instr->opcode != aco_opcode::v_fmamk_f16 &&
210                instr->opcode != aco_opcode::v_fmaak_f16 &&
211                instr->opcode != aco_opcode::v_madmk_f32 &&
212                instr->opcode != aco_opcode::v_madak_f32 &&
213                instr->opcode != aco_opcode::v_madmk_f16 &&
214                instr->opcode != aco_opcode::v_madak_f16 &&
215                instr->opcode != aco_opcode::v_readfirstlane_b32 &&
216                instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
217 
218             const bool feature_mac =
219                program->gfx_level == GFX8 &&
220                (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
221 
222             check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
223          }
224 
225          /* check opsel */
226          if (instr->isVOP3()) {
227             VOP3_instruction& vop3 = instr->vop3();
228             check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
229                   instr.get());
230 
231             for (unsigned i = 0; i < 3; i++) {
232                if (i >= instr->operands.size() ||
233                    (instr->operands[i].hasRegClass() &&
234                     instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
235                   check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
236             }
237             if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
238                check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
239                      instr.get());
240          } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
241                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
242                     instr->opcode == aco_opcode::v_fma_mix_f32) {
243             check(instr->definitions[0].regClass() ==
244                      (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
245                   "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
246          } else if (instr->isVOP3P()) {
247             VOP3P_instruction& vop3p = instr->vop3p();
248             for (unsigned i = 0; i < instr->operands.size(); i++) {
249                if (instr->operands[i].hasRegClass() &&
250                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
251                   check((vop3p.opsel_lo & (1 << i)) == 0 && (vop3p.opsel_hi & (1 << i)) == 0,
252                         "Unexpected opsel for subdword operand", instr.get());
253             }
254             check(instr->definitions[0].regClass() == v1, "VOP3P must have v1 definition",
255                   instr.get());
256          }
257 
258          /* check for undefs */
259          for (unsigned i = 0; i < instr->operands.size(); i++) {
260             if (instr->operands[i].isUndefined()) {
261                bool flat = instr->isFlatLike();
262                bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
263                                    instr->opcode == aco_opcode::p_create_vector ||
264                                    instr->opcode == aco_opcode::p_jump_to_epilog ||
265                                    (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
266                                    ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
267                                    (instr->isScratch() && i == 0);
268                check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
269             } else {
270                check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
271                         instr->operands[i].isConstant(),
272                      "Uninitialized Operand", instr.get());
273             }
274          }
275 
276          /* check subdword definitions */
277          for (unsigned i = 0; i < instr->definitions.size(); i++) {
278             if (instr->definitions[i].regClass().is_subdword())
279                check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
280                      "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
281                      instr.get());
282          }
283 
284          if (instr->isSALU() || instr->isVALU()) {
285             /* check literals */
286             Operand literal(s1);
287             for (unsigned i = 0; i < instr->operands.size(); i++) {
288                Operand op = instr->operands[i];
289                if (!op.isLiteral())
290                   continue;
291 
292                check(!instr->isDPP() && !instr->isSDWA() &&
293                         (!instr->isVOP3() || program->gfx_level >= GFX10) &&
294                         (!instr->isVOP3P() || program->gfx_level >= GFX10),
295                      "Literal applied on wrong instruction format", instr.get());
296 
297                check(literal.isUndefined() || (literal.size() == op.size() &&
298                                                literal.constantValue() == op.constantValue()),
299                      "Only 1 Literal allowed", instr.get());
300                literal = op;
301                check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
302                      "Wrong source position for Literal argument", instr.get());
303             }
304 
305             /* check num sgprs for VALU */
306             if (instr->isVALU()) {
307                bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
308                                  instr->opcode == aco_opcode::v_lshrrev_b64 ||
309                                  instr->opcode == aco_opcode::v_ashrrev_i64;
310                unsigned const_bus_limit = 1;
311                if (program->gfx_level >= GFX10 && !is_shift64)
312                   const_bus_limit = 2;
313 
314                uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
315                if (instr->isSDWA())
316                   scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
317                else if (instr->isDPP())
318                   scalar_mask = 0x4;
319 
320                if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
321                    instr->opcode == aco_opcode::v_readlane_b32 ||
322                    instr->opcode == aco_opcode::v_readlane_b32_e64) {
323                   check(instr->definitions[0].getTemp().type() == RegType::sgpr,
324                         "Wrong Definition type for VALU instruction", instr.get());
325                } else {
326                   check(instr->definitions[0].getTemp().type() == RegType::vgpr,
327                         "Wrong Definition type for VALU instruction", instr.get());
328                }
329 
330                unsigned num_sgprs = 0;
331                unsigned sgpr[] = {0, 0};
332                for (unsigned i = 0; i < instr->operands.size(); i++) {
333                   Operand op = instr->operands[i];
334                   if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
335                       instr->opcode == aco_opcode::v_readlane_b32 ||
336                       instr->opcode == aco_opcode::v_readlane_b32_e64) {
337                      check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
338                               op.isConstant(),
339                            "Must be a SGPR or a constant", instr.get());
340                      check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
341                                       op.bytes() <= 4),
342                            "Wrong Operand type for VALU instruction", instr.get());
343                      continue;
344                   }
345                   if (instr->opcode == aco_opcode::v_permlane16_b32 ||
346                       instr->opcode == aco_opcode::v_permlanex16_b32) {
347                      check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
348                            "Operand 0 of v_permlane must be VGPR", instr.get());
349                      check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
350                               op.isConstant(),
351                            "Lane select operands of v_permlane must be SGPR or constant",
352                            instr.get());
353                   }
354 
355                   if (instr->opcode == aco_opcode::v_writelane_b32 ||
356                       instr->opcode == aco_opcode::v_writelane_b32_e64) {
357                      check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
358                                       op.bytes() <= 4),
359                            "Wrong Operand type for VALU instruction", instr.get());
360                      check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
361                               op.isConstant(),
362                            "Must be a SGPR or a constant", instr.get());
363                      continue;
364                   }
365                   if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
366                      check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
367                            instr.get());
368 
369                      if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
370                         if (num_sgprs < 2)
371                            sgpr[num_sgprs++] = op.tempId();
372                      }
373                   }
374 
375                   if (op.isConstant() && !op.isLiteral())
376                      check(scalar_mask & (1 << i), "Wrong source position for constant argument",
377                            instr.get());
378                }
379                check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
380                      "Too many SGPRs/literals", instr.get());
381             }
382 
383             if (instr->isSOP1() || instr->isSOP2()) {
384                if (!instr->definitions.empty())
385                   check(instr->definitions[0].getTemp().type() == RegType::sgpr,
386                         "Wrong Definition type for SALU instruction", instr.get());
387                for (const Operand& op : instr->operands) {
388                   check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
389                         "Wrong Operand type for SALU instruction", instr.get());
390                }
391             }
392          }
393 
394          switch (instr->format) {
395          case Format::PSEUDO: {
396             if (instr->opcode == aco_opcode::p_create_vector) {
397                unsigned size = 0;
398                for (const Operand& op : instr->operands) {
399                   check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
400                   size += op.bytes();
401                }
402                check(size == instr->definitions[0].bytes(),
403                      "Definition size does not match operand sizes", instr.get());
404                if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
405                   for (const Operand& op : instr->operands) {
406                      check(op.isConstant() || op.regClass().type() == RegType::sgpr,
407                            "Wrong Operand type for scalar vector", instr.get());
408                   }
409                }
410             } else if (instr->opcode == aco_opcode::p_extract_vector) {
411                check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
412                      "Wrong Operand types", instr.get());
413                check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
414                         instr->operands[0].bytes(),
415                      "Index out of range", instr.get());
416                check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
417                         instr->operands[0].regClass().type() == RegType::sgpr,
418                      "Cannot extract SGPR value from VGPR vector", instr.get());
419                check(program->gfx_level >= GFX9 ||
420                         !instr->definitions[0].regClass().is_subdword() ||
421                         instr->operands[0].regClass().type() == RegType::vgpr,
422                      "Cannot extract subdword from SGPR before GFX9+", instr.get());
423             } else if (instr->opcode == aco_opcode::p_split_vector) {
424                check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
425                unsigned size = 0;
426                for (const Definition& def : instr->definitions) {
427                   size += def.bytes();
428                }
429                check(size == instr->operands[0].bytes(),
430                      "Operand size does not match definition sizes", instr.get());
431                if (instr->operands[0].getTemp().type() == RegType::vgpr) {
432                   for (const Definition& def : instr->definitions)
433                      check(def.regClass().type() == RegType::vgpr,
434                            "Wrong Definition type for VGPR split_vector", instr.get());
435                } else {
436                   for (const Definition& def : instr->definitions)
437                      check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
438                            "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
439                }
440             } else if (instr->opcode == aco_opcode::p_parallelcopy) {
441                check(instr->definitions.size() == instr->operands.size(),
442                      "Number of Operands does not match number of Definitions", instr.get());
443                for (unsigned i = 0; i < instr->operands.size(); i++) {
444                   check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
445                         "Operand and Definition size must match", instr.get());
446                   if (instr->operands[i].isTemp()) {
447                      check((instr->definitions[i].getTemp().type() ==
448                             instr->operands[i].regClass().type()) ||
449                               (instr->definitions[i].getTemp().type() == RegType::vgpr &&
450                                instr->operands[i].regClass().type() == RegType::sgpr),
451                            "Operand and Definition types do not match", instr.get());
452                      check(instr->definitions[i].regClass().is_linear_vgpr() ==
453                               instr->operands[i].regClass().is_linear_vgpr(),
454                            "Operand and Definition types do not match", instr.get());
455                   } else {
456                      check(!instr->definitions[i].regClass().is_linear_vgpr(),
457                            "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
458                            instr.get());
459                   }
460                }
461             } else if (instr->opcode == aco_opcode::p_phi) {
462                check(instr->operands.size() == block.logical_preds.size(),
463                      "Number of Operands does not match number of predecessors", instr.get());
464                check(instr->definitions[0].getTemp().type() == RegType::vgpr,
465                      "Logical Phi Definition must be vgpr", instr.get());
466                for (const Operand& op : instr->operands)
467                   check(instr->definitions[0].size() == op.size(),
468                         "Operand sizes must match Definition size", instr.get());
469             } else if (instr->opcode == aco_opcode::p_linear_phi) {
470                for (const Operand& op : instr->operands) {
471                   check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
472                         instr.get());
473                   check(instr->definitions[0].size() == op.size(),
474                         "Operand sizes must match Definition size", instr.get());
475                }
476                check(instr->operands.size() == block.linear_preds.size(),
477                      "Number of Operands does not match number of predecessors", instr.get());
478             } else if (instr->opcode == aco_opcode::p_extract ||
479                        instr->opcode == aco_opcode::p_insert) {
480                check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
481                check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
482                if (instr->opcode == aco_opcode::p_extract)
483                   check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
484                         instr.get());
485 
486                check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
487                         instr->operands[0].getTemp().type() == RegType::sgpr,
488                      "Can't extract/insert VGPR to SGPR", instr.get());
489 
490                if (instr->opcode == aco_opcode::p_insert)
491                   check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
492                         "Sizes of p_insert data operand and definition must match", instr.get());
493 
494                if (instr->definitions[0].getTemp().type() == RegType::sgpr)
495                   check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
496                            instr->definitions[1].physReg() == scc,
497                         "SGPR extract/insert needs an SCC definition", instr.get());
498 
499                unsigned data_bits = instr->operands[0].getTemp().bytes() * 8u;
500                unsigned op_bits = instr->operands[2].constantValue();
501 
502                if (instr->opcode == aco_opcode::p_insert) {
503                   check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
504                   check(op_bits < data_bits, "Size must be smaller than source", instr.get());
505                } else if (instr->opcode == aco_opcode::p_extract) {
506                   check(op_bits == 8 || op_bits == 16 || op_bits == 32,
507                         "Size must be 8 or 16 or 32", instr.get());
508                   check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
509                         instr.get());
510                }
511 
512                unsigned comp = data_bits / MAX2(op_bits, 1);
513                check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
514                      instr.get());
515             } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
516                check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
517                      instr.get());
518                check(instr->operands.size() > 0 &&
519                         instr->operands[0].getTemp().type() == RegType::sgpr &&
520                         instr->operands[0].getTemp().size() == 2,
521                      "First operand of p_jump_to_epilog must be a SGPR", instr.get());
522                for (unsigned i = 1; i < instr->operands.size(); i++) {
523                   check(instr->operands[i].getTemp().type() == RegType::vgpr ||
524                            instr->operands[i].isUndefined(),
525                         "Other operands of p_jump_to_epilog must be VGPRs or undef", instr.get());
526                }
527             }
528             break;
529          }
530          case Format::PSEUDO_REDUCTION: {
531             for (const Operand& op : instr->operands)
532                check(op.regClass().type() == RegType::vgpr,
533                      "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
534                      instr.get());
535 
536             if (instr->opcode == aco_opcode::p_reduce &&
537                 instr->reduction().cluster_size == program->wave_size)
538                check(instr->definitions[0].regClass().type() == RegType::sgpr ||
539                         program->wave_size == 32,
540                      "The result of unclustered reductions must go into an SGPR.", instr.get());
541             else
542                check(instr->definitions[0].regClass().type() == RegType::vgpr,
543                      "The result of scans and clustered reductions must go into a VGPR.",
544                      instr.get());
545 
546             break;
547          }
548          case Format::SMEM: {
549             if (instr->operands.size() >= 1)
550                check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
551                         (instr->operands[0].isTemp() &&
552                          instr->operands[0].regClass().type() == RegType::sgpr),
553                      "SMEM operands must be sgpr", instr.get());
554             if (instr->operands.size() >= 2)
555                check(instr->operands[1].isConstant() ||
556                         (instr->operands[1].isTemp() &&
557                          instr->operands[1].regClass().type() == RegType::sgpr),
558                      "SMEM offset must be constant or sgpr", instr.get());
559             if (!instr->definitions.empty())
560                check(instr->definitions[0].getTemp().type() == RegType::sgpr,
561                      "SMEM result must be sgpr", instr.get());
562             break;
563          }
564          case Format::MTBUF:
565          case Format::MUBUF: {
566             check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
567                   instr.get());
568             check(instr->operands[1].hasRegClass() &&
569                      instr->operands[1].regClass().type() == RegType::vgpr,
570                   "VADDR must be in vgpr for VMEM instructions", instr.get());
571             check(
572                instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
573                "VMEM resource constant must be sgpr", instr.get());
574             check(instr->operands.size() < 4 ||
575                      (instr->operands[3].isTemp() &&
576                       instr->operands[3].regClass().type() == RegType::vgpr),
577                   "VMEM write data must be vgpr", instr.get());
578 
579             const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
580                              instr->opcode == aco_opcode::buffer_load_ubyte ||
581                              instr->opcode == aco_opcode::buffer_load_sbyte ||
582                              instr->opcode == aco_opcode::buffer_load_ushort ||
583                              instr->opcode == aco_opcode::buffer_load_sshort ||
584                              instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
585                              instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
586                              instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
587                              instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
588                              instr->opcode == aco_opcode::buffer_load_short_d16 ||
589                              instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
590                              instr->opcode == aco_opcode::buffer_load_format_d16_x ||
591                              instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
592                              instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
593                              instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
594                              instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
595                              instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
596                              instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
597                              instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
598                              instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
599             if (instr->definitions.size()) {
600                check(instr->definitions[0].isTemp() &&
601                         instr->definitions[0].regClass().type() == RegType::vgpr,
602                      "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
603                check(d16 || !instr->definitions[0].regClass().is_subdword(),
604                      "Only D16 opcodes can load subdword values.", instr.get());
605                check(instr->definitions[0].bytes() <= 8 || !d16,
606                      "D16 opcodes can only load up to 8 bytes.", instr.get());
607             }
608             break;
609          }
610          case Format::MIMG: {
611             check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
612                   instr.get());
613             check(instr->operands[0].hasRegClass() &&
614                      (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
615                   "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
616             if (instr->operands[1].hasRegClass())
617                check(instr->operands[1].regClass() == s4,
618                      "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
619             if (!instr->operands[2].isUndefined()) {
620                bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
621                                  instr->opcode == aco_opcode::image_atomic_fcmpswap;
622                check(instr->definitions.empty() ||
623                         (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
624                          is_cmpswap),
625                      "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
626                      "TFE/LWE loads",
627                      instr.get());
628             }
629             check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
630                   "NSA is only supported on GFX10+", instr.get());
631             for (unsigned i = 3; i < instr->operands.size(); i++) {
632                if (instr->operands.size() == 4) {
633                   check(instr->operands[i].hasRegClass() &&
634                            instr->operands[i].regClass().type() == RegType::vgpr,
635                         "MIMG operands[3] (VADDR) must be VGPR", instr.get());
636                } else {
637                   check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
638                         instr.get());
639                }
640             }
641 
642             if (instr->definitions.size()) {
643                check(instr->definitions[0].isTemp() &&
644                         instr->definitions[0].regClass().type() == RegType::vgpr,
645                      "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
646                check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
647                      "Only D16 MIMG instructions can load subdword values.", instr.get());
648                check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
649                      "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
650             }
651             break;
652          }
653          case Format::DS: {
654             for (const Operand& op : instr->operands) {
655                check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
656                      "Only VGPRs are valid DS instruction operands", instr.get());
657             }
658             if (!instr->definitions.empty())
659                check(instr->definitions[0].getTemp().type() == RegType::vgpr,
660                      "DS instruction must return VGPR", instr.get());
661             break;
662          }
663          case Format::EXP: {
664             for (unsigned i = 0; i < 4; i++)
665                check(instr->operands[i].hasRegClass() &&
666                         instr->operands[i].regClass().type() == RegType::vgpr,
667                      "Only VGPRs are valid Export arguments", instr.get());
668             break;
669          }
670          case Format::FLAT:
671             check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
672                   instr.get());
673             FALLTHROUGH;
674          case Format::GLOBAL:
675             check(
676                instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
677                "FLAT/GLOBAL address must be vgpr", instr.get());
678             FALLTHROUGH;
679          case Format::SCRATCH: {
680             check(instr->operands[0].hasRegClass() &&
681                      instr->operands[0].regClass().type() == RegType::vgpr,
682                   "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
683             check(instr->operands[1].hasRegClass() &&
684                      instr->operands[1].regClass().type() == RegType::sgpr,
685                   "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
686             if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
687                check(instr->operands[0].isTemp() || instr->operands[1].isTemp(),
688                      "SCRATCH must have either SADDR or ADDR operand", instr.get());
689             if (!instr->definitions.empty())
690                check(instr->definitions[0].getTemp().type() == RegType::vgpr,
691                      "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
692             else
693                check(instr->operands[2].regClass().type() == RegType::vgpr,
694                      "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
695             break;
696          }
697          default: break;
698          }
699       }
700    }
701 
702    /* validate CFG */
703    for (unsigned i = 0; i < program->blocks.size(); i++) {
704       Block& block = program->blocks[i];
705       check_block(block.index == i, "block.index must match actual index", &block);
706 
707       /* predecessors/successors should be sorted */
708       for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
709          check_block(block.linear_preds[j] < block.linear_preds[j + 1],
710                      "linear predecessors must be sorted", &block);
711       for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
712          check_block(block.logical_preds[j] < block.logical_preds[j + 1],
713                      "logical predecessors must be sorted", &block);
714       for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
715          check_block(block.linear_succs[j] < block.linear_succs[j + 1],
716                      "linear successors must be sorted", &block);
717       for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
718          check_block(block.logical_succs[j] < block.logical_succs[j + 1],
719                      "logical successors must be sorted", &block);
720 
721       /* critical edges are not allowed */
722       if (block.linear_preds.size() > 1) {
723          for (unsigned pred : block.linear_preds)
724             check_block(program->blocks[pred].linear_succs.size() == 1,
725                         "linear critical edges are not allowed", &program->blocks[pred]);
726          for (unsigned pred : block.logical_preds)
727             check_block(program->blocks[pred].logical_succs.size() == 1,
728                         "logical critical edges are not allowed", &program->blocks[pred]);
729       }
730    }
731 
732    return is_valid;
733 }
734 
735 /* RA validation */
736 namespace {
737 
738 struct Location {
Locationaco::__anona98ac8fc0311::Location739    Location() : block(NULL), instr(NULL) {}
740 
741    Block* block;
742    Instruction* instr; // NULL if it's the block's live-in
743 };
744 
745 struct Assignment {
746    Location defloc;
747    Location firstloc;
748    PhysReg reg;
749    bool valid;
750 };
751 
752 bool
ra_fail(Program * program,Location loc,Location loc2,const char * fmt,...)753 ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
754 {
755    va_list args;
756    va_start(args, fmt);
757    char msg[1024];
758    vsprintf(msg, fmt, args);
759    va_end(args);
760 
761    char* out;
762    size_t outsize;
763    struct u_memstream mem;
764    u_memstream_open(&mem, &out, &outsize);
765    FILE* const memf = u_memstream_get(&mem);
766 
767    fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
768    if (loc.instr) {
769       aco_print_instr(loc.instr, memf);
770       fprintf(memf, "\n%s", msg);
771    } else {
772       fprintf(memf, "%s", msg);
773    }
774    if (loc2.block) {
775       fprintf(memf, " in BB%d:\n", loc2.block->index);
776       aco_print_instr(loc2.instr, memf);
777    }
778    fprintf(memf, "\n\n");
779    u_memstream_close(&mem);
780 
781    aco_err(program, "%s", out);
782    free(out);
783 
784    return true;
785 }
786 
787 bool
validate_subdword_operand(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr,unsigned index)788 validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
789                           unsigned index)
790 {
791    Operand op = instr->operands[index];
792    unsigned byte = op.physReg().byte();
793 
794    if (instr->opcode == aco_opcode::p_as_uniform)
795       return byte == 0;
796    if (instr->isPseudo() && gfx_level >= GFX8)
797       return true;
798    if (instr->isSDWA())
799       return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
800              byte % instr->sdwa().sel[index].size() == 0;
801    if (instr->isVOP3P()) {
802       bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
803                      instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
804                      instr->opcode == aco_opcode::v_fma_mix_f32;
805       return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
806              ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
807    }
808    if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
809       return true;
810 
811    switch (instr->opcode) {
812    case aco_opcode::v_cvt_f32_ubyte1:
813       if (byte == 1)
814          return true;
815       break;
816    case aco_opcode::v_cvt_f32_ubyte2:
817       if (byte == 2)
818          return true;
819       break;
820    case aco_opcode::v_cvt_f32_ubyte3:
821       if (byte == 3)
822          return true;
823       break;
824    case aco_opcode::ds_write_b8_d16_hi:
825    case aco_opcode::ds_write_b16_d16_hi:
826       if (byte == 2 && index == 1)
827          return true;
828       break;
829    case aco_opcode::buffer_store_byte_d16_hi:
830    case aco_opcode::buffer_store_short_d16_hi:
831    case aco_opcode::buffer_store_format_d16_hi_x:
832       if (byte == 2 && index == 3)
833          return true;
834       break;
835    case aco_opcode::flat_store_byte_d16_hi:
836    case aco_opcode::flat_store_short_d16_hi:
837    case aco_opcode::scratch_store_byte_d16_hi:
838    case aco_opcode::scratch_store_short_d16_hi:
839    case aco_opcode::global_store_byte_d16_hi:
840    case aco_opcode::global_store_short_d16_hi:
841       if (byte == 2 && index == 2)
842          return true;
843       break;
844    default: break;
845    }
846 
847    return byte == 0;
848 }
849 
850 bool
validate_subdword_definition(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr)851 validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
852 {
853    Definition def = instr->definitions[0];
854    unsigned byte = def.physReg().byte();
855 
856    if (instr->isPseudo() && gfx_level >= GFX8)
857       return true;
858    if (instr->isSDWA())
859       return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
860              byte % instr->sdwa().dst_sel.size() == 0;
861    if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
862       return true;
863 
864    switch (instr->opcode) {
865    case aco_opcode::v_fma_mixhi_f16:
866    case aco_opcode::buffer_load_ubyte_d16_hi:
867    case aco_opcode::buffer_load_sbyte_d16_hi:
868    case aco_opcode::buffer_load_short_d16_hi:
869    case aco_opcode::buffer_load_format_d16_hi_x:
870    case aco_opcode::flat_load_ubyte_d16_hi:
871    case aco_opcode::flat_load_short_d16_hi:
872    case aco_opcode::scratch_load_ubyte_d16_hi:
873    case aco_opcode::scratch_load_short_d16_hi:
874    case aco_opcode::global_load_ubyte_d16_hi:
875    case aco_opcode::global_load_short_d16_hi:
876    case aco_opcode::ds_read_u8_d16_hi:
877    case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
878    default: break;
879    }
880 
881    return byte == 0;
882 }
883 
884 unsigned
get_subdword_bytes_written(Program * program,const aco_ptr<Instruction> & instr,unsigned index)885 get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
886 {
887    amd_gfx_level gfx_level = program->gfx_level;
888    Definition def = instr->definitions[index];
889 
890    if (instr->isPseudo())
891       return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
892    if (instr->isVALU()) {
893       assert(def.bytes() <= 2);
894       if (instr->isSDWA())
895          return instr->sdwa().dst_sel.size();
896 
897       if (instr_is_16bit(gfx_level, instr->opcode))
898          return 2;
899 
900       return 4;
901    }
902 
903    if (instr->isMIMG()) {
904       assert(instr->mimg().d16);
905       return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
906    }
907 
908    switch (instr->opcode) {
909    case aco_opcode::buffer_load_ubyte_d16:
910    case aco_opcode::buffer_load_sbyte_d16:
911    case aco_opcode::buffer_load_short_d16:
912    case aco_opcode::buffer_load_format_d16_x:
913    case aco_opcode::tbuffer_load_format_d16_x:
914    case aco_opcode::flat_load_ubyte_d16:
915    case aco_opcode::flat_load_short_d16:
916    case aco_opcode::scratch_load_ubyte_d16:
917    case aco_opcode::scratch_load_short_d16:
918    case aco_opcode::global_load_ubyte_d16:
919    case aco_opcode::global_load_short_d16:
920    case aco_opcode::ds_read_u8_d16:
921    case aco_opcode::ds_read_u16_d16:
922    case aco_opcode::buffer_load_ubyte_d16_hi:
923    case aco_opcode::buffer_load_sbyte_d16_hi:
924    case aco_opcode::buffer_load_short_d16_hi:
925    case aco_opcode::buffer_load_format_d16_hi_x:
926    case aco_opcode::flat_load_ubyte_d16_hi:
927    case aco_opcode::flat_load_short_d16_hi:
928    case aco_opcode::scratch_load_ubyte_d16_hi:
929    case aco_opcode::scratch_load_short_d16_hi:
930    case aco_opcode::global_load_ubyte_d16_hi:
931    case aco_opcode::global_load_short_d16_hi:
932    case aco_opcode::ds_read_u8_d16_hi:
933    case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
934    case aco_opcode::buffer_load_format_d16_xyz:
935    case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
936    default: return def.size() * 4;
937    }
938 }
939 
940 bool
validate_instr_defs(Program * program,std::array<unsigned,2048> & regs,const std::vector<Assignment> & assignments,const Location & loc,aco_ptr<Instruction> & instr)941 validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
942                     const std::vector<Assignment>& assignments, const Location& loc,
943                     aco_ptr<Instruction>& instr)
944 {
945    bool err = false;
946 
947    for (unsigned i = 0; i < instr->definitions.size(); i++) {
948       Definition& def = instr->definitions[i];
949       if (!def.isTemp())
950          continue;
951       Temp tmp = def.getTemp();
952       PhysReg reg = assignments[tmp.id()].reg;
953       for (unsigned j = 0; j < tmp.bytes(); j++) {
954          if (regs[reg.reg_b + j])
955             err |=
956                ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
957                        "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
958                        tmp.id(), regs[reg.reg_b + j]);
959          regs[reg.reg_b + j] = tmp.id();
960       }
961       if (def.regClass().is_subdword() && def.bytes() < 4) {
962          unsigned written = get_subdword_bytes_written(program, instr, i);
963          /* If written=4, the instruction still might write the upper half. In that case, it's
964           * the lower half that isn't preserved */
965          for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
966             unsigned written_reg = reg.reg() * 4u + j;
967             if (regs[written_reg] && regs[written_reg] != def.tempId())
968                err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
969                               "Assignment of element %d of %%%d overwrites the full register "
970                               "taken by %%%d from instruction",
971                               i, tmp.id(), regs[written_reg]);
972          }
973       }
974    }
975 
976    for (const Definition& def : instr->definitions) {
977       if (!def.isTemp())
978          continue;
979       if (def.isKill()) {
980          for (unsigned j = 0; j < def.getTemp().bytes(); j++)
981             regs[def.physReg().reg_b + j] = 0;
982       }
983    }
984 
985    return err;
986 }
987 
988 } /* end namespace */
989 
990 bool
validate_ra(Program * program)991 validate_ra(Program* program)
992 {
993    if (!(debug_flags & DEBUG_VALIDATE_RA))
994       return false;
995 
996    bool err = false;
997    aco::live live_vars = aco::live_var_analysis(program);
998    std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
999    uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
1000 
1001    std::vector<Assignment> assignments(program->peekAllocationId());
1002    for (Block& block : program->blocks) {
1003       Location loc;
1004       loc.block = &block;
1005       for (aco_ptr<Instruction>& instr : block.instructions) {
1006          if (instr->opcode == aco_opcode::p_phi) {
1007             for (unsigned i = 0; i < instr->operands.size(); i++) {
1008                if (instr->operands[i].isTemp() &&
1009                    instr->operands[i].getTemp().type() == RegType::sgpr &&
1010                    instr->operands[i].isFirstKill())
1011                   phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
1012             }
1013          }
1014 
1015          loc.instr = instr.get();
1016          for (unsigned i = 0; i < instr->operands.size(); i++) {
1017             Operand& op = instr->operands[i];
1018             if (!op.isTemp())
1019                continue;
1020             if (!op.isFixed())
1021                err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
1022             if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
1023                err |=
1024                   ra_fail(program, loc, assignments[op.tempId()].firstloc,
1025                           "Operand %d has an inconsistent register assignment with instruction", i);
1026             if ((op.getTemp().type() == RegType::vgpr &&
1027                  op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
1028                 (op.getTemp().type() == RegType::sgpr &&
1029                  op.physReg() + op.size() > program->config->num_sgprs &&
1030                  op.physReg() < sgpr_limit))
1031                err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
1032                               "Operand %d has an out-of-bounds register assignment", i);
1033             if (op.physReg() == vcc && !program->needs_vcc)
1034                err |= ra_fail(program, loc, Location(),
1035                               "Operand %d fixed to vcc but needs_vcc=false", i);
1036             if (op.regClass().is_subdword() &&
1037                 !validate_subdword_operand(program->gfx_level, instr, i))
1038                err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
1039             if (!assignments[op.tempId()].firstloc.block)
1040                assignments[op.tempId()].firstloc = loc;
1041             if (!assignments[op.tempId()].defloc.block) {
1042                assignments[op.tempId()].reg = op.physReg();
1043                assignments[op.tempId()].valid = true;
1044             }
1045          }
1046 
1047          for (unsigned i = 0; i < instr->definitions.size(); i++) {
1048             Definition& def = instr->definitions[i];
1049             if (!def.isTemp())
1050                continue;
1051             if (!def.isFixed())
1052                err |=
1053                   ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
1054             if (assignments[def.tempId()].defloc.block)
1055                err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
1056                               "Temporary %%%d also defined by instruction", def.tempId());
1057             if ((def.getTemp().type() == RegType::vgpr &&
1058                  def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
1059                 (def.getTemp().type() == RegType::sgpr &&
1060                  def.physReg() + def.size() > program->config->num_sgprs &&
1061                  def.physReg() < sgpr_limit))
1062                err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
1063                               "Definition %d has an out-of-bounds register assignment", i);
1064             if (def.physReg() == vcc && !program->needs_vcc)
1065                err |= ra_fail(program, loc, Location(),
1066                               "Definition %d fixed to vcc but needs_vcc=false", i);
1067             if (def.regClass().is_subdword() &&
1068                 !validate_subdword_definition(program->gfx_level, instr))
1069                err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
1070             if (!assignments[def.tempId()].firstloc.block)
1071                assignments[def.tempId()].firstloc = loc;
1072             assignments[def.tempId()].defloc = loc;
1073             assignments[def.tempId()].reg = def.physReg();
1074             assignments[def.tempId()].valid = true;
1075          }
1076       }
1077    }
1078 
1079    for (Block& block : program->blocks) {
1080       Location loc;
1081       loc.block = &block;
1082 
1083       std::array<unsigned, 2048> regs; /* register file in bytes */
1084       regs.fill(0);
1085 
1086       IDSet live = live_vars.live_out[block.index];
1087       /* remove killed p_phi sgpr operands */
1088       for (Temp tmp : phi_sgpr_ops[block.index])
1089          live.erase(tmp.id());
1090 
1091       /* check live out */
1092       for (unsigned id : live) {
1093          Temp tmp(id, program->temp_rc[id]);
1094          PhysReg reg = assignments[id].reg;
1095          for (unsigned i = 0; i < tmp.bytes(); i++) {
1096             if (regs[reg.reg_b + i]) {
1097                err |= ra_fail(program, loc, Location(),
1098                               "Assignment of element %d of %%%d already taken by %%%d in live-out",
1099                               i, id, regs[reg.reg_b + i]);
1100             }
1101             regs[reg.reg_b + i] = id;
1102          }
1103       }
1104       regs.fill(0);
1105 
1106       for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
1107          aco_ptr<Instruction>& instr = *it;
1108 
1109          /* check killed p_phi sgpr operands */
1110          if (instr->opcode == aco_opcode::p_logical_end) {
1111             for (Temp tmp : phi_sgpr_ops[block.index]) {
1112                PhysReg reg = assignments[tmp.id()].reg;
1113                for (unsigned i = 0; i < tmp.bytes(); i++) {
1114                   if (regs[reg.reg_b + i])
1115                      err |= ra_fail(
1116                         program, loc, Location(),
1117                         "Assignment of element %d of %%%d already taken by %%%d in live-out", i,
1118                         tmp.id(), regs[reg.reg_b + i]);
1119                }
1120                live.insert(tmp.id());
1121             }
1122          }
1123 
1124          for (const Definition& def : instr->definitions) {
1125             if (!def.isTemp())
1126                continue;
1127             live.erase(def.tempId());
1128          }
1129 
1130          /* don't count phi operands as live-in, since they are actually
1131           * killed when they are copied at the predecessor */
1132          if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1133             for (const Operand& op : instr->operands) {
1134                if (!op.isTemp())
1135                   continue;
1136                live.insert(op.tempId());
1137             }
1138          }
1139       }
1140 
1141       for (unsigned id : live) {
1142          Temp tmp(id, program->temp_rc[id]);
1143          PhysReg reg = assignments[id].reg;
1144          for (unsigned i = 0; i < tmp.bytes(); i++)
1145             regs[reg.reg_b + i] = id;
1146       }
1147 
1148       for (aco_ptr<Instruction>& instr : block.instructions) {
1149          loc.instr = instr.get();
1150 
1151          /* remove killed p_phi operands from regs */
1152          if (instr->opcode == aco_opcode::p_logical_end) {
1153             for (Temp tmp : phi_sgpr_ops[block.index]) {
1154                PhysReg reg = assignments[tmp.id()].reg;
1155                for (unsigned i = 0; i < tmp.bytes(); i++)
1156                   regs[reg.reg_b + i] = 0;
1157             }
1158          }
1159 
1160          if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1161             for (const Operand& op : instr->operands) {
1162                if (!op.isTemp())
1163                   continue;
1164                if (op.isFirstKillBeforeDef()) {
1165                   for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1166                      regs[op.physReg().reg_b + j] = 0;
1167                }
1168             }
1169          }
1170 
1171          if (!instr->isBranch() || block.linear_succs.size() != 1)
1172             err |= validate_instr_defs(program, regs, assignments, loc, instr);
1173 
1174          if (!is_phi(instr)) {
1175             for (const Operand& op : instr->operands) {
1176                if (!op.isTemp())
1177                   continue;
1178                if (op.isLateKill() && op.isFirstKill()) {
1179                   for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1180                      regs[op.physReg().reg_b + j] = 0;
1181                }
1182             }
1183          } else if (block.linear_preds.size() != 1 ||
1184                     program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
1185             for (unsigned pred : block.linear_preds) {
1186                aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
1187                assert(br->isBranch());
1188                err |= validate_instr_defs(program, regs, assignments, loc, br);
1189             }
1190          }
1191       }
1192    }
1193 
1194    return err;
1195 }
1196 } // namespace aco
1197