• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "aco_ir.h"
8 
9 #include "util/u_debug.h"
10 
11 #if AMD_LLVM_AVAILABLE
12 #if defined(_MSC_VER) && defined(restrict)
13 #undef restrict
14 #endif
15 #include "llvm/ac_llvm_util.h"
16 
17 #include "llvm-c/Disassembler.h"
18 #include <llvm/ADT/StringRef.h>
19 #include <llvm/MC/MCDisassembler/MCDisassembler.h>
20 #endif
21 
22 #include <array>
23 #include <iomanip>
24 #include <vector>
25 
26 namespace aco {
27 namespace {
28 
29 std::vector<bool>
get_referenced_blocks(Program * program)30 get_referenced_blocks(Program* program)
31 {
32    std::vector<bool> referenced_blocks(program->blocks.size());
33    referenced_blocks[0] = true;
34    for (Block& block : program->blocks) {
35       for (unsigned succ : block.linear_succs)
36          referenced_blocks[succ] = true;
37    }
38    return referenced_blocks;
39 }
40 
41 void
print_block_markers(FILE * output,Program * program,const std::vector<bool> & referenced_blocks,unsigned * next_block,unsigned pos)42 print_block_markers(FILE* output, Program* program, const std::vector<bool>& referenced_blocks,
43                     unsigned* next_block, unsigned pos)
44 {
45    while (*next_block < program->blocks.size() && pos >= program->blocks[*next_block].offset) {
46       assert(pos == program->blocks[*next_block].offset ||
47              program->blocks[*next_block].instructions.empty());
48       if (referenced_blocks[*next_block])
49          fprintf(output, "BB%u:\n", *next_block);
50       (*next_block)++;
51    }
52 }
53 
/* Writes one listing line: the instruction text left-aligned in a 60 column
 * field, a ';' separator, and then `size` dwords of `binary` starting at index
 * `pos`, each as 8 hex digits.
 */
void
print_instr(FILE* output, const std::vector<uint32_t>& binary, char* instr, unsigned size,
            unsigned pos)
{
   fprintf(output, "%-60s ;", instr);

   unsigned idx = pos;
   for (unsigned remaining = size; remaining > 0; --remaining, ++idx)
      fprintf(output, " %.8x", binary[idx]);

   fputc('\n', output);
}
64 
65 void
print_constant_data(FILE * output,Program * program)66 print_constant_data(FILE* output, Program* program)
67 {
68    if (program->constant_data.empty())
69       return;
70 
71    fputs("\n/* constant data */\n", output);
72    for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
73       fprintf(output, "[%.6u]", i);
74       unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
75       for (unsigned j = 0; j < line_size; j += 4) {
76          unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
77          uint32_t v = 0;
78          memcpy(&v, &program->constant_data[i + j], size);
79          fprintf(output, " %.8x", v);
80       }
81       fputc('\n', output);
82    }
83 }
84 
85 /**
86  * Determines the GPU type to use for CLRXdisasm
87  */
88 const char*
to_clrx_device_name(amd_gfx_level gfx_level,radeon_family family)89 to_clrx_device_name(amd_gfx_level gfx_level, radeon_family family)
90 {
91    switch (gfx_level) {
92    case GFX6:
93       switch (family) {
94       case CHIP_TAHITI: return "tahiti";
95       case CHIP_PITCAIRN: return "pitcairn";
96       case CHIP_VERDE: return "capeverde";
97       case CHIP_OLAND: return "oland";
98       case CHIP_HAINAN: return "hainan";
99       default: return nullptr;
100       }
101    case GFX7:
102       switch (family) {
103       case CHIP_BONAIRE: return "bonaire";
104       case CHIP_KAVERI: return "gfx700";
105       case CHIP_HAWAII: return "hawaii";
106       default: return nullptr;
107       }
108    case GFX8:
109       switch (family) {
110       case CHIP_TONGA: return "tonga";
111       case CHIP_ICELAND: return "iceland";
112       case CHIP_CARRIZO: return "carrizo";
113       case CHIP_FIJI: return "fiji";
114       case CHIP_STONEY: return "stoney";
115       case CHIP_POLARIS10: return "polaris10";
116       case CHIP_POLARIS11: return "polaris11";
117       case CHIP_POLARIS12: return "polaris12";
118       case CHIP_VEGAM: return "polaris11";
119       default: return nullptr;
120       }
121    case GFX9:
122       switch (family) {
123       case CHIP_VEGA10: return "vega10";
124       case CHIP_VEGA12: return "vega12";
125       case CHIP_VEGA20: return "vega20";
126       case CHIP_RAVEN: return "raven";
127       default: return nullptr;
128       }
129    case GFX10:
130       switch (family) {
131       case CHIP_NAVI10: return "gfx1010";
132       case CHIP_NAVI12: return "gfx1011";
133       default: return nullptr;
134       }
135    default: return nullptr;
136    }
137 }
138 
139 bool
get_branch_target(char ** output,Program * program,const std::vector<bool> & referenced_blocks,char ** line_start)140 get_branch_target(char** output, Program* program, const std::vector<bool>& referenced_blocks,
141                   char** line_start)
142 {
143    unsigned pos;
144    if (sscanf(*line_start, ".L%d_0", &pos) != 1)
145       return false;
146    pos /= 4;
147    *line_start = strchr(*line_start, '_') + 2;
148 
149    for (Block& block : program->blocks) {
150       if (referenced_blocks[block.index] && block.offset == pos) {
151          *output += sprintf(*output, "BB%u", block.index);
152          return true;
153       }
154    }
155    return false;
156 }
157 
158 bool
print_asm_clrx(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)159 print_asm_clrx(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
160 {
161 #ifdef _WIN32
162    return true;
163 #else
164    char path[] = "/tmp/fileXXXXXX";
165    char line[2048], command[128];
166    bool ret = false;
167    FILE* p;
168    int fd;
169 
170    const char* gpu_type = to_clrx_device_name(program->gfx_level, program->family);
171 
172    /* Dump the binary into a temporary file. */
173    fd = mkstemp(path);
174    if (fd < 0)
175       return true;
176 
177    for (unsigned i = 0; i < exec_size; i++) {
178       if (write(fd, &binary[i], 4) == -1) {
179          ret = true;
180          goto fail;
181       }
182    }
183 
184    sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path);
185 
186    p = popen(command, "r");
187    if (p) {
188       if (!fgets(line, sizeof(line), p)) {
189          fprintf(output, "clrxdisasm not found\n");
190          pclose(p);
191          ret = true;
192          goto fail;
193       }
194 
195       std::vector<bool> referenced_blocks = get_referenced_blocks(program);
196       unsigned next_block = 0;
197 
198       char prev_instr[2048];
199       unsigned prev_pos = 0;
200       do {
201          char* line_start = line;
202          if (strncmp(line_start, "/*", 2))
203             continue;
204 
205          unsigned pos;
206          if (sscanf(line_start, "/*%x*/", &pos) != 1)
207             continue;
208          pos /= 4u; /* get the dword position */
209 
210          while (strncmp(line_start, "*/", 2))
211             line_start++;
212          line_start += 2;
213 
214          while (line_start[0] == ' ')
215             line_start++;
216          *strchr(line_start, '\n') = 0;
217 
218          if (*line_start == 0)
219             continue; /* not an instruction, only a comment */
220 
221          if (pos != prev_pos) {
222             /* Print the previous instruction, now that we know the encoding size. */
223             print_instr(output, binary, prev_instr, pos - prev_pos, prev_pos);
224             prev_pos = pos;
225          }
226 
227          print_block_markers(output, program, referenced_blocks, &next_block, pos);
228 
229          char* dest = prev_instr;
230          *(dest++) = '\t';
231          while (*line_start) {
232             if (!strncmp(line_start, ".L", 2) &&
233                 get_branch_target(&dest, program, referenced_blocks, &line_start))
234                continue;
235             *(dest++) = *(line_start++);
236          }
237          *(dest++) = 0;
238       } while (fgets(line, sizeof(line), p));
239 
240       if (prev_pos != exec_size)
241          print_instr(output, binary, prev_instr, exec_size - prev_pos, prev_pos);
242 
243       pclose(p);
244 
245       print_constant_data(output, program);
246    }
247 
248 fail:
249    close(fd);
250    unlink(path);
251    return ret;
252 #endif
253 }
254 
255 #if AMD_LLVM_AVAILABLE
256 std::pair<bool, size_t>
disasm_instr(amd_gfx_level gfx_level,LLVMDisasmContextRef disasm,uint32_t * binary,unsigned exec_size,size_t pos,char * outline,unsigned outline_size)257 disasm_instr(amd_gfx_level gfx_level, LLVMDisasmContextRef disasm, uint32_t* binary,
258              unsigned exec_size, size_t pos, char* outline, unsigned outline_size)
259 {
260    size_t l =
261       LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
262                             pos * 4, outline, outline_size);
263 
264    if (gfx_level >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
265        ((binary[pos + 1] & 0x1ff) == 0xff)) {
266       /* v_writelane with literal uses 3 dwords but llvm consumes only 2 */
267       l += 4;
268    }
269 
270    bool invalid = false;
271    size_t size;
272    if (!l &&
273        ((gfx_level >= GFX9 &&
274          (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */
275         (gfx_level >= GFX10 &&
276          (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
277         (gfx_level <= GFX9 &&
278          (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
279         (gfx_level >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
280         (gfx_level == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
281       strcpy(outline, "\tinteger addition + clamp");
282       bool has_literal = gfx_level >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
283                                                 (((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
284       size = 2 + has_literal;
285    } else if (gfx_level >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
286       strcpy(outline, "\tv_cndmask_b32 + sdwa");
287       size = 2;
288    } else if (!l) {
289       strcpy(outline, "(invalid instruction)");
290       size = 1;
291       invalid = true;
292    } else {
293       assert(l % 4 == 0);
294       size = l / 4;
295    }
296 
297    return std::make_pair(invalid, size);
298 }
299 
300 bool
print_asm_llvm(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)301 print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
302 {
303    std::vector<bool> referenced_blocks = get_referenced_blocks(program);
304 
305    std::vector<llvm::SymbolInfoTy> symbols;
306    std::vector<std::array<char, 16>> block_names;
307    block_names.reserve(program->blocks.size());
308    for (Block& block : program->blocks) {
309       if (!referenced_blocks[block.index])
310          continue;
311       std::array<char, 16> name;
312       sprintf(name.data(), "BB%u", block.index);
313       block_names.push_back(name);
314       symbols.emplace_back(block.offset * 4,
315                            llvm::StringRef(block_names[block_names.size() - 1].data()), 0);
316    }
317 
318    const char* features = "";
319    if (program->gfx_level >= GFX10 && program->wave_size == 64) {
320       features = "+wavefrontsize64";
321    }
322 
323    LLVMDisasmContextRef disasm =
324       LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family),
325                                   features, &symbols, 0, NULL, NULL);
326 
327    size_t pos = 0;
328    bool invalid = false;
329    unsigned next_block = 0;
330 
331    unsigned prev_size = 0;
332    unsigned prev_pos = 0;
333    unsigned repeat_count = 0;
334    while (pos <= exec_size) {
335       bool new_block =
336          next_block < program->blocks.size() && pos == program->blocks[next_block].offset;
337       if (pos + prev_size <= exec_size && prev_pos != pos && !new_block &&
338           memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) {
339          repeat_count++;
340          pos += prev_size;
341          continue;
342       } else {
343          if (repeat_count)
344             fprintf(output, "\t(then repeated %u times)\n", repeat_count);
345          repeat_count = 0;
346       }
347 
348       print_block_markers(output, program, referenced_blocks, &next_block, pos);
349 
350       /* For empty last block, only print block marker. */
351       if (pos == exec_size)
352          break;
353 
354       char outline[1024];
355       std::pair<bool, size_t> res = disasm_instr(program->gfx_level, disasm, binary.data(),
356                                                  exec_size, pos, outline, sizeof(outline));
357       invalid |= res.first;
358 
359       print_instr(output, binary, outline, res.second, pos);
360 
361       prev_size = res.second;
362       prev_pos = pos;
363       pos += res.second;
364    }
365    assert(next_block == program->blocks.size());
366 
367    LLVMDisasmDispose(disasm);
368 
369    print_constant_data(output, program);
370 
371    return invalid;
372 }
373 #endif /* AMD_LLVM_AVAILABLE */
374 
375 } /* end namespace */
376 
377 bool
check_print_asm_support(Program * program)378 check_print_asm_support(Program* program)
379 {
380 #if AMD_LLVM_AVAILABLE
381    if (program->gfx_level >= GFX8) {
382       /* LLVM disassembler only supports GFX8+ */
383       const char* name = ac_get_llvm_processor_name(program->family);
384       const char* triple = "amdgcn--";
385       LLVMTargetRef target = ac_get_llvm_target(triple);
386 
387       LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
388          target, triple, name, "", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
389 
390       bool supported = ac_is_llvm_processor_supported(tm, name);
391       LLVMDisposeTargetMachine(tm);
392 
393       if (supported)
394          return true;
395    }
396 #endif
397 
398 #ifndef _WIN32
399    /* Check if CLRX disassembler binary is available and can disassemble the program */
400    return to_clrx_device_name(program->gfx_level, program->family) &&
401           system("clrxdisasm --version > /dev/null 2>&1") == 0;
402 #else
403    return false;
404 #endif
405 }
406 
407 /* Returns true on failure */
408 bool
print_asm(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)409 print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
410 {
411 #if AMD_LLVM_AVAILABLE
412    if (program->gfx_level >= GFX8) {
413       return print_asm_llvm(program, binary, exec_size, output);
414    }
415 #endif
416 
417    return print_asm_clrx(program, binary, exec_size, output);
418 }
419 
420 } // namespace aco
421