1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "aco_ir.h"
8
9 #include "util/u_debug.h"
10
11 #if AMD_LLVM_AVAILABLE
12 #if defined(_MSC_VER) && defined(restrict)
13 #undef restrict
14 #endif
15 #include "llvm/ac_llvm_util.h"
16
17 #include "llvm-c/Disassembler.h"
18 #include <llvm/ADT/StringRef.h>
19 #include <llvm/MC/MCDisassembler/MCDisassembler.h>
20 #endif
21
22 #include <array>
23 #include <iomanip>
24 #include <vector>
25
26 namespace aco {
27 namespace {
28
29 std::vector<bool>
get_referenced_blocks(Program * program)30 get_referenced_blocks(Program* program)
31 {
32 std::vector<bool> referenced_blocks(program->blocks.size());
33 referenced_blocks[0] = true;
34 for (Block& block : program->blocks) {
35 for (unsigned succ : block.linear_succs)
36 referenced_blocks[succ] = true;
37 }
38 return referenced_blocks;
39 }
40
41 void
print_block_markers(FILE * output,Program * program,const std::vector<bool> & referenced_blocks,unsigned * next_block,unsigned pos)42 print_block_markers(FILE* output, Program* program, const std::vector<bool>& referenced_blocks,
43 unsigned* next_block, unsigned pos)
44 {
45 while (*next_block < program->blocks.size() && pos >= program->blocks[*next_block].offset) {
46 assert(pos == program->blocks[*next_block].offset ||
47 program->blocks[*next_block].instructions.empty());
48 if (referenced_blocks[*next_block])
49 fprintf(output, "BB%u:\n", *next_block);
50 (*next_block)++;
51 }
52 }
53
/**
 * Prints one disassembled instruction followed by its encoding.
 *
 * The instruction text is left-aligned in a 60 character column, followed by
 * " ;" and then `size` dwords of `binary` starting at index `pos`, each
 * printed as 8 hex digits.
 */
void
print_instr(FILE* output, const std::vector<uint32_t>& binary, char* instr, unsigned size,
            unsigned pos)
{
   fprintf(output, "%-60s ;", instr);

   unsigned idx = pos;
   for (unsigned remaining = size; remaining != 0; remaining--, idx++)
      fprintf(output, " %.8x", binary[idx]);

   fputc('\n', output);
}
64
65 void
print_constant_data(FILE * output,Program * program)66 print_constant_data(FILE* output, Program* program)
67 {
68 if (program->constant_data.empty())
69 return;
70
71 fputs("\n/* constant data */\n", output);
72 for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
73 fprintf(output, "[%.6u]", i);
74 unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
75 for (unsigned j = 0; j < line_size; j += 4) {
76 unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
77 uint32_t v = 0;
78 memcpy(&v, &program->constant_data[i + j], size);
79 fprintf(output, " %.8x", v);
80 }
81 fputc('\n', output);
82 }
83 }
84
85 /**
86 * Determines the GPU type to use for CLRXdisasm
87 */
88 const char*
to_clrx_device_name(amd_gfx_level gfx_level,radeon_family family)89 to_clrx_device_name(amd_gfx_level gfx_level, radeon_family family)
90 {
91 switch (gfx_level) {
92 case GFX6:
93 switch (family) {
94 case CHIP_TAHITI: return "tahiti";
95 case CHIP_PITCAIRN: return "pitcairn";
96 case CHIP_VERDE: return "capeverde";
97 case CHIP_OLAND: return "oland";
98 case CHIP_HAINAN: return "hainan";
99 default: return nullptr;
100 }
101 case GFX7:
102 switch (family) {
103 case CHIP_BONAIRE: return "bonaire";
104 case CHIP_KAVERI: return "gfx700";
105 case CHIP_HAWAII: return "hawaii";
106 default: return nullptr;
107 }
108 case GFX8:
109 switch (family) {
110 case CHIP_TONGA: return "tonga";
111 case CHIP_ICELAND: return "iceland";
112 case CHIP_CARRIZO: return "carrizo";
113 case CHIP_FIJI: return "fiji";
114 case CHIP_STONEY: return "stoney";
115 case CHIP_POLARIS10: return "polaris10";
116 case CHIP_POLARIS11: return "polaris11";
117 case CHIP_POLARIS12: return "polaris12";
118 case CHIP_VEGAM: return "polaris11";
119 default: return nullptr;
120 }
121 case GFX9:
122 switch (family) {
123 case CHIP_VEGA10: return "vega10";
124 case CHIP_VEGA12: return "vega12";
125 case CHIP_VEGA20: return "vega20";
126 case CHIP_RAVEN: return "raven";
127 default: return nullptr;
128 }
129 case GFX10:
130 switch (family) {
131 case CHIP_NAVI10: return "gfx1010";
132 case CHIP_NAVI12: return "gfx1011";
133 default: return nullptr;
134 }
135 default: return nullptr;
136 }
137 }
138
139 bool
get_branch_target(char ** output,Program * program,const std::vector<bool> & referenced_blocks,char ** line_start)140 get_branch_target(char** output, Program* program, const std::vector<bool>& referenced_blocks,
141 char** line_start)
142 {
143 unsigned pos;
144 if (sscanf(*line_start, ".L%d_0", &pos) != 1)
145 return false;
146 pos /= 4;
147 *line_start = strchr(*line_start, '_') + 2;
148
149 for (Block& block : program->blocks) {
150 if (referenced_blocks[block.index] && block.offset == pos) {
151 *output += sprintf(*output, "BB%u", block.index);
152 return true;
153 }
154 }
155 return false;
156 }
157
158 bool
print_asm_clrx(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)159 print_asm_clrx(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
160 {
161 #ifdef _WIN32
162 return true;
163 #else
164 char path[] = "/tmp/fileXXXXXX";
165 char line[2048], command[128];
166 bool ret = false;
167 FILE* p;
168 int fd;
169
170 const char* gpu_type = to_clrx_device_name(program->gfx_level, program->family);
171
172 /* Dump the binary into a temporary file. */
173 fd = mkstemp(path);
174 if (fd < 0)
175 return true;
176
177 for (unsigned i = 0; i < exec_size; i++) {
178 if (write(fd, &binary[i], 4) == -1) {
179 ret = true;
180 goto fail;
181 }
182 }
183
184 sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path);
185
186 p = popen(command, "r");
187 if (p) {
188 if (!fgets(line, sizeof(line), p)) {
189 fprintf(output, "clrxdisasm not found\n");
190 pclose(p);
191 ret = true;
192 goto fail;
193 }
194
195 std::vector<bool> referenced_blocks = get_referenced_blocks(program);
196 unsigned next_block = 0;
197
198 char prev_instr[2048];
199 unsigned prev_pos = 0;
200 do {
201 char* line_start = line;
202 if (strncmp(line_start, "/*", 2))
203 continue;
204
205 unsigned pos;
206 if (sscanf(line_start, "/*%x*/", &pos) != 1)
207 continue;
208 pos /= 4u; /* get the dword position */
209
210 while (strncmp(line_start, "*/", 2))
211 line_start++;
212 line_start += 2;
213
214 while (line_start[0] == ' ')
215 line_start++;
216 *strchr(line_start, '\n') = 0;
217
218 if (*line_start == 0)
219 continue; /* not an instruction, only a comment */
220
221 if (pos != prev_pos) {
222 /* Print the previous instruction, now that we know the encoding size. */
223 print_instr(output, binary, prev_instr, pos - prev_pos, prev_pos);
224 prev_pos = pos;
225 }
226
227 print_block_markers(output, program, referenced_blocks, &next_block, pos);
228
229 char* dest = prev_instr;
230 *(dest++) = '\t';
231 while (*line_start) {
232 if (!strncmp(line_start, ".L", 2) &&
233 get_branch_target(&dest, program, referenced_blocks, &line_start))
234 continue;
235 *(dest++) = *(line_start++);
236 }
237 *(dest++) = 0;
238 } while (fgets(line, sizeof(line), p));
239
240 if (prev_pos != exec_size)
241 print_instr(output, binary, prev_instr, exec_size - prev_pos, prev_pos);
242
243 pclose(p);
244
245 print_constant_data(output, program);
246 }
247
248 fail:
249 close(fd);
250 unlink(path);
251 return ret;
252 #endif
253 }
254
255 #if AMD_LLVM_AVAILABLE
256 std::pair<bool, size_t>
disasm_instr(amd_gfx_level gfx_level,LLVMDisasmContextRef disasm,uint32_t * binary,unsigned exec_size,size_t pos,char * outline,unsigned outline_size)257 disasm_instr(amd_gfx_level gfx_level, LLVMDisasmContextRef disasm, uint32_t* binary,
258 unsigned exec_size, size_t pos, char* outline, unsigned outline_size)
259 {
260 size_t l =
261 LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
262 pos * 4, outline, outline_size);
263
264 if (gfx_level >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
265 ((binary[pos + 1] & 0x1ff) == 0xff)) {
266 /* v_writelane with literal uses 3 dwords but llvm consumes only 2 */
267 l += 4;
268 }
269
270 bool invalid = false;
271 size_t size;
272 if (!l &&
273 ((gfx_level >= GFX9 &&
274 (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */
275 (gfx_level >= GFX10 &&
276 (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
277 (gfx_level <= GFX9 &&
278 (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
279 (gfx_level >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
280 (gfx_level == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
281 strcpy(outline, "\tinteger addition + clamp");
282 bool has_literal = gfx_level >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
283 (((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
284 size = 2 + has_literal;
285 } else if (gfx_level >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
286 strcpy(outline, "\tv_cndmask_b32 + sdwa");
287 size = 2;
288 } else if (!l) {
289 strcpy(outline, "(invalid instruction)");
290 size = 1;
291 invalid = true;
292 } else {
293 assert(l % 4 == 0);
294 size = l / 4;
295 }
296
297 return std::make_pair(invalid, size);
298 }
299
300 bool
print_asm_llvm(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)301 print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
302 {
303 std::vector<bool> referenced_blocks = get_referenced_blocks(program);
304
305 std::vector<llvm::SymbolInfoTy> symbols;
306 std::vector<std::array<char, 16>> block_names;
307 block_names.reserve(program->blocks.size());
308 for (Block& block : program->blocks) {
309 if (!referenced_blocks[block.index])
310 continue;
311 std::array<char, 16> name;
312 sprintf(name.data(), "BB%u", block.index);
313 block_names.push_back(name);
314 symbols.emplace_back(block.offset * 4,
315 llvm::StringRef(block_names[block_names.size() - 1].data()), 0);
316 }
317
318 const char* features = "";
319 if (program->gfx_level >= GFX10 && program->wave_size == 64) {
320 features = "+wavefrontsize64";
321 }
322
323 LLVMDisasmContextRef disasm =
324 LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family),
325 features, &symbols, 0, NULL, NULL);
326
327 size_t pos = 0;
328 bool invalid = false;
329 unsigned next_block = 0;
330
331 unsigned prev_size = 0;
332 unsigned prev_pos = 0;
333 unsigned repeat_count = 0;
334 while (pos <= exec_size) {
335 bool new_block =
336 next_block < program->blocks.size() && pos == program->blocks[next_block].offset;
337 if (pos + prev_size <= exec_size && prev_pos != pos && !new_block &&
338 memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) {
339 repeat_count++;
340 pos += prev_size;
341 continue;
342 } else {
343 if (repeat_count)
344 fprintf(output, "\t(then repeated %u times)\n", repeat_count);
345 repeat_count = 0;
346 }
347
348 print_block_markers(output, program, referenced_blocks, &next_block, pos);
349
350 /* For empty last block, only print block marker. */
351 if (pos == exec_size)
352 break;
353
354 char outline[1024];
355 std::pair<bool, size_t> res = disasm_instr(program->gfx_level, disasm, binary.data(),
356 exec_size, pos, outline, sizeof(outline));
357 invalid |= res.first;
358
359 print_instr(output, binary, outline, res.second, pos);
360
361 prev_size = res.second;
362 prev_pos = pos;
363 pos += res.second;
364 }
365 assert(next_block == program->blocks.size());
366
367 LLVMDisasmDispose(disasm);
368
369 print_constant_data(output, program);
370
371 return invalid;
372 }
373 #endif /* AMD_LLVM_AVAILABLE */
374
375 } /* end namespace */
376
377 bool
check_print_asm_support(Program * program)378 check_print_asm_support(Program* program)
379 {
380 #if AMD_LLVM_AVAILABLE
381 if (program->gfx_level >= GFX8) {
382 /* LLVM disassembler only supports GFX8+ */
383 const char* name = ac_get_llvm_processor_name(program->family);
384 const char* triple = "amdgcn--";
385 LLVMTargetRef target = ac_get_llvm_target(triple);
386
387 LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
388 target, triple, name, "", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
389
390 bool supported = ac_is_llvm_processor_supported(tm, name);
391 LLVMDisposeTargetMachine(tm);
392
393 if (supported)
394 return true;
395 }
396 #endif
397
398 #ifndef _WIN32
399 /* Check if CLRX disassembler binary is available and can disassemble the program */
400 return to_clrx_device_name(program->gfx_level, program->family) &&
401 system("clrxdisasm --version > /dev/null 2>&1") == 0;
402 #else
403 return false;
404 #endif
405 }
406
407 /* Returns true on failure */
408 bool
print_asm(Program * program,std::vector<uint32_t> & binary,unsigned exec_size,FILE * output)409 print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
410 {
411 #if AMD_LLVM_AVAILABLE
412 if (program->gfx_level >= GFX8) {
413 return print_asm_llvm(program, binary, exec_size, output);
414 }
415 #endif
416
417 return print_asm_clrx(program, binary, exec_size, output);
418 }
419
420 } // namespace aco
421