• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  */
25 
26 /**
27  * \file ac_rgp_elf_object_pack.c
28  *
29  * This file provides functions to create elf object for rgp profiling.
30  * The functions in this file create 64bit elf code object irrespective
31  * of if the driver is compiled as 32 or 64 bit.
32  */
33 
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <libelf.h>
#include "ac_msgpack.h"
#include "ac_rgp.h"

#include "util/bitscan.h"
#include "util/u_math.h"
43 
44 #ifndef EM_AMDGPU
45 // Old distributions may not have this enum constant
46 #define EM_AMDGPU 224
47 #endif
48 
/*
 * API-level shader stage names, used as keys of the ".shaders" map in the
 * msgpack metadata. The table is indexed with the bit index taken from
 * rgp_code_object_record::shader_stages_mask.
 */
char shader_stage_api_string[6][10] = {
   ".vertex",      /* vertex */
   ".hull",        /* tessellation control */
   ".domain",      /* tessellation evaluation */
   ".geometry",    /* geometry */
   ".pixel",       /* fragment */
   ".compute"      /* compute */
};
57 
58 char hw_stage_string[RGP_HW_STAGE_MAX][4] = {
59    ".vs",
60    ".ls",
61    ".hs",
62    ".es",
63    ".gs",
64    ".ps",
65    ".cs"
66 };
67 
68 char hw_stage_symbol_string[RGP_HW_STAGE_MAX][16] = {
69    "_amdgpu_vs_main",
70    "_amdgpu_ls_main",
71    "_amdgpu_hs_main",
72    "_amdgpu_es_main",
73    "_amdgpu_gs_main",
74    "_amdgpu_ps_main",
75    "_amdgpu_cs_main"
76 };
77 
78 /**
79  * rgp profiler requires data for few variables stored in msgpack format
80  * in notes section. This function writes the data from
81  * struct rgp_code_object_record to elf object in msgpack format.
82  * for msgpack specification refer to
83  * github.com/msgpack/msgpack/blob/master/spec.md
84  */
85 static void
ac_rgp_write_msgpack(FILE * output,struct rgp_code_object_record * record,uint32_t * written_size)86 ac_rgp_write_msgpack(FILE *output,
87                      struct rgp_code_object_record *record,
88                      uint32_t *written_size)
89 {
90    struct ac_msgpack msgpack;
91    uint32_t num_shaders;
92    uint32_t i;
93    uint32_t mask;
94 
95    num_shaders = util_bitcount(record->shader_stages_mask);
96 
97    ac_msgpack_init(&msgpack);
98 
99    ac_msgpack_add_fixmap_op(&msgpack, 2);
100       ac_msgpack_add_fixstr(&msgpack, "amdpal.version");
101       ac_msgpack_add_fixarray_op(&msgpack, 2);
102          ac_msgpack_add_uint(&msgpack, 2);
103          ac_msgpack_add_uint(&msgpack, 1);
104 
105       ac_msgpack_add_fixstr(&msgpack, "amdpal.pipelines");
106       ac_msgpack_add_fixarray_op(&msgpack, 1);
107          ac_msgpack_add_fixmap_op(&msgpack, 6);
108 
109             /* 1
110              * This not used in RGP but data needs to be present
111              */
112             ac_msgpack_add_fixstr(&msgpack, ".spill_threshold");
113             ac_msgpack_add_uint(&msgpack, 0xffff);
114 
115             /* 2
116              * This not used in RGP but data needs to be present
117              */
118             ac_msgpack_add_fixstr(&msgpack, ".user_data_limit");
119             ac_msgpack_add_uint(&msgpack, 32);
120 
121             /* 3 */
122             ac_msgpack_add_fixstr(&msgpack, ".shaders");
123             ac_msgpack_add_fixmap_op(&msgpack, num_shaders);
124                mask = record->shader_stages_mask;
125                while(mask) {
126                   i = u_bit_scan(&mask);
127                   ac_msgpack_add_fixstr(&msgpack,
128                                         shader_stage_api_string[i]);
129                   ac_msgpack_add_fixmap_op(&msgpack, 2);
130                   ac_msgpack_add_fixstr(&msgpack, ".api_shader_hash");
131                   ac_msgpack_add_fixarray_op(&msgpack, 2);
132                      ac_msgpack_add_uint(&msgpack,
133                                          record->shader_data[i].hash[0]);
134                      ac_msgpack_add_uint(&msgpack, 0);
135                   ac_msgpack_add_fixstr(&msgpack, ".hardware_mapping");
136                   ac_msgpack_add_fixarray_op(&msgpack, 1);
137                      ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
138                                            record->shader_data[i].hw_stage]);
139                }
140 
141             /* 4 */
142             ac_msgpack_add_fixstr(&msgpack, ".hardware_stages");
143             ac_msgpack_add_fixmap_op(&msgpack,
144                                      record->num_shaders_combined);
145                mask = record->shader_stages_mask;
146                while(mask) {
147                   i = u_bit_scan(&mask);
148 
149                   if (record->shader_data[i].is_combined)
150                      continue;
151 
152                   ac_msgpack_add_fixstr(&msgpack, hw_stage_string[
153                                         record->shader_data[i].hw_stage]);
154                   ac_msgpack_add_fixmap_op(&msgpack, 5);
155                      ac_msgpack_add_fixstr(&msgpack, ".entry_point");
156                      ac_msgpack_add_fixstr(&msgpack, hw_stage_symbol_string[
157                                            record->shader_data[i].hw_stage]);
158 
159                      ac_msgpack_add_fixstr(&msgpack, ".sgpr_count");
160                      ac_msgpack_add_uint(&msgpack,
161                                          record->shader_data[i].sgpr_count);
162 
163                      ac_msgpack_add_fixstr(&msgpack, ".vgpr_count");
164                      ac_msgpack_add_uint(&msgpack,
165                                          record->shader_data[i].vgpr_count);
166 
167                      ac_msgpack_add_fixstr(&msgpack, ".scratch_memory_size");
168                      ac_msgpack_add_uint(&msgpack,
169                                          record->shader_data[i].scratch_memory_size);
170 
171                      ac_msgpack_add_fixstr(&msgpack, ".wavefront_size");
172                      ac_msgpack_add_uint(&msgpack,
173                                          record->shader_data[i].wavefront_size);
174                }
175 
176             /* 5 */
177             ac_msgpack_add_fixstr(&msgpack, ".internal_pipeline_hash");
178             ac_msgpack_add_fixarray_op(&msgpack, 2);
179                ac_msgpack_add_uint(&msgpack, record->pipeline_hash[0]);
180                ac_msgpack_add_uint(&msgpack, record->pipeline_hash[1]);
181 
182             /* 6 */
183             ac_msgpack_add_fixstr(&msgpack, ".api");
184             ac_msgpack_add_fixstr(&msgpack, "Vulkan");
185 
186    ac_msgpack_resize_if_required(&msgpack, 4 - (msgpack.offset % 4));
187    msgpack.offset = ALIGN(msgpack.offset, 4);
188    fwrite(msgpack.mem, 1, msgpack.offset, output);
189    *written_size = msgpack.offset;
190    ac_msgpack_destroy(&msgpack);
191 }
192 
193 
194 static uint32_t
get_lowest_shader(uint32_t * shader_stages_mask,struct rgp_code_object_record * record,struct rgp_shader_data ** rgp_shader_data)195 get_lowest_shader(uint32_t *shader_stages_mask,
196                   struct rgp_code_object_record *record,
197                   struct rgp_shader_data **rgp_shader_data)
198 {
199    uint32_t i, lowest = 0;
200    uint32_t mask;
201    uint64_t base_address = -1;
202 
203    if (*shader_stages_mask == 0)
204       return false;
205 
206    mask = *shader_stages_mask;
207    while(mask) {
208       i = u_bit_scan(&mask);
209       if (record->shader_data[i].is_combined) {
210          *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << i);
211          continue;
212       }
213       if (base_address > record->shader_data[i].base_address) {
214          lowest = i;
215          base_address = record->shader_data[i].base_address;
216       }
217    }
218 
219    *shader_stages_mask = *shader_stages_mask & ~((uint32_t)1 << lowest);
220    *rgp_shader_data = &record->shader_data[lowest];
221    return true;
222 }
223 
224 /**
225  *  write the shader code into elf object in text section
226  */
227 static void
ac_rgp_file_write_elf_text(FILE * output,uint32_t * elf_size_calc,struct rgp_code_object_record * record,uint32_t * text_size)228 ac_rgp_file_write_elf_text(FILE *output, uint32_t *elf_size_calc,
229                            struct rgp_code_object_record *record,
230                            uint32_t *text_size)
231 {
232    struct rgp_shader_data *rgp_shader_data = NULL;
233    struct rgp_shader_data *prev_rgp_shader_data = NULL;
234    uint32_t symbol_offset = 0;
235    uint32_t mask = record->shader_stages_mask;
236    static bool warn_once = true;
237 
238    while(get_lowest_shader(&mask, record, &rgp_shader_data)) {
239       if (prev_rgp_shader_data) {
240          uint32_t code_offset = rgp_shader_data->base_address -
241                                 prev_rgp_shader_data->base_address;
242          uint32_t gap_between_code = code_offset -
243                                      prev_rgp_shader_data->code_size;
244          symbol_offset += code_offset;
245          if (gap_between_code > 0x10000 && warn_once) {
246             fprintf(stderr, "Warning: shader code far from previous "
247                             "(%d bytes apart). The rgp capture file "
248                             "might be very large.\n", gap_between_code);
249             warn_once = false;
250          }
251 
252          fseek(output, gap_between_code, SEEK_CUR);
253          *elf_size_calc += gap_between_code;
254       }
255 
256       rgp_shader_data->elf_symbol_offset = symbol_offset;
257       fwrite(rgp_shader_data->code, 1, rgp_shader_data->code_size, output);
258       *elf_size_calc += rgp_shader_data->code_size;
259       prev_rgp_shader_data = rgp_shader_data;
260    }
261 
262    symbol_offset += rgp_shader_data->code_size;
263    uint32_t align = ALIGN(symbol_offset, 256) - symbol_offset;
264    fseek(output, align, SEEK_CUR);
265    *elf_size_calc += align;
266    *text_size = symbol_offset + align;
267 }
268 
/*
 * Hardcoded indices of the string table and text sections in the elf
 * object. The order must be strictly followed when populating the
 * section header table.
 */
#define RGP_ELF_STRING_TBL_SEC_HEADER_INDEX 1
#define RGP_ELF_TEXT_SEC_HEADER_INDEX 2
276 
/*
 * The string table is hardcoded so that it can be emitted with a single
 * write to the output. The strings live in a structure so that the offset
 * of a given string inside the string table is simply the offset of the
 * corresponding member.
 */
struct ac_rgp_elf_string_table {
   char null[sizeof("")];
   char strtab[sizeof(".strtab")];
   char text[sizeof(".text")];
   char symtab[sizeof(".symtab")];
   char note[sizeof(".note")];
   char vs_main[sizeof("_amdgpu_vs_main")];
   char ls_main[sizeof("_amdgpu_ls_main")];
   char hs_main[sizeof("_amdgpu_hs_main")];
   char es_main[sizeof("_amdgpu_es_main")];
   char gs_main[sizeof("_amdgpu_gs_main")];
   char ps_main[sizeof("_amdgpu_ps_main")];
   char cs_main[sizeof("_amdgpu_cs_main")];
};
296 
297 struct ac_rgp_elf_string_table rgp_elf_strtab = {
298    .null = "",
299    .strtab = ".strtab",
300    .text = ".text",
301    .symtab = ".symtab",
302    .note = ".note",
303    .vs_main = "_amdgpu_vs_main",
304    .ls_main = "_amdgpu_ls_main",
305    .hs_main = "_amdgpu_hs_main",
306    .es_main = "_amdgpu_es_main",
307    .gs_main = "_amdgpu_gs_main",
308    .ps_main = "_amdgpu_ps_main",
309    .cs_main = "_amdgpu_cs_main",
310 };
311 
312 uint32_t rgp_elf_hw_stage_string_offset[RGP_HW_STAGE_MAX] = {
313    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->vs_main,
314    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ls_main,
315    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->hs_main,
316    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->es_main,
317    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->gs_main,
318    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->ps_main,
319    (uintptr_t)((struct ac_rgp_elf_string_table*)0)->cs_main,
320 };
321 
322 
323 static void
ac_rgp_file_write_elf_symbol_table(FILE * output,uint32_t * elf_size_calc,struct rgp_code_object_record * record,uint32_t * symbol_table_size)324 ac_rgp_file_write_elf_symbol_table(FILE *output, uint32_t *elf_size_calc,
325                                    struct rgp_code_object_record *record,
326                                    uint32_t *symbol_table_size)
327 {
328    Elf64_Sym elf_sym;
329    uint32_t i;
330    uint32_t mask = record->shader_stages_mask;
331 
332    memset(&elf_sym, 0x00, sizeof(elf_sym));
333    fwrite(&elf_sym, 1, sizeof(elf_sym), output);
334 
335    while(mask) {
336       i = u_bit_scan(&mask);
337       if (record->shader_data[i].is_combined)
338          continue;
339 
340       elf_sym.st_name = rgp_elf_hw_stage_string_offset
341                         [record->shader_data[i].hw_stage];
342       elf_sym.st_info = STT_FUNC;
343       elf_sym.st_other = 0x0;
344       elf_sym.st_shndx = RGP_ELF_TEXT_SEC_HEADER_INDEX;
345       elf_sym.st_value = record->shader_data[i].elf_symbol_offset;
346       elf_sym.st_size = record->shader_data[i].code_size;
347       fwrite(&elf_sym, 1, sizeof(elf_sym), output);
348    }
349 
350    *symbol_table_size = (record->num_shaders_combined + 1)
351                         * sizeof(elf_sym);
352    *elf_size_calc += *symbol_table_size;
353 }
354 
355 
356 /* Below defines from from llvm project
357  * llvm/includel/llvm/BinaryFormat/ELF.h
358  */
359 #define ELFOSABI_AMDGPU_PAL 65
360 #define NT_AMDGPU_METADATA 32
361 
362 uint8_t elf_ident[EI_NIDENT] = { ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
363                                  ELFCLASS64, ELFDATA2LSB, EV_CURRENT,
364                                  ELFOSABI_AMDGPU_PAL,
365                                  0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
366                                  0x00, 0x00 };
367 
368 #define NOTE_MSGPACK_NAME "AMDGPU"
369 struct ac_rgp_elf_note_msgpack_hdr {
370    Elf64_Nhdr  hdr;
371    char name[sizeof(NOTE_MSGPACK_NAME)];
372 };
373 
374 void
ac_rgp_file_write_elf_object(FILE * output,size_t file_elf_start,struct rgp_code_object_record * record,uint32_t * written_size,uint32_t flags)375 ac_rgp_file_write_elf_object(FILE *output, size_t file_elf_start,
376                              struct rgp_code_object_record *record,
377                              uint32_t *written_size, uint32_t flags)
378 {
379    Elf64_Ehdr elf_hdr;
380    Elf64_Shdr sec_hdr[5];
381    uint32_t elf_size_calc;
382    struct ac_rgp_elf_note_msgpack_hdr note_hdr;
383    uint32_t text_size = 0;
384    uint32_t symbol_table_size = 0;
385    uint32_t msgpack_size = 0;
386    size_t note_sec_start;
387    uint32_t sh_offset;
388 
389    /* Give space for header in file. It will be written to file at the end */
390    fseek(output, sizeof(Elf64_Ehdr), SEEK_CUR);
391 
392    elf_size_calc = sizeof(Elf64_Ehdr);
393 
394    /* Initialize elf header */
395    memcpy(&elf_hdr.e_ident, &elf_ident, EI_NIDENT);
396    elf_hdr.e_type = ET_REL;
397    elf_hdr.e_machine = EM_AMDGPU;
398    elf_hdr.e_version = EV_CURRENT;
399    elf_hdr.e_entry = 0;
400    elf_hdr.e_flags = flags;
401    elf_hdr.e_shstrndx = 1; /* string table entry is hardcoded to 1*/
402    elf_hdr.e_phoff = 0;
403    elf_hdr.e_shentsize = sizeof(Elf64_Shdr);
404    elf_hdr.e_ehsize = sizeof(Elf64_Ehdr);
405    elf_hdr.e_phentsize = 0;
406    elf_hdr.e_phnum = 0;
407 
408    /* write hardcoded string table */
409    fwrite(&rgp_elf_strtab, 1, sizeof(rgp_elf_strtab), output);
410    elf_size_calc += sizeof(rgp_elf_strtab);
411 
412    /* write shader code as .text code */
413    ac_rgp_file_write_elf_text(output, &elf_size_calc, record, &text_size);
414 
415    /* write symbol table */
416    ac_rgp_file_write_elf_symbol_table(output, &elf_size_calc, record,
417                                       &symbol_table_size);
418 
419    /* write .note */
420    /* the .note section contains msgpack which stores variables */
421    note_sec_start = file_elf_start + elf_size_calc;
422    fseek(output, sizeof(struct ac_rgp_elf_note_msgpack_hdr), SEEK_CUR);
423    ac_rgp_write_msgpack(output, record, &msgpack_size);
424    note_hdr.hdr.n_namesz = sizeof(NOTE_MSGPACK_NAME);
425    note_hdr.hdr.n_descsz = msgpack_size;
426    note_hdr.hdr.n_type = NT_AMDGPU_METADATA;
427    memcpy(note_hdr.name, NOTE_MSGPACK_NAME "\0",
428           sizeof(NOTE_MSGPACK_NAME) + 1);
429    fseek(output, note_sec_start, SEEK_SET);
430    fwrite(&note_hdr, 1, sizeof(struct ac_rgp_elf_note_msgpack_hdr), output);
431    fseek(output, 0, SEEK_END);
432    elf_size_calc += (msgpack_size +
433                      sizeof(struct ac_rgp_elf_note_msgpack_hdr));
434 
435    /* write section headers */
436    sh_offset = elf_size_calc;
437    memset(&sec_hdr[0], 0x00, sizeof(Elf64_Shdr) * 5);
438 
439    /* string table must be at index 1 as used in other places*/
440    sec_hdr[1].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->strtab;
441    sec_hdr[1].sh_type = SHT_STRTAB;
442    sec_hdr[1].sh_offset = sizeof(Elf64_Ehdr);
443    sec_hdr[1].sh_size = sizeof(rgp_elf_strtab);
444 
445    /* text must be at index 2 as used in other places*/
446    sec_hdr[2].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->text;
447    sec_hdr[2].sh_type = SHT_PROGBITS;
448    sec_hdr[2].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
449    sec_hdr[2].sh_offset = sec_hdr[1].sh_offset + sec_hdr[1].sh_size;
450    sec_hdr[2].sh_size = text_size;
451    sec_hdr[2].sh_addralign = 256;
452 
453    sec_hdr[3].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->symtab;
454    sec_hdr[3].sh_type = SHT_SYMTAB;
455    sec_hdr[3].sh_offset = sec_hdr[2].sh_offset +
456                           ALIGN(sec_hdr[2].sh_size, 256);
457    sec_hdr[3].sh_size = symbol_table_size;
458    sec_hdr[3].sh_link = RGP_ELF_STRING_TBL_SEC_HEADER_INDEX;
459    sec_hdr[3].sh_addralign = 8;
460    sec_hdr[3].sh_entsize = sizeof(Elf64_Sym);
461 
462    sec_hdr[4].sh_name = (uintptr_t)((struct ac_rgp_elf_string_table*)0)->note;
463    sec_hdr[4].sh_type = SHT_NOTE;
464    sec_hdr[4].sh_offset = sec_hdr[3].sh_offset + sec_hdr[3].sh_size;
465    sec_hdr[4].sh_size = msgpack_size +
466                         sizeof(struct ac_rgp_elf_note_msgpack_hdr);
467    sec_hdr[4].sh_addralign = 4;
468    fwrite(&sec_hdr, 1, sizeof(Elf64_Shdr) * 5, output);
469    elf_size_calc += (sizeof(Elf64_Shdr) * 5);
470 
471    /* update and write elf header */
472    elf_hdr.e_shnum = 5;
473    elf_hdr.e_shoff = sh_offset;
474 
475    fseek(output, file_elf_start, SEEK_SET);
476    fwrite(&elf_hdr, 1, sizeof(Elf64_Ehdr), output);
477    fseek(output, 0, SEEK_END);
478 
479    *written_size = elf_size_calc;
480 }
481