1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 * Based on radeon_elf_util.c.
26 */
27
28 #include "ac_binary.h"
29
30 #include "util/u_math.h"
31 #include "util/u_memory.h"
32
33 #include <gelf.h>
34 #include <libelf.h>
35 #include <stdio.h>
36
37 #include <sid.h>
38
/* Pseudo-register ids that can appear as keys in the shader config stream.
 * Their values are stored as the spilled SGPR / VGPR counts of the parsed
 * shader config.
 */
enum {
	SPILLED_SGPRS = 0x4,
	SPILLED_VGPRS = 0x8
};
41
/**
 * Collect the offsets of all defined global symbols of an ELF symbol table.
 *
 * On return binary->global_symbol_offsets holds the st_value of every
 * defined STB_GLOBAL symbol in ascending order and
 * binary->global_symbol_count holds how many were found. If the list cannot
 * be allocated, the pointer is NULL and the count is 0.
 *
 * \param symbol_table_data    libelf data of the symbol table section
 * \param symbol_table_header  section header of the symbol table section
 * \param binary               shader binary that receives the offset list
 */
static void parse_symbol_table(Elf_Data *symbol_table_data,
			       const GElf_Shdr *symbol_table_header,
			       struct ac_shader_binary *binary)
{
	GElf_Sym symbol;
	unsigned symbol_index = 0;
	unsigned symbol_count = 0;

	/* A malformed header may report an entry size of zero; treat that as
	 * an empty table instead of dividing by zero.
	 */
	if (symbol_table_header->sh_entsize) {
		symbol_count = symbol_table_header->sh_size /
			       symbol_table_header->sh_entsize;
	}

	/* The list below is freshly allocated, so counting starts at zero. */
	binary->global_symbol_count = 0;

	/* We are over allocating this list, because symbol_count gives the
	 * total number of symbols, and only the offsets of global symbols are
	 * stored. The memory savings from allocating the exact size would be
	 * small and not worth pre-computing the number of global symbols.
	 *
	 * At least one (zeroed) slot is always allocated: ac_elf_read() falls
	 * back to global_symbol_count = 1 when no global symbol was found and
	 * ac_shader_binary_config_start() then reads entry 0.
	 */
	binary->global_symbol_offsets =
		CALLOC(symbol_count ? symbol_count : 1, sizeof(uint64_t));
	if (!binary->global_symbol_offsets) {
		fprintf(stderr, "Failed to allocate the global symbol list\n");
		return;
	}

	/* Never store more entries than the list was sized for. */
	while (binary->global_symbol_count < symbol_count &&
	       gelf_getsym(symbol_table_data, symbol_index++, &symbol)) {
		unsigned pos;

		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
		    symbol.st_shndx == 0 /* Undefined symbol */) {
			continue;
		}

		binary->global_symbol_offsets[binary->global_symbol_count] =
			symbol.st_value;

		/* Keep the list sorted: move the new offset towards the front
		 * until its predecessor is smaller. This list will usually be
		 * small.
		 */
		for (pos = binary->global_symbol_count; pos > 0; --pos) {
			uint64_t lhs = binary->global_symbol_offsets[pos - 1];
			uint64_t rhs = binary->global_symbol_offsets[pos];
			if (lhs < rhs) {
				break;
			}
			binary->global_symbol_offsets[pos] = lhs;
			binary->global_symbol_offsets[pos - 1] = rhs;
		}
		++binary->global_symbol_count;
	}
}
84
/**
 * Build binary->relocs from the relocation section of the ELF.
 *
 * For each of the binary->reloc_count relocation entries the offset and the
 * (possibly truncated, always NUL-terminated) name of the referenced symbol
 * are stored. When the table cannot be built, binary->reloc_count is reset
 * to 0 so that later loops over the relocations do not touch a missing
 * array.
 *
 * \param elf             ELF descriptor used to look up symbol names
 * \param relocs          data of the relocation section, may be NULL
 * \param symbols         data of the symbol table section, may be NULL
 * \param symbol_sh_link  section index of the symbol table's string table
 * \param binary          shader binary that receives the relocation list
 */
static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
			 unsigned symbol_sh_link,
			 struct ac_shader_binary *binary)
{
	unsigned i;

	if (!binary->reloc_count) {
		return;
	}

	if (!relocs || !symbols) {
		/* No table can be built, so do not advertise any entries. */
		binary->reloc_count = 0;
		return;
	}

	binary->relocs = CALLOC(binary->reloc_count,
				sizeof(struct ac_shader_reloc));
	if (!binary->relocs) {
		fprintf(stderr, "Failed to allocate the relocation list\n");
		binary->reloc_count = 0;
		return;
	}

	for (i = 0; i < binary->reloc_count; i++) {
		GElf_Sym symbol;
		GElf_Rel rel;
		const char *symbol_name;
		struct ac_shader_reloc *reloc = &binary->relocs[i];

		/* Entries that cannot be decoded keep the zeroed state they
		 * got from CALLOC (offset 0 / empty name).
		 */
		if (!gelf_getrel(relocs, i, &rel)) {
			continue;
		}
		reloc->offset = rel.r_offset;

		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol)) {
			continue;
		}

		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
		if (!symbol_name) {
			continue;
		}

		/* strncpy does not terminate on truncation, so do it here. */
		strncpy(reloc->name, symbol_name, sizeof(reloc->name) - 1);
		reloc->name[sizeof(reloc->name) - 1] = 0;
	}
}
111
ac_elf_read(const char * elf_data,unsigned elf_size,struct ac_shader_binary * binary)112 void ac_elf_read(const char *elf_data, unsigned elf_size,
113 struct ac_shader_binary *binary)
114 {
115 char *elf_buffer;
116 Elf *elf;
117 Elf_Scn *section = NULL;
118 Elf_Data *symbols = NULL, *relocs = NULL;
119 size_t section_str_index;
120 unsigned symbol_sh_link = 0;
121
122 /* One of the libelf implementations
123 * (http://www.mr511.de/software/english.htm) requires calling
124 * elf_version() before elf_memory().
125 */
126 elf_version(EV_CURRENT);
127 elf_buffer = MALLOC(elf_size);
128 memcpy(elf_buffer, elf_data, elf_size);
129
130 elf = elf_memory(elf_buffer, elf_size);
131
132 elf_getshdrstrndx(elf, §ion_str_index);
133
134 while ((section = elf_nextscn(elf, section))) {
135 const char *name;
136 Elf_Data *section_data = NULL;
137 GElf_Shdr section_header;
138 if (gelf_getshdr(section, §ion_header) != §ion_header) {
139 fprintf(stderr, "Failed to read ELF section header\n");
140 return;
141 }
142 name = elf_strptr(elf, section_str_index, section_header.sh_name);
143 if (!strcmp(name, ".text")) {
144 section_data = elf_getdata(section, section_data);
145 binary->code_size = section_data->d_size;
146 binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
147 memcpy(binary->code, section_data->d_buf, binary->code_size);
148 } else if (!strcmp(name, ".AMDGPU.config")) {
149 section_data = elf_getdata(section, section_data);
150 binary->config_size = section_data->d_size;
151 binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
152 memcpy(binary->config, section_data->d_buf, binary->config_size);
153 } else if (!strcmp(name, ".AMDGPU.disasm")) {
154 /* Always read disassembly if it's available. */
155 section_data = elf_getdata(section, section_data);
156 binary->disasm_string = strndup(section_data->d_buf,
157 section_data->d_size);
158 } else if (!strncmp(name, ".rodata", 7)) {
159 section_data = elf_getdata(section, section_data);
160 binary->rodata_size = section_data->d_size;
161 binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
162 memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
163 } else if (!strncmp(name, ".symtab", 7)) {
164 symbols = elf_getdata(section, section_data);
165 symbol_sh_link = section_header.sh_link;
166 parse_symbol_table(symbols, §ion_header, binary);
167 } else if (!strcmp(name, ".rel.text")) {
168 relocs = elf_getdata(section, section_data);
169 binary->reloc_count = section_header.sh_size /
170 section_header.sh_entsize;
171 }
172 }
173
174 parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
175
176 if (elf){
177 elf_end(elf);
178 }
179 FREE(elf_buffer);
180
181 /* Cache the config size per symbol */
182 if (binary->global_symbol_count) {
183 binary->config_size_per_symbol =
184 binary->config_size / binary->global_symbol_count;
185 } else {
186 binary->global_symbol_count = 1;
187 binary->config_size_per_symbol = binary->config_size;
188 }
189 }
190
191 static
ac_shader_binary_config_start(const struct ac_shader_binary * binary,uint64_t symbol_offset)192 const unsigned char *ac_shader_binary_config_start(
193 const struct ac_shader_binary *binary,
194 uint64_t symbol_offset)
195 {
196 unsigned i;
197 for (i = 0; i < binary->global_symbol_count; ++i) {
198 if (binary->global_symbol_offsets[i] == symbol_offset) {
199 unsigned offset = i * binary->config_size_per_symbol;
200 return binary->config + offset;
201 }
202 }
203 return binary->config;
204 }
205
206
/* Names of the relocation symbols whose presence means the shader really
 * uses the scratch buffer.
 */
static const char *scratch_rsrc_dword0_symbol = "SCRATCH_RSRC_DWORD0";
static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1";
212
ac_shader_binary_read_config(struct ac_shader_binary * binary,struct ac_shader_config * conf,unsigned symbol_offset)213 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
214 struct ac_shader_config *conf,
215 unsigned symbol_offset)
216 {
217 unsigned i;
218 const unsigned char *config =
219 ac_shader_binary_config_start(binary, symbol_offset);
220 bool really_needs_scratch = false;
221
222 /* LLVM adds SGPR spills to the scratch size.
223 * Find out if we really need the scratch buffer.
224 */
225 for (i = 0; i < binary->reloc_count; i++) {
226 const struct ac_shader_reloc *reloc = &binary->relocs[i];
227
228 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
229 !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
230 really_needs_scratch = true;
231 break;
232 }
233 }
234
235 for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
236 unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
237 unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
238 switch (reg) {
239 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
240 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
241 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
242 case R_00B848_COMPUTE_PGM_RSRC1:
243 conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
244 conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
245 conf->float_mode = G_00B028_FLOAT_MODE(value);
246 break;
247 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
248 conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
249 break;
250 case R_00B84C_COMPUTE_PGM_RSRC2:
251 conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
252 break;
253 case R_0286CC_SPI_PS_INPUT_ENA:
254 conf->spi_ps_input_ena = value;
255 break;
256 case R_0286D0_SPI_PS_INPUT_ADDR:
257 conf->spi_ps_input_addr = value;
258 break;
259 case R_0286E8_SPI_TMPRING_SIZE:
260 case R_00B860_COMPUTE_TMPRING_SIZE:
261 /* WAVESIZE is in units of 256 dwords. */
262 if (really_needs_scratch)
263 conf->scratch_bytes_per_wave =
264 G_00B860_WAVESIZE(value) * 256 * 4;
265 break;
266 case SPILLED_SGPRS:
267 conf->spilled_sgprs = value;
268 break;
269 case SPILLED_VGPRS:
270 conf->spilled_vgprs = value;
271 break;
272 default:
273 {
274 static bool printed;
275
276 if (!printed) {
277 fprintf(stderr, "Warning: LLVM emitted unknown "
278 "config register: 0x%x\n", reg);
279 printed = true;
280 }
281 }
282 break;
283 }
284
285 if (!conf->spi_ps_input_addr)
286 conf->spi_ps_input_addr = conf->spi_ps_input_ena;
287 }
288 }
289