• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors: Tom Stellard <thomas.stellard@amd.com>
24  *
25  * Based on radeon_elf_util.c.
26  */
27 
28 #include "ac_binary.h"
29 
30 #include "util/u_math.h"
31 #include "util/u_memory.h"
32 
33 #include <gelf.h>
34 #include <libelf.h>
35 #include <stdio.h>
36 
37 #include <sid.h>
38 
/* Keys compared against the register field of each config entry in
 * ac_shader_binary_read_config(); the paired value is stored as the
 * spilled SGPR / VGPR count.
 */
#define SPILLED_SGPRS 0x4
#define SPILLED_VGPRS 0x8
41 
/**
 * Record the values of all defined global symbols of an ELF symbol table
 * in binary->global_symbol_offsets, kept sorted in ascending order, and
 * bump binary->global_symbol_count once per recorded symbol.
 *
 * \param symbol_table_data    data descriptor of the symbol table section
 * \param symbol_table_header  header of that section; sh_size / sh_entsize
 *                             is used as the total number of symbols
 * \param binary               shader binary that receives the offsets
 *
 * Nothing is allocated or recorded when the section data is missing, the
 * header reports a zero entry size or zero symbols, or allocation fails.
 */
static void parse_symbol_table(Elf_Data *symbol_table_data,
				const GElf_Shdr *symbol_table_header,
				struct ac_shader_binary *binary)
{
	GElf_Sym symbol;
	unsigned symbol_count;
	unsigned sym_idx;

	/* A malformed header with sh_entsize == 0 would divide by zero. */
	if (!symbol_table_data || !symbol_table_header->sh_entsize) {
		return;
	}

	symbol_count =
		symbol_table_header->sh_size / symbol_table_header->sh_entsize;
	if (!symbol_count) {
		return;
	}

	/* We are over allocating this list, because symbol_count gives the
	 * total number of symbols, and we will only be filling the list
	 * with offsets of global symbols.  The memory savings from
	 * allocating the correct size of this list will be small, and
	 * it is not worth the cost of pre-computing the number of global
	 * symbols.
	 */
	binary->global_symbol_offsets = CALLOC(symbol_count, sizeof(uint64_t));
	if (!binary->global_symbol_offsets) {
		fprintf(stderr, "Failed to allocate global symbol table\n");
		return;
	}

	/* Stop at symbol_count even if the section data holds more entries
	 * than the header announced, so we never write past the allocation.
	 */
	for (sym_idx = 0; sym_idx < symbol_count; sym_idx++) {
		unsigned pos;

		if (!gelf_getsym(symbol_table_data, sym_idx, &symbol)) {
			break;
		}
		if (GELF_ST_BIND(symbol.st_info) != STB_GLOBAL ||
		    symbol.st_shndx == 0 /* Undefined symbol */) {
			continue;
		}
		/* The write index is the running count, which the caller may
		 * have left non-zero; keep it inside the new allocation.
		 */
		if (binary->global_symbol_count >= symbol_count) {
			break;
		}

		binary->global_symbol_offsets[binary->global_symbol_count] =
					symbol.st_value;

		/* Sink the new entry towards the front until the list is
		 * sorted again.  This list will usually be small.
		 */
		for (pos = binary->global_symbol_count; pos > 0; --pos) {
			uint64_t lhs = binary->global_symbol_offsets[pos - 1];
			uint64_t rhs = binary->global_symbol_offsets[pos];
			if (lhs < rhs) {
				break;
			}
			binary->global_symbol_offsets[pos] = lhs;
			binary->global_symbol_offsets[pos - 1] = rhs;
		}
		++binary->global_symbol_count;
	}
}
84 
/**
 * Build binary->relocs from an ELF relocation section.
 *
 * For each of the binary->reloc_count entries, the relocation offset and
 * the (possibly truncated, always NUL-terminated) name of the referenced
 * symbol are stored.
 *
 * \param elf             ELF descriptor used for string table lookups
 * \param relocs          data of the relocation section, may be NULL
 * \param symbols         data of the symbol table section, may be NULL
 * \param symbol_sh_link  index of the string table section holding the
 *                        symbol names
 * \param binary          shader binary; reloc_count must already be set
 *
 * If no table can be built (missing inputs or allocation failure),
 * binary->reloc_count is reset to 0 so that later loops over the
 * relocations never index a missing array.  Entries whose relocation or
 * symbol cannot be read are left zeroed / with an empty name.
 */
static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
			unsigned symbol_sh_link,
			struct ac_shader_binary *binary)
{
	unsigned i;

	if (!binary->reloc_count) {
		return;
	}
	if (!relocs || !symbols) {
		binary->reloc_count = 0;
		return;
	}

	binary->relocs = CALLOC(binary->reloc_count,
			sizeof(struct ac_shader_reloc));
	if (!binary->relocs) {
		fprintf(stderr, "Failed to allocate shader relocation table\n");
		binary->reloc_count = 0;
		return;
	}

	for (i = 0; i < binary->reloc_count; i++) {
		GElf_Sym symbol;
		GElf_Rel rel;
		const char *symbol_name;
		struct ac_shader_reloc *reloc = &binary->relocs[i];

		/* gelf_getrel() returns NULL for an out-of-range index; do
		 * not read the uninitialized `rel` in that case.
		 */
		if (!gelf_getrel(relocs, i, &rel)) {
			continue;
		}
		reloc->offset = rel.r_offset;

		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &symbol)) {
			continue;
		}
		symbol_name = elf_strptr(elf, symbol_sh_link, symbol.st_name);
		if (!symbol_name) {
			/* Name stays empty (CALLOC zeroed the entry). */
			continue;
		}

		strncpy(reloc->name, symbol_name, sizeof(reloc->name)-1);
		reloc->name[sizeof(reloc->name)-1] = 0;
	}
}
111 
ac_elf_read(const char * elf_data,unsigned elf_size,struct ac_shader_binary * binary)112 void ac_elf_read(const char *elf_data, unsigned elf_size,
113 		 struct ac_shader_binary *binary)
114 {
115 	char *elf_buffer;
116 	Elf *elf;
117 	Elf_Scn *section = NULL;
118 	Elf_Data *symbols = NULL, *relocs = NULL;
119 	size_t section_str_index;
120 	unsigned symbol_sh_link = 0;
121 
122 	/* One of the libelf implementations
123 	 * (http://www.mr511.de/software/english.htm) requires calling
124 	 * elf_version() before elf_memory().
125 	 */
126 	elf_version(EV_CURRENT);
127 	elf_buffer = MALLOC(elf_size);
128 	memcpy(elf_buffer, elf_data, elf_size);
129 
130 	elf = elf_memory(elf_buffer, elf_size);
131 
132 	elf_getshdrstrndx(elf, &section_str_index);
133 
134 	while ((section = elf_nextscn(elf, section))) {
135 		const char *name;
136 		Elf_Data *section_data = NULL;
137 		GElf_Shdr section_header;
138 		if (gelf_getshdr(section, &section_header) != &section_header) {
139 			fprintf(stderr, "Failed to read ELF section header\n");
140 			return;
141 		}
142 		name = elf_strptr(elf, section_str_index, section_header.sh_name);
143 		if (!strcmp(name, ".text")) {
144 			section_data = elf_getdata(section, section_data);
145 			binary->code_size = section_data->d_size;
146 			binary->code = MALLOC(binary->code_size * sizeof(unsigned char));
147 			memcpy(binary->code, section_data->d_buf, binary->code_size);
148 		} else if (!strcmp(name, ".AMDGPU.config")) {
149 			section_data = elf_getdata(section, section_data);
150 			binary->config_size = section_data->d_size;
151 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
152 			memcpy(binary->config, section_data->d_buf, binary->config_size);
153 		} else if (!strcmp(name, ".AMDGPU.disasm")) {
154 			/* Always read disassembly if it's available. */
155 			section_data = elf_getdata(section, section_data);
156 			binary->disasm_string = strndup(section_data->d_buf,
157 							section_data->d_size);
158 		} else if (!strncmp(name, ".rodata", 7)) {
159 			section_data = elf_getdata(section, section_data);
160 			binary->rodata_size = section_data->d_size;
161 			binary->rodata = MALLOC(binary->rodata_size * sizeof(unsigned char));
162 			memcpy(binary->rodata, section_data->d_buf, binary->rodata_size);
163 		} else if (!strncmp(name, ".symtab", 7)) {
164 			symbols = elf_getdata(section, section_data);
165 			symbol_sh_link = section_header.sh_link;
166 			parse_symbol_table(symbols, &section_header, binary);
167 		} else if (!strcmp(name, ".rel.text")) {
168 			relocs = elf_getdata(section, section_data);
169 			binary->reloc_count = section_header.sh_size /
170 					section_header.sh_entsize;
171 		}
172 	}
173 
174 	parse_relocs(elf, relocs, symbols, symbol_sh_link, binary);
175 
176 	if (elf){
177 		elf_end(elf);
178 	}
179 	FREE(elf_buffer);
180 
181 	/* Cache the config size per symbol */
182 	if (binary->global_symbol_count) {
183 		binary->config_size_per_symbol =
184 			binary->config_size / binary->global_symbol_count;
185 	} else {
186 		binary->global_symbol_count = 1;
187 		binary->config_size_per_symbol = binary->config_size;
188 	}
189 }
190 
191 static
ac_shader_binary_config_start(const struct ac_shader_binary * binary,uint64_t symbol_offset)192 const unsigned char *ac_shader_binary_config_start(
193 	const struct ac_shader_binary *binary,
194 	uint64_t symbol_offset)
195 {
196 	unsigned i;
197 	for (i = 0; i < binary->global_symbol_count; ++i) {
198 		if (binary->global_symbol_offsets[i] == symbol_offset) {
199 			unsigned offset = i * binary->config_size_per_symbol;
200 			return binary->config + offset;
201 		}
202 	}
203 	return binary->config;
204 }
205 
206 
/* Relocation symbol names that ac_shader_binary_read_config() looks for
 * to decide whether the shader really needs a scratch buffer.
 */
static const char *scratch_rsrc_dword0_symbol = "SCRATCH_RSRC_DWORD0";
static const char *scratch_rsrc_dword1_symbol = "SCRATCH_RSRC_DWORD1";
212 
ac_shader_binary_read_config(struct ac_shader_binary * binary,struct ac_shader_config * conf,unsigned symbol_offset)213 void ac_shader_binary_read_config(struct ac_shader_binary *binary,
214 				  struct ac_shader_config *conf,
215 				  unsigned symbol_offset)
216 {
217 	unsigned i;
218 	const unsigned char *config =
219 		ac_shader_binary_config_start(binary, symbol_offset);
220 	bool really_needs_scratch = false;
221 
222 	/* LLVM adds SGPR spills to the scratch size.
223 	 * Find out if we really need the scratch buffer.
224 	 */
225 	for (i = 0; i < binary->reloc_count; i++) {
226 		const struct ac_shader_reloc *reloc = &binary->relocs[i];
227 
228 		if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
229 		    !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
230 			really_needs_scratch = true;
231 			break;
232 		}
233 	}
234 
235 	for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
236 		unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
237 		unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
238 		switch (reg) {
239 		case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
240 		case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
241 		case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
242 		case R_00B848_COMPUTE_PGM_RSRC1:
243 			conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
244 			conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
245 			conf->float_mode =  G_00B028_FLOAT_MODE(value);
246 			break;
247 		case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
248 			conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
249 			break;
250 		case R_00B84C_COMPUTE_PGM_RSRC2:
251 			conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
252 			break;
253 		case R_0286CC_SPI_PS_INPUT_ENA:
254 			conf->spi_ps_input_ena = value;
255 			break;
256 		case R_0286D0_SPI_PS_INPUT_ADDR:
257 			conf->spi_ps_input_addr = value;
258 			break;
259 		case R_0286E8_SPI_TMPRING_SIZE:
260 		case R_00B860_COMPUTE_TMPRING_SIZE:
261 			/* WAVESIZE is in units of 256 dwords. */
262 			if (really_needs_scratch)
263 				conf->scratch_bytes_per_wave =
264 					G_00B860_WAVESIZE(value) * 256 * 4;
265 			break;
266 		case SPILLED_SGPRS:
267 			conf->spilled_sgprs = value;
268 			break;
269 		case SPILLED_VGPRS:
270 			conf->spilled_vgprs = value;
271 			break;
272 		default:
273 			{
274 				static bool printed;
275 
276 				if (!printed) {
277 					fprintf(stderr, "Warning: LLVM emitted unknown "
278 						"config register: 0x%x\n", reg);
279 					printed = true;
280 				}
281 			}
282 			break;
283 		}
284 
285 		if (!conf->spi_ps_input_addr)
286 			conf->spi_ps_input_addr = conf->spi_ps_input_ena;
287 	}
288 }
289