1 /*
2 * Copyright (c) 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include <stdint.h>
25
26 #include "compiler/nir/nir_serialize.h"
27 #include "util/build_id.h"
28 #include "util/mesa-sha1.h"
29
30 #include "brw_context.h"
31 #include "brw_program.h"
32 #include "brw_state.h"
33
/* SHA-1 that identifies this driver for program binaries.  It is filled in
 * by brw_program_binary_init() and copied out by
 * brw_get_program_binary_driver_sha1().
 */
static uint8_t driver_sha1[20] = { 0 };
35
36 void
brw_program_binary_init(unsigned device_id)37 brw_program_binary_init(unsigned device_id)
38 {
39 const struct build_id_note *note =
40 build_id_find_nhdr_for_addr(brw_program_binary_init);
41 assert(note);
42
43 /**
44 * With Mesa's megadrivers, taking the sha1 of i965_dri.so may not be
45 * unique. Therefore, we make a sha1 of the "i965" string and the sha1
46 * build id from i965_dri.so.
47 */
48 struct mesa_sha1 ctx;
49 _mesa_sha1_init(&ctx);
50 char renderer[10];
51 assert(device_id < 0x10000);
52 int len = snprintf(renderer, sizeof(renderer), "i965_%04x", device_id);
53 assert(len == sizeof(renderer) - 1);
54 _mesa_sha1_update(&ctx, renderer, len);
55 _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
56 _mesa_sha1_final(&ctx, driver_sha1);
57 }
58
59 void
brw_get_program_binary_driver_sha1(struct gl_context * ctx,uint8_t * sha1)60 brw_get_program_binary_driver_sha1(struct gl_context *ctx, uint8_t *sha1)
61 {
62 memcpy(sha1, driver_sha1, sizeof(uint8_t) * 20);
63 }
64
/* Tags written as a uint32 in front of each part of the driver cache blob.
 * Every part other than END_PART is followed by a uint32 payload size and
 * then the payload itself.
 */
enum driver_cache_blob_part {
   /* Terminates the list of parts */
   END_PART = 0,
   /* Program key, prog_data and compiled program */
   INTEL_PART = 1,
   /* Serialized NIR */
   NIR_PART = 2,
};
70
71 static bool
blob_parts_valid(void * blob,uint32_t size)72 blob_parts_valid(void *blob, uint32_t size)
73 {
74 struct blob_reader reader;
75 blob_reader_init(&reader, blob, size);
76
77 do {
78 uint32_t part_type = blob_read_uint32(&reader);
79 if (reader.overrun)
80 return false;
81 if (part_type == END_PART)
82 return reader.current == reader.end;
83 switch ((enum driver_cache_blob_part)part_type) {
84 case INTEL_PART:
85 case NIR_PART:
86 /* Read the uint32_t part-size and skip over it */
87 blob_skip_bytes(&reader, blob_read_uint32(&reader));
88 if (reader.overrun)
89 return false;
90 break;
91 default:
92 return false;
93 }
94 } while (true);
95 }
96
97 static bool
blob_has_part(void * blob,uint32_t size,enum driver_cache_blob_part part)98 blob_has_part(void *blob, uint32_t size, enum driver_cache_blob_part part)
99 {
100 struct blob_reader reader;
101 blob_reader_init(&reader, blob, size);
102
103 assert(blob_parts_valid(blob, size));
104 do {
105 uint32_t part_type = blob_read_uint32(&reader);
106 if (part_type == END_PART)
107 return false;
108 if (part_type == part)
109 return true;
110 blob_skip_bytes(&reader, blob_read_uint32(&reader));
111 } while (true);
112 }
113
114 static bool
driver_blob_is_ready(void * blob,uint32_t size,bool with_intel_program)115 driver_blob_is_ready(void *blob, uint32_t size, bool with_intel_program)
116 {
117 if (!blob) {
118 return false;
119 } else if (!blob_parts_valid(blob, size)) {
120 unreachable("Driver blob format is bad!");
121 return false;
122 } else if (blob_has_part(blob, size, INTEL_PART) == with_intel_program) {
123 return true;
124 } else {
125 return false;
126 }
127 }
128
129 static void
serialize_nir_part(struct blob * writer,struct gl_program * prog)130 serialize_nir_part(struct blob *writer, struct gl_program *prog)
131 {
132 blob_write_uint32(writer, NIR_PART);
133 intptr_t size_offset = blob_reserve_uint32(writer);
134 size_t nir_start = writer->size;
135 nir_serialize(writer, prog->nir, false);
136 blob_overwrite_uint32(writer, size_offset, writer->size - nir_start);
137 }
138
139 void
brw_program_serialize_nir(struct gl_context * ctx,struct gl_program * prog)140 brw_program_serialize_nir(struct gl_context *ctx, struct gl_program *prog)
141 {
142 if (driver_blob_is_ready(prog->driver_cache_blob,
143 prog->driver_cache_blob_size, false))
144 return;
145
146 if (prog->driver_cache_blob)
147 ralloc_free(prog->driver_cache_blob);
148
149 struct blob writer;
150 blob_init(&writer);
151 serialize_nir_part(&writer, prog);
152 blob_write_uint32(&writer, END_PART);
153 prog->driver_cache_blob = ralloc_size(NULL, writer.size);
154 memcpy(prog->driver_cache_blob, writer.data, writer.size);
155 prog->driver_cache_blob_size = writer.size;
156 blob_finish(&writer);
157 }
158
159 static bool
deserialize_intel_program(struct blob_reader * reader,struct gl_context * ctx,struct gl_program * prog,gl_shader_stage stage)160 deserialize_intel_program(struct blob_reader *reader, struct gl_context *ctx,
161 struct gl_program *prog, gl_shader_stage stage)
162 {
163 struct brw_context *brw = brw_context(ctx);
164
165 union brw_any_prog_key prog_key;
166 blob_copy_bytes(reader, &prog_key, brw_prog_key_size(stage));
167 prog_key.base.program_string_id = brw_program(prog)->id;
168
169 enum brw_cache_id cache_id = brw_stage_cache_id(stage);
170
171 const uint8_t *program;
172 struct brw_stage_prog_data *prog_data =
173 ralloc_size(NULL, sizeof(union brw_any_prog_data));
174
175 if (!brw_read_blob_program_data(reader, prog, stage, &program, prog_data)) {
176 ralloc_free(prog_data);
177 return false;
178 }
179
180 uint32_t offset;
181 void *out_prog_data;
182 brw_upload_cache(&brw->cache, cache_id, &prog_key, brw_prog_key_size(stage),
183 program, prog_data->program_size, prog_data,
184 brw_prog_data_size(stage), &offset, &out_prog_data);
185
186 ralloc_free(prog_data);
187
188 return true;
189 }
190
191 void
brw_program_deserialize_driver_blob(struct gl_context * ctx,struct gl_program * prog,gl_shader_stage stage)192 brw_program_deserialize_driver_blob(struct gl_context *ctx,
193 struct gl_program *prog,
194 gl_shader_stage stage)
195 {
196 if (!prog->driver_cache_blob)
197 return;
198
199 struct blob_reader reader;
200 blob_reader_init(&reader, prog->driver_cache_blob,
201 prog->driver_cache_blob_size);
202
203 do {
204 uint32_t part_type = blob_read_uint32(&reader);
205 if ((enum driver_cache_blob_part)part_type == END_PART)
206 break;
207 switch ((enum driver_cache_blob_part)part_type) {
208 case INTEL_PART: {
209 ASSERTED uint32_t gen_size = blob_read_uint32(&reader);
210 assert(!reader.overrun &&
211 (uintptr_t)(reader.end - reader.current) > gen_size);
212 deserialize_intel_program(&reader, ctx, prog, stage);
213 break;
214 }
215 case NIR_PART: {
216 ASSERTED uint32_t nir_size = blob_read_uint32(&reader);
217 assert(!reader.overrun &&
218 (uintptr_t)(reader.end - reader.current) > nir_size);
219 const struct nir_shader_compiler_options *options =
220 ctx->Const.ShaderCompilerOptions[stage].NirOptions;
221 prog->nir = nir_deserialize(NULL, options, &reader);
222 break;
223 }
224 default:
225 unreachable("Unsupported blob part type!");
226 break;
227 }
228 } while (true);
229
230 ralloc_free(prog->driver_cache_blob);
231 prog->driver_cache_blob = NULL;
232 prog->driver_cache_blob_size = 0;
233 }
234
235 /* This is just a wrapper around brw_program_deserialize_nir() as i965
236 * doesn't need gl_shader_program like other drivers do.
237 */
238 void
brw_deserialize_program_binary(struct gl_context * ctx,struct gl_shader_program * shProg,struct gl_program * prog)239 brw_deserialize_program_binary(struct gl_context *ctx,
240 struct gl_shader_program *shProg,
241 struct gl_program *prog)
242 {
243 brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
244 }
245
246 static void
serialize_intel_part(struct blob * writer,struct gl_context * ctx,struct gl_shader_program * sh_prog,struct gl_program * prog)247 serialize_intel_part(struct blob *writer, struct gl_context *ctx,
248 struct gl_shader_program *sh_prog,
249 struct gl_program *prog)
250 {
251 struct brw_context *brw = brw_context(ctx);
252
253 union brw_any_prog_key key;
254 brw_populate_default_key(brw->screen->compiler, &key, sh_prog, prog);
255
256 const gl_shader_stage stage = prog->info.stage;
257 uint32_t offset = 0;
258 void *prog_data = NULL;
259 if (brw_search_cache(&brw->cache, brw_stage_cache_id(stage), &key,
260 brw_prog_key_size(stage), &offset, &prog_data,
261 false)) {
262 const void *program_map = brw->cache.map + offset;
263 /* TODO: Improve perf for non-LLC. It would be best to save it at
264 * program generation time when the program is in normal memory
265 * accessible with cache to the CPU. Another easier change would be to
266 * use _mesa_streaming_load_memcpy to read from the program mapped
267 * memory.
268 */
269 blob_write_uint32(writer, INTEL_PART);
270 intptr_t size_offset = blob_reserve_uint32(writer);
271 size_t gen_start = writer->size;
272 blob_write_bytes(writer, &key, brw_prog_key_size(stage));
273 brw_write_blob_program_data(writer, stage, program_map, prog_data);
274 blob_overwrite_uint32(writer, size_offset, writer->size - gen_start);
275 }
276 }
277
278 void
brw_serialize_program_binary(struct gl_context * ctx,struct gl_shader_program * sh_prog,struct gl_program * prog)279 brw_serialize_program_binary(struct gl_context *ctx,
280 struct gl_shader_program *sh_prog,
281 struct gl_program *prog)
282 {
283 if (driver_blob_is_ready(prog->driver_cache_blob,
284 prog->driver_cache_blob_size, true))
285 return;
286
287 if (prog->driver_cache_blob) {
288 if (!prog->nir) {
289 /* If we loaded from the disk shader cache, then the nir might not
290 * have been deserialized yet.
291 */
292 brw_program_deserialize_driver_blob(ctx, prog, prog->info.stage);
293 }
294 ralloc_free(prog->driver_cache_blob);
295 }
296
297 struct blob writer;
298 blob_init(&writer);
299 serialize_nir_part(&writer, prog);
300 serialize_intel_part(&writer, ctx, sh_prog, prog);
301 blob_write_uint32(&writer, END_PART);
302 prog->driver_cache_blob = ralloc_size(NULL, writer.size);
303 memcpy(prog->driver_cache_blob, writer.data, writer.size);
304 prog->driver_cache_blob_size = writer.size;
305 blob_finish(&writer);
306 }
307
308 void
brw_write_blob_program_data(struct blob * binary,gl_shader_stage stage,const void * program,struct brw_stage_prog_data * prog_data)309 brw_write_blob_program_data(struct blob *binary, gl_shader_stage stage,
310 const void *program,
311 struct brw_stage_prog_data *prog_data)
312 {
313 /* Write prog_data to blob. */
314 blob_write_bytes(binary, prog_data, brw_prog_data_size(stage));
315
316 /* Write program to blob. */
317 blob_write_bytes(binary, program, prog_data->program_size);
318
319 /* Write push params */
320 blob_write_bytes(binary, prog_data->param,
321 sizeof(uint32_t) * prog_data->nr_params);
322
323 /* Write pull params */
324 blob_write_bytes(binary, prog_data->pull_param,
325 sizeof(uint32_t) * prog_data->nr_pull_params);
326 }
327
328 bool
brw_read_blob_program_data(struct blob_reader * binary,struct gl_program * prog,gl_shader_stage stage,const uint8_t ** program,struct brw_stage_prog_data * prog_data)329 brw_read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
330 gl_shader_stage stage, const uint8_t **program,
331 struct brw_stage_prog_data *prog_data)
332 {
333 /* Read shader prog_data from blob. */
334 blob_copy_bytes(binary, prog_data, brw_prog_data_size(stage));
335 if (binary->overrun)
336 return false;
337
338 /* Read shader program from blob. */
339 *program = blob_read_bytes(binary, prog_data->program_size);
340
341 /* Read push params */
342 prog_data->param = rzalloc_array(NULL, uint32_t, prog_data->nr_params);
343 blob_copy_bytes(binary, prog_data->param,
344 sizeof(uint32_t) * prog_data->nr_params);
345
346 /* Read pull params */
347 prog_data->pull_param = rzalloc_array(NULL, uint32_t,
348 prog_data->nr_pull_params);
349 blob_copy_bytes(binary, prog_data->pull_param,
350 sizeof(uint32_t) * prog_data->nr_pull_params);
351
352 return !binary->overrun;
353 }
354