/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/blob.h"
#include "util/hash_table.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "nir/nir_serialize.h"
#include "anv_private.h"
#include "nir/nir_xfb_info.h"
#include "vulkan/util/vk_util.h"
#include "compiler/spirv/nir_spirv.h"
#include "shaders/float64_spv.h"

static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob);

struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob);

static void
anv_shader_bin_destroy(struct vk_device *_device,
                       struct vk_pipeline_cache_object *object)
{
   struct anv_device *device =
      container_of(_device, struct anv_device, vk);

   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
   vk_pipeline_cache_object_finish(&shader->base);
   vk_free(&device->vk.alloc, shader);
}

static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};

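/* NULL-terminated list of the cache-object types ANV knows how to import
 * from client-provided VkPipelineCache data.  Currently only compiled
 * shader binaries are supported.
 */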
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      gl_shader_stage stage,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data_in,
                      uint32_t prog_data_size,
                      const struct brw_compile_stats *stats, uint32_t num_stats,
                      const nir_xfb_info *xfb_info_in,
                      const struct anv_pipeline_bind_map *bind_map,
                      const struct anv_push_descriptor_info *push_desc_info,
                      enum anv_dynamic_push_bits dynamic_push_values)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
   VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
   VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
                           prog_data_size);
   VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
                      prog_data_in->num_relocs);
   VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);

   VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
                           xfb_info_in == NULL ? 0 :
                           nir_xfb_info_size(xfb_info_in->output_count));

   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
                      bind_map->surface_count);
   VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
                      bind_map->sampler_count);
   VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
                      bind_map->kernel_arg_count);

   if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   memcpy(obj_key_data, key_data, key_size);
   vk_pipeline_cache_object_init(&device->vk, &shader->base,
                                 &anv_shader_bin_ops, obj_key_data, key_size);

   shader->stage = stage;

   shader->kernel =
      anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
   memcpy(shader->kernel.map, kernel_data, kernel_size);
   shader->kernel_size = kernel_size;

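   /* GPU virtual address of the constant data embedded in the kernel,
    * patched in below via the CONST_DATA_ADDR relocations.
    */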
   uint64_t shader_data_addr =
      device->physical->va.instruction_state_pool.addr +
      shader->kernel.offset +
      prog_data_in->const_data_offset;

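   /* Build the table of relocation values.  The asserts verify that the
    * low 32 bits of each pool's base address are zero, so a 64-bit address
    * can be reconstructed from separate 32-bit "low" and "high" patches.
    */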
   int rv_count = 0;
   struct brw_shader_reloc_value reloc_values[6];
   assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
   assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
      .value = device->physical->indirect_descriptors ?
               (device->physical->va.indirect_descriptor_pool.addr >> 32) :
               (device->physical->va.internal_surface_state_pool.addr >> 32),
   };
   assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
      .value = shader_data_addr,
   };
   assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
      .value = device->physical->va.instruction_state_pool.addr >> 32,
   };
   reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
      .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
      .value = shader->kernel.offset,
   };
   if (brw_shader_stage_is_bindless(stage)) {
      const struct brw_bs_prog_data *bs_prog_data =
         brw_bs_prog_data_const(prog_data_in);
      uint64_t resume_sbt_addr =
         device->physical->va.instruction_state_pool.addr +
         shader->kernel.offset +
         bs_prog_data->resume_sbt_offset;
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
         .value = resume_sbt_addr,
      };
      reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
         .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
         .value = resume_sbt_addr >> 32,
      };
   }

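   /* Patch the relocation values into the kernel code just uploaded to the
    * instruction state pool.
    */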
   brw_write_shader_relocs(&device->physical->compiler->isa,
                           shader->kernel.map, prog_data_in,
                           reloc_values, rv_count);

   memcpy(prog_data, prog_data_in, prog_data_size);
   typed_memcpy(prog_data_relocs, prog_data_in->relocs,
                prog_data_in->num_relocs);
   prog_data->relocs = prog_data_relocs;
   memset(prog_data_param, 0,
          prog_data->nr_params * sizeof(*prog_data_param));
   prog_data->param = prog_data_param;
   shader->prog_data = prog_data;
   shader->prog_data_size = prog_data_size;

   assert(num_stats <= ARRAY_SIZE(shader->stats));
   typed_memcpy(shader->stats, stats, num_stats);
   shader->num_stats = num_stats;

   if (xfb_info_in) {
      *xfb_info = *xfb_info_in;
      typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
                   xfb_info_in->output_count);
      shader->xfb_info = xfb_info;
   } else {
      shader->xfb_info = NULL;
   }

   shader->dynamic_push_values = dynamic_push_values;

   typed_memcpy(&shader->push_desc_info, push_desc_info, 1);

   shader->bind_map = *bind_map;
   typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
                bind_map->surface_count);
   shader->bind_map.surface_to_descriptor = surface_to_descriptor;
   typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
                bind_map->sampler_count);
   shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
   typed_memcpy(kernel_args, bind_map->kernel_args,
                bind_map->kernel_arg_count);
   shader->bind_map.kernel_args = kernel_args;

   return shader;
}

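/* Write a shader out to a blob.  The field order here must match the read
 * order in anv_shader_bin_deserialize() exactly:
 *
 *    stage, kernel_size, kernel bytes, prog_data_size, prog_data, relocs,
 *    num_stats, stats, xfb_info_size, xfb_info, dynamic_push_values,
 *    push_desc_info, bind_map
 */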
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob)
{
   struct anv_shader_bin *shader =
      container_of(object, struct anv_shader_bin, base);

   blob_write_uint32(blob, shader->stage);

   blob_write_uint32(blob, shader->kernel_size);
   blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);

   blob_write_uint32(blob, shader->prog_data_size);

   union brw_any_prog_data prog_data;
   assert(shader->prog_data_size <= sizeof(prog_data));
   memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
   prog_data.base.relocs = NULL;
   prog_data.base.param = NULL;
   blob_write_bytes(blob, &prog_data, shader->prog_data_size);

   blob_write_bytes(blob, shader->prog_data->relocs,
                    shader->prog_data->num_relocs *
                    sizeof(shader->prog_data->relocs[0]));

   blob_write_uint32(blob, shader->num_stats);
   blob_write_bytes(blob, shader->stats,
                    shader->num_stats * sizeof(shader->stats[0]));

   if (shader->xfb_info) {
      uint32_t xfb_info_size =
         nir_xfb_info_size(shader->xfb_info->output_count);
      blob_write_uint32(blob, xfb_info_size);
      blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
   } else {
      blob_write_uint32(blob, 0);
   }

   blob_write_uint32(blob, shader->dynamic_push_values);

   blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
   blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
   blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);

   blob_write_bytes(blob, shader->bind_map.surface_sha1,
                    sizeof(shader->bind_map.surface_sha1));
   blob_write_bytes(blob, shader->bind_map.sampler_sha1,
                    sizeof(shader->bind_map.sampler_sha1));
   blob_write_bytes(blob, shader->bind_map.push_sha1,
                    sizeof(shader->bind_map.push_sha1));
   blob_write_uint32(blob, shader->bind_map.surface_count);
   blob_write_uint32(blob, shader->bind_map.sampler_count);
   if (shader->stage == MESA_SHADER_KERNEL) {
      uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
                        (uint32_t)shader->bind_map.kernel_arg_count;
      blob_write_uint32(blob, packed);
   }
   blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
                    shader->bind_map.surface_count *
                    sizeof(*shader->bind_map.surface_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
                    shader->bind_map.sampler_count *
                    sizeof(*shader->bind_map.sampler_to_descriptor));
   blob_write_bytes(blob, shader->bind_map.kernel_args,
                    shader->bind_map.kernel_arg_count *
                    sizeof(*shader->bind_map.kernel_args));
   blob_write_bytes(blob, shader->bind_map.push_ranges,
                    sizeof(shader->bind_map.push_ranges));

   return !blob->out_of_memory;
}

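/* Read a shader back from a blob written by anv_shader_bin_serialize().
 * Pointers returned by blob_read_bytes() point into the blob itself;
 * anv_shader_bin_create() copies everything it keeps, so nothing here
 * needs to outlive the blob.
 */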
struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob)
{
   struct anv_device *device =
      container_of(cache->base.device, struct anv_device, vk);

   gl_shader_stage stage = blob_read_uint32(blob);

   uint32_t kernel_size = blob_read_uint32(blob);
   const void *kernel_data = blob_read_bytes(blob, kernel_size);

   uint32_t prog_data_size = blob_read_uint32(blob);
   const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

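   /* Copy into a union large enough for any stage's prog_data, then point
    * its relocs field at the serialized reloc array in the blob.
    */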
   union brw_any_prog_data prog_data;
   memcpy(&prog_data, prog_data_bytes,
          MIN2(sizeof(prog_data), prog_data_size));
   prog_data.base.relocs =
      blob_read_bytes(blob, prog_data.base.num_relocs *
                            sizeof(prog_data.base.relocs[0]));

   uint32_t num_stats = blob_read_uint32(blob);
   const struct brw_compile_stats *stats =
      blob_read_bytes(blob, num_stats * sizeof(stats[0]));

   const nir_xfb_info *xfb_info = NULL;
   uint32_t xfb_size = blob_read_uint32(blob);
   if (xfb_size)
      xfb_info = blob_read_bytes(blob, xfb_size);

   enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);

   struct anv_push_descriptor_info push_desc_info = {};
   push_desc_info.used_descriptors = blob_read_uint32(blob);
   push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
   push_desc_info.used_set_buffer = blob_read_uint8(blob);

   struct anv_pipeline_bind_map bind_map = {};
   blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
   blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
   blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
   bind_map.surface_count = blob_read_uint32(blob);
   bind_map.sampler_count = blob_read_uint32(blob);
   if (stage == MESA_SHADER_KERNEL) {
      uint32_t packed = blob_read_uint32(blob);
      bind_map.kernel_args_size = (uint16_t)(packed >> 16);
      bind_map.kernel_arg_count = (uint16_t)packed;
   }
   bind_map.surface_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.surface_count *
                            sizeof(*bind_map.surface_to_descriptor));
   bind_map.sampler_to_descriptor = (void *)
      blob_read_bytes(blob, bind_map.sampler_count *
                            sizeof(*bind_map.sampler_to_descriptor));
   bind_map.kernel_args = (void *)
      blob_read_bytes(blob, bind_map.kernel_arg_count *
                            sizeof(*bind_map.kernel_args));
   blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));

   if (blob->overrun)
      return NULL;

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device, stage,
                            key_data, key_size,
                            kernel_data, kernel_size,
                            &prog_data.base, prog_data_size,
                            stats, num_stats, xfb_info, &bind_map,
                            &push_desc_info,
                            dynamic_push_values);
   if (shader == NULL)
      return NULL;

   return &shader->base;
}

struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct vk_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size,
                             bool *user_cache_hit)
{
   /* Use the default pipeline cache if none is specified */
   if (cache == NULL)
      cache = device->default_pipeline_cache;

   bool cache_hit = false;
   struct vk_pipeline_cache_object *object =
      vk_pipeline_cache_lookup_object(cache, key_data, key_size,
                                      &anv_shader_bin_ops, &cache_hit);
   if (user_cache_hit != NULL) {
      *user_cache_hit = object != NULL && cache_hit &&
                        cache != device->default_pipeline_cache;
   }
   if (object == NULL)
      return NULL;

   return container_of(object, struct anv_shader_bin, base);
}

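/* Create a shader binary and add it to the pipeline cache.  If an
 * equivalent object is already in the cache, vk_pipeline_cache_add_object()
 * returns the cached object and drops the reference on the new one, so
 * callers must use the returned pointer rather than the freshly created
 * shader.
 */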
struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct vk_pipeline_cache *cache,
                         const struct anv_shader_upload_params *params)
{
   /* Use the default pipeline cache if none is specified */
   if (cache == NULL)
      cache = device->default_pipeline_cache;

   struct anv_shader_bin *shader =
      anv_shader_bin_create(device,
                            params->stage,
                            params->key_data,
                            params->key_size,
                            params->kernel_data,
                            params->kernel_size,
                            params->prog_data,
                            params->prog_data_size,
                            params->stats,
                            params->num_stats,
                            params->xfb_info,
                            params->bind_map,
                            params->push_desc_info,
                            params->dynamic_push_values);
   if (shader == NULL)
      return NULL;

   struct vk_pipeline_cache_object *cached =
      vk_pipeline_cache_add_object(cache, &shader->base);

   return container_of(cached, struct anv_shader_bin, base);
}

#define SHA1_KEY_SIZE 20

struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct vk_pipeline_cache *cache,
                          const nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[SHA1_KEY_SIZE],
                          void *mem_ctx)
{
   if (cache == NULL)
      cache = device->default_pipeline_cache;

   return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
                                       nir_options, NULL, mem_ctx);
}

void
anv_device_upload_nir(struct anv_device *device,
                      struct vk_pipeline_cache *cache,
                      const struct nir_shader *nir,
                      unsigned char sha1_key[SHA1_KEY_SIZE])
{
   if (cache == NULL)
      cache = device->default_pipeline_cache;

   vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
}

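/* Load the FP64 SPIR-V shader library, checking the device's internal
 * cache first.  The library is keyed by the SHA1 of its name and kept on
 * the device for later linking into shaders that need lowered 64-bit
 * float operations.
 */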
void
anv_load_fp64_shader(struct anv_device *device)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[MESA_SHADER_VERTEX];

   const char* shader_name = "float64_spv_lib";
   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
   _mesa_sha1_final(&sha1_ctx, sha1);

   device->fp64_nir =
      anv_device_search_for_nir(device, device->internal_cache,
                                nir_options, sha1, NULL);

   /* The shader was found in the cache; no need to call spirv_to_nir() again. */
   if (device->fp64_nir)
      return;

   struct spirv_to_nir_options spirv_options = {
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
      },
      .environment = NIR_SPIRV_VULKAN,
      .create_library = true
   };

   nir_shader* nir =
      spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
                   NULL, 0, MESA_SHADER_VERTEX, "main",
                   &spirv_options, nir_options);

   assert(nir != NULL);

   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");

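   /* Lower initializers and returns, inline function calls, then run basic
    * cleanup and optimization passes so the cached library is cheap to
    * link against later.
    */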
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
              nir_address_format_62bit_generic);

   anv_device_upload_nir(device, device->internal_cache,
                         nir, sha1);

   device->fp64_nir = nir;
}