• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/blob.h"
25 #include "util/hash_table.h"
26 #include "util/u_debug.h"
27 #include "util/disk_cache.h"
28 #include "util/mesa-sha1.h"
29 #include "nir/nir_serialize.h"
30 #include "anv_private.h"
31 #include "nir/nir_xfb_info.h"
32 #include "vulkan/util/vk_util.h"
33 #include "compiler/spirv/nir_spirv.h"
34 #include "shaders/float64_spv.h"
35 
/* Forward declarations: serialize/deserialize are referenced by the
 * anv_shader_bin_ops vtable below but defined later in this file.
 */
static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
                         struct blob *blob);

struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
                           const void *key_data, size_t key_size,
                           struct blob_reader *blob);
44 
45 static void
anv_shader_bin_destroy(struct vk_device * _device,struct vk_pipeline_cache_object * object)46 anv_shader_bin_destroy(struct vk_device *_device,
47                        struct vk_pipeline_cache_object *object)
48 {
49    struct anv_device *device =
50       container_of(_device, struct anv_device, vk);
51 
52    struct anv_shader_bin *shader =
53       container_of(object, struct anv_shader_bin, base);
54 
55    anv_state_pool_free(&device->instruction_state_pool, shader->kernel);
56    vk_pipeline_cache_object_finish(&shader->base);
57    vk_free(&device->vk.alloc, shader);
58 }
59 
/* Vtable that hooks anv_shader_bin into the common vk_pipeline_cache
 * machinery (see src/vulkan/runtime/vk_pipeline_cache.h).
 */
static const struct vk_pipeline_cache_object_ops anv_shader_bin_ops = {
   .serialize = anv_shader_bin_serialize,
   .deserialize = anv_shader_bin_deserialize,
   .destroy = anv_shader_bin_destroy,
};

/* NULL-terminated list of object ops accepted when importing serialized
 * pipeline-cache data (vkCreatePipelineCache with initial data).
 */
const struct vk_pipeline_cache_object_ops *const anv_cache_import_ops[2] = {
   &anv_shader_bin_ops,
   NULL
};
70 
71 struct anv_shader_bin *
anv_shader_bin_create(struct anv_device * device,gl_shader_stage stage,const void * key_data,uint32_t key_size,const void * kernel_data,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_in,uint32_t prog_data_size,const struct brw_compile_stats * stats,uint32_t num_stats,const nir_xfb_info * xfb_info_in,const struct anv_pipeline_bind_map * bind_map,const struct anv_push_descriptor_info * push_desc_info,enum anv_dynamic_push_bits dynamic_push_values)72 anv_shader_bin_create(struct anv_device *device,
73                       gl_shader_stage stage,
74                       const void *key_data, uint32_t key_size,
75                       const void *kernel_data, uint32_t kernel_size,
76                       const struct brw_stage_prog_data *prog_data_in,
77                       uint32_t prog_data_size,
78                       const struct brw_compile_stats *stats, uint32_t num_stats,
79                       const nir_xfb_info *xfb_info_in,
80                       const struct anv_pipeline_bind_map *bind_map,
81                       const struct anv_push_descriptor_info *push_desc_info,
82                       enum anv_dynamic_push_bits dynamic_push_values)
83 {
84    VK_MULTIALLOC(ma);
85    VK_MULTIALLOC_DECL(&ma, struct anv_shader_bin, shader, 1);
86    VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size);
87    VK_MULTIALLOC_DECL_SIZE(&ma, struct brw_stage_prog_data, prog_data,
88                                 prog_data_size);
89    VK_MULTIALLOC_DECL(&ma, struct brw_shader_reloc, prog_data_relocs,
90                            prog_data_in->num_relocs);
91    VK_MULTIALLOC_DECL(&ma, uint32_t, prog_data_param, prog_data_in->nr_params);
92 
93    VK_MULTIALLOC_DECL_SIZE(&ma, nir_xfb_info, xfb_info,
94                                 xfb_info_in == NULL ? 0 :
95                                 nir_xfb_info_size(xfb_info_in->output_count));
96 
97    VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, surface_to_descriptor,
98                            bind_map->surface_count);
99    VK_MULTIALLOC_DECL(&ma, struct anv_pipeline_binding, sampler_to_descriptor,
100                       bind_map->sampler_count);
101    VK_MULTIALLOC_DECL(&ma, struct brw_kernel_arg_desc, kernel_args,
102                       bind_map->kernel_arg_count);
103 
104    if (!vk_multialloc_alloc(&ma, &device->vk.alloc,
105                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
106       return NULL;
107 
108    memcpy(obj_key_data, key_data, key_size);
109    vk_pipeline_cache_object_init(&device->vk, &shader->base,
110                                  &anv_shader_bin_ops, obj_key_data, key_size);
111 
112    shader->stage = stage;
113 
114    shader->kernel =
115       anv_state_pool_alloc(&device->instruction_state_pool, kernel_size, 64);
116    memcpy(shader->kernel.map, kernel_data, kernel_size);
117    shader->kernel_size = kernel_size;
118 
119    uint64_t shader_data_addr =
120       device->physical->va.instruction_state_pool.addr +
121       shader->kernel.offset +
122       prog_data_in->const_data_offset;
123 
124    int rv_count = 0;
125    struct brw_shader_reloc_value reloc_values[6];
126    assert((device->physical->va.indirect_descriptor_pool.addr & 0xffffffff) == 0);
127    assert((device->physical->va.internal_surface_state_pool.addr & 0xffffffff) == 0);
128    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
129       .id = BRW_SHADER_RELOC_DESCRIPTORS_ADDR_HIGH,
130       .value = device->physical->indirect_descriptors ?
131                (device->physical->va.indirect_descriptor_pool.addr >> 32) :
132                (device->physical->va.internal_surface_state_pool.addr >> 32),
133    };
134    assert((device->physical->va.instruction_state_pool.addr & 0xffffffff) == 0);
135    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
136       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW,
137       .value = shader_data_addr,
138    };
139    assert(shader_data_addr >> 32 == device->physical->va.instruction_state_pool.addr >> 32);
140    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
141       .id = BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
142       .value = device->physical->va.instruction_state_pool.addr >> 32,
143    };
144    reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
145       .id = BRW_SHADER_RELOC_SHADER_START_OFFSET,
146       .value = shader->kernel.offset,
147    };
148    if (brw_shader_stage_is_bindless(stage)) {
149       const struct brw_bs_prog_data *bs_prog_data =
150          brw_bs_prog_data_const(prog_data_in);
151       uint64_t resume_sbt_addr =
152          device->physical->va.instruction_state_pool.addr +
153          shader->kernel.offset +
154          bs_prog_data->resume_sbt_offset;
155       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
156          .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_LOW,
157          .value = resume_sbt_addr,
158       };
159       reloc_values[rv_count++] = (struct brw_shader_reloc_value) {
160          .id = BRW_SHADER_RELOC_RESUME_SBT_ADDR_HIGH,
161          .value = resume_sbt_addr >> 32,
162       };
163    }
164 
165    brw_write_shader_relocs(&device->physical->compiler->isa,
166                            shader->kernel.map, prog_data_in,
167                            reloc_values, rv_count);
168 
169    memcpy(prog_data, prog_data_in, prog_data_size);
170    typed_memcpy(prog_data_relocs, prog_data_in->relocs,
171                 prog_data_in->num_relocs);
172    prog_data->relocs = prog_data_relocs;
173    memset(prog_data_param, 0,
174           prog_data->nr_params * sizeof(*prog_data_param));
175    prog_data->param = prog_data_param;
176    shader->prog_data = prog_data;
177    shader->prog_data_size = prog_data_size;
178 
179    assert(num_stats <= ARRAY_SIZE(shader->stats));
180    typed_memcpy(shader->stats, stats, num_stats);
181    shader->num_stats = num_stats;
182 
183    if (xfb_info_in) {
184       *xfb_info = *xfb_info_in;
185       typed_memcpy(xfb_info->outputs, xfb_info_in->outputs,
186                    xfb_info_in->output_count);
187       shader->xfb_info = xfb_info;
188    } else {
189       shader->xfb_info = NULL;
190    }
191 
192    shader->dynamic_push_values = dynamic_push_values;
193 
194    typed_memcpy(&shader->push_desc_info, push_desc_info, 1);
195 
196    shader->bind_map = *bind_map;
197    typed_memcpy(surface_to_descriptor, bind_map->surface_to_descriptor,
198                 bind_map->surface_count);
199    shader->bind_map.surface_to_descriptor = surface_to_descriptor;
200    typed_memcpy(sampler_to_descriptor, bind_map->sampler_to_descriptor,
201                 bind_map->sampler_count);
202    shader->bind_map.sampler_to_descriptor = sampler_to_descriptor;
203    typed_memcpy(kernel_args, bind_map->kernel_args,
204                 bind_map->kernel_arg_count);
205    shader->bind_map.kernel_args = kernel_args;
206 
207    return shader;
208 }
209 
210 static bool
anv_shader_bin_serialize(struct vk_pipeline_cache_object * object,struct blob * blob)211 anv_shader_bin_serialize(struct vk_pipeline_cache_object *object,
212                          struct blob *blob)
213 {
214    struct anv_shader_bin *shader =
215       container_of(object, struct anv_shader_bin, base);
216 
217    blob_write_uint32(blob, shader->stage);
218 
219    blob_write_uint32(blob, shader->kernel_size);
220    blob_write_bytes(blob, shader->kernel.map, shader->kernel_size);
221 
222    blob_write_uint32(blob, shader->prog_data_size);
223 
224    union brw_any_prog_data prog_data;
225    assert(shader->prog_data_size <= sizeof(prog_data));
226    memcpy(&prog_data, shader->prog_data, shader->prog_data_size);
227    prog_data.base.relocs = NULL;
228    prog_data.base.param = NULL;
229    blob_write_bytes(blob, &prog_data, shader->prog_data_size);
230 
231    blob_write_bytes(blob, shader->prog_data->relocs,
232                     shader->prog_data->num_relocs *
233                     sizeof(shader->prog_data->relocs[0]));
234 
235    blob_write_uint32(blob, shader->num_stats);
236    blob_write_bytes(blob, shader->stats,
237                     shader->num_stats * sizeof(shader->stats[0]));
238 
239    if (shader->xfb_info) {
240       uint32_t xfb_info_size =
241          nir_xfb_info_size(shader->xfb_info->output_count);
242       blob_write_uint32(blob, xfb_info_size);
243       blob_write_bytes(blob, shader->xfb_info, xfb_info_size);
244    } else {
245       blob_write_uint32(blob, 0);
246    }
247 
248    blob_write_uint32(blob, shader->dynamic_push_values);
249 
250    blob_write_uint32(blob, shader->push_desc_info.used_descriptors);
251    blob_write_uint32(blob, shader->push_desc_info.fully_promoted_ubo_descriptors);
252    blob_write_uint8(blob, shader->push_desc_info.used_set_buffer);
253 
254    blob_write_bytes(blob, shader->bind_map.surface_sha1,
255                     sizeof(shader->bind_map.surface_sha1));
256    blob_write_bytes(blob, shader->bind_map.sampler_sha1,
257                     sizeof(shader->bind_map.sampler_sha1));
258    blob_write_bytes(blob, shader->bind_map.push_sha1,
259                     sizeof(shader->bind_map.push_sha1));
260    blob_write_uint32(blob, shader->bind_map.surface_count);
261    blob_write_uint32(blob, shader->bind_map.sampler_count);
262    if (shader->stage == MESA_SHADER_KERNEL) {
263       uint32_t packed = (uint32_t)shader->bind_map.kernel_args_size << 16 |
264                         (uint32_t)shader->bind_map.kernel_arg_count;
265       blob_write_uint32(blob, packed);
266    }
267    blob_write_bytes(blob, shader->bind_map.surface_to_descriptor,
268                     shader->bind_map.surface_count *
269                     sizeof(*shader->bind_map.surface_to_descriptor));
270    blob_write_bytes(blob, shader->bind_map.sampler_to_descriptor,
271                     shader->bind_map.sampler_count *
272                     sizeof(*shader->bind_map.sampler_to_descriptor));
273    blob_write_bytes(blob, shader->bind_map.kernel_args,
274                     shader->bind_map.kernel_arg_count *
275                     sizeof(*shader->bind_map.kernel_args));
276    blob_write_bytes(blob, shader->bind_map.push_ranges,
277                     sizeof(shader->bind_map.push_ranges));
278 
279    return !blob->out_of_memory;
280 }
281 
282 struct vk_pipeline_cache_object *
anv_shader_bin_deserialize(struct vk_pipeline_cache * cache,const void * key_data,size_t key_size,struct blob_reader * blob)283 anv_shader_bin_deserialize(struct vk_pipeline_cache *cache,
284                            const void *key_data, size_t key_size,
285                            struct blob_reader *blob)
286 {
287    struct anv_device *device =
288       container_of(cache->base.device, struct anv_device, vk);
289 
290    gl_shader_stage stage = blob_read_uint32(blob);
291 
292    uint32_t kernel_size = blob_read_uint32(blob);
293    const void *kernel_data = blob_read_bytes(blob, kernel_size);
294 
295    uint32_t prog_data_size = blob_read_uint32(blob);
296    const void *prog_data_bytes = blob_read_bytes(blob, prog_data_size);
297    if (blob->overrun)
298       return NULL;
299 
300    union brw_any_prog_data prog_data;
301    memcpy(&prog_data, prog_data_bytes,
302           MIN2(sizeof(prog_data), prog_data_size));
303    prog_data.base.relocs =
304       blob_read_bytes(blob, prog_data.base.num_relocs *
305                             sizeof(prog_data.base.relocs[0]));
306 
307    uint32_t num_stats = blob_read_uint32(blob);
308    const struct brw_compile_stats *stats =
309       blob_read_bytes(blob, num_stats * sizeof(stats[0]));
310 
311    const nir_xfb_info *xfb_info = NULL;
312    uint32_t xfb_size = blob_read_uint32(blob);
313    if (xfb_size)
314       xfb_info = blob_read_bytes(blob, xfb_size);
315 
316    enum anv_dynamic_push_bits dynamic_push_values = blob_read_uint32(blob);
317 
318    struct anv_push_descriptor_info push_desc_info = {};
319    push_desc_info.used_descriptors = blob_read_uint32(blob);
320    push_desc_info.fully_promoted_ubo_descriptors = blob_read_uint32(blob);
321    push_desc_info.used_set_buffer = blob_read_uint8(blob);
322 
323    struct anv_pipeline_bind_map bind_map = {};
324    blob_copy_bytes(blob, bind_map.surface_sha1, sizeof(bind_map.surface_sha1));
325    blob_copy_bytes(blob, bind_map.sampler_sha1, sizeof(bind_map.sampler_sha1));
326    blob_copy_bytes(blob, bind_map.push_sha1, sizeof(bind_map.push_sha1));
327    bind_map.surface_count = blob_read_uint32(blob);
328    bind_map.sampler_count = blob_read_uint32(blob);
329    if (stage == MESA_SHADER_KERNEL) {
330       uint32_t packed = blob_read_uint32(blob);
331       bind_map.kernel_args_size = (uint16_t)(packed >> 16);
332       bind_map.kernel_arg_count = (uint16_t)packed;
333    }
334    bind_map.surface_to_descriptor = (void *)
335       blob_read_bytes(blob, bind_map.surface_count *
336                             sizeof(*bind_map.surface_to_descriptor));
337    bind_map.sampler_to_descriptor = (void *)
338       blob_read_bytes(blob, bind_map.sampler_count *
339                             sizeof(*bind_map.sampler_to_descriptor));
340    bind_map.kernel_args = (void *)
341       blob_read_bytes(blob, bind_map.kernel_arg_count *
342                             sizeof(*bind_map.kernel_args));
343    blob_copy_bytes(blob, bind_map.push_ranges, sizeof(bind_map.push_ranges));
344 
345    if (blob->overrun)
346       return NULL;
347 
348    struct anv_shader_bin *shader =
349       anv_shader_bin_create(device, stage,
350                             key_data, key_size,
351                             kernel_data, kernel_size,
352                             &prog_data.base, prog_data_size,
353                             stats, num_stats, xfb_info, &bind_map,
354                             &push_desc_info,
355                             dynamic_push_values);
356    if (shader == NULL)
357       return NULL;
358 
359    return &shader->base;
360 }
361 
362 struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const void * key_data,uint32_t key_size,bool * user_cache_hit)363 anv_device_search_for_kernel(struct anv_device *device,
364                              struct vk_pipeline_cache *cache,
365                              const void *key_data, uint32_t key_size,
366                              bool *user_cache_hit)
367 {
368    /* Use the default pipeline cache if none is specified */
369    if (cache == NULL)
370       cache = device->default_pipeline_cache;
371 
372    bool cache_hit = false;
373    struct vk_pipeline_cache_object *object =
374       vk_pipeline_cache_lookup_object(cache, key_data, key_size,
375                                       &anv_shader_bin_ops, &cache_hit);
376    if (user_cache_hit != NULL) {
377       *user_cache_hit = object != NULL && cache_hit &&
378                         cache != device->default_pipeline_cache;
379    }
380    if (object == NULL)
381       return NULL;
382 
383    return container_of(object, struct anv_shader_bin, base);
384 }
385 
386 struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device * device,struct vk_pipeline_cache * cache,const struct anv_shader_upload_params * params)387 anv_device_upload_kernel(struct anv_device *device,
388                          struct vk_pipeline_cache *cache,
389                          const struct anv_shader_upload_params *params)
390 {
391    /* Use the default pipeline cache if none is specified */
392    if (cache == NULL)
393       cache = device->default_pipeline_cache;
394 
395    struct anv_shader_bin *shader =
396       anv_shader_bin_create(device,
397                             params->stage,
398                             params->key_data,
399                             params->key_size,
400                             params->kernel_data,
401                             params->kernel_size,
402                             params->prog_data,
403                             params->prog_data_size,
404                             params->stats,
405                             params->num_stats,
406                             params->xfb_info,
407                             params->bind_map,
408                             params->push_desc_info,
409                             params->dynamic_push_values);
410    if (shader == NULL)
411       return NULL;
412 
413    struct vk_pipeline_cache_object *cached =
414       vk_pipeline_cache_add_object(cache, &shader->base);
415 
416    return container_of(cached, struct anv_shader_bin, base);
417 }
418 
419 #define SHA1_KEY_SIZE 20
420 
421 struct nir_shader *
anv_device_search_for_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const nir_shader_compiler_options * nir_options,unsigned char sha1_key[SHA1_KEY_SIZE],void * mem_ctx)422 anv_device_search_for_nir(struct anv_device *device,
423                           struct vk_pipeline_cache *cache,
424                           const nir_shader_compiler_options *nir_options,
425                           unsigned char sha1_key[SHA1_KEY_SIZE],
426                           void *mem_ctx)
427 {
428    if (cache == NULL)
429       cache = device->default_pipeline_cache;
430 
431    return vk_pipeline_cache_lookup_nir(cache, sha1_key, SHA1_KEY_SIZE,
432                                        nir_options, NULL, mem_ctx);
433 }
434 
435 void
anv_device_upload_nir(struct anv_device * device,struct vk_pipeline_cache * cache,const struct nir_shader * nir,unsigned char sha1_key[SHA1_KEY_SIZE])436 anv_device_upload_nir(struct anv_device *device,
437                       struct vk_pipeline_cache *cache,
438                       const struct nir_shader *nir,
439                       unsigned char sha1_key[SHA1_KEY_SIZE])
440 {
441    if (cache == NULL)
442       cache = device->default_pipeline_cache;
443 
444    vk_pipeline_cache_add_nir(cache, sha1_key, SHA1_KEY_SIZE, nir);
445 }
446 
447 void
anv_load_fp64_shader(struct anv_device * device)448 anv_load_fp64_shader(struct anv_device *device)
449 {
450    const nir_shader_compiler_options *nir_options =
451       device->physical->compiler->nir_options[MESA_SHADER_VERTEX];
452 
453    const char* shader_name = "float64_spv_lib";
454    struct mesa_sha1 sha1_ctx;
455    uint8_t sha1[20];
456    _mesa_sha1_init(&sha1_ctx);
457    _mesa_sha1_update(&sha1_ctx, shader_name, strlen(shader_name));
458    _mesa_sha1_final(&sha1_ctx, sha1);
459 
460    device->fp64_nir =
461       anv_device_search_for_nir(device, device->internal_cache,
462                                    nir_options, sha1, NULL);
463 
464    /* The shader found, no need to call spirv_to_nir() again. */
465    if (device->fp64_nir)
466       return;
467 
468    struct spirv_to_nir_options spirv_options = {
469       .caps = {
470          .address = true,
471          .float64 = true,
472          .int8 = true,
473          .int16 = true,
474          .int64 = true,
475       },
476       .environment = NIR_SPIRV_VULKAN,
477       .create_library = true
478    };
479 
480    nir_shader* nir =
481       spirv_to_nir(float64_spv_source, sizeof(float64_spv_source) / 4,
482                    NULL, 0, MESA_SHADER_VERTEX, "main",
483                    &spirv_options, nir_options);
484 
485    assert(nir != NULL);
486 
487    nir_validate_shader(nir, "after spirv_to_nir");
488    nir_validate_ssa_dominance(nir, "after spirv_to_nir");
489 
490    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
491    NIR_PASS_V(nir, nir_lower_returns);
492    NIR_PASS_V(nir, nir_inline_functions);
493    NIR_PASS_V(nir, nir_opt_deref);
494 
495    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
496    NIR_PASS_V(nir, nir_copy_prop);
497    NIR_PASS_V(nir, nir_opt_dce);
498    NIR_PASS_V(nir, nir_opt_cse);
499    NIR_PASS_V(nir, nir_opt_gcm, true);
500    NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
501    NIR_PASS_V(nir, nir_opt_dce);
502 
503    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_function_temp,
504               nir_address_format_62bit_generic);
505 
506    anv_device_upload_nir(device, device->internal_cache,
507                          nir, sha1);
508 
509    device->fp64_nir = nir;
510 }
511