/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "aco_interface.h"

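/* In-memory layout of one cache entry. The variable-length 'code' tail holds
 * each stage's serialized radv_shader_binary followed by the pipeline stack
 * sizes; entry_size() computes the total footprint.
 */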
struct cache_entry {
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t binary_sizes[MESA_VULKAN_SHADER_STAGES];
   uint32_t num_stack_sizes;
   struct radv_shader *shaders[MESA_VULKAN_SHADER_STAGES];
   struct radv_pipeline_slab *slab;
   char code[0];
};

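/* Locking helpers: the mutex is skipped for caches created with
 * VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT, where the application
 * guarantees external synchronization.
 */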
static void
radv_pipeline_cache_lock(struct radv_pipeline_cache *cache)
{
   if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)
      return;

   mtx_lock(&cache->mutex);
}

static void
radv_pipeline_cache_unlock(struct radv_pipeline_cache *cache)
{
   if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT)
      return;

   mtx_unlock(&cache->mutex);
}

static bool
radv_is_cache_disabled(struct radv_device *device)
{
   /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1, or
    * when ACO_DEBUG is used. The MESA_GLSL_CACHE_DISABLE case is handled elsewhere.
    */
   return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
          (device->physical_device->use_llvm ? 0 : aco_get_codegen_flags());
}

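/* Initialize an in-memory cache backed by a power-of-two, open-addressed hash
 * table (1024 slots initially).
 */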
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device)
{
   vk_object_base_init(&device->vk, &cache->base, VK_OBJECT_TYPE_PIPELINE_CACHE);

   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);
   cache->flags = 0;

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal; we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders. */
   if (cache->hash_table == NULL || radv_is_cache_disabled(device))
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

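/* Free every cached entry, dropping the shader and slab references it holds,
 * then release the hash table itself.
 */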
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         for (int j = 0; j < MESA_VULKAN_SHADER_STAGES; ++j) {
            if (cache->hash_table[i]->shaders[j])
               radv_shader_destroy(cache->device, cache->hash_table[i]->shaders[j]);
         }
         if (cache->hash_table[i]->slab)
            radv_pipeline_slab_destroy(cache->device, cache->hash_table[i]->slab);
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   mtx_destroy(&cache->mutex);
   free(cache->hash_table);

   vk_object_base_finish(&cache->base);
}

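/* Total size of an entry: the fixed header, each stage's serialized binary
 * and the trailing stack sizes, padded to the entry alignment.
 */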
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
      if (entry->binary_sizes[i])
         ret += entry->binary_sizes[i];
   ret += sizeof(struct radv_pipeline_shader_stack_size) * entry->num_stack_sizes;
   ret = align(ret, alignof(struct cache_entry));
   return ret;
}

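/* Compute the SHA-1 cache key for a pipeline from the pipeline key, the
 * layout hash, every active stage's shader hash and the creation flags.
 */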
void
radv_hash_shaders(unsigned char *hash, const struct radv_pipeline_stage *stages,
                  const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key,
                  uint32_t flags)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (key)
      _mesa_sha1_update(&ctx, key, sizeof(*key));
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
      if (!stages[s].entrypoint)
         continue;

      _mesa_sha1_update(&ctx, stages[s].shader_sha1, sizeof(stages[s].shader_sha1));
   }
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

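/* Compute the SHA-1 cache key for a ray-tracing pipeline: per-stage module
 * hashes (or module identifiers), entrypoint names, specialization data,
 * shader groups, the recursion depth (unless dynamic) and the creation flags.
 */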
void
radv_hash_rt_shaders(unsigned char *hash, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                     uint32_t flags)
{
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, pCreateInfo->layout);
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) {
      RADV_FROM_HANDLE(vk_shader_module, module, pCreateInfo->pStages[i].module);
      const VkSpecializationInfo *spec_info = pCreateInfo->pStages[i].pSpecializationInfo;

      const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
         vk_find_struct_const(pCreateInfo->pStages[i].pNext,
                              PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);

      if (module) {
         _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
      } else {
         assert(iinfo);
         assert(iinfo->identifierSize <= VK_MAX_SHADER_MODULE_IDENTIFIER_SIZE_EXT);
         _mesa_sha1_update(&ctx, iinfo->pIdentifier, iinfo->identifierSize);
      }

      _mesa_sha1_update(&ctx, pCreateInfo->pStages[i].pName, strlen(pCreateInfo->pStages[i].pName));
      if (spec_info && spec_info->mapEntryCount) {
         _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                           spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
         _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
      }
   }

   for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
      _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].type,
                        sizeof(pCreateInfo->pGroups[i].type));
      _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].generalShader,
                        sizeof(pCreateInfo->pGroups[i].generalShader));
      _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].anyHitShader,
                        sizeof(pCreateInfo->pGroups[i].anyHitShader));
      _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].closestHitShader,
                        sizeof(pCreateInfo->pGroups[i].closestHitShader));
      _mesa_sha1_update(&ctx, &pCreateInfo->pGroups[i].intersectionShader,
                        sizeof(pCreateInfo->pGroups[i].intersectionShader));
   }

   if (!radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo))
      _mesa_sha1_update(&ctx, &pCreateInfo->maxPipelineRayRecursionDepth, 4);
   _mesa_sha1_update(&ctx, &flags, 4);
   _mesa_sha1_final(&ctx, hash);
}

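/* Look up an entry by SHA-1 with linear probing; the probe starts at the
 * first dword of the hash and stops at the first empty slot.
 */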
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *)sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
   struct cache_entry *entry;

   radv_pipeline_cache_lock(cache);

   entry = radv_pipeline_cache_search_unlocked(cache, sha1);

   radv_pipeline_cache_unlock(cache);

   return entry;
}

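/* Insert an entry into the first free slot along its probe sequence. The
 * caller must guarantee the table is less than half full.
 */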
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

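/* Double the hash table and rehash every existing entry into the new table. */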
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      radv_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      radv_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      radv_pipeline_cache_set_entry(cache, entry);
}

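/* Try to satisfy a pipeline compilation from the cache: search the in-memory
 * cache first, then the on-disk cache. On a hit, radv_shader objects are
 * recreated from the stored binaries (and uploaded if needed), the pipeline's
 * shaders, slab and stack sizes are filled in, and references are taken on
 * the cached objects.
 */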
bool
radv_create_shaders_from_pipeline_cache(
   struct radv_device *device, struct radv_pipeline_cache *cache, const unsigned char *sha1,
   struct radv_pipeline *pipeline, struct radv_pipeline_shader_stack_size **stack_sizes,
   uint32_t *num_stack_sizes, bool *found_in_application_cache)
{
   struct cache_entry *entry;
   VkResult result;

   if (!cache) {
      cache = device->mem_cache;
      *found_in_application_cache = false;
   }

   radv_pipeline_cache_lock(cache);

   entry = radv_pipeline_cache_search_unlocked(cache, sha1);

   if (!entry) {
      *found_in_application_cache = false;

      /* Don't cache when we want debug info, since this isn't
       * present in the cache.
       */
      if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
         radv_pipeline_cache_unlock(cache);
         return false;
      }

      uint8_t disk_sha1[20];
      disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);

      entry =
         (struct cache_entry *)disk_cache_get(device->physical_device->disk_cache, disk_sha1, NULL);
      if (!entry) {
         radv_pipeline_cache_unlock(cache);
         return false;
      } else {
         size_t size = entry_size(entry);
         struct cache_entry *new_entry =
            vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
         if (!new_entry) {
            free(entry);
            radv_pipeline_cache_unlock(cache);
            return false;
         }

         memcpy(new_entry, entry, entry_size(entry));
         free(entry);
         entry = new_entry;

         if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
             cache != device->mem_cache)
            radv_pipeline_cache_add_entry(cache, new_entry);
      }
   }

   struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
   struct radv_shader_binary *gs_copy_binary = NULL;
   bool needs_upload = false;
   char *p = entry->code;
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      if (!entry->shaders[i] && entry->binary_sizes[i]) {
         struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
         memcpy(binary, p, entry->binary_sizes[i]);
         p += entry->binary_sizes[i];

         entry->shaders[i] = radv_shader_create(device, binary, false, true, NULL);

         needs_upload = true;
         binaries[i] = binary;
      } else if (entry->binary_sizes[i]) {
         p += entry->binary_sizes[i];
      }
   }

   memcpy(pipeline->shaders, entry->shaders, sizeof(entry->shaders));

   if (pipeline->shaders[MESA_SHADER_GEOMETRY] &&
       !pipeline->shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
      /* For the GS copy shader, RADV uses the compute shader slot to avoid a new cache entry. */
      pipeline->gs_copy_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
      pipeline->shaders[MESA_SHADER_COMPUTE] = NULL;
      gs_copy_binary = binaries[MESA_SHADER_COMPUTE];
   }

   if (needs_upload) {
      result = radv_upload_shaders(device, pipeline, binaries, gs_copy_binary);

      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         if (pipeline->shaders[i])
            free(binaries[i]);
      }
      free(gs_copy_binary);

      if (result != VK_SUCCESS) {
         radv_pipeline_cache_unlock(cache);
         return false;
      }

      entry->slab = pipeline->slab;
   } else {
      pipeline->slab = entry->slab;
      pipeline->slab_bo = pipeline->slab->alloc->arena->bo;
   }

   if (num_stack_sizes) {
      *num_stack_sizes = entry->num_stack_sizes;
      if (entry->num_stack_sizes) {
         *stack_sizes = malloc(entry->num_stack_sizes * sizeof(**stack_sizes));
         memcpy(*stack_sizes, p, entry->num_stack_sizes * sizeof(**stack_sizes));
      }
   } else {
      assert(!entry->num_stack_sizes);
   }

   p += entry->num_stack_sizes * sizeof(**stack_sizes);

   if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
      vk_free(&cache->alloc, entry);
   else {
      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
         if (entry->shaders[i])
            p_atomic_inc(&entry->shaders[i]->ref_count);
      p_atomic_inc(&entry->slab->ref_count);
   }

   assert((uintptr_t)p <= (uintptr_t)entry + entry_size(entry));
   radv_pipeline_cache_unlock(cache);
   return true;
}

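/* Store freshly compiled shaders in the cache. If another thread already
 * inserted the same key, adopt its shaders instead; otherwise serialize the
 * binaries and stack sizes into a new entry and also write it to the on-disk
 * cache (except for meta shaders, which use a separate cache file).
 */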
void
radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
                                   const unsigned char *sha1, struct radv_pipeline *pipeline,
                                   struct radv_shader_binary *const *binaries,
                                   const struct radv_pipeline_shader_stack_size *stack_sizes,
                                   uint32_t num_stack_sizes)
{
   if (!cache)
      cache = device->mem_cache;

   radv_pipeline_cache_lock(cache);
   struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
   if (entry) {
      for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
         if (!entry->shaders[i])
            continue;

         radv_shader_destroy(cache->device, pipeline->shaders[i]);

         pipeline->shaders[i] = entry->shaders[i];
         p_atomic_inc(&pipeline->shaders[i]->ref_count);
      }

      radv_pipeline_slab_destroy(cache->device, pipeline->slab);

      pipeline->slab = entry->slab;
      p_atomic_inc(&pipeline->slab->ref_count);

      radv_pipeline_cache_unlock(cache);
      return;
   }

   /* Don't cache when we want debug info, since this isn't
    * present in the cache.
    */
   if (radv_is_cache_disabled(device)) {
      radv_pipeline_cache_unlock(cache);
      return;
   }

   size_t size = sizeof(*entry) + sizeof(*stack_sizes) * num_stack_sizes;
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
      if (pipeline->shaders[i])
         size += binaries[i]->total_size;
   const size_t size_without_align = size;
   size = align(size_without_align, alignof(struct cache_entry));

   entry = vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
   if (!entry) {
      radv_pipeline_cache_unlock(cache);
      return;
   }

   memset(entry, 0, sizeof(*entry));
   memcpy(entry->sha1, sha1, 20);

   char *p = entry->code;

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;

      entry->binary_sizes[i] = binaries[i]->total_size;

      memcpy(p, binaries[i], binaries[i]->total_size);
      p += binaries[i]->total_size;
   }

   if (num_stack_sizes) {
      memcpy(p, stack_sizes, sizeof(*stack_sizes) * num_stack_sizes);
      p += sizeof(*stack_sizes) * num_stack_sizes;
   }
   entry->num_stack_sizes = num_stack_sizes;

   // Make valgrind happy by filling the alignment hole at the end.
   assert(p == (char *)entry + size_without_align);
   assert(sizeof(*entry) + (p - entry->code) == size_without_align);
   memset((char *)entry + size_without_align, 0, size - size_without_align);

   /* Always add cache items to disk. This will allow collection of
    * compiled shaders by third parties such as steam, even if the app
    * implements its own pipeline cache.
    *
    * Make sure to exclude meta shaders because they are stored in a different cache file.
    */
   if (device->physical_device->disk_cache && cache != &device->meta_state.cache) {
      uint8_t disk_sha1[20];
      disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);

      disk_cache_put(device->physical_device->disk_cache, disk_sha1, entry, entry_size(entry),
                     NULL);
   }

   if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache) {
      vk_free2(&cache->alloc, NULL, entry);
      radv_pipeline_cache_unlock(cache);
      return;
   }

   /* We delay setting the shader so we have reproducible disk cache
    * items.
    */
   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;

      entry->shaders[i] = pipeline->shaders[i];
      p_atomic_inc(&pipeline->shaders[i]->ref_count);
   }

   entry->slab = pipeline->slab;
   p_atomic_inc(&pipeline->slab->ref_count);

   radv_pipeline_cache_add_entry(cache, entry);

   cache->modified = true;
   radv_pipeline_cache_unlock(cache);
   return;
}

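/* Deserialize application-provided cache data: validate the Vulkan pipeline
 * cache header against this device, then copy each entry into the in-memory
 * table with its runtime pointers (shaders, slab) cleared.
 */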
bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size)
{
   struct radv_device *device = cache->device;
   struct vk_pipeline_cache_header header;

   if (size < sizeof(header))
      return false;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return false;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return false;
   if (header.vendor_id != ATI_VENDOR_ID)
      return false;
   if (header.device_id != device->physical_device->rad_info.pci_id)
      return false;
   if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
      return false;

   char *end = (char *)data + size;
   char *p = (char *)data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *)p;
      struct cache_entry *dest_entry;
      size_t size_of_entry = entry_size(entry);
      if (end - p < size_of_entry)
         break;

      dest_entry = vk_alloc(&cache->alloc, size_of_entry, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size_of_entry);
         for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i)
            dest_entry->shaders[i] = NULL;
         dest_entry->slab = NULL;
         radv_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size_of_entry;
   }

   return true;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreatePipelineCache(VkDevice _device, const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator, VkPipelineCache *pPipelineCache)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*cache), 8,
                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->vk.alloc;

   radv_pipeline_cache_init(cache, device);
   cache->flags = pCreateInfo->flags;

   if (pCreateInfo->initialDataSize > 0) {
      radv_pipeline_cache_load(cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
   }

   *pPipelineCache = radv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyPipelineCache(VkDevice _device, VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   radv_pipeline_cache_finish(cache);
   vk_free2(&device->vk.alloc, pAllocator, cache);
}

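/* Serialize the cache as the standard Vulkan header followed by every entry,
 * with the runtime shader and slab pointers zeroed so the blob contains no
 * process-local state.
 */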
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDataSize, void *pData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
   struct vk_pipeline_cache_header *header;
   VkResult result = VK_SUCCESS;

   radv_pipeline_cache_lock(cache);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      radv_pipeline_cache_unlock(cache);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      radv_pipeline_cache_unlock(cache);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = (char *)pData + *pDataSize;
   header = p;
   header->header_size = align(sizeof(*header), alignof(struct cache_entry));
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = ATI_VENDOR_ID;
   header->device_id = device->physical_device->rad_info.pci_id;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p = (char *)p + header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size_of_entry = entry_size(entry);
      if ((char *)end < (char *)p + size_of_entry) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size_of_entry);
      for (int j = 0; j < MESA_VULKAN_SHADER_STAGES; ++j)
         ((struct cache_entry *)p)->shaders[j] = NULL;
      ((struct cache_entry *)p)->slab = NULL;
      p = (char *)p + size_of_entry;
   }
   *pDataSize = (char *)p - (char *)pData;

   radv_pipeline_cache_unlock(cache);
   return result;
}

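/* Move entries from src into dst, skipping keys dst already has. Moved
 * entries are detached from src so they are not freed twice.
 */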
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst, struct radv_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
         continue;

      radv_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_MergePipelineCaches(VkDevice _device, VkPipelineCache destCache, uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

      radv_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}