/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Entry limit shared by the NIR and variant caches */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

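/* Both caches are keyed by a raw 20-byte SHA-1 digest, so the hash table
 * callbacks hash and compare the digest bytes directly.
 */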
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

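/* NIR cache entry: the SHA-1 key plus the nir_serialize() output, stored
 * inline after the header.
 */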
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, "  NIR cache entries:      %d\n", cache->nir_stats.count);
   fprintf(stderr, "  NIR cache miss count:   %d\n", cache->nir_stats.miss);
   fprintf(stderr, "  NIR cache hit  count:   %d\n", cache->nir_stats.hit);

   fprintf(stderr, "  cache entries:      %d\n", cache->stats.count);
   fprintf(stderr, "  cache miss count:   %d\n", cache->stats.miss);
   fprintf(stderr, "  cache hit  count:   %d\n", cache->stats.hit);
}

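/* Caches created with VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT
 * are synchronized by the application, so for those we can skip taking the
 * mutex.
 */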
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      pthread_mutex_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      pthread_mutex_unlock(&cache->mutex);
}

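/* Serializes a NIR shader and adds it to the NIR cache, unless an entry with
 * the same SHA-1 key already exists or the cache is full.
 */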
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock.  We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

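/* Looks up a NIR shader in the NIR cache and, on a hit, returns a freshly
 * deserialized copy that the caller owns. Returns NULL on a miss.
 */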
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We deserialize with a NULL ralloc context because we want the
       * p_stage to keep the reference to the nir, as we keep open the
       * possibility of providing a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "\tnir cache hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tnir cache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

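/* Initializes a pipeline cache. When caching is disabled both hash tables
 * are left NULL, which makes every lookup miss and every upload a no-op.
 */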
void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches the cache for pipeline data and returns a v3dv_pipeline_shared_data
 * with it, or NULL if it is not cached. On a hit, this increases the
 * ref_count, so the caller is responsible for unreffing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         fprintf(stderr, "\tcache hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "\tcache miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk cache can be disabled independently of the
    * pipeline cache with the environment variable MESA_GLSL_CACHE_DISABLE.
    * In that case the calls to disk_cache_put/get will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache hit\n");

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      } else {
         if (debug_cache)
            fprintf(stderr, "\ton-disk-cache miss\n");
      }
   }
#endif

   return NULL;
}

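/* Frees the variants, descriptor maps and assembly BO owned by a shared_data
 * entry. Called when its reference count drops to zero.
 */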
void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

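/* Creates a new shared_data entry (with ref_cnt = 1) that takes ownership of
 * the given maps and variants, and uploads the combined shader assembly into
 * a freshly allocated and mapped BO.
 */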
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * reffed and unreffed by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will still be available
    * on the last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader assembly\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   /* Assign the BO now so it gets freed through the unref if mapping fails */
   new_entry->assembly_bo = bo;

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      v3dv_pipeline_shared_data_unref(cache->device, new_entry);
      return NULL;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   return new_entry;
}

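/* Inserts a shared_data entry into the in-memory cache (taking a reference)
 * and, unless it came from an on-disk-cache hit, serializes it to the disk
 * cache as well.
 */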
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * back to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);

            fprintf(stderr, "on-disk-cache, new cache entry with sha1 key %s:%p\n\n",
                    sha1buf, shared_data);
         }
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline. */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

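/* Reads a serialized_nir entry back from a blob, allocating it on the NIR
 * cache's ralloc context. Returns NULL if the blob is overrun.
 */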
static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

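/* Reads a shader variant back from a blob. Returns NULL if the blob is
 * overrun.
 */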
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* shader_variant_create expects a newly created prog_data that it takes
    * ownership of, as that is what the v3d compiler returns. So we allocate
    * one here (including the uniform list) and fill it with the data we
    * read from the blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

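/* Reads a complete shared_data entry back from a blob: SHA-1 key, descriptor
 * maps, shader variants and the combined assembly. Returns NULL on overrun
 * or allocation failure.
 */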
static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         return NULL;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         return NULL;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      return NULL;

   return v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                        total_assembly, total_assembly_size);
}

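/* Loads client-provided pipeline cache data, as produced by
 * v3dv_GetPipelineCacheData: a vk_pipeline_cache_header followed by the
 * serialized NIR shaders and then the serialized pipeline entries. Data
 * that fails header validation is silently ignored.
 */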
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   if (size < sizeof(header))
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

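/* Releases everything owned by the cache: the serialized NIR shaders (which
 * are ralloc'ed off the NIR hash table) and the references held on the
 * pipeline entries.
 */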
void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   pthread_mutex_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

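/* Merges the NIR and pipeline entries of every source cache into dstCache,
 * skipping keys that dstCache already contains.
 */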
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we use serialized NIR shaders because they are convenient
          * to create and store in the cache, but that requires copying the
          * serialized NIR here (and in some other places). Perhaps it would
          * make sense to manage the NIR shaders with ref-counted shared
          * structures, like the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

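/* Serializes one shader variant. The QPU assembly itself is not written
 * here: only its offset and size are stored, and the combined assembly is
 * appended once at the end of the shared_data entry.
 */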
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

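/* Serializes a complete shared_data entry: the SHA-1 key, the descriptor
 * maps (binning stages are skipped, as they share the render stage's map),
 * the shader variants, and finally the combined QPU assembly.
 */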
static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, while graphics
    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages
    * take the descriptor map from the corresponding render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants and VS+GS+FS have 5,
    * since VS and GS also get binning variants; compute pipelines have
    * only 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

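/* vkGetPipelineCacheData: when pData is NULL we only compute the required
 * size, writing into a fixed blob of SIZE_MAX capacity. If the provided
 * buffer is too small to hold everything, we bail out and return
 * VK_INCOMPLETE.
 */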
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails, reset the blob to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

 done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}