/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vulkan/util/vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Maximum number of entries; shared by the nir and the variant caches */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};
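
/* A serialized_nir and its payload live in a single ralloc allocation via
 * the trailing array member. A minimal sketch of the pattern used in this
 * file (ctx/payload/payload_size are placeholders):
 *
 *    struct serialized_nir *snir =
 *       ralloc_size(ctx, sizeof(*snir) + payload_size);
 *    memcpy(snir->sha1_key, sha1_key, 20);
 *    snir->size = payload_size;
 *    memcpy(snir->data, payload, payload_size);
 */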

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   fprintf(stderr, " NIR cache entries: %d\n", cache->nir_stats.count);
   fprintf(stderr, " NIR cache miss count: %d\n", cache->nir_stats.miss);
   fprintf(stderr, " NIR cache hit count: %d\n", cache->nir_stats.hit);

   fprintf(stderr, " cache entries: %d\n", cache->stats.count);
   fprintf(stderr, " cache miss count: %d\n", cache->stats.miss);
   fprintf(stderr, " cache hit count: %d\n", cache->stats.hit);

   fprintf(stderr, " on-disk cache hit count: %d\n", cache->stats.on_disk_hit);
}

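/* Caches created with VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT
 * are synchronized by the application, so for them these helpers can skip
 * the mutex (see v3dv_pipeline_cache_init below).
 */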
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_unlock(&cache->mutex);
}

void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock. We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      fprintf(stderr, "pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);
      /* We use a NULL context because we want the p_stage to keep the
       * reference to the nir shader, as we keep open the possibility of
       * providing a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            fprintf(stderr, "[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}
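
/* Typical round trip through the NIR cache (a sketch only; compile_shader
 * is a hypothetical helper and error handling is elided):
 *
 *    nir_shader *nir =
 *       v3dv_pipeline_cache_search_for_nir(pipeline, cache, options, sha1_key);
 *    if (!nir) {
 *       nir = compile_shader(...);
 *       v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, sha1_key);
 *    }
 */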

void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches for cached pipeline data and returns a v3dv_pipeline_shared_data
 * with it, or NULL if it is not cached. On a hit, the ref_count is
 * increased, so the caller is responsible for unrefing it.
 */
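/* Call pattern implied by the contract above (a sketch, error handling
 * elided):
 *
 *    bool cache_hit = false;
 *    struct v3dv_pipeline_shared_data *shared_data =
 *       v3dv_pipeline_cache_search_for_pipeline(cache, sha1_key, &cache_hit);
 *    if (shared_data) {
 *       ... use the variants and maps ...
 *       v3dv_pipeline_shared_data_unref(device, shared_data);
 *    }
 */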
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      fprintf(stderr, "pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         fprintf(stderr, "[v3dv cache] hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      fprintf(stderr, "[v3dv cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk-cache can be independently disabled, while keeping
    * the pipeline cache working, by using the environment variable
    * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
    * will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
         char sha1buf[41];
         _mesa_sha1_format(sha1buf, cache_key);
         fprintf(stderr, "[v3dv on-disk cache] %s %s\n",
                 buffer ? "hit" : "miss",
                 sha1buf);
      }

      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            /* Technically we could increase on_disk_hit as soon as we have a
             * buffer, but we are more interested in hits that produced a
             * valid shared_data.
             */
            cache->stats.on_disk_hit++;
            pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      }
   }
#endif

   return NULL;
}

void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create new_entry using the device alloc. Right now shared_data is
    * reffed and unreffed by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will be available on the
    * last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      fprintf(stderr, "failed to map shader assembly buffer\n");
      goto fail;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from the disk cache, we already know that the
    * entry is not in the hash table.
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      fprintf(stderr, "pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
              cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (unlikely(V3D_DEBUG & V3D_DEBUG_CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            fprintf(stderr, "[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* shader_variant_create expects a newly created prog_data that it can
    * own, as that is what the v3d compiler returns. So we also allocate one
    * here (including the uniform list) and fill it with the data read from
    * the blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         goto fail;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         goto fail;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      goto fail;

   struct v3dv_pipeline_shared_data *data =
      v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                    total_assembly, total_assembly_size);

   if (!data)
      goto fail;

   return data;

fail:
   for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
      /* Binning stages share the descriptor map of their render stage (see
       * above), so skip them here to avoid a double free.
       */
      if (maps[i] && !broadcom_shader_stage_is_binning(i))
         vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
      if (variants[i])
         v3dv_shader_variant_destroy(cache->device, variants[i]);
   }
   return NULL;
}

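/* Layout of the client-visible cache data consumed below and produced by
 * v3dv_GetPipelineCacheData() (a sketch of what the code reads):
 *
 *    struct vk_pipeline_cache_header
 *    u32 nir_count, followed by nir_count serialized NIR entries
 *    u32 count,     followed by count pipeline shared-data entries
 */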
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   if (size < sizeof(header))
      return;

   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      fprintf(stderr, "pipeline cache %p, loaded %i nir shaders and "
              "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we are using serialized nir shaders because they are
          * convenient to create and store in the cache, but that requires a
          * copy here (and in some other places) of the serialized NIR.
          * Perhaps it would make sense to handle the NIR shaders through
          * shared, ref-counted structures, like the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            fprintf(stderr, "pipeline cache %p, added nir entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            fprintf(stderr, "pipeline cache %p, added entry %s "
                    "from pipeline cache %p\n",
                    dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

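/* Serialized v3dv_shader_variant layout, written below and read back by
 * shader_variant_create_from_blob() (a sketch of the writes that follow):
 *
 *    u32  stage (enum broadcom_shader_stage)
 *    u32  prog_data_size, then prog_data_size bytes of v3d_prog_data
 *    u32  uniform list count, then the contents[] and data[] arrays
 *    u32  assembly_offset
 *    u32  qpu_insts_size
 */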
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

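/* Serialized v3dv_pipeline_shared_data layout, read back by
 * v3dv_pipeline_shared_data_create_from_blob() (a sketch of the writes
 * that follow):
 *
 *    20 bytes  sha1_key
 *    u8        descriptor_maps_count,
 *              then per map: u8 stage + struct v3dv_descriptor_maps
 *    u8        variant_count,
 *              then per variant: u8 stage + serialized variant (see above)
 *    u32       total_assembly_size, then the assembly itself
 */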
static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines have only one descriptor map, while graphics
    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages
    * take the descriptor map from the corresponding render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants, with VS+GS+FS they have
    * 5, and compute pipelines have only 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = &device->instance->physicalDevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails, reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      fprintf(stderr, "GetPipelineCacheData: serializing cache %p, "
              "%i nir shader entries "
              "%i entries, %u DataSize\n",
              cache, nir_count, count, (uint32_t) *pDataSize);
   }

done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}