/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "vk_util.h"
#include "util/blob.h"
#include "nir/nir_serialize.h"

static const bool debug_cache = false;
static const bool dump_stats = false;
static const bool dump_stats_on_destroy = false;

/* Shared for nir/variants */
#define V3DV_MAX_PIPELINE_CACHE_ENTRIES 4096

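/* Both the NIR cache and the pipeline cache are hash tables keyed by a
 * 20-byte SHA-1, so they share these callbacks.
 */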
static uint32_t
sha1_hash_func(const void *sha1)
{
   return _mesa_hash_data(sha1, 20);
}

static bool
sha1_compare_func(const void *sha1_a, const void *sha1_b)
{
   return memcmp(sha1_a, sha1_b, 20) == 0;
}

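/* A NIR shader as stored in the NIR cache: the nir_serialize() output is
 * kept inline as a flexible array member, keyed by the shader's SHA-1.
 */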
struct serialized_nir {
   unsigned char sha1_key[20];
   size_t size;
   char data[0];
};

static void
cache_dump_stats(struct v3dv_pipeline_cache *cache)
{
   mesa_logi(" NIR cache entries: %d\n", cache->nir_stats.count);
   mesa_logi(" NIR cache miss count: %d\n", cache->nir_stats.miss);
   mesa_logi(" NIR cache hit count: %d\n", cache->nir_stats.hit);

   mesa_logi(" cache entries: %d\n", cache->stats.count);
   mesa_logi(" cache miss count: %d\n", cache->stats.miss);
   mesa_logi(" cache hit count: %d\n", cache->stats.hit);

   mesa_logi(" on-disk cache hit count: %d\n", cache->stats.on_disk_hit);
}

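/* Locking helpers: a cache created with
 * VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT is guaranteed by the
 * application to be externally synchronized, so we can skip the mutex.
 */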
static void
pipeline_cache_lock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_lock(&cache->mutex);
}

static void
pipeline_cache_unlock(struct v3dv_pipeline_cache *cache)
{
   if (!cache->externally_synchronized)
      mtx_unlock(&cache->mutex);
}

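/* Serializes the given NIR shader and adds it to the cache's NIR table under
 * sha1_key. Does nothing if the entry already exists, if the cache is full,
 * or if serialization runs out of memory.
 */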
void
v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
                               struct v3dv_pipeline_cache *cache,
                               nir_shader *nir,
                               unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return;

   if (cache->nir_stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   pipeline_cache_unlock(cache);
   if (entry)
      return;

   struct blob blob;
   blob_init(&blob);

   nir_serialize(&blob, nir, false);
   if (blob.out_of_memory) {
      blob_finish(&blob);
      return;
   }

   pipeline_cache_lock(cache);
   /* Because ralloc isn't thread-safe, we have to do all this inside the
    * lock. We could unlock for the big memcpy but it's probably not worth
    * the hassle.
    */
   entry = _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + blob.size);
   if (snir == NULL) {
      blob_finish(&blob);
      pipeline_cache_unlock(cache);
      return;
   }
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = blob.size;
   memcpy(snir->data, blob.data, blob.size);

   blob_finish(&blob);

   cache->nir_stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, snir->sha1_key);
      mesa_logi("pipeline cache %p, new nir entry %s\n", cache, sha1buf);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);

   pipeline_cache_unlock(cache);
}

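/* Looks up a serialized NIR shader by SHA-1 and, on a hit, deserializes a
 * fresh copy for the caller. Returns NULL on a miss or if the stored blob
 * turns out to be corrupt.
 */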
nir_shader*
v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
                                   struct v3dv_pipeline_cache *cache,
                                   const nir_shader_compiler_options *nir_options,
                                   unsigned char sha1_key[20])
{
   if (!cache || !cache->nir_cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      mesa_logi("pipeline cache %p, search for nir %s\n", cache, sha1buf);
   }

   const struct serialized_nir *snir = NULL;

   pipeline_cache_lock(cache);
   struct hash_entry *entry =
      _mesa_hash_table_search(cache->nir_cache, sha1_key);
   if (entry)
      snir = entry->data;
   pipeline_cache_unlock(cache);

   if (snir) {
      struct blob_reader blob;
      blob_reader_init(&blob, snir->data, snir->size);

      /* We use a NULL context because we want the p_stage to keep the
       * reference to the nir, since we keep open the possibility of
       * providing a shader variant after cache creation.
       */
      nir_shader *nir = nir_deserialize(NULL, nir_options, &blob);
      if (blob.overrun) {
         ralloc_free(nir);
      } else {
         cache->nir_stats.hit++;
         if (debug_cache) {
            mesa_logi("[v3dv nir cache] hit: %p\n", nir);
            if (dump_stats)
               cache_dump_stats(cache);
         }
         return nir;
      }
   }

   cache->nir_stats.miss++;
   if (debug_cache) {
      mesa_logi("[v3dv nir cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   return NULL;
}

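/* Initializes a pipeline cache. When caching is disabled both hash tables
 * are left NULL, which every cache entry point checks for, so lookups and
 * uploads become no-ops.
 */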
void
v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
                         struct v3dv_device *device,
                         VkPipelineCacheCreateFlags flags,
                         bool cache_enabled)
{
   cache->device = device;
   mtx_init(&cache->mutex, mtx_plain);

   if (cache_enabled) {
      cache->nir_cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                                 sha1_compare_func);
      cache->nir_stats.miss = 0;
      cache->nir_stats.hit = 0;
      cache->nir_stats.count = 0;

      cache->cache = _mesa_hash_table_create(NULL, sha1_hash_func,
                                             sha1_compare_func);
      cache->stats.miss = 0;
      cache->stats.hit = 0;
      cache->stats.count = 0;

      cache->externally_synchronized = flags &
         VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
   } else {
      cache->nir_cache = NULL;
      cache->cache = NULL;
   }
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob);

static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache);

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob);

/**
 * Searches for cached pipeline data and returns a v3dv_pipeline_shared_data
 * with it, or NULL if it is not cached. On a hit this increases the
 * ref_count, so the caller is responsible for unref-ing it.
 */
struct v3dv_pipeline_shared_data *
v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
                                        unsigned char sha1_key[20],
                                        bool *cache_hit)
{
   if (!cache || !cache->cache)
      return NULL;

   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, sha1_key);

      mesa_logi("pipeline cache %p, search pipeline with key %s\n", cache, sha1buf);
   }

   pipeline_cache_lock(cache);

   struct hash_entry *entry =
      _mesa_hash_table_search(cache->cache, sha1_key);

   if (entry) {
      struct v3dv_pipeline_shared_data *cache_entry =
         (struct v3dv_pipeline_shared_data *) entry->data;
      assert(cache_entry);

      cache->stats.hit++;
      *cache_hit = true;
      if (debug_cache) {
         mesa_logi("[v3dv cache] hit: %p\n", cache_entry);
         if (dump_stats)
            cache_dump_stats(cache);
      }

      v3dv_pipeline_shared_data_ref(cache_entry);

      pipeline_cache_unlock(cache);

      return cache_entry;
   }

   cache->stats.miss++;
   if (debug_cache) {
      mesa_logi("[v3dv cache] miss\n");
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   /* Note that the on-disk-cache can be independently disabled, while keeping
    * the pipeline cache working, by using the environment variable
    * MESA_SHADER_CACHE_DISABLE. In that case the calls to disk_cache_put/get
    * will not do anything.
    */
   if (disk_cache && device->instance->pipeline_cache_enabled) {
      cache_key cache_key;
      disk_cache_compute_key(disk_cache, sha1_key, 20, cache_key);

      size_t buffer_size;
      uint8_t *buffer = disk_cache_get(disk_cache, cache_key, &buffer_size);
      if (V3D_DBG(CACHE)) {
         char sha1buf[41];
         _mesa_sha1_format(sha1buf, cache_key);
         mesa_logi("[v3dv on-disk cache] %s %s\n",
                   buffer ? "hit" : "miss", sha1buf);
      }

      if (buffer) {
         struct blob_reader blob;
         struct v3dv_pipeline_shared_data *shared_data;

         blob_reader_init(&blob, buffer, buffer_size);
         shared_data = v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
         free(buffer);

         if (shared_data) {
            /* Technically we could increase on_disk_hit as soon as we have a
             * buffer, but we are more interested in hits that produce a
             * valid shared_data.
             */
            cache->stats.on_disk_hit++;
            pipeline_cache_upload_shared_data(cache, shared_data, true);
            return shared_data;
         }
      }
   }
#endif

   return NULL;
}

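/* Frees a shared data object and everything it owns. The caller must have
 * dropped the last reference already (note the ref_cnt == 0 assert below).
 */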
void
v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
                                  struct v3dv_pipeline_shared_data *shared_data)
{
   assert(shared_data->ref_cnt == 0);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (shared_data->variants[stage] != NULL)
         v3dv_shader_variant_destroy(device, shared_data->variants[stage]);

      /* We don't free binning descriptor maps as we are sharing them
       * with the render shaders.
       */
      if (shared_data->maps[stage] != NULL &&
          !broadcom_shader_stage_is_binning(stage)) {
         vk_free(&device->vk.alloc, shared_data->maps[stage]);
      }
   }

   if (shared_data->assembly_bo)
      v3dv_bo_free(device, shared_data->assembly_bo);

   vk_free(&device->vk.alloc, shared_data);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_new(struct v3dv_pipeline_cache *cache,
                              const unsigned char sha1_key[20],
                              struct v3dv_descriptor_maps **maps,
                              struct v3dv_shader_variant **variants,
                              const uint64_t *total_assembly,
                              const uint32_t total_assembly_size)
{
   size_t size = sizeof(struct v3dv_pipeline_shared_data);
   /* We create the new entry with the device alloc. Right now shared_data is
    * reffed and unreffed by both the pipeline and the pipeline cache, so we
    * can't ensure that the cache or pipeline alloc will be available on the
    * last unref.
    */
   struct v3dv_pipeline_shared_data *new_entry =
      vk_zalloc2(&cache->device->vk.alloc, NULL, size, 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (new_entry == NULL)
      return NULL;

   new_entry->ref_cnt = 1;
   memcpy(new_entry->sha1_key, sha1_key, 20);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      new_entry->maps[stage] = maps[stage];
      new_entry->variants[stage] = variants[stage];
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(cache->device, total_assembly_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      mesa_loge("failed to allocate memory for shaders assembly\n");
      goto fail;
   }

   bool ok = v3dv_bo_map(cache->device, bo, total_assembly_size);
   if (!ok) {
      mesa_loge("failed to map source shader buffer\n");
      goto fail;
   }

   memcpy(bo->map, total_assembly, total_assembly_size);

   new_entry->assembly_bo = bo;

   return new_entry;

fail:
   v3dv_pipeline_shared_data_unref(cache->device, new_entry);
   return NULL;
}

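/* Adds shared_data to the in-memory cache (taking a reference). Unless the
 * data just came from the on-disk cache, it is also serialized and written
 * out to disk when the disk cache is available.
 */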
static void
pipeline_cache_upload_shared_data(struct v3dv_pipeline_cache *cache,
                                  struct v3dv_pipeline_shared_data *shared_data,
                                  bool from_disk_cache)
{
   assert(shared_data);

   if (!cache || !cache->cache)
      return;

   if (cache->stats.count > V3DV_MAX_PIPELINE_CACHE_ENTRIES)
      return;

   pipeline_cache_lock(cache);
   struct hash_entry *entry = NULL;

   /* If this is being called from the disk cache, we already know that the
    * entry is not in the hash table.
    */
   if (!from_disk_cache)
      entry = _mesa_hash_table_search(cache->cache, shared_data->sha1_key);

   if (entry) {
      pipeline_cache_unlock(cache);
      return;
   }

   v3dv_pipeline_shared_data_ref(shared_data);
   _mesa_hash_table_insert(cache->cache, shared_data->sha1_key, shared_data);
   cache->stats.count++;
   if (debug_cache) {
      char sha1buf[41];
      _mesa_sha1_format(sha1buf, shared_data->sha1_key);

      mesa_logi("pipeline cache %p, new cache entry with sha1 key %s:%p\n\n",
                cache, sha1buf, shared_data);
      if (dump_stats)
         cache_dump_stats(cache);
   }

   pipeline_cache_unlock(cache);

#ifdef ENABLE_SHADER_CACHE
   /* If we are being called from an on-disk-cache hit, we can skip writing
    * back to the disk cache.
    */
   if (from_disk_cache)
      return;

   struct v3dv_device *device = cache->device;
   struct disk_cache *disk_cache = device->pdevice->disk_cache;
   if (disk_cache) {
      struct blob binary;
      blob_init(&binary);
      if (v3dv_pipeline_shared_data_write_to_blob(shared_data, &binary)) {
         cache_key cache_key;
         disk_cache_compute_key(disk_cache, shared_data->sha1_key, 20, cache_key);

         if (V3D_DBG(CACHE)) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, shared_data->sha1_key);
            mesa_logi("[v3dv on-disk cache] storing %s\n", sha1buf);
         }
         disk_cache_put(disk_cache, cache_key, binary.data, binary.size, NULL);
      }

      blob_finish(&binary);
   }
#endif
}

/* Uploads all the "cacheable" or shared data from the pipeline */
void
v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
                                    struct v3dv_pipeline_cache *cache)
{
   pipeline_cache_upload_shared_data(cache, pipeline->shared_data, false);
}

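/* Reads a serialized_nir back from a blob, in the same layout written by
 * v3dv_GetPipelineCacheData(): sha1 key, size, and the serialized NIR bytes.
 */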
static struct serialized_nir*
serialized_nir_create_from_blob(struct v3dv_pipeline_cache *cache,
                                struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);
   uint32_t snir_size = blob_read_uint32(blob);
   const char *snir_data = blob_read_bytes(blob, snir_size);
   if (blob->overrun)
      return NULL;

   struct serialized_nir *snir =
      ralloc_size(cache->nir_cache, sizeof(*snir) + snir_size);
   if (snir == NULL)
      return NULL;
   memcpy(snir->sha1_key, sha1_key, 20);
   snir->size = snir_size;
   memcpy(snir->data, snir_data, snir_size);

   return snir;
}

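/* Rebuilds a shader variant from a blob written by
 * shader_variant_write_to_blob(): stage, prog_data, uniform list, and the
 * offset/size of its assembly within the shared assembly BO.
 */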
static struct v3dv_shader_variant*
shader_variant_create_from_blob(struct v3dv_device *device,
                                struct blob_reader *blob)
{
   VkResult result;

   enum broadcom_shader_stage stage = blob_read_uint32(blob);

   uint32_t prog_data_size = blob_read_uint32(blob);
   /* FIXME: as we include the stage perhaps we can avoid prog_data_size? */
   assert(prog_data_size == v3d_prog_data_size(broadcom_shader_stage_to_gl(stage)));

   const void *prog_data = blob_read_bytes(blob, prog_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t ulist_count = blob_read_uint32(blob);
   uint32_t contents_size = sizeof(enum quniform_contents) * ulist_count;
   const void *contents_data = blob_read_bytes(blob, contents_size);
   if (blob->overrun)
      return NULL;

   size_t ulist_data_size = sizeof(uint32_t) * ulist_count;
   const void *ulist_data_data = blob_read_bytes(blob, ulist_data_size);
   if (blob->overrun)
      return NULL;

   uint32_t assembly_offset = blob_read_uint32(blob);
   uint32_t qpu_insts_size = blob_read_uint32(blob);

   /* shader_variant_create expects a newly created prog_data of its own, as
    * that is what the v3d compiler returns, so we also allocate one
    * (including the uniform list) and fill it with the data read from the
    * blob.
    */
   struct v3d_prog_data *new_prog_data = rzalloc_size(NULL, prog_data_size);
   if (new_prog_data == NULL)
      return NULL;
   memcpy(new_prog_data, prog_data, prog_data_size);
   struct v3d_uniform_list *ulist = &new_prog_data->uniforms;
   ulist->count = ulist_count;
   ulist->contents = ralloc_array(new_prog_data, enum quniform_contents, ulist->count);
   memcpy(ulist->contents, contents_data, contents_size);
   ulist->data = ralloc_array(new_prog_data, uint32_t, ulist->count);
   memcpy(ulist->data, ulist_data_data, ulist_data_size);

   return v3dv_shader_variant_create(device, stage,
                                     new_prog_data, prog_data_size,
                                     assembly_offset,
                                     NULL, qpu_insts_size,
                                     &result);
}

static struct v3dv_pipeline_shared_data *
v3dv_pipeline_shared_data_create_from_blob(struct v3dv_pipeline_cache *cache,
                                           struct blob_reader *blob)
{
   const unsigned char *sha1_key = blob_read_bytes(blob, 20);

   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES] = { 0 };
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES] = { 0 };

   uint8_t descriptor_maps_count = blob_read_uint8(blob);
   for (uint8_t count = 0; count < descriptor_maps_count; count++) {
      uint8_t stage = blob_read_uint8(blob);

      const struct v3dv_descriptor_maps *current_maps =
         blob_read_bytes(blob, sizeof(struct v3dv_descriptor_maps));

      if (blob->overrun)
         goto fail;

      maps[stage] = vk_zalloc2(&cache->device->vk.alloc, NULL,
                               sizeof(struct v3dv_descriptor_maps), 8,
                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

      if (maps[stage] == NULL)
         goto fail;

      memcpy(maps[stage], current_maps, sizeof(struct v3dv_descriptor_maps));
      if (broadcom_shader_stage_is_render_with_binning(stage)) {
         enum broadcom_shader_stage bin_stage =
            broadcom_binning_shader_stage_for_render_stage(stage);
         maps[bin_stage] = maps[stage];
      }
   }

   uint8_t variant_count = blob_read_uint8(blob);

   for (uint8_t count = 0; count < variant_count; count++) {
      uint8_t stage = blob_read_uint8(blob);
      struct v3dv_shader_variant *variant =
         shader_variant_create_from_blob(cache->device, blob);
      variants[stage] = variant;
   }

   uint32_t total_assembly_size = blob_read_uint32(blob);
   const uint64_t *total_assembly =
      blob_read_bytes(blob, total_assembly_size);

   if (blob->overrun)
      goto fail;

   struct v3dv_pipeline_shared_data *data =
      v3dv_pipeline_shared_data_new(cache, sha1_key, maps, variants,
                                    total_assembly, total_assembly_size);

   if (!data)
      goto fail;

   return data;

fail:
   for (int i = 0; i < BROADCOM_SHADER_STAGES; i++) {
      /* Binning stages share their descriptor map with the corresponding
       * render stage, so skip them here to avoid freeing a map twice.
       */
      if (maps[i] && !broadcom_shader_stage_is_binning(i))
         vk_free2(&cache->device->vk.alloc, NULL, maps[i]);
      if (variants[i])
         v3dv_shader_variant_destroy(cache->device, variants[i]);
   }
   return NULL;
}

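/* Loads the client-provided initial data from VkPipelineCacheCreateInfo into
 * the cache: validates the vk_pipeline_cache_header and then deserializes
 * the NIR and pipeline entries. Loading is best-effort: on the first entry
 * that fails to deserialize we simply stop.
 */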
static void
pipeline_cache_load(struct v3dv_pipeline_cache *cache,
                    size_t size,
                    const void *data)
{
   struct v3dv_device *device = cache->device;
   struct v3dv_physical_device *pdevice = device->pdevice;
   struct vk_pipeline_cache_header header;

   if (cache->cache == NULL || cache->nir_cache == NULL)
      return;

   if (size < sizeof(header))
      return;

   struct blob_reader blob;
   blob_reader_init(&blob, data, size);

   blob_copy_bytes(&blob, &header, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != v3dv_physical_device_vendor_id(pdevice))
      return;
   if (header.device_id != v3dv_physical_device_device_id(pdevice))
      return;
   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
      return;

   uint32_t nir_count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < nir_count; i++) {
      struct serialized_nir *snir =
         serialized_nir_create_from_blob(cache, &blob);

      if (!snir)
         break;

      _mesa_hash_table_insert(cache->nir_cache, snir->sha1_key, snir);
      cache->nir_stats.count++;
   }

   uint32_t count = blob_read_uint32(&blob);
   if (blob.overrun)
      return;

   for (uint32_t i = 0; i < count; i++) {
      struct v3dv_pipeline_shared_data *cache_entry =
         v3dv_pipeline_shared_data_create_from_blob(cache, &blob);
      if (!cache_entry)
         break;

      _mesa_hash_table_insert(cache->cache, cache_entry->sha1_key, cache_entry);
      cache->stats.count++;
   }

   if (debug_cache) {
      mesa_logi("pipeline cache %p, loaded %i nir shaders and "
                "%i entries\n", cache, nir_count, count);
      if (dump_stats)
         cache_dump_stats(cache);
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreatePipelineCache(VkDevice _device,
                         const VkPipelineCacheCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkPipelineCache *pPipelineCache)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);

   cache = vk_object_zalloc(&device->vk, pAllocator,
                            sizeof(*cache),
                            VK_OBJECT_TYPE_PIPELINE_CACHE);

   if (cache == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   v3dv_pipeline_cache_init(cache, device, pCreateInfo->flags,
                            device->instance->pipeline_cache_enabled);

   if (pCreateInfo->initialDataSize > 0) {
      pipeline_cache_load(cache,
                          pCreateInfo->initialDataSize,
                          pCreateInfo->pInitialData);
   }

   *pPipelineCache = v3dv_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache)
{
   mtx_destroy(&cache->mutex);

   if (dump_stats_on_destroy)
      cache_dump_stats(cache);

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry)
         ralloc_free(entry->data);

      _mesa_hash_table_destroy(cache->nir_cache, NULL);
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         if (cache_entry)
            v3dv_pipeline_shared_data_unref(cache->device, cache_entry);
      }

      _mesa_hash_table_destroy(cache->cache, NULL);
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyPipelineCache(VkDevice _device,
                          VkPipelineCache _cache,
                          const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   if (!cache)
      return;

   v3dv_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MergePipelineCaches(VkDevice device,
                         VkPipelineCache dstCache,
                         uint32_t srcCacheCount,
                         const VkPipelineCache *pSrcCaches)
{
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, dst, dstCache);

   if (!dst->cache || !dst->nir_cache)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      V3DV_FROM_HANDLE(v3dv_pipeline_cache, src, pSrcCaches[i]);
      if (!src->cache || !src->nir_cache)
         continue;

      hash_table_foreach(src->nir_cache, entry) {
         struct serialized_nir *src_snir = entry->data;
         assert(src_snir);

         if (_mesa_hash_table_search(dst->nir_cache, src_snir->sha1_key))
            continue;

         /* FIXME: we use serialized NIR shaders because they are convenient
          * to create and store in the cache, but this requires copying the
          * serialized NIR here (and in some other places). Perhaps it would
          * make sense to handle the NIR shaders with shared ref-counted
          * structures, as we do for the variants.
          */
         struct serialized_nir *snir_dst =
            ralloc_size(dst->nir_cache, sizeof(*snir_dst) + src_snir->size);
         memcpy(snir_dst->sha1_key, src_snir->sha1_key, 20);
         snir_dst->size = src_snir->size;
         memcpy(snir_dst->data, src_snir->data, src_snir->size);

         _mesa_hash_table_insert(dst->nir_cache, snir_dst->sha1_key, snir_dst);
         dst->nir_stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, snir_dst->sha1_key);

            mesa_logi("pipeline cache %p, added nir entry %s "
                      "from pipeline cache %p\n",
                      dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }

      hash_table_foreach(src->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;
         assert(cache_entry);

         if (_mesa_hash_table_search(dst->cache, cache_entry->sha1_key))
            continue;

         v3dv_pipeline_shared_data_ref(cache_entry);
         _mesa_hash_table_insert(dst->cache, cache_entry->sha1_key, cache_entry);

         dst->stats.count++;
         if (debug_cache) {
            char sha1buf[41];
            _mesa_sha1_format(sha1buf, cache_entry->sha1_key);

            mesa_logi("pipeline cache %p, added entry %s "
                      "from pipeline cache %p\n",
                      dst, sha1buf, src);
            if (dump_stats)
               cache_dump_stats(dst);
         }
      }
   }

   return VK_SUCCESS;
}

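/* Serializes a shader variant. The layout (which must be kept in sync with
 * shader_variant_create_from_blob()) is: stage, prog_data size and bytes,
 * the uniform list (count, contents[], data[]), and the variant's assembly
 * offset/size within the shared assembly BO. The assembly itself is written
 * separately by v3dv_pipeline_shared_data_write_to_blob().
 */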
static bool
shader_variant_write_to_blob(const struct v3dv_shader_variant *variant,
                             struct blob *blob)
{
   blob_write_uint32(blob, variant->stage);

   blob_write_uint32(blob, variant->prog_data_size);
   blob_write_bytes(blob, variant->prog_data.base, variant->prog_data_size);

   struct v3d_uniform_list *ulist = &variant->prog_data.base->uniforms;
   blob_write_uint32(blob, ulist->count);
   blob_write_bytes(blob, ulist->contents, sizeof(enum quniform_contents) * ulist->count);
   blob_write_bytes(blob, ulist->data, sizeof(uint32_t) * ulist->count);

   blob_write_uint32(blob, variant->assembly_offset);
   blob_write_uint32(blob, variant->qpu_insts_size);

   return !blob->out_of_memory;
}

static bool
v3dv_pipeline_shared_data_write_to_blob(const struct v3dv_pipeline_shared_data *cache_entry,
                                        struct blob *blob)
{
   blob_write_bytes(blob, cache_entry->sha1_key, 20);

   uint8_t descriptor_maps_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      if (cache_entry->maps[stage] == NULL)
         continue;
      descriptor_maps_count++;
   }

   /* Compute pipelines only have one descriptor map, while graphics
    * pipelines may have 2 (VS+FS) or 3 (VS+GS+FS), since the binning stages
    * take the descriptor map from the corresponding render stage.
    */
   assert((descriptor_maps_count >= 2 && descriptor_maps_count <= 3) ||
          (descriptor_maps_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, descriptor_maps_count);

   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->maps[stage] == NULL)
         continue;
      if (broadcom_shader_stage_is_binning(stage))
         continue;

      blob_write_uint8(blob, stage);
      blob_write_bytes(blob, cache_entry->maps[stage],
                       sizeof(struct v3dv_descriptor_maps));
   }

   uint8_t variant_count = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;
      variant_count++;
   }

   /* Graphics pipelines with VS+FS have 3 variants, those with VS+GS+FS have
    * 5, and compute pipelines have only 1.
    */
   assert((variant_count == 5 || variant_count == 3) ||
          (variant_count == 1 && cache_entry->variants[BROADCOM_SHADER_COMPUTE]));
   blob_write_uint8(blob, variant_count);

   uint32_t total_assembly_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (cache_entry->variants[stage] == NULL)
         continue;

      blob_write_uint8(blob, stage);
      if (!shader_variant_write_to_blob(cache_entry->variants[stage], blob))
         return false;

      total_assembly_size += cache_entry->variants[stage]->qpu_insts_size;
   }
   blob_write_uint32(blob, total_assembly_size);

   assert(cache_entry->assembly_bo->map);
   assert(cache_entry->assembly_bo->size >= total_assembly_size);
   blob_write_bytes(blob, cache_entry->assembly_bo->map, total_assembly_size);

   return !blob->out_of_memory;
}

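/* Standard Vulkan two-call idiom: when pData is NULL we serialize into an
 * unbounded fixed-size blob purely to compute *pDataSize; otherwise we write
 * into the caller's buffer, returning VK_INCOMPLETE if we run out of space.
 */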
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineCacheData(VkDevice _device,
                          VkPipelineCache _cache,
                          size_t *pDataSize,
                          void *pData)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);

   struct blob blob;
   if (pData) {
      blob_init_fixed(&blob, pData, *pDataSize);
   } else {
      blob_init_fixed(&blob, NULL, SIZE_MAX);
   }

   struct v3dv_physical_device *pdevice = device->pdevice;
   VkResult result = VK_INCOMPLETE;

   pipeline_cache_lock(cache);

   struct vk_pipeline_cache_header header = {
      .header_size = sizeof(struct vk_pipeline_cache_header),
      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
      .vendor_id = v3dv_physical_device_vendor_id(pdevice),
      .device_id = v3dv_physical_device_device_id(pdevice),
   };
   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
   blob_write_bytes(&blob, &header, sizeof(header));

   uint32_t nir_count = 0;
   intptr_t nir_count_offset = blob_reserve_uint32(&blob);
   if (nir_count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->nir_cache) {
      hash_table_foreach(cache->nir_cache, entry) {
         const struct serialized_nir *snir = entry->data;

         size_t save_size = blob.size;

         blob_write_bytes(&blob, snir->sha1_key, 20);
         blob_write_uint32(&blob, snir->size);
         blob_write_bytes(&blob, snir->data, snir->size);

         if (blob.out_of_memory) {
            blob.size = save_size;
            goto done;
         }

         nir_count++;
      }
   }
   blob_overwrite_uint32(&blob, nir_count_offset, nir_count);

   uint32_t count = 0;
   intptr_t count_offset = blob_reserve_uint32(&blob);
   if (count_offset < 0) {
      *pDataSize = 0;
      goto done;
   }

   if (cache->cache) {
      hash_table_foreach(cache->cache, entry) {
         struct v3dv_pipeline_shared_data *cache_entry = entry->data;

         size_t save_size = blob.size;
         if (!v3dv_pipeline_shared_data_write_to_blob(cache_entry, &blob)) {
            /* If it fails, reset to the previous size and bail */
            blob.size = save_size;
            goto done;
         }

         count++;
      }
   }

   blob_overwrite_uint32(&blob, count_offset, count);

   *pDataSize = blob.size;

   result = VK_SUCCESS;

   if (debug_cache) {
      assert(count <= cache->stats.count);
      mesa_logi("GetPipelineCacheData: serializing cache %p, "
                "%i nir shader entries "
                "%i entries, %u DataSize\n",
                cache, nir_count, count, (uint32_t) *pDataSize);
   }

done:
   blob_finish(&blob);

   pipeline_cache_unlock(cache);

   return result;
}