/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_util.h"

struct cache_entry_variant_info
{
};

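/* A cached pipeline is keyed by a SHA-1 hash.  The per-stage compiled code
 * (each blob preceded by a cache_entry_variant_info header) is stored inline
 * after the struct in the flexible "code" array.  The variants[] pointers
 * are only meaningful in memory and are cleared whenever an entry is
 * serialized or deserialized.
 */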
struct cache_entry
{
   union {
      unsigned char sha1[20];
      uint32_t sha1_dw[5];
   };
   uint32_t code_sizes[MESA_SHADER_STAGES];
   struct tu_shader_variant *variants[MESA_SHADER_STAGES];
   char code[0];
};

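/* table_size starts at 1024 and only ever doubles, so it stays a power of
 * two and "(start + i) & (table_size - 1)" can be used for probing.
 */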
static void
tu_pipeline_cache_init(struct tu_pipeline_cache *cache,
                       struct tu_device *device)
{
   cache->device = device;
   pthread_mutex_init(&cache->mutex, NULL);

   cache->modified = false;
   cache->kernel_count = 0;
   cache->total_size = 0;
   cache->table_size = 1024;
   const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
   cache->hash_table = malloc(byte_size);

   /* We don't consider allocation failure fatal, we just start with a 0-sized
    * cache. Disable caching when we want to keep shader debug info, since
    * we don't get the debug info on cached shaders. */
   if (cache->hash_table == NULL)
      cache->table_size = 0;
   else
      memset(cache->hash_table, 0, byte_size);
}

static void
tu_pipeline_cache_finish(struct tu_pipeline_cache *cache)
{
   for (unsigned i = 0; i < cache->table_size; ++i)
      if (cache->hash_table[i]) {
         vk_free(&cache->alloc, cache->hash_table[i]);
      }
   pthread_mutex_destroy(&cache->mutex);
   free(cache->hash_table);
}

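/* Bytes occupied by an entry, both in memory and in the serialized blob:
 * the fixed struct plus, for each stage that has code, a
 * cache_entry_variant_info header followed by the compiled code.
 */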
static uint32_t
entry_size(struct cache_entry *entry)
{
   size_t ret = sizeof(*entry);
   for (int i = 0; i < MESA_SHADER_STAGES; ++i)
      if (entry->code_sizes[i])
         ret +=
            sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
   return ret;
}

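/* Open-addressing lookup with linear probing, using the first 32 bits of
 * the SHA-1 as the starting index.  The caller must hold cache->mutex.
 */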
static struct cache_entry *
tu_pipeline_cache_search_unlocked(struct tu_pipeline_cache *cache,
                                  const unsigned char *sha1)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = (*(uint32_t *) sha1);

   if (cache->table_size == 0)
      return NULL;

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      struct cache_entry *entry = cache->hash_table[index];

      if (!entry)
         return NULL;

      if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
         return entry;
      }
   }

   unreachable("hash table should never be full");
}

static struct cache_entry *
tu_pipeline_cache_search(struct tu_pipeline_cache *cache,
                         const unsigned char *sha1)
{
   struct cache_entry *entry;

   pthread_mutex_lock(&cache->mutex);

   entry = tu_pipeline_cache_search_unlocked(cache, sha1);

   pthread_mutex_unlock(&cache->mutex);

   return entry;
}

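/* Insert an entry into the table.  The caller must hold cache->mutex and
 * must have already ensured there is a free slot (the table is kept at
 * most half full).
 */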
static void
tu_pipeline_cache_set_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   const uint32_t mask = cache->table_size - 1;
   const uint32_t start = entry->sha1_dw[0];

   /* We'll always be able to insert when we get here. */
   assert(cache->kernel_count < cache->table_size / 2);

   for (uint32_t i = 0; i < cache->table_size; i++) {
      const uint32_t index = (start + i) & mask;
      if (!cache->hash_table[index]) {
         cache->hash_table[index] = entry;
         break;
      }
   }

   cache->total_size += entry_size(entry);
   cache->kernel_count++;
}

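/* Double the table size and re-insert the existing entries.  On allocation
 * failure the old table is left untouched.
 */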
static VkResult
tu_pipeline_cache_grow(struct tu_pipeline_cache *cache)
{
   const uint32_t table_size = cache->table_size * 2;
   const uint32_t old_table_size = cache->table_size;
   const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
   struct cache_entry **table;
   struct cache_entry **old_table = cache->hash_table;

   table = malloc(byte_size);
   if (table == NULL)
      return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->hash_table = table;
   cache->table_size = table_size;
   cache->kernel_count = 0;
   cache->total_size = 0;

   memset(cache->hash_table, 0, byte_size);
   for (uint32_t i = 0; i < old_table_size; i++) {
      struct cache_entry *entry = old_table[i];
      if (!entry)
         continue;

      tu_pipeline_cache_set_entry(cache, entry);
   }

   free(old_table);

   return VK_SUCCESS;
}

static void
tu_pipeline_cache_add_entry(struct tu_pipeline_cache *cache,
                            struct cache_entry *entry)
{
   if (cache->kernel_count == cache->table_size / 2)
      tu_pipeline_cache_grow(cache);

   /* Failing to grow the hash table isn't fatal, but may mean we don't
    * have enough space to add this new kernel. Only add it if there's room.
    */
   if (cache->kernel_count < cache->table_size / 2)
      tu_pipeline_cache_set_entry(cache, entry);
}

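/* Deserialize a VkPipelineCache blob: check the standard Vulkan pipeline
 * cache header against this device (the vendor/device id checks are still
 * TODO) and copy each well-formed entry into the in-memory table.
 */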
static void
tu_pipeline_cache_load(struct tu_pipeline_cache *cache,
                       const void *data,
                       size_t size)
{
   struct tu_device *device = cache->device;
   struct vk_pipeline_cache_header header;

   if (size < sizeof(header))
      return;
   memcpy(&header, data, sizeof(header));
   if (header.header_size < sizeof(header))
      return;
   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
      return;
   if (header.vendor_id != 0 /* TODO */)
      return;
   if (header.device_id != 0 /* TODO */)
      return;
   if (memcmp(header.uuid, device->physical_device->cache_uuid,
              VK_UUID_SIZE) != 0)
      return;

   char *end = (void *) data + size;
   char *p = (void *) data + header.header_size;

   while (end - p >= sizeof(struct cache_entry)) {
      struct cache_entry *entry = (struct cache_entry *) p;
      struct cache_entry *dest_entry;
      size_t size = entry_size(entry);
      if (end - p < size)
         break;

      dest_entry =
         vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
      if (dest_entry) {
         memcpy(dest_entry, entry, size);
         for (int i = 0; i < MESA_SHADER_STAGES; ++i)
            dest_entry->variants[i] = NULL;
         tu_pipeline_cache_add_entry(cache, dest_entry);
      }
      p += size;
   }
}

VkResult
tu_CreatePipelineCache(VkDevice _device,
                       const VkPipelineCacheCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator,
                       VkPipelineCache *pPipelineCache)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   assert(pCreateInfo->flags == 0);

   cache = vk_object_alloc(&device->vk, pAllocator, sizeof(*cache),
                           VK_OBJECT_TYPE_PIPELINE_CACHE);
   if (cache == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocator)
      cache->alloc = *pAllocator;
   else
      cache->alloc = device->vk.alloc;

   tu_pipeline_cache_init(cache, device);

   if (pCreateInfo->initialDataSize > 0) {
      tu_pipeline_cache_load(cache, pCreateInfo->pInitialData,
                             pCreateInfo->initialDataSize);
   }

   *pPipelineCache = tu_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}

void
tu_DestroyPipelineCache(VkDevice _device,
                        VkPipelineCache _cache,
                        const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);

   if (!cache)
      return;
   tu_pipeline_cache_finish(cache);

   vk_object_free(&device->vk, pAllocator, cache);
}

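/* Serialize the cache.  The blob layout is the standard
 * vk_pipeline_cache_header immediately followed by the cache_entry records
 * (with their in-memory variant pointers zeroed).  When pData is NULL only
 * the required size is returned.
 */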
VkResult
tu_GetPipelineCacheData(VkDevice _device,
                        VkPipelineCache _cache,
                        size_t *pDataSize,
                        void *pData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_pipeline_cache, cache, _cache);
   struct vk_pipeline_cache_header *header;
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&cache->mutex);

   const size_t size = sizeof(*header) + cache->total_size;
   if (pData == NULL) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = size;
      return VK_SUCCESS;
   }
   if (*pDataSize < sizeof(*header)) {
      pthread_mutex_unlock(&cache->mutex);
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }
   void *p = pData, *end = pData + *pDataSize;
   header = p;
   header->header_size = sizeof(*header);
   header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
   header->vendor_id = 0 /* TODO */;
   header->device_id = 0 /* TODO */;
   memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
   p += header->header_size;

   struct cache_entry *entry;
   for (uint32_t i = 0; i < cache->table_size; i++) {
      if (!cache->hash_table[i])
         continue;
      entry = cache->hash_table[i];
      const uint32_t size = entry_size(entry);
      if (end < p + size) {
         result = VK_INCOMPLETE;
         break;
      }

      memcpy(p, entry, size);
      for (int j = 0; j < MESA_SHADER_STAGES; ++j)
         ((struct cache_entry *) p)->variants[j] = NULL;
      p += size;
   }
   *pDataSize = p - pData;

   pthread_mutex_unlock(&cache->mutex);
   return result;
}

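/* Move entries from src into dst.  Transferred slots are cleared in src so
 * the same entry is not freed by both caches when they are destroyed.
 */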
static void
tu_pipeline_cache_merge(struct tu_pipeline_cache *dst,
                        struct tu_pipeline_cache *src)
{
   for (uint32_t i = 0; i < src->table_size; i++) {
      struct cache_entry *entry = src->hash_table[i];
      if (!entry || tu_pipeline_cache_search(dst, entry->sha1))
         continue;

      tu_pipeline_cache_add_entry(dst, entry);

      src->hash_table[i] = NULL;
   }
}

VkResult
tu_MergePipelineCaches(VkDevice _device,
                       VkPipelineCache destCache,
                       uint32_t srcCacheCount,
                       const VkPipelineCache *pSrcCaches)
{
   TU_FROM_HANDLE(tu_pipeline_cache, dst, destCache);

   for (uint32_t i = 0; i < srcCacheCount; i++) {
      TU_FROM_HANDLE(tu_pipeline_cache, src, pSrcCaches[i]);

      tu_pipeline_cache_merge(dst, src);
   }

   return VK_SUCCESS;
}