1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /**
24 * @file crocus_program_cache.c
25 *
26 * The in-memory program cache. This is basically a hash table mapping
27 * API-specified shaders and a state key to a compiled variant. It also
28 * takes care of uploading shader assembly into a BO for use on the GPU.
29 */
30
31 #include <stdio.h>
32 #include <errno.h>
33 #include "pipe/p_defines.h"
34 #include "pipe/p_state.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_atomic.h"
38 #include "util/u_upload_mgr.h"
39 #include "compiler/nir/nir.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "intel/compiler/brw_compiler.h"
42 #include "intel/compiler/brw_eu.h"
43 #include "intel/compiler/brw_nir.h"
44 #include "crocus_context.h"
45 #include "crocus_resource.h"
46
47 struct keybox {
48 uint16_t size;
49 enum crocus_program_cache_id cache_id;
50 uint8_t data[0];
51 };
52
53 static struct keybox *
make_keybox(void * mem_ctx,enum crocus_program_cache_id cache_id,const void * key,uint32_t key_size)54 make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
55 const void *key, uint32_t key_size)
56 {
57 struct keybox *keybox =
58 ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
59
60 keybox->cache_id = cache_id;
61 keybox->size = key_size;
62 memcpy(keybox->data, key, key_size);
63
64 return keybox;
65 }
66
67 static uint32_t
keybox_hash(const void * void_key)68 keybox_hash(const void *void_key)
69 {
70 const struct keybox *key = void_key;
71 return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
72 }
73
74 static bool
keybox_equals(const void * void_a,const void * void_b)75 keybox_equals(const void *void_a, const void *void_b)
76 {
77 const struct keybox *a = void_a, *b = void_b;
78 if (a->size != b->size)
79 return false;
80
81 return memcmp(a->data, b->data, a->size) == 0;
82 }
83
84 struct crocus_compiled_shader *
crocus_find_cached_shader(struct crocus_context * ice,enum crocus_program_cache_id cache_id,uint32_t key_size,const void * key)85 crocus_find_cached_shader(struct crocus_context *ice,
86 enum crocus_program_cache_id cache_id,
87 uint32_t key_size, const void *key)
88 {
89 struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
90 struct hash_entry *entry =
91 _mesa_hash_table_search(ice->shaders.cache, keybox);
92
93 ralloc_free(keybox);
94
95 return entry ? entry->data : NULL;
96 }
97
98 const void *
crocus_find_previous_compile(const struct crocus_context * ice,enum crocus_program_cache_id cache_id,unsigned program_string_id)99 crocus_find_previous_compile(const struct crocus_context *ice,
100 enum crocus_program_cache_id cache_id,
101 unsigned program_string_id)
102 {
103 hash_table_foreach(ice->shaders.cache, entry) {
104 const struct keybox *keybox = entry->key;
105 const struct brw_base_prog_key *key = (const void *)keybox->data;
106 if (keybox->cache_id == cache_id &&
107 key->program_string_id == program_string_id) {
108 return keybox->data;
109 }
110 }
111
112 return NULL;
113 }
114
115 /**
116 * Look for an existing entry in the cache that has identical assembly code.
117 *
118 * This is useful for programs generating shaders at runtime, where multiple
119 * distinct shaders (from an API perspective) may compile to the same assembly
120 * in our backend. This saves space in the program cache buffer.
121 */
122 static const struct crocus_compiled_shader *
find_existing_assembly(struct hash_table * cache,void * map,const void * assembly,unsigned assembly_size)123 find_existing_assembly(struct hash_table *cache, void *map,
124 const void *assembly, unsigned assembly_size)
125 {
126 hash_table_foreach (cache, entry) {
127 const struct crocus_compiled_shader *existing = entry->data;
128
129 if (existing->map_size != assembly_size)
130 continue;
131
132 if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
133 return existing;
134 }
135 return NULL;
136 }
137
138 static void
crocus_cache_new_bo(struct crocus_context * ice,uint32_t new_size)139 crocus_cache_new_bo(struct crocus_context *ice,
140 uint32_t new_size)
141 {
142 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
143 struct crocus_bo *new_bo;
144 new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);
145
146 void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
147 MAP_ASYNC | MAP_PERSISTENT);
148
149 if (ice->shaders.cache_next_offset != 0) {
150 memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
151 }
152
153 crocus_bo_unmap(ice->shaders.cache_bo);
154 crocus_bo_unreference(ice->shaders.cache_bo);
155 ice->shaders.cache_bo = new_bo;
156 ice->shaders.cache_bo_map = map;
157
158 if (screen->devinfo.ver <= 5) {
159 /* reemit all shaders on GEN4 only. */
160 ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
161 CROCUS_DIRTY_WM;
162 ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
163 }
164 ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
165 ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
166 /* unset state base address */
167 }
168
169 static uint32_t
crocus_alloc_item_data(struct crocus_context * ice,uint32_t size)170 crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
171 {
172 if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
173 uint32_t new_size = ice->shaders.cache_bo->size * 2;
174 while (ice->shaders.cache_next_offset + size > new_size)
175 new_size *= 2;
176
177 crocus_cache_new_bo(ice, new_size);
178 }
179 uint32_t offset = ice->shaders.cache_next_offset;
180
181 /* Programs are always 64-byte aligned, so set up the next one now */
182 ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
183 return offset;
184 }
185
186 struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context * ice,enum crocus_program_cache_id cache_id,uint32_t key_size,const void * key,const void * assembly,uint32_t asm_size,struct brw_stage_prog_data * prog_data,uint32_t prog_data_size,uint32_t * streamout,enum brw_param_builtin * system_values,unsigned num_system_values,unsigned num_cbufs,const struct crocus_binding_table * bt)187 crocus_upload_shader(struct crocus_context *ice,
188 enum crocus_program_cache_id cache_id, uint32_t key_size,
189 const void *key, const void *assembly, uint32_t asm_size,
190 struct brw_stage_prog_data *prog_data,
191 uint32_t prog_data_size, uint32_t *streamout,
192 enum brw_param_builtin *system_values,
193 unsigned num_system_values, unsigned num_cbufs,
194 const struct crocus_binding_table *bt)
195 {
196 struct hash_table *cache = ice->shaders.cache;
197 struct crocus_compiled_shader *shader =
198 rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
199 const struct crocus_compiled_shader *existing = find_existing_assembly(
200 cache, ice->shaders.cache_bo_map, assembly, asm_size);
201
202 /* If we can find a matching prog in the cache already, then reuse the
203 * existing stuff without creating new copy into the underlying buffer
204 * object. This is notably useful for programs generating shaders at
205 * runtime, where multiple shaders may compile to the same thing in our
206 * backend.
207 */
208 if (existing) {
209 shader->offset = existing->offset;
210 shader->map_size = existing->map_size;
211 } else {
212 shader->offset = crocus_alloc_item_data(ice, asm_size);
213 shader->map_size = asm_size;
214
215 memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
216 }
217
218 shader->prog_data = prog_data;
219 shader->prog_data_size = prog_data_size;
220 shader->streamout = streamout;
221 shader->system_values = system_values;
222 shader->num_system_values = num_system_values;
223 shader->num_cbufs = num_cbufs;
224 shader->bt = *bt;
225
226 ralloc_steal(shader, shader->prog_data);
227 if (prog_data_size > 16) {
228 ralloc_steal(shader->prog_data, prog_data->param);
229 ralloc_steal(shader->prog_data, prog_data->pull_param);
230 }
231 ralloc_steal(shader, shader->streamout);
232 ralloc_steal(shader, shader->system_values);
233
234 struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
235 _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
236
237 return shader;
238 }
239
240 bool
crocus_blorp_lookup_shader(struct blorp_batch * blorp_batch,const void * key,uint32_t key_size,uint32_t * kernel_out,void * prog_data_out)241 crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
242 uint32_t key_size, uint32_t *kernel_out,
243 void *prog_data_out)
244 {
245 struct blorp_context *blorp = blorp_batch->blorp;
246 struct crocus_context *ice = blorp->driver_ctx;
247 struct crocus_compiled_shader *shader =
248 crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
249
250 if (!shader)
251 return false;
252
253 *kernel_out = shader->offset;
254 *((void **)prog_data_out) = shader->prog_data;
255
256 return true;
257 }
258
259 bool
crocus_blorp_upload_shader(struct blorp_batch * blorp_batch,uint32_t stage,const void * key,uint32_t key_size,const void * kernel,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_templ,uint32_t prog_data_size,uint32_t * kernel_out,void * prog_data_out)260 crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
261 const void *key, uint32_t key_size,
262 const void *kernel, uint32_t kernel_size,
263 const struct brw_stage_prog_data *prog_data_templ,
264 uint32_t prog_data_size, uint32_t *kernel_out,
265 void *prog_data_out)
266 {
267 struct blorp_context *blorp = blorp_batch->blorp;
268 struct crocus_context *ice = blorp->driver_ctx;
269
270 struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
271 memcpy(prog_data, prog_data_templ, prog_data_size);
272
273 struct crocus_binding_table bt;
274 memset(&bt, 0, sizeof(bt));
275
276 struct crocus_compiled_shader *shader = crocus_upload_shader(
277 ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
278 prog_data_size, NULL, NULL, 0, 0, &bt);
279
280 *kernel_out = shader->offset;
281 *((void **)prog_data_out) = shader->prog_data;
282
283 return true;
284 }
285
286 void
crocus_init_program_cache(struct crocus_context * ice)287 crocus_init_program_cache(struct crocus_context *ice)
288 {
289 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
290 ice->shaders.cache =
291 _mesa_hash_table_create(ice, keybox_hash, keybox_equals);
292
293 ice->shaders.cache_bo =
294 crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
295 ice->shaders.cache_bo_map =
296 crocus_bo_map(NULL, ice->shaders.cache_bo,
297 MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
298 }
299
300 void
crocus_destroy_program_cache(struct crocus_context * ice)301 crocus_destroy_program_cache(struct crocus_context *ice)
302 {
303 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
304 ice->shaders.prog[i] = NULL;
305 }
306
307 if (ice->shaders.cache_bo) {
308 crocus_bo_unmap(ice->shaders.cache_bo);
309 crocus_bo_unreference(ice->shaders.cache_bo);
310 ice->shaders.cache_bo_map = NULL;
311 ice->shaders.cache_bo = NULL;
312 }
313
314 ralloc_free(ice->shaders.cache);
315 }
316
317 static const char *
cache_name(enum crocus_program_cache_id cache_id)318 cache_name(enum crocus_program_cache_id cache_id)
319 {
320 if (cache_id == CROCUS_CACHE_BLORP)
321 return "BLORP";
322
323 if (cache_id == CROCUS_CACHE_SF)
324 return "SF";
325
326 if (cache_id == CROCUS_CACHE_CLIP)
327 return "CLIP";
328
329 if (cache_id == CROCUS_CACHE_FF_GS)
330 return "FF_GS";
331
332 return _mesa_shader_stage_to_string(cache_id);
333 }
334
335 void
crocus_print_program_cache(struct crocus_context * ice)336 crocus_print_program_cache(struct crocus_context *ice)
337 {
338 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
339 const struct intel_device_info *devinfo = &screen->devinfo;
340
341 hash_table_foreach(ice->shaders.cache, entry) {
342 const struct keybox *keybox = entry->key;
343 struct crocus_compiled_shader *shader = entry->data;
344 fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
345 brw_disassemble(devinfo, ice->shaders.cache_bo_map + shader->offset, 0,
346 shader->prog_data->program_size, NULL, stderr);
347 }
348 }
349