• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file crocus_program_cache.c
25  *
26  * The in-memory program cache.  This is basically a hash table mapping
27  * API-specified shaders and a state key to a compiled variant.  It also
28  * takes care of uploading shader assembly into a BO for use on the GPU.
29  */
30 
31 #include <stdio.h>
32 #include <errno.h>
33 #include "pipe/p_defines.h"
34 #include "pipe/p_state.h"
35 #include "pipe/p_context.h"
36 #include "pipe/p_screen.h"
37 #include "util/u_atomic.h"
38 #include "util/u_upload_mgr.h"
39 #include "compiler/nir/nir.h"
40 #include "compiler/nir/nir_builder.h"
41 #include "intel/compiler/brw_compiler.h"
42 #include "intel/compiler/brw_eu.h"
43 #include "intel/compiler/brw_nir.h"
44 #include "crocus_context.h"
45 #include "crocus_resource.h"
46 
47 struct keybox {
48    uint16_t size;
49    enum crocus_program_cache_id cache_id;
50    uint8_t data[0];
51 };
52 
53 static struct keybox *
make_keybox(void * mem_ctx,enum crocus_program_cache_id cache_id,const void * key,uint32_t key_size)54 make_keybox(void *mem_ctx, enum crocus_program_cache_id cache_id,
55             const void *key, uint32_t key_size)
56 {
57    struct keybox *keybox =
58       ralloc_size(mem_ctx, sizeof(struct keybox) + key_size);
59 
60    keybox->cache_id = cache_id;
61    keybox->size = key_size;
62    memcpy(keybox->data, key, key_size);
63 
64    return keybox;
65 }
66 
67 static uint32_t
keybox_hash(const void * void_key)68 keybox_hash(const void *void_key)
69 {
70    const struct keybox *key = void_key;
71    return _mesa_hash_data(&key->cache_id, key->size + sizeof(key->cache_id));
72 }
73 
74 static bool
keybox_equals(const void * void_a,const void * void_b)75 keybox_equals(const void *void_a, const void *void_b)
76 {
77    const struct keybox *a = void_a, *b = void_b;
78    if (a->size != b->size)
79       return false;
80 
81    return memcmp(a->data, b->data, a->size) == 0;
82 }
83 
84 struct crocus_compiled_shader *
crocus_find_cached_shader(struct crocus_context * ice,enum crocus_program_cache_id cache_id,uint32_t key_size,const void * key)85 crocus_find_cached_shader(struct crocus_context *ice,
86                           enum crocus_program_cache_id cache_id,
87                           uint32_t key_size, const void *key)
88 {
89    struct keybox *keybox = make_keybox(NULL, cache_id, key, key_size);
90    struct hash_entry *entry =
91       _mesa_hash_table_search(ice->shaders.cache, keybox);
92 
93    ralloc_free(keybox);
94 
95    return entry ? entry->data : NULL;
96 }
97 
98 const void *
crocus_find_previous_compile(const struct crocus_context * ice,enum crocus_program_cache_id cache_id,unsigned program_string_id)99 crocus_find_previous_compile(const struct crocus_context *ice,
100                              enum crocus_program_cache_id cache_id,
101                              unsigned program_string_id)
102 {
103    hash_table_foreach(ice->shaders.cache, entry) {
104       const struct keybox *keybox = entry->key;
105       const struct brw_base_prog_key *key = (const void *)keybox->data;
106       if (keybox->cache_id == cache_id &&
107           key->program_string_id == program_string_id) {
108          return keybox->data;
109       }
110    }
111 
112    return NULL;
113 }
114 
115 /**
116  * Look for an existing entry in the cache that has identical assembly code.
117  *
118  * This is useful for programs generating shaders at runtime, where multiple
119  * distinct shaders (from an API perspective) may compile to the same assembly
120  * in our backend.  This saves space in the program cache buffer.
121  */
122 static const struct crocus_compiled_shader *
find_existing_assembly(struct hash_table * cache,void * map,const void * assembly,unsigned assembly_size)123 find_existing_assembly(struct hash_table *cache, void *map,
124                        const void *assembly, unsigned assembly_size)
125 {
126    hash_table_foreach (cache, entry) {
127       const struct crocus_compiled_shader *existing = entry->data;
128 
129       if (existing->map_size != assembly_size)
130          continue;
131 
132       if (memcmp(map + existing->offset, assembly, assembly_size) == 0)
133          return existing;
134    }
135    return NULL;
136 }
137 
138 static void
crocus_cache_new_bo(struct crocus_context * ice,uint32_t new_size)139 crocus_cache_new_bo(struct crocus_context *ice,
140                     uint32_t new_size)
141 {
142    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
143    struct crocus_bo *new_bo;
144    new_bo = crocus_bo_alloc(screen->bufmgr, "program cache", new_size);
145 
146    void *map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE |
147                              MAP_ASYNC | MAP_PERSISTENT);
148 
149    if (ice->shaders.cache_next_offset != 0) {
150       memcpy(map, ice->shaders.cache_bo_map, ice->shaders.cache_next_offset);
151    }
152 
153    crocus_bo_unmap(ice->shaders.cache_bo);
154    crocus_bo_unreference(ice->shaders.cache_bo);
155    ice->shaders.cache_bo = new_bo;
156    ice->shaders.cache_bo_map = map;
157 
158    if (screen->devinfo.ver <= 5) {
159       /* reemit all shaders on GEN4 only. */
160       ice->state.dirty |= CROCUS_DIRTY_CLIP | CROCUS_DIRTY_RASTER |
161          CROCUS_DIRTY_WM;
162       ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_VS;
163    }
164    ice->batches[CROCUS_BATCH_RENDER].state_base_address_emitted = false;
165    ice->batches[CROCUS_BATCH_COMPUTE].state_base_address_emitted = false;
166    /* unset state base address */
167 }
168 
169 static uint32_t
crocus_alloc_item_data(struct crocus_context * ice,uint32_t size)170 crocus_alloc_item_data(struct crocus_context *ice, uint32_t size)
171 {
172    if (ice->shaders.cache_next_offset + size > ice->shaders.cache_bo->size) {
173       uint32_t new_size = ice->shaders.cache_bo->size * 2;
174       while (ice->shaders.cache_next_offset + size > new_size)
175          new_size *= 2;
176 
177       crocus_cache_new_bo(ice, new_size);
178    }
179    uint32_t offset = ice->shaders.cache_next_offset;
180 
181    /* Programs are always 64-byte aligned, so set up the next one now */
182    ice->shaders.cache_next_offset = ALIGN(offset + size, 64);
183    return offset;
184 }
185 
186 struct crocus_compiled_shader *
crocus_upload_shader(struct crocus_context * ice,enum crocus_program_cache_id cache_id,uint32_t key_size,const void * key,const void * assembly,uint32_t asm_size,struct brw_stage_prog_data * prog_data,uint32_t prog_data_size,uint32_t * streamout,enum brw_param_builtin * system_values,unsigned num_system_values,unsigned num_cbufs,const struct crocus_binding_table * bt)187 crocus_upload_shader(struct crocus_context *ice,
188                      enum crocus_program_cache_id cache_id, uint32_t key_size,
189                      const void *key, const void *assembly, uint32_t asm_size,
190                      struct brw_stage_prog_data *prog_data,
191                      uint32_t prog_data_size, uint32_t *streamout,
192                      enum brw_param_builtin *system_values,
193                      unsigned num_system_values, unsigned num_cbufs,
194                      const struct crocus_binding_table *bt)
195 {
196    struct hash_table *cache = ice->shaders.cache;
197    struct crocus_compiled_shader *shader =
198       rzalloc_size(cache, sizeof(struct crocus_compiled_shader));
199    const struct crocus_compiled_shader *existing = find_existing_assembly(
200       cache, ice->shaders.cache_bo_map, assembly, asm_size);
201 
202    /* If we can find a matching prog in the cache already, then reuse the
203     * existing stuff without creating new copy into the underlying buffer
204     * object.  This is notably useful for programs generating shaders at
205     * runtime, where multiple shaders may compile to the same thing in our
206     * backend.
207     */
208    if (existing) {
209       shader->offset = existing->offset;
210       shader->map_size = existing->map_size;
211    } else {
212       shader->offset = crocus_alloc_item_data(ice, asm_size);
213       shader->map_size = asm_size;
214 
215       memcpy(ice->shaders.cache_bo_map + shader->offset, assembly, asm_size);
216    }
217 
218    shader->prog_data = prog_data;
219    shader->prog_data_size = prog_data_size;
220    shader->streamout = streamout;
221    shader->system_values = system_values;
222    shader->num_system_values = num_system_values;
223    shader->num_cbufs = num_cbufs;
224    shader->bt = *bt;
225 
226    ralloc_steal(shader, shader->prog_data);
227    if (prog_data_size > 16) {
228       ralloc_steal(shader->prog_data, prog_data->param);
229       ralloc_steal(shader->prog_data, prog_data->pull_param);
230    }
231    ralloc_steal(shader, shader->streamout);
232    ralloc_steal(shader, shader->system_values);
233 
234    struct keybox *keybox = make_keybox(shader, cache_id, key, key_size);
235    _mesa_hash_table_insert(ice->shaders.cache, keybox, shader);
236 
237    return shader;
238 }
239 
240 bool
crocus_blorp_lookup_shader(struct blorp_batch * blorp_batch,const void * key,uint32_t key_size,uint32_t * kernel_out,void * prog_data_out)241 crocus_blorp_lookup_shader(struct blorp_batch *blorp_batch, const void *key,
242                            uint32_t key_size, uint32_t *kernel_out,
243                            void *prog_data_out)
244 {
245    struct blorp_context *blorp = blorp_batch->blorp;
246    struct crocus_context *ice = blorp->driver_ctx;
247    struct crocus_compiled_shader *shader =
248       crocus_find_cached_shader(ice, CROCUS_CACHE_BLORP, key_size, key);
249 
250    if (!shader)
251       return false;
252 
253    *kernel_out = shader->offset;
254    *((void **)prog_data_out) = shader->prog_data;
255 
256    return true;
257 }
258 
259 bool
crocus_blorp_upload_shader(struct blorp_batch * blorp_batch,uint32_t stage,const void * key,uint32_t key_size,const void * kernel,uint32_t kernel_size,const struct brw_stage_prog_data * prog_data_templ,uint32_t prog_data_size,uint32_t * kernel_out,void * prog_data_out)260 crocus_blorp_upload_shader(struct blorp_batch *blorp_batch, uint32_t stage,
261                            const void *key, uint32_t key_size,
262                            const void *kernel, uint32_t kernel_size,
263                            const struct brw_stage_prog_data *prog_data_templ,
264                            uint32_t prog_data_size, uint32_t *kernel_out,
265                            void *prog_data_out)
266 {
267    struct blorp_context *blorp = blorp_batch->blorp;
268    struct crocus_context *ice = blorp->driver_ctx;
269 
270    struct brw_stage_prog_data *prog_data = ralloc_size(NULL, prog_data_size);
271    memcpy(prog_data, prog_data_templ, prog_data_size);
272 
273    struct crocus_binding_table bt;
274    memset(&bt, 0, sizeof(bt));
275 
276    struct crocus_compiled_shader *shader = crocus_upload_shader(
277       ice, CROCUS_CACHE_BLORP, key_size, key, kernel, kernel_size, prog_data,
278       prog_data_size, NULL, NULL, 0, 0, &bt);
279 
280    *kernel_out = shader->offset;
281    *((void **)prog_data_out) = shader->prog_data;
282 
283    return true;
284 }
285 
286 void
crocus_init_program_cache(struct crocus_context * ice)287 crocus_init_program_cache(struct crocus_context *ice)
288 {
289    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
290    ice->shaders.cache =
291       _mesa_hash_table_create(ice, keybox_hash, keybox_equals);
292 
293    ice->shaders.cache_bo =
294       crocus_bo_alloc(screen->bufmgr, "program_cache", 16384);
295    ice->shaders.cache_bo_map =
296       crocus_bo_map(NULL, ice->shaders.cache_bo,
297                     MAP_READ | MAP_WRITE | MAP_ASYNC | MAP_PERSISTENT);
298 }
299 
300 void
crocus_destroy_program_cache(struct crocus_context * ice)301 crocus_destroy_program_cache(struct crocus_context *ice)
302 {
303    for (int i = 0; i < MESA_SHADER_STAGES; i++) {
304       ice->shaders.prog[i] = NULL;
305    }
306 
307    if (ice->shaders.cache_bo) {
308       crocus_bo_unmap(ice->shaders.cache_bo);
309       crocus_bo_unreference(ice->shaders.cache_bo);
310       ice->shaders.cache_bo_map = NULL;
311       ice->shaders.cache_bo = NULL;
312    }
313 
314    ralloc_free(ice->shaders.cache);
315 }
316 
317 static const char *
cache_name(enum crocus_program_cache_id cache_id)318 cache_name(enum crocus_program_cache_id cache_id)
319 {
320    if (cache_id == CROCUS_CACHE_BLORP)
321       return "BLORP";
322 
323    if (cache_id == CROCUS_CACHE_SF)
324       return "SF";
325 
326    if (cache_id == CROCUS_CACHE_CLIP)
327       return "CLIP";
328 
329    if (cache_id == CROCUS_CACHE_FF_GS)
330       return "FF_GS";
331 
332    return _mesa_shader_stage_to_string(cache_id);
333 }
334 
335 void
crocus_print_program_cache(struct crocus_context * ice)336 crocus_print_program_cache(struct crocus_context *ice)
337 {
338    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
339    const struct intel_device_info *devinfo = &screen->devinfo;
340 
341    hash_table_foreach(ice->shaders.cache, entry) {
342       const struct keybox *keybox = entry->key;
343       struct crocus_compiled_shader *shader = entry->data;
344       fprintf(stderr, "%s:\n", cache_name(keybox->cache_id));
345       brw_disassemble(devinfo, ice->shaders.cache_bo_map + shader->offset, 0,
346                       shader->prog_data->program_size, NULL, stderr);
347    }
348 }
349