/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "compiler/intel_nir.h"
#include "compiler/brw_compiler.h"
#include "compiler/brw_nir.h"
#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "dev/intel_debug.h"
#include "intel/compiler/intel_nir.h"
#include "util/macros.h"

#include "vk_nir.h"

#include "anv_internal_kernels.h"

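/* Internal kernels are never launched with a dispatch base, so
 * load_base_workgroup_id is known to be zero. Fold the intrinsic to a
 * constant so the backend never has to materialize it.
 */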
static bool
lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
                        UNUSED void *data)
{
   if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);
   nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32));
   return true;
}

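/* Fetch the generation-specific libanv SPIR-V blob and turn it into a NIR
 * shader that the individual kernels below can link against.
 */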
static nir_shader *
load_libanv(struct anv_device *device)
{
   uint32_t spv_size;
   const uint32_t *spv_code = anv_genX(device->info, libanv_spv)(&spv_size);

   void *mem_ctx = ralloc_context(NULL);

   return brw_nir_from_spirv(mem_ctx, spv_code, spv_size);
}

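/* Resolve the kernel's calls into libanv: link the function definitions,
 * inline them into the entrypoint, then lower variables and memory accesses
 * to explicit I/O using the 62-bit generic address format.
 */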
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
   nir_link_shader_functions(nir, libanv);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_remove_non_entrypoints);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
                 nir_var_mem_global,
              nir_address_format_62bit_generic);
   NIR_PASS_V(nir, nir_lower_scratch_to_var);
}

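/* Build and compile a single internal kernel: emit its body with the genX
 * helper, link it against libanv, run the NIR optimization/lowering
 * pipeline, compile with the backend and upload the binary to the internal
 * cache. sends_count_expectation is asserted against the backend statistics
 * so any codegen change to these hand-counted kernels is caught immediately.
 */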
static struct anv_shader_bin *
compile_shader(struct anv_device *device,
               const nir_shader *libanv,
               enum anv_internal_kernel_name shader_name,
               gl_shader_stage stage,
               const char *name,
               const void *hash_key,
               uint32_t hash_key_size,
               uint32_t sends_count_expectation)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[stage];

   nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
                                                  "%s", name);

   uint32_t uniform_size =
      anv_genX(device->info, call_internal_shader)(&b, shader_name);

   nir_shader *nir = b.shader;

   link_libanv(nir, libanv);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

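   /* Internal compute kernels always use a fixed 16x1x1 local workgroup
    * size.
    */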
   if (stage == MESA_SHADER_COMPUTE) {
      nir->info.workgroup_size[0] = 16;
      nir->info.workgroup_size[1] = 1;
      nir->info.workgroup_size[2] = 1;
   }

   struct brw_compiler *compiler = device->physical->compiler;
   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   if (stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = true,
                    .use_layer_id_sysval = true,
                 });
   } else {
      nir_lower_compute_system_values_options options = {
         .has_base_workgroup_id = true,
         .lower_cs_local_id_to_index = true,
         .lower_workgroup_id_to_index = gl_shader_stage_is_mesh(stage),
      };
      NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
      NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
                 nir_metadata_control_flow, NULL);
   }

   /* Reset sizes before gathering information */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_opt_dce);

   union brw_any_prog_key key;
   memset(&key, 0, sizeof(key));

   union brw_any_prog_data prog_data;
   memset(&prog_data, 0, sizeof(prog_data));

   if (stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics,
                 device->info, &prog_data.cs);
   }

   /* Do the vectorizing here. For some reason, trying to do it later in the
    * backend just doesn't work.
    */
   nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = brw_nir_should_vectorize_mem,
      .robust_modes = (nir_variable_mode)0,
   };
   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);

   nir->num_uniforms = uniform_size;

   prog_data.base.nr_params = nir->num_uniforms / 4;

   brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);

   void *temp_ctx = ralloc_context(NULL);

   const unsigned *program;
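   /* Fragment shaders may be compiled for several dispatch widths
    * (SIMD8/16/32), each with its own brw_compile_stats entry. Unless
    * shader-print instrumentation is enabled (which adds sends of its own),
    * each compiled width must be free of spills/fills and match the
    * expected send count; before Gfx20 the SIMD32 variant is expected to
    * issue twice as many sends.
    */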
   if (stage == MESA_SHADER_FRAGMENT) {
      struct brw_compile_stats stats[3];
      struct brw_compile_fs_params params = {
         .base = {
            .nir = nir,
            .log_data = device,
            .debug_flag = DEBUG_WM,
            .stats = stats,
            .mem_ctx = temp_ctx,
         },
         .key = &key.wm,
         .prog_data = &prog_data.wm,
      };
      program = brw_compile_fs(compiler, &params);

      if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
         unsigned stat_idx = 0;
         if (prog_data.wm.dispatch_8) {
            assert(stats[stat_idx].spills == 0);
            assert(stats[stat_idx].fills == 0);
            assert(stats[stat_idx].sends == sends_count_expectation);
            stat_idx++;
         }
         if (prog_data.wm.dispatch_16) {
            assert(stats[stat_idx].spills == 0);
            assert(stats[stat_idx].fills == 0);
            assert(stats[stat_idx].sends == sends_count_expectation);
            stat_idx++;
         }
         if (prog_data.wm.dispatch_32) {
            assert(stats[stat_idx].spills == 0);
            assert(stats[stat_idx].fills == 0);
            assert(stats[stat_idx].sends ==
                   sends_count_expectation *
                   (device->info->ver < 20 ? 2 : 1));
            stat_idx++;
         }
      }
   } else {
      struct brw_compile_stats stats;
      struct brw_compile_cs_params params = {
         .base = {
            .nir = nir,
            .stats = &stats,
            .log_data = device,
            .debug_flag = DEBUG_CS,
            .mem_ctx = temp_ctx,
         },
         .key = &key.cs,
         .prog_data = &prog_data.cs,
      };
      program = brw_compile_cs(compiler, &params);

      if (!INTEL_DEBUG(DEBUG_SHADER_PRINT)) {
         assert(stats.spills == 0);
         assert(stats.fills == 0);
         assert(stats.sends == sends_count_expectation);
      }
   }

   assert(prog_data.base.total_scratch == 0);
   assert(program != NULL);
   struct anv_shader_bin *kernel = NULL;
   if (program == NULL)
      goto exit;

   struct anv_pipeline_bind_map empty_bind_map = {};
   struct anv_push_descriptor_info empty_push_desc_info = {};
   struct anv_shader_upload_params upload_params = {
      .stage               = nir->info.stage,
      .key_data            = hash_key,
      .key_size            = hash_key_size,
      .kernel_data         = program,
      .kernel_size         = prog_data.base.program_size,
      .prog_data           = &prog_data.base,
      .prog_data_size      = sizeof(prog_data),
      .bind_map            = &empty_bind_map,
      .push_desc_info      = &empty_push_desc_info,
   };

   kernel = anv_device_upload_kernel(device, device->internal_cache, &upload_params);

exit:
   ralloc_free(temp_ctx);
   ralloc_free(nir);

   return kernel;
}

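/* Return the requested internal kernel, compiling it on first use. The
 * lookup goes from cheapest to most expensive: the device's cached pointer,
 * then the internal pipeline cache, then a fresh compile. The fixed-size
 * key.name string serves as both the human-readable shader name and the
 * cache key.
 */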
VkResult
anv_device_get_internal_shader(struct anv_device *device,
                               enum anv_internal_kernel_name name,
                               struct anv_shader_bin **out_bin)
{
   const struct {
      struct {
         char name[40];
      } key;

      gl_shader_stage stage;

      uint32_t        send_count;
   } internal_kernels[] = {
      [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
         .key        = {
            .name    = "anv-generated-indirect-draws",
         },
         .stage      = MESA_SHADER_FRAGMENT,
         .send_count = (device->info->ver == 9 ?
                        /* 1 load +
                         * 4 stores +
                         * 2 * (2 loads + 2 stores) +
                         * 3 stores
                         */
                        16 :
                        /* 1 load +
                         * 2 * (2 loads + 3 stores) +
                         * 3 stores
                         */
                        14) +
         /* 3 loads + 3 stores */
         (intel_needs_workaround(device->info, 16011107343) ? 6 : 0) +
         /* 3 loads + 3 stores */
         (intel_needs_workaround(device->info, 22018402687) ? 6 : 0),
      },
      [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
         .key        = {
            .name    = "anv-copy-query-compute",
         },
         .stage      = MESA_SHADER_COMPUTE,
         .send_count = device->info->verx10 >= 125 ?
                       9 /* 4 loads + 4 stores + 1 EOT */ :
                       8 /* 3 loads + 4 stores + 1 EOT */,
      },
      [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
         .key        = {
            .name    = "anv-copy-query-fragment",
         },
         .stage      = MESA_SHADER_FRAGMENT,
         .send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
      },
      [ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
         .key        = {
            .name    = "anv-memcpy-compute",
         },
         .stage      = MESA_SHADER_COMPUTE,
         .send_count = device->info->verx10 >= 125 ?
                       10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
                       9 /* 4 loads + 4 stores + 1 EOT */,
      },
   };
332 
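   /* Fast path: this kernel has already been resolved on this device. */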
   struct anv_shader_bin *bin =
      p_atomic_read(&device->internal_kernels[name]);
   if (bin != NULL) {
      *out_bin = bin;
      return VK_SUCCESS;
   }

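   /* Otherwise look the key up in the internal pipeline cache before
    * falling back to a fresh compile.
    */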
   bin =
      anv_device_search_for_kernel(device,
                                   device->internal_cache,
                                   &internal_kernels[name].key,
                                   sizeof(internal_kernels[name].key),
                                   NULL);
   if (bin != NULL) {
      p_atomic_set(&device->internal_kernels[name], bin);
      *out_bin = bin;
      return VK_SUCCESS;
   }

   nir_shader *libanv_shaders = load_libanv(device);

   bin = compile_shader(device,
                        libanv_shaders,
                        name,
                        internal_kernels[name].stage,
                        internal_kernels[name].key.name,
                        &internal_kernels[name].key,
                        sizeof(internal_kernels[name].key),
                        internal_kernels[name].send_count);
   ralloc_free(libanv_shaders);
   if (bin == NULL)
      return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "Unable to compile internal kernel");

   /* The cache already holds a reference and it's not going anywhere, so
    * there is no need to hold a second one.
    */
   anv_shader_bin_unref(device, bin);

   p_atomic_set(&device->internal_kernels[name], bin);

   *out_bin = bin;
   return VK_SUCCESS;
}

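/* One-time device setup: choose the L3 configuration internal kernels run
 * with, weighted towards the data cache (wants_dc_cache) and without SLM.
 */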
VkResult
anv_device_init_internal_kernels(struct anv_device *device)
{
   const struct intel_l3_weights w =
      intel_get_default_l3_weights(device->info,
                                   true /* wants_dc_cache */,
                                   false /* needs_slm */);
   device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);

   return VK_SUCCESS;
}

void
anv_device_finish_internal_kernels(struct anv_device *device)
{
}
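
/* Illustrative use (a hypothetical caller, not part of this file): code
 * that needs one of these kernels fetches it lazily, e.g.
 *
 *    struct anv_shader_bin *bin;
 *    VkResult result =
 *       anv_device_get_internal_shader(device,
 *                                      ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE,
 *                                      &bin);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 * The returned pointer is borrowed: the internal cache holds the reference
 * that keeps the binary alive for the lifetime of the device.
 */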
394