• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_private.h"
25 
26 #include "compiler/intel_nir.h"
27 #include "compiler/brw_compiler.h"
28 #include "compiler/brw_nir.h"
29 #include "compiler/nir/nir.h"
30 #include "compiler/nir/nir_builder.h"
31 #include "dev/intel_debug.h"
32 #include "intel/compiler/intel_nir.h"
33 #include "util/macros.h"
34 
35 #include "vk_nir.h"
36 
37 #include "anv_internal_kernels.h"
38 
39 static bool
lower_base_workgroup_id(nir_builder * b,nir_intrinsic_instr * intrin,UNUSED void * data)40 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
41                         UNUSED void *data)
42 {
43    if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
44       return false;
45 
46    b->cursor = nir_instr_remove(&intrin->instr);
47    nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32));
48    return true;
49 }
50 
/* Link the libanv shader library into `nir` and flatten the result:
 * resolve called functions against `libanv`, inline everything into the
 * entrypoint, then lower variables and derefs down to explicit memory
 * accesses so no function-call or opaque-deref machinery survives.
 */
static void
link_libanv(nir_shader *nir, const nir_shader *libanv)
{
   nir_link_shader_functions(nir, libanv);
   NIR_PASS_V(nir, nir_inline_functions);
   /* Everything is inlined now; drop the leftover function shells. */
   NIR_PASS_V(nir, nir_remove_non_entrypoints);
   /* Give function-temp variables explicit sizes/alignments using the
    * CL type layout rules (the library is built from OpenCL-C sources).
    */
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
              glsl_get_cl_type_size_align);
   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
                 nir_var_mem_global,
              nir_address_format_62bit_generic);
}
66 
/* Build, optimize and compile one internal kernel, then upload it to the
 * device's internal shader cache.
 *
 * The shader body comes from the genX `call_internal_shader` hook (which
 * emits a call for `shader_name`) linked against the `libanv` library.
 * `hash_key`/`hash_key_size` identify the binary in the cache, and
 * `sends_count_expectation` is asserted against the backend's send-message
 * statistics to catch codegen regressions in these fixed kernels (debug
 * builds only — these are assert()s).
 *
 * Returns the uploaded anv_shader_bin, or NULL on failure.
 */
static struct anv_shader_bin *
compile_shader(struct anv_device *device,
               const nir_shader *libanv,
               enum anv_internal_kernel_name shader_name,
               gl_shader_stage stage,
               const char *name,
               const void *hash_key,
               uint32_t hash_key_size,
               uint32_t sends_count_expectation)
{
   const nir_shader_compiler_options *nir_options =
      device->physical->compiler->nir_options[stage];

   nir_builder b = nir_builder_init_simple_shader(stage, nir_options,
                                                  "%s", name);

   /* Emit the call to the library entrypoint for this kernel; the hook
    * returns how many bytes of uniform data the kernel consumes.
    */
   uint32_t uniform_size =
      anv_genX(device->info, call_internal_shader)(&b, shader_name);

   nir_shader *nir = b.shader;

   link_libanv(nir, libanv);

   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   /* All internal compute kernels use a fixed 16x1x1 workgroup. */
   if (stage == MESA_SHADER_COMPUTE) {
      nir->info.workgroup_size[0] = 16;
      nir->info.workgroup_size[1] = 1;
      nir->info.workgroup_size[2] = 1;
   }

   struct brw_compiler *compiler = device->physical->compiler;
   struct brw_nir_compiler_opts opts = {};
   brw_preprocess_nir(compiler, nir, &opts);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   if (stage == MESA_SHADER_FRAGMENT) {
      /* Internal fragment shaders read their inputs through sysvals
       * rather than real input attachments.
       */
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                    .use_fragcoord_sysval = true,
                    .use_layer_id_sysval = true,
                 });
   } else {
      nir_lower_compute_system_values_options options = {
         .has_base_workgroup_id = true,
         .lower_cs_local_id_to_index = true,
         .lower_workgroup_id_to_index = true,
      };
      NIR_PASS_V(nir, nir_lower_compute_system_values, &options);
      /* The lowering above may emit load_base_workgroup_id; internal
       * dispatches fold it to zero (see lower_base_workgroup_id).
       */
      NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
                 nir_metadata_block_index | nir_metadata_dominance, NULL);
   }

   /* Reset sizes before gathering information */
   nir->global_mem_size = 0;
   nir->scratch_size = 0;
   nir->info.shared_size = 0;
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);
   NIR_PASS_V(nir, nir_opt_dce);

   /* These kernels take no API-level state, so the program keys are
    * all-zeros defaults.
    */
   union brw_any_prog_key key;
   memset(&key, 0, sizeof(key));

   union brw_any_prog_data prog_data;
   memset(&prog_data, 0, sizeof(prog_data));

   if (stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics,
                 device->info, &prog_data.cs);
   }

   /* Do vectorizing here. For some reason when trying to do it in the back
    * this just isn't working.
    */
   nir_load_store_vectorize_options options = {
      .modes = nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global,
      .callback = brw_nir_should_vectorize_mem,
      .robust_modes = (nir_variable_mode)0,
   };
   NIR_PASS_V(nir, nir_opt_load_store_vectorize, &options);

   nir->num_uniforms = uniform_size;

   /* brw params are in 32-bit units, uniform_size is in bytes. */
   prog_data.base.nr_params = nir->num_uniforms / 4;

   brw_nir_analyze_ubo_ranges(compiler, nir, prog_data.base.ubo_ranges);

   /* Scratch context for the backend compiler; freed once the binary has
    * been copied into the cache by anv_device_upload_kernel().
    */
   void *temp_ctx = ralloc_context(NULL);

   const unsigned *program;
   if (stage == MESA_SHADER_FRAGMENT) {
      /* One stats entry per possible SIMD variant (8/16/32). */
      struct brw_compile_stats stats[3];
      struct brw_compile_fs_params params = {
         .base = {
            .nir = nir,
            .log_data = device,
            .debug_flag = DEBUG_WM,
            .stats = stats,
            .mem_ctx = temp_ctx,
         },
         .key = &key.wm,
         .prog_data = &prog_data.wm,
      };
      program = brw_compile_fs(compiler, &params);

      /* Sanity-check each compiled SIMD variant: no spills/fills and
       * exactly the expected number of send messages (SIMD32 issues
       * twice as many).
       */
      unsigned stat_idx = 0;
      if (prog_data.wm.dispatch_8) {
         assert(stats[stat_idx].spills == 0);
         assert(stats[stat_idx].fills == 0);
         assert(stats[stat_idx].sends == sends_count_expectation);
         stat_idx++;
      }
      if (prog_data.wm.dispatch_16) {
         assert(stats[stat_idx].spills == 0);
         assert(stats[stat_idx].fills == 0);
         assert(stats[stat_idx].sends == sends_count_expectation);
         stat_idx++;
      }
      if (prog_data.wm.dispatch_32) {
         assert(stats[stat_idx].spills == 0);
         assert(stats[stat_idx].fills == 0);
         assert(stats[stat_idx].sends == sends_count_expectation * 2);
         stat_idx++;
      }
   } else {
      struct brw_compile_stats stats;
      struct brw_compile_cs_params params = {
         .base = {
            .nir = nir,
            .stats = &stats,
            .log_data = device,
            .debug_flag = DEBUG_CS,
            .mem_ctx = temp_ctx,
         },
         .key = &key.cs,
         .prog_data = &prog_data.cs,
      };
      program = brw_compile_cs(compiler, &params);

      assert(stats.spills == 0);
      assert(stats.fills == 0);
      assert(stats.sends == sends_count_expectation);
   }

   /* Internal kernels must never need scratch space. */
   assert(prog_data.base.total_scratch == 0);

   /* No descriptor sets / push descriptors: these kernels get their data
    * through the uniform block set up by call_internal_shader.
    */
   struct anv_pipeline_bind_map empty_bind_map = {};
   struct anv_push_descriptor_info empty_push_desc_info = {};
   struct anv_shader_upload_params upload_params = {
      .stage               = nir->info.stage,
      .key_data            = hash_key,
      .key_size            = hash_key_size,
      .kernel_data         = program,
      .kernel_size         = prog_data.base.program_size,
      .prog_data           = &prog_data.base,
      .prog_data_size      = sizeof(prog_data),
      .bind_map            = &empty_bind_map,
      .push_desc_info      = &empty_push_desc_info,
   };

   struct anv_shader_bin *kernel =
      anv_device_upload_kernel(device, device->internal_cache, &upload_params);

   ralloc_free(temp_ctx);
   ralloc_free(nir);

   return kernel;
}
247 
248 VkResult
anv_device_init_internal_kernels(struct anv_device * device)249 anv_device_init_internal_kernels(struct anv_device *device)
250 {
251    const struct intel_l3_weights w =
252       intel_get_default_l3_weights(device->info,
253                                    true /* wants_dc_cache */,
254                                    false /* needs_slm */);
255    device->internal_kernels_l3_config = intel_get_l3_config(device->info, w);
256 
257    void *mem_ctx = ralloc_context(NULL);
258 
259    nir_shader *libanv_shaders =
260       anv_genX(device->info, load_libanv_shader)(device, mem_ctx);
261 
262    const struct {
263       struct {
264          char name[40];
265       } key;
266 
267       gl_shader_stage stage;
268 
269       uint32_t        send_count;
270    } internal_kernels[] = {
271       [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
272          .key        = {
273             .name    = "anv-generated-indirect-draws",
274          },
275          .stage      = MESA_SHADER_FRAGMENT,
276          .send_count =  (device->info->ver == 9 ?
277                          /* 1 load +
278                           * 4 stores +
279                           * 2 * (2 loads + 2 stores) +
280                           * 3 stores
281                           */
282                          16 :
283                          /* 1 load +
284                           * 2 * (2 loads + 3 stores) +
285                           * 3 stores
286                           */
287                          14),
288       },
289       [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE] = {
290          .key        = {
291             .name    = "anv-copy-query-compute",
292          },
293          .stage      = MESA_SHADER_COMPUTE,
294          .send_count = device->info->verx10 >= 125 ?
295                        9 /* 4 loads + 4 stores + 1 EOT */ :
296                        8 /* 3 loads + 4 stores + 1 EOT */,
297       },
298       [ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT] = {
299          .key        = {
300             .name    = "anv-copy-query-fragment",
301          },
302          .stage      = MESA_SHADER_FRAGMENT,
303          .send_count = 8 /* 3 loads + 4 stores + 1 EOT */,
304       },
305       [ANV_INTERNAL_KERNEL_MEMCPY_COMPUTE] = {
306          .key        = {
307             .name    = "anv-memcpy-compute",
308          },
309          .stage      = MESA_SHADER_COMPUTE,
310          .send_count = device->info->verx10 >= 125 ?
311                        10 /* 5 loads (1 pull constants) + 4 stores + 1 EOT */ :
312                        9 /* 4 loads + 4 stores + 1 EOT */,
313       },
314    };
315 
316    VkResult result = VK_SUCCESS;
317    for (uint32_t i = 0; i < ARRAY_SIZE(internal_kernels); i++) {
318       device->internal_kernels[i] =
319          anv_device_search_for_kernel(device,
320                                       device->internal_cache,
321                                       &internal_kernels[i].key,
322                                       sizeof(internal_kernels[i].key),
323                                       NULL);
324       if (device->internal_kernels[i] == NULL) {
325          device->internal_kernels[i] =
326             compile_shader(device,
327                            libanv_shaders,
328                            i,
329                            internal_kernels[i].stage,
330                            internal_kernels[i].key.name,
331                            &internal_kernels[i].key,
332                            sizeof(internal_kernels[i].key),
333                            internal_kernels[i].send_count);
334       }
335       if (device->internal_kernels[i] == NULL) {
336          result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
337          goto error;
338       }
339 
340       /* The cache already has a reference and it's not going anywhere so
341        * there is no need to hold a second reference.
342        */
343       anv_shader_bin_unref(device, device->internal_kernels[i]);
344    }
345 
346  error:
347    return result;
348 }
349 
/* Counterpart to anv_device_init_internal_kernels(). Intentionally empty:
 * init drops its extra references right away (the internal cache owns the
 * kernels — see the unref in the init loop), so there is no per-device
 * state left to tear down here.
 */
void
anv_device_finish_internal_kernels(struct anv_device *device)
{
}
354