• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2012 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "blorp_priv.h"
7 #include "blorp_nir_builder.h"
8 #include "compiler/elk/elk_compiler.h"
9 #include "compiler/elk/elk_nir.h"
10 #include "compiler/intel_nir.h"
11 #include "dev/intel_debug.h"
12 
13 static const nir_shader_compiler_options *
blorp_nir_options_elk(struct blorp_context * blorp,gl_shader_stage stage)14 blorp_nir_options_elk(struct blorp_context *blorp,
15                       gl_shader_stage stage)
16 {
17    const struct elk_compiler *compiler = blorp->compiler->elk;
18    return compiler->nir_options[stage];
19 }
20 
21 static struct blorp_program
blorp_compile_fs_elk(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir,bool multisample_fbo,bool is_fast_clear,bool use_repclear)22 blorp_compile_fs_elk(struct blorp_context *blorp, void *mem_ctx,
23                      struct nir_shader *nir,
24                      bool multisample_fbo,
25                      bool is_fast_clear,
26                      bool use_repclear)
27 {
28    const struct elk_compiler *compiler = blorp->compiler->elk;
29 
30    struct elk_wm_prog_data *wm_prog_data = rzalloc(mem_ctx, struct elk_wm_prog_data);
31    wm_prog_data->base.nr_params = 0;
32    wm_prog_data->base.param = NULL;
33 
34    struct elk_nir_compiler_opts opts = {};
35    elk_preprocess_nir(compiler, nir, &opts);
36    nir_remove_dead_variables(nir, nir_var_shader_in, NULL);
37    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
38 
39    struct elk_wm_prog_key wm_key;
40    memset(&wm_key, 0, sizeof(wm_key));
41    wm_key.multisample_fbo = multisample_fbo ? ELK_ALWAYS : ELK_NEVER;
42    wm_key.nr_color_regions = 1;
43 
44    if (compiler->devinfo->ver < 6) {
45       if (nir->info.fs.uses_discard)
46          wm_key.iz_lookup |= ELK_WM_IZ_PS_KILL_ALPHATEST_BIT;
47 
48       wm_key.input_slots_valid = nir->info.inputs_read | VARYING_BIT_POS;
49    }
50 
51    struct elk_compile_fs_params params = {
52       .base = {
53          .mem_ctx = mem_ctx,
54          .nir = nir,
55          .log_data = blorp->driver_ctx,
56          .debug_flag = DEBUG_BLORP,
57       },
58       .key = &wm_key,
59       .prog_data = wm_prog_data,
60 
61       .use_rep_send = use_repclear,
62       .max_polygons = 1,
63    };
64 
65    const unsigned *kernel = elk_compile_fs(compiler, &params);
66    return (struct blorp_program){
67       .kernel         = kernel,
68       .kernel_size    = wm_prog_data->base.program_size,
69       .prog_data      = wm_prog_data,
70       .prog_data_size = sizeof(*wm_prog_data),
71    };
72 }
73 
74 static struct blorp_program
blorp_compile_vs_elk(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir)75 blorp_compile_vs_elk(struct blorp_context *blorp, void *mem_ctx,
76                      struct nir_shader *nir)
77 {
78    const struct elk_compiler *compiler = blorp->compiler->elk;
79 
80    struct elk_nir_compiler_opts opts = {};
81    elk_preprocess_nir(compiler, nir, &opts);
82    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
83 
84    struct elk_vs_prog_data *vs_prog_data = rzalloc(mem_ctx, struct elk_vs_prog_data);
85    vs_prog_data->inputs_read = nir->info.inputs_read;
86 
87    elk_compute_vue_map(compiler->devinfo,
88                        &vs_prog_data->base.vue_map,
89                        nir->info.outputs_written,
90                        nir->info.separate_shader,
91                        1);
92 
93    struct elk_vs_prog_key vs_key = { 0, };
94 
95    struct elk_compile_vs_params params = {
96       .base = {
97          .mem_ctx = mem_ctx,
98          .nir = nir,
99          .log_data = blorp->driver_ctx,
100          .debug_flag = DEBUG_BLORP,
101       },
102       .key = &vs_key,
103       .prog_data = vs_prog_data,
104    };
105 
106    const unsigned *kernel = elk_compile_vs(compiler, &params);
107    return (struct blorp_program) {
108       .kernel         = kernel,
109       .kernel_size    = vs_prog_data->base.base.program_size,
110       .prog_data      = vs_prog_data,
111       .prog_data_size = sizeof(*vs_prog_data),
112    };
113 }
114 
115 static bool
lower_base_workgroup_id(nir_builder * b,nir_intrinsic_instr * intrin,UNUSED void * data)116 lower_base_workgroup_id(nir_builder *b, nir_intrinsic_instr *intrin,
117                         UNUSED void *data)
118 {
119    if (intrin->intrinsic != nir_intrinsic_load_base_workgroup_id)
120       return false;
121 
122    b->cursor = nir_instr_remove(&intrin->instr);
123    nir_def_rewrite_uses(&intrin->def, nir_imm_zero(b, 3, 32));
124    return true;
125 }
126 
127 static struct blorp_program
blorp_compile_cs_elk(struct blorp_context * blorp,void * mem_ctx,struct nir_shader * nir)128 blorp_compile_cs_elk(struct blorp_context *blorp, void *mem_ctx,
129                      struct nir_shader *nir)
130 {
131    const struct elk_compiler *compiler = blorp->compiler->elk;
132 
133    struct elk_nir_compiler_opts opts = {};
134    elk_preprocess_nir(compiler, nir, &opts);
135    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
136 
137    NIR_PASS_V(nir, nir_lower_io, nir_var_uniform, elk_type_size_scalar_bytes,
138               (nir_lower_io_options)0);
139 
140    STATIC_ASSERT(offsetof(struct blorp_wm_inputs, subgroup_id) + 4 ==
141                  sizeof(struct blorp_wm_inputs));
142    nir->num_uniforms = offsetof(struct blorp_wm_inputs, subgroup_id);
143    unsigned nr_params = nir->num_uniforms / 4;
144 
145    struct elk_cs_prog_data *cs_prog_data = rzalloc(mem_ctx, struct elk_cs_prog_data);
146    cs_prog_data->base.nr_params = nr_params;
147    cs_prog_data->base.param = rzalloc_array(NULL, uint32_t, nr_params);
148 
149    NIR_PASS_V(nir, elk_nir_lower_cs_intrinsics, compiler->devinfo,
150               cs_prog_data);
151    NIR_PASS_V(nir, nir_shader_intrinsics_pass, lower_base_workgroup_id,
152               nir_metadata_control_flow, NULL);
153 
154    struct elk_cs_prog_key cs_key;
155    memset(&cs_key, 0, sizeof(cs_key));
156 
157    struct elk_compile_cs_params params = {
158       .base = {
159          .mem_ctx = mem_ctx,
160          .nir = nir,
161          .log_data = blorp->driver_ctx,
162          .debug_flag = DEBUG_BLORP,
163       },
164       .key = &cs_key,
165       .prog_data = cs_prog_data,
166    };
167 
168    const unsigned *kernel = elk_compile_cs(compiler, &params);
169 
170    ralloc_free(cs_prog_data->base.param);
171    cs_prog_data->base.param = NULL;
172 
173    return (struct blorp_program) {
174       .kernel         = kernel,
175       .kernel_size    = cs_prog_data->base.program_size,
176       .prog_data      = cs_prog_data,
177       .prog_data_size = sizeof(*cs_prog_data),
178    };
179 }
180 
181 struct blorp_sf_key {
182    struct blorp_base_key base;
183    struct elk_sf_prog_key key;
184 };
185 
186 static bool
blorp_ensure_sf_program_elk(struct blorp_batch * batch,struct blorp_params * params)187 blorp_ensure_sf_program_elk(struct blorp_batch *batch,
188                             struct blorp_params *params)
189 {
190    struct blorp_context *blorp = batch->blorp;
191    const struct elk_compiler *compiler = blorp->compiler->elk;
192    const struct elk_wm_prog_data *wm_prog_data = params->wm_prog_data;
193    assert(params->wm_prog_data);
194 
195    /* Gfx6+ doesn't need a strips and fans program */
196    if (compiler->devinfo->ver >= 6)
197       return true;
198 
199    struct blorp_sf_key key = {
200       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_GFX4_SF),
201    };
202 
203    /* Everything gets compacted in vertex setup, so we just need a
204     * pass-through for the correct number of input varyings.
205     */
206    const uint64_t slots_valid = VARYING_BIT_POS |
207       ((1ull << wm_prog_data->num_varying_inputs) - 1) << VARYING_SLOT_VAR0;
208 
209    key.key.attrs = slots_valid;
210    key.key.primitive = ELK_SF_PRIM_TRIANGLES;
211    key.key.contains_flat_varying = wm_prog_data->contains_flat_varying;
212 
213    STATIC_ASSERT(sizeof(key.key.interp_mode) ==
214                  sizeof(wm_prog_data->interp_mode));
215    memcpy(key.key.interp_mode, wm_prog_data->interp_mode,
216           sizeof(key.key.interp_mode));
217 
218    if (blorp->lookup_shader(batch, &key, sizeof(key),
219                             &params->sf_prog_kernel, &params->sf_prog_data))
220       return true;
221 
222    void *mem_ctx = ralloc_context(NULL);
223 
224    const unsigned *program;
225    unsigned program_size;
226 
227    struct intel_vue_map vue_map;
228    elk_compute_vue_map(compiler->devinfo, &vue_map, slots_valid, false, 1);
229 
230    struct elk_sf_prog_data prog_data_tmp;
231    program = elk_compile_sf(compiler, mem_ctx, &key.key,
232                             &prog_data_tmp, &vue_map, &program_size);
233 
234    bool result =
235       blorp->upload_shader(batch, MESA_SHADER_NONE,
236                            &key, sizeof(key), program, program_size,
237                            (void *)&prog_data_tmp, sizeof(prog_data_tmp),
238                            &params->sf_prog_kernel, &params->sf_prog_data);
239 
240    ralloc_free(mem_ctx);
241 
242    return result;
243 }
244 
245 #pragma pack(push, 1)
246 struct layer_offset_vs_key {
247    struct blorp_base_key base;
248    unsigned num_inputs;
249 };
250 #pragma pack(pop)
251 
252 /* In the case of doing attachment clears, we are using a surface state that
253  * is handed to us so we can't set (and don't even know) the base array layer.
254  * In order to do a layered clear in this scenario, we need some way of adding
255  * the base array layer to the instance id.  Unfortunately, our hardware has
256  * no real concept of "base instance", so we have to do it manually in a
257  * vertex shader.
258  */
259 static bool
blorp_params_get_layer_offset_vs_elk(struct blorp_batch * batch,struct blorp_params * params)260 blorp_params_get_layer_offset_vs_elk(struct blorp_batch *batch,
261                                      struct blorp_params *params)
262 {
263    struct blorp_context *blorp = batch->blorp;
264    struct layer_offset_vs_key blorp_key = {
265       .base = BLORP_BASE_KEY_INIT(BLORP_SHADER_TYPE_LAYER_OFFSET_VS),
266    };
267 
268    struct elk_wm_prog_data *wm_prog_data = params->wm_prog_data;
269    if (wm_prog_data)
270       blorp_key.num_inputs = wm_prog_data->num_varying_inputs;
271 
272    if (blorp->lookup_shader(batch, &blorp_key, sizeof(blorp_key),
273                             &params->vs_prog_kernel, &params->vs_prog_data))
274       return true;
275 
276    void *mem_ctx = ralloc_context(NULL);
277 
278    nir_builder b;
279    blorp_nir_init_shader(&b, blorp, mem_ctx, MESA_SHADER_VERTEX,
280                          blorp_shader_type_to_name(blorp_key.base.shader_type));
281 
282    const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
283 
284    /* First we deal with the header which has instance and base instance */
285    nir_variable *a_header = nir_variable_create(b.shader, nir_var_shader_in,
286                                                 uvec4_type, "header");
287    a_header->data.location = VERT_ATTRIB_GENERIC0;
288 
289    nir_variable *v_layer = nir_variable_create(b.shader, nir_var_shader_out,
290                                                glsl_int_type(), "layer_id");
291    v_layer->data.location = VARYING_SLOT_LAYER;
292 
293    /* Compute the layer id */
294    nir_def *header = nir_load_var(&b, a_header);
295    nir_def *base_layer = nir_channel(&b, header, 0);
296    nir_def *instance = nir_channel(&b, header, 1);
297    nir_store_var(&b, v_layer, nir_iadd(&b, instance, base_layer), 0x1);
298 
299    /* Then we copy the vertex from the next slot to VARYING_SLOT_POS */
300    nir_variable *a_vertex = nir_variable_create(b.shader, nir_var_shader_in,
301                                                 glsl_vec4_type(), "a_vertex");
302    a_vertex->data.location = VERT_ATTRIB_GENERIC1;
303 
304    nir_variable *v_pos = nir_variable_create(b.shader, nir_var_shader_out,
305                                              glsl_vec4_type(), "v_pos");
306    v_pos->data.location = VARYING_SLOT_POS;
307 
308    nir_copy_var(&b, v_pos, a_vertex);
309 
310    /* Then we copy everything else */
311    for (unsigned i = 0; i < blorp_key.num_inputs; i++) {
312       nir_variable *a_in = nir_variable_create(b.shader, nir_var_shader_in,
313                                                uvec4_type, "input");
314       a_in->data.location = VERT_ATTRIB_GENERIC2 + i;
315 
316       nir_variable *v_out = nir_variable_create(b.shader, nir_var_shader_out,
317                                                 uvec4_type, "output");
318       v_out->data.location = VARYING_SLOT_VAR0 + i;
319 
320       nir_copy_var(&b, v_out, a_in);
321    }
322 
323    const struct blorp_program p =
324       blorp_compile_vs(blorp, mem_ctx, b.shader);
325 
326    bool result =
327       blorp->upload_shader(batch, MESA_SHADER_VERTEX,
328                            &blorp_key, sizeof(blorp_key),
329                            p.kernel, p.kernel_size,
330                            p.prog_data, p.prog_data_size,
331                            &params->vs_prog_kernel, &params->vs_prog_data);
332 
333    ralloc_free(mem_ctx);
334    return result;
335 }
336 
337 void
blorp_init_elk(struct blorp_context * blorp,void * driver_ctx,struct isl_device * isl_dev,const struct elk_compiler * elk,const struct blorp_config * config)338 blorp_init_elk(struct blorp_context *blorp, void *driver_ctx,
339                struct isl_device *isl_dev, const struct elk_compiler *elk,
340                const struct blorp_config *config)
341 {
342    blorp_init(blorp, driver_ctx, isl_dev, config);
343    assert(elk);
344 
345    blorp->compiler->elk = elk;
346    blorp->compiler->nir_options = blorp_nir_options_elk;
347    blorp->compiler->compile_fs = blorp_compile_fs_elk;
348    blorp->compiler->compile_vs = blorp_compile_vs_elk;
349    blorp->compiler->compile_cs = blorp_compile_cs_elk;
350    blorp->compiler->ensure_sf_program = blorp_ensure_sf_program_elk;
351    blorp->compiler->params_get_layer_offset_vs =
352       blorp_params_get_layer_offset_vs_elk;
353 }
354