• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23 
/**
 * \file brw_tcs.c
 *
 * Tessellation control shader state upload code.
 */
29 
30 #include "brw_context.h"
31 #include "compiler/brw_nir.h"
32 #include "brw_program.h"
33 #include "brw_state.h"
34 #include "program/prog_parameter.h"
35 #include "nir_builder.h"
36 
37 static nir_shader *
create_passthrough_tcs(void * mem_ctx,const struct brw_compiler * compiler,const nir_shader_compiler_options * options,const struct brw_tcs_prog_key * key)38 create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
39                        const nir_shader_compiler_options *options,
40                        const struct brw_tcs_prog_key *key)
41 {
42    nir_builder b;
43    nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL,
44                                   options);
45    nir_shader *nir = b.shader;
46    nir_variable *var;
47    nir_intrinsic_instr *load;
48    nir_intrinsic_instr *store;
49    nir_ssa_def *zero = nir_imm_int(&b, 0);
50    nir_ssa_def *invoc_id =
51       nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);
52 
53    nir->info.inputs_read = key->outputs_written &
54       ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
55    nir->info.outputs_written = key->outputs_written;
56    nir->info.tess.tcs_vertices_out = key->input_vertices;
57    nir->info.name = ralloc_strdup(nir, "passthrough");
58    nir->num_uniforms = 8 * sizeof(uint32_t);
59 
60    var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0");
61    var->data.location = 0;
62    var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1");
63    var->data.location = 1;
64 
65    /* Write the patch URB header. */
66    for (int i = 0; i <= 1; i++) {
67       load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
68       load->num_components = 4;
69       load->src[0] = nir_src_for_ssa(zero);
70       nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
71       nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t));
72       nir_builder_instr_insert(&b, &load->instr);
73 
74       store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output);
75       store->num_components = 4;
76       store->src[0] = nir_src_for_ssa(&load->dest.ssa);
77       store->src[1] = nir_src_for_ssa(zero);
78       nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i);
79       nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
80       nir_builder_instr_insert(&b, &store->instr);
81    }
82 
83    /* Copy inputs to outputs. */
84    uint64_t varyings = nir->info.inputs_read;
85 
86    while (varyings != 0) {
87       const int varying = ffsll(varyings) - 1;
88 
89       load = nir_intrinsic_instr_create(nir,
90                                         nir_intrinsic_load_per_vertex_input);
91       load->num_components = 4;
92       load->src[0] = nir_src_for_ssa(invoc_id);
93       load->src[1] = nir_src_for_ssa(zero);
94       nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
95       nir_intrinsic_set_base(load, varying);
96       nir_builder_instr_insert(&b, &load->instr);
97 
98       store = nir_intrinsic_instr_create(nir,
99                                          nir_intrinsic_store_per_vertex_output);
100       store->num_components = 4;
101       store->src[0] = nir_src_for_ssa(&load->dest.ssa);
102       store->src[1] = nir_src_for_ssa(invoc_id);
103       store->src[2] = nir_src_for_ssa(zero);
104       nir_intrinsic_set_base(store, varying);
105       nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
106       nir_builder_instr_insert(&b, &store->instr);
107 
108       varyings &= ~BITFIELD64_BIT(varying);
109    }
110 
111    nir_validate_shader(nir);
112 
113    nir = brw_preprocess_nir(compiler, nir);
114 
115    return nir;
116 }
117 
118 static void
brw_tcs_debug_recompile(struct brw_context * brw,struct gl_program * prog,const struct brw_tcs_prog_key * key)119 brw_tcs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
120                        const struct brw_tcs_prog_key *key)
121 {
122    perf_debug("Recompiling tessellation control shader for program %d\n",
123               prog->Id);
124 
125    bool found = false;
126    const struct brw_tcs_prog_key *old_key =
127       brw_find_previous_compile(&brw->cache, BRW_CACHE_TCS_PROG,
128                                 key->program_string_id);
129 
130    if (!old_key) {
131       perf_debug("  Didn't find previous compile in the shader cache for "
132                  "debug\n");
133       return;
134    }
135 
136    found |= key_debug(brw, "input vertices", old_key->input_vertices,
137                       key->input_vertices);
138    found |= key_debug(brw, "outputs written", old_key->outputs_written,
139                       key->outputs_written);
140    found |= key_debug(brw, "patch outputs written", old_key->patch_outputs_written,
141                       key->patch_outputs_written);
142    found |= key_debug(brw, "TES primitive mode", old_key->tes_primitive_mode,
143                       key->tes_primitive_mode);
144    found |= key_debug(brw, "quads and equal_spacing workaround",
145                       old_key->quads_workaround, key->quads_workaround);
146    found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
147 
148    if (!found) {
149       perf_debug("  Something else\n");
150    }
151 }
152 
153 static bool
brw_codegen_tcs_prog(struct brw_context * brw,struct brw_program * tcp,struct brw_program * tep,struct brw_tcs_prog_key * key)154 brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
155                      struct brw_program *tep, struct brw_tcs_prog_key *key)
156 {
157    struct gl_context *ctx = &brw->ctx;
158    const struct brw_compiler *compiler = brw->screen->compiler;
159    const struct gen_device_info *devinfo = compiler->devinfo;
160    struct brw_stage_state *stage_state = &brw->tcs.base;
161    nir_shader *nir;
162    struct brw_tcs_prog_data prog_data;
163    bool start_busy = false;
164    double start_time = 0;
165 
166    void *mem_ctx = ralloc_context(NULL);
167    if (tcp) {
168       nir = tcp->program.nir;
169    } else {
170       /* Create a dummy nir_shader.  We won't actually use NIR code to
171        * generate assembly (it's easier to generate assembly directly),
172        * but the whole compiler assumes one of these exists.
173        */
174       const nir_shader_compiler_options *options =
175          ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
176       nir = create_passthrough_tcs(mem_ctx, compiler, options, key);
177    }
178 
179    memset(&prog_data, 0, sizeof(prog_data));
180 
181    if (tcp) {
182       brw_assign_common_binding_table_offsets(devinfo, &tcp->program,
183                                               &prog_data.base.base, 0);
184 
185       brw_nir_setup_glsl_uniforms(mem_ctx, nir, &tcp->program,
186                                   &prog_data.base.base,
187                                   compiler->scalar_stage[MESA_SHADER_TESS_CTRL]);
188       brw_nir_analyze_ubo_ranges(compiler, tcp->program.nir,
189                                  prog_data.base.base.ubo_ranges);
190    } else {
191       /* Upload the Patch URB Header as the first two uniforms.
192        * Do the annoying scrambling so the shader doesn't have to.
193        */
194       assert(nir->num_uniforms == 32);
195       prog_data.base.base.param = rzalloc_array(mem_ctx, uint32_t, 8);
196       prog_data.base.base.nr_params = 8;
197 
198       uint32_t *param = prog_data.base.base.param;
199       for (int i = 0; i < 8; i++)
200          param[i] = BRW_PARAM_BUILTIN_ZERO;
201 
202       if (key->tes_primitive_mode == GL_QUADS) {
203          for (int i = 0; i < 4; i++)
204             param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
205 
206          param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
207          param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
208       } else if (key->tes_primitive_mode == GL_TRIANGLES) {
209          for (int i = 0; i < 3; i++)
210             param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
211 
212          param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
213       } else {
214          assert(key->tes_primitive_mode == GL_ISOLINES);
215          param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
216          param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
217       }
218    }
219 
220    int st_index = -1;
221    if (unlikely((INTEL_DEBUG & DEBUG_SHADER_TIME) && tep))
222       st_index = brw_get_shader_time_index(brw, &tep->program, ST_TCS, true);
223 
224    if (unlikely(brw->perf_debug)) {
225       start_busy = brw->batch.last_bo && brw_bo_busy(brw->batch.last_bo);
226       start_time = get_time();
227    }
228 
229    char *error_str;
230    const unsigned *program =
231       brw_compile_tcs(compiler, brw, mem_ctx, key, &prog_data, nir, st_index,
232                       &error_str);
233    if (program == NULL) {
234       if (tep) {
235          tep->program.sh.data->LinkStatus = linking_failure;
236          ralloc_strcat(&tep->program.sh.data->InfoLog, error_str);
237       }
238 
239       _mesa_problem(NULL, "Failed to compile tessellation control shader: "
240                     "%s\n", error_str);
241 
242       ralloc_free(mem_ctx);
243       return false;
244    }
245 
246    if (unlikely(brw->perf_debug)) {
247       if (tcp) {
248          if (tcp->compiled_once) {
249             brw_tcs_debug_recompile(brw, &tcp->program, key);
250          }
251          tcp->compiled_once = true;
252       }
253 
254       if (start_busy && !brw_bo_busy(brw->batch.last_bo)) {
255          perf_debug("TCS compile took %.03f ms and stalled the GPU\n",
256                     (get_time() - start_time) * 1000);
257       }
258    }
259 
260    /* Scratch space is used for register spilling */
261    brw_alloc_stage_scratch(brw, stage_state,
262                            prog_data.base.base.total_scratch);
263 
264    /* The param and pull_param arrays will be freed by the shader cache. */
265    ralloc_steal(NULL, prog_data.base.base.param);
266    ralloc_steal(NULL, prog_data.base.base.pull_param);
267    brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
268                     key, sizeof(*key),
269                     program, prog_data.base.base.program_size,
270                     &prog_data, sizeof(prog_data),
271                     &stage_state->prog_offset, &brw->tcs.base.prog_data);
272    ralloc_free(mem_ctx);
273 
274    return true;
275 }
276 
277 void
brw_tcs_populate_key(struct brw_context * brw,struct brw_tcs_prog_key * key)278 brw_tcs_populate_key(struct brw_context *brw,
279                      struct brw_tcs_prog_key *key)
280 {
281    const struct gen_device_info *devinfo = &brw->screen->devinfo;
282    struct brw_program *tcp =
283       (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
284    struct brw_program *tep =
285       (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
286    struct gl_program *tes_prog = &tep->program;
287 
288    uint64_t per_vertex_slots = tes_prog->info.inputs_read;
289    uint32_t per_patch_slots = tes_prog->info.patch_inputs_read;
290 
291    memset(key, 0, sizeof(*key));
292 
293    if (tcp) {
294       struct gl_program *prog = &tcp->program;
295       per_vertex_slots |= prog->info.outputs_written;
296       per_patch_slots |= prog->info.patch_outputs_written;
297    }
298 
299    if (devinfo->gen < 8 || !tcp)
300       key->input_vertices = brw->ctx.TessCtrlProgram.patch_vertices;
301    key->outputs_written = per_vertex_slots;
302    key->patch_outputs_written = per_patch_slots;
303 
304    /* We need to specialize our code generation for tessellation levels
305     * based on the domain the DS is expecting to tessellate.
306     */
307    key->tes_primitive_mode = tep->program.info.tess.primitive_mode;
308    key->quads_workaround = devinfo->gen < 9 &&
309                            tep->program.info.tess.primitive_mode == GL_QUADS &&
310                            tep->program.info.tess.spacing == TESS_SPACING_EQUAL;
311 
312    if (tcp) {
313       key->program_string_id = tcp->id;
314 
315       /* _NEW_TEXTURE */
316       brw_populate_sampler_prog_key_data(&brw->ctx, &tcp->program, &key->tex);
317    }
318 }
319 
320 void
brw_upload_tcs_prog(struct brw_context * brw)321 brw_upload_tcs_prog(struct brw_context *brw)
322 {
323    struct brw_stage_state *stage_state = &brw->tcs.base;
324    struct brw_tcs_prog_key key;
325    /* BRW_NEW_TESS_PROGRAMS */
326    struct brw_program *tcp =
327       (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
328    MAYBE_UNUSED struct brw_program *tep =
329       (struct brw_program *) brw->programs[MESA_SHADER_TESS_EVAL];
330    assert(tep);
331 
332    if (!brw_state_dirty(brw,
333                         _NEW_TEXTURE,
334                         BRW_NEW_PATCH_PRIMITIVE |
335                         BRW_NEW_TESS_PROGRAMS))
336       return;
337 
338    brw_tcs_populate_key(brw, &key);
339 
340    if (brw_search_cache(&brw->cache, BRW_CACHE_TCS_PROG,
341                         &key, sizeof(key),
342                         &stage_state->prog_offset,
343                         &brw->tcs.base.prog_data))
344       return;
345 
346    if (brw_disk_cache_upload_program(brw, MESA_SHADER_TESS_CTRL))
347       return;
348 
349    tcp = (struct brw_program *) brw->programs[MESA_SHADER_TESS_CTRL];
350    if (tcp)
351       tcp->id = key.program_string_id;
352 
353    MAYBE_UNUSED bool success = brw_codegen_tcs_prog(brw, tcp, tep, &key);
354    assert(success);
355 }
356 
357 
358 bool
brw_tcs_precompile(struct gl_context * ctx,struct gl_shader_program * shader_prog,struct gl_program * prog)359 brw_tcs_precompile(struct gl_context *ctx,
360                    struct gl_shader_program *shader_prog,
361                    struct gl_program *prog)
362 {
363    struct brw_context *brw = brw_context(ctx);
364    struct brw_tcs_prog_key key;
365    uint32_t old_prog_offset = brw->tcs.base.prog_offset;
366    struct brw_stage_prog_data *old_prog_data = brw->tcs.base.prog_data;
367    bool success;
368 
369    struct brw_program *btcp = brw_program(prog);
370    const struct gl_linked_shader *tes =
371       shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
372    const struct gen_device_info *devinfo = &brw->screen->devinfo;
373 
374    memset(&key, 0, sizeof(key));
375 
376    key.program_string_id = btcp->id;
377    brw_setup_tex_for_precompile(brw, &key.tex, prog);
378 
379    /* Guess that the input and output patches have the same dimensionality. */
380    if (devinfo->gen < 8)
381       key.input_vertices = prog->info.tess.tcs_vertices_out;
382 
383    struct brw_program *btep;
384    if (tes) {
385       btep = brw_program(tes->Program);
386       key.tes_primitive_mode = tes->Program->info.tess.primitive_mode;
387       key.quads_workaround = devinfo->gen < 9 &&
388                              tes->Program->info.tess.primitive_mode == GL_QUADS &&
389                              tes->Program->info.tess.spacing == TESS_SPACING_EQUAL;
390    } else {
391       btep = NULL;
392       key.tes_primitive_mode = GL_TRIANGLES;
393    }
394 
395    key.outputs_written = prog->nir->info.outputs_written;
396    key.patch_outputs_written = prog->nir->info.patch_outputs_written;
397 
398    success = brw_codegen_tcs_prog(brw, btcp, btep, &key);
399 
400    brw->tcs.base.prog_offset = old_prog_offset;
401    brw->tcs.base.prog_data = old_prog_data;
402 
403    return success;
404 }
405