/*
 * Copyright © 2013 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "brw_eu.h"
7 #include "intel_nir.h"
8 #include "brw_nir.h"
9 #include "brw_fs.h"
10 #include "brw_private.h"
11 #include "dev/intel_debug.h"
12 
/**
 * Return the number of patches to accumulate before a MULTI_PATCH mode thread
 * is launched.  In cases with a large number of input control points and a
 * large amount of VS outputs, the VS URB space needed to store an entire 8
 * patches worth of data can be prohibitive, so it can be beneficial to launch
 * threads early.
 *
 * See the 3DSTATE_HS::Patch Count Threshold documentation for the recommended
 * values.  Note that 0 means to "disable" early dispatch, meaning to wait for
 * a full 8 patches as normal.
 *
 * Mapping implemented below (input control points -> threshold):
 *
 *    <= 4  -> 0 (early dispatch disabled)
 *    5-6   -> 5
 *    7-8   -> 4
 *    9-10  -> 3
 *    11-14 -> 2
 *    >= 15 -> 1
 *
 * \param input_control_points  Number of input control points per patch.
 *                              Any value <= 4 (including 0 or a negative
 *                              value) yields 0.
 * \return The patch count threshold, in the range [0, 5].
 */
static int
get_patch_count_threshold(int input_control_points)
{
   if (input_control_points <= 4)
      return 0;
   else if (input_control_points <= 6)
      return 5;
   else if (input_control_points <= 8)
      return 4;
   else if (input_control_points <= 10)
      return 3;
   else if (input_control_points <= 14)
      return 2;

   /* Return patch count 1 for PATCHLIST_15 - PATCHLIST_32 */
   return 1;
}
41 
42 extern "C" const unsigned *
brw_compile_tcs(const struct brw_compiler * compiler,struct brw_compile_tcs_params * params)43 brw_compile_tcs(const struct brw_compiler *compiler,
44                 struct brw_compile_tcs_params *params)
45 {
46    const struct intel_device_info *devinfo = compiler->devinfo;
47    nir_shader *nir = params->base.nir;
48    const struct brw_tcs_prog_key *key = params->key;
49    struct brw_tcs_prog_data *prog_data = params->prog_data;
50    struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
51 
52    const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS);
53 
54    vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
55    prog_data->base.base.ray_queries = nir->info.ray_queries;
56    prog_data->base.base.total_scratch = 0;
57 
58    nir->info.outputs_written = key->outputs_written;
59    nir->info.patch_outputs_written = key->patch_outputs_written;
60 
61    struct intel_vue_map input_vue_map;
62    brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
63                        nir->info.separate_shader, 1);
64    brw_compute_tess_vue_map(&vue_prog_data->vue_map,
65                             nir->info.outputs_written,
66                             nir->info.patch_outputs_written);
67 
68    brw_nir_apply_key(nir, compiler, &key->base, 8);
69    brw_nir_lower_vue_inputs(nir, &input_vue_map);
70    brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
71                              key->_tes_primitive_mode);
72    if (key->input_vertices > 0)
73       intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
74 
75    brw_postprocess_nir(nir, compiler, debug_enabled,
76                        key->base.robust_flags);
77 
78    bool has_primitive_id =
79       BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
80 
81    prog_data->patch_count_threshold = get_patch_count_threshold(key->input_vertices);
82 
83    if (compiler->use_tcs_multi_patch) {
84       vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_MULTI_PATCH;
85       prog_data->instances = nir->info.tess.tcs_vertices_out;
86       prog_data->include_primitive_id = has_primitive_id;
87    } else {
88       unsigned verts_per_thread = 8;
89       vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH;
90       prog_data->instances =
91          DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
92    }
93 
94    /* Compute URB entry size.  The maximum allowed URB entry size is 32k.
95     * That divides up as follows:
96     *
97     *     32 bytes for the patch header (tessellation factors)
98     *    480 bytes for per-patch varyings (a varying component is 4 bytes and
99     *              gl_MaxTessPatchComponents = 120)
100     *  16384 bytes for per-vertex varyings (a varying component is 4 bytes,
101     *              gl_MaxPatchVertices = 32 and
102     *              gl_MaxTessControlOutputComponents = 128)
103     *
104     *  15808 bytes left for varying packing overhead
105     */
106    const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
107    const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
108    unsigned output_size_bytes = 0;
109    /* Note that the patch header is counted in num_per_patch_slots. */
110    output_size_bytes += num_per_patch_slots * 16;
111    output_size_bytes += nir->info.tess.tcs_vertices_out *
112                         num_per_vertex_slots * 16;
113 
114    assert(output_size_bytes >= 1);
115    if (output_size_bytes > GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES)
116       return NULL;
117 
118    /* URB entry sizes are stored as a multiple of 64 bytes. */
119    vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
120 
121    /* HS does not use the usual payload pushing from URB to GRFs,
122     * because we don't have enough registers for a full-size payload, and
123     * the hardware is broken on Haswell anyway.
124     */
125    vue_prog_data->urb_read_length = 0;
126 
127    if (unlikely(debug_enabled)) {
128       fprintf(stderr, "TCS Input ");
129       brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_CTRL);
130       fprintf(stderr, "TCS Output ");
131       brw_print_vue_map(stderr, &vue_prog_data->vue_map, MESA_SHADER_TESS_CTRL);
132    }
133 
134    const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
135    fs_visitor v(compiler, &params->base, &key->base,
136                 &prog_data->base.base, nir, dispatch_width,
137                 params->base.stats != NULL, debug_enabled);
138    if (!v.run_tcs()) {
139       params->base.error_str =
140          ralloc_strdup(params->base.mem_ctx, v.fail_msg);
141       return NULL;
142    }
143 
144    assert(v.payload().num_regs % reg_unit(devinfo) == 0);
145    prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
146 
147    fs_generator g(compiler, &params->base,
148                   &prog_data->base.base, MESA_SHADER_TESS_CTRL);
149    if (unlikely(debug_enabled)) {
150       g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
151                                      "%s tessellation control shader %s",
152                                      nir->info.label ? nir->info.label
153                                                      : "unnamed",
154                                      nir->info.name));
155    }
156 
157    g.generate_code(v.cfg, dispatch_width, v.shader_stats,
158                    v.performance_analysis.require(), params->base.stats);
159 
160    g.add_const_data(nir->constant_data, nir->constant_data_size);
161 
162    return g.get_assembly();
163 }
164