1 /*
2 * Copyright © 2013 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_eu.h"
7 #include "intel_nir.h"
8 #include "brw_nir.h"
9 #include "brw_fs.h"
10 #include "brw_private.h"
11 #include "dev/intel_debug.h"
12
/**
 * Pick the 3DSTATE_HS "Patch Count Threshold" for a given patch size.
 *
 * A MULTI_PATCH mode thread normally waits until 8 patches have been
 * accumulated.  With many input control points and many VS outputs, the VS
 * URB space required to hold 8 whole patches can be prohibitive, so
 * launching the thread before all 8 have arrived can be beneficial.
 *
 * The values follow the recommendations in the 3DSTATE_HS::Patch Count
 * Threshold documentation.  A result of 0 "disables" early dispatch, i.e.
 * the thread waits for the full 8 patches as usual.
 *
 * \param input_control_points  number of input control points per patch
 * \return the number of patches to accumulate before launching, or 0 to
 *         disable early dispatch
 */
static int
get_patch_count_threshold(int input_control_points)
{
   /* Each row covers every control point count up to and including
    * max_control_points that an earlier row has not already claimed.
    */
   static const struct {
      int max_control_points;
      int threshold;
   } tiers[] = {
      {  4, 0 },
      {  6, 5 },
      {  8, 4 },
      { 10, 3 },
      { 14, 2 },
   };
   const unsigned num_tiers = sizeof(tiers) / sizeof(tiers[0]);

   for (unsigned i = 0; i < num_tiers; i++) {
      if (input_control_points <= tiers[i].max_control_points)
         return tiers[i].threshold;
   }

   /* Everything larger (PATCHLIST_15 - PATCHLIST_32) uses a patch count
    * of 1.
    */
   return 1;
}
41
42 extern "C" const unsigned *
brw_compile_tcs(const struct brw_compiler * compiler,struct brw_compile_tcs_params * params)43 brw_compile_tcs(const struct brw_compiler *compiler,
44 struct brw_compile_tcs_params *params)
45 {
46 const struct intel_device_info *devinfo = compiler->devinfo;
47 nir_shader *nir = params->base.nir;
48 const struct brw_tcs_prog_key *key = params->key;
49 struct brw_tcs_prog_data *prog_data = params->prog_data;
50 struct brw_vue_prog_data *vue_prog_data = &prog_data->base;
51
52 const bool debug_enabled = brw_should_print_shader(nir, DEBUG_TCS);
53
54 vue_prog_data->base.stage = MESA_SHADER_TESS_CTRL;
55 prog_data->base.base.ray_queries = nir->info.ray_queries;
56 prog_data->base.base.total_scratch = 0;
57
58 nir->info.outputs_written = key->outputs_written;
59 nir->info.patch_outputs_written = key->patch_outputs_written;
60
61 struct intel_vue_map input_vue_map;
62 brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
63 nir->info.separate_shader, 1);
64 brw_compute_tess_vue_map(&vue_prog_data->vue_map,
65 nir->info.outputs_written,
66 nir->info.patch_outputs_written);
67
68 brw_nir_apply_key(nir, compiler, &key->base, 8);
69 brw_nir_lower_vue_inputs(nir, &input_vue_map);
70 brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
71 key->_tes_primitive_mode);
72 if (key->input_vertices > 0)
73 intel_nir_lower_patch_vertices_in(nir, key->input_vertices);
74
75 brw_postprocess_nir(nir, compiler, debug_enabled,
76 key->base.robust_flags);
77
78 bool has_primitive_id =
79 BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
80
81 prog_data->patch_count_threshold = get_patch_count_threshold(key->input_vertices);
82
83 if (compiler->use_tcs_multi_patch) {
84 vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_MULTI_PATCH;
85 prog_data->instances = nir->info.tess.tcs_vertices_out;
86 prog_data->include_primitive_id = has_primitive_id;
87 } else {
88 unsigned verts_per_thread = 8;
89 vue_prog_data->dispatch_mode = INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH;
90 prog_data->instances =
91 DIV_ROUND_UP(nir->info.tess.tcs_vertices_out, verts_per_thread);
92 }
93
94 /* Compute URB entry size. The maximum allowed URB entry size is 32k.
95 * That divides up as follows:
96 *
97 * 32 bytes for the patch header (tessellation factors)
98 * 480 bytes for per-patch varyings (a varying component is 4 bytes and
99 * gl_MaxTessPatchComponents = 120)
100 * 16384 bytes for per-vertex varyings (a varying component is 4 bytes,
101 * gl_MaxPatchVertices = 32 and
102 * gl_MaxTessControlOutputComponents = 128)
103 *
104 * 15808 bytes left for varying packing overhead
105 */
106 const int num_per_patch_slots = vue_prog_data->vue_map.num_per_patch_slots;
107 const int num_per_vertex_slots = vue_prog_data->vue_map.num_per_vertex_slots;
108 unsigned output_size_bytes = 0;
109 /* Note that the patch header is counted in num_per_patch_slots. */
110 output_size_bytes += num_per_patch_slots * 16;
111 output_size_bytes += nir->info.tess.tcs_vertices_out *
112 num_per_vertex_slots * 16;
113
114 assert(output_size_bytes >= 1);
115 if (output_size_bytes > GFX7_MAX_HS_URB_ENTRY_SIZE_BYTES)
116 return NULL;
117
118 /* URB entry sizes are stored as a multiple of 64 bytes. */
119 vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
120
121 /* HS does not use the usual payload pushing from URB to GRFs,
122 * because we don't have enough registers for a full-size payload, and
123 * the hardware is broken on Haswell anyway.
124 */
125 vue_prog_data->urb_read_length = 0;
126
127 if (unlikely(debug_enabled)) {
128 fprintf(stderr, "TCS Input ");
129 brw_print_vue_map(stderr, &input_vue_map, MESA_SHADER_TESS_CTRL);
130 fprintf(stderr, "TCS Output ");
131 brw_print_vue_map(stderr, &vue_prog_data->vue_map, MESA_SHADER_TESS_CTRL);
132 }
133
134 const unsigned dispatch_width = devinfo->ver >= 20 ? 16 : 8;
135 fs_visitor v(compiler, ¶ms->base, &key->base,
136 &prog_data->base.base, nir, dispatch_width,
137 params->base.stats != NULL, debug_enabled);
138 if (!v.run_tcs()) {
139 params->base.error_str =
140 ralloc_strdup(params->base.mem_ctx, v.fail_msg);
141 return NULL;
142 }
143
144 assert(v.payload().num_regs % reg_unit(devinfo) == 0);
145 prog_data->base.base.dispatch_grf_start_reg = v.payload().num_regs / reg_unit(devinfo);
146
147 fs_generator g(compiler, ¶ms->base,
148 &prog_data->base.base, MESA_SHADER_TESS_CTRL);
149 if (unlikely(debug_enabled)) {
150 g.enable_debug(ralloc_asprintf(params->base.mem_ctx,
151 "%s tessellation control shader %s",
152 nir->info.label ? nir->info.label
153 : "unnamed",
154 nir->info.name));
155 }
156
157 g.generate_code(v.cfg, dispatch_width, v.shader_stats,
158 v.performance_analysis.require(), params->base.stats);
159
160 g.add_const_data(nir->constant_data, nir->constant_data_size);
161
162 return g.get_assembly();
163 }
164