/* (removed web-viewer navigation chrome left over from source extraction) */
1 /*
2  * Copyright © 2010 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "brw_fs.h"
7 #include "brw_builder.h"
8 #include "brw_fs_live_variables.h"
9 #include "brw_generator.h"
10 #include "brw_nir.h"
11 #include "brw_cfg.h"
12 #include "brw_private.h"
13 #include "intel_nir.h"
14 #include "shader_enums.h"
15 #include "dev/intel_debug.h"
16 #include "dev/intel_wa.h"
17 
18 #include <memory>
19 
20 using namespace brw;
21 
22 static void
fill_push_const_block_info(struct brw_push_const_block * block,unsigned dwords)23 fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
24 {
25    block->dwords = dwords;
26    block->regs = DIV_ROUND_UP(dwords, 8);
27    block->size = block->regs * 32;
28 }
29 
30 static void
cs_fill_push_const_info(const struct intel_device_info * devinfo,struct brw_cs_prog_data * cs_prog_data)31 cs_fill_push_const_info(const struct intel_device_info *devinfo,
32                         struct brw_cs_prog_data *cs_prog_data)
33 {
34    const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
35    int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
36 
37    /* The thread ID should be stored in the last param dword */
38    assert(subgroup_id_index == -1 ||
39           subgroup_id_index == (int)prog_data->nr_params - 1);
40 
41    unsigned cross_thread_dwords, per_thread_dwords;
42    if (subgroup_id_index >= 0) {
43       /* Fill all but the last register with cross-thread payload */
44       cross_thread_dwords = 8 * (subgroup_id_index / 8);
45       per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
46       assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
47    } else {
48       /* Fill all data using cross-thread payload */
49       cross_thread_dwords = prog_data->nr_params;
50       per_thread_dwords = 0u;
51    }
52 
53    fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
54    fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
55 
56    assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
57           cs_prog_data->push.per_thread.size == 0);
58    assert(cs_prog_data->push.cross_thread.dwords +
59           cs_prog_data->push.per_thread.dwords ==
60              prog_data->nr_params);
61 }
62 
/* Run the backend compilation pipeline for a compute shader at the visitor's
 * dispatch width: NIR translation, optimization, payload/CURB setup,
 * hardware workarounds, and register allocation.
 *
 * Returns false if compilation failed at any point (s.fail_msg describes
 * why); allow_spilling controls whether register allocation may spill.
 *
 * NOTE(review): the pass order below is deliberate — e.g. the EOT-related
 * workarounds must run after optimization and around register allocation —
 * so do not reorder these calls.
 */
static bool
run_cs(fs_visitor &s, bool allow_spilling)
{
   assert(gl_shader_stage_is_compute(s.stage));
   const brw_builder bld = brw_builder(&s).at_end();

   s.payload_ = new cs_thread_payload(s);

   /* Haswell-only fixup, needed only when the shader uses shared local
    * memory (total_shared > 0).
    */
   if (s.devinfo->platform == INTEL_PLATFORM_HSW && s.prog_data->total_shared > 0) {
      /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
      const brw_builder abld = bld.exec_all().group(1, 0);
      abld.MOV(retype(brw_sr0_reg(1), BRW_TYPE_UW),
               suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UW), 1));
   }

   /* Translate the NIR shader into backend IR. */
   nir_to_brw(&s);

   if (s.failed)
      return false;

   s.emit_cs_terminate();

   brw_calculate_cfg(s);

   brw_optimize(s);

   s.assign_curb_setup();

   /* Hardware workarounds that must be applied before register allocation. */
   brw_lower_3src_null_dest(s);
   brw_workaround_memory_fence_before_eot(s);
   brw_workaround_emit_dummy_mov_instruction(s);

   brw_allocate_registers(s, allow_spilling);

   brw_workaround_source_arf_before_eot(s);

   /* Allocation (e.g. spilling) can also fail, so re-check. */
   return !s.failed;
}
101 
102 static bool
instr_uses_sampler(nir_builder * b,nir_instr * instr,void * cb_data)103 instr_uses_sampler(nir_builder *b, nir_instr *instr, void *cb_data)
104 {
105    if (instr->type != nir_instr_type_tex)
106       return false;
107 
108    switch (nir_instr_as_tex(instr)->op) {
109    case nir_texop_tex:
110    case nir_texop_txd:
111    case nir_texop_txf:
112    case nir_texop_txl:
113    case nir_texop_txb:
114    case nir_texop_txf_ms:
115    case nir_texop_txf_ms_mcs_intel:
116    case nir_texop_lod:
117    case nir_texop_tg4:
118    case nir_texop_texture_samples:
119       return true;
120 
121    default:
122       return false;
123    }
124 }
125 
126 static bool
brw_nir_uses_sampler(nir_shader * shader)127 brw_nir_uses_sampler(nir_shader *shader)
128 {
129    return nir_shader_instructions_pass(shader, instr_uses_sampler,
130                                        nir_metadata_all,
131                                        NULL);
132 }
133 
/* Main entry point for compiling a compute shader.
 *
 * Clones and compiles the NIR shader at up to three SIMD widths
 * (8/16/32), lets the SIMD-selection state pick which variant(s) to keep,
 * then generates final assembly for each selected width.
 *
 * Returns the generated assembly, or NULL on failure with
 * params->base.error_str set to a description of why each width failed.
 */
const unsigned *
brw_compile_cs(const struct brw_compiler *compiler,
               struct brw_compile_cs_params *params)
{
   const struct intel_device_info *devinfo = compiler->devinfo;
   struct nir_shader *nir = params->base.nir;
   const struct brw_cs_prog_key *key = params->key;
   struct brw_cs_prog_data *prog_data = params->prog_data;

   const bool debug_enabled =
      brw_should_print_shader(nir, params->base.debug_flag ?
                                   params->base.debug_flag : DEBUG_CS);

   prog_data->base.stage = MESA_SHADER_COMPUTE;
   prog_data->base.total_shared = nir->info.shared_size;
   prog_data->base.ray_queries = nir->info.ray_queries;
   prog_data->base.total_scratch = 0;
   /* Inline data is asserted below to only occur on verx10 >= 125. */
   prog_data->uses_inline_data = brw_nir_uses_inline_data(nir);
   assert(compiler->devinfo->verx10 >= 125 || !prog_data->uses_inline_data);

   /* With a variable workgroup size the local size is unknown at compile
    * time, so local_size is only recorded for fixed-size workgroups.
    */
   if (!nir->info.workgroup_size_variable) {
      prog_data->local_size[0] = nir->info.workgroup_size[0];
      prog_data->local_size[1] = nir->info.workgroup_size[1];
      prog_data->local_size[2] = nir->info.workgroup_size[2];
   }

   brw_simd_selection_state simd_state{
      .devinfo = compiler->devinfo,
      .prog_data = prog_data,
      .required_width = brw_required_dispatch_width(&nir->info),
   };

   prog_data->uses_sampler = brw_nir_uses_sampler(params->base.nir);

   /* One visitor per SIMD width; index is the SIMD slot (0=8, 1=16, 2=32). */
   std::unique_ptr<fs_visitor> v[3];

   for (unsigned i = 0; i < 3; i++) {
      /* On ver >= 30 widths are tried widest-first (SIMD32 down to SIMD8);
       * otherwise narrowest-first.
       */
      const unsigned simd = devinfo->ver >= 30 ? 2 - i : i;

      if (!brw_simd_should_compile(simd_state, simd))
         continue;

      const unsigned dispatch_width = 8u << simd;

      /* Each width compiles its own clone so passes don't contaminate the
       * shared input NIR.
       */
      nir_shader *shader = nir_shader_clone(params->base.mem_ctx, nir);
      brw_nir_apply_key(shader, compiler, &key->base,
                        dispatch_width);

      NIR_PASS(_, shader, brw_nir_lower_simd, dispatch_width);

      /* Clean up after the local index and ID calculations. */
      NIR_PASS(_, shader, nir_opt_constant_folding);
      NIR_PASS(_, shader, nir_opt_dce);

      brw_postprocess_nir(shader, compiler, debug_enabled,
                          key->base.robust_flags);

      v[simd] = std::make_unique<fs_visitor>(compiler, &params->base,
                                             &key->base,
                                             &prog_data->base,
                                             shader, dispatch_width,
                                             params->base.stats != NULL,
                                             debug_enabled);

      /* Spilling is allowed when this is the first width attempted, when no
       * earlier width can still be selected, or when the workgroup size is
       * variable (every width may be needed at dispatch time).
       */
      const bool allow_spilling = simd == 0 ||
         (!simd_state.compiled[simd - 1] && !brw_simd_should_compile(simd_state, simd - 1)) ||
         nir->info.workgroup_size_variable;

      if (devinfo->ver < 30 || nir->info.workgroup_size_variable) {
         /* Reuse the uniform layout of an earlier successful compile. */
         const int first = brw_simd_first_compiled(simd_state);
         if (first >= 0)
            v[simd]->import_uniforms(v[first].get());
         assert(allow_spilling == (first < 0 || nir->info.workgroup_size_variable));
      }

      if (run_cs(*v[simd], allow_spilling)) {
         cs_fill_push_const_info(compiler->devinfo, prog_data);

         brw_simd_mark_compiled(simd_state, simd, v[simd]->spilled_any_registers);

         /* Widest-first search (ver >= 30): a non-spilling fixed-size
          * compile is good enough, so stop trying narrower widths.
          */
         if (devinfo->ver >= 30 && !v[simd]->spilled_any_registers &&
             !nir->info.workgroup_size_variable)
            break;
      } else {
         simd_state.error[simd] = ralloc_strdup(params->base.mem_ctx, v[simd]->fail_msg);
         /* Only the first (required) width failing is a hard error worth
          * staying silent about; later widths just log a perf warning.
          */
         if (simd > 0) {
            brw_shader_perf_log(compiler, params->base.log_data,
                                "SIMD%u shader failed to compile: %s\n",
                                dispatch_width, v[simd]->fail_msg);
         }
      }
   }

   const int selected_simd = brw_simd_select(simd_state);
   if (selected_simd < 0) {
      /* Nothing compiled: report the per-width failure reasons. */
      params->base.error_str =
         ralloc_asprintf(params->base.mem_ctx,
                         "Can't compile shader: "
                         "SIMD8 '%s', SIMD16 '%s' and SIMD32 '%s'.\n",
                         simd_state.error[0], simd_state.error[1],
                         simd_state.error[2]);
      return NULL;
   }

   assert(selected_simd < 3);

   /* With a variable workgroup size prog_mask keeps every compiled width;
    * otherwise only the selected width is kept.
    */
   if (!nir->info.workgroup_size_variable)
      prog_data->prog_mask = 1 << selected_simd;

   brw_generator g(compiler, &params->base, &prog_data->base,
                  MESA_SHADER_COMPUTE);
   if (unlikely(debug_enabled)) {
      char *name = ralloc_asprintf(params->base.mem_ctx,
                                   "%s compute shader %s",
                                   nir->info.label ?
                                   nir->info.label : "unnamed",
                                   nir->info.name);
      g.enable_debug(name);
   }

   uint32_t max_dispatch_width = 8u << (util_last_bit(prog_data->prog_mask) - 1);

   /* Generate code for every width kept in prog_mask, recording per-width
    * offsets, stats, and the maximum GRF usage across variants.
    */
   struct brw_compile_stats *stats = params->base.stats;
   for (unsigned simd = 0; simd < 3; simd++) {
      if (prog_data->prog_mask & (1u << simd)) {
         assert(v[simd]);
         prog_data->prog_offset[simd] =
            g.generate_code(v[simd]->cfg, 8u << simd, v[simd]->shader_stats,
                            v[simd]->performance_analysis.require(), stats);
         if (stats)
            stats->max_dispatch_width = max_dispatch_width;
         stats = stats ? stats + 1 : NULL;

         prog_data->base.grf_used = MAX2(prog_data->base.grf_used,
                                         v[simd]->grf_used);

         max_dispatch_width = 8u << simd;
      }
   }

   g.add_const_data(nir->constant_data, nir->constant_data_size);

   return g.get_assembly();
}
278