1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 #pragma once
29
30 #include "brw_cfg.h"
31 #include "brw_compiler.h"
32 #include "brw_ir_allocator.h"
33 #include "brw_ir_fs.h"
34 #include "brw_fs_live_variables.h"
35 #include "brw_ir_performance.h"
36 #include "compiler/nir/nir.h"
37
38 struct bblock_t;
39 namespace {
40 struct acp_entry;
41 }
42
43 struct fs_visitor;
44
45 namespace brw {
   /**
    * Register pressure analysis of a shader. Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      /* Runs the analysis over the visitor's current IR. */
      register_pressure(const fs_visitor *v);
      ~register_pressure();

      /* Analysis-framework hook: the kinds of IR change that invalidate
       * this result. */
      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      /* No real consistency check implemented yet. */
      bool
      validate(const fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      /* Number of GRFs live at each instruction, indexed by IP. */
      unsigned *regs_live_at_ip;
   };
71
   /**
    * Def analysis: for each VGRF tracks a defining instruction, the basic
    * block containing that definition, and a use count.
    */
   class def_analysis {
   public:
      def_analysis(const fs_visitor *v);
      ~def_analysis();

      /** Defining instruction of \p reg, or NULL if \p reg is not a
       *  VGRF tracked by this analysis. */
      fs_inst *
      get(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_insts[reg.nr] : NULL;
      }

      /** Block containing the definition of \p reg, or NULL if \p reg is
       *  not a tracked VGRF. */
      bblock_t *
      get_block(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_blocks[reg.nr] : NULL;
      }

      /** Recorded use count of \p reg (0 for untracked registers). */
      uint32_t
      get_use_count(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_use_counts[reg.nr] : 0;
      }

      /** Number of VGRFs covered by the tables below. */
      unsigned count() const { return def_count; }
      unsigned ssa_count() const;

      void print_stats(const fs_visitor *) const;

      /* Analysis-framework hook: the kinds of IR change that invalidate
       * this result. */
      analysis_dependency_class
      dependency_class() const
      {
         return DEPENDENCY_INSTRUCTION_IDENTITY |
                DEPENDENCY_INSTRUCTION_DATA_FLOW |
                DEPENDENCY_VARIABLES |
                DEPENDENCY_BLOCKS;
      }

      bool validate(const fs_visitor *) const;

   private:
      void mark_invalid(int);
      bool fully_defines(const fs_visitor *v, fs_inst *);
      void update_for_reads(const idom_tree &idom, bblock_t *block, fs_inst *);
      void update_for_write(const fs_visitor *v, bblock_t *block, fs_inst *);

      fs_inst **def_insts;      /* indexed by VGRF number */
      bblock_t **def_blocks;    /* block of the matching def_insts entry */
      uint32_t *def_use_counts; /* per-VGRF use counts */
      unsigned def_count;       /* size of the three arrays above */
   };
125 }
126
/* NOTE(review): appears to reserve the top four slots of a 16-bit index
 * space (65532..65535) — confirm against users of UBO_START. */
#define UBO_START ((1 << 16) - 4)
128
/**
 * Scratch data used when compiling a GLSL geometry shader.
 */
struct brw_gs_compile
{
   struct brw_gs_prog_key key;
   struct intel_vue_map input_vue_map;

   /* Sizing of the control data written alongside vertex data; consumed
    * by emit_gs_control_data_bits(). */
   unsigned control_data_bits_per_vertex;
   unsigned control_data_header_size_bits;
};
140
141 namespace brw {
142 class fs_builder;
143 }
144
/* Summary statistics gathered over a single shader compile. */
struct brw_shader_stats {
   const char *scheduler_mode;   /* name of the scheduling heuristic used */
   unsigned promoted_constants;
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
   unsigned non_ssa_registers_after_nir;
};
153
/** Register numbers for thread payload fields. */
struct thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~thread_payload() = default;

protected:
   /* Only constructible through the stage-specific subclasses below. */
   thread_payload() : num_regs() {}
};
164
/* Payload layout for vertex shader threads. */
struct vs_thread_payload : public thread_payload {
   vs_thread_payload(const fs_visitor &v);

   brw_reg urb_handles;
};
170
/* Payload layout for tessellation control shader threads. */
struct tcs_thread_payload : public thread_payload {
   tcs_thread_payload(const fs_visitor &v);

   brw_reg patch_urb_output;
   brw_reg primitive_id;
   brw_reg icp_handle_start;
};
178
/* Payload layout for tessellation evaluation shader threads. */
struct tes_thread_payload : public thread_payload {
   tes_thread_payload(const fs_visitor &v);

   brw_reg patch_urb_input;
   brw_reg primitive_id;
   brw_reg coords[3];   /* tessellation coordinates */
   brw_reg urb_output;
};
187
/* Payload layout for geometry shader threads. */
struct gs_thread_payload : public thread_payload {
   gs_thread_payload(fs_visitor &v);

   brw_reg urb_handles;
   brw_reg primitive_id;
   brw_reg instance_id;
   brw_reg icp_handle_start;
};
196
/* Payload layout for fragment shader threads.
 *
 * NOTE(review): the two-element arrays hold payload register numbers —
 * presumably one entry per SIMD16 half of a SIMD32 dispatch; confirm
 * against fetch_payload_reg(). */
struct fs_thread_payload : public thread_payload {
   fs_thread_payload(const fs_visitor &v,
                     bool &source_depth_to_render_target);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t barycentric_coord_reg[INTEL_BARYCENTRIC_MODE_COUNT][2];

   uint8_t depth_w_coef_reg;
   uint8_t pc_bary_coef_reg;
   uint8_t npc_bary_coef_reg;
   uint8_t sample_offsets_reg;
};
215
/* Payload layout for compute (workgroup-based) shader threads. */
struct cs_thread_payload : public thread_payload {
   cs_thread_payload(const fs_visitor &v);

   /* Emits code to materialize the subgroup ID into \p dest. */
   void load_subgroup_id(const brw::fs_builder &bld, brw_reg &dest) const;

   brw_reg local_invocation_id[3];

   brw_reg inline_parameter;

protected:
   /* Backing register read by load_subgroup_id(). */
   brw_reg subgroup_id_;
};
228
/* Payload layout shared by task and mesh shader threads; extends the
 * compute payload since both stages are workgroup-based. */
struct task_mesh_thread_payload : public cs_thread_payload {
   task_mesh_thread_payload(fs_visitor &v);

   brw_reg extended_parameter_0;
   brw_reg local_index;

   brw_reg urb_output;

   /* URB to read Task memory inputs. Only valid for MESH stage. */
   brw_reg task_urb_input;
};
240
/* Payload layout for bindless (ray-tracing) shader threads — the stages
 * MESA_SHADER_RAYGEN through MESA_SHADER_CALLABLE. */
struct bs_thread_payload : public thread_payload {
   bs_thread_payload(const fs_visitor &v);

   brw_reg global_arg_ptr;
   brw_reg local_arg_ptr;

   /* Emits code to materialize the shader type into \p dest. */
   void load_shader_type(const brw::fs_builder &bld, brw_reg &dest) const;
};
249
/* Scheduling heuristics, selected per invocation of
 * brw_schedule_instructions_pre_ra(). */
enum instruction_scheduler_mode {
   SCHEDULE_PRE,
   SCHEDULE_PRE_NON_LIFO,
   SCHEDULE_PRE_LIFO,
   SCHEDULE_POST,
   SCHEDULE_NONE,
};
257
258 class instruction_scheduler;
259
/* Coarse compilation phases in pipeline order; tracked in
 * fs_visitor::phase and advanced via brw_shader_phase_update(). */
enum brw_shader_phase {
   BRW_SHADER_PHASE_INITIAL = 0,
   BRW_SHADER_PHASE_AFTER_NIR,
   BRW_SHADER_PHASE_AFTER_OPT_LOOP,
   BRW_SHADER_PHASE_AFTER_EARLY_LOWERING,
   BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING,
   BRW_SHADER_PHASE_AFTER_LATE_LOWERING,
   BRW_SHADER_PHASE_AFTER_REGALLOC,

   /* Larger value than any other phase. */
   BRW_SHADER_PHASE_INVALID,
};
272
273 /**
274 * The fragment shader front-end.
275 *
276 * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
277 */
278 struct fs_visitor
279 {
280 public:
281 fs_visitor(const struct brw_compiler *compiler,
282 const struct brw_compile_params *params,
283 const brw_base_prog_key *key,
284 struct brw_stage_prog_data *prog_data,
285 const nir_shader *shader,
286 unsigned dispatch_width,
287 bool needs_register_pressure,
288 bool debug_enabled);
289 fs_visitor(const struct brw_compiler *compiler,
290 const struct brw_compile_params *params,
291 const brw_wm_prog_key *key,
292 struct brw_wm_prog_data *prog_data,
293 const nir_shader *shader,
294 unsigned dispatch_width,
295 unsigned num_polygons,
296 bool needs_register_pressure,
297 bool debug_enabled);
298 fs_visitor(const struct brw_compiler *compiler,
299 const struct brw_compile_params *params,
300 struct brw_gs_compile *gs_compile,
301 struct brw_gs_prog_data *prog_data,
302 const nir_shader *shader,
303 bool needs_register_pressure,
304 bool debug_enabled);
305 void init();
306 ~fs_visitor();
307
308 void import_uniforms(fs_visitor *v);
309
310 void assign_curb_setup();
311 void convert_attr_sources_to_hw_regs(fs_inst *inst);
312 void calculate_payload_ranges(bool allow_spilling,
313 unsigned payload_node_count,
314 int *payload_last_use_ip) const;
315 void invalidate_analysis(brw::analysis_dependency_class c);
316
317 void vfail(const char *msg, va_list args);
318 void fail(const char *msg, ...);
319 void limit_dispatch_width(unsigned n, const char *msg);
320
321 void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
322 void emit_gs_control_data_bits(const brw_reg &vertex_count);
323 brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
324 brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
325 bool mark_last_urb_write_with_eot();
326 void emit_cs_terminate();
327
328 const struct brw_compiler *compiler;
329 void *log_data; /* Passed to compiler->*_log functions */
330
331 const struct intel_device_info * const devinfo;
332 const nir_shader *nir;
333
334 /** ralloc context for temporary data used during compile */
335 void *mem_ctx;
336
337 /** List of fs_inst. */
338 exec_list instructions;
339
340 cfg_t *cfg;
341
342 gl_shader_stage stage;
343 bool debug_enabled;
344
345 brw::simple_allocator alloc;
346
347 const brw_base_prog_key *const key;
348
349 struct brw_gs_compile *gs_compile;
350
351 struct brw_stage_prog_data *prog_data;
352
353 brw_analysis<brw::fs_live_variables, fs_visitor> live_analysis;
354 brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
355 brw_analysis<brw::performance, fs_visitor> performance_analysis;
356 brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
357 brw_analysis<brw::def_analysis, fs_visitor> def_analysis;
358
359 /** Number of uniform variable components visited. */
360 unsigned uniforms;
361
362 /** Byte-offset for the next available spot in the scratch space buffer. */
363 unsigned last_scratch;
364
365 brw_reg frag_depth;
366 brw_reg frag_stencil;
367 brw_reg sample_mask;
368 brw_reg outputs[VARYING_SLOT_MAX];
369 brw_reg dual_src_output;
370 int first_non_payload_grf;
371
372 enum brw_shader_phase phase;
373
374 bool failed;
375 char *fail_msg;
376
377 thread_payload *payload_;
378
payloadfs_visitor379 thread_payload &payload() {
380 return *this->payload_;
381 }
382
vs_payloadfs_visitor383 vs_thread_payload &vs_payload() {
384 assert(stage == MESA_SHADER_VERTEX);
385 return *static_cast<vs_thread_payload *>(this->payload_);
386 }
387
tcs_payloadfs_visitor388 tcs_thread_payload &tcs_payload() {
389 assert(stage == MESA_SHADER_TESS_CTRL);
390 return *static_cast<tcs_thread_payload *>(this->payload_);
391 }
392
tes_payloadfs_visitor393 tes_thread_payload &tes_payload() {
394 assert(stage == MESA_SHADER_TESS_EVAL);
395 return *static_cast<tes_thread_payload *>(this->payload_);
396 }
397
gs_payloadfs_visitor398 gs_thread_payload &gs_payload() {
399 assert(stage == MESA_SHADER_GEOMETRY);
400 return *static_cast<gs_thread_payload *>(this->payload_);
401 }
402
fs_payloadfs_visitor403 fs_thread_payload &fs_payload() {
404 assert(stage == MESA_SHADER_FRAGMENT);
405 return *static_cast<fs_thread_payload *>(this->payload_);
406 };
407
fs_payloadfs_visitor408 const fs_thread_payload &fs_payload() const {
409 assert(stage == MESA_SHADER_FRAGMENT);
410 return *static_cast<const fs_thread_payload *>(this->payload_);
411 };
412
cs_payloadfs_visitor413 cs_thread_payload &cs_payload() {
414 assert(gl_shader_stage_uses_workgroup(stage));
415 return *static_cast<cs_thread_payload *>(this->payload_);
416 }
417
task_mesh_payloadfs_visitor418 task_mesh_thread_payload &task_mesh_payload() {
419 assert(stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH);
420 return *static_cast<task_mesh_thread_payload *>(this->payload_);
421 }
422
bs_payloadfs_visitor423 bs_thread_payload &bs_payload() {
424 assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
425 return *static_cast<bs_thread_payload *>(this->payload_);
426 }
427
428 bool source_depth_to_render_target;
429
430 brw_reg pixel_x;
431 brw_reg pixel_y;
432 brw_reg pixel_z;
433 brw_reg wpos_w;
434 brw_reg pixel_w;
435 brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT];
436 brw_reg final_gs_vertex_count;
437 brw_reg control_data_bits;
438 brw_reg invocation_id;
439
440 unsigned grf_used;
441 bool spilled_any_registers;
442 bool needs_register_pressure;
443
444 const unsigned dispatch_width; /**< 8, 16 or 32 */
445 const unsigned max_polygons;
446 unsigned max_dispatch_width;
447
448 /* The API selected subgroup size */
449 unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
450
451 unsigned next_address_register_nr;
452
453 struct brw_shader_stats shader_stats;
454
455 void debug_optimizer(const nir_shader *nir,
456 const char *pass_name,
457 int iteration, int pass_num) const;
458 };
459
460 void brw_print_instructions(const fs_visitor &s, FILE *file = stderr);
461
462 void brw_print_instruction(const fs_visitor &s, const fs_inst *inst,
463 FILE *file = stderr,
464 const brw::def_analysis *defs = nullptr);
465
466 void brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb);
467
/**
 * Return the flag register used in fragment shaders to keep track of live
 * samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
 * dispatch mode.
 */
static inline unsigned
sample_mask_flag_subreg(const fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_FRAGMENT);
   /* Flag subregister index 2, i.e. f1.0 per the comment above. */
   return 2;
}
479
namespace brw {
   /* Build a brw_reg from payload register numbers recorded in \p regs.
    * NOTE(review): the two entries presumably cover the two SIMD16
    * halves of a SIMD32 dispatch — confirm against the definition. */
   brw_reg
   fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
                     brw_reg_type type = BRW_TYPE_F,
                     unsigned n = 1);

   brw_reg
   fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);

   /* Uniform register holding the dynamic intel_msaa_flags value. */
   inline brw_reg
   dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
   {
      return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
   }

   void
   check_dynamic_msaa_flag(const fs_builder &bld,
                           const struct brw_wm_prog_data *wm_prog_data,
                           enum intel_msaa_flags flag);

   bool
   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
}
503
void shuffle_from_32bit_read(const brw::fs_builder &bld,
                             const brw_reg &dst,
                             const brw_reg &src,
                             uint32_t first_component,
                             uint32_t components);

enum intel_barycentric_mode brw_barycentric_mode(const struct brw_wm_prog_key *key,
                                                 nir_intrinsic_instr *intr);

uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                  const struct brw_wm_prog_data *prog_data);

void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);

brw_reg brw_sample_mask_reg(const brw::fs_builder &bld);
void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);

int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const brw_stage_prog_data *prog_data);

/* NIR -> backend IR translation entry point. */
void nir_to_brw(fs_visitor *s);

void brw_shader_phase_update(fs_visitor &s, enum brw_shader_phase phase);

/* IR consistency checking; compiled to a no-op in release builds. */
#ifndef NDEBUG
void brw_validate(const fs_visitor &s);
#else
static inline void brw_validate(const fs_visitor &s) {}
#endif

void brw_calculate_cfg(fs_visitor &s);

void brw_optimize(fs_visitor &s);

/* Instruction scheduling, before and after register allocation. */
instruction_scheduler *brw_prepare_scheduler(fs_visitor &s, void *mem_ctx);
void brw_schedule_instructions_pre_ra(fs_visitor &s, instruction_scheduler *sched,
                                      instruction_scheduler_mode mode);
void brw_schedule_instructions_post_ra(fs_visitor &s);

/* Register allocation. */
void brw_allocate_registers(fs_visitor &s, bool allow_spilling);
bool brw_assign_regs(fs_visitor &s, bool allow_spilling, bool spill_all);
void brw_assign_regs_trivial(fs_visitor &s);

/* Lowering passes.  The bool result appears to report whether the pass
 * changed the IR, per the usual Mesa pass convention. */
bool brw_lower_3src_null_dest(fs_visitor &s);
bool brw_lower_alu_restrictions(fs_visitor &s);
bool brw_lower_barycentrics(fs_visitor &s);
bool brw_lower_constant_loads(fs_visitor &s);
bool brw_lower_csel(fs_visitor &s);
bool brw_lower_derivatives(fs_visitor &s);
bool brw_lower_dpas(fs_visitor &s);
bool brw_lower_find_live_channel(fs_visitor &s);
bool brw_lower_indirect_mov(fs_visitor &s);
bool brw_lower_integer_multiplication(fs_visitor &s);
bool brw_lower_load_payload(fs_visitor &s);
bool brw_lower_load_subgroup_invocation(fs_visitor &s);
bool brw_lower_logical_sends(fs_visitor &s);
bool brw_lower_pack(fs_visitor &s);
bool brw_lower_regioning(fs_visitor &s);
bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
bool brw_lower_scoreboard(fs_visitor &s);
bool brw_lower_send_descriptors(fs_visitor &s);
bool brw_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_lower_simd_width(fs_visitor &s);
bool brw_lower_sub_sat(fs_visitor &s);
bool brw_lower_subgroup_ops(fs_visitor &s);
bool brw_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s);

/* Optimization passes. */
bool brw_opt_address_reg_load(fs_visitor &s);
bool brw_opt_algebraic(fs_visitor &s);
bool brw_opt_bank_conflicts(fs_visitor &s);
bool brw_opt_cmod_propagation(fs_visitor &s);
bool brw_opt_combine_constants(fs_visitor &s);
bool brw_opt_combine_convergent_txf(fs_visitor &s);
bool brw_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
bool brw_opt_copy_propagation(fs_visitor &s);
bool brw_opt_copy_propagation_defs(fs_visitor &s);
bool brw_opt_cse_defs(fs_visitor &s);
bool brw_opt_dead_code_eliminate(fs_visitor &s);
bool brw_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_opt_register_coalesce(fs_visitor &s);
bool brw_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_opt_remove_redundant_halts(fs_visitor &s);
bool brw_opt_saturate_propagation(fs_visitor &s);
bool brw_opt_split_sends(fs_visitor &s);
bool brw_opt_split_virtual_grfs(fs_visitor &s);
bool brw_opt_zero_samples(fs_visitor &s);

/* Hardware workaround passes. */
bool brw_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_workaround_nomask_control_flow(fs_visitor &s);
bool brw_workaround_source_arf_before_eot(fs_visitor &s);

/* Helpers. */
unsigned brw_get_lowered_simd_width(const fs_visitor *shader,
                                    const fs_inst *inst);