/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */
27 
#pragma once

#include <assert.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

#include "brw_cfg.h"
#include "brw_compiler.h"
#include "brw_ir_allocator.h"
#include "brw_ir_fs.h"
#include "brw_fs_live_variables.h"
#include "brw_ir_performance.h"
#include "compiler/nir/nir.h"
37 
38 struct bblock_t;
39 namespace {
40    struct acp_entry;
41 }
42 
43 struct fs_visitor;
44 
45 namespace brw {
46    /**
47     * Register pressure analysis of a shader.  Estimates how many registers
48     * are live at any point of the program in GRF units.
49     */
50    struct register_pressure {
51       register_pressure(const fs_visitor *v);
52       ~register_pressure();
53 
54       analysis_dependency_class
dependency_classregister_pressure55       dependency_class() const
56       {
57          return (DEPENDENCY_INSTRUCTION_IDENTITY |
58                  DEPENDENCY_INSTRUCTION_DATA_FLOW |
59                  DEPENDENCY_VARIABLES);
60       }
61 
62       bool
validateregister_pressure63       validate(const fs_visitor *) const
64       {
65          /* FINISHME */
66          return true;
67       }
68 
69       unsigned *regs_live_at_ip;
70    };
71 
72    class def_analysis {
73    public:
74       def_analysis(const fs_visitor *v);
75       ~def_analysis();
76 
77       fs_inst *
get(const brw_reg & reg)78       get(const brw_reg &reg) const
79       {
80          return reg.file == VGRF && reg.nr < def_count ?
81                 def_insts[reg.nr] : NULL;
82       }
83 
84       bblock_t *
get_block(const brw_reg & reg)85       get_block(const brw_reg &reg) const
86       {
87          return reg.file == VGRF && reg.nr < def_count ?
88                 def_blocks[reg.nr] : NULL;
89       }
90 
91       uint32_t
get_use_count(const brw_reg & reg)92       get_use_count(const brw_reg &reg) const
93       {
94          return reg.file == VGRF && reg.nr < def_count ?
95                 def_use_counts[reg.nr] : 0;
96       }
97 
count()98       unsigned count() const { return def_count; }
99       unsigned ssa_count() const;
100 
101       void print_stats(const fs_visitor *) const;
102 
103       analysis_dependency_class
dependency_class()104       dependency_class() const
105       {
106          return DEPENDENCY_INSTRUCTION_IDENTITY |
107                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
108                 DEPENDENCY_VARIABLES |
109                 DEPENDENCY_BLOCKS;
110       }
111 
112       bool validate(const fs_visitor *) const;
113 
114    private:
115       void mark_invalid(int);
116       bool fully_defines(const fs_visitor *v, fs_inst *);
117       void update_for_reads(const idom_tree &idom, bblock_t *block, fs_inst *);
118       void update_for_write(const fs_visitor *v, bblock_t *block, fs_inst *);
119 
120       fs_inst **def_insts;
121       bblock_t **def_blocks;
122       uint32_t *def_use_counts;
123       unsigned def_count;
124    };
125 }
126 
127 #define UBO_START ((1 << 16) - 4)
128 
129 /**
130  * Scratch data used when compiling a GLSL geometry shader.
131  */
132 struct brw_gs_compile
133 {
134    struct brw_gs_prog_key key;
135    struct intel_vue_map input_vue_map;
136 
137    unsigned control_data_bits_per_vertex;
138    unsigned control_data_header_size_bits;
139 };
140 
141 namespace brw {
142 class fs_builder;
143 }
144 
145 struct brw_shader_stats {
146    const char *scheduler_mode;
147    unsigned promoted_constants;
148    unsigned spill_count;
149    unsigned fill_count;
150    unsigned max_register_pressure;
151    unsigned non_ssa_registers_after_nir;
152 };
153 
154 /** Register numbers for thread payload fields. */
155 struct thread_payload {
156    /** The number of thread payload registers the hardware will supply. */
157    uint8_t num_regs;
158 
159    virtual ~thread_payload() = default;
160 
161 protected:
thread_payloadthread_payload162    thread_payload() : num_regs() {}
163 };
164 
165 struct vs_thread_payload : public thread_payload {
166    vs_thread_payload(const fs_visitor &v);
167 
168    brw_reg urb_handles;
169 };
170 
171 struct tcs_thread_payload : public thread_payload {
172    tcs_thread_payload(const fs_visitor &v);
173 
174    brw_reg patch_urb_output;
175    brw_reg primitive_id;
176    brw_reg icp_handle_start;
177 };
178 
179 struct tes_thread_payload : public thread_payload {
180    tes_thread_payload(const fs_visitor &v);
181 
182    brw_reg patch_urb_input;
183    brw_reg primitive_id;
184    brw_reg coords[3];
185    brw_reg urb_output;
186 };
187 
188 struct gs_thread_payload : public thread_payload {
189    gs_thread_payload(fs_visitor &v);
190 
191    brw_reg urb_handles;
192    brw_reg primitive_id;
193    brw_reg instance_id;
194    brw_reg icp_handle_start;
195 };
196 
197 struct fs_thread_payload : public thread_payload {
198    fs_thread_payload(const fs_visitor &v,
199                      bool &source_depth_to_render_target);
200 
201    uint8_t subspan_coord_reg[2];
202    uint8_t source_depth_reg[2];
203    uint8_t source_w_reg[2];
204    uint8_t aa_dest_stencil_reg[2];
205    uint8_t dest_depth_reg[2];
206    uint8_t sample_pos_reg[2];
207    uint8_t sample_mask_in_reg[2];
208    uint8_t barycentric_coord_reg[INTEL_BARYCENTRIC_MODE_COUNT][2];
209 
210    uint8_t depth_w_coef_reg;
211    uint8_t pc_bary_coef_reg;
212    uint8_t npc_bary_coef_reg;
213    uint8_t sample_offsets_reg;
214 };
215 
216 struct cs_thread_payload : public thread_payload {
217    cs_thread_payload(const fs_visitor &v);
218 
219    void load_subgroup_id(const brw::fs_builder &bld, brw_reg &dest) const;
220 
221    brw_reg local_invocation_id[3];
222 
223    brw_reg inline_parameter;
224 
225 protected:
226    brw_reg subgroup_id_;
227 };
228 
229 struct task_mesh_thread_payload : public cs_thread_payload {
230    task_mesh_thread_payload(fs_visitor &v);
231 
232    brw_reg extended_parameter_0;
233    brw_reg local_index;
234 
235    brw_reg urb_output;
236 
237    /* URB to read Task memory inputs. Only valid for MESH stage. */
238    brw_reg task_urb_input;
239 };
240 
241 struct bs_thread_payload : public thread_payload {
242    bs_thread_payload(const fs_visitor &v);
243 
244    brw_reg global_arg_ptr;
245    brw_reg local_arg_ptr;
246 
247    void load_shader_type(const brw::fs_builder &bld, brw_reg &dest) const;
248 };
249 
250 enum instruction_scheduler_mode {
251    SCHEDULE_PRE,
252    SCHEDULE_PRE_NON_LIFO,
253    SCHEDULE_PRE_LIFO,
254    SCHEDULE_POST,
255    SCHEDULE_NONE,
256 };
257 
258 class instruction_scheduler;
259 
260 enum brw_shader_phase {
261    BRW_SHADER_PHASE_INITIAL = 0,
262    BRW_SHADER_PHASE_AFTER_NIR,
263    BRW_SHADER_PHASE_AFTER_OPT_LOOP,
264    BRW_SHADER_PHASE_AFTER_EARLY_LOWERING,
265    BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING,
266    BRW_SHADER_PHASE_AFTER_LATE_LOWERING,
267    BRW_SHADER_PHASE_AFTER_REGALLOC,
268 
269    /* Larger value than any other phase. */
270    BRW_SHADER_PHASE_INVALID,
271 };
272 
273 /**
274  * The fragment shader front-end.
275  *
276  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
277  */
278 struct fs_visitor
279 {
280 public:
281    fs_visitor(const struct brw_compiler *compiler,
282               const struct brw_compile_params *params,
283               const brw_base_prog_key *key,
284               struct brw_stage_prog_data *prog_data,
285               const nir_shader *shader,
286               unsigned dispatch_width,
287               bool needs_register_pressure,
288               bool debug_enabled);
289    fs_visitor(const struct brw_compiler *compiler,
290               const struct brw_compile_params *params,
291               const brw_wm_prog_key *key,
292               struct brw_wm_prog_data *prog_data,
293               const nir_shader *shader,
294               unsigned dispatch_width,
295               unsigned num_polygons,
296               bool needs_register_pressure,
297               bool debug_enabled);
298    fs_visitor(const struct brw_compiler *compiler,
299               const struct brw_compile_params *params,
300               struct brw_gs_compile *gs_compile,
301               struct brw_gs_prog_data *prog_data,
302               const nir_shader *shader,
303               bool needs_register_pressure,
304               bool debug_enabled);
305    void init();
306    ~fs_visitor();
307 
308    void import_uniforms(fs_visitor *v);
309 
310    void assign_curb_setup();
311    void convert_attr_sources_to_hw_regs(fs_inst *inst);
312    void calculate_payload_ranges(bool allow_spilling,
313                                  unsigned payload_node_count,
314                                  int *payload_last_use_ip) const;
315    void invalidate_analysis(brw::analysis_dependency_class c);
316 
317    void vfail(const char *msg, va_list args);
318    void fail(const char *msg, ...);
319    void limit_dispatch_width(unsigned n, const char *msg);
320 
321    void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
322    void emit_gs_control_data_bits(const brw_reg &vertex_count);
323    brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
324    brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
325    bool mark_last_urb_write_with_eot();
326    void emit_cs_terminate();
327 
328    const struct brw_compiler *compiler;
329    void *log_data; /* Passed to compiler->*_log functions */
330 
331    const struct intel_device_info * const devinfo;
332    const nir_shader *nir;
333 
334    /** ralloc context for temporary data used during compile */
335    void *mem_ctx;
336 
337    /** List of fs_inst. */
338    exec_list instructions;
339 
340    cfg_t *cfg;
341 
342    gl_shader_stage stage;
343    bool debug_enabled;
344 
345    brw::simple_allocator alloc;
346 
347    const brw_base_prog_key *const key;
348 
349    struct brw_gs_compile *gs_compile;
350 
351    struct brw_stage_prog_data *prog_data;
352 
353    brw_analysis<brw::fs_live_variables, fs_visitor> live_analysis;
354    brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
355    brw_analysis<brw::performance, fs_visitor> performance_analysis;
356    brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
357    brw_analysis<brw::def_analysis, fs_visitor> def_analysis;
358 
359    /** Number of uniform variable components visited. */
360    unsigned uniforms;
361 
362    /** Byte-offset for the next available spot in the scratch space buffer. */
363    unsigned last_scratch;
364 
365    brw_reg frag_depth;
366    brw_reg frag_stencil;
367    brw_reg sample_mask;
368    brw_reg outputs[VARYING_SLOT_MAX];
369    brw_reg dual_src_output;
370    int first_non_payload_grf;
371 
372    enum brw_shader_phase phase;
373 
374    bool failed;
375    char *fail_msg;
376 
377    thread_payload *payload_;
378 
payloadfs_visitor379    thread_payload &payload() {
380       return *this->payload_;
381    }
382 
vs_payloadfs_visitor383    vs_thread_payload &vs_payload() {
384       assert(stage == MESA_SHADER_VERTEX);
385       return *static_cast<vs_thread_payload *>(this->payload_);
386    }
387 
tcs_payloadfs_visitor388    tcs_thread_payload &tcs_payload() {
389       assert(stage == MESA_SHADER_TESS_CTRL);
390       return *static_cast<tcs_thread_payload *>(this->payload_);
391    }
392 
tes_payloadfs_visitor393    tes_thread_payload &tes_payload() {
394       assert(stage == MESA_SHADER_TESS_EVAL);
395       return *static_cast<tes_thread_payload *>(this->payload_);
396    }
397 
gs_payloadfs_visitor398    gs_thread_payload &gs_payload() {
399       assert(stage == MESA_SHADER_GEOMETRY);
400       return *static_cast<gs_thread_payload *>(this->payload_);
401    }
402 
fs_payloadfs_visitor403    fs_thread_payload &fs_payload() {
404       assert(stage == MESA_SHADER_FRAGMENT);
405       return *static_cast<fs_thread_payload *>(this->payload_);
406    };
407 
fs_payloadfs_visitor408    const fs_thread_payload &fs_payload() const {
409       assert(stage == MESA_SHADER_FRAGMENT);
410       return *static_cast<const fs_thread_payload *>(this->payload_);
411    };
412 
cs_payloadfs_visitor413    cs_thread_payload &cs_payload() {
414       assert(gl_shader_stage_uses_workgroup(stage));
415       return *static_cast<cs_thread_payload *>(this->payload_);
416    }
417 
task_mesh_payloadfs_visitor418    task_mesh_thread_payload &task_mesh_payload() {
419       assert(stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH);
420       return *static_cast<task_mesh_thread_payload *>(this->payload_);
421    }
422 
bs_payloadfs_visitor423    bs_thread_payload &bs_payload() {
424       assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
425       return *static_cast<bs_thread_payload *>(this->payload_);
426    }
427 
428    bool source_depth_to_render_target;
429 
430    brw_reg pixel_x;
431    brw_reg pixel_y;
432    brw_reg pixel_z;
433    brw_reg wpos_w;
434    brw_reg pixel_w;
435    brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT];
436    brw_reg final_gs_vertex_count;
437    brw_reg control_data_bits;
438    brw_reg invocation_id;
439 
440    unsigned grf_used;
441    bool spilled_any_registers;
442    bool needs_register_pressure;
443 
444    const unsigned dispatch_width; /**< 8, 16 or 32 */
445    const unsigned max_polygons;
446    unsigned max_dispatch_width;
447 
448    /* The API selected subgroup size */
449    unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
450 
451    unsigned next_address_register_nr;
452 
453    struct brw_shader_stats shader_stats;
454 
455    void debug_optimizer(const nir_shader *nir,
456                         const char *pass_name,
457                         int iteration, int pass_num) const;
458 };
459 
460 void brw_print_instructions(const fs_visitor &s, FILE *file = stderr);
461 
462 void brw_print_instruction(const fs_visitor &s, const fs_inst *inst,
463                            FILE *file = stderr,
464                            const brw::def_analysis *defs = nullptr);
465 
466 void brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb);
467 
468 /**
469  * Return the flag register used in fragment shaders to keep track of live
470  * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
471  * dispatch mode.
472  */
473 static inline unsigned
sample_mask_flag_subreg(const fs_visitor & s)474 sample_mask_flag_subreg(const fs_visitor &s)
475 {
476    assert(s.stage == MESA_SHADER_FRAGMENT);
477    return 2;
478 }
479 
480 namespace brw {
481    brw_reg
482    fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
483                      brw_reg_type type = BRW_TYPE_F,
484                      unsigned n = 1);
485 
486    brw_reg
487    fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);
488 
489    inline brw_reg
dynamic_msaa_flags(const struct brw_wm_prog_data * wm_prog_data)490    dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
491    {
492       return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
493    }
494 
495    void
496    check_dynamic_msaa_flag(const fs_builder &bld,
497                            const struct brw_wm_prog_data *wm_prog_data,
498                            enum intel_msaa_flags flag);
499 
500    bool
501    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
502 }
503 
504 void shuffle_from_32bit_read(const brw::fs_builder &bld,
505                              const brw_reg &dst,
506                              const brw_reg &src,
507                              uint32_t first_component,
508                              uint32_t components);
509 
510 enum intel_barycentric_mode brw_barycentric_mode(const struct brw_wm_prog_key *key,
511                                                  nir_intrinsic_instr *intr);
512 
513 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
514                                   const struct brw_wm_prog_data *prog_data);
515 
516 void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
517 
518 brw_reg brw_sample_mask_reg(const brw::fs_builder &bld);
519 void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);
520 
521 int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
522                                     const brw_stage_prog_data *prog_data);
523 
524 void nir_to_brw(fs_visitor *s);
525 
526 void brw_shader_phase_update(fs_visitor &s, enum brw_shader_phase phase);
527 
528 #ifndef NDEBUG
529 void brw_validate(const fs_visitor &s);
530 #else
brw_validate(const fs_visitor & s)531 static inline void brw_validate(const fs_visitor &s) {}
532 #endif
533 
534 void brw_calculate_cfg(fs_visitor &s);
535 
536 void brw_optimize(fs_visitor &s);
537 
538 instruction_scheduler *brw_prepare_scheduler(fs_visitor &s, void *mem_ctx);
539 void brw_schedule_instructions_pre_ra(fs_visitor &s, instruction_scheduler *sched,
540                                       instruction_scheduler_mode mode);
541 void brw_schedule_instructions_post_ra(fs_visitor &s);
542 
543 void brw_allocate_registers(fs_visitor &s, bool allow_spilling);
544 bool brw_assign_regs(fs_visitor &s, bool allow_spilling, bool spill_all);
545 void brw_assign_regs_trivial(fs_visitor &s);
546 
547 bool brw_lower_3src_null_dest(fs_visitor &s);
548 bool brw_lower_alu_restrictions(fs_visitor &s);
549 bool brw_lower_barycentrics(fs_visitor &s);
550 bool brw_lower_constant_loads(fs_visitor &s);
551 bool brw_lower_csel(fs_visitor &s);
552 bool brw_lower_derivatives(fs_visitor &s);
553 bool brw_lower_dpas(fs_visitor &s);
554 bool brw_lower_find_live_channel(fs_visitor &s);
555 bool brw_lower_indirect_mov(fs_visitor &s);
556 bool brw_lower_integer_multiplication(fs_visitor &s);
557 bool brw_lower_load_payload(fs_visitor &s);
558 bool brw_lower_load_subgroup_invocation(fs_visitor &s);
559 bool brw_lower_logical_sends(fs_visitor &s);
560 bool brw_lower_pack(fs_visitor &s);
561 bool brw_lower_regioning(fs_visitor &s);
562 bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
563 bool brw_lower_scoreboard(fs_visitor &s);
564 bool brw_lower_send_descriptors(fs_visitor &s);
565 bool brw_lower_sends_overlapping_payload(fs_visitor &s);
566 bool brw_lower_simd_width(fs_visitor &s);
567 bool brw_lower_sub_sat(fs_visitor &s);
568 bool brw_lower_subgroup_ops(fs_visitor &s);
569 bool brw_lower_uniform_pull_constant_loads(fs_visitor &s);
570 void brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s);
571 
572 bool brw_opt_address_reg_load(fs_visitor &s);
573 bool brw_opt_algebraic(fs_visitor &s);
574 bool brw_opt_bank_conflicts(fs_visitor &s);
575 bool brw_opt_cmod_propagation(fs_visitor &s);
576 bool brw_opt_combine_constants(fs_visitor &s);
577 bool brw_opt_combine_convergent_txf(fs_visitor &s);
578 bool brw_opt_compact_virtual_grfs(fs_visitor &s);
579 bool brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
580 bool brw_opt_copy_propagation(fs_visitor &s);
581 bool brw_opt_copy_propagation_defs(fs_visitor &s);
582 bool brw_opt_cse_defs(fs_visitor &s);
583 bool brw_opt_dead_code_eliminate(fs_visitor &s);
584 bool brw_opt_eliminate_find_live_channel(fs_visitor &s);
585 bool brw_opt_register_coalesce(fs_visitor &s);
586 bool brw_opt_remove_extra_rounding_modes(fs_visitor &s);
587 bool brw_opt_remove_redundant_halts(fs_visitor &s);
588 bool brw_opt_saturate_propagation(fs_visitor &s);
589 bool brw_opt_split_sends(fs_visitor &s);
590 bool brw_opt_split_virtual_grfs(fs_visitor &s);
591 bool brw_opt_zero_samples(fs_visitor &s);
592 
593 bool brw_workaround_emit_dummy_mov_instruction(fs_visitor &s);
594 bool brw_workaround_memory_fence_before_eot(fs_visitor &s);
595 bool brw_workaround_nomask_control_flow(fs_visitor &s);
596 bool brw_workaround_source_arf_before_eot(fs_visitor &s);
597 
598 /* Helpers. */
599 unsigned brw_get_lowered_simd_width(const fs_visitor *shader,
600                                     const fs_inst *inst);
601