/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#pragma once

#include "brw_cfg.h"
#include "brw_compiler.h"
#include "brw_ir_allocator.h"
#include "brw_ir_fs.h"
#include "brw_fs_live_variables.h"
#include "brw_ir_performance.h"
#include "compiler/nir/nir.h"

struct bblock_t;
namespace {
   struct acp_entry;
}

struct fs_visitor;

namespace brw {
   /**
    * Register pressure analysis of a shader.  Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      register_pressure(const fs_visitor *v);
      ~register_pressure();

      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      bool
      validate(const fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      unsigned *regs_live_at_ip;
   };
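
   /* Illustrative sketch only (assumes the require() accessor of the
    * brw_analysis wrapper used by fs_visitor below): regs_live_at_ip is
    * indexed by instruction IP, so peak pressure can be scanned out as
    *
    *    const register_pressure &rp = s.regpressure_analysis.require();
    *    unsigned max_pressure = 0, ip = 0;
    *    foreach_block_and_inst(block, fs_inst, inst, s.cfg)
    *       max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip++]);
    */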

   class def_analysis {
   public:
      def_analysis(const fs_visitor *v);
      ~def_analysis();

      fs_inst *
      get(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_insts[reg.nr] : NULL;
      }

      bblock_t *
      get_block(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_blocks[reg.nr] : NULL;
      }

      uint32_t
      get_use_count(const brw_reg &reg) const
      {
         return reg.file == VGRF && reg.nr < def_count ?
                def_use_counts[reg.nr] : 0;
      }

      unsigned count() const { return def_count; }
      unsigned ssa_count() const;

      void print_stats(const fs_visitor *) const;

      analysis_dependency_class
      dependency_class() const
      {
         return DEPENDENCY_INSTRUCTION_IDENTITY |
                DEPENDENCY_INSTRUCTION_DATA_FLOW |
                DEPENDENCY_VARIABLES |
                DEPENDENCY_BLOCKS;
      }

      bool validate(const fs_visitor *) const;

   private:
      void mark_invalid(int);
      bool fully_defines(const fs_visitor *v, fs_inst *);
      void update_for_reads(const idom_tree &idom, bblock_t *block, fs_inst *);
      void update_for_write(const fs_visitor *v, bblock_t *block, fs_inst *);

      fs_inst **def_insts;
      bblock_t **def_blocks;
      uint32_t *def_use_counts;
      unsigned def_count;
   };
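
   /* Illustrative sketch only (again assuming brw_analysis::require()):
    * a VGRF written exactly once, by an instruction that fully defines it,
    * gets a def entry, so SSA-style queries look like
    *
    *    const def_analysis &defs = s.def_analysis.require();
    *    if (fs_inst *def = defs.get(inst->src[0]))
    *       ... def is the unique instruction producing inst->src[0] ...
    */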
}

#define UBO_START ((1 << 16) - 4)

/**
 * Scratch data used when compiling a GLSL geometry shader.
 */
struct brw_gs_compile
{
   struct brw_gs_prog_key key;
   struct intel_vue_map input_vue_map;

   unsigned control_data_bits_per_vertex;
   unsigned control_data_header_size_bits;
};

class brw_builder;

struct brw_shader_stats {
   const char *scheduler_mode;
   unsigned promoted_constants;
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
   unsigned non_ssa_registers_after_nir;
};

/** Register numbers for thread payload fields. */
struct thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~thread_payload() = default;

protected:
   thread_payload() : num_regs() {}
};
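
/* The hardware-supplied payload occupies the first num_regs GRFs of the
 * thread; compiler-allocated registers come after it (see
 * fs_visitor::first_non_payload_grf below).
 */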

struct vs_thread_payload : public thread_payload {
   vs_thread_payload(const fs_visitor &v);

   brw_reg urb_handles;
};

struct tcs_thread_payload : public thread_payload {
   tcs_thread_payload(const fs_visitor &v);

   brw_reg patch_urb_output;
   brw_reg primitive_id;
   brw_reg icp_handle_start;
};

struct tes_thread_payload : public thread_payload {
   tes_thread_payload(const fs_visitor &v);

   brw_reg patch_urb_input;
   brw_reg primitive_id;
   brw_reg coords[3];
   brw_reg urb_output;
};

struct gs_thread_payload : public thread_payload {
   gs_thread_payload(fs_visitor &v);

   brw_reg urb_handles;
   brw_reg primitive_id;
   brw_reg instance_id;
   brw_reg icp_handle_start;
};

struct fs_thread_payload : public thread_payload {
   fs_thread_payload(const fs_visitor &v,
                     bool &source_depth_to_render_target);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t barycentric_coord_reg[INTEL_BARYCENTRIC_MODE_COUNT][2];

   uint8_t depth_w_coef_reg;
   uint8_t pc_bary_coef_reg;
   uint8_t npc_bary_coef_reg;
   uint8_t sample_offsets_reg;
};

struct cs_thread_payload : public thread_payload {
   cs_thread_payload(const fs_visitor &v);

   void load_subgroup_id(const brw_builder &bld, brw_reg &dest) const;

   brw_reg local_invocation_id[3];

   brw_reg inline_parameter;

protected:
   brw_reg subgroup_id_;
};
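
/* Illustrative sketch only, assuming brw_builder exposes a vgrf()
 * temporary allocator (hypothetical here):
 *
 *    brw_reg id = bld.vgrf(BRW_TYPE_UD);
 *    s.cs_payload().load_subgroup_id(bld, id);
 */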

struct task_mesh_thread_payload : public cs_thread_payload {
   task_mesh_thread_payload(fs_visitor &v);

   brw_reg extended_parameter_0;
   brw_reg local_index;

   brw_reg urb_output;

   /* URB handle used to read Task memory inputs.  Only valid for the MESH stage. */
   brw_reg task_urb_input;
};

struct bs_thread_payload : public thread_payload {
   bs_thread_payload(const fs_visitor &v);

   brw_reg global_arg_ptr;
   brw_reg local_arg_ptr;

   void load_shader_type(const brw_builder &bld, brw_reg &dest) const;
};

enum brw_shader_phase {
   BRW_SHADER_PHASE_INITIAL = 0,
   BRW_SHADER_PHASE_AFTER_NIR,
   BRW_SHADER_PHASE_AFTER_OPT_LOOP,
   BRW_SHADER_PHASE_AFTER_EARLY_LOWERING,
   BRW_SHADER_PHASE_AFTER_MIDDLE_LOWERING,
   BRW_SHADER_PHASE_AFTER_LATE_LOWERING,
   BRW_SHADER_PHASE_AFTER_REGALLOC,

   /* Larger value than any other phase. */
   BRW_SHADER_PHASE_INVALID,
};
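
/* fs_visitor::phase advances monotonically through these values via
 * brw_shader_phase_update() (declared below), letting validation assert
 * which lowering has already happened.  Illustrative sequence:
 *
 *    brw_optimize(s);
 *    brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_OPT_LOOP);
 */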

/**
 * The shader front-end.
 *
 * Translates NIR into FS IR (see nir_to_brw()).  Despite the "fs" name, it
 * is used for all hardware shader stages.
 */
struct fs_visitor
{
public:
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              const brw_base_prog_key *key,
              struct brw_stage_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              bool needs_register_pressure,
              bool debug_enabled);
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              const brw_wm_prog_key *key,
              struct brw_wm_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              unsigned num_polygons,
              bool needs_register_pressure,
              bool debug_enabled);
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              struct brw_gs_compile *gs_compile,
              struct brw_gs_prog_data *prog_data,
              const nir_shader *shader,
              bool needs_register_pressure,
              bool debug_enabled);
   void init();
   ~fs_visitor();

   void import_uniforms(fs_visitor *v);

   void assign_curb_setup();
   void convert_attr_sources_to_hw_regs(fs_inst *inst);
   void calculate_payload_ranges(bool allow_spilling,
                                 unsigned payload_node_count,
                                 int *payload_last_use_ip) const;
   void invalidate_analysis(brw::analysis_dependency_class c);

   void vfail(const char *msg, va_list args);
   void fail(const char *msg, ...);
   void limit_dispatch_width(unsigned n, const char *msg);

   void emit_urb_writes(const brw_reg &gs_vertex_count = brw_reg());
   void emit_gs_control_data_bits(const brw_reg &vertex_count);
   brw_reg gs_urb_channel_mask(const brw_reg &dword_index);
   brw_reg gs_urb_per_slot_dword_index(const brw_reg &vertex_count);
   bool mark_last_urb_write_with_eot();
   void emit_cs_terminate();

   const struct brw_compiler *compiler;
   void *log_data; /* Passed to compiler->*_log functions */

   const struct intel_device_info * const devinfo;
   const nir_shader *nir;

   /** ralloc context for temporary data used during compile */
   void *mem_ctx;

   /** List of fs_inst. */
   exec_list instructions;

   cfg_t *cfg;

   gl_shader_stage stage;
   bool debug_enabled;

   brw::simple_allocator alloc;

   const brw_base_prog_key *const key;

   struct brw_gs_compile *gs_compile;

   struct brw_stage_prog_data *prog_data;

   brw_analysis<brw::fs_live_variables, fs_visitor> live_analysis;
   brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
   brw_analysis<brw::performance, fs_visitor> performance_analysis;
   brw_analysis<brw::idom_tree, fs_visitor> idom_analysis;
   brw_analysis<brw::def_analysis, fs_visitor> def_analysis;

   /** Number of uniform variable components visited. */
   unsigned uniforms;

   /** Byte-offset for the next available spot in the scratch space buffer. */
   unsigned last_scratch;

   brw_reg frag_depth;
   brw_reg frag_stencil;
   brw_reg sample_mask;
   brw_reg outputs[VARYING_SLOT_MAX];
   brw_reg dual_src_output;
   int first_non_payload_grf;

   enum brw_shader_phase phase;

   bool failed;
   char *fail_msg;

   thread_payload *payload_;

   thread_payload &payload() {
      return *this->payload_;
   }

   vs_thread_payload &vs_payload() {
      assert(stage == MESA_SHADER_VERTEX);
      return *static_cast<vs_thread_payload *>(this->payload_);
   }

   tcs_thread_payload &tcs_payload() {
      assert(stage == MESA_SHADER_TESS_CTRL);
      return *static_cast<tcs_thread_payload *>(this->payload_);
   }

   tes_thread_payload &tes_payload() {
      assert(stage == MESA_SHADER_TESS_EVAL);
      return *static_cast<tes_thread_payload *>(this->payload_);
   }

   gs_thread_payload &gs_payload() {
      assert(stage == MESA_SHADER_GEOMETRY);
      return *static_cast<gs_thread_payload *>(this->payload_);
   }

   fs_thread_payload &fs_payload() {
      assert(stage == MESA_SHADER_FRAGMENT);
      return *static_cast<fs_thread_payload *>(this->payload_);
   }

   const fs_thread_payload &fs_payload() const {
      assert(stage == MESA_SHADER_FRAGMENT);
      return *static_cast<const fs_thread_payload *>(this->payload_);
   }

   cs_thread_payload &cs_payload() {
      assert(gl_shader_stage_uses_workgroup(stage));
      return *static_cast<cs_thread_payload *>(this->payload_);
   }

   task_mesh_thread_payload &task_mesh_payload() {
      assert(stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH);
      return *static_cast<task_mesh_thread_payload *>(this->payload_);
   }

   bs_thread_payload &bs_payload() {
      assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
      return *static_cast<bs_thread_payload *>(this->payload_);
   }

   bool source_depth_to_render_target;

   brw_reg pixel_x;
   brw_reg pixel_y;
   brw_reg pixel_z;
   brw_reg wpos_w;
   brw_reg pixel_w;
   brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT];
   brw_reg final_gs_vertex_count;
   brw_reg control_data_bits;
   brw_reg invocation_id;

   unsigned grf_used;
   bool spilled_any_registers;
   bool needs_register_pressure;

   const unsigned dispatch_width; /**< 8, 16 or 32 */
   const unsigned max_polygons;
   unsigned max_dispatch_width;

   /* The API-selected subgroup size */
   unsigned api_subgroup_size; /**< 0, 8, 16 or 32 */

   unsigned next_address_register_nr;

   struct brw_shader_stats shader_stats;

   void debug_optimizer(const nir_shader *nir,
                        const char *pass_name,
                        int iteration, int pass_num) const;
};
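
/* Illustrative sketch of how the entry points declared below fit together;
 * real callers also handle SIMD width selection, spill fallbacks and
 * failure (fs_visitor::failed) reporting:
 *
 *    fs_visitor s(compiler, params, key, prog_data, nir,
 *                 dispatch_width, false, false);
 *    nir_to_brw(&s);
 *    brw_calculate_cfg(s);
 *    brw_optimize(s);
 *    brw_allocate_registers(s, true);
 */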

void brw_print_instructions(const fs_visitor &s, FILE *file = stderr);

void brw_print_instruction(const fs_visitor &s, const fs_inst *inst,
                           FILE *file = stderr,
                           const brw::def_analysis *defs = nullptr);

void brw_print_swsb(FILE *f, const struct intel_device_info *devinfo, const tgl_swsb swsb);

/**
 * Return the flag register used in fragment shaders to keep track of live
 * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
 * dispatch mode.
 */
static inline unsigned
sample_mask_flag_subreg(const fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_FRAGMENT);
   return 2;
}

inline brw_reg
brw_dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
{
   return brw_uniform_reg(wm_prog_data->msaa_flags_param, BRW_TYPE_UD);
}

enum intel_barycentric_mode brw_barycentric_mode(const struct brw_wm_prog_key *key,
                                                 nir_intrinsic_instr *intr);

uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                  const struct brw_wm_prog_data *prog_data);

void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);

int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const brw_stage_prog_data *prog_data);

void nir_to_brw(fs_visitor *s);

void brw_shader_phase_update(fs_visitor &s, enum brw_shader_phase phase);

#ifndef NDEBUG
void brw_validate(const fs_visitor &s);
#else
static inline void brw_validate(const fs_visitor &s) {}
#endif

void brw_calculate_cfg(fs_visitor &s);

void brw_optimize(fs_visitor &s);

enum brw_instruction_scheduler_mode {
   BRW_SCHEDULE_PRE,
   BRW_SCHEDULE_PRE_NON_LIFO,
   BRW_SCHEDULE_PRE_LIFO,
   BRW_SCHEDULE_POST,
   BRW_SCHEDULE_NONE,
};

class brw_instruction_scheduler;

brw_instruction_scheduler *brw_prepare_scheduler(fs_visitor &s, void *mem_ctx);
void brw_schedule_instructions_pre_ra(fs_visitor &s, brw_instruction_scheduler *sched,
                                      brw_instruction_scheduler_mode mode);
void brw_schedule_instructions_post_ra(fs_visitor &s);

void brw_allocate_registers(fs_visitor &s, bool allow_spilling);
bool brw_assign_regs(fs_visitor &s, bool allow_spilling, bool spill_all);
void brw_assign_regs_trivial(fs_visitor &s);
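
/* Illustrative sketch: pre-RA scheduling feeds register assignment, and a
 * failed allocation can be retried with a different scheduler mode (or with
 * spilling) before giving up:
 *
 *    brw_instruction_scheduler *sched = brw_prepare_scheduler(s, mem_ctx);
 *    brw_schedule_instructions_pre_ra(s, sched, BRW_SCHEDULE_PRE);
 *    if (!brw_assign_regs(s, allow_spilling, false))
 *       ... retry, e.g. with BRW_SCHEDULE_PRE_NON_LIFO ...
 */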

bool brw_lower_3src_null_dest(fs_visitor &s);
bool brw_lower_alu_restrictions(fs_visitor &s);
bool brw_lower_barycentrics(fs_visitor &s);
bool brw_lower_constant_loads(fs_visitor &s);
bool brw_lower_csel(fs_visitor &s);
bool brw_lower_derivatives(fs_visitor &s);
bool brw_lower_dpas(fs_visitor &s);
bool brw_lower_find_live_channel(fs_visitor &s);
bool brw_lower_indirect_mov(fs_visitor &s);
bool brw_lower_integer_multiplication(fs_visitor &s);
bool brw_lower_load_payload(fs_visitor &s);
bool brw_lower_load_subgroup_invocation(fs_visitor &s);
bool brw_lower_logical_sends(fs_visitor &s);
bool brw_lower_pack(fs_visitor &s);
bool brw_lower_regioning(fs_visitor &s);
bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
bool brw_lower_scoreboard(fs_visitor &s);
bool brw_lower_send_descriptors(fs_visitor &s);
bool brw_lower_send_gather(fs_visitor &s);
bool brw_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_lower_simd_width(fs_visitor &s);
bool brw_lower_src_modifiers(fs_visitor &s, bblock_t *block, fs_inst *inst, unsigned i);
bool brw_lower_sub_sat(fs_visitor &s);
bool brw_lower_subgroup_ops(fs_visitor &s);
bool brw_lower_uniform_pull_constant_loads(fs_visitor &s);
void brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s);

bool brw_opt_address_reg_load(fs_visitor &s);
bool brw_opt_algebraic(fs_visitor &s);
bool brw_opt_bank_conflicts(fs_visitor &s);
bool brw_opt_cmod_propagation(fs_visitor &s);
bool brw_opt_combine_constants(fs_visitor &s);
bool brw_opt_combine_convergent_txf(fs_visitor &s);
bool brw_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_opt_constant_fold_instruction(const intel_device_info *devinfo, fs_inst *inst);
bool brw_opt_copy_propagation(fs_visitor &s);
bool brw_opt_copy_propagation_defs(fs_visitor &s);
bool brw_opt_cse_defs(fs_visitor &s);
bool brw_opt_dead_code_eliminate(fs_visitor &s);
bool brw_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_opt_register_coalesce(fs_visitor &s);
bool brw_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_opt_remove_redundant_halts(fs_visitor &s);
bool brw_opt_saturate_propagation(fs_visitor &s);
bool brw_opt_send_gather_to_send(fs_visitor &s);
bool brw_opt_send_to_send_gather(fs_visitor &s);
bool brw_opt_split_sends(fs_visitor &s);
bool brw_opt_split_virtual_grfs(fs_visitor &s);
bool brw_opt_zero_samples(fs_visitor &s);

bool brw_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_workaround_nomask_control_flow(fs_visitor &s);
bool brw_workaround_source_arf_before_eot(fs_visitor &s);

/* Helpers. */
unsigned brw_get_lowered_simd_width(const fs_visitor *shader,
                                    const fs_inst *inst);