/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#ifndef ELK_FS_H
#define ELK_FS_H

#include "elk_shader.h"
#include "elk_ir_fs.h"
#include "elk_fs_live_variables.h"
#include "elk_ir_performance.h"
#include "compiler/nir/nir.h"

struct elk_bblock_t;
namespace {
   struct acp_entry;
}

class elk_fs_visitor;

namespace elk {
   /**
    * Register pressure analysis of a shader.  Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      register_pressure(const elk_fs_visitor *v);
      ~register_pressure();

      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      bool
      validate(const elk_fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      unsigned *regs_live_at_ip;
   };
}

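/* A minimal usage sketch (an illustration, not code taken from this header):
 * the analysis is normally requested through the regpressure_analysis member
 * of elk_fs_visitor declared further below, and regs_live_at_ip is indexed by
 * instruction IP to find the worst-case GRF demand, e.g.
 *
 *    const elk::register_pressure &rp = v->regpressure_analysis.require();
 *    unsigned max_pressure = 0;
 *    for (unsigned ip = 0; ip < num_instructions; ip++)
 *       max_pressure = MAX2(max_pressure, rp.regs_live_at_ip[ip]);
 *
 * elk_analysis::require() and the iteration bound are assumptions here;
 * compute_max_register_pressure() in elk_fs_visitor performs the real version
 * of this walk.
 */
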
struct elk_gs_compile;

namespace elk {
class fs_builder;
}

struct shader_stats {
   const char *scheduler_mode;
   unsigned promoted_constants;
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
};

/** Register numbers for thread payload fields. */
struct elk_thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~elk_thread_payload() = default;

protected:
   elk_thread_payload() : num_regs() {}
};

struct elk_vs_thread_payload : public elk_thread_payload {
   elk_vs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg urb_handles;
};

struct elk_tcs_thread_payload : public elk_thread_payload {
   elk_tcs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_output;
   elk_fs_reg primitive_id;
   elk_fs_reg icp_handle_start;
};

struct elk_tes_thread_payload : public elk_thread_payload {
   elk_tes_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_input;
   elk_fs_reg primitive_id;
   elk_fs_reg coords[3];
   elk_fs_reg urb_output;
};

struct elk_gs_thread_payload : public elk_thread_payload {
   elk_gs_thread_payload(elk_fs_visitor &v);

   elk_fs_reg urb_handles;
   elk_fs_reg primitive_id;
   elk_fs_reg instance_id;
   elk_fs_reg icp_handle_start;
};

struct elk_fs_thread_payload : public elk_thread_payload {
   elk_fs_thread_payload(const elk_fs_visitor &v,
                         bool &source_depth_to_render_target,
                         bool &runtime_check_aads_emit);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t depth_w_coef_reg;
   uint8_t barycentric_coord_reg[ELK_BARYCENTRIC_MODE_COUNT][2];
};

struct elk_cs_thread_payload : public elk_thread_payload {
   elk_cs_thread_payload(const elk_fs_visitor &v);

   void load_subgroup_id(const elk::fs_builder &bld, elk_fs_reg &dest) const;

   elk_fs_reg local_invocation_id[3];

protected:
   elk_fs_reg subgroup_id_;
};

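/* A hedged usage sketch (not code from this header): the per-SIMD-half
 * register indices recorded in elk_fs_thread_payload are typically turned
 * into virtual registers with elk::fetch_payload_reg(), declared near the
 * end of this file, e.g.
 *
 *    elk_fs_reg src_depth =
 *       elk::fetch_payload_reg(bld, s.fs_payload().source_depth_reg);
 *
 * where "bld" is an elk::fs_builder and "s" an elk_fs_visitor; both names are
 * placeholders for this example.
 */
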
class elk_fs_instruction_scheduler;

/**
 * The fragment shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
 */
class elk_fs_visitor : public elk_backend_shader
{
public:
   elk_fs_visitor(const struct elk_compiler *compiler,
              const struct elk_compile_params *params,
              const elk_base_prog_key *key,
              struct elk_stage_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              bool needs_register_pressure,
              bool debug_enabled);
   elk_fs_visitor(const struct elk_compiler *compiler,
              const struct elk_compile_params *params,
              const elk_wm_prog_key *key,
              struct elk_wm_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              unsigned num_polygons,
              bool needs_register_pressure,
              bool debug_enabled);
   elk_fs_visitor(const struct elk_compiler *compiler,
              const struct elk_compile_params *params,
              struct elk_gs_compile *gs_compile,
              struct elk_gs_prog_data *prog_data,
              const nir_shader *shader,
              bool needs_register_pressure,
              bool debug_enabled);
   void init();
   ~elk_fs_visitor();

   elk_fs_reg vgrf(const glsl_type *const type);
   void import_uniforms(elk_fs_visitor *v);

   void VARYING_PULL_CONSTANT_LOAD(const elk::fs_builder &bld,
                                   const elk_fs_reg &dst,
                                   const elk_fs_reg &surface,
                                   const elk_fs_reg &surface_handle,
                                   const elk_fs_reg &varying_offset,
                                   uint32_t const_offset,
                                   uint8_t alignment,
                                   unsigned components);
   void DEP_RESOLVE_MOV(const elk::fs_builder &bld, int grf);

   bool run_fs(bool allow_spilling, bool do_rep_send);
   bool run_vs();
   bool run_tcs();
   bool run_tes();
   bool run_gs();
   bool run_cs(bool allow_spilling);
   void optimize();
   void allocate_registers(bool allow_spilling);
   uint32_t compute_max_register_pressure();
   bool fixup_sends_duplicate_payload();
   void fixup_3src_null_dest();
   void emit_dummy_memory_fence_before_eot();
   void emit_dummy_mov_instruction();
   bool fixup_nomask_control_flow();
   void assign_curb_setup();
   void assign_urb_setup();
   void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
   void assign_vs_urb_setup();
   void assign_tcs_urb_setup();
   void assign_tes_urb_setup();
   void assign_gs_urb_setup();
   bool assign_regs(bool allow_spilling, bool spill_all);
   void assign_regs_trivial();
   void calculate_payload_ranges(unsigned payload_node_count,
                                 int *payload_last_use_ip) const;
   bool split_virtual_grfs();
   bool compact_virtual_grfs();
   void assign_constant_locations();
   bool get_pull_locs(const elk_fs_reg &src, unsigned *out_surf_index,
                      unsigned *out_pull_index);
   bool lower_constant_loads();
   virtual void invalidate_analysis(elk::analysis_dependency_class c);

#ifndef NDEBUG
   void validate();
#else
   void validate() {}
#endif

   bool opt_algebraic();
   bool opt_redundant_halt();
   bool opt_cse();
   bool opt_cse_local(const elk::fs_live_variables &live, elk_bblock_t *block, int &ip);

   bool opt_copy_propagation();
   bool opt_bank_conflicts();
   bool opt_split_sends();
   bool register_coalesce();
   bool compute_to_mrf();
   bool eliminate_find_live_channel();
   bool dead_code_eliminate();
   bool remove_duplicate_mrf_writes();
   bool remove_extra_rounding_modes();

   elk_fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
   void schedule_instructions_pre_ra(elk_fs_instruction_scheduler *sched,
                                     instruction_scheduler_mode mode);
   void schedule_instructions_post_ra();

   void insert_gfx4_send_dependency_workarounds();
   void insert_gfx4_pre_send_dependency_workarounds(elk_bblock_t *block,
                                                    elk_fs_inst *inst);
   void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
                                                     elk_fs_inst *inst);
   void vfail(const char *msg, va_list args);
   void fail(const char *msg, ...);
   void limit_dispatch_width(unsigned n, const char *msg);
   bool lower_uniform_pull_constant_loads();
   bool lower_load_payload();
   bool lower_pack();
   bool lower_regioning();
   bool lower_logical_sends();
   bool lower_integer_multiplication();
   bool lower_minmax();
   bool lower_simd_width();
   bool lower_barycentrics();
   bool lower_derivatives();
   bool lower_find_live_channel();
   bool lower_scoreboard();
   bool lower_sub_sat();
   bool opt_combine_constants();

   void emit_repclear_shader();
   void emit_interpolation_setup_gfx4();
   void emit_interpolation_setup_gfx6();
   bool opt_peephole_sel();
   bool opt_saturate_propagation();
   bool opt_cmod_propagation();
   bool opt_zero_samples();

   void set_tcs_invocation_id();

   void emit_alpha_test();
   elk_fs_inst *emit_single_fb_write(const elk::fs_builder &bld,
                                 elk_fs_reg color1, elk_fs_reg color2,
                                 elk_fs_reg src0_alpha, unsigned components);
   void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
   void emit_fb_writes();
   void emit_urb_writes(const elk_fs_reg &gs_vertex_count = elk_fs_reg());
   void emit_gs_control_data_bits(const elk_fs_reg &vertex_count);
   void emit_gs_thread_end();
   bool mark_last_urb_write_with_eot();
   void emit_tcs_thread_end();
   void emit_urb_fence();
   void emit_cs_terminate();

   elk_fs_reg interp_reg(const elk::fs_builder &bld, unsigned location,
                     unsigned channel, unsigned comp);
   elk_fs_reg per_primitive_reg(const elk::fs_builder &bld,
                            int location, unsigned comp);

   virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
   virtual void dump_instructions_to_file(FILE *file) const;

   const elk_base_prog_key *const key;
   const struct elk_sampler_prog_key_data *key_tex;

   struct elk_gs_compile *gs_compile;

   struct elk_stage_prog_data *prog_data;

   elk_analysis<elk::fs_live_variables, elk_backend_shader> live_analysis;
   elk_analysis<elk::register_pressure, elk_fs_visitor> regpressure_analysis;
   elk_analysis<elk::performance, elk_fs_visitor> performance_analysis;

   /** Number of uniform variable components visited. */
   unsigned uniforms;

   /** Byte-offset for the next available spot in the scratch space buffer. */
   unsigned last_scratch;

   /**
    * Array mapping UNIFORM register numbers to the push parameter index,
    * or -1 if this uniform register isn't being uploaded as a push constant.
    */
   int *push_constant_loc;

   elk_fs_reg frag_depth;
   elk_fs_reg frag_stencil;
   elk_fs_reg sample_mask;
   elk_fs_reg outputs[VARYING_SLOT_MAX];
   elk_fs_reg dual_src_output;
   int first_non_payload_grf;
   /** Either ELK_MAX_GRF or GFX7_MRF_HACK_START */
   unsigned max_grf;

   bool failed;
   char *fail_msg;

   elk_thread_payload *payload_;

   elk_thread_payload &payload() {
      return *this->payload_;
   }

   elk_vs_thread_payload &vs_payload() {
      assert(stage == MESA_SHADER_VERTEX);
      return *static_cast<elk_vs_thread_payload *>(this->payload_);
   }

   elk_tcs_thread_payload &tcs_payload() {
      assert(stage == MESA_SHADER_TESS_CTRL);
      return *static_cast<elk_tcs_thread_payload *>(this->payload_);
   }

   elk_tes_thread_payload &tes_payload() {
      assert(stage == MESA_SHADER_TESS_EVAL);
      return *static_cast<elk_tes_thread_payload *>(this->payload_);
   }

   elk_gs_thread_payload &gs_payload() {
      assert(stage == MESA_SHADER_GEOMETRY);
      return *static_cast<elk_gs_thread_payload *>(this->payload_);
   }

   elk_fs_thread_payload &fs_payload() {
      assert(stage == MESA_SHADER_FRAGMENT);
      return *static_cast<elk_fs_thread_payload *>(this->payload_);
   }

   elk_cs_thread_payload &cs_payload() {
      assert(gl_shader_stage_uses_workgroup(stage));
      return *static_cast<elk_cs_thread_payload *>(this->payload_);
   }

   bool source_depth_to_render_target;
   bool runtime_check_aads_emit;

   elk_fs_reg pixel_x;
   elk_fs_reg pixel_y;
   elk_fs_reg pixel_z;
   elk_fs_reg wpos_w;
   elk_fs_reg pixel_w;
   elk_fs_reg delta_xy[ELK_BARYCENTRIC_MODE_COUNT];
   elk_fs_reg final_gs_vertex_count;
   elk_fs_reg control_data_bits;
   elk_fs_reg invocation_id;

   unsigned grf_used;
   bool spilled_any_registers;
   bool needs_register_pressure;

   const unsigned dispatch_width; /**< 8, 16 or 32 */
   const unsigned max_polygons;
   unsigned max_dispatch_width;

   /* The API selected subgroup size */
   unsigned api_subgroup_size; /**< 0, 8, 16, 32 */

   struct shader_stats shader_stats;

   void lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block);
   void lower_mul_qword_inst(elk_fs_inst *inst, elk_bblock_t *block);
   void lower_mulh_inst(elk_fs_inst *inst, elk_bblock_t *block);

   unsigned workgroup_size() const;

   void debug_optimizer(const nir_shader *nir,
                        const char *pass_name,
                        int iteration, int pass_num) const;
};

/**
 * Return the flag register used in fragment shaders to keep track of live
 * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
 * dispatch mode, while earlier generations are constrained to f0.1, which
 * limits the dispatch width to SIMD16 for fragment shaders that use discard.
 */
static inline unsigned
sample_mask_flag_subreg(const elk_fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_FRAGMENT);
   return s.devinfo->ver >= 7 ? 2 : 1;
}

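/* A hedged reading of the value above (illustration only): flag subregisters
 * are numbered f0.0 = 0, f0.1 = 1, f1.0 = 2, f1.1 = 3, so the function picks
 * f1.0 on Gfx7+ and f0.1 on earlier parts, matching the comment.  Callers
 * normally go through the helpers declared at the bottom of this header, e.g.
 *
 *    elk_fs_reg mask = elk_sample_mask_reg(bld);
 *    elk_emit_predicate_on_sample_mask(bld, inst);
 *
 * where "bld" and "inst" stand for the current elk::fs_builder and the
 * instruction being predicated.
 */
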
/**
 * The fragment shader code generator.
 *
 * Translates FS IR to actual i965 assembly code.
 */
class elk_fs_generator
{
public:
   elk_fs_generator(const struct elk_compiler *compiler,
                const struct elk_compile_params *params,
                struct elk_stage_prog_data *prog_data,
                bool runtime_check_aads_emit,
                gl_shader_stage stage);
   ~elk_fs_generator();

   void enable_debug(const char *shader_name);
   int generate_code(const elk_cfg_t *cfg, int dispatch_width,
                     struct shader_stats shader_stats,
                     const elk::performance &perf,
                     struct elk_compile_stats *stats,
                     unsigned max_polygons = 0);
   void add_const_data(void *data, unsigned size);
   const unsigned *get_assembly();

private:
   void fire_fb_write(elk_fs_inst *inst,
                      struct elk_reg payload,
                      struct elk_reg implied_header,
                      GLuint nr);
   void generate_send(elk_fs_inst *inst,
                      struct elk_reg dst,
                      struct elk_reg desc,
                      struct elk_reg ex_desc,
                      struct elk_reg payload,
                      struct elk_reg payload2);
   void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
   void generate_fb_read(elk_fs_inst *inst, struct elk_reg dst,
                         struct elk_reg payload);
   void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
   void generate_barrier(elk_fs_inst *inst, struct elk_reg src);
   bool generate_linterp(elk_fs_inst *inst, struct elk_reg dst,
                         struct elk_reg *src);
   void generate_tex(elk_fs_inst *inst, struct elk_reg dst,
                     struct elk_reg surface_index,
                     struct elk_reg sampler_index);
   void generate_ddx(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_ddy(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_scratch_write(elk_fs_inst *inst, struct elk_reg src);
   void generate_scratch_read(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_read_gfx7(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst);
   void generate_uniform_pull_constant_load(elk_fs_inst *inst, struct elk_reg dst,
                                            struct elk_reg index,
                                            struct elk_reg offset);
   void generate_varying_pull_constant_load_gfx4(elk_fs_inst *inst,
                                                 struct elk_reg dst,
                                                 struct elk_reg index);

   void generate_set_sample_id(elk_fs_inst *inst,
                               struct elk_reg dst,
                               struct elk_reg src0,
                               struct elk_reg src1);

   void generate_halt(elk_fs_inst *inst);

   void generate_mov_indirect(elk_fs_inst *inst,
                              struct elk_reg dst,
                              struct elk_reg reg,
                              struct elk_reg indirect_byte_offset);

   void generate_shuffle(elk_fs_inst *inst,
                         struct elk_reg dst,
                         struct elk_reg src,
                         struct elk_reg idx);

   void generate_quad_swizzle(const elk_fs_inst *inst,
                              struct elk_reg dst, struct elk_reg src,
                              unsigned swiz);

   bool patch_halt_jumps();

   const struct elk_compiler *compiler;
   const struct elk_compile_params *params;

   const struct intel_device_info *devinfo;

   struct elk_codegen *p;
   struct elk_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   exec_list discard_halt_patches;
   bool runtime_check_aads_emit;
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};

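/* A rough end-to-end sketch of how the two classes above fit together (an
 * illustration based only on the declarations in this header, not a verbatim
 * excerpt of the driver code):
 *
 *    elk_fs_visitor v(compiler, params, key, prog_data, nir, dispatch_width,
 *                     num_polygons, needs_register_pressure, debug_enabled);
 *    if (v.run_fs(allow_spilling, do_rep_send)) {
 *       elk_fs_generator g(compiler, params, prog_data,
 *                          v.runtime_check_aads_emit, MESA_SHADER_FRAGMENT);
 *       g.generate_code(v.cfg, dispatch_width, v.shader_stats,
 *                       v.performance_analysis.require(), stats);
 *       const unsigned *assembly = g.get_assembly();
 *    }
 *
 * v.cfg and performance_analysis.require() come from elk_backend_shader and
 * the analysis wrapper and are assumptions here; the fragment-shader compile
 * entry point in the compiler proper is the authoritative version of this
 * sequence.
 */
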
namespace elk {
   elk_fs_reg
   fetch_payload_reg(const elk::fs_builder &bld, uint8_t regs[2],
                     elk_reg_type type = ELK_REGISTER_TYPE_F,
                     unsigned n = 1);

   elk_fs_reg
   fetch_barycentric_reg(const elk::fs_builder &bld, uint8_t regs[2]);

   inline elk_fs_reg
   dynamic_msaa_flags(const struct elk_wm_prog_data *wm_prog_data)
   {
      return elk_fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
                        ELK_REGISTER_TYPE_UD);
   }

   void
   check_dynamic_msaa_flag(const fs_builder &bld,
                           const struct elk_wm_prog_data *wm_prog_data,
                           enum intel_msaa_flags flag);

   bool
   lower_src_modifiers(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, unsigned i);
}

void elk_shuffle_from_32bit_read(const elk::fs_builder &bld,
                                 const elk_fs_reg &dst,
                                 const elk_fs_reg &src,
                                 uint32_t first_component,
                                 uint32_t components);

elk_fs_reg elk_setup_imm_df(const elk::fs_builder &bld,
                            double v);

elk_fs_reg elk_setup_imm_b(const elk::fs_builder &bld,
                           int8_t v);

elk_fs_reg elk_setup_imm_ub(const elk::fs_builder &bld,
                            uint8_t v);

enum elk_barycentric_mode elk_barycentric_mode(nir_intrinsic_instr *intr);

uint32_t elk_fb_write_msg_control(const elk_fs_inst *inst,
                                  const struct elk_wm_prog_data *prog_data);

void elk_compute_urb_setup_index(struct elk_wm_prog_data *wm_prog_data);

bool elk_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);

elk_fs_reg elk_sample_mask_reg(const elk::fs_builder &bld);
void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst *inst);

int elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const elk_stage_prog_data *prog_data);

bool elk_lower_dpas(elk_fs_visitor &v);

void nir_to_elk(elk_fs_visitor *s);

#endif /* ELK_FS_H */