1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #pragma once
29 
30 #include "elk_shader.h"
31 #include "elk_ir_fs.h"
32 #include "elk_fs_live_variables.h"
33 #include "elk_ir_performance.h"
34 #include "compiler/nir/nir.h"
35 
struct elk_bblock_t;
/* NOTE(review): an anonymous namespace in a header gives acp_entry internal
 * linkage per translation unit; presumably intentional since acp_entry is
 * only ever used via opaque pointers here — confirm before changing.
 */
namespace {
   struct acp_entry;
}

class elk_fs_visitor;
namespace elk {
   /**
    * Register pressure analysis of a shader.  Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      register_pressure(const elk_fs_visitor *v);
      register_pressure(const register_pressure &) = delete;
      ~register_pressure();
      register_pressure & operator=(const register_pressure &) = delete;

      /* Dependencies that, when invalidated, require this analysis to be
       * recomputed (consumed by the elk_analysis<> caching wrapper below).
       */
      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      /* Internal-consistency check hook for the analysis framework. */
      bool
      validate(const elk_fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      /* Per-instruction (indexed by IP) count of live registers, in GRF
       * units as described in the struct comment above.
       */
      unsigned *regs_live_at_ip;
   };
}
72 
struct elk_gs_compile;

namespace elk {
class fs_builder;
}
78 
/* Compilation statistics reported alongside the generated code. */
struct elk_shader_stats {
   /* Name of the instruction-scheduling heuristic that was used. */
   const char *scheduler_mode;
   unsigned promoted_constants;
   /* Scratch spill/fill counts from register allocation. */
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
};
86 
/** Register numbers for thread payload fields. */
struct elk_elk_thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   /* Virtual destructor: instances are owned and deleted through a base
    * pointer (elk_fs_visitor::payload_ below).
    */
   virtual ~elk_elk_thread_payload() = default;

protected:
   /* Only constructible by the per-stage subclasses. */
   elk_elk_thread_payload() : num_regs() {}
};
97 
/* Vertex shader thread payload layout. */
struct elk_vs_thread_payload : public elk_elk_thread_payload {
   elk_vs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg urb_handles;
};
103 
/* Tessellation control shader thread payload layout. */
struct elk_tcs_thread_payload : public elk_elk_thread_payload {
   elk_tcs_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_output;
   elk_fs_reg primitive_id;
   /* First of the incoming control-point handles — TODO confirm layout. */
   elk_fs_reg icp_handle_start;
};
111 
/* Tessellation evaluation shader thread payload layout. */
struct elk_tes_thread_payload : public elk_elk_thread_payload {
   elk_tes_thread_payload(const elk_fs_visitor &v);

   elk_fs_reg patch_urb_input;
   elk_fs_reg primitive_id;
   /* Domain coordinates (u, v, w). */
   elk_fs_reg coords[3];
   elk_fs_reg urb_output;
};
120 
/* Geometry shader thread payload layout. */
struct elk_gs_thread_payload : public elk_elk_thread_payload {
   elk_gs_thread_payload(elk_fs_visitor &v);

   elk_fs_reg urb_handles;
   elk_fs_reg primitive_id;
   elk_fs_reg instance_id;
   elk_fs_reg icp_handle_start;
};
129 
/* Fragment shader thread payload layout.  The [2] arrays presumably hold
 * one register number per SIMD16 half — TODO confirm against the ctor.
 */
struct elk_fs_thread_payload : public elk_elk_thread_payload {
   elk_fs_thread_payload(const elk_fs_visitor &v,
                     bool &source_depth_to_render_target,
                     bool &runtime_check_aads_emit);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t depth_w_coef_reg;
   /* One entry per barycentric interpolation mode. */
   uint8_t barycentric_coord_reg[ELK_BARYCENTRIC_MODE_COUNT][2];
};
145 
/* Compute shader thread payload layout. */
struct elk_cs_thread_payload : public elk_elk_thread_payload {
   elk_cs_thread_payload(const elk_fs_visitor &v);

   /* Copies the subgroup ID (from subgroup_id_ below) into \p dest. */
   void load_subgroup_id(const elk::fs_builder &bld, elk_fs_reg &dest) const;

   elk_fs_reg local_invocation_id[3];

protected:
   elk_fs_reg subgroup_id_;
};
156 
class elk_fs_instruction_scheduler;
158 
/**
 * The fragment shader front-end.
 *
 * Translates NIR into FS IR.
 */
164 class elk_fs_visitor : public elk_backend_shader
165 {
166 public:
167    elk_fs_visitor(const struct elk_compiler *compiler,
168               const struct elk_compile_params *params,
169               const elk_base_prog_key *key,
170               struct elk_stage_prog_data *prog_data,
171               const nir_shader *shader,
172               unsigned dispatch_width,
173               bool needs_register_pressure,
174               bool debug_enabled);
175    elk_fs_visitor(const struct elk_compiler *compiler,
176               const struct elk_compile_params *params,
177               const elk_wm_prog_key *key,
178               struct elk_wm_prog_data *prog_data,
179               const nir_shader *shader,
180               unsigned dispatch_width,
181               bool needs_register_pressure,
182               bool debug_enabled);
183    elk_fs_visitor(const struct elk_compiler *compiler,
184               const struct elk_compile_params *params,
185               struct elk_gs_compile *gs_compile,
186               struct elk_gs_prog_data *prog_data,
187               const nir_shader *shader,
188               bool needs_register_pressure,
189               bool debug_enabled);
190    elk_fs_visitor(const elk_fs_visitor &) = delete;
191    void init();
192    ~elk_fs_visitor();
193 
194    elk_fs_visitor & operator=(const elk_fs_visitor &) = delete;
195 
196    elk_fs_reg vgrf(const glsl_type *const type);
197    void import_uniforms(elk_fs_visitor *v);
198 
199    void VARYING_PULL_CONSTANT_LOAD(const elk::fs_builder &bld,
200                                    const elk_fs_reg &dst,
201                                    const elk_fs_reg &surface,
202                                    const elk_fs_reg &surface_handle,
203                                    const elk_fs_reg &varying_offset,
204                                    uint32_t const_offset,
205                                    uint8_t alignment,
206                                    unsigned components);
207    void DEP_RESOLVE_MOV(const elk::fs_builder &bld, int grf);
208 
209    bool run_fs(bool allow_spilling, bool do_rep_send);
210    bool run_vs();
211    bool run_tcs();
212    bool run_tes();
213    bool run_gs();
214    bool run_cs(bool allow_spilling);
215    void optimize();
216    void allocate_registers(bool allow_spilling);
217    uint32_t compute_max_register_pressure();
218    void fixup_3src_null_dest();
219    void assign_curb_setup();
220    void assign_urb_setup();
221    void convert_attr_sources_to_hw_regs(elk_fs_inst *inst);
222    void assign_vs_urb_setup();
223    void assign_tcs_urb_setup();
224    void assign_tes_urb_setup();
225    void assign_gs_urb_setup();
226    bool assign_regs(bool allow_spilling, bool spill_all);
227    void assign_regs_trivial();
228    void calculate_payload_ranges(unsigned payload_node_count,
229                                  int *payload_last_use_ip) const;
230    bool split_virtual_grfs();
231    bool compact_virtual_grfs();
232    void assign_constant_locations();
233    bool get_pull_locs(const elk_fs_reg &src, unsigned *out_surf_index,
234                       unsigned *out_pull_index);
235    bool lower_constant_loads();
236    virtual void invalidate_analysis(elk::analysis_dependency_class c);
237 
238 #ifndef NDEBUG
239    void validate();
240 #else
validate()241    void validate() {}
242 #endif
243 
244    bool opt_algebraic();
245    bool opt_redundant_halt();
246    bool opt_cse();
247    bool opt_cse_local(const elk::fs_live_variables &live, elk_bblock_t *block, int &ip);
248 
249    bool opt_copy_propagation();
250    bool opt_bank_conflicts();
251    bool register_coalesce();
252    bool compute_to_mrf();
253    bool eliminate_find_live_channel();
254    bool dead_code_eliminate();
255    bool remove_duplicate_mrf_writes();
256    bool remove_extra_rounding_modes();
257 
258    elk_fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
259    void schedule_instructions_pre_ra(elk_fs_instruction_scheduler *sched,
260                                      instruction_scheduler_mode mode);
261    void schedule_instructions_post_ra();
262 
263    void insert_gfx4_send_dependency_workarounds();
264    void insert_gfx4_pre_send_dependency_workarounds(elk_bblock_t *block,
265                                                     elk_fs_inst *inst);
266    void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
267                                                      elk_fs_inst *inst);
268    bool workaround_source_arf_before_eot();
269    void vfail(const char *msg, va_list args);
270    void fail(const char *msg, ...);
271    void limit_dispatch_width(unsigned n, const char *msg);
272    bool lower_uniform_pull_constant_loads();
273    bool lower_load_payload();
274    bool lower_pack();
275    bool lower_regioning();
276    bool lower_logical_sends();
277    bool lower_integer_multiplication();
278    bool lower_minmax();
279    bool lower_simd_width();
280    bool lower_barycentrics();
281    bool lower_find_live_channel();
282    bool lower_scoreboard();
283    bool lower_sub_sat();
284    bool opt_combine_constants();
285 
286    void emit_repclear_shader();
287    void emit_interpolation_setup_gfx4();
288    void emit_interpolation_setup_gfx6();
289    bool opt_peephole_sel();
290    bool opt_saturate_propagation();
291    bool opt_cmod_propagation();
292    bool opt_zero_samples();
293 
294    void set_tcs_invocation_id();
295 
296    void emit_alpha_test();
297    elk_fs_inst *emit_single_fb_write(const elk::fs_builder &bld,
298                                  elk_fs_reg color1, elk_fs_reg color2,
299                                  elk_fs_reg src0_alpha, unsigned components);
300    void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
301    void emit_fb_writes();
302    void emit_urb_writes(const elk_fs_reg &gs_vertex_count = elk_fs_reg());
303    void emit_gs_control_data_bits(const elk_fs_reg &vertex_count);
304    void emit_gs_thread_end();
305    bool mark_last_urb_write_with_eot();
306    void emit_tcs_thread_end();
307    void emit_urb_fence();
308    void emit_cs_terminate();
309 
310    elk_fs_reg interp_reg(const elk::fs_builder &bld, unsigned location,
311                      unsigned channel, unsigned comp);
312    elk_fs_reg per_primitive_reg(const elk::fs_builder &bld,
313                             int location, unsigned comp);
314 
315    virtual void dump_instruction_to_file(const elk_backend_instruction *inst, FILE *file) const;
316    virtual void dump_instructions_to_file(FILE *file) const;
317 
318    const elk_base_prog_key *const key;
319    const struct elk_sampler_prog_key_data *key_tex;
320 
321    struct elk_gs_compile *gs_compile;
322 
323    struct elk_stage_prog_data *prog_data;
324 
325    elk_analysis<elk::fs_live_variables, elk_backend_shader> live_analysis;
326    elk_analysis<elk::register_pressure, elk_fs_visitor> regpressure_analysis;
327    elk_analysis<elk::performance, elk_fs_visitor> performance_analysis;
328 
329    /** Number of uniform variable components visited. */
330    unsigned uniforms;
331 
332    /** Byte-offset for the next available spot in the scratch space buffer. */
333    unsigned last_scratch;
334 
335    /**
336     * Array mapping UNIFORM register numbers to the push parameter index,
337     * or -1 if this uniform register isn't being uploaded as a push constant.
338     */
339    int *push_constant_loc;
340 
341    elk_fs_reg frag_depth;
342    elk_fs_reg frag_stencil;
343    elk_fs_reg sample_mask;
344    elk_fs_reg outputs[VARYING_SLOT_MAX];
345    elk_fs_reg dual_src_output;
346    int first_non_payload_grf;
347    /** Either ELK_MAX_GRF or GFX7_MRF_HACK_START */
348    unsigned max_grf;
349 
350    bool failed;
351    char *fail_msg;
352 
353    elk_elk_thread_payload *payload_;
354 
payload()355    elk_elk_thread_payload &payload() {
356       return *this->payload_;
357    }
358 
vs_payload()359    elk_vs_thread_payload &vs_payload() {
360       assert(stage == MESA_SHADER_VERTEX);
361       return *static_cast<elk_vs_thread_payload *>(this->payload_);
362    }
363 
tcs_payload()364    elk_tcs_thread_payload &tcs_payload() {
365       assert(stage == MESA_SHADER_TESS_CTRL);
366       return *static_cast<elk_tcs_thread_payload *>(this->payload_);
367    }
368 
tes_payload()369    elk_tes_thread_payload &tes_payload() {
370       assert(stage == MESA_SHADER_TESS_EVAL);
371       return *static_cast<elk_tes_thread_payload *>(this->payload_);
372    }
373 
gs_payload()374    elk_gs_thread_payload &gs_payload() {
375       assert(stage == MESA_SHADER_GEOMETRY);
376       return *static_cast<elk_gs_thread_payload *>(this->payload_);
377    }
378 
fs_payload()379    elk_fs_thread_payload &fs_payload() {
380       assert(stage == MESA_SHADER_FRAGMENT);
381       return *static_cast<elk_fs_thread_payload *>(this->payload_);
382    };
383 
cs_payload()384    elk_cs_thread_payload &cs_payload() {
385       assert(gl_shader_stage_uses_workgroup(stage));
386       return *static_cast<elk_cs_thread_payload *>(this->payload_);
387    }
388 
389    bool source_depth_to_render_target;
390    bool runtime_check_aads_emit;
391 
392    elk_fs_reg pixel_x;
393    elk_fs_reg pixel_y;
394    elk_fs_reg pixel_z;
395    elk_fs_reg wpos_w;
396    elk_fs_reg pixel_w;
397    elk_fs_reg delta_xy[ELK_BARYCENTRIC_MODE_COUNT];
398    elk_fs_reg final_gs_vertex_count;
399    elk_fs_reg control_data_bits;
400    elk_fs_reg invocation_id;
401 
402    unsigned grf_used;
403    bool spilled_any_registers;
404    bool needs_register_pressure;
405 
406    const unsigned dispatch_width; /**< 8, 16 or 32 */
407    unsigned max_dispatch_width;
408 
409    /* The API selected subgroup size */
410    unsigned api_subgroup_size; /**< 0, 8, 16, 32 */
411 
412    struct elk_shader_stats shader_stats;
413 
414    void lower_mul_dword_inst(elk_fs_inst *inst, elk_bblock_t *block);
415    void lower_mul_qword_inst(elk_fs_inst *inst, elk_bblock_t *block);
416    void lower_mulh_inst(elk_fs_inst *inst, elk_bblock_t *block);
417 
418    unsigned workgroup_size() const;
419 
420    void debug_optimizer(const nir_shader *nir,
421                         const char *pass_name,
422                         int iteration, int pass_num) const;
423 };
424 
425 /**
426  * Return the flag register used in fragment shaders to keep track of live
427  * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
428  * dispatch mode, while earlier generations are constrained to f0.1, which
429  * limits the dispatch width to SIMD16 for fragment shaders that use discard.
430  */
431 static inline unsigned
sample_mask_flag_subreg(const elk_fs_visitor & s)432 sample_mask_flag_subreg(const elk_fs_visitor &s)
433 {
434    assert(s.stage == MESA_SHADER_FRAGMENT);
435    return s.devinfo->ver >= 7 ? 2 : 1;
436 }
437 
/**
 * The fragment shader code generator.
 *
 * Translates FS IR to actual i965 assembly code.
 */
class elk_fs_generator
{
public:
   elk_fs_generator(const struct elk_compiler *compiler,
                const struct elk_compile_params *params,
                struct elk_stage_prog_data *prog_data,
                bool runtime_check_aads_emit,
                gl_shader_stage stage);
   ~elk_fs_generator();

   /* Enable debug dumps, labeled with \p shader_name. */
   void enable_debug(const char *shader_name);
   /* Generate assembly for \p cfg; returns the start offset of the
    * generated code — TODO confirm return semantics against the
    * implementation.
    */
   int generate_code(const elk_cfg_t *cfg, int dispatch_width,
                     struct elk_shader_stats shader_stats,
                     const elk::performance &perf,
                     struct elk_compile_stats *stats);
   void add_const_data(void *data, unsigned size);
   const unsigned *get_assembly();

private:
   /* Per-opcode code-generation helpers. */
   void fire_fb_write(elk_fs_inst *inst,
                      struct elk_reg payload,
                      struct elk_reg implied_header,
                      GLuint nr);
   void generate_send(elk_fs_inst *inst,
                      struct elk_reg dst,
                      struct elk_reg desc,
                      struct elk_reg payload);
   void generate_fb_write(elk_fs_inst *inst, struct elk_reg payload);
   void generate_cs_terminate(elk_fs_inst *inst, struct elk_reg payload);
   void generate_barrier(elk_fs_inst *inst, struct elk_reg src);
   bool generate_linterp(elk_fs_inst *inst, struct elk_reg dst,
                         struct elk_reg *src);
   void generate_tex(elk_fs_inst *inst, struct elk_reg dst,
                     struct elk_reg surface_index,
                     struct elk_reg sampler_index);
   void generate_ddx(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_ddy(const elk_fs_inst *inst,
                     struct elk_reg dst, struct elk_reg src);
   void generate_scratch_write(elk_fs_inst *inst, struct elk_reg src);
   void generate_scratch_read(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_read_gfx7(elk_fs_inst *inst, struct elk_reg dst);
   void generate_scratch_header(elk_fs_inst *inst, struct elk_reg dst);
   void generate_uniform_pull_constant_load(elk_fs_inst *inst, struct elk_reg dst,
                                            struct elk_reg index,
                                            struct elk_reg offset);
   void generate_varying_pull_constant_load_gfx4(elk_fs_inst *inst,
                                                 struct elk_reg dst,
                                                 struct elk_reg index);

   void generate_set_sample_id(elk_fs_inst *inst,
                               struct elk_reg dst,
                               struct elk_reg src0,
                               struct elk_reg src1);

   void generate_halt(elk_fs_inst *inst);

   void generate_mov_indirect(elk_fs_inst *inst,
                              struct elk_reg dst,
                              struct elk_reg reg,
                              struct elk_reg indirect_byte_offset);

   void generate_shuffle(elk_fs_inst *inst,
                         struct elk_reg dst,
                         struct elk_reg src,
                         struct elk_reg idx);

   void generate_quad_swizzle(const elk_fs_inst *inst,
                              struct elk_reg dst, struct elk_reg src,
                              unsigned swiz);

   /* Fix up jump targets recorded in discard_halt_patches. */
   bool patch_halt_jumps();

   const struct elk_compiler *compiler;
   const struct elk_compile_params *params;

   const struct intel_device_info *devinfo;

   struct elk_codegen *p;
   struct elk_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   exec_list discard_halt_patches;
   bool runtime_check_aads_emit;
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};
533 
namespace elk {
   /* Build a register reference for \p n payload registers described by
    * the per-SIMD-half register-number pair \p regs.
    */
   elk_fs_reg
   fetch_payload_reg(const elk::fs_builder &bld, uint8_t regs[2],
                     elk_reg_type type = ELK_REGISTER_TYPE_F,
                     unsigned n = 1);

   elk_fs_reg
   fetch_barycentric_reg(const elk::fs_builder &bld, uint8_t regs[2]);

   /* Uniform holding the dynamic MSAA configuration flags, located via
    * the prog_data's msaa_flags_param slot.
    */
   inline elk_fs_reg
   dynamic_msaa_flags(const struct elk_wm_prog_data *wm_prog_data)
   {
      return elk_fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
                    ELK_REGISTER_TYPE_UD);
   }

   void
   check_dynamic_msaa_flag(const fs_builder &bld,
                           const struct elk_wm_prog_data *wm_prog_data,
                           enum intel_msaa_flags flag);

   bool
   lower_src_modifiers(elk_fs_visitor *v, elk_bblock_t *block, elk_fs_inst *inst, unsigned i);
}
558 
void elk_shuffle_from_32bit_read(const elk::fs_builder &bld,
                             const elk_fs_reg &dst,
                             const elk_fs_reg &src,
                             uint32_t first_component,
                             uint32_t components);

/* Immediate-constant setup helpers (double / signed byte / unsigned byte). */
elk_fs_reg elk_setup_imm_df(const elk::fs_builder &bld,
                    double v);

elk_fs_reg elk_setup_imm_b(const elk::fs_builder &bld,
                   int8_t v);

elk_fs_reg elk_setup_imm_ub(const elk::fs_builder &bld,
                   uint8_t v);

/* Map a NIR barycentric intrinsic to the corresponding HW mode. */
enum elk_barycentric_mode elk_barycentric_mode(nir_intrinsic_instr *intr);

uint32_t elk_fb_write_msg_control(const elk_fs_inst *inst,
                                  const struct elk_wm_prog_data *prog_data);

void elk_compute_urb_setup_index(struct elk_wm_prog_data *wm_prog_data);

bool elk_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);

elk_fs_reg elk_sample_mask_reg(const elk::fs_builder &bld);
void elk_emit_predicate_on_sample_mask(const elk::fs_builder &bld, elk_fs_inst *inst);

int elk_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const elk_stage_prog_data *prog_data);

/* Main entry point: translate the visitor's NIR shader into FS IR. */
void nir_to_elk(elk_fs_visitor *s);
590