/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#ifndef BRW_FS_H
#define BRW_FS_H

#include "brw_shader.h"
#include "brw_ir_fs.h"
#include "brw_fs_live_variables.h"
#include "brw_ir_performance.h"
#include "compiler/nir/nir.h"

struct bblock_t;
namespace {
   struct acp_entry;
}

class fs_visitor;

namespace brw {
   /**
    * Register pressure analysis of a shader.  Estimates how many registers
    * are live at any point of the program in GRF units.
    */
   struct register_pressure {
      register_pressure(const fs_visitor *v);
      ~register_pressure();

      analysis_dependency_class
      dependency_class() const
      {
         return (DEPENDENCY_INSTRUCTION_IDENTITY |
                 DEPENDENCY_INSTRUCTION_DATA_FLOW |
                 DEPENDENCY_VARIABLES);
      }

      bool
      validate(const fs_visitor *) const
      {
         /* FINISHME */
         return true;
      }

      unsigned *regs_live_at_ip;
   };
}
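
/* Illustrative sketch (not part of the original header): a pass could query
 * this analysis through fs_visitor::regpressure_analysis, assuming the
 * brw_analysis wrapper exposes a require() accessor and that `ip` is an
 * instruction index, as implied by regs_live_at_ip above:
 *
 *    const brw::register_pressure &rp = v->regpressure_analysis.require();
 *    unsigned grfs_live = rp.regs_live_at_ip[ip];
 */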

struct brw_gs_compile;

namespace brw {
class fs_builder;
}

struct shader_stats {
   const char *scheduler_mode;
   unsigned promoted_constants;
   unsigned spill_count;
   unsigned fill_count;
   unsigned max_register_pressure;
};

/** Register numbers for thread payload fields. */
struct thread_payload {
   /** The number of thread payload registers the hardware will supply. */
   uint8_t num_regs;

   virtual ~thread_payload() = default;

protected:
   thread_payload() : num_regs() {}
};

struct vs_thread_payload : public thread_payload {
   vs_thread_payload(const fs_visitor &v);

   fs_reg urb_handles;
};

struct tcs_thread_payload : public thread_payload {
   tcs_thread_payload(const fs_visitor &v);

   fs_reg patch_urb_output;
   fs_reg primitive_id;
   fs_reg icp_handle_start;
};

struct tes_thread_payload : public thread_payload {
   tes_thread_payload(const fs_visitor &v);

   fs_reg patch_urb_input;
   fs_reg primitive_id;
   fs_reg coords[3];
   fs_reg urb_output;
};

struct gs_thread_payload : public thread_payload {
   gs_thread_payload(fs_visitor &v);

   fs_reg urb_handles;
   fs_reg primitive_id;
   fs_reg instance_id;
   fs_reg icp_handle_start;
};

struct fs_thread_payload : public thread_payload {
   fs_thread_payload(const fs_visitor &v,
                     bool &source_depth_to_render_target);

   uint8_t subspan_coord_reg[2];
   uint8_t source_depth_reg[2];
   uint8_t source_w_reg[2];
   uint8_t aa_dest_stencil_reg[2];
   uint8_t dest_depth_reg[2];
   uint8_t sample_pos_reg[2];
   uint8_t sample_mask_in_reg[2];
   uint8_t depth_w_coef_reg;
   uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
};
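
/* Illustrative sketch (not part of the original header): the per-SIMD-half
 * payload register numbers above are usually materialized as an fs_reg via
 * brw::fetch_payload_reg(), declared near the end of this file.  The builder
 * and visitor names are assumed:
 *
 *    const fs_reg depth =
 *       fetch_payload_reg(bld, s.fs_payload().source_depth_reg);
 */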

struct cs_thread_payload : public thread_payload {
   cs_thread_payload(const fs_visitor &v);

   void load_subgroup_id(const brw::fs_builder &bld, fs_reg &dest) const;

   fs_reg local_invocation_id[3];

protected:
   fs_reg subgroup_id_;
};

struct task_mesh_thread_payload : public cs_thread_payload {
   task_mesh_thread_payload(fs_visitor &v);

   fs_reg extended_parameter_0;
   fs_reg local_index;
   fs_reg inline_parameter;

   fs_reg urb_output;

   /* URB to read Task memory inputs. Only valid for MESH stage. */
   fs_reg task_urb_input;
};

struct bs_thread_payload : public thread_payload {
   bs_thread_payload(const fs_visitor &v);

   fs_reg global_arg_ptr;
   fs_reg local_arg_ptr;

   void load_shader_type(const brw::fs_builder &bld, fs_reg &dest) const;
};

class fs_instruction_scheduler;

/**
 * The fragment shader front-end.
 *
 * Translates NIR (from GLSL or ARB_fragment_program) into FS IR.
 */
class fs_visitor : public backend_shader
{
public:
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              const brw_base_prog_key *key,
              struct brw_stage_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              bool needs_register_pressure,
              bool debug_enabled);
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              const brw_wm_prog_key *key,
              struct brw_wm_prog_data *prog_data,
              const nir_shader *shader,
              unsigned dispatch_width,
              unsigned num_polygons,
              bool needs_register_pressure,
              bool debug_enabled);
   fs_visitor(const struct brw_compiler *compiler,
              const struct brw_compile_params *params,
              struct brw_gs_compile *gs_compile,
              struct brw_gs_prog_data *prog_data,
              const nir_shader *shader,
              bool needs_register_pressure,
              bool debug_enabled);
   void init();
   ~fs_visitor();

   fs_reg vgrf(const glsl_type *const type);
   void import_uniforms(fs_visitor *v);

   void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
                                   const fs_reg &dst,
                                   const fs_reg &surface,
                                   const fs_reg &surface_handle,
                                   const fs_reg &varying_offset,
                                   uint32_t const_offset,
                                   uint8_t alignment,
                                   unsigned components);

   bool run_fs(bool allow_spilling, bool do_rep_send);
   bool run_vs();
   bool run_tcs();
   bool run_tes();
   bool run_gs();
   bool run_cs(bool allow_spilling);
   bool run_bs(bool allow_spilling);
   bool run_task(bool allow_spilling);
   bool run_mesh(bool allow_spilling);
   void allocate_registers(bool allow_spilling);
   uint32_t compute_max_register_pressure();
   void assign_curb_setup();
   void assign_urb_setup();
   void convert_attr_sources_to_hw_regs(fs_inst *inst);
   void assign_vs_urb_setup();
   void assign_tcs_urb_setup();
   void assign_tes_urb_setup();
   void assign_gs_urb_setup();
   bool assign_regs(bool allow_spilling, bool spill_all);
   void assign_regs_trivial();
   void calculate_payload_ranges(unsigned payload_node_count,
                                 int *payload_last_use_ip) const;
   void assign_constant_locations();
   bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
                      unsigned *out_pull_index);
   virtual void invalidate_analysis(brw::analysis_dependency_class c);

#ifndef NDEBUG
   void validate();
#else
   void validate() {}
#endif

   fs_instruction_scheduler *prepare_scheduler(void *mem_ctx);
   void schedule_instructions_pre_ra(fs_instruction_scheduler *sched,
                                     instruction_scheduler_mode mode);
   void schedule_instructions_post_ra();

   void vfail(const char *msg, va_list args);
   void fail(const char *msg, ...);
   void limit_dispatch_width(unsigned n, const char *msg);

   void emit_repclear_shader();
   void emit_interpolation_setup();

   void set_tcs_invocation_id();

   fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
                                 fs_reg color1, fs_reg color2,
                                 fs_reg src0_alpha, unsigned components);
   void do_emit_fb_writes(int nr_color_regions, bool replicate_alpha);
   void emit_fb_writes();
   void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
   void emit_gs_control_data_bits(const fs_reg &vertex_count);
   void emit_gs_thread_end();
   bool mark_last_urb_write_with_eot();
   void emit_tcs_thread_end();
   void emit_urb_fence();
   void emit_cs_terminate();

   fs_reg interp_reg(const brw::fs_builder &bld, unsigned location,
                     unsigned channel, unsigned comp);
   fs_reg per_primitive_reg(const brw::fs_builder &bld,
                            int location, unsigned comp);

   virtual void dump_instruction_to_file(const backend_instruction *inst, FILE *file) const;
   virtual void dump_instructions_to_file(FILE *file) const;

   const brw_base_prog_key *const key;

   struct brw_gs_compile *gs_compile;

   struct brw_stage_prog_data *prog_data;

   brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
   brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
   brw_analysis<brw::performance, fs_visitor> performance_analysis;

   /** Number of uniform variable components visited. */
   unsigned uniforms;

   /** Byte-offset for the next available spot in the scratch space buffer. */
   unsigned last_scratch;

   /**
    * Array mapping UNIFORM register numbers to the push parameter index,
    * or -1 if this uniform register isn't being uploaded as a push constant.
    */
   int *push_constant_loc;
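
   /* Illustrative note (not part of the original header): a UNIFORM register
    * u is uploaded as a push constant iff push_constant_loc[u] >= 0, in which
    * case the value is its index among the pushed parameters; otherwise the
    * uniform is fetched with a pull load (see get_pull_locs()).
    *
    *    if (push_constant_loc[u] >= 0)
    *       param_index = push_constant_loc[u];
    */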

   fs_reg frag_depth;
   fs_reg frag_stencil;
   fs_reg sample_mask;
   fs_reg outputs[VARYING_SLOT_MAX];
   fs_reg dual_src_output;
   int first_non_payload_grf;

   bool failed;
   char *fail_msg;

   thread_payload *payload_;

   thread_payload &payload() {
      return *this->payload_;
   }

   vs_thread_payload &vs_payload() {
      assert(stage == MESA_SHADER_VERTEX);
      return *static_cast<vs_thread_payload *>(this->payload_);
   }

   tcs_thread_payload &tcs_payload() {
      assert(stage == MESA_SHADER_TESS_CTRL);
      return *static_cast<tcs_thread_payload *>(this->payload_);
   }

   tes_thread_payload &tes_payload() {
      assert(stage == MESA_SHADER_TESS_EVAL);
      return *static_cast<tes_thread_payload *>(this->payload_);
   }

   gs_thread_payload &gs_payload() {
      assert(stage == MESA_SHADER_GEOMETRY);
      return *static_cast<gs_thread_payload *>(this->payload_);
   }

   fs_thread_payload &fs_payload() {
      assert(stage == MESA_SHADER_FRAGMENT);
      return *static_cast<fs_thread_payload *>(this->payload_);
   }

   cs_thread_payload &cs_payload() {
      assert(gl_shader_stage_uses_workgroup(stage));
      return *static_cast<cs_thread_payload *>(this->payload_);
   }

   task_mesh_thread_payload &task_mesh_payload() {
      assert(stage == MESA_SHADER_TASK || stage == MESA_SHADER_MESH);
      return *static_cast<task_mesh_thread_payload *>(this->payload_);
   }

   bs_thread_payload &bs_payload() {
      assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
      return *static_cast<bs_thread_payload *>(this->payload_);
   }

   bool source_depth_to_render_target;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg pixel_z;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
   fs_reg final_gs_vertex_count;
   fs_reg control_data_bits;
   fs_reg invocation_id;

   unsigned grf_used;
   bool spilled_any_registers;
   bool needs_register_pressure;

   const unsigned dispatch_width; /**< 8, 16 or 32 */
   const unsigned max_polygons;
   unsigned max_dispatch_width;

   /* The API selected subgroup size */
   unsigned api_subgroup_size; /**< 0, 8, 16, 32 */

   struct shader_stats shader_stats;

   unsigned workgroup_size() const;

   void debug_optimizer(const nir_shader *nir,
                        const char *pass_name,
                        int iteration, int pass_num) const;
};
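
/* Illustrative sketch (not part of the original header): roughly how a
 * compile entry point drives this class for one fragment dispatch width.
 * Variable names are assumed and error handling is elided:
 *
 *    fs_visitor v(compiler, params, wm_key, wm_prog_data, nir,
 *                 16, 1, false, debug_enabled);
 *    if (!v.run_fs(allow_spilling, false))
 *       ...report v.fail_msg...
 */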

/**
 * Return the flag register used in fragment shaders to keep track of live
 * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
 * dispatch mode.
 */
static inline unsigned
sample_mask_flag_subreg(const fs_visitor &s)
{
   assert(s.stage == MESA_SHADER_FRAGMENT);
   return 2;
}
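
/* Illustrative note (not part of the original header): with the convention
 * above, the flag subregister covering a given 16-channel group would be
 *
 *    sample_mask_flag_subreg(s) + group / 16
 *
 * i.e. f1.0 for channels 0-15 and f1.1 for channels 16-31 of a SIMD32
 * dispatch; see brw_sample_mask_reg(), declared below.
 */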

/**
 * The fragment shader code generator.
 *
 * Translates FS IR to actual i965 assembly code.
 */
class fs_generator
{
public:
   fs_generator(const struct brw_compiler *compiler,
                const struct brw_compile_params *params,
                struct brw_stage_prog_data *prog_data,
                gl_shader_stage stage);
   ~fs_generator();

   void enable_debug(const char *shader_name);
   int generate_code(const cfg_t *cfg, int dispatch_width,
                     struct shader_stats shader_stats,
                     const brw::performance &perf,
                     struct brw_compile_stats *stats,
                     unsigned max_polygons = 0);
   void add_const_data(void *data, unsigned size);
   void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
   const unsigned *get_assembly();

private:
   void generate_send(fs_inst *inst,
                      struct brw_reg dst,
                      struct brw_reg desc,
                      struct brw_reg ex_desc,
                      struct brw_reg payload,
                      struct brw_reg payload2);
   void generate_fb_read(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg payload);
   void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
   void generate_barrier(fs_inst *inst, struct brw_reg src);
   bool generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_ddx(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_ddy(const fs_inst *inst,
                     struct brw_reg dst, struct brw_reg src);
   void generate_scratch_header(fs_inst *inst, struct brw_reg dst);

   void generate_halt(fs_inst *inst);

   void generate_mov_indirect(fs_inst *inst,
                              struct brw_reg dst,
                              struct brw_reg reg,
                              struct brw_reg indirect_byte_offset);

   void generate_shuffle(fs_inst *inst,
                         struct brw_reg dst,
                         struct brw_reg src,
                         struct brw_reg idx);

   void generate_quad_swizzle(const fs_inst *inst,
                              struct brw_reg dst, struct brw_reg src,
                              unsigned swiz);

   bool patch_halt_jumps();

   const struct brw_compiler *compiler;
   const struct brw_compile_params *params;

   const struct intel_device_info *devinfo;

   struct brw_codegen *p;
   struct brw_stage_prog_data * const prog_data;

   unsigned dispatch_width; /**< 8, 16 or 32 */

   exec_list discard_halt_patches;
   bool debug_flag;
   const char *shader_name;
   gl_shader_stage stage;
   void *mem_ctx;
};
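
/* Illustrative sketch (not part of the original header): once an fs_visitor
 * has produced a CFG, code generation typically looks like the following.
 * The perf/stats plumbing and member names are assumed from the declarations
 * above and from backend_shader:
 *
 *    fs_generator g(compiler, params, prog_data, MESA_SHADER_FRAGMENT);
 *    g.generate_code(v.cfg, v.dispatch_width, v.shader_stats,
 *                    v.performance_analysis.require(), stats);
 *    const unsigned *assembly = g.get_assembly();
 */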

namespace brw {
   fs_reg
   fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
                     brw_reg_type type = BRW_REGISTER_TYPE_F,
                     unsigned n = 1);

   fs_reg
   fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2]);

   inline fs_reg
   dynamic_msaa_flags(const struct brw_wm_prog_data *wm_prog_data)
   {
      return fs_reg(UNIFORM, wm_prog_data->msaa_flags_param,
                    BRW_REGISTER_TYPE_UD);
   }

   void
   check_dynamic_msaa_flag(const fs_builder &bld,
                           const struct brw_wm_prog_data *wm_prog_data,
                           enum intel_msaa_flags flag);

   bool
   lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
}

void shuffle_from_32bit_read(const brw::fs_builder &bld,
                             const fs_reg &dst,
                             const fs_reg &src,
                             uint32_t first_component,
                             uint32_t components);

enum brw_barycentric_mode brw_barycentric_mode(nir_intrinsic_instr *intr);

uint32_t brw_fb_write_msg_control(const fs_inst *inst,
                                  const struct brw_wm_prog_data *prog_data);

void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);

bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);

fs_reg brw_sample_mask_reg(const brw::fs_builder &bld);
void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);

int brw_get_subgroup_id_param_index(const intel_device_info *devinfo,
                                    const brw_stage_prog_data *prog_data);

bool brw_lower_dpas(fs_visitor &v);

void nir_to_brw(fs_visitor *s);

void brw_fs_optimize(fs_visitor &s);

bool brw_fs_lower_3src_null_dest(fs_visitor &s);
bool brw_fs_lower_barycentrics(fs_visitor &s);
bool brw_fs_lower_constant_loads(fs_visitor &s);
bool brw_fs_lower_derivatives(fs_visitor &s);
bool brw_fs_lower_find_live_channel(fs_visitor &s);
bool brw_fs_lower_integer_multiplication(fs_visitor &s);
bool brw_fs_lower_logical_sends(fs_visitor &s);
bool brw_fs_lower_pack(fs_visitor &s);
bool brw_fs_lower_load_payload(fs_visitor &s);
bool brw_fs_lower_regioning(fs_visitor &s);
bool brw_fs_lower_scoreboard(fs_visitor &s);
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
bool brw_fs_lower_simd_width(fs_visitor &s);
bool brw_fs_lower_sub_sat(fs_visitor &s);
bool brw_fs_lower_uniform_pull_constant_loads(fs_visitor &s);

bool brw_fs_opt_algebraic(fs_visitor &s);
bool brw_fs_opt_bank_conflicts(fs_visitor &s);
bool brw_fs_opt_cmod_propagation(fs_visitor &s);
bool brw_fs_opt_combine_constants(fs_visitor &s);
bool brw_fs_opt_compact_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_copy_propagation(fs_visitor &s);
bool brw_fs_opt_cse(fs_visitor &s);
bool brw_fs_opt_dead_code_eliminate(fs_visitor &s);
bool brw_fs_opt_eliminate_find_live_channel(fs_visitor &s);
bool brw_fs_opt_peephole_sel(fs_visitor &s);
bool brw_fs_opt_register_coalesce(fs_visitor &s);
bool brw_fs_opt_remove_extra_rounding_modes(fs_visitor &s);
bool brw_fs_opt_remove_redundant_halts(fs_visitor &s);
bool brw_fs_opt_saturate_propagation(fs_visitor &s);
bool brw_fs_opt_split_sends(fs_visitor &s);
bool brw_fs_opt_split_virtual_grfs(fs_visitor &s);
bool brw_fs_opt_zero_samples(fs_visitor &s);

bool brw_fs_workaround_emit_dummy_mov_instruction(fs_visitor &s);
bool brw_fs_workaround_memory_fence_before_eot(fs_visitor &s);
bool brw_fs_workaround_nomask_control_flow(fs_visitor &s);

/* Helpers. */
unsigned brw_fs_get_lowered_simd_width(const fs_visitor *shader,
                                       const fs_inst *inst);

#endif /* BRW_FS_H */