• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #ifndef BRW_FS_H
29 #define BRW_FS_H
30 
31 #include "brw_shader.h"
32 #include "brw_ir_fs.h"
33 #include "brw_fs_builder.h"
34 #include "brw_fs_live_variables.h"
35 #include "brw_ir_performance.h"
36 #include "compiler/nir/nir.h"
37 
38 struct bblock_t;
39 namespace {
40    struct acp_entry;
41 }
42 
43 class fs_visitor;
44 
45 namespace brw {
46    /**
47     * Register pressure analysis of a shader.  Estimates how many registers
48     * are live at any point of the program in GRF units.
49     */
50    struct register_pressure {
51       register_pressure(const fs_visitor *v);
52       ~register_pressure();
53 
54       analysis_dependency_class
dependency_classregister_pressure55       dependency_class() const
56       {
57          return (DEPENDENCY_INSTRUCTION_IDENTITY |
58                  DEPENDENCY_INSTRUCTION_DATA_FLOW |
59                  DEPENDENCY_VARIABLES);
60       }
61 
62       bool
validateregister_pressure63       validate(const fs_visitor *) const
64       {
65          /* FINISHME */
66          return true;
67       }
68 
69       unsigned *regs_live_at_ip;
70    };
71 }
72 
73 struct brw_gs_compile;
74 
75 static inline fs_reg
offset(const fs_reg & reg,const brw::fs_builder & bld,unsigned delta)76 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
77 {
78    return offset(reg, bld.dispatch_width(), delta);
79 }
80 
81 struct shader_stats {
82    const char *scheduler_mode;
83    unsigned promoted_constants;
84    unsigned spill_count;
85    unsigned fill_count;
86 };
87 
88 /**
89  * The fragment shader front-end.
90  *
91  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
92  */
93 class fs_visitor : public backend_shader
94 {
95 public:
96    fs_visitor(const struct brw_compiler *compiler, void *log_data,
97               void *mem_ctx,
98               const brw_base_prog_key *key,
99               struct brw_stage_prog_data *prog_data,
100               const nir_shader *shader,
101               unsigned dispatch_width,
102               bool debug_enabled);
103    fs_visitor(const struct brw_compiler *compiler, void *log_data,
104               void *mem_ctx,
105               struct brw_gs_compile *gs_compile,
106               struct brw_gs_prog_data *prog_data,
107               const nir_shader *shader,
108               bool debug_enabled);
109    void init();
110    ~fs_visitor();
111 
112    fs_reg vgrf(const glsl_type *const type);
113    void import_uniforms(fs_visitor *v);
114 
115    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
116                                    const fs_reg &dst,
117                                    const fs_reg &surf_index,
118                                    const fs_reg &varying_offset,
119                                    uint32_t const_offset,
120                                    uint8_t alignment);
121    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
122 
123    bool run_fs(bool allow_spilling, bool do_rep_send);
124    bool run_vs();
125    bool run_tcs();
126    bool run_tes();
127    bool run_gs();
128    bool run_cs(bool allow_spilling);
129    bool run_bs(bool allow_spilling);
130    bool run_task(bool allow_spilling);
131    bool run_mesh(bool allow_spilling);
132    void optimize();
133    void allocate_registers(bool allow_spilling);
134    void setup_fs_payload_gfx4();
135    void setup_fs_payload_gfx6();
136    void setup_vs_payload();
137    void setup_gs_payload();
138    void setup_cs_payload();
139    bool fixup_sends_duplicate_payload();
140    void fixup_3src_null_dest();
141    void emit_dummy_memory_fence_before_eot();
142    bool fixup_nomask_control_flow();
143    void assign_curb_setup();
144    void assign_urb_setup();
145    void convert_attr_sources_to_hw_regs(fs_inst *inst);
146    void assign_vs_urb_setup();
147    void assign_tcs_urb_setup();
148    void assign_tes_urb_setup();
149    void assign_gs_urb_setup();
150    bool assign_regs(bool allow_spilling, bool spill_all);
151    void assign_regs_trivial();
152    void calculate_payload_ranges(int payload_node_count,
153                                  int *payload_last_use_ip) const;
154    bool split_virtual_grfs();
155    bool compact_virtual_grfs();
156    void assign_constant_locations();
157    bool get_pull_locs(const fs_reg &src, unsigned *out_surf_index,
158                       unsigned *out_pull_index);
159    void lower_constant_loads();
160    virtual void invalidate_analysis(brw::analysis_dependency_class c);
161    void validate();
162    bool opt_algebraic();
163    bool opt_redundant_halt();
164    bool opt_cse();
165    bool opt_cse_local(const brw::fs_live_variables &live, bblock_t *block, int &ip);
166 
167    bool opt_copy_propagation();
168    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
169    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
170    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
171                                    exec_list *acp);
172    bool opt_drop_redundant_mov_to_flags();
173    bool opt_register_renaming();
174    bool opt_bank_conflicts();
175    bool opt_split_sends();
176    bool register_coalesce();
177    bool compute_to_mrf();
178    bool eliminate_find_live_channel();
179    bool dead_code_eliminate();
180    bool remove_duplicate_mrf_writes();
181    bool remove_extra_rounding_modes();
182 
183    void schedule_instructions(instruction_scheduler_mode mode);
184    void insert_gfx4_send_dependency_workarounds();
185    void insert_gfx4_pre_send_dependency_workarounds(bblock_t *block,
186                                                     fs_inst *inst);
187    void insert_gfx4_post_send_dependency_workarounds(bblock_t *block,
188                                                      fs_inst *inst);
189    void vfail(const char *msg, va_list args);
190    void fail(const char *msg, ...);
191    void limit_dispatch_width(unsigned n, const char *msg);
192    void lower_uniform_pull_constant_loads();
193    bool lower_load_payload();
194    bool lower_pack();
195    bool lower_regioning();
196    bool lower_logical_sends();
197    bool lower_integer_multiplication();
198    bool lower_minmax();
199    bool lower_simd_width();
200    bool lower_barycentrics();
201    bool lower_derivatives();
202    bool lower_find_live_channel();
203    bool lower_scoreboard();
204    bool lower_sub_sat();
205    bool opt_combine_constants();
206 
207    void emit_dummy_fs();
208    void emit_repclear_shader();
209    void emit_fragcoord_interpolation(fs_reg wpos);
210    void emit_is_helper_invocation(fs_reg result);
211    fs_reg emit_frontfacing_interpolation();
212    fs_reg emit_samplepos_setup();
213    fs_reg emit_sampleid_setup();
214    fs_reg emit_samplemaskin_setup();
215    fs_reg emit_shading_rate_setup();
216    void emit_interpolation_setup_gfx4();
217    void emit_interpolation_setup_gfx6();
218    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
219                          const fs_reg &texture,
220                          const fs_reg &texture_handle);
221    fs_reg resolve_source_modifiers(const fs_reg &src);
222    void emit_fsign(const class brw::fs_builder &, const nir_alu_instr *instr,
223                    fs_reg result, fs_reg *op, unsigned fsign_src);
224    void emit_shader_float_controls_execution_mode();
225    bool opt_peephole_sel();
226    bool opt_peephole_predicated_break();
227    bool opt_saturate_propagation();
228    bool opt_cmod_propagation();
229    bool opt_zero_samples();
230 
231    void set_tcs_invocation_id();
232 
233    void emit_nir_code();
234    void nir_setup_outputs();
235    void nir_setup_uniforms();
236    void nir_emit_system_values();
237    void nir_emit_impl(nir_function_impl *impl);
238    void nir_emit_cf_list(exec_list *list);
239    void nir_emit_if(nir_if *if_stmt);
240    void nir_emit_loop(nir_loop *loop);
241    void nir_emit_block(nir_block *block);
242    void nir_emit_instr(nir_instr *instr);
243    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr,
244                      bool need_dest);
245    bool try_emit_b2fi_of_inot(const brw::fs_builder &bld, fs_reg result,
246                               nir_alu_instr *instr);
247    void nir_emit_load_const(const brw::fs_builder &bld,
248                             nir_load_const_instr *instr);
249    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
250                               nir_intrinsic_instr *instr);
251    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
252                                nir_intrinsic_instr *instr);
253    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
254                               nir_intrinsic_instr *instr);
255    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
256                               nir_intrinsic_instr *instr);
257    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
258                               nir_intrinsic_instr *instr);
259    void nir_emit_bs_intrinsic(const brw::fs_builder &bld,
260                               nir_intrinsic_instr *instr);
261    void nir_emit_task_intrinsic(const brw::fs_builder &bld,
262                                 nir_intrinsic_instr *instr);
263    void nir_emit_mesh_intrinsic(const brw::fs_builder &bld,
264                                 nir_intrinsic_instr *instr);
265    void nir_emit_task_mesh_intrinsic(const brw::fs_builder &bld,
266                                      nir_intrinsic_instr *instr);
267    fs_reg get_nir_image_intrinsic_image(const brw::fs_builder &bld,
268                                         nir_intrinsic_instr *instr);
269    fs_reg get_nir_ssbo_intrinsic_index(const brw::fs_builder &bld,
270                                        nir_intrinsic_instr *instr);
271    fs_reg swizzle_nir_scratch_addr(const brw::fs_builder &bld,
272                                    const fs_reg &addr,
273                                    bool in_dwords);
274    void nir_emit_intrinsic(const brw::fs_builder &bld,
275                            nir_intrinsic_instr *instr);
276    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
277                                nir_intrinsic_instr *instr);
278    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
279                              int op, nir_intrinsic_instr *instr);
280    void nir_emit_ssbo_atomic_float(const brw::fs_builder &bld,
281                                    int op, nir_intrinsic_instr *instr);
282    void nir_emit_shared_atomic(const brw::fs_builder &bld,
283                                int op, nir_intrinsic_instr *instr);
284    void nir_emit_shared_atomic_float(const brw::fs_builder &bld,
285                                      int op, nir_intrinsic_instr *instr);
286    void nir_emit_global_atomic(const brw::fs_builder &bld,
287                                int op, nir_intrinsic_instr *instr);
288    void nir_emit_global_atomic_float(const brw::fs_builder &bld,
289                                      int op, nir_intrinsic_instr *instr);
290    void nir_emit_texture(const brw::fs_builder &bld,
291                          nir_tex_instr *instr);
292    void nir_emit_jump(const brw::fs_builder &bld,
293                       nir_jump_instr *instr);
294    fs_reg get_nir_src(const nir_src &src);
295    fs_reg get_nir_src_imm(const nir_src &src);
296    fs_reg get_nir_dest(const nir_dest &dest);
297    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
298    fs_reg get_tcs_single_patch_icp_handle(const brw::fs_builder &bld,
299                                           nir_intrinsic_instr *instr);
300    fs_reg get_tcs_eight_patch_icp_handle(const brw::fs_builder &bld,
301                                          nir_intrinsic_instr *instr);
302    struct brw_reg get_tcs_output_urb_handle();
303 
304    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
305                      unsigned wr_mask);
306 
307    bool optimize_extract_to_float(nir_alu_instr *instr,
308                                   const fs_reg &result);
309    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
310                                      const fs_reg &result);
311 
312    void emit_alpha_test();
313    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
314                                  fs_reg color1, fs_reg color2,
315                                  fs_reg src0_alpha, unsigned components);
316    void emit_fb_writes();
317    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
318                                       const fs_reg &dst, unsigned target);
319    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
320    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
321                                         unsigned stream_id);
322    void emit_gs_control_data_bits(const fs_reg &vertex_count);
323    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
324    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
325                        unsigned stream_id);
326    void emit_gs_thread_end();
327    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
328                            unsigned base_offset, const nir_src &offset_src,
329                            unsigned num_components, unsigned first_component);
330    void emit_urb_fence();
331    void emit_cs_terminate();
332    fs_reg emit_work_group_id_setup();
333 
334    void emit_task_mesh_store(const brw::fs_builder &bld,
335                              nir_intrinsic_instr *instr);
336    void emit_task_mesh_load(const brw::fs_builder &bld,
337                             nir_intrinsic_instr *instr);
338 
339    void emit_barrier();
340 
341    fs_reg get_timestamp(const brw::fs_builder &bld);
342 
343    fs_reg interp_reg(int location, int channel);
344    fs_reg per_primitive_reg(int location);
345 
346    virtual void dump_instructions() const;
347    virtual void dump_instructions(const char *name) const;
348    void dump_instruction(const backend_instruction *inst) const;
349    void dump_instruction(const backend_instruction *inst, FILE *file) const;
350 
351    const brw_base_prog_key *const key;
352    const struct brw_sampler_prog_key_data *key_tex;
353 
354    struct brw_gs_compile *gs_compile;
355 
356    struct brw_stage_prog_data *prog_data;
357 
358    brw_analysis<brw::fs_live_variables, backend_shader> live_analysis;
359    brw_analysis<brw::register_pressure, fs_visitor> regpressure_analysis;
360    brw_analysis<brw::performance, fs_visitor> performance_analysis;
361 
362    /** Number of uniform variable components visited. */
363    unsigned uniforms;
364 
365    /** Byte-offset for the next available spot in the scratch space buffer. */
366    unsigned last_scratch;
367 
368    /**
369     * Array mapping UNIFORM register numbers to the push parameter index,
370     * or -1 if this uniform register isn't being uploaded as a push constant.
371     */
372    int *push_constant_loc;
373 
374    fs_reg subgroup_id;
375    fs_reg group_size[3];
376    fs_reg scratch_base;
377    fs_reg frag_depth;
378    fs_reg frag_stencil;
379    fs_reg sample_mask;
380    fs_reg outputs[VARYING_SLOT_MAX];
381    fs_reg dual_src_output;
382    int first_non_payload_grf;
383    /** Either BRW_MAX_GRF or GFX7_MRF_HACK_START */
384    unsigned max_grf;
385 
386    fs_reg *nir_locals;
387    fs_reg *nir_ssa_values;
388    fs_reg *nir_system_values;
389 
390    bool failed;
391    char *fail_msg;
392 
393    /** Register numbers for thread payload fields. */
394    struct thread_payload {
395       uint8_t subspan_coord_reg[2];
396       uint8_t source_depth_reg[2];
397       uint8_t source_w_reg[2];
398       uint8_t aa_dest_stencil_reg[2];
399       uint8_t dest_depth_reg[2];
400       uint8_t sample_pos_reg[2];
401       uint8_t sample_mask_in_reg[2];
402       uint8_t depth_w_coef_reg[2];
403       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
404       uint8_t local_invocation_id_reg[2];
405 
406       /** The number of thread payload registers the hardware will supply. */
407       uint8_t num_regs;
408    } payload;
409 
410    bool source_depth_to_render_target;
411    bool runtime_check_aads_emit;
412 
413    fs_reg pixel_x;
414    fs_reg pixel_y;
415    fs_reg pixel_z;
416    fs_reg wpos_w;
417    fs_reg pixel_w;
418    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
419    fs_reg shader_start_time;
420    fs_reg final_gs_vertex_count;
421    fs_reg control_data_bits;
422    fs_reg invocation_id;
423 
424    unsigned grf_used;
425    bool spilled_any_registers;
426 
427    const unsigned dispatch_width; /**< 8, 16 or 32 */
428    unsigned max_dispatch_width;
429 
430    struct shader_stats shader_stats;
431 
432    brw::fs_builder bld;
433 
434 private:
435    fs_reg prepare_alu_destination_and_sources(const brw::fs_builder &bld,
436                                               nir_alu_instr *instr,
437                                               fs_reg *op,
438                                               bool need_dest);
439 
440    void resolve_inot_sources(const brw::fs_builder &bld, nir_alu_instr *instr,
441                              fs_reg *op);
442    void lower_mul_dword_inst(fs_inst *inst, bblock_t *block);
443    void lower_mul_qword_inst(fs_inst *inst, bblock_t *block);
444    void lower_mulh_inst(fs_inst *inst, bblock_t *block);
445 
446    unsigned workgroup_size() const;
447 };
448 
449 /**
450  * Return the flag register used in fragment shaders to keep track of live
451  * samples.  On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
452  * dispatch mode, while earlier generations are constrained to f0.1, which
453  * limits the dispatch width to SIMD16 for fragment shaders that use discard.
454  */
455 static inline unsigned
sample_mask_flag_subreg(const fs_visitor * shader)456 sample_mask_flag_subreg(const fs_visitor *shader)
457 {
458    assert(shader->stage == MESA_SHADER_FRAGMENT);
459    return shader->devinfo->ver >= 7 ? 2 : 1;
460 }
461 
462 /**
463  * The fragment shader code generator.
464  *
465  * Translates FS IR to actual i965 assembly code.
466  */
467 class fs_generator
468 {
469 public:
470    fs_generator(const struct brw_compiler *compiler, void *log_data,
471                 void *mem_ctx,
472                 struct brw_stage_prog_data *prog_data,
473                 bool runtime_check_aads_emit,
474                 gl_shader_stage stage);
475    ~fs_generator();
476 
477    void enable_debug(const char *shader_name);
478    int generate_code(const cfg_t *cfg, int dispatch_width,
479                      struct shader_stats shader_stats,
480                      const brw::performance &perf,
481                      struct brw_compile_stats *stats);
482    void add_const_data(void *data, unsigned size);
483    void add_resume_sbt(unsigned num_resume_shaders, uint64_t *sbt);
484    const unsigned *get_assembly();
485 
486 private:
487    void fire_fb_write(fs_inst *inst,
488                       struct brw_reg payload,
489                       struct brw_reg implied_header,
490                       GLuint nr);
491    void generate_send(fs_inst *inst,
492                       struct brw_reg dst,
493                       struct brw_reg desc,
494                       struct brw_reg ex_desc,
495                       struct brw_reg payload,
496                       struct brw_reg payload2);
497    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
498    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
499                          struct brw_reg payload);
500    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
501    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
502    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
503    void generate_barrier(fs_inst *inst, struct brw_reg src);
504    bool generate_linterp(fs_inst *inst, struct brw_reg dst,
505 			 struct brw_reg *src);
506    void generate_tex(fs_inst *inst, struct brw_reg dst,
507                      struct brw_reg surface_index,
508                      struct brw_reg sampler_index);
509    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
510                                  struct brw_reg src,
511                                  struct brw_reg surf_index);
512    void generate_ddx(const fs_inst *inst,
513                      struct brw_reg dst, struct brw_reg src);
514    void generate_ddy(const fs_inst *inst,
515                      struct brw_reg dst, struct brw_reg src);
516    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
517    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
518    void generate_scratch_read_gfx7(fs_inst *inst, struct brw_reg dst);
519    void generate_scratch_header(fs_inst *inst, struct brw_reg dst);
520    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
521                                             struct brw_reg index,
522                                             struct brw_reg offset);
523    void generate_uniform_pull_constant_load_gfx7(fs_inst *inst,
524                                                  struct brw_reg dst,
525                                                  struct brw_reg surf_index,
526                                                  struct brw_reg payload);
527    void generate_varying_pull_constant_load_gfx4(fs_inst *inst,
528                                                  struct brw_reg dst,
529                                                  struct brw_reg index);
530    void generate_mov_dispatch_to_flags(fs_inst *inst);
531 
532    void generate_pixel_interpolator_query(fs_inst *inst,
533                                           struct brw_reg dst,
534                                           struct brw_reg src,
535                                           struct brw_reg msg_data,
536                                           unsigned msg_type);
537 
538    void generate_set_sample_id(fs_inst *inst,
539                                struct brw_reg dst,
540                                struct brw_reg src0,
541                                struct brw_reg src1);
542 
543    void generate_halt(fs_inst *inst);
544 
545    void generate_pack_half_2x16_split(fs_inst *inst,
546                                       struct brw_reg dst,
547                                       struct brw_reg x,
548                                       struct brw_reg y);
549 
550    void generate_mov_indirect(fs_inst *inst,
551                               struct brw_reg dst,
552                               struct brw_reg reg,
553                               struct brw_reg indirect_byte_offset);
554 
555    void generate_shuffle(fs_inst *inst,
556                          struct brw_reg dst,
557                          struct brw_reg src,
558                          struct brw_reg idx);
559 
560    void generate_quad_swizzle(const fs_inst *inst,
561                               struct brw_reg dst, struct brw_reg src,
562                               unsigned swiz);
563 
564    bool patch_halt_jumps();
565 
566    const struct brw_compiler *compiler;
567    void *log_data; /* Passed to compiler->*_log functions */
568 
569    const struct intel_device_info *devinfo;
570 
571    struct brw_codegen *p;
572    struct brw_stage_prog_data * const prog_data;
573 
574    unsigned dispatch_width; /**< 8, 16 or 32 */
575 
576    exec_list discard_halt_patches;
577    bool runtime_check_aads_emit;
578    bool debug_flag;
579    const char *shader_name;
580    gl_shader_stage stage;
581    void *mem_ctx;
582 };
583 
584 namespace brw {
585    inline fs_reg
586    fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2],
587                      brw_reg_type type = BRW_REGISTER_TYPE_F)
588    {
589       if (!regs[0])
590          return fs_reg();
591 
592       if (bld.dispatch_width() > 16) {
593          const fs_reg tmp = bld.vgrf(type);
594          const brw::fs_builder hbld = bld.exec_all().group(16, 0);
595          const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
596          fs_reg components[2];
597          assert(m <= 2);
598 
599          for (unsigned g = 0; g < m; g++)
600                components[g] = retype(brw_vec8_grf(regs[g], 0), type);
601 
602          hbld.LOAD_PAYLOAD(tmp, components, m, 0);
603 
604          return tmp;
605 
606       } else {
607          return fs_reg(retype(brw_vec8_grf(regs[0], 0), type));
608       }
609    }
610 
611    inline fs_reg
fetch_barycentric_reg(const brw::fs_builder & bld,uint8_t regs[2])612    fetch_barycentric_reg(const brw::fs_builder &bld, uint8_t regs[2])
613    {
614       if (!regs[0])
615          return fs_reg();
616 
617       const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
618       const brw::fs_builder hbld = bld.exec_all().group(8, 0);
619       const unsigned m = bld.dispatch_width() / hbld.dispatch_width();
620       fs_reg *const components = new fs_reg[2 * m];
621 
622       for (unsigned c = 0; c < 2; c++) {
623          for (unsigned g = 0; g < m; g++)
624             components[c * m + g] = offset(brw_vec8_grf(regs[g / 2], 0),
625                                            hbld, c + 2 * (g % 2));
626       }
627 
628       hbld.LOAD_PAYLOAD(tmp, components, 2 * m, 0);
629 
630       delete[] components;
631       return tmp;
632    }
633 
634    bool
635    lower_src_modifiers(fs_visitor *v, bblock_t *block, fs_inst *inst, unsigned i);
636 }
637 
638 void shuffle_from_32bit_read(const brw::fs_builder &bld,
639                              const fs_reg &dst,
640                              const fs_reg &src,
641                              uint32_t first_component,
642                              uint32_t components);
643 
644 fs_reg setup_imm_df(const brw::fs_builder &bld,
645                     double v);
646 
647 fs_reg setup_imm_b(const brw::fs_builder &bld,
648                    int8_t v);
649 
650 fs_reg setup_imm_ub(const brw::fs_builder &bld,
651                    uint8_t v);
652 
653 enum brw_barycentric_mode brw_barycentric_mode(nir_intrinsic_instr *intr);
654 
655 uint32_t brw_fb_write_msg_control(const fs_inst *inst,
656                                   const struct brw_wm_prog_data *prog_data);
657 
658 void brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data);
659 
660 bool brw_nir_lower_simd(nir_shader *nir, unsigned dispatch_width);
661 
662 namespace brw {
663    class fs_builder;
664 }
665 
666 fs_reg brw_sample_mask_reg(const brw::fs_builder &bld);
667 void brw_emit_predicate_on_sample_mask(const brw::fs_builder &bld, fs_inst *inst);
668 
669 #endif /* BRW_FS_H */
670