• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #pragma once
29 
30 #include "brw_shader.h"
31 #include "brw_ir_fs.h"
32 #include "brw_fs_builder.h"
33 #include "compiler/nir/nir.h"
34 
/* Forward declarations for types this header only uses through pointers. */
struct bblock_t;

/* NOTE(review): this header is guarded by #pragma once and is meant to be
 * included, so this unnamed namespace gives every including translation
 * unit its own distinct acp_entry type.  It is kept as-is because
 * fs_visitor::try_copy_propagate() and fs_visitor::try_constant_propagate()
 * name acp_entry in their signatures.
 */
namespace {
   struct acp_entry;
}

namespace brw {
   class fs_live_variables;
}

struct brw_gs_compile;
45 
46 static inline fs_reg
offset(const fs_reg & reg,const brw::fs_builder & bld,unsigned delta)47 offset(const fs_reg &reg, const brw::fs_builder &bld, unsigned delta)
48 {
49    return offset(reg, bld.dispatch_width(), delta);
50 }
51 
52 /**
53  * The fragment shader front-end.
54  *
55  * Translates either GLSL IR or Mesa IR (for ARB_fragment_program) into FS IR.
56  */
57 class fs_visitor : public backend_shader
58 {
59 public:
60    fs_visitor(const struct brw_compiler *compiler, void *log_data,
61               void *mem_ctx,
62               const void *key,
63               struct brw_stage_prog_data *prog_data,
64               struct gl_program *prog,
65               const nir_shader *shader,
66               unsigned dispatch_width,
67               int shader_time_index,
68               const struct brw_vue_map *input_vue_map = NULL);
69    fs_visitor(const struct brw_compiler *compiler, void *log_data,
70               void *mem_ctx,
71               struct brw_gs_compile *gs_compile,
72               struct brw_gs_prog_data *prog_data,
73               const nir_shader *shader,
74               int shader_time_index);
75    void init();
76    ~fs_visitor();
77 
78    fs_reg vgrf(const glsl_type *const type);
79    void import_uniforms(fs_visitor *v);
80    void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
81    void compute_clip_distance(gl_clip_plane *clip_planes);
82 
83    fs_inst *get_instruction_generating_reg(fs_inst *start,
84 					   fs_inst *end,
85 					   const fs_reg &reg);
86 
87    void VARYING_PULL_CONSTANT_LOAD(const brw::fs_builder &bld,
88                                    const fs_reg &dst,
89                                    const fs_reg &surf_index,
90                                    const fs_reg &varying_offset,
91                                    uint32_t const_offset);
92    void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
93 
94    bool run_fs(bool allow_spilling, bool do_rep_send);
95    bool run_vs(gl_clip_plane *clip_planes);
96    bool run_tcs_single_patch();
97    bool run_tes();
98    bool run_gs();
99    bool run_cs();
100    void optimize();
101    void allocate_registers(bool allow_spilling);
102    void setup_fs_payload_gen4();
103    void setup_fs_payload_gen6();
104    void setup_vs_payload();
105    void setup_gs_payload();
106    void setup_cs_payload();
107    void fixup_3src_null_dest();
108    void assign_curb_setup();
109    void calculate_urb_setup();
110    void assign_urb_setup();
111    void convert_attr_sources_to_hw_regs(fs_inst *inst);
112    void assign_vs_urb_setup();
113    void assign_tcs_single_patch_urb_setup();
114    void assign_tes_urb_setup();
115    void assign_gs_urb_setup();
116    bool assign_regs(bool allow_spilling, bool spill_all);
117    void assign_regs_trivial();
118    void calculate_payload_ranges(int payload_node_count,
119                                  int *payload_last_use_ip);
120    void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
121                                    int first_payload_node);
122    int choose_spill_reg(struct ra_graph *g);
123    void spill_reg(int spill_reg);
124    void split_virtual_grfs();
125    bool compact_virtual_grfs();
126    void assign_constant_locations();
127    void lower_constant_loads();
128    void invalidate_live_intervals();
129    void calculate_live_intervals();
130    void calculate_register_pressure();
131    void validate();
132    bool opt_algebraic();
133    bool opt_redundant_discard_jumps();
134    bool opt_cse();
135    bool opt_cse_local(bblock_t *block);
136    bool opt_copy_propagation();
137    bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
138    bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
139    bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
140                                    exec_list *acp);
141    bool opt_drop_redundant_mov_to_flags();
142    bool opt_register_renaming();
143    bool register_coalesce();
144    bool compute_to_mrf();
145    bool eliminate_find_live_channel();
146    bool dead_code_eliminate();
147    bool remove_duplicate_mrf_writes();
148 
149    bool opt_sampler_eot();
150    bool virtual_grf_interferes(int a, int b);
151    void schedule_instructions(instruction_scheduler_mode mode);
152    void insert_gen4_send_dependency_workarounds();
153    void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
154                                                     fs_inst *inst);
155    void insert_gen4_post_send_dependency_workarounds(bblock_t *block,
156                                                      fs_inst *inst);
157    void vfail(const char *msg, va_list args);
158    void fail(const char *msg, ...);
159    void limit_dispatch_width(unsigned n, const char *msg);
160    void lower_uniform_pull_constant_loads();
161    bool lower_load_payload();
162    bool lower_pack();
163    bool lower_d2x();
164    bool lower_logical_sends();
165    bool lower_integer_multiplication();
166    bool lower_minmax();
167    bool lower_simd_width();
168    bool opt_combine_constants();
169 
170    void emit_dummy_fs();
171    void emit_repclear_shader();
172    void emit_fragcoord_interpolation(fs_reg wpos);
173    fs_reg *emit_frontfacing_interpolation();
174    fs_reg *emit_samplepos_setup();
175    fs_reg *emit_sampleid_setup();
176    fs_reg *emit_samplemaskin_setup();
177    fs_reg *emit_vs_system_value(int location);
178    void emit_interpolation_setup_gen4();
179    void emit_interpolation_setup_gen6();
180    void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
181    fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
182                          const fs_reg &sampler);
183    void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
184    fs_reg resolve_source_modifiers(const fs_reg &src);
185    void emit_discard_jump();
186    bool opt_peephole_sel();
187    bool opt_peephole_predicated_break();
188    bool opt_saturate_propagation();
189    bool opt_cmod_propagation();
190    bool opt_zero_samples();
191 
192    void emit_nir_code();
193    void nir_setup_outputs();
194    void nir_setup_uniforms();
195    void nir_emit_system_values();
196    void nir_emit_impl(nir_function_impl *impl);
197    void nir_emit_cf_list(exec_list *list);
198    void nir_emit_if(nir_if *if_stmt);
199    void nir_emit_loop(nir_loop *loop);
200    void nir_emit_block(nir_block *block);
201    void nir_emit_instr(nir_instr *instr);
202    void nir_emit_alu(const brw::fs_builder &bld, nir_alu_instr *instr);
203    void nir_emit_load_const(const brw::fs_builder &bld,
204                             nir_load_const_instr *instr);
205    void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
206                               nir_intrinsic_instr *instr);
207    void nir_emit_tcs_intrinsic(const brw::fs_builder &bld,
208                                nir_intrinsic_instr *instr);
209    void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
210                               nir_intrinsic_instr *instr);
211    void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
212                               nir_intrinsic_instr *instr);
213    void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
214                               nir_intrinsic_instr *instr);
215    void nir_emit_intrinsic(const brw::fs_builder &bld,
216                            nir_intrinsic_instr *instr);
217    void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
218                                nir_intrinsic_instr *instr);
219    void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
220                              int op, nir_intrinsic_instr *instr);
221    void nir_emit_shared_atomic(const brw::fs_builder &bld,
222                                int op, nir_intrinsic_instr *instr);
223    void nir_emit_texture(const brw::fs_builder &bld,
224                          nir_tex_instr *instr);
225    void nir_emit_jump(const brw::fs_builder &bld,
226                       nir_jump_instr *instr);
227    fs_reg get_nir_src(const nir_src &src);
228    fs_reg get_nir_src_imm(const nir_src &src);
229    fs_reg get_nir_dest(const nir_dest &dest);
230    fs_reg get_nir_image_deref(const nir_deref_var *deref);
231    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
232    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
233                      unsigned wr_mask);
234 
235    bool optimize_extract_to_float(nir_alu_instr *instr,
236                                   const fs_reg &result);
237    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
238                                      const fs_reg &result);
239 
240    void emit_alpha_test();
241    fs_inst *emit_single_fb_write(const brw::fs_builder &bld,
242                                  fs_reg color1, fs_reg color2,
243                                  fs_reg src0_alpha, unsigned components);
244    void emit_fb_writes();
245    fs_inst *emit_non_coherent_fb_read(const brw::fs_builder &bld,
246                                       const fs_reg &dst, unsigned target);
247    void emit_urb_writes(const fs_reg &gs_vertex_count = fs_reg());
248    void set_gs_stream_control_data_bits(const fs_reg &vertex_count,
249                                         unsigned stream_id);
250    void emit_gs_control_data_bits(const fs_reg &vertex_count);
251    void emit_gs_end_primitive(const nir_src &vertex_count_nir_src);
252    void emit_gs_vertex(const nir_src &vertex_count_nir_src,
253                        unsigned stream_id);
254    void emit_gs_thread_end();
255    void emit_gs_input_load(const fs_reg &dst, const nir_src &vertex_src,
256                            unsigned base_offset, const nir_src &offset_src,
257                            unsigned num_components, unsigned first_component);
258    void emit_cs_terminate();
259    fs_reg *emit_cs_work_group_id_setup();
260 
261    void emit_barrier();
262 
263    void emit_shader_time_begin();
264    void emit_shader_time_end();
265    void SHADER_TIME_ADD(const brw::fs_builder &bld,
266                         int shader_time_subindex,
267                         fs_reg value);
268 
269    fs_reg get_timestamp(const brw::fs_builder &bld);
270 
271    struct brw_reg interp_reg(int location, int channel);
272 
273    int implied_mrf_writes(fs_inst *inst);
274 
275    virtual void dump_instructions();
276    virtual void dump_instructions(const char *name);
277    void dump_instruction(backend_instruction *inst);
278    void dump_instruction(backend_instruction *inst, FILE *file);
279 
280    const void *const key;
281    const struct brw_sampler_prog_key_data *key_tex;
282 
283    struct brw_gs_compile *gs_compile;
284 
285    struct brw_stage_prog_data *prog_data;
286    struct gl_program *prog;
287 
288    const struct brw_vue_map *input_vue_map;
289 
290    int *virtual_grf_start;
291    int *virtual_grf_end;
292    brw::fs_live_variables *live_intervals;
293 
294    int *regs_live_at_ip;
295 
296    /** Number of uniform variable components visited. */
297    unsigned uniforms;
298 
299    /** Byte-offset for the next available spot in the scratch space buffer. */
300    unsigned last_scratch;
301 
302    /**
303     * Array mapping UNIFORM register numbers to the pull parameter index,
304     * or -1 if this uniform register isn't being uploaded as a pull constant.
305     */
306    int *pull_constant_loc;
307 
308    /**
309     * Array mapping UNIFORM register numbers to the push parameter index,
310     * or -1 if this uniform register isn't being uploaded as a push constant.
311     */
312    int *push_constant_loc;
313 
314    fs_reg frag_depth;
315    fs_reg frag_stencil;
316    fs_reg sample_mask;
317    fs_reg outputs[VARYING_SLOT_MAX];
318    fs_reg dual_src_output;
319    int first_non_payload_grf;
320    /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
321    unsigned max_grf;
322 
323    fs_reg *nir_locals;
324    fs_reg *nir_ssa_values;
325    fs_reg *nir_system_values;
326 
327    bool failed;
328    char *fail_msg;
329 
330    /** Register numbers for thread payload fields. */
331    struct thread_payload {
332       uint8_t source_depth_reg;
333       uint8_t source_w_reg;
334       uint8_t aa_dest_stencil_reg;
335       uint8_t dest_depth_reg;
336       uint8_t sample_pos_reg;
337       uint8_t sample_mask_in_reg;
338       uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
339       uint8_t local_invocation_id_reg;
340 
341       /** The number of thread payload registers the hardware will supply. */
342       uint8_t num_regs;
343    } payload;
344 
345    bool source_depth_to_render_target;
346    bool runtime_check_aads_emit;
347 
348    fs_reg pixel_x;
349    fs_reg pixel_y;
350    fs_reg wpos_w;
351    fs_reg pixel_w;
352    fs_reg delta_xy[BRW_BARYCENTRIC_MODE_COUNT];
353    fs_reg shader_start_time;
354    fs_reg userplane[MAX_CLIP_PLANES];
355    fs_reg final_gs_vertex_count;
356    fs_reg control_data_bits;
357    fs_reg invocation_id;
358 
359    unsigned grf_used;
360    bool spilled_any_registers;
361 
362    const unsigned dispatch_width; /**< 8, 16 or 32 */
363    unsigned min_dispatch_width;
364    unsigned max_dispatch_width;
365 
366    int shader_time_index;
367 
368    unsigned promoted_constants;
369    brw::fs_builder bld;
370 };
371 
372 /**
373  * The fragment shader code generator.
374  *
375  * Translates FS IR to actual i965 assembly code.
376  */
377 class fs_generator
378 {
379 public:
380    fs_generator(const struct brw_compiler *compiler, void *log_data,
381                 void *mem_ctx,
382                 const void *key,
383                 struct brw_stage_prog_data *prog_data,
384                 unsigned promoted_constants,
385                 bool runtime_check_aads_emit,
386                 gl_shader_stage stage);
387    ~fs_generator();
388 
389    void enable_debug(const char *shader_name);
390    int generate_code(const cfg_t *cfg, int dispatch_width);
391    const unsigned *get_assembly(unsigned int *assembly_size);
392 
393 private:
394    void fire_fb_write(fs_inst *inst,
395                       struct brw_reg payload,
396                       struct brw_reg implied_header,
397                       GLuint nr);
398    void generate_fb_write(fs_inst *inst, struct brw_reg payload);
399    void generate_fb_read(fs_inst *inst, struct brw_reg dst,
400                          struct brw_reg payload);
401    void generate_urb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload);
402    void generate_urb_write(fs_inst *inst, struct brw_reg payload);
403    void generate_cs_terminate(fs_inst *inst, struct brw_reg payload);
404    void generate_barrier(fs_inst *inst, struct brw_reg src);
405    void generate_linterp(fs_inst *inst, struct brw_reg dst,
406 			 struct brw_reg *src);
407    void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
408                      struct brw_reg surface_index,
409                      struct brw_reg sampler_index);
410    void generate_get_buffer_size(fs_inst *inst, struct brw_reg dst,
411                                  struct brw_reg src,
412                                  struct brw_reg surf_index);
413    void generate_ddx(enum opcode op, struct brw_reg dst, struct brw_reg src);
414    void generate_ddy(enum opcode op, struct brw_reg dst, struct brw_reg src);
415    void generate_scratch_write(fs_inst *inst, struct brw_reg src);
416    void generate_scratch_read(fs_inst *inst, struct brw_reg dst);
417    void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst);
418    void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
419                                             struct brw_reg index,
420                                             struct brw_reg offset);
421    void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
422                                                  struct brw_reg dst,
423                                                  struct brw_reg surf_index,
424                                                  struct brw_reg payload);
425    void generate_varying_pull_constant_load_gen4(fs_inst *inst,
426                                                  struct brw_reg dst,
427                                                  struct brw_reg index);
428    void generate_varying_pull_constant_load_gen7(fs_inst *inst,
429                                                  struct brw_reg dst,
430                                                  struct brw_reg index,
431                                                  struct brw_reg offset);
432    void generate_mov_dispatch_to_flags(fs_inst *inst);
433 
434    void generate_pixel_interpolator_query(fs_inst *inst,
435                                           struct brw_reg dst,
436                                           struct brw_reg src,
437                                           struct brw_reg msg_data,
438                                           unsigned msg_type);
439 
440    void generate_set_sample_id(fs_inst *inst,
441                                struct brw_reg dst,
442                                struct brw_reg src0,
443                                struct brw_reg src1);
444 
445    void generate_discard_jump(fs_inst *inst);
446 
447    void generate_pack_half_2x16_split(fs_inst *inst,
448                                       struct brw_reg dst,
449                                       struct brw_reg x,
450                                       struct brw_reg y);
451    void generate_unpack_half_2x16_split(fs_inst *inst,
452                                         struct brw_reg dst,
453                                         struct brw_reg src);
454 
455    void generate_shader_time_add(fs_inst *inst,
456                                  struct brw_reg payload,
457                                  struct brw_reg offset,
458                                  struct brw_reg value);
459 
460    void generate_mov_indirect(fs_inst *inst,
461                               struct brw_reg dst,
462                               struct brw_reg reg,
463                               struct brw_reg indirect_byte_offset);
464 
465    bool patch_discard_jumps_to_fb_writes();
466 
467    const struct brw_compiler *compiler;
468    void *log_data; /* Passed to compiler->*_log functions */
469 
470    const struct gen_device_info *devinfo;
471 
472    struct brw_codegen *p;
473    const void * const key;
474    struct brw_stage_prog_data * const prog_data;
475 
476    unsigned dispatch_width; /**< 8, 16 or 32 */
477 
478    exec_list discard_halt_patches;
479    unsigned promoted_constants;
480    bool runtime_check_aads_emit;
481    bool debug_flag;
482    const char *shader_name;
483    gl_shader_stage stage;
484    void *mem_ctx;
485 };
486 
487 bool brw_do_channel_expressions(struct exec_list *instructions);
488 bool brw_do_vector_splitting(struct exec_list *instructions);
489 
490 void shuffle_32bit_load_result_to_64bit_data(const brw::fs_builder &bld,
491                                              const fs_reg &dst,
492                                              const fs_reg &src,
493                                              uint32_t components);
494 
495 void shuffle_64bit_data_for_32bit_write(const brw::fs_builder &bld,
496                                         const fs_reg &dst,
497                                         const fs_reg &src,
498                                         uint32_t components);
499 fs_reg setup_imm_df(const brw::fs_builder &bld,
500                     double v);
501 
502 enum brw_barycentric_mode brw_barycentric_mode(enum glsl_interp_mode mode,
503                                                nir_intrinsic_op op);
504