/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"

#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

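/* Turns the vec4 IR built by the brw::vec4_visitor below (see the usage
 * sketch after that class) into native EU assembly: the visitor lowers the
 * NIR shader and produces the cfg_t that, together with prog_data and the
 * performance analysis, is consumed here.
 */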
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const brw::performance &perf,
                           struct brw_compile_stats *stats,
                           bool debug_enabled);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {
/**
 * The vertex shader front-end.
 *
 * Translates the NIR shader (generated from GLSL, ARB_vertex_program, or
 * fixed-function state) into VS IR.  A rough usage sketch follows the class
 * definition below.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                bool debug_enabled);

   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned ubo_push_start[4];
   unsigned push_length;
   unsigned int max_grf;
   brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
   brw_analysis<brw::performance, vec4_visitor> performance_analysis;

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results.  Generated at ir_variable visiting time
    * for the ir->location slots used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void split_uniform_registers();
   void setup_push_ranges();
   virtual void invalidate_analysis(brw::analysis_dependency_class c);
   void split_virtual_grfs();
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
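   /* Each EMITn(op) invocation below declares an n-source emit helper; for
    * example, EMIT2(ADD) expands to
    *    vec4_instruction *ADD(const dst_reg &, const src_reg &, const src_reg &);
    */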
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);

   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(const backend_instruction *inst) const;
   void dump_instruction(const backend_instruction *inst, FILE *file) const;

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bool for_scratch = false,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};
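
/*
 * Rough usage sketch (illustrative only; `my_vec4_visitor` stands for a
 * concrete subclass such as the VS or GS visitor, which must implement the
 * pure virtuals above and may take additional constructor arguments):
 *
 *    my_vec4_visitor v(compiler, log_data, key_tex, prog_data, nir,
 *                      mem_ctx, false, debug_enabled);   // false = no_spills
 *    if (!v.run()) {
 *       // Compilation failed; v.fail_msg explains why.
 *    } else {
 *       const unsigned *assembly =
 *          brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
 *                                     prog_data, v.cfg,
 *                                     v.performance_analysis.require(),
 *                                     stats, debug_enabled);
 *    }
 */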

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */