1 /* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #ifndef BRW_VEC4_H 25 #define BRW_VEC4_H 26 27 #include "brw_shader.h" 28 29 #ifdef __cplusplus 30 #include "brw_ir_vec4.h" 31 #include "brw_ir_performance.h" 32 #include "brw_vec4_builder.h" 33 #include "brw_vec4_live_variables.h" 34 #endif 35 36 #include "compiler/glsl/ir.h" 37 #include "compiler/nir/nir.h" 38 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 const unsigned * 45 brw_vec4_generate_assembly(const struct brw_compiler *compiler, 46 void *log_data, 47 void *mem_ctx, 48 const nir_shader *nir, 49 struct brw_vue_prog_data *prog_data, 50 const struct cfg_t *cfg, 51 const brw::performance &perf, 52 struct brw_compile_stats *stats, 53 bool debug_enabled); 54 55 #ifdef __cplusplus 56 } /* extern "C" */ 57 58 namespace brw { 59 /** 60 * The vertex shader front-end. 61 * 62 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and 63 * fixed-function) into VS IR. 64 */ 65 class vec4_visitor : public backend_shader 66 { 67 public: 68 vec4_visitor(const struct brw_compiler *compiler, 69 void *log_data, 70 const struct brw_sampler_prog_key_data *key, 71 struct brw_vue_prog_data *prog_data, 72 const nir_shader *shader, 73 void *mem_ctx, 74 bool no_spills, 75 bool debug_enabled); 76 dst_null_f()77 dst_reg dst_null_f() 78 { 79 return dst_reg(brw_null_reg()); 80 } 81 dst_null_df()82 dst_reg dst_null_df() 83 { 84 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF)); 85 } 86 dst_null_d()87 dst_reg dst_null_d() 88 { 89 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 90 } 91 dst_null_ud()92 dst_reg dst_null_ud() 93 { 94 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); 95 } 96 97 const struct brw_sampler_prog_key_data * const key_tex; 98 struct brw_vue_prog_data * const prog_data; 99 char *fail_msg; 100 bool failed; 101 102 /** 103 * GLSL IR currently being processed, which is associated with our 104 * driver IR instructions for debugging purposes. 105 */ 106 const void *base_ir; 107 const char *current_annotation; 108 109 int first_non_payload_grf; 110 unsigned ubo_push_start[4]; 111 unsigned push_length; 112 unsigned int max_grf; 113 brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis; 114 brw_analysis<brw::performance, vec4_visitor> performance_analysis; 115 116 bool need_all_constants_in_pull_buffer; 117 118 /* Regs for vertex results. Generated at ir_variable visiting time 119 * for the ir->location's used. 120 */ 121 dst_reg output_reg[VARYING_SLOT_TESS_MAX][4]; 122 unsigned output_num_components[VARYING_SLOT_TESS_MAX][4]; 123 const char *output_reg_annotation[VARYING_SLOT_TESS_MAX]; 124 int uniforms; 125 126 src_reg shader_start_time; 127 128 bool run(); 129 void fail(const char *msg, ...); 130 131 int setup_uniforms(int payload_reg); 132 133 bool reg_allocate_trivial(); 134 bool reg_allocate(); 135 void evaluate_spill_costs(float *spill_costs, bool *no_spill); 136 int choose_spill_reg(struct ra_graph *g); 137 void spill_reg(unsigned spill_reg); 138 void move_grf_array_access_to_scratch(); 139 void move_uniform_array_access_to_pull_constants(); 140 void split_uniform_registers(); 141 void setup_push_ranges(); 142 virtual void invalidate_analysis(brw::analysis_dependency_class c); 143 void split_virtual_grfs(); 144 bool opt_vector_float(); 145 bool opt_reduce_swizzle(); 146 bool dead_code_eliminate(); 147 bool opt_cmod_propagation(); 148 bool opt_copy_propagation(bool do_constant_prop = true); 149 bool opt_cse_local(bblock_t *block, const vec4_live_variables &live); 150 bool opt_cse(); 151 bool opt_algebraic(); 152 bool opt_register_coalesce(); 153 bool eliminate_find_live_channel(); 154 bool is_dep_ctrl_unsafe(const vec4_instruction *inst); 155 void opt_set_dependency_control(); 156 void opt_schedule_instructions(); 157 void convert_to_hw_regs(); 158 void fixup_3src_null_dest(); 159 160 bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg); 161 bool lower_simd_width(); 162 bool scalarize_df(); 163 bool lower_64bit_mad_to_mul_add(); 164 void apply_logical_swizzle(struct brw_reg *hw_reg, 165 vec4_instruction *inst, int arg); 166 167 vec4_instruction *emit(vec4_instruction *inst); 168 169 vec4_instruction *emit(enum opcode opcode); 170 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst); 171 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 172 const src_reg &src0); 173 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 174 const src_reg &src0, const src_reg &src1); 175 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 176 const src_reg &src0, const src_reg &src1, 177 const src_reg &src2); 178 179 vec4_instruction *emit_before(bblock_t *block, 180 vec4_instruction *inst, 181 vec4_instruction *new_inst); 182 183 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &); 184 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &); 185 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &); 186 EMIT1(MOV) 187 EMIT1(NOT) 188 EMIT1(RNDD) 189 EMIT1(RNDE) 190 EMIT1(RNDZ) 191 EMIT1(FRC) 192 EMIT1(F32TO16) 193 EMIT1(F16TO32) 194 EMIT2(ADD) 195 EMIT2(MUL) 196 EMIT2(MACH) 197 EMIT2(MAC) 198 EMIT2(AND) 199 EMIT2(OR) 200 EMIT2(XOR) 201 EMIT2(DP3) 202 EMIT2(DP4) 203 EMIT2(DPH) 204 EMIT2(SHL) 205 EMIT2(SHR) 206 EMIT2(ASR) 207 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, 208 enum brw_conditional_mod condition); 209 vec4_instruction *IF(src_reg src0, src_reg src1, 210 enum brw_conditional_mod condition); 211 vec4_instruction *IF(enum brw_predicate predicate); 212 EMIT1(SCRATCH_READ) 213 EMIT2(SCRATCH_WRITE) 214 EMIT3(LRP) 215 EMIT1(BFREV) 216 EMIT3(BFE) 217 EMIT2(BFI1) 218 EMIT3(BFI2) 219 EMIT1(FBH) 220 EMIT1(FBL) 221 EMIT1(CBIT) 222 EMIT3(MAD) 223 EMIT2(ADDC) 224 EMIT2(SUBB) 225 EMIT1(DIM) 226 227 #undef EMIT1 228 #undef EMIT2 229 #undef EMIT3 230 231 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, 232 src_reg src0, src_reg src1); 233 234 /** 235 * Copy any live channel from \p src to the first channel of the 236 * result. 237 */ 238 src_reg emit_uniformize(const src_reg &src); 239 240 /** Fix all float operands of a 3-source instruction. */ 241 void fix_float_operands(src_reg op[3], nir_alu_instr *instr); 242 243 src_reg fix_3src_operand(const src_reg &src); 244 245 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 246 const src_reg &src1 = src_reg()); 247 248 src_reg fix_math_operand(const src_reg &src); 249 250 void emit_pack_half_2x16(dst_reg dst, src_reg src0); 251 void emit_unpack_half_2x16(dst_reg dst, src_reg src0); 252 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0); 253 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0); 254 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0); 255 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0); 256 257 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, 258 src_reg surface); 259 260 void emit_ndc_computation(); 261 void emit_psiz_and_flags(dst_reg reg); 262 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp); 263 virtual void emit_urb_slot(dst_reg reg, int varying); 264 265 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst, 266 src_reg *reladdr, int reg_offset); 267 void emit_scratch_read(bblock_t *block, vec4_instruction *inst, 268 dst_reg dst, 269 src_reg orig_src, 270 int base_offset); 271 void emit_scratch_write(bblock_t *block, vec4_instruction *inst, 272 int base_offset); 273 void emit_pull_constant_load_reg(dst_reg dst, 274 src_reg surf_index, 275 src_reg offset, 276 bblock_t *before_block, 277 vec4_instruction *before_inst); 278 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, 279 vec4_instruction *inst, src_reg src); 280 281 void resolve_ud_negate(src_reg *reg); 282 283 bool lower_minmax(); 284 285 src_reg get_timestamp(); 286 287 void dump_instruction(const backend_instruction *inst) const; 288 void dump_instruction(const backend_instruction *inst, FILE *file) const; 289 290 bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate); 291 292 void emit_conversion_from_double(dst_reg dst, src_reg src); 293 void emit_conversion_to_double(dst_reg dst, src_reg src); 294 295 vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src, 296 bool for_write, 297 bool for_scratch = false, 298 bblock_t *block = NULL, 299 vec4_instruction *ref = NULL); 300 301 virtual void emit_nir_code(); 302 virtual void nir_setup_uniforms(); 303 virtual void nir_emit_impl(nir_function_impl *impl); 304 virtual void nir_emit_cf_list(exec_list *list); 305 virtual void nir_emit_if(nir_if *if_stmt); 306 virtual void nir_emit_loop(nir_loop *loop); 307 virtual void nir_emit_block(nir_block *block); 308 virtual void nir_emit_instr(nir_instr *instr); 309 virtual void nir_emit_load_const(nir_load_const_instr *instr); 310 src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr); 311 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); 312 virtual void nir_emit_alu(nir_alu_instr *instr); 313 virtual void nir_emit_jump(nir_jump_instr *instr); 314 virtual void nir_emit_texture(nir_tex_instr *instr); 315 virtual void nir_emit_undef(nir_ssa_undef_instr *instr); 316 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr); 317 318 dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type); 319 dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type); 320 dst_reg get_nir_dest(const nir_dest &dest); 321 src_reg get_nir_src(const nir_src &src, enum brw_reg_type type, 322 unsigned num_components = 4); 323 src_reg get_nir_src(const nir_src &src, nir_alu_type type, 324 unsigned num_components = 4); 325 src_reg get_nir_src(const nir_src &src, 326 unsigned num_components = 4); 327 src_reg get_nir_src_imm(const nir_src &src); 328 src_reg get_indirect_offset(nir_intrinsic_instr *instr); 329 330 dst_reg *nir_locals; 331 dst_reg *nir_ssa_values; 332 333 protected: 334 void emit_vertex(); 335 void setup_payload_interference(struct ra_graph *g, int first_payload_node, 336 int reg_node_count); 337 virtual void setup_payload() = 0; 338 virtual void emit_prolog() = 0; 339 virtual void emit_thread_end() = 0; 340 virtual void emit_urb_write_header(int mrf) = 0; 341 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; 342 virtual void gs_emit_vertex(int stream_id); 343 virtual void gs_end_primitive(); 344 345 private: 346 /** 347 * If true, then register allocation should fail instead of spilling. 348 */ 349 const bool no_spills; 350 351 unsigned last_scratch; /**< measured in 32-byte (register size) units */ 352 }; 353 354 } /* namespace brw */ 355 #endif /* __cplusplus */ 356 357 #endif /* BRW_VEC4_H */ 358