/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"

/* The IR/analysis headers below are C++-only; this header is also included
 * from C translation units, hence the guard.
 */
#ifdef __cplusplus
#include "brw_ir_vec4.h"
#include "brw_ir_performance.h"
#include "brw_vec4_builder.h"
#include "brw_vec4_live_variables.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

/**
 * Generate native assembly for a vec4 (VUE-based) program from its
 * optimized IR control-flow graph \p cfg.
 *
 * Exposed with C linkage so C callers can drive code generation.
 * NOTE(review): the returned buffer's lifetime is presumably tied to
 * \p mem_ctx, and \p stats / \p prog_data receive compile statistics —
 * confirm against the definition before relying on either.
 */
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           const brw::performance &perf,
                           struct brw_compile_stats *stats,
                           bool debug_enabled);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {
/**
 * The vertex shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
 * fixed-function) into VS IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index,
                bool debug_enabled);

   /* Null destinations of various types, for instructions executed only
    * for their side effects (e.g. setting the condition flag).
    */
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;
   /* Failure state; presumably recorded by fail() below — confirm there. */
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   int first_non_payload_grf;
   unsigned ubo_push_start[4];
   unsigned push_length;
   unsigned int max_grf;
   brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis;
   brw_analysis<brw::performance, vec4_visitor> performance_analysis;

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results. Generated at ir_variable visiting time
    * for the ir->location's used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   /* Register allocation, spilling, and uniform/push-constant layout. */
   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(unsigned spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void setup_push_ranges();
   virtual void invalidate_analysis(brw::analysis_dependency_class c);
   void split_virtual_grfs();

   /* Optimization passes; the bool return presumably reports whether the
    * pass made progress — confirm against the definitions.
    */
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block, const vec4_live_variables &live);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();
   void fixup_3src_null_dest();

   /* 64-bit (double) lowering helpers. */
   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   /* Instruction-emission entry points: append (or, for emit_before,
    * insert) a vec4_instruction into the IR stream.
    */
   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

   /* Declare per-opcode emit helpers taking 1, 2, or 3 source operands. */
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   /* CMP and IF take a conditional mod / predicate, so they are declared
    * by hand rather than through the EMITn macros.
    */
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   /** Fix all float operands of a 3-source instruction. */
   void fix_float_operands(src_reg op[3], nir_alu_instr *instr);

   src_reg fix_3src_operand(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   /* Lowering for the GLSL pack/unpack built-ins. */
   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   /* Texture-sampling emission. */
   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     int dest_components,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparator,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     uint32_t surface, src_reg surface_reg,
                     src_reg sampler_reg);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);
   void emit_gfx6_gather_wa(uint8_t wa, dst_reg dst);

   /* URB (vertex output) emission. */
   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   /* Scratch-space and pull-constant access. */
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block,
                                vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset,
                                src_reg indirect);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   void dump_instruction(const backend_instruction *inst) const;
   void dump_instruction(const backend_instruction *inst, FILE *file) const;

   bool is_high_sampler(src_reg sampler);

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   void emit_conversion_from_double(dst_reg dst, src_reg src);
   void emit_conversion_to_double(dst_reg dst, src_reg src);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bool for_scratch = false,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

   /* NIR translation: one visitor per NIR construct, overridable by the
    * stage-specific subclasses.
    */
   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   /* NIR source/destination to vec4 IR register mapping.  The overloads
    * default to 4 components (a full vec4 channel group).
    */
   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_nir_src_imm(const nir_src &src);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;

protected:
   void emit_vertex();
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);
   /* Pure-virtual hooks that every concrete stage subclass must implement;
    * the gs_* hooks have default implementations a geometry-style stage
    * can override.
    */
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */