1 /* 2 * Copyright © 2011 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #ifndef BRW_VEC4_H 25 #define BRW_VEC4_H 26 27 #include "brw_shader.h" 28 29 #ifdef __cplusplus 30 #include "brw_ir_vec4.h" 31 #include "brw_ir_performance.h" 32 #include "brw_vec4_builder.h" 33 #include "brw_vec4_live_variables.h" 34 #endif 35 36 #include "compiler/glsl/ir.h" 37 #include "compiler/nir/nir.h" 38 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 const unsigned * 45 brw_vec4_generate_assembly(const struct brw_compiler *compiler, 46 void *log_data, 47 void *mem_ctx, 48 const nir_shader *nir, 49 struct brw_vue_prog_data *prog_data, 50 const struct cfg_t *cfg, 51 const brw::performance &perf, 52 struct brw_compile_stats *stats); 53 54 #ifdef __cplusplus 55 } /* extern "C" */ 56 57 namespace brw { 58 /** 59 * The vertex shader front-end. 60 * 61 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and 62 * fixed-function) into VS IR. 63 */ 64 class vec4_visitor : public backend_shader 65 { 66 public: 67 vec4_visitor(const struct brw_compiler *compiler, 68 void *log_data, 69 const struct brw_sampler_prog_key_data *key, 70 struct brw_vue_prog_data *prog_data, 71 const nir_shader *shader, 72 void *mem_ctx, 73 bool no_spills, 74 int shader_time_index); 75 dst_null_f()76 dst_reg dst_null_f() 77 { 78 return dst_reg(brw_null_reg()); 79 } 80 dst_null_df()81 dst_reg dst_null_df() 82 { 83 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF)); 84 } 85 dst_null_d()86 dst_reg dst_null_d() 87 { 88 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); 89 } 90 dst_null_ud()91 dst_reg dst_null_ud() 92 { 93 return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); 94 } 95 96 const struct brw_sampler_prog_key_data * const key_tex; 97 struct brw_vue_prog_data * const prog_data; 98 char *fail_msg; 99 bool failed; 100 101 /** 102 * GLSL IR currently being processed, which is associated with our 103 * driver IR instructions for debugging purposes. 104 */ 105 const void *base_ir; 106 const char *current_annotation; 107 108 int first_non_payload_grf; 109 unsigned int max_grf; 110 brw_analysis<brw::vec4_live_variables, backend_shader> live_analysis; 111 brw_analysis<brw::performance, vec4_visitor> performance_analysis; 112 113 bool need_all_constants_in_pull_buffer; 114 115 /* Regs for vertex results. Generated at ir_variable visiting time 116 * for the ir->location's used. 117 */ 118 dst_reg output_reg[VARYING_SLOT_TESS_MAX][4]; 119 unsigned output_num_components[VARYING_SLOT_TESS_MAX][4]; 120 const char *output_reg_annotation[VARYING_SLOT_TESS_MAX]; 121 int uniforms; 122 123 src_reg shader_start_time; 124 125 bool run(); 126 void fail(const char *msg, ...); 127 128 int setup_uniforms(int payload_reg); 129 130 bool reg_allocate_trivial(); 131 bool reg_allocate(); 132 void evaluate_spill_costs(float *spill_costs, bool *no_spill); 133 int choose_spill_reg(struct ra_graph *g); 134 void spill_reg(unsigned spill_reg); 135 void move_grf_array_access_to_scratch(); 136 void move_uniform_array_access_to_pull_constants(); 137 void move_push_constants_to_pull_constants(); 138 void split_uniform_registers(); 139 void pack_uniform_registers(); 140 virtual void invalidate_analysis(brw::analysis_dependency_class c); 141 void split_virtual_grfs(); 142 bool opt_vector_float(); 143 bool opt_reduce_swizzle(); 144 bool dead_code_eliminate(); 145 bool opt_cmod_propagation(); 146 bool opt_copy_propagation(bool do_constant_prop = true); 147 bool opt_cse_local(bblock_t *block, const vec4_live_variables &live); 148 bool opt_cse(); 149 bool opt_algebraic(); 150 bool opt_register_coalesce(); 151 bool eliminate_find_live_channel(); 152 bool is_dep_ctrl_unsafe(const vec4_instruction *inst); 153 void opt_set_dependency_control(); 154 void opt_schedule_instructions(); 155 void convert_to_hw_regs(); 156 void fixup_3src_null_dest(); 157 158 bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg); 159 bool lower_simd_width(); 160 bool scalarize_df(); 161 bool lower_64bit_mad_to_mul_add(); 162 void apply_logical_swizzle(struct brw_reg *hw_reg, 163 vec4_instruction *inst, int arg); 164 165 vec4_instruction *emit(vec4_instruction *inst); 166 167 vec4_instruction *emit(enum opcode opcode); 168 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst); 169 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 170 const src_reg &src0); 171 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 172 const src_reg &src0, const src_reg &src1); 173 vec4_instruction *emit(enum opcode opcode, const dst_reg &dst, 174 const src_reg &src0, const src_reg &src1, 175 const src_reg &src2); 176 177 vec4_instruction *emit_before(bblock_t *block, 178 vec4_instruction *inst, 179 vec4_instruction *new_inst); 180 181 #define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &); 182 #define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &); 183 #define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &); 184 EMIT1(MOV) 185 EMIT1(NOT) 186 EMIT1(RNDD) 187 EMIT1(RNDE) 188 EMIT1(RNDZ) 189 EMIT1(FRC) 190 EMIT1(F32TO16) 191 EMIT1(F16TO32) 192 EMIT2(ADD) 193 EMIT2(MUL) 194 EMIT2(MACH) 195 EMIT2(MAC) 196 EMIT2(AND) 197 EMIT2(OR) 198 EMIT2(XOR) 199 EMIT2(DP3) 200 EMIT2(DP4) 201 EMIT2(DPH) 202 EMIT2(SHL) 203 EMIT2(SHR) 204 EMIT2(ASR) 205 vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1, 206 enum brw_conditional_mod condition); 207 vec4_instruction *IF(src_reg src0, src_reg src1, 208 enum brw_conditional_mod condition); 209 vec4_instruction *IF(enum brw_predicate predicate); 210 EMIT1(SCRATCH_READ) 211 EMIT2(SCRATCH_WRITE) 212 EMIT3(LRP) 213 EMIT1(BFREV) 214 EMIT3(BFE) 215 EMIT2(BFI1) 216 EMIT3(BFI2) 217 EMIT1(FBH) 218 EMIT1(FBL) 219 EMIT1(CBIT) 220 EMIT3(MAD) 221 EMIT2(ADDC) 222 EMIT2(SUBB) 223 EMIT1(DIM) 224 225 #undef EMIT1 226 #undef EMIT2 227 #undef EMIT3 228 229 vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, 230 src_reg src0, src_reg src1); 231 232 /** 233 * Copy any live channel from \p src to the first channel of the 234 * result. 235 */ 236 src_reg emit_uniformize(const src_reg &src); 237 238 /** Fix all float operands of a 3-source instruction. */ 239 void fix_float_operands(src_reg op[3], nir_alu_instr *instr); 240 241 src_reg fix_3src_operand(const src_reg &src); 242 243 vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0, 244 const src_reg &src1 = src_reg()); 245 246 src_reg fix_math_operand(const src_reg &src); 247 248 void emit_pack_half_2x16(dst_reg dst, src_reg src0); 249 void emit_unpack_half_2x16(dst_reg dst, src_reg src0); 250 void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0); 251 void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0); 252 void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0); 253 void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0); 254 255 void emit_texture(ir_texture_opcode op, 256 dst_reg dest, 257 const glsl_type *dest_type, 258 src_reg coordinate, 259 int coord_components, 260 src_reg shadow_comparator, 261 src_reg lod, src_reg lod2, 262 src_reg sample_index, 263 uint32_t constant_offset, 264 src_reg offset_value, 265 src_reg mcs, 266 uint32_t surface, src_reg surface_reg, 267 src_reg sampler_reg); 268 269 src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, 270 src_reg surface); 271 void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); 272 273 void emit_ndc_computation(); 274 void emit_psiz_and_flags(dst_reg reg); 275 vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp); 276 virtual void emit_urb_slot(dst_reg reg, int varying); 277 278 void emit_shader_time_begin(); 279 void emit_shader_time_end(); 280 void emit_shader_time_write(int shader_time_subindex, src_reg value); 281 282 src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst, 283 src_reg *reladdr, int reg_offset); 284 void emit_scratch_read(bblock_t *block, vec4_instruction *inst, 285 dst_reg dst, 286 src_reg orig_src, 287 int base_offset); 288 void emit_scratch_write(bblock_t *block, vec4_instruction *inst, 289 int base_offset); 290 void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, 291 dst_reg dst, 292 src_reg orig_src, 293 int base_offset, 294 src_reg indirect); 295 void emit_pull_constant_load_reg(dst_reg dst, 296 src_reg surf_index, 297 src_reg offset, 298 bblock_t *before_block, 299 vec4_instruction *before_inst); 300 src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, 301 vec4_instruction *inst, src_reg src); 302 303 void resolve_ud_negate(src_reg *reg); 304 305 bool lower_minmax(); 306 307 src_reg get_timestamp(); 308 309 void dump_instruction(const backend_instruction *inst) const; 310 void dump_instruction(const backend_instruction *inst, FILE *file) const; 311 312 bool is_high_sampler(src_reg sampler); 313 314 bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate); 315 316 void emit_conversion_from_double(dst_reg dst, src_reg src); 317 void emit_conversion_to_double(dst_reg dst, src_reg src); 318 319 vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src, 320 bool for_write, 321 bblock_t *block = NULL, 322 vec4_instruction *ref = NULL); 323 324 virtual void emit_nir_code(); 325 virtual void nir_setup_uniforms(); 326 virtual void nir_emit_impl(nir_function_impl *impl); 327 virtual void nir_emit_cf_list(exec_list *list); 328 virtual void nir_emit_if(nir_if *if_stmt); 329 virtual void nir_emit_loop(nir_loop *loop); 330 virtual void nir_emit_block(nir_block *block); 331 virtual void nir_emit_instr(nir_instr *instr); 332 virtual void nir_emit_load_const(nir_load_const_instr *instr); 333 src_reg get_nir_ssbo_intrinsic_index(nir_intrinsic_instr *instr); 334 virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); 335 virtual void nir_emit_alu(nir_alu_instr *instr); 336 virtual void nir_emit_jump(nir_jump_instr *instr); 337 virtual void nir_emit_texture(nir_tex_instr *instr); 338 virtual void nir_emit_undef(nir_ssa_undef_instr *instr); 339 virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr); 340 341 dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type); 342 dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type); 343 dst_reg get_nir_dest(const nir_dest &dest); 344 src_reg get_nir_src(const nir_src &src, enum brw_reg_type type, 345 unsigned num_components = 4); 346 src_reg get_nir_src(const nir_src &src, nir_alu_type type, 347 unsigned num_components = 4); 348 src_reg get_nir_src(const nir_src &src, 349 unsigned num_components = 4); 350 src_reg get_nir_src_imm(const nir_src &src); 351 src_reg get_indirect_offset(nir_intrinsic_instr *instr); 352 353 dst_reg *nir_locals; 354 dst_reg *nir_ssa_values; 355 356 protected: 357 void emit_vertex(); 358 void setup_payload_interference(struct ra_graph *g, int first_payload_node, 359 int reg_node_count); 360 virtual void setup_payload() = 0; 361 virtual void emit_prolog() = 0; 362 virtual void emit_thread_end() = 0; 363 virtual void emit_urb_write_header(int mrf) = 0; 364 virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; 365 virtual void gs_emit_vertex(int stream_id); 366 virtual void gs_end_primitive(); 367 368 private: 369 /** 370 * If true, then register allocation should fail instead of spilling. 371 */ 372 const bool no_spills; 373 374 int shader_time_index; 375 376 unsigned last_scratch; /**< measured in 32-byte (register size) units */ 377 }; 378 379 } /* namespace brw */ 380 #endif /* __cplusplus */ 381 382 #endif /* BRW_VEC4_H */ 383