1 /* -*- mesa-c++ -*- 2 * 3 * Copyright (c) 2022 Collabora LTD 4 * 5 * Author: Gert Wollny <gert.wollny@collabora.com> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * on the rights to use, copy, modify, merge, publish, distribute, sub 11 * license, and/or sell copies of the Software, and to permit persons to whom 12 * the Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 24 * USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 27 #ifndef SFN_SHADER_H 28 #define SFN_SHADER_H 29 30 #include "amd_family.h" 31 #include "compiler/shader_enums.h" 32 #include "gallium/drivers/r600/r600_shader.h" 33 #include "sfn_instr.h" 34 #include "sfn_instr_controlflow.h" 35 #include "sfn_instrfactory.h" 36 #include "sfn_liverangeevaluator.h" 37 38 #include <bitset> 39 #include <memory> 40 #include <stack> 41 #include <vector> 42 43 struct nir_shader; 44 struct nir_cf_node; 45 struct nir_if; 46 struct nir_block; 47 struct nir_instr; 48 49 namespace r600 { 50 51 class ShaderIO { 52 public: 53 void print(std::ostream& os) const; 54 location()55 int location() const { return m_location; } set_location(int location)56 void set_location(int location) { m_location = location; } 57 varying_slot()58 gl_varying_slot varying_slot() const { return m_varying_slot; } set_varying_slot(gl_varying_slot varying_slot)59 void set_varying_slot(gl_varying_slot varying_slot) { m_varying_slot = varying_slot; } 60 no_varying()61 bool no_varying() const { return m_no_varying; } set_no_varying(bool no_varying)62 void set_no_varying(bool no_varying) { m_no_varying = no_varying; } 63 64 int spi_sid() const; 65 set_gpr(int gpr)66 void set_gpr(int gpr) { m_gpr = gpr; } gpr()67 int gpr() const { return m_gpr; } 68 69 protected: 70 ShaderIO(const char *type, int loc, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 71 72 private: 73 virtual void do_print(std::ostream& os) const = 0; 74 75 const char *m_type; 76 int m_location{-1}; 77 gl_varying_slot m_varying_slot{NUM_TOTAL_VARYING_SLOTS}; 78 bool m_no_varying{false}; 79 int m_gpr{0}; 80 }; 81 82 class ShaderOutput : public ShaderIO { 83 public: 84 ShaderOutput(); 85 ShaderOutput(int location, int writemask, 86 gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 87 frag_result()88 gl_frag_result frag_result() const { return m_frag_result; } set_frag_result(gl_frag_result frag_result)89 void set_frag_result(gl_frag_result frag_result) { m_frag_result = frag_result; } 90 writemask()91 int writemask() const { return m_writemask; } set_writemask(int writemask)92 void set_writemask(int writemask) { m_writemask = writemask; } 93 export_param()94 int export_param() const { return m_export_param; } set_export_param(int export_param)95 void set_export_param(int export_param) { m_export_param = export_param; } 96 97 private: 98 void do_print(std::ostream& os) const override; 99 100 gl_frag_result m_frag_result{static_cast<gl_frag_result>(FRAG_RESULT_MAX)}; 101 int m_writemask{0}; 102 int m_export_param{-1}; 103 }; 104 105 class ShaderInput : public ShaderIO { 106 public: 107 ShaderInput(); 108 ShaderInput(int location, gl_varying_slot varying_slot = NUM_TOTAL_VARYING_SLOTS); 109 system_value()110 gl_system_value system_value() const { return m_system_value; } set_system_value(gl_system_value system_value)111 void set_system_value(gl_system_value system_value) { m_system_value = system_value; } 112 113 void set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid); 114 void set_uses_interpolate_at_centroid(); set_need_lds_pos()115 void set_need_lds_pos() { m_need_lds_pos = true; } ij_index()116 int ij_index() const { return m_ij_index; } 117 interpolator()118 int interpolator() const { return m_interpolator; } interpolate_loc()119 int interpolate_loc() const { return m_interpolate_loc; } need_lds_pos()120 bool need_lds_pos() const { return m_need_lds_pos; } lds_pos()121 int lds_pos() const { return m_lds_pos; } set_lds_pos(int pos)122 void set_lds_pos(int pos) { m_lds_pos = pos; } 123 ring_offset()124 int ring_offset() const { return m_ring_offset; } set_ring_offset(int offs)125 void set_ring_offset(int offs) { m_ring_offset = offs; } uses_interpolate_at_centroid()126 bool uses_interpolate_at_centroid() const { return m_uses_interpolate_at_centroid; } 127 128 private: 129 void do_print(std::ostream& os) const override; 130 131 gl_system_value m_system_value{SYSTEM_VALUE_MAX}; 132 int m_interpolator{0}; 133 int m_interpolate_loc{0}; 134 int m_ij_index{0}; 135 bool m_uses_interpolate_at_centroid{false}; 136 bool m_need_lds_pos{false}; 137 int m_lds_pos{0}; 138 int m_ring_offset{0}; 139 }; 140 141 class Shader : public Allocate { 142 public: 143 using InputIterator = std::map<int, ShaderInput>::iterator; 144 using OutputIterator = std::map<int, ShaderOutput>::iterator; 145 146 using ShaderBlocks = std::list<Block::Pointer, Allocator<Block::Pointer>>; 147 148 Shader(const Shader& orig) = delete; 149 ~Shader()150 virtual ~Shader() {} 151 shader_id()152 auto shader_id() const {return m_shader_id;} 153 // Needed for testing reset_shader_id()154 void reset_shader_id() {m_shader_id = 0;} 155 156 bool add_info_from_string(std::istream& is); 157 158 static Shader *translate_from_nir(nir_shader *nir, 159 const pipe_stream_output_info *so_info, 160 r600_shader *gs_shader, 161 const r600_shader_key& key, 162 r600_chip_class chip_class, 163 radeon_family family); 164 165 bool process(nir_shader *nir); 166 167 bool process_cf_node(nir_cf_node *node); 168 bool process_if(nir_if *node); 169 bool process_loop(nir_loop *node); 170 bool process_block(nir_block *node); 171 bool process_instr(nir_instr *instr); 172 void emit_instruction(PInst instr); 173 bool emit_atomic_local_shared(nir_intrinsic_instr *instr); 174 175 void print(std::ostream& os) const; 176 void print_header(std::ostream& os) const; 177 178 bool process_intrinsic(nir_intrinsic_instr *intr); 179 180 virtual bool load_input(nir_intrinsic_instr *intr) = 0; 181 virtual bool store_output(nir_intrinsic_instr *intr) = 0; 182 183 bool load_ubo(nir_intrinsic_instr *intr); 184 185 ValueFactory& value_factory(); 186 add_output(const ShaderOutput & output)187 void add_output(const ShaderOutput& output) { m_outputs[output.location()] = output; } 188 add_input(const ShaderInput & input)189 void add_input(const ShaderInput& input) { m_inputs[input.location()] = input; } 190 191 void set_input_gpr(int driver_lcation, int gpr); 192 find_input(int location)193 InputIterator find_input(int location) { return m_inputs.find(location); } 194 input_not_found()195 InputIterator input_not_found() { return m_inputs.end(); } 196 197 OutputIterator find_output(int location); output_not_found()198 OutputIterator output_not_found() { return m_outputs.end(); } 199 func()200 ShaderBlocks& func() { return m_root; } 201 void reset_function(ShaderBlocks& new_root); 202 203 void emit_instruction_from_string(const std::string& s); 204 205 void set_info(nir_shader *nir); 206 void get_shader_info(r600_shader *sh_info); 207 chip_class()208 r600_chip_class chip_class() const { return m_chip_class; } set_chip_class(r600_chip_class cls)209 void set_chip_class(r600_chip_class cls) { m_chip_class = cls; } 210 chip_family()211 radeon_family chip_family() const { return m_chip_family; } set_chip_family(radeon_family family)212 void set_chip_family(radeon_family family) { m_chip_family = family; } 213 214 void start_new_block(int nesting_depth); 215 216 const ShaderOutput& output(int base) const; 217 218 LiveRangeMap prepare_live_range_map(); 219 set_last_txd(Instr * txd)220 void set_last_txd(Instr *txd) { m_last_txd = txd; } last_txd()221 Instr *last_txd() { return m_last_txd; } 222 223 // Needed for keeping the memory access in order 224 void chain_scratch_read(Instr *instr); 225 void chain_ssbo_read(Instr *instr); 226 enabled_stream_buffers_mask()227 virtual uint32_t enabled_stream_buffers_mask() const { return 0; } 228 noutputs()229 size_t noutputs() const { return m_outputs.size(); } ninputs()230 size_t ninputs() const { return m_inputs.size(); } 231 232 enum Flags { 233 sh_indirect_const_file, 234 sh_needs_scratch_space, 235 sh_needs_sbo_ret_address, 236 sh_uses_atomics, 237 sh_uses_images, 238 sh_uses_tex_buffer, 239 sh_writes_memory, 240 sh_txs_cube_array_comp, 241 sh_indirect_atomic, 242 sh_mem_barrier, 243 sh_legacy_math_rules, 244 sh_disble_sb, 245 sh_flags_count 246 }; 247 set_flag(Flags f)248 void set_flag(Flags f) { m_flags.set(f); } has_flag(Flags f)249 bool has_flag(Flags f) const { return m_flags.test(f); } 250 atomic_file_count()251 int atomic_file_count() const { return m_atomic_file_count; } 252 253 PRegister atomic_update(); 254 int remap_atomic_base(int base); 255 auto evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) 256 -> std::pair<int, PRegister>; ssbo_image_offset()257 int ssbo_image_offset() const { return m_ssbo_image_offset; } rat_return_address()258 PRegister rat_return_address() 259 { 260 assert(m_rat_return_address); 261 return m_rat_return_address; 262 } 263 264 PRegister emit_load_to_register(PVirtualValue src); 265 image_size_const_offset()266 virtual unsigned image_size_const_offset() { return 0;} 267 required_registers()268 auto required_registers() const { return m_required_registers;} 269 270 protected: 271 enum ESlots { 272 es_face, 273 es_instanceid, 274 es_invocation_id, 275 es_patch_id, 276 es_pos, 277 es_rel_patch_id, 278 es_sample_mask_in, 279 es_sample_id, 280 es_sample_pos, 281 es_tess_factor_base, 282 es_vertexid, 283 es_tess_coord, 284 es_primitive_id, 285 es_helper_invocation, 286 es_last 287 }; 288 289 std::bitset<es_last> m_sv_values; 290 291 Shader(const char *type_id, unsigned atomic_base); 292 293 const ShaderInput& input(int base) const; 294 295 bool emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin = pin_free); 296 297 template <typename T> 298 using IOMap = std::map<int, T, std::less<int>, Allocator<std::pair<const int, T>>>; 299 inputs()300 IOMap<ShaderInput>& inputs() { return m_inputs; } 301 302 private: 303 virtual bool process_stage_intrinsic(nir_intrinsic_instr *intr) = 0; 304 305 bool allocate_registers_from_string(std::istream& is, Pin pin); 306 bool allocate_arrays_from_string(std::istream& is); 307 308 bool read_chipclass(std::istream& is); 309 bool read_family(std::istream& is); 310 311 bool scan_shader(const nir_function *impl); 312 bool scan_uniforms(nir_variable *uniform); 313 void allocate_reserved_registers(); 314 315 virtual int do_allocate_reserved_registers() = 0; 316 317 bool scan_instruction(nir_instr *instr); 318 virtual bool do_scan_instruction(nir_instr *instr) = 0; 319 320 void print_properties(std::ostream& os) const; 321 virtual void do_print_properties(std::ostream& os) const = 0; 322 323 bool read_output(std::istream& is); 324 bool read_input(std::istream& is); 325 virtual bool read_prop(std::istream& is) = 0; 326 327 bool emit_control_flow(ControlFlowInstr::CFType type); 328 bool emit_store_scratch(nir_intrinsic_instr *intr); 329 bool emit_load_scratch(nir_intrinsic_instr *intr); 330 bool emit_load_global(nir_intrinsic_instr *intr); 331 bool emit_local_store(nir_intrinsic_instr *intr); 332 bool emit_local_load(nir_intrinsic_instr *instr); 333 bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset); 334 bool emit_group_barrier(nir_intrinsic_instr *intr); 335 bool emit_shader_clock(nir_intrinsic_instr *instr); 336 bool emit_wait_ack(); 337 bool emit_barrier(nir_intrinsic_instr *instr); 338 bool emit_load_reg(nir_intrinsic_instr *intr); 339 bool emit_load_reg_indirect(nir_intrinsic_instr *intr); 340 bool emit_store_reg(nir_intrinsic_instr *intr); 341 bool emit_store_reg_indirect(nir_intrinsic_instr *intr); 342 343 bool equal_to(const Shader& other) const; 344 void finalize(); 345 virtual void do_finalize(); 346 347 virtual void do_get_shader_info(r600_shader *sh_info); 348 349 ShaderBlocks m_root; 350 Block::Pointer m_current_block; 351 352 InstrFactory *m_instr_factory; 353 const char *m_type_id; 354 355 IOMap<ShaderOutput> m_outputs; 356 IOMap<ShaderInput> m_inputs; 357 r600_chip_class m_chip_class; 358 radeon_family m_chip_family{CHIP_CEDAR}; 359 360 int m_scratch_size; 361 int m_next_block; 362 bool m_indirect_const_file{false}; 363 364 Instr *m_last_txd{nullptr}; 365 366 uint32_t m_indirect_files{0}; 367 std::bitset<sh_flags_count> m_flags; 368 uint32_t nhwatomic_ranges{0}; 369 std::vector<r600_shader_atomic, Allocator<r600_shader_atomic>> m_atomics; 370 371 uint32_t m_nhwatomic{0}; 372 uint32_t m_atomic_base{0}; 373 uint32_t m_next_hwatomic_loc{0}; 374 std::unordered_map<int, int, 375 std::hash<int>, std::equal_to<int>, 376 Allocator<std::pair<const int, int>>> m_atomic_base_map; 377 uint32_t m_atomic_file_count{0}; 378 PRegister m_atomic_update{nullptr}; 379 PRegister m_rat_return_address{nullptr}; 380 381 int32_t m_ssbo_image_offset{0}; 382 uint32_t m_nloops{0}; 383 uint32_t m_required_registers{0}; 384 385 int64_t m_shader_id; 386 static int64_t s_next_shader_id; 387 388 class InstructionChain : public InstrVisitor { 389 public: visit(AluGroup * instr)390 void visit(AluGroup *instr) override { (void)instr; } visit(TexInstr * instr)391 void visit(TexInstr *instr) override { (void)instr; } visit(ExportInstr * instr)392 void visit(ExportInstr *instr) override { (void)instr; } visit(FetchInstr * instr)393 void visit(FetchInstr *instr) override { (void)instr; } visit(Block * instr)394 void visit(Block *instr) override { (void)instr; } visit(ControlFlowInstr * instr)395 void visit(ControlFlowInstr *instr) override { (void)instr; } visit(IfInstr * instr)396 void visit(IfInstr *instr) override { (void)instr; } visit(StreamOutInstr * instr)397 void visit(StreamOutInstr *instr) override { (void)instr; } visit(MemRingOutInstr * instr)398 void visit(MemRingOutInstr *instr) override { (void)instr; } visit(EmitVertexInstr * instr)399 void visit(EmitVertexInstr *instr) override { (void)instr; } visit(WriteTFInstr * instr)400 void visit(WriteTFInstr *instr) override { (void)instr; } visit(LDSAtomicInstr * instr)401 void visit(LDSAtomicInstr *instr) override { (void)instr; } visit(LDSReadInstr * instr)402 void visit(LDSReadInstr *instr) override { (void)instr; } 403 404 void visit(AluInstr *instr) override; 405 void visit(ScratchIOInstr *instr) override; 406 void visit(GDSInstr *instr) override; 407 void visit(RatInstr *instr) override; 408 409 void apply(Instr *current, Instr **last); 410 411 Shader *this_shader{nullptr}; 412 Instr *last_scratch_instr{nullptr}; 413 Instr *last_gds_instr{nullptr}; 414 Instr *last_ssbo_instr{nullptr}; 415 Instr *last_kill_instr{nullptr}; 416 std::unordered_map<int, Instr * > last_alu_with_indirect_reg; 417 bool prepare_mem_barrier{false}; 418 }; 419 420 InstructionChain m_chain_instr; 421 std::list<Instr *, Allocator<Instr *>> m_loops; 422 int m_control_flow_depth{0}; 423 std::list<nir_intrinsic_instr*> m_register_allocations; 424 }; 425 426 } // namespace r600 427 428 #endif // SHADER_H 429