/third_party/mesa3d/src/amd/compiler/ |
D | aco_interface.cpp | 38 static const std::array<aco_compiler_statistic_info, aco::num_statistics> statistic_infos = []() 40 std::array<aco_compiler_statistic_info, aco::num_statistics> ret{}; in __anon814593c40102() 41 ret[aco::statistic_hash] = in __anon814593c40102() 43 ret[aco::statistic_instructions] = in __anon814593c40102() 45 ret[aco::statistic_copies] = in __anon814593c40102() 47 ret[aco::statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"}; in __anon814593c40102() 48 ret[aco::statistic_latency] = in __anon814593c40102() 50 ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{ in __anon814593c40102() 52 ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{ in __anon814593c40102() 54 ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{ in __anon814593c40102() [all …]
|
/third_party/mesa3d/src/amd/compiler/tests/ |
D | helpers.h | 65 extern std::unique_ptr<aco::Program> program; 66 extern aco::Builder bld; 67 extern aco::Temp inputs[16]; 69 namespace aco { 73 void create_program(enum chip_class chip_class, aco::Stage stage, 79 void finish_program(aco::Program *program); 82 void finish_ra_test(aco::ra_test_policy, bool lower=false); 89 void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1)); 90 void writeout(unsigned i, aco::Builder::Result res); 91 void writeout(unsigned i, aco::Operand op); [all …]
|
D | helpers.cpp | 32 using namespace aco; 81 aco::init_program(program.get(), stage, &info, chip_class, family, false, &config); in create_program() 151 if (aco::validate_ir(program.get())) in finish_validator_test() 160 if (!aco::validate_ir(program.get())) { in finish_opt_test() 164 aco::optimize(program.get()); in finish_opt_test() 165 if (!aco::validate_ir(program.get())) { in finish_opt_test() 175 if (!aco::validate_ir(program.get())) { in finish_ra_test() 181 aco::live live_vars = aco::live_var_analysis(program.get()); in finish_ra_test() 182 aco::register_allocation(program.get(), live_vars.live_out, policy); in finish_ra_test() 184 if (aco::validate_ra(program.get())) { in finish_ra_test() [all …]
|
/third_party/mesa3d/docs/relnotes/ |
D | 19.3.3.rst | 34 - aco: Dead Rising 4 crashes in lower_to_hw_instr() on GFX6-GFX7 72 - aco: fix unconditional demote_to_helper 73 - aco: rework lower_to_cssa() 173 - aco: set vm for pos0 exports on GFX10 174 - aco: fix imageSize()/textureSize() with large buffers on GFX8 175 - aco: fix uninitialized data in the binary 176 - aco: set exec_potentially_empty for demotes 177 - aco: disable add combining for ds_swizzle_b32 178 - aco: don't DCE atomics with return values 179 - aco: check if multiplication/clamp is live when applying output [all …]
|
D | 20.0.0.rst | 62 - aco: sun flickering with Assassins Creeds Origins 64 - aco: wrong geometry with Assassins Creed Origins on GFX6 78 - aco: implement GFX6 support 85 - aco: Dead Rising 4 crashes in lower_to_hw_instr() on GFX6-GFX7 92 - [Navi/aco] Guild Wars 2 - ring gfx timeout with commit 3bca0af2 93 - [radv/aco] Regression is causing a soft crash in The Witcher 3 158 - radv/aco Jedi Fallen Order hair rendering buggy 693 - aco: Constify radv_nir_compiler_options in isel 694 - aco: Use radv_shader_args in aco_compile_shader() 695 - aco: Split vector arguments at the beginning [all …]
|
D | 20.3.5.rst | 107 - aco: fix nir_intrinsic_ballot with wave32 108 - aco: fix shared VGPR allocation on RDNA2 237 - aco: always set exec_live=false 238 - aco: do not flag all blocks WQM to ensure we enter all nested loops in WQM 239 - aco/lower_phis: fix all_preds_uniform with continue_or_break 240 - aco: add missing usable_read2 check 242 - aco: set compr for fp16 exports 243 - aco: implement 64-bit VGPR {u,i}find_msb 244 - radv,aco: don't use MUBUF for multi-channel loads on GFX8 with robustness2 247 - aco: calculate all p_as_uniform and v_readfirstlane_b32 sources in WQM [all …]
|
D | 20.1.0.rst | 230 - aco: sun flickering with Assassins Creeds Origins 232 - aco: wrong geometry with Assassins Creed Origins on GFX6 265 - aco: Minor optimization in spill_ctx constructor 266 - aco: pass vars by const & 1185 - aco: fix image_atomic_cmp_swap 1195 - aco: add comparison operators for PhysReg 1196 - aco: add sub-dword regclasses 1197 - aco: refactor regClass setup for subdword VGPRs 1198 - aco: validate p_create_vector with subdword elements properly 1199 - aco: validate register alignment of subdword operands and definitions [all …]
|
D | 20.2.4.rst | 118 - aco: don't combine precise max(min()) to med3 119 - aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit 120 - aco: disallow various v_add_u32 opts if modifiers are used 121 - aco: disable omod if the sign of zeros should be preserved 122 - aco: fix fp16 \*0.5 omod 135 - aco/optimizer: Only set scc_needed when it is actually needed.
|
D | 21.1.3.rst | 121 - aco: do not clause NSA instructions 122 - aco: don't create 4 and 5 dword NSA instructions on GFX10 123 - aco: use v1b/v2b for ds_read_u8/ds_read_u16 134 - aco: fix range checking for SSBO loads/stores with SGPR offset on GFX6-7 135 - aco: fix emitting literal offsets with SMEM on GFX7 145 - aco/ra: Fix off-by-one-error in print_regs
|
D | 21.0.0.rst | 132 - radv/aco: "Failed to allocate registers" in AC:Valhalla 177 - radv,aco: CTS image robustness tests fail to compile 191 - \[aco\] problem compiling compute pipeline 808 - aco/ra: use get_reg_specified() for p_extract_vector 809 - aco: don't create dead exec mask phis on merge blocks 810 - aco: fix DCE of rematerializable phi operands 811 - aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed 812 - aco/ra: fix phi operand renaming 814 - aco: don't emit parallelcopy when switching to WQM. 815 - aco: make pred_by_exec_mask() accessible in other files [all …]
|
D | 20.0.3.rst | 139 - aco: set has_divergent_branch for discards in loops 140 - aco: handle missing second predecessors at merge block phis 141 - aco: skip NIR in unreachable merge blocks 142 - aco: improve check for unreachable loop continue blocks 143 - aco: emit IR in IF's merge block instead if the other side ends in a 145 - aco: fix boolean undef regclass 147 - aco: implement 64-bit VGPR constant copies in handle_operands()
|
D | 20.2.2.rst | 36 - radv/aco: Vertex explosion on RPCS3 116 - aco: add missing SCC clobber in get_buffer_size 117 - aco: update phi_map in add_subdword_operand() 118 - aco: ignore the ACO-inserted continue in create_continue_phis() 130 - aco: fix determining if LOD is zero for nir_texop_txf/nir_texop_txs 147 - aco/isel: Always export position data from VS/NGG
|
D | 20.2.0.rst | 1233 - aco: either copy-propagate or inline create_vector operands 1234 - aco: coalesce parallelcopies during register allocation 1240 - aco: fix WQM coalescing 1241 - aco: restrict copying of create_vector operands to GFX9+ 1242 - aco: don't move create_vector subdword operands to unsupported register offsets 1243 - aco: fix corner case in register allocation 1244 - aco: don't allow unaligned subdword accesses on GFX6/7 1245 - aco: fix register assignment for p_create_vector on GFX6/7 1246 - aco: simplify statistics collection for copies 1247 - aco: use full-register instructions to implement subdword packing on GFX6/7 [all …]
|
D | 21.2.2.rst | 115 - aco: fix p_insert lowering with 16bit sources 240 - aco: include utility in isel 241 - aco: don't constant propagate to DPP instructions 242 - aco/spill: add temporary operands of exec phis to next_use_distances_end 271 - aco: Fix to_uniform_bool_instr when operands are not suitable. 272 - aco: Emit zero for the derivatives of uniforms. 273 - aco: Unset 16 and 24-bit flags from operands in apply_extract. 275 - aco: Fix invalid usage of std::fill with std::array. 276 - aco: Use Builder reference in emit_copies_block. 277 - aco: Skip code paths to emit copies when there are no copies.
|
D | 19.3.0.rst | 48 - New compiler backend "ACO" for RADV (RADV_PERFTEST=aco) 58 - radv/aco Jedi Fallen Order hair rendering buggy 484 - amd: Build aco only if radv is enabled 782 - aco: Initial commit of independent AMD compiler 783 - radv/aco: Setup alternate path in RADV to support the experimental 786 - radv/aco: enable VK_EXT_shader_demote_to_helper_invocation 788 - aco: only emit waitcnt on loop continues if we there was some load or 793 - radv/aco: Don't lower subtractions 794 - aco: call nir_opt_algebraic_late() exhaustively 796 - aco: re-use existing phi instruction when lowering boolean phis [all …]
|
D | 20.3.0.rst | 77 - \[aco\] problem compiling compute pipeline 95 - radv/aco: Vertex explosion on RPCS3 125 - radv, aco: dEQP-VK.glsl.atomic_operations.*_fragment_reference regressed 874 - aco: Add VK_KHR_shader_terminate_invocation support. 1178 - aco: execute branch instructions in WQM if necessary 1180 - aco/isel: refactor code and remove unnecessary v_mov 1181 - aco/isel: refactor emit_vop3a_instruction() to handle 2 operand instructions 1183 - aco: propagate SGPRs into VOP1 instructions early. 1184 - aco: expand create_vector more carefully w.r.t. subdword operands 1185 - aco: use p_create_vector for nir_op_pack_half_2x16 [all …]
|
D | 21.3.3.rst | 54 - aco/optimizer: fix fneg modifier propagation on VOP3P 55 - aco/ra: fix get_reg_for_operand() in case of stride mismatches 103 - aco/optimizer_postRA: Fix combining DPP into VALU. 104 - aco/optimizer_postRA: Fix applying VCC to branches.
|
D | 20.2.5.rst | 67 - aco/ra: use get_reg_specified() for p_extract_vector 68 - aco: fix DCE of rematerializable phi operands 99 - aco: use UINT64_C on 64 bit constant arguments 133 - aco: don't assume src=lower when splitting self-intersecting copies 150 - aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier 164 - aco: Use program->num_waves as maximum in scheduler.
|
D | 19.3.4.rst | 171 - aco: fix operand to scc when selecting SGPR ufind_msb/ifind_msb 172 - aco: ensure predecessors' p_logical_end is in WQM when a p_phi is in 174 - aco: run p_wqm instructions in WQM 175 - aco: don't consider loop header blocks branch blocks in 177 - aco: don't always add logical edges from continue_break blocks to 179 - aco: fix target calculation when vgpr spilling introduces sgpr
|
D | 21.1.1.rst | 75 - aco: fix additional register requirements for spilling 77 - aco/ra: prevent underflow register for p_create_vector operands 79 - aco/ra: also prevent overflow register for p_create_vector operands 196 - aco/ra: initialize temp_in_scc earlier 216 - aco/scheduler: Fix register demand computation for downwards moves 217 - aco/scheduler: Fix register demand computation for upwards moves
|
D | 20.3.1.rst | 57 - aco/ra: use get_reg_specified() for p_extract_vector 90 - aco: use UINT64_C on 64 bit constant arguments 132 - aco: don't assume src=lower when splitting self-intersecting copies 143 - aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier 160 - aco: Use program->num_waves as maximum in scheduler.
|
D | 21.3.0.rst | 529 - aco: Implement call scope. 535 - aco: Add support for ray launch size. 973 - aco/optimizer: ensure to not erase high bits when propagating packed constants 974 - aco/ra: don't allocate vector space for MIMG NSA operands 975 - aco: include <cstddef> in aco_util.h 983 - aco/print_ir: fix printing of VOPC_SDWA definitions 984 - aco: use VOPC_SDWA on GFX9+ 985 - aco: add instr_is_16bit() helper function 986 - aco/ra: refactor subdword definition info 987 - aco/ra: refactor subdword operand stride [all …]
|
D | 20.3.2.rst | 55 - aco: fix DCE of rematerializable phi operands 56 - aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed 57 - aco/ra: fix phi operand renaming 118 - aco: add block to worklist in mark_block_wqm()
|
D | 20.3.3.rst | 117 - aco: fix incorrect address calculation for load_barycentric_at_sample 120 - aco: fix unreachable() for uniform 8/16-bit nir_op_mov from VGPR 128 - aco: fix creating the dest vector when 16-bit vertex fetches are splitted 129 - radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+
|
D | 21.0.1.rst | 110 - aco/tests: Use \_exit in child process 133 - aco: set compr for fp16 exports 134 - aco: implement 64-bit VGPR {u,i}find_msb 143 - aco: Fix constant address offset calculation for ds_read2 instructions.
|