Home
last modified time | relevance | path

Searched refs:aco (Results 1 – 25 of 139) sorted by relevance

123456

/third_party/mesa3d/src/amd/compiler/
Daco_interface.cpp37 static const std::array<aco_compiler_statistic_info, aco::num_statistics> statistic_infos = []()
39 std::array<aco_compiler_statistic_info, aco::num_statistics> ret{}; in __anonaec810030102()
40 ret[aco::statistic_hash] = in __anonaec810030102()
42 ret[aco::statistic_instructions] = in __anonaec810030102()
44 ret[aco::statistic_copies] = in __anonaec810030102()
46 ret[aco::statistic_branches] = aco_compiler_statistic_info{"Branches", "Branch instructions"}; in __anonaec810030102()
47 ret[aco::statistic_latency] = in __anonaec810030102()
49 ret[aco::statistic_inv_throughput] = aco_compiler_statistic_info{ in __anonaec810030102()
51 ret[aco::statistic_vmem_clauses] = aco_compiler_statistic_info{ in __anonaec810030102()
53 ret[aco::statistic_smem_clauses] = aco_compiler_statistic_info{ in __anonaec810030102()
[all …]
/third_party/mesa3d/src/amd/compiler/tests/
Dhelpers.h65 extern std::unique_ptr<aco::Program> program;
66 extern aco::Builder bld;
67 extern aco::Temp inputs[16];
69 namespace aco {
73 void create_program(enum amd_gfx_level gfx_level, aco::Stage stage,
79 void finish_program(aco::Program *program);
82 void finish_ra_test(aco::ra_test_policy, bool lower=false);
89 void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1));
90 void writeout(unsigned i, aco::Builder::Result res);
91 void writeout(unsigned i, aco::Operand op);
[all …]
Dhelpers.cpp32 using namespace aco;
81 aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config); in create_program()
157 if (aco::validate_ir(program.get())) in finish_validator_test()
166 if (!aco::validate_ir(program.get())) { in finish_opt_test()
170 aco::optimize(program.get()); in finish_opt_test()
171 if (!aco::validate_ir(program.get())) { in finish_opt_test()
181 if (!aco::validate_ir(program.get())) { in finish_ra_test()
187 aco::live live_vars = aco::live_var_analysis(program.get()); in finish_ra_test()
188 aco::register_allocation(program.get(), live_vars.live_out, policy); in finish_ra_test()
190 if (aco::validate_ra(program.get())) { in finish_ra_test()
[all …]
/third_party/mesa3d/docs/relnotes/
D20.0.0.rst62 - aco: sun flickering with Assassins Creeds Origins
64 - aco: wrong geometry with Assassins Creed Origins on GFX6
78 - aco: implement GFX6 support
85 - aco: Dead Rising 4 crashes in lower_to_hw_instr() on GFX6-GFX7
92 - [Navi/aco] Guild Wars 2 - ring gfx timeout with commit 3bca0af2
93 - [radv/aco] Regression is causing a soft crash in The Witcher 3
158 - radv/aco Jedi Fallen Order hair rendering buggy
693 - aco: Constify radv_nir_compiler_options in isel
694 - aco: Use radv_shader_args in aco_compile_shader()
695 - aco: Split vector arguments at the beginning
[all …]
D19.3.3.rst34 - aco: Dead Rising 4 crashes in lower_to_hw_instr() on GFX6-GFX7
72 - aco: fix unconditional demote_to_helper
73 - aco: rework lower_to_cssa()
173 - aco: set vm for pos0 exports on GFX10
174 - aco: fix imageSize()/textureSize() with large buffers on GFX8
175 - aco: fix uninitialized data in the binary
176 - aco: set exec_potentially_empty for demotes
177 - aco: disable add combining for ds_swizzle_b32
178 - aco: don't DCE atomics with return values
179 - aco: check if multiplication/clamp is live when applying output
[all …]
D20.1.0.rst230 - aco: sun flickering with Assassins Creeds Origins
232 - aco: wrong geometry with Assassins Creed Origins on GFX6
265 - aco: Minor optimization in spill_ctx constructor
266 - aco: pass vars by const &
1185 - aco: fix image_atomic_cmp_swap
1195 - aco: add comparison operators for PhysReg
1196 - aco: add sub-dword regclasses
1197 - aco: refactor regClass setup for subdword VGPRs
1198 - aco: validate p_create_vector with subdword elements properly
1199 - aco: validate register alignment of subdword operands and definitions
[all …]
D20.3.5.rst107 - aco: fix nir_intrinsic_ballot with wave32
108 - aco: fix shared VGPR allocation on RDNA2
237 - aco: always set exec_live=false
238 - aco: do not flag all blocks WQM to ensure we enter all nested loops in WQM
239 - aco/lower_phis: fix all_preds_uniform with continue_or_break
240 - aco: add missing usable_read2 check
242 - aco: set compr for fp16 exports
243 - aco: implement 64-bit VGPR {u,i}find_msb
244 - radv,aco: don't use MUBUF for multi-channel loads on GFX8 with robustness2
247 - aco: calculate all p_as_uniform and v_readfirstlane_b32 sources in WQM
[all …]
D21.0.0.rst132 - radv/aco: "Failed to allocate registers" in AC:Valhalla
177 - radv,aco: CTS image robustness tests fail to compile
191 - \[aco\] problem compiling compute pipeline
808 - aco/ra: use get_reg_specified() for p_extract_vector
809 - aco: don't create dead exec mask phis on merge blocks
810 - aco: fix DCE of rematerializable phi operands
811 - aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed
812 - aco/ra: fix phi operand renaming
814 - aco: don't emit parallelcopy when switching to WQM.
815 - aco: make pred_by_exec_mask() accessible in other files
[all …]
D20.2.4.rst118 - aco: don't combine precise max(min()) to med3
119 - aco: fix combine_constant_comparison_ordering() NaN check with 16/64-bit
120 - aco: disallow various v_add_u32 opts if modifiers are used
121 - aco: disable omod if the sign of zeros should be preserved
122 - aco: fix fp16 \*0.5 omod
135 - aco/optimizer: Only set scc_needed when it is actually needed.
D20.2.0.rst1233 - aco: either copy-propagate or inline create_vector operands
1234 - aco: coalesce parallelcopies during register allocation
1240 - aco: fix WQM coalescing
1241 - aco: restrict copying of create_vector operands to GFX9+
1242 - aco: don't move create_vector subdword operands to unsupported register offsets
1243 - aco: fix corner case in register allocation
1244 - aco: don't allow unaligned subdword accesses on GFX6/7
1245 - aco: fix register assignment for p_create_vector on GFX6/7
1246 - aco: simplify statistics collection for copies
1247 - aco: use full-register instructions to implement subdword packing on GFX6/7
[all …]
D21.1.3.rst121 - aco: do not clause NSA instructions
122 - aco: don't create 4 and 5 dword NSA instructions on GFX10
123 - aco: use v1b/v2b for ds_read_u8/ds_read_u16
134 - aco: fix range checking for SSBO loads/stores with SGPR offset on GFX6-7
135 - aco: fix emitting literal offsets with SMEM on GFX7
145 - aco/ra: Fix off-by-one-error in print_regs
D20.0.3.rst139 - aco: set has_divergent_branch for discards in loops
140 - aco: handle missing second predecessors at merge block phis
141 - aco: skip NIR in unreachable merge blocks
142 - aco: improve check for unreachable loop continue blocks
143 - aco: emit IR in IF's merge block instead if the other side ends in a
145 - aco: fix boolean undef regclass
147 - aco: implement 64-bit VGPR constant copies in handle_operands()
D20.2.2.rst36 - radv/aco: Vertex explosion on RPCS3
116 - aco: add missing SCC clobber in get_buffer_size
117 - aco: update phi_map in add_subdword_operand()
118 - aco: ignore the ACO-inserted continue in create_continue_phis()
130 - aco: fix determining if LOD is zero for nir_texop_txf/nir_texop_txs
147 - aco/isel: Always export position data from VS/NGG
D20.3.0.rst77 - \[aco\] problem compiling compute pipeline
95 - radv/aco: Vertex explosion on RPCS3
125 - radv, aco: dEQP-VK.glsl.atomic_operations.*_fragment_reference regressed
874 - aco: Add VK_KHR_shader_terminate_invocation support.
1178 - aco: execute branch instructions in WQM if necessary
1180 - aco/isel: refactor code and remove unnecessary v_mov
1181 - aco/isel: refactor emit_vop3a_instruction() to handle 2 operand instructions
1183 - aco: propagate SGPRs into VOP1 instructions early.
1184 - aco: expand create_vector more carefully w.r.t. subdword operands
1185 - aco: use p_create_vector for nir_op_pack_half_2x16
[all …]
D19.3.0.rst48 - New compiler backend "ACO" for RADV (RADV_PERFTEST=aco)
58 - radv/aco Jedi Fallen Order hair rendering buggy
484 - amd: Build aco only if radv is enabled
782 - aco: Initial commit of independent AMD compiler
783 - radv/aco: Setup alternate path in RADV to support the experimental
786 - radv/aco: enable VK_EXT_shader_demote_to_helper_invocation
788 - aco: only emit waitcnt on loop continues if we there was some load or
793 - radv/aco: Don't lower subtractions
794 - aco: call nir_opt_algebraic_late() exhaustively
796 - aco: re-use existing phi instruction when lowering boolean phis
[all …]
D21.2.2.rst115 - aco: fix p_insert lowering with 16bit sources
240 - aco: include utility in isel
241 - aco: don't constant propagate to DPP instructions
242 - aco/spill: add temporary operands of exec phis to next_use_distances_end
271 - aco: Fix to_uniform_bool_instr when operands are not suitable.
272 - aco: Emit zero for the derivatives of uniforms.
273 - aco: Unset 16 and 24-bit flags from operands in apply_extract.
275 - aco: Fix invalid usage of std::fill with std::array.
276 - aco: Use Builder reference in emit_copies_block.
277 - aco: Skip code paths to emit copies when there are no copies.
D20.2.5.rst67 - aco/ra: use get_reg_specified() for p_extract_vector
68 - aco: fix DCE of rematerializable phi operands
99 - aco: use UINT64_C on 64 bit constant arguments
133 - aco: don't assume src=lower when splitting self-intersecting copies
150 - aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier
164 - aco: Use program->num_waves as maximum in scheduler.
D21.3.3.rst54 - aco/optimizer: fix fneg modifier propagation on VOP3P
55 - aco/ra: fix get_reg_for_operand() in case of stride mismatches
103 - aco/optimizer_postRA: Fix combining DPP into VALU.
104 - aco/optimizer_postRA: Fix applying VCC to branches.
D19.3.4.rst171 - aco: fix operand to scc when selecting SGPR ufind_msb/ifind_msb
172 - aco: ensure predecessors' p_logical_end is in WQM when a p_phi is in
174 - aco: run p_wqm instructions in WQM
175 - aco: don't consider loop header blocks branch blocks in
177 - aco: don't always add logical edges from continue_break blocks to
179 - aco: fix target calculation when vgpr spilling introduces sgpr
D21.1.1.rst75 - aco: fix additional register requirements for spilling
77 - aco/ra: prevent underflow register for p_create_vector operands
79 - aco/ra: also prevent overflow register for p_create_vector operands
196 - aco/ra: initialize temp_in_scc earlier
216 - aco/scheduler: Fix register demand computation for downwards moves
217 - aco/scheduler: Fix register demand computation for upwards moves
D21.3.0.rst529 - aco: Implement call scope.
535 - aco: Add support for ray launch size.
973 - aco/optimizer: ensure to not erase high bits when propagating packed constants
974 - aco/ra: don't allocate vector space for MIMG NSA operands
975 - aco: include <cstddef> in aco_util.h
983 - aco/print_ir: fix printing of VOPC_SDWA definitions
984 - aco: use VOPC_SDWA on GFX9+
985 - aco: add instr_is_16bit() helper function
986 - aco/ra: refactor subdword definition info
987 - aco/ra: refactor subdword operand stride
[all …]
D20.3.1.rst57 - aco/ra: use get_reg_specified() for p_extract_vector
90 - aco: use UINT64_C on 64 bit constant arguments
132 - aco: don't assume src=lower when splitting self-intersecting copies
143 - aco: fix combining max(-min(a, b), c) if a or b uses the neg modifier
160 - aco: Use program->num_waves as maximum in scheduler.
D21.1.0.rst1343 - aco: fix VOP3P assembly, VN and validation
1344 - aco/RA: fix subdword operands on VOP3P instructions
1345 - aco: allow constants/literals on every src position for VOP3P
1346 - aco: allow SGPRs on every src position for VOP3P
1347 - aco: change usesModifiers() considering opsel_hi on packed instructions
1348 - aco: create helpers to emit vop3p instructions
1349 - aco: emit packed 16bit instructions
1351 - aco: simplify multiply-add combining
1352 - aco: optimize packed mul+add to v_pk_fma_f16
1353 - aco: optimize packed clamp
[all …]
D20.3.2.rst55 - aco: fix DCE of rematerializable phi operands
56 - aco/spill: only prevent rematerializable vars from being DCE'd if they haven't been renamed
57 - aco/ra: fix phi operand renaming
118 - aco: add block to worklist in mark_block_wqm()
D20.3.3.rst117 - aco: fix incorrect address calculation for load_barycentric_at_sample
120 - aco: fix unreachable() for uniform 8/16-bit nir_op_mov from VGPR
128 - aco: fix creating the dest vector when 16-bit vertex fetches are splitted
129 - radv/llvm,aco: always split typed vertex buffer loads on GFX6 and GFX10+

123456