1 /* 2 * Copyright © 2020 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 #include "helpers.h" 25 26 using namespace aco; 27 28 BEGIN_TEST(assembler.s_memtime) 29 for (unsigned i = GFX6; i <= GFX10; i++) { 30 if (!setup_cs(NULL, (chip_class)i)) 31 continue; 32 33 //~gfx[6-7]>> c7800000 34 //~gfx[6-7]! bf810000 35 //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000 36 //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000 37 bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0}); 38 39 finish_assembler_test(); 40 } 41 END_TEST 42 43 BEGIN_TEST(assembler.branch_3f) 44 if (!setup_cs(NULL, (chip_class)GFX10)) 45 return; 46 47 //! BB0: 48 //! s_branch BB1 ; bf820040 49 //! s_nop 0 ; bf800000 50 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 51 52 for (unsigned i = 0; i < 0x3f; i++) 53 bld.vop1(aco_opcode::v_nop); 54 55 bld.reset(program->create_and_insert_block()); 56 57 program->blocks[1].linear_preds.push_back(0u); 58 59 finish_assembler_test(); 60 END_TEST 61 62 BEGIN_TEST(assembler.long_jump.unconditional_forwards) 63 if (!setup_cs(NULL, (chip_class)GFX10)) 64 return; 65 66 //!BB0: 67 //! s_getpc_b64 s[0:1] ; be801f00 68 //! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018 69 //! s_addc_u32 s1, s1, 0 ; 82018001 70 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 71 //! s_bitset0_b32 s0, 0 ; be801b80 72 //! s_setpc_b64 s[0:1] ; be802000 73 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 74 75 bld.reset(program->create_and_insert_block()); 76 77 //! s_nop 0 ; bf800000 78 //!(then repeated 32767 times) 79 for (unsigned i = 0; i < INT16_MAX + 1; i++) 80 bld.sopp(aco_opcode::s_nop, -1, 0); 81 82 //! BB2: 83 //! s_endpgm ; bf810000 84 bld.reset(program->create_and_insert_block()); 85 86 program->blocks[2].linear_preds.push_back(0u); 87 program->blocks[2].linear_preds.push_back(1u); 88 89 finish_assembler_test(); 90 END_TEST 91 92 BEGIN_TEST(assembler.long_jump.conditional_forwards) 93 if (!setup_cs(NULL, (chip_class)GFX10)) 94 return; 95 96 //! BB0: 97 //! s_cbranch_scc1 BB1 ; bf850007 98 //! s_getpc_b64 s[0:1] ; be801f00 99 //! s_addc_u32 s0, s0, 0x20018 ; 8200ff00 00020018 100 //! s_addc_u32 s1, s1, 0 ; 82018001 101 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 102 //! s_bitset0_b32 s0, 0 ; be801b80 103 //! s_setpc_b64 s[0:1] ; be802000 104 bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2); 105 106 bld.reset(program->create_and_insert_block()); 107 108 //! BB1: 109 //! s_nop 0 ; bf800000 110 //!(then repeated 32767 times) 111 for (unsigned i = 0; i < INT16_MAX + 1; i++) 112 bld.sopp(aco_opcode::s_nop, -1, 0); 113 114 //! BB2: 115 //! s_endpgm ; bf810000 116 bld.reset(program->create_and_insert_block()); 117 118 program->blocks[1].linear_preds.push_back(0u); 119 program->blocks[2].linear_preds.push_back(0u); 120 program->blocks[2].linear_preds.push_back(1u); 121 122 finish_assembler_test(); 123 END_TEST 124 125 BEGIN_TEST(assembler.long_jump.unconditional_backwards) 126 if (!setup_cs(NULL, (chip_class)GFX10)) 127 return; 128 129 //!BB0: 130 //! s_nop 0 ; bf800000 131 //!(then repeated 32767 times) 132 for (unsigned i = 0; i < INT16_MAX + 1; i++) 133 bld.sopp(aco_opcode::s_nop, -1, 0); 134 135 //! s_getpc_b64 s[0:1] ; be801f00 136 //! s_addc_u32 s0, s0, 0xfffdfffc ; 8200ff00 fffdfffc 137 //! s_addc_u32 s1, s1, -1 ; 8201c101 138 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 139 //! s_bitset0_b32 s0, 0 ; be801b80 140 //! s_setpc_b64 s[0:1] ; be802000 141 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0); 142 143 //! BB1: 144 //! s_endpgm ; bf810000 145 bld.reset(program->create_and_insert_block()); 146 147 program->blocks[0].linear_preds.push_back(0u); 148 program->blocks[1].linear_preds.push_back(0u); 149 150 finish_assembler_test(); 151 END_TEST 152 153 BEGIN_TEST(assembler.long_jump.conditional_backwards) 154 if (!setup_cs(NULL, (chip_class)GFX10)) 155 return; 156 157 //!BB0: 158 //! s_nop 0 ; bf800000 159 //!(then repeated 32767 times) 160 for (unsigned i = 0; i < INT16_MAX + 1; i++) 161 bld.sopp(aco_opcode::s_nop, -1, 0); 162 163 //! s_cbranch_execz BB1 ; bf880007 164 //! s_getpc_b64 s[0:1] ; be801f00 165 //! s_addc_u32 s0, s0, 0xfffdfff8 ; 8200ff00 fffdfff8 166 //! s_addc_u32 s1, s1, -1 ; 8201c101 167 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 168 //! s_bitset0_b32 s0, 0 ; be801b80 169 //! s_setpc_b64 s[0:1] ; be802000 170 bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0); 171 172 //! BB1: 173 //! s_endpgm ; bf810000 174 bld.reset(program->create_and_insert_block()); 175 176 program->blocks[0].linear_preds.push_back(0u); 177 program->blocks[1].linear_preds.push_back(0u); 178 179 finish_assembler_test(); 180 END_TEST 181 182 BEGIN_TEST(assembler.long_jump.3f) 183 if (!setup_cs(NULL, (chip_class)GFX10)) 184 return; 185 186 //! BB0: 187 //! s_branch BB1 ; bf820040 188 //! s_nop 0 ; bf800000 189 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 190 191 for (unsigned i = 0; i < 0x3f - 7; i++) // a unconditional long jump is 7 dwords 192 bld.vop1(aco_opcode::v_nop); 193 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 194 195 bld.reset(program->create_and_insert_block()); 196 for (unsigned i = 0; i < INT16_MAX + 1; i++) 197 bld.vop1(aco_opcode::v_nop); 198 bld.reset(program->create_and_insert_block()); 199 200 program->blocks[1].linear_preds.push_back(0u); 201 program->blocks[2].linear_preds.push_back(0u); 202 program->blocks[2].linear_preds.push_back(1u); 203 204 finish_assembler_test(); 205 END_TEST 206 207 BEGIN_TEST(assembler.long_jump.constaddr) 208 if (!setup_cs(NULL, (chip_class)GFX10)) 209 return; 210 211 //>> s_getpc_b64 s[0:1] ; be801f00 212 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 213 214 bld.reset(program->create_and_insert_block()); 215 216 for (unsigned i = 0; i < INT16_MAX + 1; i++) 217 bld.sopp(aco_opcode::s_nop, -1, 0); 218 219 bld.reset(program->create_and_insert_block()); 220 221 //>> s_getpc_b64 s[0:1] ; be801f00 222 //! s_add_u32 s0, s0, 0xe0 ; 8000ff00 000000e0 223 bld.sop1(aco_opcode::p_constaddr, Definition(PhysReg(0), s2), Operand(0u)); 224 225 program->blocks[2].linear_preds.push_back(0u); 226 program->blocks[2].linear_preds.push_back(1u); 227 228 finish_assembler_test(); 229 END_TEST 230 231 BEGIN_TEST(assembler.v_add3) 232 for (unsigned i = GFX9; i <= GFX10; i++) { 233 if (!setup_cs(NULL, (chip_class)i)) 234 continue; 235 236 //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080 237 //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080 238 aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)}; 239 add3->operands[0] = Operand(0u); 240 add3->operands[1] = Operand(0u); 241 add3->operands[2] = Operand(0u); 242 add3->definitions[0] = Definition(PhysReg(0), v1); 243 bld.insert(std::move(add3)); 244 245 finish_assembler_test(); 246 } 247 END_TEST 248 249 BEGIN_TEST(assembler.v_add3_clamp) 250 for (unsigned i = GFX9; i <= GFX10; i++) { 251 if (!setup_cs(NULL, (chip_class)i)) 252 continue; 253 254 //~gfx9>> integer addition + clamp ; d1ff8000 02010080 255 //~gfx10>> integer addition + clamp ; d76d8000 02010080 256 aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)}; 257 add3->operands[0] = Operand(0u); 258 add3->operands[1] = Operand(0u); 259 add3->operands[2] = Operand(0u); 260 add3->definitions[0] = Definition(PhysReg(0), v1); 261 add3->clamp = 1; 262 bld.insert(std::move(add3)); 263 264 finish_assembler_test(); 265 } 266 END_TEST 267