/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018-2019 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef r600_sfn_alu_defines_h
#define r600_sfn_alu_defines_h

#include <map>
#include <bitset>

namespace r600 {

/* ALU op2 instructions 17:7 top three bits always zero.
 *
 * Naming: the opN_ prefix carries the number of source operands in the name
 * (op0_, op1_, op2_, op3_). All op3_ values are stored shifted left by six
 * bits, which keeps them disjoint from the op0/op1/op2 range.
 *
 * The enumerators are used as keys of alu_ops (a std::map), so every value
 * has to be unique.
 */
enum EAluOp {
   op2_add = 0,
   op2_mul = 1,
   op2_mul_ieee = 2,
   op2_max = 3,
   op2_min = 4,
   op2_max_dx10 = 5,
   op2_min_dx10 = 6,
   op2_sete = 8,
   op2_setgt = 9,
   op2_setge = 10,
   op2_setne = 11,
   op2_sete_dx10 = 12,
   op2_setgt_dx10 = 13,
   op2_setge_dx10 = 14,
   op2_setne_dx10 = 15,
   op1_fract = 16,
   op1_trunc = 17,
   op1_ceil = 18,
   op1_rndne = 19,
   op1_floor = 20,
   op2_ashr_int = 21,
   op2_lshr_int = 22,
   op2_lshl_int = 23,
   op1_mov = 25,
   op0_nop = 26,
   op2_mul_64 = 27,
   op1_flt64_to_flt32 = 28,
   op1_flt32_to_flt64 = 29,
   op2_pred_setgt_uint = 30,
   op2_pred_setge_uint = 31,
   op2_pred_sete = 32,
   op2_pred_setgt = 33,
   op2_pred_setge = 34,
   op2_pred_setne = 35,
   op1_pred_set_inv = 36,
   op2_pred_set_pop = 37,
   op0_pred_set_clr = 38,
   op1_pred_set_restore = 39,
   op2_pred_sete_push = 40,
   op2_pred_setgt_push = 41,
   op2_pred_setge_push = 42,
   op2_pred_setne_push = 43,
   op2_kille = 44,
   op2_killgt = 45,
   op2_killge = 46,
   op2_killne = 47,
   op2_and_int = 48,
   op2_or_int = 49,
   op2_xor_int = 50,
   op1_not_int = 51,
   op2_add_int = 52,
   op2_sub_int = 53,
   op2_max_int = 54,
   op2_min_int = 55,
   op2_max_uint = 56,
   op2_min_uint = 57,
   op2_sete_int = 58,
   op2_setgt_int = 59,
   op2_setge_int = 60,
   op2_setne_int = 61,
   op2_setgt_uint = 62,
   op2_setge_uint = 63,
   op2_killgt_uint = 64,
   op2_killge_uint = 65,
   op2_prede_int = 66,
   op2_pred_setgt_int = 67,
   op2_pred_setge_int = 68,
   op2_pred_setne_int = 69,
   op2_kille_int = 70,
   op2_killgt_int = 71,
   op2_killge_int = 72,
   op2_killne_int = 73,
   op2_pred_sete_push_int = 74,
   op2_pred_setgt_push_int = 75,
   op2_pred_setge_push_int = 76,
   op2_pred_setne_push_int = 77,
   op2_pred_setlt_push_int = 78,
   op2_pred_setle_push_int = 79,
   op1_flt_to_int = 80,
   op1_bfrev_int = 81,
   op2_addc_uint = 82,
   op2_subb_uint = 83,
   op0_group_barrier = 84,
   op0_group_seq_begin = 85,
   op0_group_seq_end = 86,
   op2_set_mode = 87,
   op1_set_cf_idx0 = 88,
   op1_set_cf_idx1 = 89,
   op2_set_lds_size = 90,
   op1_exp_ieee = 129,
   op1_log_clamped = 130,
   op1_log_ieee = 131,
   op1_recip_clamped = 132,
   op1_recip_ff = 133,
   op1_recip_ieee = 134,
   op1_recipsqrt_clamped = 135,
   op1_recipsqrt_ff = 136,
   op1_recipsqrt_ieee1 = 137,
   op1_sqrt_ieee = 138,
   op1_sin = 141,
   op1_cos = 142,
   op2_mullo_int = 143,
   op2_mulhi_int = 144,
   op2_mullo_uint = 145,
   op2_mulhi_uint = 146,
   op1_recip_int = 147,
   op1_recip_uint = 148,
   op1_recip_64 = 149,
   op1_recip_clamped_64 = 150,
   op1_recipsqrt_64 = 151,
   op1_recipsqrt_clamped_64 = 152,
   op1_sqrt_64 = 153,
   op1_flt_to_uint = 154,
   op1_int_to_flt = 155,
   op1_uint_to_flt = 156,
   op2_bfm_int = 160,
   op1_flt32_to_flt16 = 162,
   op1_flt16_to_flt32 = 163,
   op1_ubyte0_flt = 164,
   op1_ubyte1_flt = 165,
   op1_ubyte2_flt = 166,
   op1_ubyte3_flt = 167,
   op1_bcnt_int = 170,
   op1_ffbh_uint = 171,
   op1_ffbl_int = 172,
   op1_ffbh_int = 173,
   op1_flt_to_uint4 = 174,
   op2_dot_ieee = 175,
   op1_flt_to_int_rpi = 176,
   op1_flt_to_int_floor = 177,
   op2_mulhi_uint24 = 178,
   op1_mbcnt_32hi_int = 179,
   op1_offset_to_flt = 180,
   op2_mul_uint24 = 181,
   op1_bcnt_accum_prev_int = 182,
   op1_mbcnt_32lo_accum_prev_int = 183,
   op2_sete_64 = 184,
   op2_setne_64 = 185,
   op2_setgt_64 = 186,
   op2_setge_64 = 187,
   op2_min_64 = 188,
   op2_max_64 = 189,
   op2_dot4 = 190,
   op2_dot4_ieee = 191,
   op2_cube = 192,
   op1_max4 = 193,
   op1_frexp_64 = 196,
   op1_ldexp_64 = 197,
   op1_fract_64 = 198,
   op2_pred_setgt_64 = 199,
   /* Must differ from op1_fract_64 (198): a shared value would make the two
    * opcodes indistinguishable as map keys and in switch statements. */
   op2_pred_sete_64 = 200,
   op2_pred_setge_64 = 201,
   OP2V_MUL_64 = 202,
   op2_add_64 = 203,
   op1_mova_int = 204,
   op1v_flt64_to_flt32 = 205,
   op1v_flt32_to_flt64 = 206,
   op2_sad_accum_prev_uint = 207,
   op2_dot = 208,
   op2_mul_prev = 209,
   op2_mul_ieee_prev = 210,
   op2_add_prev = 211,
   op2_muladd_prev = 212,
   op2_muladd_ieee_prev = 213,
   op2_interp_xy = 214,
   op2_interp_zw = 215,
   op2_interp_x = 216,
   op2_interp_z = 217,
   op0_store_flags = 218,
   op1_load_store_flags = 219,
   op0_lds_1a = 220,
   op0_lds_1a1d = 221,
   op0_lds_2a = 223,
   op1_interp_load_p0 = 224,
   /* The two remaining interpolation loads directly follow
    * op1_interp_load_p0.
    * NOTE(review): 225/226 are believed to match the hardware opcode
    * numbering - confirm against the ISA documentation. */
   op1_interp_load_p10 = 225,
   op1_interp_load_p20 = 226,
   // op 3 all left shift 6
   op3_bfe_uint = 4 << 6,
   op3_bfe_int = 5 << 6,
   op3_bfi_int = 6 << 6,
   op3_fma = 7 << 6,
   op3_cndne_64 = 9 << 6,
   op3_fma_64 = 10 << 6,
   op3_lerp_uint = 11 << 6,
   op3_bit_align_int = 12 << 6,
   op3_byte_align_int = 13 << 6,
   op3_sad_accum_uint = 14 << 6,
   op3_sad_accum_hi_uint = 15 << 6,
   op3_muladd_uint24 = 16 << 6,
   op3_lds_idx_op = 17 << 6,
   op3_muladd = 20 << 6,
   op3_muladd_m2 = 21 << 6,
   op3_muladd_m4 = 22 << 6,
   op3_muladd_d2 = 23 << 6,
   op3_muladd_ieee = 24 << 6,
   op3_cnde = 25 << 6,
   op3_cndgt = 26 << 6,
   op3_cndge = 27 << 6,
   op3_cnde_int = 28 << 6,
   op3_cndgt_int = 29 << 6,
   op3_cndge_int = 30 << 6,
   op3_mul_lit = 31 << 6
};

/* Set of up to 32 boolean flags attached to an ALU operation. */
using AluOpFlags = std::bitset<32>;

/* Static description of one ALU opcode: number of sources, whether it is a
 * float operation, a mask of the units it may use, and a printable name. */
struct AluOp {
   /* Single-bit unit flags and their combinations; v == x|y|z|w and
    * a == v|t.
    * NOTE(review): presumably x/y/z/w are the four vector slots and t the
    * trans slot - confirm against the r600 ISA documentation. */
   static constexpr int x = 1;
   static constexpr int y = 2;
   static constexpr int z = 4;
   static constexpr int w = 8;
   static constexpr int v = 15;
   static constexpr int t = 16;
   static constexpr int a = 31;

   /* ns: number of source operands, f: non-zero for a float operation,
    * um: combination of the unit flags above, n: name string (the pointer
    * is stored, the string is not copied). */
   AluOp(int ns, int f, int um, const char *n):
      nsrc(ns), is_float(f), unit_mask(um), name(n)
   {
   }

   /* Returns true if at least one unit bit in flags is also set in
    * unit_mask. */
   bool can_channel(int flags) const {
      return (flags & unit_mask) != 0;
   }

   int nsrc: 4;
   /* Both fields below are unsigned on purpose: a signed 1-bit field cannot
    * represent 1 and a signed 5-bit field cannot represent t (16) or
    * a (31), so the stored values would come back negative. */
   unsigned is_float: 1;
   unsigned unit_mask: 5;
   const char *name;
};

/* Opcode -> description table; defined in another translation unit. */
extern const std::map<EAluOp, AluOp> alu_ops;

/* Source selector values that do not name a register. ALU_SRC_UNKNOWN has no
 * explicit value and therefore is ALU_SRC_PARAM_BASE + 1. */
enum AluInlineConstants {
   ALU_SRC_LDS_OQ_A = 219,
   ALU_SRC_LDS_OQ_B = 220,
   ALU_SRC_LDS_OQ_A_POP = 221,
   ALU_SRC_LDS_OQ_B_POP = 222,
   ALU_SRC_LDS_DIRECT_A = 223,
   ALU_SRC_LDS_DIRECT_B = 224,
   ALU_SRC_TIME_HI = 227,
   ALU_SRC_TIME_LO = 228,
   ALU_SRC_MASK_HI = 229,
   ALU_SRC_MASK_LO = 230,
   ALU_SRC_HW_WAVE_ID = 231,
   ALU_SRC_SIMD_ID = 232,
   ALU_SRC_SE_ID = 233,
   ALU_SRC_HW_THREADGRP_ID = 234,
   ALU_SRC_WAVE_ID_IN_GRP = 235,
   ALU_SRC_NUM_THREADGRP_WAVES = 236,
   ALU_SRC_HW_ALU_ODD = 237,
   ALU_SRC_LOOP_IDX = 238,
   ALU_SRC_PARAM_BASE_ADDR = 240,
   ALU_SRC_NEW_PRIM_MASK = 241,
   ALU_SRC_PRIM_MASK_HI = 242,
   ALU_SRC_PRIM_MASK_LO = 243,
   ALU_SRC_1_DBL_L = 244,
   ALU_SRC_1_DBL_M = 245,
   ALU_SRC_0_5_DBL_L = 246,
   ALU_SRC_0_5_DBL_M = 247,
   ALU_SRC_0 = 248,
   ALU_SRC_1 = 249,
   ALU_SRC_1_INT = 250,
   ALU_SRC_M_1_INT = 251,
   ALU_SRC_0_5 = 252,
   ALU_SRC_LITERAL = 253,
   ALU_SRC_PV = 254,
   ALU_SRC_PS = 255,
   ALU_SRC_PARAM_BASE = 0x1C0,
   ALU_SRC_UNKNOWN
};

/* Description of one inline constant: a flag and a text description.
 * NOTE(review): use_chan presumably tells whether a channel applies to the
 * constant - confirm against the table definition. */
struct AluInlineConstantDescr {
   bool use_chan;
   const char *descr;
};

/* Inline constant -> description table; defined in another translation
 * unit. */
extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;

/* LDS (DS_OP_*) opcodes. The *_RET variants start at 32; DS_OP_INVALID (64)
 * is the last enumerator. */
enum ESDOp {
   DS_OP_ADD = 0,
   DS_OP_SUB = 1,
   DS_OP_RSUB = 2,
   DS_OP_INC = 3,
   DS_OP_DEC = 4,
   DS_OP_MIN_INT = 5,
   DS_OP_MAX_INT = 6,
   DS_OP_MIN_UINT = 7,
   DS_OP_MAX_UINT = 8,
   DS_OP_AND = 9,
   DS_OP_OR = 10,
   DS_OP_XOR = 11,
   DS_OP_MSKOR = 12,
   DS_OP_WRITE = 13,
   DS_OP_WRITE_REL = 14,
   DS_OP_WRITE2 = 15,
   DS_OP_CMP_STORE = 16,
   DS_OP_CMP_STORE_SPF = 17,
   DS_OP_BYTE_WRITE = 18,
   DS_OP_SHORT_WRITE = 19,
   DS_OP_ADD_RET = 32,
   DS_OP_SUB_RET = 33,
   DS_OP_RSUB_RET = 34,
   DS_OP_INC_RET = 35,
   DS_OP_DEC_RET = 36,
   DS_OP_MIN_INT_RET = 37,
   DS_OP_MAX_INT_RET = 38,
   DS_OP_MIN_UINT_RET = 39,
   DS_OP_MAX_UINT_RET = 40,
   DS_OP_AND_RET = 41,
   DS_OP_OR_RET = 42,
   DS_OP_XOR_RET = 43,
   DS_OP_MSKOR_RET = 44,
   DS_OP_XCHG_RET = 45,
   DS_OP_XCHG_REL_RET = 46,
   DS_OP_XCHG2_RET = 47,
   DS_OP_CMP_XCHG_RET = 48,
   DS_OP_CMP_XCHG_SPF_RET = 49,
   DS_OP_READ_RET = 50,
   DS_OP_READ_REL_RET = 51,
   DS_OP_READ2_RET = 52,
   DS_OP_READWRITE_RET = 53,
   DS_OP_BYTE_READ_RET = 54,
   DS_OP_UBYTE_READ_RET = 55,
   DS_OP_SHORT_READ_RET = 56,
   DS_OP_USHORT_READ_RET = 57,
   DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
   DS_OP_INVALID = 64
};

/* Static description of one LDS opcode: a source count and a printable
 * name. */
struct LDSOp {
   int nsrc;
   const char *name;
};

/* LDS opcode -> description table; defined in another translation unit. */
extern const std::map<ESDOp, LDSOp> lds_ops;

}

#endif // r600_sfn_alu_defines_h