• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2018-2019 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #ifndef r600_sfn_alu_defines_h
28 #define r600_sfn_alu_defines_h
29 
30 #include "../r600_isa.h"
31 
32 #include <bitset>
33 #include <map>
34 
35 namespace r600 {
36 
// Register index limits. Index 123 is given up ("sacrificed") for dummy
// destinations, so g_registers_end is 123.
static constexpr int g_registers_end = 123;
// Index range reserved for clause-local registers.
// NOTE(review): start looks inclusive and end exclusive (124..127) - confirm
// against the users of these constants.
static constexpr int g_clause_local_start = 124;
static constexpr int g_clause_local_end = 128;
41 
/* ALU opcodes.
 *
 * ALU op2 instructions 17:7 top three bits always zero.
 *
 * The enumerator prefix (op0_/op1_/op2_/op3_) gives the number of source
 * operands. Three-source opcodes are stored shifted left by 6 so that they
 * do not collide with the op0/op1/op2 values. op_invalid marks "no opcode".
 *
 * Every enumerator must have a unique value: the enum is used as the key of
 * the alu_ops map declared in this header.
 */
enum EAluOp {
   op2_add = 0,
   op2_mul = 1,
   op2_mul_ieee = 2,
   op2_max = 3,
   op2_min = 4,
   op2_max_dx10 = 5,
   op2_min_dx10 = 6,
   op2_sete = 8,
   op2_setgt = 9,
   op2_setge = 10,
   op2_setne = 11,
   op2_sete_dx10 = 12,
   op2_setgt_dx10 = 13,
   op2_setge_dx10 = 14,
   op2_setne_dx10 = 15,
   op1_fract = 16,
   op1_trunc = 17,
   op1_ceil = 18,
   op1_rndne = 19,
   op1_floor = 20,
   op2_ashr_int = 21,
   op2_lshr_int = 22,
   op2_lshl_int = 23,
   op1_mov = 25,
   op0_nop = 26,
   op2_mul_64 = 27,
   op1_flt64_to_flt32 = 28,
   op1_flt32_to_flt64 = 29,
   op2_pred_setgt_uint = 30,
   op2_pred_setge_uint = 31,
   op2_pred_sete = 32,
   op2_pred_setgt = 33,
   op2_pred_setge = 34,
   op2_pred_setne = 35,
   op1_pred_set_inv = 36,
   op2_pred_set_pop = 37,
   op0_pred_set_clr = 38,
   op1_pred_set_restore = 39,
   op2_pred_sete_push = 40,
   op2_pred_setgt_push = 41,
   op2_pred_setge_push = 42,
   op2_pred_setne_push = 43,
   op2_kille = 44,
   op2_killgt = 45,
   op2_killge = 46,
   op2_killne = 47,
   op2_and_int = 48,
   op2_or_int = 49,
   op2_xor_int = 50,
   op1_not_int = 51,
   op2_add_int = 52,
   op2_sub_int = 53,
   op2_max_int = 54,
   op2_min_int = 55,
   op2_max_uint = 56,
   op2_min_uint = 57,
   op2_sete_int = 58,
   op2_setgt_int = 59,
   op2_setge_int = 60,
   op2_setne_int = 61,
   op2_setgt_uint = 62,
   op2_setge_uint = 63,
   op2_killgt_uint = 64,
   op2_killge_uint = 65,
   op2_prede_int = 66,
   op2_pred_setgt_int = 67,
   op2_pred_setge_int = 68,
   op2_pred_setne_int = 69,
   op2_kille_int = 70,
   op2_killgt_int = 71,
   op2_killge_int = 72,
   op2_killne_int = 73,
   op2_pred_sete_push_int = 74,
   op2_pred_setgt_push_int = 75,
   op2_pred_setge_push_int = 76,
   op2_pred_setne_push_int = 77,
   op2_pred_setlt_push_int = 78,
   op2_pred_setle_push_int = 79,
   op1_flt_to_int = 80,
   op1_bfrev_int = 81,
   op2_addc_uint = 82,
   op2_subb_uint = 83,
   op0_group_barrier = 84,
   op0_group_seq_begin = 85,
   op0_group_seq_end = 86,
   op2_set_mode = 87,
   op1_set_cf_idx0 = 88,
   op1_set_cf_idx1 = 89,
   op2_set_lds_size = 90,
   op1_exp_ieee = 129,
   op1_log_clamped = 130,
   op1_log_ieee = 131,
   op1_recip_clamped = 132,
   op1_recip_ff = 133,
   op1_recip_ieee = 134,
   op1_recipsqrt_clamped = 135,
   op1_recipsqrt_ff = 136,
   op1_recipsqrt_ieee1 = 137,
   op1_sqrt_ieee = 138,
   op1_sin = 141,
   op1_cos = 142,
   op2_mullo_int = 143,
   op2_mulhi_int = 144,
   op2_mullo_uint = 145,
   op2_mulhi_uint = 146,
   op1_recip_int = 147,
   op1_recip_uint = 148,
   op1_recip_64 = 149,
   op1_recip_clamped_64 = 150,
   op1_recipsqrt_64 = 151,
   op1_recipsqrt_clamped_64 = 152,
   op1_sqrt_64 = 153,
   op1_flt_to_uint = 154,
   op1_int_to_flt = 155,
   op1_uint_to_flt = 156,
   op2_bfm_int = 160,
   op1_flt32_to_flt16 = 162,
   op1_flt16_to_flt32 = 163,
   op1_ubyte0_flt = 164,
   op1_ubyte1_flt = 165,
   op1_ubyte2_flt = 166,
   op1_ubyte3_flt = 167,
   op1_bcnt_int = 170,
   op1_ffbh_uint = 171,
   op1_ffbl_int = 172,
   op1_ffbh_int = 173,
   op1_flt_to_uint4 = 174,
   op2_dot_ieee = 175,
   op1_flt_to_int_rpi = 176,
   op1_flt_to_int_floor = 177,
   op2_mulhi_uint24 = 178,
   op1_mbcnt_32hi_int = 179,
   op1_offset_to_flt = 180,
   op2_mul_uint24 = 181,
   op1_bcnt_accum_prev_int = 182,
   op1_mbcnt_32lo_accum_prev_int = 183,
   op2_sete_64 = 184,
   op2_setne_64 = 185,
   op2_setgt_64 = 186,
   op2_setge_64 = 187,
   op2_min_64 = 188,
   op2_max_64 = 189,
   op2_dot4 = 190,
   op2_dot4_ieee = 191,
   op2_cube = 192,
   op1_max4 = 193,
   op1_frexp_64 = 196,
   op1_ldexp_64 = 197,
   op1_fract_64 = 198,
   op2_pred_setgt_64 = 199,
   /* Must be 200: it sits between pred_setgt_64 (199) and pred_setge_64
    * (201), and 198 is already taken by op1_fract_64. */
   op2_pred_sete_64 = 200,
   op2_pred_setge_64 = 201,
   OP2V_MUL_64 = 202,
   op2_add_64 = 203,
   op1_mova_int = 204,
   op1v_flt64_to_flt32 = 205,
   op1v_flt32_to_flt64 = 206,
   op2_sad_accum_prev_uint = 207,
   op2_dot = 208,
   op1_mul_prev = 209,
   op1_mul_ieee_prev = 210,
   op1_add_prev = 211,
   op2_muladd_prev = 212,
   op2_muladd_ieee_prev = 213,
   op2_interp_xy = 214,
   op2_interp_zw = 215,
   op2_interp_x = 216,
   op2_interp_z = 217,
   op0_store_flags = 218,
   op1_load_store_flags = 219,
   op0_lds_1a = 220,
   op0_lds_1a1d = 221,
   op0_lds_2a = 223,
   op1_interp_load_p0 = 224,
   /* NOTE(review): the next two values break the ascending sequence
    * (224 -> 125, 126); 225 and 226 may have been intended. The values are
    * unique, so they are left unchanged here - confirm against the ISA
    * documentation before touching them. */
   op1_interp_load_p10 = 125,
   op1_interp_load_p20 = 126,
   // op 3 all left shift 6
   op3_bfe_uint = 4 << 6,
   op3_bfe_int = 5 << 6,
   op3_bfi_int = 6 << 6,
   op3_fma = 7 << 6,
   op3_cndne_64 = 9 << 6,
   op3_fma_64 = 10 << 6,
   op3_lerp_uint = 11 << 6,
   op3_bit_align_int = 12 << 6,
   op3_byte_align_int = 13 << 6,
   op3_sad_accum_uint = 14 << 6,
   op3_sad_accum_hi_uint = 15 << 6,
   op3_muladd_uint24 = 16 << 6,
   op3_lds_idx_op = 17 << 6,
   op3_muladd = 20 << 6,
   op3_muladd_m2 = 21 << 6,
   op3_muladd_m4 = 22 << 6,
   op3_muladd_d2 = 23 << 6,
   op3_muladd_ieee = 24 << 6,
   op3_cnde = 25 << 6,
   op3_cndgt = 26 << 6,
   op3_cndge = 27 << 6,
   op3_cnde_int = 28 << 6,
   op3_cndgt_int = 29 << 6,
   op3_cndge_int = 30 << 6,
   op3_mul_lit = 31 << 6,
   op_invalid = 0xffff
};
248 
/* Per-instruction ALU flags.
 *
 * The values are consecutive starting at zero. alu_flag_count is the number
 * of flags and must stay last, because it is the size of the AluOpFlags
 * bitset declared in this header.
 */
enum AluModifiers {
   alu_src0_rel = 0,
   alu_src1_rel = 1,
   alu_src2_rel = 2,
   alu_dst_clamp = 3,
   alu_dst_rel = 4,
   alu_last_instr = 5,
   alu_update_exec = 6,
   alu_update_pred = 7,
   alu_write = 8,
   alu_op3 = 9,
   alu_is_trans = 10,
   alu_is_cayman_trans = 11,
   alu_is_lds = 12,
   alu_lds_group_start = 13,
   alu_lds_group_end = 14,
   alu_lds_address = 15,
   alu_no_schedule_bias = 16,
   alu_64bit_op = 17,
   alu_flag_none = 18,
   alu_flag_count = 19
};
271 
/* Output modifier selector for an ALU destination (values 0..3). */
enum AluDstModifiers {
   omod_off = 0x0,
   omod_mul2 = 0x1,
   omod_mul4 = 0x2,
   omod_divl2 = 0x3
};
278 
/* Predicate selection for an ALU instruction. Value 1 is not assigned. */
enum AluPredSel {
   pred_off = 0x0,
   pred_zero = 0x2,
   pred_one = 0x3
};
284 
/* Bank swizzle selectors.
 *
 * Two families of names share one numeric range: the alu_vec_* names cover
 * 0..6 and the sq_alu_scl_* names cover 0..4, so several enumerators alias
 * the same value.
 */
enum AluBankSwizzle {
   /* alu_vec_* names */
   alu_vec_012 = 0,
   alu_vec_021 = 1,
   alu_vec_120 = 2,
   alu_vec_102 = 3,
   alu_vec_201 = 4,
   alu_vec_210 = 5,
   alu_vec_unknown = 6,

   /* sq_alu_scl_* names (aliases of values 0..4) */
   sq_alu_scl_201 = 0,
   sq_alu_scl_122 = 1,
   sq_alu_scl_212 = 2,
   sq_alu_scl_221 = 3,
   sq_alu_scl_unknown = 4
};
299 
300 inline AluBankSwizzle
301 operator++(AluBankSwizzle& x)
302 {
303    x = static_cast<AluBankSwizzle>(x + 1);
304    return x;
305 }
306 
/* Set of ALU flags: a bitset with alu_flag_count bits. */
using AluOpFlags = std::bitset<alu_flag_count>;
308 
309 struct AluOp {
310    static constexpr int x = 1;
311    static constexpr int y = 2;
312    static constexpr int z = 4;
313    static constexpr int w = 8;
314    static constexpr int v = 15;
315    static constexpr int t = 16;
316    static constexpr int a = 31;
317 
AluOpAluOp318    AluOp(int ns, bool src_mod, bool clamp, bool fp64, uint8_t um_r600,
319          uint8_t um_r700, uint8_t um_eg, const char *n):
320        nsrc(ns),
321        can_srcmod(src_mod),
322        can_clamp(clamp),
323        is_fp64(fp64),
324        name(n)
325    {
326       unit_mask[0] = um_r600;
327       unit_mask[1] = um_r700;
328       unit_mask[2] = um_eg;
329    }
330 
can_channelAluOp331    bool can_channel(int flags, r600_chip_class unit_type) const
332    {
333       assert(unit_type < 3);
334       return flags & unit_mask[unit_type];
335    }
336 
337    int nsrc : 4;
338    int can_srcmod : 1;
339    int can_clamp : 1;
340    int is_fp64 : 1;
341    uint8_t unit_mask[3];
342    const char *name;
343 };
344 
/* Table mapping each EAluOp to its AluOp description. Only declared here;
 * the definition lives in another translation unit. */
extern const std::map<EAluOp, AluOp> alu_ops;
346 
/* Special ALU source selectors (inline constants and special registers).
 *
 * The values are grouped by name prefix below. ALU_SRC_UNKNOWN is one past
 * ALU_SRC_PARAM_BASE.
 */
enum AluInlineConstants {
   /* LDS queue / direct access */
   ALU_SRC_LDS_OQ_A = 219,
   ALU_SRC_LDS_OQ_B = 220,
   ALU_SRC_LDS_OQ_A_POP = 221,
   ALU_SRC_LDS_OQ_B_POP = 222,
   ALU_SRC_LDS_DIRECT_A = 223,
   ALU_SRC_LDS_DIRECT_B = 224,

   /* time, mask and id values */
   ALU_SRC_TIME_HI = 227,
   ALU_SRC_TIME_LO = 228,
   ALU_SRC_MASK_HI = 229,
   ALU_SRC_MASK_LO = 230,
   ALU_SRC_HW_WAVE_ID = 231,
   ALU_SRC_SIMD_ID = 232,
   ALU_SRC_SE_ID = 233,
   ALU_SRC_HW_THREADGRP_ID = 234,
   ALU_SRC_WAVE_ID_IN_GRP = 235,
   ALU_SRC_NUM_THREADGRP_WAVES = 236,
   ALU_SRC_HW_ALU_ODD = 237,
   ALU_SRC_LOOP_IDX = 238,

   /* parameter base and primitive masks */
   ALU_SRC_PARAM_BASE_ADDR = 240,
   ALU_SRC_NEW_PRIM_MASK = 241,
   ALU_SRC_PRIM_MASK_HI = 242,
   ALU_SRC_PRIM_MASK_LO = 243,

   /* numeric inline constants */
   ALU_SRC_1_DBL_L = 244,
   ALU_SRC_1_DBL_M = 245,
   ALU_SRC_0_5_DBL_L = 246,
   ALU_SRC_0_5_DBL_M = 247,
   ALU_SRC_0 = 248,
   ALU_SRC_1 = 249,
   ALU_SRC_1_INT = 250,
   ALU_SRC_M_1_INT = 251,
   ALU_SRC_0_5 = 252,

   /* literal and PV/PS selectors */
   ALU_SRC_LITERAL = 253,
   ALU_SRC_PV = 254,
   ALU_SRC_PS = 255,

   ALU_SRC_PARAM_BASE = 448, /* 0x1C0 */
   ALU_SRC_UNKNOWN = ALU_SRC_PARAM_BASE + 1
};
385 
/* Description record for one AluInlineConstants value. */
struct AluInlineConstantDescr {
   /* NOTE(review): presumably tells whether a channel selection applies to
    * this constant - confirm against the code that reads it. */
   bool use_chan;
   /* Text description of the constant. */
   const char *descr;
};
390 
/* Table mapping each AluInlineConstants value to its description. Only
 * declared here; the definition lives in another translation unit. */
extern const std::map<AluInlineConstants, AluInlineConstantDescr> alu_src_const;
392 
393 #define LDSOP2(X) LDS_##X = LDS_OP2_LDS_##X
394 
395 enum ESDOp {
396    DS_OP_ADD = 0,
397    DS_OP_SUB = 1,
398    DS_OP_RSUB = 2,
399    DS_OP_INC = 3,
400    DS_OP_DEC = 4,
401    DS_OP_MIN_INT = 5,
402    DS_OP_MAX_INT = 6,
403    DS_OP_MIN_UINT = 7,
404    DS_OP_MAX_UINT = 8,
405    DS_OP_AND = 9,
406    DS_OP_OR = 10,
407    DS_OP_XOR = 11,
408    DS_OP_MSKOR = 12,
409    DS_OP_WRITE = 13,
410    DS_OP_WRITE_REL = 14,
411    DS_OP_WRITE2 = 15,
412    DS_OP_CMP_STORE = 16,
413    DS_OP_CMP_STORE_SPF = 17,
414    DS_OP_BYTE_WRITE = 18,
415    DS_OP_SHORT_WRITE = 19,
416    DS_OP_ADD_RET = 32,
417    DS_OP_SUB_RET = 33,
418    DS_OP_RSUB_RET = 34,
419    DS_OP_INC_RET = 35,
420    DS_OP_DEC_RET = 36,
421    DS_OP_MIN_INT_RET = 37,
422    DS_OP_MAX_INT_RET = 38,
423    DS_OP_MIN_UINT_RET = 39,
424    DS_OP_MAX_UINT_RET = 40,
425    DS_OP_AND_RET = 41,
426    DS_OP_OR_RET = 42,
427    DS_OP_XOR_RET = 43,
428    DS_OP_MSKOR_RET = 44,
429    DS_OP_XCHG_RET = 45,
430    DS_OP_XCHG_REL_RET = 46,
431    DS_OP_XCHG2_RET = 47,
432    DS_OP_CMP_XCHG_RET = 48,
433    DS_OP_CMP_XCHG_SPF_RET = 49,
434    DS_OP_READ_RET = 50,
435    DS_OP_READ_REL_RET = 51,
436    DS_OP_READ2_RET = 52,
437    DS_OP_READWRITE_RET = 53,
438    DS_OP_BYTE_READ_RET = 54,
439    DS_OP_UBYTE_READ_RET = 55,
440    DS_OP_SHORT_READ_RET = 56,
441    DS_OP_USHORT_READ_RET = 57,
442    DS_OP_ATOMIC_ORDERED_ALLOC_RET = 63,
443    DS_OP_INVALID = 64,
444    LDSOP2(ADD_RET),
445    LDSOP2(ADD),
446    LDSOP2(AND_RET),
447    LDSOP2(AND),
448    LDSOP2(WRITE),
449    LDSOP2(OR_RET),
450    LDSOP2(OR),
451    LDSOP2(MAX_INT_RET),
452    LDSOP2(MAX_INT),
453    LDSOP2(MAX_UINT_RET),
454    LDSOP2(MAX_UINT),
455    LDSOP2(MIN_INT_RET),
456    LDSOP2(MIN_INT),
457    LDSOP2(MIN_UINT_RET),
458    LDSOP2(MIN_UINT),
459    LDSOP2(XOR_RET),
460    LDSOP2(XOR),
461    LDSOP2(XCHG_RET),
462    LDS_CMP_XCHG_RET = LDS_OP3_LDS_CMP_XCHG_RET,
463    LDS_WRITE_REL = LDS_OP3_LDS_WRITE_REL
464 };
465 
466 #undef LDSOP2
467 
/* Description record for one ESDOp value. */
struct LDSOp {
   /* Number of source operands (going by the field name). */
   int nsrc;
   /* Name of the operation as text. */
   const char *name;
};
472 
/* Table mapping each ESDOp to its LDSOp description. Only declared here;
 * the definition lives in another translation unit. */
extern const std::map<ESDOp, LDSOp> lds_ops;
474 
/* One kcache line description. All numeric fields default to zero and the
 * lock mode defaults to free. */
struct KCacheLine {
   /* Lock state of the line. */
   enum KCacheLockMode {
      free,
      lock_1,
      lock_2
   };

   int bank = 0;
   int addr = 0;
   int len = 0;
   int index_mode = 0;
   KCacheLockMode mode = free;
};
486 
487 } // namespace r600
488 
#endif // r600_sfn_alu_defines_h
490