1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file elk_inst.h
26 *
27 * A representation of i965 EU assembly instructions, with helper methods to
28 * get and set various fields. This is the actual hardware format.
29 */
30
31 #ifndef ELK_INST_H
32 #define ELK_INST_H
33
34 #include <assert.h>
35 #include <stdint.h>
36
37 #include "elk_eu_defines.h"
38 #include "elk_isa_info.h"
39 #include "elk_reg_type.h"
40 #include "dev/intel_device_info.h"
41
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45
/*
 * Raw encoding of one instruction: two 64-bit words (128 bits in total).
 * The struct tag is spelled out because elk_context.h forward-declares
 * `struct elk_inst`.
 */
struct elk_inst {
   uint64_t data[2];
};
typedef struct elk_inst elk_inst;

/*
 * Bit-range primitives used by every generated field accessor in this file.
 * Only the prototypes are needed at this point; `high` and `low` name an
 * inclusive bit range.
 */
static inline uint64_t
elk_inst_bits(const elk_inst *inst, unsigned high, unsigned low);

static inline void
elk_inst_set_bits(elk_inst *inst, unsigned high, unsigned low,
                  uint64_t value);
56
/*
 * FC(name, hi4, lo4, hi12, lo12, assertions)
 *
 * Generates elk_inst_set_<name>() and elk_inst_<name>() for a field stored
 * in bits hi4:lo4 while devinfo->ver < 12 and in bits hi12:lo12 otherwise.
 * Both generated functions assert() the `assertions` expression first; the
 * expression may refer to `devinfo`.
 */
#define FC(name, hi4, lo4, hi12, lo12, assertions) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t v) \
{ \
   assert(assertions); \
   if (devinfo->ver < 12) { \
      elk_inst_set_bits(inst, hi4, lo4, v); \
      return; \
   } \
   elk_inst_set_bits(inst, hi12, lo12, v); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, \
                const elk_inst *inst) \
{ \
   assert(assertions); \
   if (devinfo->ver < 12) \
      return elk_inst_bits(inst, hi4, lo4); \
   return elk_inst_bits(inst, hi12, lo12); \
}

/* Unconditional flavour of FC: same two layouts (pre-Gfx12 and Gfx12+),
 * with an assertion that always holds.
 */
#define F(name, hi4, lo4, hi12, lo12) \
   FC(name, hi4, lo4, hi12, lo12, true)
83
/*
 * F20(name, hi4, lo4, hi12, lo12, hi20, lo20)
 *
 * Accessor pair for a field with three layouts: hi4:lo4 for ver < 12,
 * hi12:lo12 for 12 <= ver < 20, and hi20:lo20 for ver >= 20.
 */
#define F20(name, hi4, lo4, hi12, lo12, hi20, lo20) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t v) \
{ \
   if (devinfo->ver < 12) { \
      elk_inst_set_bits(inst, hi4, lo4, v); \
      return; \
   } \
   if (devinfo->ver < 20) { \
      elk_inst_set_bits(inst, hi12, lo12, v); \
      return; \
   } \
   elk_inst_set_bits(inst, hi20, lo20, v); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, \
                const elk_inst *inst) \
{ \
   if (devinfo->ver < 12) \
      return elk_inst_bits(inst, hi4, lo4); \
   if (devinfo->ver < 20) \
      return elk_inst_bits(inst, hi12, lo12); \
   return elk_inst_bits(inst, hi20, lo20); \
}
110
/*
 * FV20(name, hi4, lo4, hi12, lo12, hi20, lo20)
 *
 * Same three layouts as F20, but with a value remapping on ver >= 20: the
 * setter stores only the low three bits of `v`, and the getter reports a
 * stored 0x7 as 0xF.  Older generations store and return the value as is.
 */
#define FV20(name, hi4, lo4, hi12, lo12, hi20, lo20) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t v) \
{ \
   if (devinfo->ver < 12) { \
      elk_inst_set_bits(inst, hi4, lo4, v); \
      return; \
   } \
   if (devinfo->ver < 20) { \
      elk_inst_set_bits(inst, hi12, lo12, v); \
      return; \
   } \
   elk_inst_set_bits(inst, hi20, lo20, v & 0x7); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, \
                const elk_inst *inst) \
{ \
   if (devinfo->ver < 12) \
      return elk_inst_bits(inst, hi4, lo4); \
   if (devinfo->ver < 20) \
      return elk_inst_bits(inst, hi12, lo12); \
   const uint64_t raw = elk_inst_bits(inst, hi20, lo20); \
   return raw == 0x7 ? 0xF : raw; \
}
135
/*
 * FD20(name, hi4, lo4, hi12, lo12, hi20, lo20, zero20)
 *
 * Accessor pair whose ver >= 20 encoding splits the value: bits hi20:lo20
 * hold `v >> 1`, and the least significant bit of `v` lives in the single
 * bit `zero20`.  Passing -1 for `zero20` means there is no such bit: the
 * setter then asserts that bit 0 of `v` is clear and the getter returns 0
 * for it.  Older generations use the plain hi12:lo12 / hi4:lo4 ranges.
 */
#define FD20(name, hi4, lo4, hi12, lo12, hi20, lo20, zero20) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t v) \
{ \
   if (devinfo->ver < 12) { \
      elk_inst_set_bits(inst, hi4, lo4, v); \
      return; \
   } \
   if (devinfo->ver < 20) { \
      elk_inst_set_bits(inst, hi12, lo12, v); \
      return; \
   } \
   elk_inst_set_bits(inst, hi20, lo20, v >> 1); \
   if (zero20 != -1) \
      elk_inst_set_bits(inst, zero20, zero20, v & 1); \
   else \
      assert((v & 1) == 0); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, \
                const elk_inst *inst) \
{ \
   if (devinfo->ver < 12) \
      return elk_inst_bits(inst, hi4, lo4); \
   if (devinfo->ver < 20) \
      return elk_inst_bits(inst, hi12, lo12); \
   const uint64_t upper = elk_inst_bits(inst, hi20, lo20) << 1; \
   if (zero20 == -1) \
      return upper; \
   return upper | elk_inst_bits(inst, zero20, zero20); \
}
165
/*
 * BOUNDS(...) declares the locals `high` and `low` in the enclosing scope
 * and loads them with the bit range belonging to the running generation,
 * chosen from devinfo->ver (and devinfo->verx10 to tell 4.5 from 4).  It
 * then asserts that neither bound is -1, so using a field on a generation
 * whose range was given as -1 trips the assertion.
 */
#define BOUNDS(hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
               hi7, lo7, hi8, lo8, hi12, lo12, hi20, lo20) \
   unsigned high, low; \
   if (devinfo->ver < 5) { \
      if (devinfo->verx10 < 45) { \
         high = hi4; low = lo4; \
      } else { \
         high = hi45; low = lo45; \
      } \
   } else if (devinfo->ver < 6) { \
      high = hi5; low = lo5; \
   } else if (devinfo->ver < 7) { \
      high = hi6; low = lo6; \
   } else if (devinfo->ver < 8) { \
      high = hi7; low = lo7; \
   } else if (devinfo->ver < 12) { \
      high = hi8; low = lo8; \
   } else if (devinfo->ver < 20) { \
      high = hi12; low = lo12; \
   } else { \
      high = hi20; low = lo20; \
   } \
   assert(((int) high) != -1 && ((int) low) != -1);
187
/*
 * FF(name, <hi, lo> for Gfx4, 4.5, 5, 6, 7, 8, 12 and 20)
 *
 * The fully general accessor generator, for fields that sit at a different
 * bit range on many generations.  The range is picked at run time by
 * BOUNDS(); per the original note, GCC appears to fold the cases whose bits
 * are identical, which removes some of the inefficiency.
 */
#define FF(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
           hi7, lo7, hi8, lo8, hi12, lo12, hi20, lo20) \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, const elk_inst *inst) \
{ \
   BOUNDS(hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
          hi7, lo7, hi8, lo8, hi12, lo12, hi20, lo20) \
   return elk_inst_bits(inst, high, low); \
} \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t value) \
{ \
   BOUNDS(hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
          hi7, lo7, hi8, lo8, hi12, lo12, hi20, lo20) \
   elk_inst_set_bits(inst, high, low, value); \
}
209
/*
 * F8(name, gfx4_high, gfx4_low, gfx8_high, gfx8_low, gfx12_high, gfx12_low)
 *
 * Shorthand over FF for fields that moved at Gfx8 and again at Gfx12: the
 * Gfx4 range is reused for 4.5, 5, 6 and 7, and the Gfx12 range is reused
 * for Gfx20.
 *
 * The expansion deliberately does not end in ';'.  FF expands to function
 * definitions, and a semicolon after them would be a stray empty
 * declaration at file scope, which ISO C does not permit.
 */
#define F8(name, gfx4_high, gfx4_low, gfx8_high, gfx8_low, \
           gfx12_high, gfx12_low) \
FF(name, \
   /* 4: */ gfx4_high, gfx4_low, \
   /* 4.5: */ gfx4_high, gfx4_low, \
   /* 5: */ gfx4_high, gfx4_low, \
   /* 6: */ gfx4_high, gfx4_low, \
   /* 7: */ gfx4_high, gfx4_low, \
   /* 8: */ gfx8_high, gfx8_low, \
   /* 12: */ gfx12_high, gfx12_low, \
   /* 20: */ gfx12_high, gfx12_low)
222
/*
 * FFDC(name, <hi, lo> for Gfx4, 4.5, 5, 6, 7 and 8,
 *      hi12ex, lo12ex, hi12, lo12, assertions)
 *
 * Accessor pair for a field that gained extra, discontiguous most
 * significant bits on Gfx12.  On ver >= 12 the low part of the value is
 * stored in hi12:lo12 and everything above it in hi12ex:lo12ex; when either
 * "ex" bound is -1 the upper part is not stored and reads back as 0.  On
 * older generations the range comes from BOUNDS().  `assertions` is
 * asserted on entry to both generated functions.
 */
#define FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
             hi7, lo7, hi8, lo8, hi12ex, lo12ex, hi12, lo12, assertions) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t value) \
{ \
   assert(assertions); \
   if (devinfo->ver < 12) { \
      BOUNDS(hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
             hi7, lo7, hi8, lo8, -1, -1, -1, -1) \
      elk_inst_set_bits(inst, high, low, value); \
      return; \
   } \
   /* Width of the contiguous low part. */ \
   const unsigned nr_low_bits = hi12 - lo12 + 1; \
   if (hi12ex != -1 && lo12ex != -1) \
      elk_inst_set_bits(inst, hi12ex, lo12ex, value >> nr_low_bits); \
   elk_inst_set_bits(inst, hi12, lo12, \
                     value & ((1ull << nr_low_bits) - 1)); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, const elk_inst *inst) \
{ \
   assert(assertions); \
   if (devinfo->ver < 12) { \
      BOUNDS(hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
             hi7, lo7, hi8, lo8, -1, -1, -1, -1) \
      return elk_inst_bits(inst, high, low); \
   } \
   const unsigned nr_low_bits = hi12 - lo12 + 1; \
   const uint64_t low_part = elk_inst_bits(inst, hi12, lo12); \
   if (hi12ex == -1 || lo12ex == -1) \
      return low_part; \
   return (elk_inst_bits(inst, hi12ex, lo12ex) << nr_low_bits) | low_part; \
}

/* FFDC without a guarding assertion. */
#define FD(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
           hi7, lo7, hi8, lo8, hi12ex, lo12ex, hi12, lo12) \
   FFDC(name, hi4, lo4, hi45, lo45, hi5, lo5, hi6, lo6, \
        hi7, lo7, hi8, lo8, hi12ex, lo12ex, hi12, lo12, true)

/* FFDC for fields that kept one range (hi4:lo4) on every generation before
 * Gfx12 and only then gained the extra discontiguous bits.
 */
#define FDC(name, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions) \
   FFDC(name, hi4, lo4, hi4, lo4, hi4, lo4, hi4, lo4, \
        hi4, lo4, hi4, lo4, hi12ex, lo12ex, hi12, lo12, assertions)
271
272
/*
 * FI(name, hi4, lo4, hi8, lo8, hi12, lo12)
 *
 * Accessor pair for the 2-bit register file field.  Before Gfx12 it is an
 * ordinary range (hi4:lo4 up to Gfx7, hi8:lo8 from Gfx8).  On Gfx12+ it is
 * a variable-length encoding: bit `hi12` is the IsImm bit and receives
 * `value >> 1`; the extra file bit `lo12` is only written when IsImm is 0.
 * When reading, a set IsImm bit forces the low bit of the result to 1.
 */
#define FI(name, hi4, lo4, hi8, lo8, hi12, lo12) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t value) \
{ \
   if (devinfo->ver < 12) { \
      BOUNDS(hi4, lo4, hi4, lo4, hi4, lo4, hi4, lo4, \
             hi4, lo4, hi8, lo8, -1, -1, -1, -1) \
      elk_inst_set_bits(inst, high, low, value); \
      return; \
   } \
   const uint64_t is_imm = value >> 1; \
   elk_inst_set_bits(inst, hi12, hi12, is_imm); \
   if (is_imm == 0) \
      elk_inst_set_bits(inst, lo12, lo12, value & 1); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, const elk_inst *inst) \
{ \
   if (devinfo->ver < 12) { \
      BOUNDS(hi4, lo4, hi4, lo4, hi4, lo4, hi4, lo4, \
             hi4, lo4, hi8, lo8, -1, -1, -1, -1) \
      return elk_inst_bits(inst, high, low); \
   } \
   const uint64_t is_imm = elk_inst_bits(inst, hi12, hi12); \
   if (is_imm == 0) \
      return elk_inst_bits(inst, lo12, lo12); \
   return (is_imm << 1) | 1; \
}
305
/*
 * FK(name, hi4, lo4, const12)
 *
 * Accessor pair for a field that exists in bits hi4:lo4 before Gfx12 but is
 * a fixed constant, absent from the encoding, on Gfx12+.  There the setter
 * writes nothing and merely asserts that the caller passed `const12`, and
 * the getter returns `const12`.
 */
#define FK(name, hi4, lo4, const12) \
static inline void \
elk_inst_set_##name(const struct intel_device_info *devinfo, \
                    elk_inst *inst, uint64_t v) \
{ \
   if (devinfo->ver < 12) { \
      elk_inst_set_bits(inst, hi4, lo4, v); \
      return; \
   } \
   assert(v == (const12)); \
} \
static inline uint64_t \
elk_inst_##name(const struct intel_device_info *devinfo, \
                const elk_inst *inst) \
{ \
   if (devinfo->ver < 12) \
      return elk_inst_bits(inst, hi4, lo4); \
   return (const12); \
}
328
/*
 * Field table for the common one- and two-source instruction layout.
 *
 * Every entry expands to an elk_inst_set_<name>() / elk_inst_<name>() pair.
 * The numbers are inclusive <high, low> bit positions, and the comment in
 * front of each pair names the first generation that range applies to.
 * A pair of -1, -1 is passed where no range is given for that generation;
 * entries built on FF/F8/FI assert if used on such a generation.
 *
 * Several entries intentionally name the same bits (for example the
 * da16 swizzles versus width/hstride, or branch_control, acc_wr_control and
 * mask_control_ex on bit 28); which accessor applies depends on the
 * instruction and generation.
 */
FV20(src1_vstride, /* 4+ */ 120, 117, /* 12+ */ 119, 116, /* 20+ */ 118, 116)
F(src1_width, /* 4+ */ 116, 114, /* 12+ */ 115, 113)
F(src1_da16_swiz_w, /* 4+ */ 115, 114, /* 12+ */ -1, -1)
F(src1_da16_swiz_z, /* 4+ */ 113, 112, /* 12+ */ -1, -1)
F(src1_hstride, /* 4+ */ 113, 112, /* 12+ */ 97, 96)
F(src1_address_mode, /* 4+ */ 111, 111, /* 12+ */ 112, 112)
/** Src1.SrcMod @{ */
F(src1_negate, /* 4+ */ 110, 110, /* 12+ */ 121, 121)
F(src1_abs, /* 4+ */ 109, 109, /* 12+ */ 120, 120)
/** @} */
F8(src1_ia_subreg_nr, /* 4+ */ 108, 106, /* 8+ */ 108, 105, /* 12+ */ 111, 108)
F(src1_da_reg_nr, /* 4+ */ 108, 101, /* 12+ */ 111, 104)
F(src1_da16_subreg_nr, /* 4+ */ 100, 100, /* 12+ */ -1, -1)
/* Trailing -1: no separate bit holds the value's LSB on Gfx20. */
FD20(src1_da1_subreg_nr, /* 4+ */ 100, 96, /* 12+ */ 103, 99, /* 20+ */ 103, 99, -1)
F(src1_da16_swiz_y, /* 4+ */ 99, 98, /* 12+ */ -1, -1)
F(src1_da16_swiz_x, /* 4+ */ 97, 96, /* 12+ */ -1, -1)
F8(src1_reg_hw_type, /* 4+ */ 46, 44, /* 8+ */ 94, 91, /* 12+ */ 91, 88)
/* For FI the Gfx12 pair is <IsImm bit, extra file bit>, not a range. */
FI(src1_reg_file, /* 4+ */ 43, 42, /* 8+ */ 90, 89, /* 12+ */ 47, 98)
F(src1_is_imm, /* 4+ */ -1, -1, /* 12+ */ 47, 47)
FV20(src0_vstride, /* 4+ */ 88, 85, /* 12+ */ 87, 84, /* 20+ */ 86, 84)
F(src0_width, /* 4+ */ 84, 82, /* 12+ */ 83, 81)
F(src0_da16_swiz_w, /* 4+ */ 83, 82, /* 12+ */ -1, -1)
F(src0_da16_swiz_z, /* 4+ */ 81, 80, /* 12+ */ -1, -1)
F(src0_hstride, /* 4+ */ 81, 80, /* 12+ */ 65, 64)
F(src0_address_mode, /* 4+ */ 79, 79, /* 12+ */ 80, 80)
/** Src0.SrcMod @{ */
F(src0_negate, /* 4+ */ 78, 78, /* 12+ */ 45, 45)
F(src0_abs, /* 4+ */ 77, 77, /* 12+ */ 44, 44)
/** @} */
F8(src0_ia_subreg_nr, /* 4+ */ 76, 74, /* 8+ */ 76, 73, /* 12+ */ 79, 76)
F(src0_da_reg_nr, /* 4+ */ 76, 69, /* 12+ */ 79, 72)
F(src0_da16_subreg_nr, /* 4+ */ 68, 68, /* 12+ */ -1, -1)
/* Trailing 87: bit holding the value's LSB on Gfx20. */
FD20(src0_da1_subreg_nr, /* 4+ */ 68, 64, /* 12+ */ 71, 67, /* 20+ */ 71, 67, 87)
F(src0_da16_swiz_y, /* 4+ */ 67, 66, /* 12+ */ -1, -1)
F(src0_da16_swiz_x, /* 4+ */ 65, 64, /* 12+ */ -1, -1)
F(dst_address_mode, /* 4+ */ 63, 63, /* 12+ */ 35, 35)
F(dst_hstride, /* 4+ */ 62, 61, /* 12+ */ 49, 48)
F8(dst_ia_subreg_nr, /* 4+ */ 60, 58, /* 8+ */ 60, 57, /* 12+ */ 63, 60)
F(dst_da_reg_nr, /* 4+ */ 60, 53, /* 12+ */ 63, 56)
F(dst_da16_subreg_nr, /* 4+ */ 52, 52, /* 12+ */ -1, -1)
/* Trailing 33: bit holding the value's LSB on Gfx20. */
FD20(dst_da1_subreg_nr, /* 4+ */ 52, 48, /* 12+ */ 55, 51, /* 20+ */ 55, 51, 33)
F(da16_writemask, /* 4+ */ 51, 48, /* 12+ */ -1, -1) /* Dst.ChanEn */
F8(src0_reg_hw_type, /* 4+ */ 41, 39, /* 8+ */ 46, 43, /* 12+ */ 43, 40)
FI(src0_reg_file, /* 4+ */ 38, 37, /* 8+ */ 42, 41, /* 12+ */ 46, 66)
F(src0_is_imm, /* 4+ */ -1, -1, /* 12+ */ 46, 46)
F8(dst_reg_hw_type, /* 4+ */ 36, 34, /* 8+ */ 40, 37, /* 12+ */ 39, 36)
F8(dst_reg_file, /* 4+ */ 33, 32, /* 8+ */ 36, 35, /* 12+ */ 50, 50)
F8(mask_control, /* 4+ */ 9, 9, /* 8+ */ 34, 34, /* 12+ */ 31, 31)
FF(flag_reg_nr,
/* 4-6: doesn't exist */ -1, -1, -1, -1, -1, -1, -1, -1,
/* 7: */ 90, 90,
/* 8: */ 33, 33,
/* 12: */ 23, 23,
/* 20: */ 23, 22)
FF(flag_subreg_nr,
/* 4-7: */ 89, 89, 89, 89, 89, 89, 89, 89, 89, 89,
/* 8: */ 32, 32,
/* 12: */ 22, 22,
/* 20: */ 21, 21)
F(saturate, /* 4+ */ 31, 31, /* 12+ */ 34, 34)
F(debug_control, /* 4+ */ 30, 30, /* 12+ */ 30, 30)
F(cmpt_control, /* 4+ */ 29, 29, /* 12+ */ 29, 29)
/* The next three share bit 28 before Gfx12; their assertions restrict each
 * accessor to the generations it is meant for.
 */
FC(branch_control, /* 4+ */ 28, 28, /* 12+ */ 33, 33, devinfo->ver >= 8)
FC(acc_wr_control, /* 4+ */ 28, 28, /* 12+ */ 33, 33, devinfo->ver >= 6 && devinfo->ver < 20)
FC(mask_control_ex, /* 4+ */ 28, 28, /* 12+ */ -1, -1, devinfo->verx10 == 45 ||
devinfo->ver == 5)
/* cond_modifier and math_function occupy the same bits. */
F(cond_modifier, /* 4+ */ 27, 24, /* 12+ */ 95, 92)
FC(math_function, /* 4+ */ 27, 24, /* 12+ */ 95, 92, devinfo->ver >= 6)
F20(exec_size, /* 4+ */ 23, 21, /* 12+ */ 18, 16, /* 20+ */ 20, 18)
F(pred_inv, /* 4+ */ 20, 20, /* 12+ */ 28, 28)
F20(pred_control, /* 4+ */ 19, 16, /* 12+ */ 27, 24, /* 20+ */ 27, 26)
F(thread_control, /* 4+ */ 15, 14, /* 12+ */ -1, -1)
F(atomic_control, /* 4+ */ -1, -1, /* 12+ */ 32, 32)
F20(qtr_control, /* 4+ */ 13, 12, /* 12+ */ 21, 20, /* 20+ */ 25, 24)
FF(nib_control,
/* 4-6: doesn't exist */ -1, -1, -1, -1, -1, -1, -1, -1,
/* 7: */ 47, 47,
/* 8: */ 11, 11,
/* 12: */ 19, 19,
/* 20: */ -1, -1)
F8(no_dd_check, /* 4+ */ 11, 11, /* 8+ */ 10, 10, /* 12+ */ -1, -1)
F8(no_dd_clear, /* 4+ */ 10, 10, /* 8+ */ 9, 9, /* 12+ */ -1, -1)
F20(swsb, /* 4+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8)
/* On Gfx12+ access_mode is not stored; the accessors use ELK_ALIGN_1. */
FK(access_mode, /* 4+ */ 8, 8, /* 12+ */ ELK_ALIGN_1)
/* Bit 7 is Reserved (for future Opcode expansion) */
F(hw_opcode, /* 4+ */ 6, 0, /* 12+ */ 6, 0)
415
/**
 * Three-source instructions:
 *
 * Accessors named 3src_a16_* have no Gfx12+ range (-1, -1); the remaining
 * 3src_* entries are marked "same in align1" or are shared control fields.
 * Numbers are inclusive <high, low> bit positions for the generation named
 * in the preceding comment.
 * @{
 */
F(3src_src2_reg_nr, /* 4+ */ 125, 118, /* 12+ */ 127, 120) /* same in align1 */
F(3src_a16_src2_subreg_nr, /* 4+ */ 117, 115, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src2_swizzle, /* 4+ */ 114, 107, /* 12+ */ -1, -1)
F(3src_a16_src2_rep_ctrl, /* 4+ */ 106, 106, /* 12+ */ -1, -1)
F(3src_src1_reg_nr, /* 4+ */ 104, 97, /* 12+ */ 111, 104) /* same in align1 */
F(3src_a16_src1_subreg_nr, /* 4+ */ 96, 94, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src1_swizzle, /* 4+ */ 93, 86, /* 12+ */ -1, -1)
F(3src_a16_src1_rep_ctrl, /* 4+ */ 85, 85, /* 12+ */ -1, -1)
F(3src_src0_reg_nr, /* 4+ */ 83, 76, /* 12+ */ 79, 72) /* same in align1 */
F(3src_a16_src0_subreg_nr, /* 4+ */ 75, 73, /* 12+ */ -1, -1) /* Extra discontiguous bit on CHV? */
F(3src_a16_src0_swizzle, /* 4+ */ 72, 65, /* 12+ */ -1, -1)
F(3src_a16_src0_rep_ctrl, /* 4+ */ 64, 64, /* 12+ */ -1, -1)
F(3src_dst_reg_nr, /* 4+ */ 63, 56, /* 12+ */ 63, 56) /* same in align1 */
F(3src_a16_dst_subreg_nr, /* 4+ */ 55, 53, /* 12+ */ -1, -1)
F(3src_a16_dst_writemask, /* 4+ */ 52, 49, /* 12+ */ -1, -1)
F8(3src_a16_nib_ctrl, /* 4+ */ 47, 47, /* 8+ */ 11, 11, /* 12+ */ -1, -1) /* only exists on IVB+ */
F8(3src_a16_dst_hw_type, /* 4+ */ 45, 44, /* 8+ */ 48, 46, /* 12+ */ -1, -1) /* only exists on IVB+ */
F8(3src_a16_src_hw_type, /* 4+ */ 43, 42, /* 8+ */ 45, 43, /* 12+ */ -1, -1)
F8(3src_src2_negate, /* 4+ */ 41, 41, /* 8+ */ 42, 42, /* 12+ */ 85, 85)
F8(3src_src2_abs, /* 4+ */ 40, 40, /* 8+ */ 41, 41, /* 12+ */ 84, 84)
F8(3src_src1_negate, /* 4+ */ 39, 39, /* 8+ */ 40, 40, /* 12+ */ 87, 87)
F8(3src_src1_abs, /* 4+ */ 38, 38, /* 8+ */ 39, 39, /* 12+ */ 86, 86)
F8(3src_src0_negate, /* 4+ */ 37, 37, /* 8+ */ 38, 38, /* 12+ */ 45, 45)
F8(3src_src0_abs, /* 4+ */ 36, 36, /* 8+ */ 37, 37, /* 12+ */ 44, 44)
/* The two per-source type bits below only have a range for Gfx8..Gfx11. */
F8(3src_a16_src1_type, /* 4+ */ -1, -1, /* 8+ */ 36, 36, /* 12+ */ -1, -1)
F8(3src_a16_src2_type, /* 4+ */ -1, -1, /* 8+ */ 35, 35, /* 12+ */ -1, -1)
F8(3src_a16_flag_reg_nr, /* 4+ */ 34, 34, /* 8+ */ 33, 33, /* 12+ */ -1, -1)
F8(3src_a16_flag_subreg_nr, /* 4+ */ 33, 33, /* 8+ */ 32, 32, /* 12+ */ -1, -1)
FF(3src_a16_dst_reg_file,
/* 4-5: doesn't exist - no 3-source instructions */ -1, -1, -1, -1, -1, -1,
/* 6: */ 32, 32,
/* 7-8: doesn't exist - no MRFs */ -1, -1, -1, -1,
/* 12: */ -1, -1,
/* 20: */ -1, -1)
F(3src_saturate, /* 4+ */ 31, 31, /* 12+ */ 34, 34)
F(3src_debug_control, /* 4+ */ 30, 30, /* 12+ */ 30, 30)
F(3src_cmpt_control, /* 4+ */ 29, 29, /* 12+ */ 29, 29)
FC(3src_acc_wr_control, /* 4+ */ 28, 28, /* 12+ */ 33, 33, devinfo->ver < 20)
F(3src_cond_modifier, /* 4+ */ 27, 24, /* 12+ */ 95, 92)
F(3src_exec_size, /* 4+ */ 23, 21, /* 12+ */ 18, 16)
F(3src_pred_inv, /* 4+ */ 20, 20, /* 12+ */ 28, 28)
F20(3src_pred_control, /* 4+ */ 19, 16, /* 12+ */ 27, 24, /* 20+ */ 27, 26)
F(3src_thread_control, /* 4+ */ 15, 14, /* 12+ */ -1, -1)
F(3src_atomic_control, /* 4+ */ -1, -1, /* 12+ */ 32, 32)
F20(3src_qtr_control, /* 4+ */ 13, 12, /* 12+ */ 21, 20, /* 20+ */ 25, 24)
F8(3src_no_dd_check, /* 4+ */ 11, 11, /* 8+ */ 10, 10, /* 12+ */ -1, -1)
F8(3src_no_dd_clear, /* 4+ */ 10, 10, /* 8+ */ 9, 9, /* 12+ */ -1, -1)
F8(3src_mask_control, /* 4+ */ 9, 9, /* 8+ */ 34, 34, /* 12+ */ 31, 31)
/* On Gfx12+ the access mode is not stored; the accessors use ELK_ALIGN_1. */
FK(3src_access_mode, /* 4+ */ 8, 8, /* 12+ */ ELK_ALIGN_1)
F(3src_swsb, /* 4+ */ -1, -1, /* 12+ */ 15, 8)
/* Bit 7 is Reserved (for future Opcode expansion) */
F(3src_hw_opcode, /* 4+ */ 6, 0, /* 12+ */ 6, 0)
/** @} */
473
474 #define REG_TYPE(reg) \
475 static inline void \
476 elk_inst_set_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \
477 elk_inst *inst, enum elk_reg_type type) \
478 { \
479 unsigned hw_type = elk_reg_type_to_a16_hw_3src_type(devinfo, type); \
480 elk_inst_set_3src_a16_##reg##_hw_type(devinfo, inst, hw_type); \
481 } \
482 \
483 static inline enum elk_reg_type \
484 elk_inst_3src_a16_##reg##_type(const struct intel_device_info *devinfo, \
485 const elk_inst *inst) \
486 { \
487 unsigned hw_type = elk_inst_3src_a16_##reg##_hw_type(devinfo, inst); \
488 return elk_a16_hw_3src_type_to_reg_type(devinfo, hw_type); \
489 }
490
REG_TYPE(dst)491 REG_TYPE(dst)
492 REG_TYPE(src)
493 #undef REG_TYPE
494
/**
 * Three-source align1 instructions:
 *
 * Most entries assert devinfo->ver >= 10.  Numbers are inclusive
 * <high, low> bit positions for the generation named in the preceding
 * comment.  For the FDC entries the four Gfx12 numbers are the extra
 * most-significant range followed by the low range; for the FI entries the
 * Gfx12 pair is <IsImm bit, extra file bit>.
 * @{
 */
/* Reserved 127:126 */
/* src2_reg_nr same in align16 */
FD20(3src_a1_src2_subreg_nr,/* 4+ */ 117, 113, /* 12+ */ 119, 115, /* 20+ */ 119, 115, -1)
FC(3src_a1_src2_hstride, /* 4+ */ 112, 111, /* 12+ */ 113, 112, devinfo->ver >= 10)
/* Reserved 110:109. src2 vstride is an implied parameter */
FC(3src_a1_src2_hw_type, /* 4+ */ 108, 106, /* 12+ */ 82, 80, devinfo->ver >= 10)
/* Reserved 105 */
/* src1_reg_nr same in align16 */
FD20(3src_a1_src1_subreg_nr, /* 4+ */ 96, 92, /* 12+ */ 103, 99, /* 20+ */ 103, 99, -1)
FC(3src_a1_src1_hstride, /* 4+ */ 91, 90, /* 12+ */ 97, 96, devinfo->ver >= 10)
FDC(3src_a1_src1_vstride, /* 4+ */ 89, 88, /* 12+ */ 91, 91, 83, 83, devinfo->ver >= 10)
FC(3src_a1_src1_hw_type, /* 4+ */ 87, 85, /* 12+ */ 90, 88, devinfo->ver >= 10)
/* Reserved 84 */
/* src0_reg_nr same in align16 */
FD20(3src_a1_src0_subreg_nr, /* 4+ */ 75, 71, /* 12+ */ 71, 67, /* 20+ */ 71, 67, -1)
FC(3src_a1_src0_hstride, /* 4+ */ 70, 69, /* 12+ */ 65, 64, devinfo->ver >= 10)
FDC(3src_a1_src0_vstride, /* 4+ */ 68, 67, /* 12+ */ 43, 43, 35, 35, devinfo->ver >= 10)
FC(3src_a1_src0_hw_type, /* 4+ */ 66, 64, /* 12+ */ 42, 40, devinfo->ver >= 10)
/* dst_reg_nr same in align16 */
FC(3src_a1_dst_subreg_nr, /* 4+ */ 55, 54, /* 12+ */ 55, 54, devinfo->ver >= 10)
FC(3src_a1_special_acc, /* 4+ */ 55, 52, /* 12+ */ 54, 51, devinfo->ver >= 10) /* aliases dst_subreg_nr */
/* Reserved 51:50 */
FC(3src_a1_dst_hstride, /* 4+ */ 49, 49, /* 12+ */ 48, 48, devinfo->ver >= 10)
FC(3src_a1_dst_hw_type, /* 4+ */ 48, 46, /* 12+ */ 38, 36, devinfo->ver >= 10)
FI(3src_a1_src2_reg_file, /* 4+ */ -1, -1, /* 8+ */ 45, 45, /* 12+ */ 47, 114)
FC(3src_a1_src1_reg_file, /* 4+ */ 44, 44, /* 12+ */ 98, 98, devinfo->ver >= 10)
FI(3src_a1_src0_reg_file, /* 4+ */ -1, -1, /* 8+ */ 43, 43, /* 12+ */ 46, 66)

/* Direct access to the IsImm bits that the FI entries above also write. */
F(3src_a1_src2_is_imm, /* 4+ */ -1, -1, /* 12+ */ 47, 47)
F(3src_a1_src0_is_imm, /* 4+ */ -1, -1, /* 12+ */ 46, 46)

/* Source Modifier fields same in align16 */
FC(3src_a1_dst_reg_file, /* 4+ */ 36, 36, /* 12+ */ 50, 50, devinfo->ver >= 10)
FC(3src_a1_exec_type, /* 4+ */ 35, 35, /* 12+ */ 39, 39, devinfo->ver >= 10)
/* Fields below this same in align16 */
/** @} */
535
536 #define REG_TYPE(reg) \
537 static inline void \
538 elk_inst_set_3src_a1_##reg##_type(const struct intel_device_info *devinfo, \
539 elk_inst *inst, enum elk_reg_type type) \
540 { \
541 UNUSED enum gfx10_align1_3src_exec_type exec_type = \
542 (enum gfx10_align1_3src_exec_type) elk_inst_3src_a1_exec_type(devinfo, \
543 inst); \
544 if (elk_reg_type_is_floating_point(type)) { \
545 assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \
546 } else { \
547 assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_INT); \
548 } \
549 unsigned hw_type = elk_reg_type_to_a1_hw_3src_type(devinfo, type); \
550 elk_inst_set_3src_a1_##reg##_hw_type(devinfo, inst, hw_type); \
551 } \
552 \
553 static inline enum elk_reg_type \
554 elk_inst_3src_a1_##reg##_type(const struct intel_device_info *devinfo, \
555 const elk_inst *inst) \
556 { \
557 enum gfx10_align1_3src_exec_type exec_type = \
558 (enum gfx10_align1_3src_exec_type) elk_inst_3src_a1_exec_type(devinfo, \
559 inst); \
560 unsigned hw_type = elk_inst_3src_a1_##reg##_hw_type(devinfo, inst); \
561 return elk_a1_hw_3src_type_to_reg_type(devinfo, hw_type, exec_type); \
562 }
563
564 REG_TYPE(dst)
565 REG_TYPE(src0)
566 REG_TYPE(src1)
567 REG_TYPE(src2)
568 #undef REG_TYPE
569
570 /**
571 * Three-source align1 instruction immediates:
572 * @{
573 */
574 static inline uint16_t
575 elk_inst_3src_a1_src0_imm(ASSERTED const struct intel_device_info *devinfo,
576 const elk_inst *insn)
577 {
578 assert(devinfo->ver >= 10);
579 if (devinfo->ver >= 12)
580 return elk_inst_bits(insn, 79, 64);
581 else
582 return elk_inst_bits(insn, 82, 67);
583 }
584
585 static inline uint16_t
elk_inst_3src_a1_src2_imm(ASSERTED const struct intel_device_info * devinfo,const elk_inst * insn)586 elk_inst_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo,
587 const elk_inst *insn)
588 {
589 assert(devinfo->ver >= 10);
590 if (devinfo->ver >= 12)
591 return elk_inst_bits(insn, 127, 112);
592 else
593 return elk_inst_bits(insn, 124, 109);
594 }
595
596 static inline void
elk_inst_set_3src_a1_src0_imm(ASSERTED const struct intel_device_info * devinfo,elk_inst * insn,uint16_t value)597 elk_inst_set_3src_a1_src0_imm(ASSERTED const struct intel_device_info *devinfo,
598 elk_inst *insn, uint16_t value)
599 {
600 assert(devinfo->ver >= 10);
601 if (devinfo->ver >= 12)
602 elk_inst_set_bits(insn, 79, 64, value);
603 else
604 elk_inst_set_bits(insn, 82, 67, value);
605 }
606
607 static inline void
elk_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info * devinfo,elk_inst * insn,uint16_t value)608 elk_inst_set_3src_a1_src2_imm(ASSERTED const struct intel_device_info *devinfo,
609 elk_inst *insn, uint16_t value)
610 {
611 assert(devinfo->ver >= 10);
612 if (devinfo->ver >= 12)
613 elk_inst_set_bits(insn, 127, 112, value);
614 else
615 elk_inst_set_bits(insn, 124, 109, value);
616 }
617 /** @} */
618
/**
 * Three-source systolic instructions:
 *
 * Every entry passes -1, -1 for the pre-Gfx12 range and gives only a
 * Gfx12+ bit range (inclusive <high, low>).
 * @{
 */
F(dpas_3src_src2_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 127, 120)
F(dpas_3src_src2_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 119, 115)
F(dpas_3src_src2_reg_file, /* 4+ */ -1, -1, /* 12+ */ 114, 114)
F(dpas_3src_src1_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 111, 104)
F(dpas_3src_src1_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 103, 99)
F(dpas_3src_src1_reg_file, /* 4+ */ -1, -1, /* 12+ */ 98, 98)
F(dpas_3src_src1_hw_type, /* 4+ */ -1, -1, /* 12+ */ 90, 88)
F(dpas_3src_src1_subbyte, /* 4+ */ -1, -1, /* 12+ */ 87, 86)
F(dpas_3src_src2_subbyte, /* 4+ */ -1, -1, /* 12+ */ 85, 84)
F(dpas_3src_src2_hw_type, /* 4+ */ -1, -1, /* 12+ */ 82, 80)
F(dpas_3src_src0_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 79, 72)
F(dpas_3src_src0_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 71, 67)
F(dpas_3src_src0_reg_file, /* 4+ */ -1, -1, /* 12+ */ 66, 66)
F(dpas_3src_dst_reg_nr, /* 4+ */ -1, -1, /* 12+ */ 63, 56)
F(dpas_3src_dst_subreg_nr, /* 4+ */ -1, -1, /* 12+ */ 55, 51)
F(dpas_3src_dst_reg_file, /* 4+ */ -1, -1, /* 12+ */ 50, 50)
F(dpas_3src_sdepth, /* 4+ */ -1, -1, /* 12+ */ 49, 48)
F(dpas_3src_rcount, /* 4+ */ -1, -1, /* 12+ */ 45, 43)
F(dpas_3src_src0_hw_type, /* 4+ */ -1, -1, /* 12+ */ 42, 40)
F(dpas_3src_exec_type, /* 4+ */ -1, -1, /* 12+ */ 39, 39)
F(dpas_3src_dst_hw_type, /* 4+ */ -1, -1, /* 12+ */ 38, 36)
/** @} */
645
646 #define REG_TYPE(reg) \
647 static inline void \
648 elk_inst_set_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \
649 elk_inst *inst, enum elk_reg_type type) \
650 { \
651 UNUSED enum gfx10_align1_3src_exec_type exec_type = \
652 (enum gfx10_align1_3src_exec_type) elk_inst_dpas_3src_exec_type(devinfo,\
653 inst); \
654 if (elk_reg_type_is_floating_point(type)) { \
655 assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_FLOAT); \
656 } else { \
657 assert(exec_type == ELK_ALIGN1_3SRC_EXEC_TYPE_INT); \
658 } \
659 unsigned hw_type = elk_reg_type_to_a1_hw_3src_type(devinfo, type); \
660 elk_inst_set_dpas_3src_##reg##_hw_type(devinfo, inst, hw_type); \
661 } \
662 \
663 static inline enum elk_reg_type \
664 elk_inst_dpas_3src_##reg##_type(const struct intel_device_info *devinfo, \
665 const elk_inst *inst) \
666 { \
667 enum gfx10_align1_3src_exec_type exec_type = \
668 (enum gfx10_align1_3src_exec_type) elk_inst_dpas_3src_exec_type(devinfo,\
669 inst); \
670 unsigned hw_type = elk_inst_dpas_3src_##reg##_hw_type(devinfo, inst); \
671 return elk_a1_hw_3src_type_to_reg_type(devinfo, hw_type, exec_type); \
672 }
673
REG_TYPE(dst)674 REG_TYPE(dst)
675 REG_TYPE(src0)
676 REG_TYPE(src1)
677 REG_TYPE(src2)
678 #undef REG_TYPE
679
680 /**
681 * Flow control instruction bits:
682 * @{
683 */
684 static inline void
685 elk_inst_set_uip(const struct intel_device_info *devinfo,
686 elk_inst *inst, int32_t value)
687 {
688 assert(devinfo->ver >= 6);
689
690 if (devinfo->ver >= 12)
691 elk_inst_set_src1_is_imm(devinfo, inst, 1);
692
693 if (devinfo->ver >= 8) {
694 elk_inst_set_bits(inst, 95, 64, (uint32_t)value);
695 } else {
696 assert(value <= (1 << 16) - 1);
697 assert(value > -(1 << 16));
698 elk_inst_set_bits(inst, 127, 112, (uint16_t)value);
699 }
700 }
701
702 static inline int32_t
elk_inst_uip(const struct intel_device_info * devinfo,const elk_inst * inst)703 elk_inst_uip(const struct intel_device_info *devinfo, const elk_inst *inst)
704 {
705 assert(devinfo->ver >= 6);
706
707 if (devinfo->ver >= 8) {
708 return elk_inst_bits(inst, 95, 64);
709 } else {
710 return (int16_t)elk_inst_bits(inst, 127, 112);
711 }
712 }
713
714 static inline void
elk_inst_set_jip(const struct intel_device_info * devinfo,elk_inst * inst,int32_t value)715 elk_inst_set_jip(const struct intel_device_info *devinfo,
716 elk_inst *inst, int32_t value)
717 {
718 assert(devinfo->ver >= 6);
719
720 if (devinfo->ver >= 12)
721 elk_inst_set_src0_is_imm(devinfo, inst, 1);
722
723 if (devinfo->ver >= 8) {
724 elk_inst_set_bits(inst, 127, 96, (uint32_t)value);
725 } else {
726 assert(value <= (1 << 15) - 1);
727 assert(value >= -(1 << 15));
728 elk_inst_set_bits(inst, 111, 96, (uint16_t)value);
729 }
730 }
731
732 static inline int32_t
elk_inst_jip(const struct intel_device_info * devinfo,const elk_inst * inst)733 elk_inst_jip(const struct intel_device_info *devinfo, const elk_inst *inst)
734 {
735 assert(devinfo->ver >= 6);
736
737 if (devinfo->ver >= 8) {
738 return elk_inst_bits(inst, 127, 96);
739 } else {
740 return (int16_t)elk_inst_bits(inst, 111, 96);
741 }
742 }
743
744 /** Like FC, but using int16_t to handle negative jump targets. */
745 #define FJ(name, high, low, assertions) \
746 static inline void \
747 elk_inst_set_##name(const struct intel_device_info *devinfo, elk_inst *inst, int16_t v) \
748 { \
749 assert(assertions); \
750 (void) devinfo; \
751 elk_inst_set_bits(inst, high, low, (uint16_t) v); \
752 } \
753 static inline int16_t \
754 elk_inst_##name(const struct intel_device_info *devinfo, const elk_inst *inst)\
755 { \
756 assert(assertions); \
757 (void) devinfo; \
758 return elk_inst_bits(inst, high, low); \
759 }
760
761 FJ(gfx6_jump_count, 63, 48, devinfo->ver == 6)
762 FJ(gfx4_jump_count, 111, 96, devinfo->ver < 6)
763 FC(gfx4_pop_count, /* 4+ */ 115, 112, /* 12+ */ -1, -1, devinfo->ver < 6)
764 /** @} */
765
/**
 * SEND instructions:
 *
 * Numbers are inclusive <high, low> bit positions for the generation named
 * in the preceding comment; the trailing expression on FC entries is the
 * assertion guarding each accessor (most require ver >= 9, two require
 * verx10 >= 125).
 * @{
 */
FC(send_ex_desc_ia_subreg_nr, /* 4+ */ 82, 80, /* 12+ */ 42, 40, devinfo->ver >= 9)
FC(send_src0_address_mode, /* 4+ */ 79, 79, /* 12+ */ -1, -1, devinfo->ver >= 9)
FC(send_sel_reg32_desc, /* 4+ */ 77, 77, /* 12+ */ 48, 48, devinfo->ver >= 9)
FC(send_sel_reg32_ex_desc, /* 4+ */ 61, 61, /* 12+ */ 49, 49, devinfo->ver >= 9)
F8(send_src0_reg_file, /* 4+ */ 38, 37, /* 8+ */ 42, 41, /* 12+ */ 66, 66)
FC(send_src1_reg_nr, /* 4+ */ 51, 44, /* 12+ */ 111, 104, devinfo->ver >= 9)
FC(send_src1_len, /* 4+ */ -1, -1, /* 12+ */ 103, 99, devinfo->verx10 >= 125)
FC(send_src1_reg_file, /* 4+ */ 36, 36, /* 12+ */ 98, 98, devinfo->ver >= 9)
FC(send_dst_reg_file, /* 4+ */ 35, 35, /* 12+ */ 50, 50, devinfo->ver >= 9)
FC(send_ex_bso, /* 4+ */ -1, -1, /* 12+ */ 39, 39, devinfo->verx10 >= 125)
/** @} */
781
/* Message descriptor bits.
 *
 * MD(x) maps descriptor bit x to its instruction bit in the layout where the
 * descriptor is contiguous from bit 96.  MD12(x) does the same for the layout
 * in which the descriptor is split into five pieces:
 *
 *    descriptor 31:30 -> instruction 123:122
 *    descriptor 29:25 -> instruction  71:67
 *    descriptor 24:20 -> instruction  55:51
 *    descriptor 19:11 -> instruction 121:113
 *    descriptor 10:0  -> instruction  91:81
 */
#define MD(x) (96 + (x))
#define MD12(x)                        \
   ((x) >= 30 ? 122 + ((x) - 30) :     \
    (x) >= 25 ?  67 + ((x) - 25) :     \
    (x) >= 20 ?  51 + ((x) - 20) :     \
    (x) >= 11 ? 113 + ((x) - 11) :     \
                 81 + ((x) -  0))
789
790 /**
791 * Set the SEND(C) message descriptor immediate.
792 *
793 * This doesn't include the SFID nor the EOT field that were considered to be
794 * part of the message descriptor by ancient versions of the BSpec, because
795 * they are present in the instruction even if the message descriptor is
796 * provided indirectly in the address register, so we want to specify them
797 * separately.
798 */
799 static inline void
elk_inst_set_send_desc(const struct intel_device_info * devinfo,elk_inst * inst,uint32_t value)800 elk_inst_set_send_desc(const struct intel_device_info *devinfo,
801 elk_inst *inst, uint32_t value)
802 {
803 if (devinfo->ver >= 12) {
804 elk_inst_set_bits(inst, 123, 122, GET_BITS(value, 31, 30));
805 elk_inst_set_bits(inst, 71, 67, GET_BITS(value, 29, 25));
806 elk_inst_set_bits(inst, 55, 51, GET_BITS(value, 24, 20));
807 elk_inst_set_bits(inst, 121, 113, GET_BITS(value, 19, 11));
808 elk_inst_set_bits(inst, 91, 81, GET_BITS(value, 10, 0));
809 } else if (devinfo->ver >= 9) {
810 elk_inst_set_bits(inst, 126, 96, value);
811 assert(value >> 31 == 0);
812 } else if (devinfo->ver >= 5) {
813 elk_inst_set_bits(inst, 124, 96, value);
814 assert(value >> 29 == 0);
815 } else {
816 elk_inst_set_bits(inst, 119, 96, value);
817 assert(value >> 24 == 0);
818 }
819 }
820
821 /**
822 * Get the SEND(C) message descriptor immediate.
823 *
824 * \sa elk_inst_set_send_desc().
825 */
826 static inline uint32_t
elk_inst_send_desc(const struct intel_device_info * devinfo,const elk_inst * inst)827 elk_inst_send_desc(const struct intel_device_info *devinfo,
828 const elk_inst *inst)
829 {
830 if (devinfo->ver >= 12) {
831 return (elk_inst_bits(inst, 123, 122) << 30 |
832 elk_inst_bits(inst, 71, 67) << 25 |
833 elk_inst_bits(inst, 55, 51) << 20 |
834 elk_inst_bits(inst, 121, 113) << 11 |
835 elk_inst_bits(inst, 91, 81));
836 } else if (devinfo->ver >= 9) {
837 return elk_inst_bits(inst, 126, 96);
838 } else if (devinfo->ver >= 5) {
839 return elk_inst_bits(inst, 124, 96);
840 } else {
841 return elk_inst_bits(inst, 119, 96);
842 }
843 }
844
845 /**
846 * Set the SEND(C) message extended descriptor immediate.
847 *
848 * This doesn't include the SFID nor the EOT field that were considered to be
849 * part of the extended message descriptor by some versions of the BSpec,
850 * because they are present in the instruction even if the extended message
851 * descriptor is provided indirectly in a register, so we want to specify them
852 * separately.
853 */
854 static inline void
elk_inst_set_send_ex_desc(const struct intel_device_info * devinfo,elk_inst * inst,uint32_t value)855 elk_inst_set_send_ex_desc(const struct intel_device_info *devinfo,
856 elk_inst *inst, uint32_t value)
857 {
858 if (devinfo->ver >= 12) {
859 elk_inst_set_bits(inst, 127, 124, GET_BITS(value, 31, 28));
860 elk_inst_set_bits(inst, 97, 96, GET_BITS(value, 27, 26));
861 elk_inst_set_bits(inst, 65, 64, GET_BITS(value, 25, 24));
862 elk_inst_set_bits(inst, 47, 35, GET_BITS(value, 23, 11));
863 elk_inst_set_bits(inst, 103, 99, GET_BITS(value, 10, 6));
864 assert(GET_BITS(value, 5, 0) == 0);
865 } else {
866 assert(devinfo->ver >= 9);
867 elk_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
868 elk_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
869 elk_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
870 elk_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
871 assert(GET_BITS(value, 15, 0) == 0);
872 }
873 }
874
875 /**
876 * Set the SENDS(C) message extended descriptor immediate.
877 *
878 * This doesn't include the SFID nor the EOT field that were considered to be
879 * part of the extended message descriptor by some versions of the BSpec,
880 * because they are present in the instruction even if the extended message
881 * descriptor is provided indirectly in a register, so we want to specify them
882 * separately.
883 */
884 static inline void
elk_inst_set_sends_ex_desc(const struct intel_device_info * devinfo,elk_inst * inst,uint32_t value)885 elk_inst_set_sends_ex_desc(const struct intel_device_info *devinfo,
886 elk_inst *inst, uint32_t value)
887 {
888 if (devinfo->ver >= 12) {
889 elk_inst_set_send_ex_desc(devinfo, inst, value);
890 } else {
891 elk_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16));
892 assert(GET_BITS(value, 15, 10) == 0);
893 elk_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6));
894 assert(GET_BITS(value, 5, 0) == 0);
895 }
896 }
897
898 /**
899 * Get the SEND(C) message extended descriptor immediate.
900 *
901 * \sa elk_inst_set_send_ex_desc().
902 */
903 static inline uint32_t
elk_inst_send_ex_desc(const struct intel_device_info * devinfo,const elk_inst * inst)904 elk_inst_send_ex_desc(const struct intel_device_info *devinfo,
905 const elk_inst *inst)
906 {
907 if (devinfo->ver >= 12) {
908 return (elk_inst_bits(inst, 127, 124) << 28 |
909 elk_inst_bits(inst, 97, 96) << 26 |
910 elk_inst_bits(inst, 65, 64) << 24 |
911 elk_inst_bits(inst, 47, 35) << 11 |
912 elk_inst_bits(inst, 103, 99) << 6);
913 } else {
914 assert(devinfo->ver >= 9);
915 return (elk_inst_bits(inst, 94, 91) << 28 |
916 elk_inst_bits(inst, 88, 85) << 24 |
917 elk_inst_bits(inst, 83, 80) << 20 |
918 elk_inst_bits(inst, 67, 64) << 16);
919 }
920 }
921
922 /**
923 * Get the SENDS(C) message extended descriptor immediate.
924 *
925 * \sa elk_inst_set_send_ex_desc().
926 */
927 static inline uint32_t
elk_inst_sends_ex_desc(const struct intel_device_info * devinfo,const elk_inst * inst)928 elk_inst_sends_ex_desc(const struct intel_device_info *devinfo,
929 const elk_inst *inst)
930 {
931 if (devinfo->ver >= 12) {
932 return elk_inst_send_ex_desc(devinfo, inst);
933 } else {
934 return (elk_inst_bits(inst, 95, 80) << 16 |
935 elk_inst_bits(inst, 67, 64) << 6);
936 }
937 }
938
939 /**
940 * Fields for SEND messages:
941 * @{
942 */
943 F(eot, /* 4+ */ 127, 127, /* 12+ */ 34, 34)
944 FF(mlen,
945 /* 4: */ 119, 116,
946 /* 4.5: */ 119, 116,
947 /* 5: */ 124, 121,
948 /* 6: */ 124, 121,
949 /* 7: */ 124, 121,
950 /* 8: */ 124, 121,
951 /* 12: */ MD12(28), MD12(25),
952 /* 20: */ MD12(28), MD12(25));
953 FF(rlen,
954 /* 4: */ 115, 112,
955 /* 4.5: */ 115, 112,
956 /* 5: */ 120, 116,
957 /* 6: */ 120, 116,
958 /* 7: */ 120, 116,
959 /* 8: */ 120, 116,
960 /* 12: */ MD12(24), MD12(20),
961 /* 20: */ MD12(24), MD12(20));
962 FF(header_present,
963 /* 4: doesn't exist */ -1, -1, -1, -1,
964 /* 5: */ 115, 115,
965 /* 6: */ 115, 115,
966 /* 7: */ 115, 115,
967 /* 8: */ 115, 115,
968 /* 12: */ MD12(19), MD12(19),
969 /* 20: */ MD12(19), MD12(19))
970 F(gateway_notify, /* 4+ */ MD(16), MD(15), /* 12+ */ -1, -1)
971 FD(function_control,
972 /* 4: */ 111, 96,
973 /* 4.5: */ 111, 96,
974 /* 5: */ 114, 96,
975 /* 6: */ 114, 96,
976 /* 7: */ 114, 96,
977 /* 8: */ 114, 96,
978 /* 12: */ MD12(18), MD12(11), MD12(10), MD12(0))
979 FF(gateway_subfuncid,
980 /* 4: */ MD(1), MD(0),
981 /* 4.5: */ MD(1), MD(0),
982 /* 5: */ MD(1), MD(0), /* 2:0, but bit 2 is reserved MBZ */
983 /* 6: */ MD(2), MD(0),
984 /* 7: */ MD(2), MD(0),
985 /* 8: */ MD(2), MD(0),
986 /* 12: */ MD12(2), MD12(0),
987 /* 20: */ MD12(2), MD12(0))
988 FF(sfid,
989 /* 4: */ 123, 120, /* called msg_target */
990 /* 4.5 */ 123, 120,
991 /* 5: */ 95, 92,
992 /* 6: */ 27, 24,
993 /* 7: */ 27, 24,
994 /* 8: */ 27, 24,
995 /* 12: */ 95, 92,
996 /* 20: */ 95, 92)
997 FF(null_rt,
998 /* 4-7: */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
999 /* 8: */ 80, 80,
1000 /* 12: */ 44, 44,
1001 /* 20: */ 44, 44) /* actually only Gfx11+ */
1002 FC(base_mrf, /* 4+ */ 27, 24, /* 12+ */ -1, -1, devinfo->ver < 6);
1003 FF(send_rta_index,
1004 /* 4: */ -1, -1,
1005 /* 4.5 */ -1, -1,
1006 /* 5: */ -1, -1,
1007 /* 6: */ -1, -1,
1008 /* 7: */ -1, -1,
1009 /* 8: */ -1, -1,
1010 /* 12: */ 38, 36,
1011 /* 20: */ 38, 36)
1012 /** @} */
1013
1014 /**
1015 * URB message function control bits:
1016 * @{
1017 */
1018 FF(urb_per_slot_offset,
1019 /* 4-6: */ -1, -1, -1, -1, -1, -1, -1, -1,
1020 /* 7: */ MD(16), MD(16),
1021 /* 8: */ MD(17), MD(17),
1022 /* 12: */ MD12(17), MD12(17),
1023 /* 20: */ MD12(17), MD12(17))
1024 FC(urb_channel_mask_present, /* 4+ */ MD(15), MD(15), /* 12+ */ MD12(15), MD12(15), devinfo->ver >= 8)
1025 FC(urb_complete, /* 4+ */ MD(15), MD(15), /* 12+ */ -1, -1, devinfo->ver < 8)
1026 FC(urb_used, /* 4+ */ MD(14), MD(14), /* 12+ */ -1, -1, devinfo->ver < 7)
1027 FC(urb_allocate, /* 4+ */ MD(13), MD(13), /* 12+ */ -1, -1, devinfo->ver < 7)
1028 FF(urb_swizzle_control,
1029 /* 4: */ MD(11), MD(10),
1030 /* 4.5: */ MD(11), MD(10),
1031 /* 5: */ MD(11), MD(10),
1032 /* 6: */ MD(11), MD(10),
1033 /* 7: */ MD(14), MD(14),
1034 /* 8: */ MD(15), MD(15),
1035 /* 12: */ -1, -1,
1036 /* 20: */ -1, -1)
1037 FD(urb_global_offset,
1038 /* 4: */ MD( 9), MD(4),
1039 /* 4.5: */ MD( 9), MD(4),
1040 /* 5: */ MD( 9), MD(4),
1041 /* 6: */ MD( 9), MD(4),
1042 /* 7: */ MD(13), MD(3),
1043 /* 8: */ MD(14), MD(4),
1044 /* 12: */ MD12(14), MD12(11), MD12(10), MD12(4))
1045 FF(urb_opcode,
1046 /* 4: */ MD( 3), MD(0),
1047 /* 4.5: */ MD( 3), MD(0),
1048 /* 5: */ MD( 3), MD(0),
1049 /* 6: */ MD( 3), MD(0),
1050 /* 7: */ MD( 2), MD(0),
1051 /* 8: */ MD( 3), MD(0),
1052 /* 12: */ MD12(3), MD12(0),
1053 /* 20: */ MD12(3), MD12(0))
1054 /** @} */
1055
1056 /**
1057 * Gfx4-5 math messages:
1058 * @{
1059 */
1060 FC(math_msg_data_type, /* 4+ */ MD(7), MD(7), /* 12+ */ -1, -1, devinfo->ver < 6)
1061 FC(math_msg_saturate, /* 4+ */ MD(6), MD(6), /* 12+ */ -1, -1, devinfo->ver < 6)
1062 FC(math_msg_precision, /* 4+ */ MD(5), MD(5), /* 12+ */ -1, -1, devinfo->ver < 6)
1063 FC(math_msg_signed_int, /* 4+ */ MD(4), MD(4), /* 12+ */ -1, -1, devinfo->ver < 6)
1064 FC(math_msg_function, /* 4+ */ MD(3), MD(0), /* 12+ */ -1, -1, devinfo->ver < 6)
1065 /** @} */
1066
1067 /**
1068 * Sampler message function control bits:
1069 * @{
1070 */
1071 FF(sampler_simd_mode,
1072 /* 4: doesn't exist */ -1, -1, -1, -1,
1073 /* 5: */ MD(17), MD(16),
1074 /* 6: */ MD(17), MD(16),
1075 /* 7: */ MD(18), MD(17),
1076 /* 8: */ MD(18), MD(17),
1077 /* 12: */ MD12(18), MD12(17),
1078 /* 20: */ MD12(18), MD12(17))
1079 FF(sampler_msg_type,
1080 /* 4: */ MD(15), MD(14),
1081 /* 4.5: */ MD(15), MD(12),
1082 /* 5: */ MD(15), MD(12),
1083 /* 6: */ MD(15), MD(12),
1084 /* 7: */ MD(16), MD(12),
1085 /* 8: */ MD(16), MD(12),
1086 /* 12: */ MD12(16), MD12(12),
1087 /* 20: */ MD12(16), MD12(12))
1088 FC(sampler_return_format, /* 4+ */ MD(13), MD(12), /* 12+ */ -1, -1, devinfo->verx10 == 40)
1089 FD(sampler,
1090 /* 4: */ MD(11), MD(8),
1091 /* 4.5: */ MD(11), MD(8),
1092 /* 5: */ MD(11), MD(8),
1093 /* 6: */ MD(11), MD(8),
1094 /* 7: */ MD(11), MD(8),
1095 /* 8: */ MD(11), MD(8),
1096 /* 12: */ MD12(11), MD12(11), MD12(10), MD12(8))
1097 F(binding_table_index, /* 4+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0)) /* also used by other messages */
1098 /** @} */
1099
1100 /**
1101 * Data port message function control bits:
1102 * @{
1103 */
1104 FC(dp_category, /* 4+ */ MD(18), MD(18), /* 12+ */ MD12(18), MD12(18), devinfo->ver >= 7)
1105
1106 /* Gfx4-5 store fields in different bits for read/write messages. */
1107 FF(dp_read_msg_type,
1108 /* 4: */ MD(13), MD(12),
1109 /* 4.5: */ MD(13), MD(11),
1110 /* 5: */ MD(13), MD(11),
1111 /* 6: */ MD(16), MD(13),
1112 /* 7: */ MD(17), MD(14),
1113 /* 8: */ MD(17), MD(14),
1114 /* 12: */ MD12(17), MD12(14),
1115 /* 20: */ MD12(17), MD12(14))
1116 FF(dp_write_msg_type,
1117 /* 4: */ MD(14), MD(12),
1118 /* 4.5: */ MD(14), MD(12),
1119 /* 5: */ MD(14), MD(12),
1120 /* 6: */ MD(16), MD(13),
1121 /* 7: */ MD(17), MD(14),
1122 /* 8: */ MD(17), MD(14),
1123 /* 12: */ MD12(17), MD12(14),
1124 /* 20: */ MD12(17), MD12(14))
1125 FD(dp_read_msg_control,
1126 /* 4: */ MD(11), MD( 8),
1127 /* 4.5: */ MD(10), MD( 8),
1128 /* 5: */ MD(10), MD( 8),
1129 /* 6: */ MD(12), MD( 8),
1130 /* 7: */ MD(13), MD( 8),
1131 /* 8: */ MD(13), MD( 8),
1132 /* 12: */ MD12(13), MD12(11), MD12(10), MD12(8))
1133 FD(dp_write_msg_control,
1134 /* 4: */ MD(11), MD( 8),
1135 /* 4.5: */ MD(11), MD( 8),
1136 /* 5: */ MD(11), MD( 8),
1137 /* 6: */ MD(12), MD( 8),
1138 /* 7: */ MD(13), MD( 8),
1139 /* 8: */ MD(13), MD( 8),
1140 /* 12: */ MD12(13), MD12(11), MD12(10), MD12(8))
1141 FC(dp_read_target_cache, /* 4+ */ MD(15), MD(14), /* 12+ */ -1, -1, devinfo->ver < 6);
1142
1143 FF(dp_write_commit,
1144 /* 4: */ MD(15), MD(15),
1145 /* 4.5: */ MD(15), MD(15),
1146 /* 5: */ MD(15), MD(15),
1147 /* 6: */ MD(17), MD(17),
1148 /* 7+: does not exist */ -1, -1, -1, -1,
1149 /* 12: */ -1, -1,
1150 /* 20: */ -1, -1)
1151
1152 /* Gfx6+ use the same bit locations for everything. */
1153 FF(dp_msg_type,
1154 /* 4-5: use dp_read_msg_type or dp_write_msg_type instead */
1155 -1, -1, -1, -1, -1, -1,
1156 /* 6: */ MD(16), MD(13),
1157 /* 7: */ MD(17), MD(14),
1158 /* 8: */ MD(18), MD(14),
1159 /* 12: */ MD12(18), MD12(14),
1160 /* 20: */ MD12(18), MD12(14))
1161 FD(dp_msg_control,
1162 /* 4: */ MD(11), MD( 8),
1163 /* 4.5-5: use dp_read_msg_control or dp_write_msg_control */ -1, -1, -1, -1,
1164 /* 6: */ MD(12), MD( 8),
1165 /* 7: */ MD(13), MD( 8),
1166 /* 8: */ MD(13), MD( 8),
1167 /* 12: */ MD12(13), MD12(11), MD12(10), MD12(8))
1168 /** @} */
1169
1170 /**
1171 * Scratch message bits (Gfx7+):
1172 * @{
1173 */
1174 FC(scratch_read_write, /* 4+ */ MD(17), MD(17), /* 12+ */ MD12(17), MD12(17), devinfo->ver >= 7) /* 0 = read, 1 = write */
1175 FC(scratch_type, /* 4+ */ MD(16), MD(16), /* 12+ */ -1, -1, devinfo->ver >= 7) /* 0 = OWord, 1 = DWord */
1176 FC(scratch_invalidate_after_read, /* 4+ */ MD(15), MD(15), /* 12+ */ MD12(15), MD12(15), devinfo->ver >= 7)
1177 FC(scratch_block_size, /* 4+ */ MD(13), MD(12), /* 12+ */ MD12(13), MD12(12), devinfo->ver >= 7)
1178 FD(scratch_addr_offset,
1179 /* 4: */ -1, -1,
1180 /* 4.5: */ -1, -1,
1181 /* 5: */ -1, -1,
1182 /* 6: */ -1, -1,
1183 /* 7: */ MD(11), MD(0),
1184 /* 8: */ MD(11), MD(0),
1185 /* 12: */ MD12(11), MD12(11), MD12(10), MD12(0))
1186 /** @} */
1187
1188 /**
1189 * Render Target message function control bits:
1190 * @{
1191 */
1192 FF(rt_last,
1193 /* 4: */ MD(11), MD(11),
1194 /* 4.5: */ MD(11), MD(11),
1195 /* 5: */ MD(11), MD(11),
1196 /* 6: */ MD(12), MD(12),
1197 /* 7: */ MD(12), MD(12),
1198 /* 8: */ MD(12), MD(12),
1199 /* 12: */ MD12(12), MD12(12),
1200 /* 20: */ MD12(12), MD12(12))
1201 FC(rt_slot_group, /* 4+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11), devinfo->ver >= 6)
1202 F(rt_message_type, /* 4+ */ MD(10), MD( 8), /* 12+ */ MD12(10), MD12(8))
1203 /** @} */
1204
1205 /**
1206 * Thread Spawn message function control bits:
1207 * @{
1208 */
1209 FC(ts_resource_select, /* 4+ */ MD( 4), MD( 4), /* 12+ */ -1, -1, devinfo->ver < 11)
1210 FC(ts_request_type, /* 4+ */ MD( 1), MD( 1), /* 12+ */ -1, -1, devinfo->ver < 11)
1211 F(ts_opcode, /* 4+ */ MD( 0), MD( 0), /* 12+ */ MD12(0), MD12(0))
1212 /** @} */
1213
1214 /**
1215 * Pixel Interpolator message function control bits:
1216 * @{
1217 */
1218 F(pi_simd_mode, /* 4+ */ MD(16), MD(16), /* 12+ */ MD12(16), MD12(16))
1219 F(pi_nopersp, /* 4+ */ MD(14), MD(14), /* 12+ */ MD12(14), MD12(14))
1220 F(pi_message_type, /* 4+ */ MD(13), MD(12), /* 12+ */ MD12(13), MD12(12))
1221 F(pi_slot_group, /* 4+ */ MD(11), MD(11), /* 12+ */ MD12(11), MD12(11))
1222 F(pi_message_data, /* 4+ */ MD(7), MD(0), /* 12+ */ MD12(7), MD12(0))
1223 /** @} */
1224
1225 /**
1226 * Immediates:
1227 * @{
1228 */
1229 static inline int
elk_inst_imm_d(const struct intel_device_info * devinfo,const elk_inst * insn)1230 elk_inst_imm_d(const struct intel_device_info *devinfo, const elk_inst *insn)
1231 {
1232 (void) devinfo;
1233 return elk_inst_bits(insn, 127, 96);
1234 }
1235
1236 static inline unsigned
elk_inst_imm_ud(const struct intel_device_info * devinfo,const elk_inst * insn)1237 elk_inst_imm_ud(const struct intel_device_info *devinfo, const elk_inst *insn)
1238 {
1239 (void) devinfo;
1240 return elk_inst_bits(insn, 127, 96);
1241 }
1242
1243 static inline uint64_t
elk_inst_imm_uq(const struct intel_device_info * devinfo,const elk_inst * insn)1244 elk_inst_imm_uq(const struct intel_device_info *devinfo,
1245 const elk_inst *insn)
1246 {
1247 if (devinfo->ver >= 12) {
1248 return elk_inst_bits(insn, 95, 64) << 32 |
1249 elk_inst_bits(insn, 127, 96);
1250 } else {
1251 assert(devinfo->ver >= 8);
1252 return elk_inst_bits(insn, 127, 64);
1253 }
1254 }
1255
1256 static inline float
elk_inst_imm_f(const struct intel_device_info * devinfo,const elk_inst * insn)1257 elk_inst_imm_f(const struct intel_device_info *devinfo, const elk_inst *insn)
1258 {
1259 union {
1260 float f;
1261 uint32_t u;
1262 } ft;
1263 (void) devinfo;
1264 ft.u = elk_inst_bits(insn, 127, 96);
1265 return ft.f;
1266 }
1267
1268 static inline double
elk_inst_imm_df(const struct intel_device_info * devinfo,const elk_inst * insn)1269 elk_inst_imm_df(const struct intel_device_info *devinfo, const elk_inst *insn)
1270 {
1271 union {
1272 double d;
1273 uint64_t u;
1274 } dt;
1275 dt.u = elk_inst_imm_uq(devinfo, insn);
1276 return dt.d;
1277 }
1278
1279 static inline void
elk_inst_set_imm_d(const struct intel_device_info * devinfo,elk_inst * insn,int value)1280 elk_inst_set_imm_d(const struct intel_device_info *devinfo,
1281 elk_inst *insn, int value)
1282 {
1283 (void) devinfo;
1284 return elk_inst_set_bits(insn, 127, 96, value);
1285 }
1286
1287 static inline void
elk_inst_set_imm_ud(const struct intel_device_info * devinfo,elk_inst * insn,unsigned value)1288 elk_inst_set_imm_ud(const struct intel_device_info *devinfo,
1289 elk_inst *insn, unsigned value)
1290 {
1291 (void) devinfo;
1292 return elk_inst_set_bits(insn, 127, 96, value);
1293 }
1294
1295 static inline void
elk_inst_set_imm_f(const struct intel_device_info * devinfo,elk_inst * insn,float value)1296 elk_inst_set_imm_f(const struct intel_device_info *devinfo,
1297 elk_inst *insn, float value)
1298 {
1299 union {
1300 float f;
1301 uint32_t u;
1302 } ft;
1303 (void) devinfo;
1304 ft.f = value;
1305 elk_inst_set_bits(insn, 127, 96, ft.u);
1306 }
1307
1308 static inline void
elk_inst_set_imm_df(const struct intel_device_info * devinfo,elk_inst * insn,double value)1309 elk_inst_set_imm_df(const struct intel_device_info *devinfo,
1310 elk_inst *insn, double value)
1311 {
1312 union {
1313 double d;
1314 uint64_t u;
1315 } dt;
1316 (void) devinfo;
1317 dt.d = value;
1318
1319 if (devinfo->ver >= 12) {
1320 elk_inst_set_bits(insn, 95, 64, dt.u >> 32);
1321 elk_inst_set_bits(insn, 127, 96, dt.u & 0xFFFFFFFF);
1322 } else {
1323 elk_inst_set_bits(insn, 127, 64, dt.u);
1324 }
1325 }
1326
1327 static inline void
elk_inst_set_imm_uq(const struct intel_device_info * devinfo,elk_inst * insn,uint64_t value)1328 elk_inst_set_imm_uq(const struct intel_device_info *devinfo,
1329 elk_inst *insn, uint64_t value)
1330 {
1331 (void) devinfo;
1332 if (devinfo->ver >= 12) {
1333 elk_inst_set_bits(insn, 95, 64, value >> 32);
1334 elk_inst_set_bits(insn, 127, 96, value & 0xFFFFFFFF);
1335 } else {
1336 elk_inst_set_bits(insn, 127, 64, value);
1337 }
1338 }
1339
1340 /** @} */
1341
1342 #define REG_TYPE(reg) \
1343 static inline void \
1344 elk_inst_set_##reg##_file_type(const struct intel_device_info *devinfo, \
1345 elk_inst *inst, enum elk_reg_file file, \
1346 enum elk_reg_type type) \
1347 { \
1348 assert(file <= ELK_IMMEDIATE_VALUE); \
1349 unsigned hw_type = elk_reg_type_to_hw_type(devinfo, file, type); \
1350 elk_inst_set_##reg##_reg_file(devinfo, inst, file); \
1351 elk_inst_set_##reg##_reg_hw_type(devinfo, inst, hw_type); \
1352 } \
1353 \
1354 static inline enum elk_reg_type \
1355 elk_inst_##reg##_type(const struct intel_device_info *devinfo, \
1356 const elk_inst *inst) \
1357 { \
1358 unsigned file = __builtin_strcmp("dst", #reg) == 0 ? \
1359 (unsigned) ELK_GENERAL_REGISTER_FILE : \
1360 elk_inst_##reg##_reg_file(devinfo, inst); \
1361 unsigned hw_type = elk_inst_##reg##_reg_hw_type(devinfo, inst); \
1362 return elk_hw_type_to_reg_type(devinfo, (enum elk_reg_file)file, hw_type); \
1363 }
1364
1365 REG_TYPE(dst)
REG_TYPE(src0)1366 REG_TYPE(src0)
1367 REG_TYPE(src1)
1368 #undef REG_TYPE
1369
1370
/* Generate elk_inst_set_<reg>_ia1_addr_imm() / elk_inst_<reg>_ia1_addr_imm().
 *
 * The AddrImm fields are split into two discontiguous sections on Gfx8+:
 * the low nine bits live in [g8_high:g8_low] and bit 9 in g8_nine.  Gfx12
 * uses one field again, and Gfx20 stores value >> 1 in [g20_high:g20_low]
 * with bit 0 in g20_zero (or required to be zero when g20_zero is -1).
 */
#define ELK_IA1_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low,     \
                         g12_high, g12_low, g20_high, g20_low, g20_zero)     \
static inline void                                                           \
elk_inst_set_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo,   \
                                  elk_inst *inst, unsigned value)            \
{                                                                            \
   if (devinfo->ver < 8) {                                                   \
      assert((value & ~0x3ff) == 0);                                         \
      elk_inst_set_bits(inst, g4_high, g4_low, value);                       \
   } else if (devinfo->ver < 12) {                                           \
      assert((value & ~0x3ff) == 0);                                         \
      elk_inst_set_bits(inst, g8_high, g8_low, value & 0x1ff);               \
      elk_inst_set_bits(inst, g8_nine, g8_nine, value >> 9);                 \
   } else if (devinfo->ver < 20) {                                           \
      assert((value & ~0x3ff) == 0);                                         \
      elk_inst_set_bits(inst, g12_high, g12_low, value);                     \
   } else {                                                                  \
      assert((value & ~0x7ff) == 0);                                         \
      elk_inst_set_bits(inst, g20_high, g20_low, value >> 1);                \
      if (g20_zero != -1) {                                                  \
         elk_inst_set_bits(inst, g20_zero, g20_zero, value & 1);             \
      } else {                                                               \
         assert((value & 1) == 0);                                           \
      }                                                                      \
   }                                                                         \
}                                                                            \
static inline unsigned                                                       \
elk_inst_##reg##_ia1_addr_imm(const struct intel_device_info *devinfo,       \
                              const elk_inst *inst)                          \
{                                                                            \
   if (devinfo->ver < 8)                                                     \
      return elk_inst_bits(inst, g4_high, g4_low);                           \
                                                                             \
   if (devinfo->ver < 12) {                                                  \
      const uint64_t low9 = elk_inst_bits(inst, g8_high, g8_low);            \
      const uint64_t bit9 = elk_inst_bits(inst, g8_nine, g8_nine);           \
      return low9 | (bit9 << 9);                                             \
   }                                                                         \
                                                                             \
   if (devinfo->ver < 20)                                                    \
      return elk_inst_bits(inst, g12_high, g12_low);                         \
                                                                             \
   uint64_t addr = elk_inst_bits(inst, g20_high, g20_low) << 1;              \
   if (g20_zero != -1)                                                       \
      addr |= elk_inst_bits(inst, g20_zero, g20_zero);                       \
   return addr;                                                              \
}
1416
1417 /* AddrImm for Align1 Indirect Addressing */
1418 /* -Gen 4- ----Gfx8---- -Gfx12- ---Gfx20--- */
1419 ELK_IA1_ADDR_IMM(src1, 105, 96, 121, 104, 96, 107, 98, 107, 98, -1)
1420 ELK_IA1_ADDR_IMM(src0, 73, 64, 95, 72, 64, 75, 66, 75, 66, 87)
1421 ELK_IA1_ADDR_IMM(dst, 57, 48, 47, 56, 48, 59, 50, 59, 50, 33)
1422
/* Generate elk_inst_set_<reg>_ia16_addr_imm() / elk_inst_<reg>_ia16_addr_imm().
 *
 * Both accessors assert devinfo->ver < 12.  On Gfx8+ the low four bits of the
 * value must be zero and are not stored: bits 8:4 go to [g8_high:g8_low] and
 * bit 9 to g8_nine.  Earlier generations store the value in [g4_high:g4_low].
 */
#define ELK_IA16_ADDR_IMM(reg, g4_high, g4_low, g8_nine, g8_high, g8_low)    \
static inline void                                                           \
elk_inst_set_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo,  \
                                   elk_inst *inst, unsigned value)           \
{                                                                            \
   assert(devinfo->ver < 12);                                                \
   assert((value & ~0x3ff) == 0);                                            \
                                                                             \
   if (devinfo->ver < 8) {                                                   \
      elk_inst_set_bits(inst, g4_high, g4_low, value);                       \
      return;                                                                \
   }                                                                         \
                                                                             \
   assert(GET_BITS(value, 3, 0) == 0);                                       \
   elk_inst_set_bits(inst, g8_high, g8_low, GET_BITS(value, 8, 4));          \
   elk_inst_set_bits(inst, g8_nine, g8_nine, GET_BITS(value, 9, 9));         \
}                                                                            \
static inline unsigned                                                       \
elk_inst_##reg##_ia16_addr_imm(const struct intel_device_info *devinfo,      \
                               const elk_inst *inst)                         \
{                                                                            \
   assert(devinfo->ver < 12);                                                \
                                                                             \
   if (devinfo->ver < 8)                                                     \
      return elk_inst_bits(inst, g4_high, g4_low);                           \
                                                                             \
   const uint64_t bits_8_4 = elk_inst_bits(inst, g8_high, g8_low);           \
   const uint64_t bit_9 = elk_inst_bits(inst, g8_nine, g8_nine);             \
   return (bits_8_4 << 4) | (bit_9 << 9);                                    \
}
1451
1452 /* AddrImm[9:0] for Align16 Indirect Addressing:
1453 * Compared to Align1, these are missing the low 4 bits.
1454 * -Gen 4- ----Gfx8----
1455 */
1456 ELK_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
1457 ELK_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
1458 ELK_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52)
1459 ELK_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68)
1460 ELK_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52)
1461
1462 /**
1463 * Fetch a set of contiguous bits from the instruction.
1464 *
1465 * Bits indices range from 0..127; fields may not cross 64-bit boundaries.
1466 */
1467 static inline uint64_t
1468 elk_inst_bits(const elk_inst *inst, unsigned high, unsigned low)
1469 {
1470 assume(high < 128);
1471 assume(high >= low);
1472 /* We assume the field doesn't cross 64-bit boundaries. */
1473 const unsigned word = high / 64;
1474 assert(word == low / 64);
1475
1476 high %= 64;
1477 low %= 64;
1478
1479 const uint64_t mask = (~0ull >> (64 - (high - low + 1)));
1480
1481 return (inst->data[word] >> low) & mask;
1482 }
1483
1484 /**
1485 * Set bits in the instruction, with proper shifting and masking.
1486 *
1487 * Bits indices range from 0..127; fields may not cross 64-bit boundaries.
1488 */
1489 static inline void
elk_inst_set_bits(elk_inst * inst,unsigned high,unsigned low,uint64_t value)1490 elk_inst_set_bits(elk_inst *inst, unsigned high, unsigned low, uint64_t value)
1491 {
1492 assume(high < 128);
1493 assume(high >= low);
1494 const unsigned word = high / 64;
1495 assert(word == low / 64);
1496
1497 high %= 64;
1498 low %= 64;
1499
1500 const uint64_t mask = (~0ull >> (64 - (high - low + 1))) << low;
1501
1502 /* Make sure the supplied value actually fits in the given bitfield. */
1503 assert((value & (mask >> low)) == value);
1504
1505 inst->data[word] = (inst->data[word] & ~mask) | (value << low);
1506 }
1507
1508 #undef ELK_IA16_ADDR_IMM
1509 #undef ELK_IA1_ADDR_IMM
1510 #undef MD
1511 #undef F8
1512 #undef FF
1513 #undef BOUNDS
1514 #undef F
1515 #undef FC
1516 #undef F20
1517 #undef FD20
1518
/* A compacted instruction: a single 64-bit word, addressed as bits 0..63 by
 * elk_compact_inst_bits() and elk_compact_inst_set_bits().
 */
typedef struct {
   uint64_t data;   /* raw encoding */
} elk_compact_inst;
1522
1523 /**
1524 * Fetch a set of contiguous bits from the compacted instruction.
1525 *
1526 * Bits indices range from 0..63.
1527 */
1528 static inline unsigned
elk_compact_inst_bits(const elk_compact_inst * inst,unsigned high,unsigned low)1529 elk_compact_inst_bits(const elk_compact_inst *inst, unsigned high, unsigned low)
1530 {
1531 assume(high < 64);
1532 assume(high >= low);
1533 const uint64_t mask = (1ull << (high - low + 1)) - 1;
1534
1535 return (inst->data >> low) & mask;
1536 }
1537
1538 /**
1539 * Set bits in the compacted instruction.
1540 *
1541 * Bits indices range from 0..63.
1542 */
1543 static inline void
elk_compact_inst_set_bits(elk_compact_inst * inst,unsigned high,unsigned low,uint64_t value)1544 elk_compact_inst_set_bits(elk_compact_inst *inst, unsigned high, unsigned low,
1545 uint64_t value)
1546 {
1547 assume(high < 64);
1548 assume(high >= low);
1549 const uint64_t mask = ((1ull << (high - low + 1)) - 1) << low;
1550
1551 /* Make sure the supplied value actually fits in the given bitfield. */
1552 assert((value & (mask >> low)) == value);
1553
1554 inst->data = (inst->data & ~mask) | (value << low);
1555 }
1556
/* Generate elk_compact_inst_set_<name>() / elk_compact_inst_<name>() for a
 * field located at [high:low], or at [gfx12_high:gfx12_low] when
 * devinfo->ver >= 12.  Both accessors check `assertions` first.
 */
#define FC(name, high, low, gfx12_high, gfx12_low, assertions)               \
static inline void                                                           \
elk_compact_inst_set_##name(const struct intel_device_info *devinfo,         \
                            elk_compact_inst *inst, unsigned v)              \
{                                                                            \
   assert(assertions);                                                       \
   if (devinfo->ver < 12) {                                                  \
      elk_compact_inst_set_bits(inst, high, low, v);                         \
   } else {                                                                  \
      elk_compact_inst_set_bits(inst, gfx12_high, gfx12_low, v);             \
   }                                                                         \
}                                                                            \
static inline unsigned                                                       \
elk_compact_inst_##name(const struct intel_device_info *devinfo,             \
                        const elk_compact_inst *inst)                        \
{                                                                            \
   assert(assertions);                                                       \
   if (devinfo->ver < 12) {                                                  \
      return elk_compact_inst_bits(inst, high, low);                         \
   } else {                                                                  \
      return elk_compact_inst_bits(inst, gfx12_high, gfx12_low);             \
   }                                                                         \
}
1579
/* Shorthand for FC() with no extra precondition: for fields which stay in the
 * same place on all generations except for Gfx12.
 */
#define F(name, high, low, gfx12_high, gfx12_low)                            \
   FC(name,                                                                  \
      high, low,                                                             \
      gfx12_high, gfx12_low,                                                 \
      true)
1585
/* Generate compact-instruction accessors for a field which moved to several
 * different locations across generations: [high:low] before Gfx8, [hi8:lo8]
 * on Gfx8+, [hi12:lo12] on Gfx12+ and [hi20:lo20] on Gfx20+.
 */
#define F20(name, high, low, hi8, lo8, hi12, lo12, hi20, lo20)               \
static inline void                                                           \
elk_compact_inst_set_##name(const struct intel_device_info *devinfo,         \
                            elk_compact_inst *inst, unsigned v)              \
{                                                                            \
   if (devinfo->ver < 8) {                                                   \
      elk_compact_inst_set_bits(inst, high, low, v);                         \
   } else if (devinfo->ver < 12) {                                           \
      elk_compact_inst_set_bits(inst, hi8, lo8, v);                          \
   } else if (devinfo->ver < 20) {                                           \
      elk_compact_inst_set_bits(inst, hi12, lo12, v);                        \
   } else {                                                                  \
      elk_compact_inst_set_bits(inst, hi20, lo20, v);                        \
   }                                                                         \
}                                                                            \
static inline unsigned                                                       \
elk_compact_inst_##name(const struct intel_device_info *devinfo,             \
                        const elk_compact_inst *inst)                        \
{                                                                            \
   if (devinfo->ver < 8) {                                                   \
      return elk_compact_inst_bits(inst, high, low);                         \
   } else if (devinfo->ver < 12) {                                           \
      return elk_compact_inst_bits(inst, hi8, lo8);                          \
   } else if (devinfo->ver < 20) {                                           \
      return elk_compact_inst_bits(inst, hi12, lo12);                        \
   } else {                                                                  \
      return elk_compact_inst_bits(inst, hi20, lo20);                        \
   }                                                                         \
}
1617
/* Like F20, but for fields which gained extra discontiguous bits in Gfx20.
 * On Gfx20+ the low (hi20 - lo20 + 1) bits of the value live in [hi20:lo20]
 * and the remaining upper bits in [hi20ex:lo20ex].
 */
#define FD20(name, high, low, hi8, lo8, hi12, lo12,                          \
             hi20, lo20, hi20ex, lo20ex)                                     \
static inline void                                                           \
elk_compact_inst_set_##name(const struct intel_device_info *devinfo,         \
                            elk_compact_inst *inst, unsigned v)              \
{                                                                            \
   if (devinfo->ver < 8) {                                                   \
      elk_compact_inst_set_bits(inst, high, low, v);                         \
   } else if (devinfo->ver < 12) {                                           \
      elk_compact_inst_set_bits(inst, hi8, lo8, v);                          \
   } else if (devinfo->ver < 20) {                                           \
      elk_compact_inst_set_bits(inst, hi12, lo12, v);                        \
   } else {                                                                  \
      const unsigned low_width = hi20 - lo20 + 1;                            \
      const unsigned low_mask = (1u << low_width) - 1;                       \
      elk_compact_inst_set_bits(inst, hi20ex, lo20ex, v >> low_width);       \
      elk_compact_inst_set_bits(inst, hi20, lo20, v & low_mask);             \
   }                                                                         \
}                                                                            \
static inline unsigned                                                       \
elk_compact_inst_##name(const struct intel_device_info *devinfo,             \
                        const elk_compact_inst *inst)                        \
{                                                                            \
   if (devinfo->ver < 8) {                                                   \
      return elk_compact_inst_bits(inst, high, low);                         \
   } else if (devinfo->ver < 12) {                                           \
      return elk_compact_inst_bits(inst, hi8, lo8);                          \
   } else if (devinfo->ver < 20) {                                           \
      return elk_compact_inst_bits(inst, hi12, lo12);                        \
   } else {                                                                  \
      const unsigned low_width = hi20 - lo20 + 1;                            \
      const unsigned upper = elk_compact_inst_bits(inst, hi20ex, lo20ex);    \
      const unsigned lower = elk_compact_inst_bits(inst, hi20, lo20);        \
      return upper << low_width | lower;                                     \
   }                                                                         \
}
1656
1657 F(src1_reg_nr, /* 4+ */ 63, 56, /* 12+ */ 63, 56)
1658 F(src0_reg_nr, /* 4+ */ 55, 48, /* 12+ */ 47, 40)
1659 F20(dst_reg_nr, /* 4+ */ 47, 40, /* 8+ */ 47, 40, /* 12+ */ 23, 16, /* 20+ */ 39, 32)
1660 F(src1_index, /* 4+ */ 39, 35, /* 12+ */ 55, 52)
1661 F20(src0_index, /* 4+ */ 34, 30, /* 8+ */ 34, 30, /* 12+ */ 51, 48, /* 20+ */ 25, 23)
1662 F(cmpt_control, /* 4+ */ 29, 29, /* 12+ */ 29, 29) /* Same location as elk_inst */
1663 FC(flag_subreg_nr, /* 4+ */ 28, 28, /* 12+ */ -1, -1, devinfo->ver <= 6)
1664 F(cond_modifier, /* 4+ */ 27, 24, /* 12+ */ -1, -1) /* Same location as elk_inst */
1665 FC(acc_wr_control, /* 4+ */ 23, 23, /* 12+ */ -1, -1, devinfo->ver >= 6)
1666 FC(mask_control_ex, /* 4+ */ 23, 23, /* 12+ */ -1, -1, devinfo->verx10 == 45 || devinfo->ver == 5)
1667 F20(subreg_index, /* 4+ */ 22, 18, /* 8+ */ 22, 18, /* 12+ */ 39, 35, /* 20+ */ 51, 48)
1668 FD20(datatype_index, /* 4+ */ 17, 13, /* 8+ */ 17, 13, /* 12+ */ 34, 30, /* 20+ */ 28, 26, 31, 30)
1669 F20(control_index, /* 4+ */ 12, 8, /* 8+ */ 12, 8, /* 12+ */ 28, 24, /* 20+ */ 22, 18)
1670 F20(swsb, /* 4+ */ -1, -1, /* 8+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8)
1671 F(debug_control, /* 4+ */ 7, 7, /* 12+ */ 7, 7)
1672 F(hw_opcode, /* 4+ */ 6, 0, /* 12+ */ 6, 0) /* Same location as elk_inst */
1673
1674 static inline unsigned
elk_compact_inst_imm(const struct intel_device_info * devinfo,const elk_compact_inst * inst)1675 elk_compact_inst_imm(const struct intel_device_info *devinfo,
1676 const elk_compact_inst *inst)
1677 {
1678 if (devinfo->ver >= 12) {
1679 return elk_compact_inst_bits(inst, 63, 52);
1680 } else {
1681 return (elk_compact_inst_bits(inst, 39, 35) << 8) |
1682 (elk_compact_inst_bits(inst, 63, 56));
1683 }
1684 }
1685
1686 /**
1687 * (Gfx8+) Compacted three-source instructions:
1688 * @{
1689 */
1690 FC(3src_src2_reg_nr, /* 4+ */ 63, 57, /* 12+ */ 55, 48, devinfo->ver >= 8)
1691 FC(3src_src1_reg_nr, /* 4+ */ 56, 50, /* 12+ */ 63, 56, devinfo->ver >= 8)
1692 FC(3src_src0_reg_nr, /* 4+ */ 49, 43, /* 12+ */ 47, 40, devinfo->ver >= 8)
1693 FC(3src_src2_subreg_nr, /* 4+ */ 42, 40, /* 12+ */ -1, -1, devinfo->ver >= 8)
1694 FC(3src_src1_subreg_nr, /* 4+ */ 39, 37, /* 12+ */ -1, -1, devinfo->ver >= 8)
1695 FC(3src_src0_subreg_nr, /* 4+ */ 36, 34, /* 12+ */ -1, -1, devinfo->ver >= 8)
1696 FC(3src_src2_rep_ctrl, /* 4+ */ 33, 33, /* 12+ */ -1, -1, devinfo->ver >= 8)
1697 FC(3src_src1_rep_ctrl, /* 4+ */ 32, 32, /* 12+ */ -1, -1, devinfo->ver >= 8)
1698 FC(3src_saturate, /* 4+ */ 31, 31, /* 12+ */ -1, -1, devinfo->ver >= 8)
1699 FC(3src_debug_control, /* 4+ */ 30, 30, /* 12+ */ 7, 7, devinfo->ver >= 8)
1700 FC(3src_cmpt_control, /* 4+ */ 29, 29, /* 12+ */ 29, 29, devinfo->ver >= 8)
1701 FC(3src_src0_rep_ctrl, /* 4+ */ 28, 28, /* 12+ */ -1, -1, devinfo->ver >= 8)
1702 /* Reserved */
1703 F20(3src_dst_reg_nr, /* 4+ */ 18, 12, /* 8+ */ 18, 12, /* 12+ */ 23, 16, /* 20+ */ 39, 32)
1704 F20(3src_source_index, /* 4+ */ -1, -1, /* 8+ */ 11, 10, /* 12+ */ 34, 30, /* 20+ */ 25, 22)
1705 FD20(3src_subreg_index, /* 4+ */ -1, -1, /* 8+ */ -1, -1, /* 12+ */ 39, 35, /* 20+ */ 28, 26, 31, 30)
1706 F20(3src_control_index, /* 4+ */ -1, -1, /* 8+ */ 9, 8, /* 12+ */ 28, 24, /* 20+ */ 21, 18)
1707 F20(3src_swsb, /* 4+ */ -1, -1, /* 8+ */ -1, -1, /* 12+ */ 15, 8, /* 20+ */ 17, 8)
1708 /* Bit 7 is Reserved (for future Opcode expansion) */
1709 FC(3src_hw_opcode, /* 4+ */ 6, 0, /* 12+ */ 6, 0, devinfo->ver >= 8)
1710 /** @} */
1711
1712 #undef F
1713
1714 static inline void
elk_inst_set_opcode(const struct elk_isa_info * isa,struct elk_inst * inst,enum elk_opcode opcode)1715 elk_inst_set_opcode(const struct elk_isa_info *isa,
1716 struct elk_inst *inst, enum elk_opcode opcode)
1717 {
1718 elk_inst_set_hw_opcode(isa->devinfo, inst, elk_opcode_encode(isa, opcode));
1719 }
1720
1721 static inline enum elk_opcode
elk_inst_opcode(const struct elk_isa_info * isa,const struct elk_inst * inst)1722 elk_inst_opcode(const struct elk_isa_info *isa,
1723 const struct elk_inst *inst)
1724 {
1725 return elk_opcode_decode(isa, elk_inst_hw_opcode(isa->devinfo, inst));
1726 }
1727
1728 #ifdef __cplusplus
1729 }
1730 #endif
1731
1732 #endif
1733