/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#ifndef ELK_EU_H
#define ELK_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "elk_inst.h"
#include "elk_compiler.h"
#include "elk_eu_defines.h"
#include "elk_isa_info.h"
#include "elk_reg.h"

#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct elk_disasm_info;

#define ELK_EU_MAX_INSN_STACK 5

struct elk_insn_state {
   /* One of ELK_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of ELK_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   bool saturate:1;

   /* One of ELK_ALIGN_* */
   unsigned access_mode:1;

   /* One of ELK_PREDICATE_* */
   enum elk_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};

/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define elk_last_inst (&p->store[p->nr_insn - 1])
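
/* A usage sketch (elk_inst_set_saturate is assumed to be one of the field
 * setters from elk_inst.h):
 *
 *    elk_ADD(p, dst, src0, src1);
 *    elk_inst_set_saturate(p->devinfo, elk_last_inst, true);
 */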

struct elk_codegen {
   elk_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct elk_insn_state stack[ELK_EU_MAX_INSN_STACK];
   struct elk_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by elk_set_default_exec_size verbatim.
    *
    * This is set to true by default in elk_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct elk_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops
    * which must be patched (and popped) once the matching WHILE instruction
    * is encountered.
    */
   int *loop_stack;
   /**
    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct elk_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};

struct elk_label {
   int offset;
   int number;
   struct elk_label *next;
};

void elk_pop_insn_state( struct elk_codegen *p );
void elk_push_insn_state( struct elk_codegen *p );
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
struct tgl_swsb elk_get_default_swsb(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode );
void elk_inst_set_compression(const struct intel_device_info *devinfo,
                              elk_inst *inst, bool on);
void elk_set_default_compression(struct elk_codegen *p, bool on);
void elk_inst_set_group(const struct intel_device_info *devinfo,
                        elk_inst *inst, unsigned group);
void elk_set_default_group(struct elk_codegen *p, unsigned group);
void elk_set_default_compression_control(struct elk_codegen *p, enum elk_compression c);
void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc);
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value);

void elk_init_codegen(const struct elk_isa_info *isa,
                      struct elk_codegen *p, void *mem_ctx);
bool elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
const struct elk_shader_reloc *elk_get_shader_relocs(struct elk_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *elk_get_program( struct elk_codegen *p, unsigned *sz );

bool elk_should_dump_shader_bin(void);
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier);

void elk_realign(struct elk_codegen *p, unsigned alignment);
int elk_append_data(struct elk_codegen *p, void *data,
                    unsigned size, unsigned alignment);
elk_inst *elk_next_insn(struct elk_codegen *p, unsigned opcode);
void elk_add_reloc(struct elk_codegen *p, uint32_t id,
                   enum elk_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void elk_set_dest(struct elk_codegen *p, elk_inst *insn, struct elk_reg dest);
void elk_set_src0(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_gfx6_resolve_implied_move(struct elk_codegen *p,
                                   struct elk_reg *src,
                                   unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
#define ALU1(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0);

#define ALU2(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1);

#define ALU3(OP)                                \
elk_inst *elk_##OP(struct elk_codegen *p,       \
                   struct elk_reg dest,         \
                   struct elk_reg src0,         \
                   struct elk_reg src1,         \
                   struct elk_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3

static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return devinfo->ver >= 20 ? 2 : 1;
}
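
/* Illustrative note: on ver >= 20 devices reg_unit() is 2, since descriptor
 * register counts are expressed in units of the larger native GRF.  The
 * helpers below divide by reg_unit() accordingly -- e.g. a payload of 4
 * legacy-sized registers is encoded as 4 / reg_unit(devinfo) == 2 in
 * elk_message_desc().
 */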

/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
elk_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      assert(msg_length % reg_unit(devinfo) == 0);
      assert(response_length % reg_unit(devinfo) == 0);
      return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
              SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}
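
/* A worked example (a sketch): on Gfx9, a message with msg_length == 2,
 * response_length == 1 and a header packs as
 *
 *    SET_BITS(2, 28, 25) | SET_BITS(1, 24, 20) | SET_BITS(1, 19, 19)
 *       == (2 << 25) | (1 << 20) | (1 << 19) == 0x04180000
 *
 * and elk_message_desc_mlen()/elk_message_desc_rlen() below recover the
 * original register counts.
 */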

static inline unsigned
elk_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 23, 20);
}

static inline unsigned
elk_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 19, 16);
}

static inline bool
elk_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
elk_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
elk_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
elk_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

static inline uint32_t
elk_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}

static inline uint32_t
elk_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return elk_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
elk_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From GFX20 Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   Message Type[5]  31  This bit represents the upper bit of message
    *                        type 6-bit encoding (c.f. [16:12]). This bit is
    *                        set for messages with programmable offsets.
    */
   if (devinfo->ver >= 20)
      return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30) |
             SET_BITS(msg_type >> 5, 31, 31);

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29  This field is the upper bit of the 3-bit
    *                     SIMD Mode field.
    */
   if (devinfo->ver >= 8)
      return desc | SET_BITS(msg_type, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30);
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->verx10 >= 45)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}
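
/* Example (a sketch): on Gfx9 (the ver >= 8 path), a sample message from
 * binding table index 1 with sampler 2 packs roughly as
 *
 *    elk_sampler_desc(devinfo, 1, 2, msg_type, simd_mode, 0)
 *       == 1 | (2 << 8) | (msg_type << 12) | ((simd_mode & 3) << 17) | ...
 *
 * where msg_type and simd_mode take the sampler message enums from
 * elk_eu_defines.h; the getters below invert this packing.
 */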

static inline unsigned
elk_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

static inline unsigned
elk_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

static inline unsigned
elk_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}

/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
elk_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline unsigned
elk_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
elk_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->verx10 >= 45)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}

static inline unsigned
elk_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

static inline unsigned
elk_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
elk_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}

static inline unsigned
elk_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
elk_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return elk_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 9);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}

static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
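
/* Note the opposite conventions (worked examples): elk_mdc_cmask(4) == 0x0
 * and elk_mdc_cmask(1) == 0xe, since a set MDC_CMASK bit disables a channel,
 * whereas lsc_cmask(4) == 0xf and lsc_cmask(1) == 0x1, since LSC component
 * masks set one bit per enabled channel.
 */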

static inline uint32_t
elk_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}
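
/* Example usage (a sketch): a SIMD8 untyped read of all four RGBA channels
 * from the surface at binding table index bti could be encoded as
 *
 *    uint32_t desc = bti |
 *       elk_dp_untyped_surface_rw_desc(devinfo, 8, 4, false);
 *
 * with the binding table index OR'ed in afterwards, as noted in
 * elk_dp_surface_desc() above.
 */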

static inline unsigned
elk_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
elk_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(elk_mdc_ds(bit_size), 3, 2);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = ELK_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = ELK_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
elk_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}
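
/* E.g. elk_dp_a64_byte_scattered_rw_desc() below passes bit_size / 8, so a
 * 32-bit scattered access encodes its data size as elk_mdc_a64_ds(4) == 2.
 */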

static inline uint32_t
elk_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(elk_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 9);
   assert(bit_size == 16 || bit_size == 32);
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
elk_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

static inline unsigned
elk_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline uint32_t
elk_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(devinfo->ver >= 9);
   assert(exec_size == 8 || exec_size == 16);

   return elk_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

static inline uint32_t
elk_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   if (devinfo->ver >= 6) {
      return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}

static inline unsigned
elk_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

static inline bool
elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

static inline bool
elk_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_is_atomic(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return ELK_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return ELK_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return ELK_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return ELK_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return ELK_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return ELK_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return ELK_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return ELK_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return ELK_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return ELK_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return ELK_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return ELK_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return ELK_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return ELK_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return ELK_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return ELK_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return ELK_AOP_XOR;
   /* No LSC op maps to ELK_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1:  return 1;
   case LSC_VECT_SIZE_V2:  return 2;
   case LSC_VECT_SIZE_V3:  return 3;
   case LSC_VECT_SIZE_V4:  return 4;
   case LSC_VECT_SIZE_V8:  return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch (vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}
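
/* lsc_vect_size() and lsc_vector_length() are inverses; e.g.
 * lsc_vector_length(lsc_vect_size(8)) == 8.
 */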

static inline uint32_t
lsc_msg_desc_wcmask(UNUSED const struct intel_device_info *devinfo,
                    enum elk_lsc_opcode opcode, unsigned simd_size,
                    enum lsc_addr_surface_type addr_type,
                    enum lsc_addr_size addr_sz, unsigned num_coordinates,
                    enum lsc_data_size data_sz, unsigned num_channels,
                    bool transpose, unsigned cache_ctrl, bool has_dest,
                    unsigned cmask)
{
   assert(devinfo->has_lsc);

   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   assert(!transpose || elk_lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   if (elk_lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(cmask ? cmask : lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}

static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum elk_lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   return lsc_msg_desc_wcmask(devinfo, opcode, simd_size, addr_type, addr_sz,
                              num_coordinates, data_sz, num_channels,
                              transpose, cache_ctrl, has_dest, 0);
}
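
/* Example (a sketch): a transposed SIMD1 A64 flat load of four dwords --
 * roughly the shape of a uniform block load -- could be encoded as
 *
 *    uint32_t desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, 1,
 *                                 LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A64,
 *                                 1, LSC_DATA_SIZE_D32, 4,
 *                                 true, cache_ctrl, true);
 *
 * where cache_ctrl is one of the LSC cache-policy encodings (assumed to be
 * defined in elk_eu_defines.h); the lsc_msg_desc_*() getters below invert
 * each field.
 */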

static inline enum elk_lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum elk_lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}
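
/* Example (a sketch): a GPU-scoped fence that evicts dirty lines might be
 * encoded as
 *
 *    lsc_fence_msg_desc(devinfo, LSC_FENCE_GPU, LSC_FLUSH_TYPE_EVICT, false);
 *
 * where the scope and flush-type enum names are assumed to come from
 * elk_eu_defines.h.
 */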
1564
1565 static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1566 lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1567 uint32_t desc)
1568 {
1569 assert(devinfo->has_lsc);
1570 return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1571 }
1572
1573 static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1574 lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1575 uint32_t desc)
1576 {
1577 assert(devinfo->has_lsc);
1578 return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1579 }
1580
1581 static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1582 lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1583 uint32_t desc)
1584 {
1585 assert(devinfo->has_lsc);
1586 return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1587 }
1588
1589 static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info * devinfo,unsigned bti)1590 lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1591 {
1592 assert(devinfo->has_lsc);
1593 return SET_BITS(bti, 31, 24) |
1594 SET_BITS(0, 23, 12); /* base offset */
1595 }
1596
1597 static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1598 lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1599 uint32_t ex_desc)
1600 {
1601 assert(devinfo->has_lsc);
1602 return GET_BITS(ex_desc, 23, 12);
1603 }
1604
1605 static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1606 lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1607 uint32_t ex_desc)
1608 {
1609 assert(devinfo->has_lsc);
1610 return GET_BITS(ex_desc, 31, 24);
1611 }
1612
1613 static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1614 lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1615 uint32_t ex_desc)
1616 {
1617 assert(devinfo->has_lsc);
1618 return GET_BITS(ex_desc, 31, 12);
1619 }
1620
1621 static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info * devinfo,unsigned surface_state_index)1622 lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1623 unsigned surface_state_index)
1624 {
1625 assert(devinfo->has_lsc);
1626 return SET_BITS(surface_state_index, 31, 6);
1627 }
1628
1629 static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1630 lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1631 uint32_t ex_desc)
1632 {
1633 assert(devinfo->has_lsc);
1634 return GET_BITS(ex_desc, 31, 6);
1635 }

static inline uint32_t
elk_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
elk_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}
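
/* SM2 is the one-bit SIMD-mode field used in message descriptors, so the
 * two helpers above are exact inverses on the legal values:
 *
 *    elk_mdc_sm2(8)  == 0,  elk_mdc_sm2_exec_size(0) == 8
 *    elk_mdc_sm2(16) == 1,  elk_mdc_sm2_exec_size(1) == 16
 */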

static inline uint32_t
elk_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
elk_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return elk_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

static inline uint32_t
elk_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
}
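
/* A minimal usage sketch (illustrative only; msg_type stands for any legal
 * BTD spawn message type): the spawn descriptor round-trips through the
 * accessors above:
 *
 *    uint32_t desc = elk_btd_spawn_desc(devinfo, 16, msg_type);
 *    assert(elk_btd_spawn_msg_type(devinfo, desc) == msg_type);
 *    assert(elk_btd_spawn_exec_size(devinfo, desc) == 16);
 */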

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}
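
/* A minimal usage sketch (illustrative only; msg_type stands for any legal
 * pixel interpolator message type): a SIMD16 query for the second slot
 * group sets both the SIMD-mode bit (16) and the slot-group bit (11):
 *
 *    uint32_t desc = elk_pixel_interp_desc(devinfo, msg_type,
 *                                          false, false, 16, 16);
 *    assert(GET_BITS(desc, 16, 16) == 1);
 *    assert(GET_BITS(desc, 11, 11) == 1);
 */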

void elk_urb_WRITE(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   enum elk_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc. If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
elk_send_indirect_message(struct elk_codegen *p,
                          unsigned sfid,
                          struct elk_reg dst,
                          struct elk_reg payload,
                          struct elk_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
elk_send_indirect_split_message(struct elk_codegen *p,
                                unsigned sfid,
                                struct elk_reg dst,
                                struct elk_reg payload0,
                                struct elk_reg payload1,
                                struct elk_reg desc,
                                unsigned desc_imm,
                                struct elk_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot);

void elk_ff_sync(struct elk_codegen *p,
                 struct elk_reg dest,
                 unsigned msg_reg_nr,
                 struct elk_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void elk_svb_write(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

elk_inst *elk_fb_WRITE(struct elk_codegen *p,
                       struct elk_reg payload,
                       struct elk_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

elk_inst *elk_gfx9_fb_READ(struct elk_codegen *p,
                           struct elk_reg dst,
                           struct elk_reg payload,
                           unsigned binding_table_index,
                           unsigned msg_length,
                           unsigned response_length,
                           bool per_sample);

void elk_SAMPLE(struct elk_codegen *p,
                struct elk_reg dest,
                unsigned msg_reg_nr,
                struct elk_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
                                      struct elk_reg header,
                                      struct elk_reg sampler_index);

void elk_gfx4_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   unsigned msg_reg_nr,
                   struct elk_reg src,
                   unsigned precision);

void elk_gfx6_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   struct elk_reg src0,
                   struct elk_reg src1);

void elk_oword_block_read(struct elk_codegen *p,
                          struct elk_reg dest,
                          struct elk_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned elk_scratch_surface_idx(const struct elk_codegen *p);

void elk_oword_block_read_scratch(struct elk_codegen *p,
                                  struct elk_reg dest,
                                  struct elk_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void elk_oword_block_write_scratch(struct elk_codegen *p,
                                   struct elk_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void elk_gfx7_block_read_scratch(struct elk_codegen *p,
                                 struct elk_reg dest,
                                 int num_regs,
                                 unsigned offset);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
elk_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell and later measure jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in
    * order to support compaction), so each 128-bit instruction requires
    * 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}
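
/* Worked example: to jump over two instructions the encoded distance is
 * 2 * elk_jump_scale(devinfo), i.e. 32 on gfx8+ (bytes), 4 on gfx5-7
 * (64-bit compaction chunks), and 2 on gfx4 (whole instructions).
 */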

void elk_barrier(struct elk_codegen *p, struct elk_reg src);

/* If/else/endif. Works by manipulating the execution flags on each
 * channel.
 */
elk_inst *elk_IF(struct elk_codegen *p, unsigned execute_size);
elk_inst *elk_gfx6_IF(struct elk_codegen *p, enum elk_conditional_mod conditional,
                      struct elk_reg src0, struct elk_reg src1);

void elk_ELSE(struct elk_codegen *p);
void elk_ENDIF(struct elk_codegen *p);

/* DO/WHILE loops:
 */
elk_inst *elk_DO(struct elk_codegen *p, unsigned execute_size);

elk_inst *elk_WHILE(struct elk_codegen *p);

elk_inst *elk_BREAK(struct elk_codegen *p);
elk_inst *elk_CONT(struct elk_codegen *p);
elk_inst *elk_HALT(struct elk_codegen *p);

/* Forward jumps:
 */
void elk_land_fwd_jump(struct elk_codegen *p, int jmp_insn_idx);

elk_inst *elk_JMPI(struct elk_codegen *p, struct elk_reg index,
                   unsigned predicate_control);

void elk_NOP(struct elk_codegen *p);

void elk_WAIT(struct elk_codegen *p);

void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void elk_CMP(struct elk_codegen *p,
             struct elk_reg dest,
             unsigned conditional,
             struct elk_reg src0,
             struct elk_reg src1);

void elk_CMPN(struct elk_codegen *p,
              struct elk_reg dest,
              unsigned conditional,
              struct elk_reg src0,
              struct elk_reg src1);

elk_inst *elk_DPAS(struct elk_codegen *p, enum elk_gfx12_systolic_depth sdepth,
                   unsigned rcount, struct elk_reg dest, struct elk_reg src0,
                   struct elk_reg src1, struct elk_reg src2);

void
elk_untyped_atomic(struct elk_codegen *p,
                   struct elk_reg dst,
                   struct elk_reg payload,
                   struct elk_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
elk_untyped_surface_read(struct elk_codegen *p,
                         struct elk_reg dst,
                         struct elk_reg payload,
                         struct elk_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
elk_untyped_surface_write(struct elk_codegen *p,
                          struct elk_reg payload,
                          struct elk_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
elk_memory_fence(struct elk_codegen *p,
                 struct elk_reg dst,
                 struct elk_reg src,
                 enum elk_opcode send_op,
                 enum elk_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
elk_pixel_interpolator_query(struct elk_codegen *p,
                             struct elk_reg dest,
                             struct elk_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct elk_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
elk_find_live_channel(struct elk_codegen *p,
                      struct elk_reg dst,
                      bool last);

void
elk_broadcast(struct elk_codegen *p,
              struct elk_reg dst,
              struct elk_reg src,
              struct elk_reg idx);

void
elk_float_controls_mode(struct elk_codegen *p,
                        unsigned mode, unsigned mask);

void
elk_update_reloc_imm(const struct elk_isa_info *isa,
                     elk_inst *inst,
                     uint32_t value);

void
elk_MOV_reloc_imm(struct elk_codegen *p,
                  struct elk_reg dst,
                  enum elk_reg_type src_type,
                  uint32_t id);

unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst);

/***********************************************************************
 * elk_eu_util.c:
 */

void elk_copy_indirect_to_indirect(struct elk_codegen *p,
                                   struct elk_indirect dst_ptr,
                                   struct elk_indirect src_ptr,
                                   unsigned count);

void elk_copy_from_indirect(struct elk_codegen *p,
                            struct elk_reg dst,
                            struct elk_indirect ptr,
                            unsigned count);

void elk_copy4(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_copy8(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_math_invert(struct elk_codegen *p,
                     struct elk_reg dst,
                     struct elk_reg src);

void elk_set_src1(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_set_desc_ex(struct elk_codegen *p, elk_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
elk_set_desc(struct elk_codegen *p, elk_inst *insn, unsigned desc)
{
   elk_set_desc_ex(p, insn, desc, 0);
}

void elk_set_uip_jip(struct elk_codegen *p, int start_offset);

enum elk_conditional_mod elk_negate_cmod(enum elk_conditional_mod cmod);
enum elk_conditional_mod elk_swap_cmod(enum elk_conditional_mod cmod);

/* elk_eu_compact.c */
void elk_compact_instructions(struct elk_codegen *p, int start_offset,
                              struct elk_disasm_info *disasm);
void elk_uncompact_instruction(const struct elk_isa_info *isa,
                               elk_inst *dst, elk_compact_inst *src);
bool elk_try_compact_instruction(const struct elk_isa_info *isa,
                                 elk_compact_inst *dst, const elk_inst *src);

void elk_debug_compact_uncompact(const struct elk_isa_info *isa,
                                 elk_inst *orig, elk_inst *uncompacted);

/* elk_eu_validate.c */
bool elk_validate_instruction(const struct elk_isa_info *isa,
                              const elk_inst *inst, int offset,
                              unsigned inst_size,
                              struct elk_disasm_info *disasm);
bool elk_validate_instructions(const struct elk_isa_info *isa,
                               const void *assembly, int start_offset,
                               int end_offset,
                               struct elk_disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   elk_inst *insn = (elk_inst *)((char *)store + offset);

   if (elk_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}
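
/* A minimal usage sketch (illustrative only): next_offset() advances past
 * either a compacted (8-byte) or a full-size (16-byte) instruction, so a
 * caller can walk a mixed instruction stream:
 *
 *    for (int offset = start_offset; offset < end_offset;
 *         offset = next_offset(devinfo, store, offset)) {
 *       elk_inst *insn = (elk_inst *)((char *)store + offset);
 *       ...
 *    }
 */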

/** Maximum SEND message length */
#define ELK_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

#ifdef __cplusplus
}
#endif

#endif