1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 #pragma once
33
34 #include <stdbool.h>
35 #include <stdio.h>
36 #include "brw_eu_inst.h"
37 #include "brw_compiler.h"
38 #include "brw_eu_defines.h"
39 #include "brw_isa_info.h"
40 #include "brw_reg.h"
41
42 #include "intel_wa.h"
43 #include "util/bitset.h"
44
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48
49 struct disasm_info;
50
51 #define BRW_EU_MAX_INSN_STACK 5
52
/* Default per-instruction execution state.  A stack of these lives in
 * struct brw_codegen (see brw_push_insn_state/brw_pop_insn_state and the
 * brw_set_default_* helpers below); the current entry is applied to each
 * newly emitted instruction.
 */
struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   /* Saturate destination results to the type's range. */
   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   /* Invert the sense of the predicate above. */
   bool pred_inv:1;

   /* Flag subreg. Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   /* Accumulator write-enable control for the instruction. */
   bool acc_wr_control:1;
};
81
82
83 /* A helper for accessing the last instruction emitted. This makes it easy
84 * to set various bits on an instruction without having to create temporary
85 * variable and assign the emitted instruction to those.
86 */
87 #define brw_last_inst (&p->store[p->nr_insn - 1])
88
/* State of an in-progress EU assembly stream: the growable instruction
 * buffer plus the default-state stack and control-flow patch stacks.
 */
struct brw_codegen {
   /* Growable buffer of emitted instructions (see brw_next_insn). */
   brw_eu_inst *store;
   /* Allocated capacity of store, in instructions. */
   int store_size;
   /* Number of instructions emitted so far. */
   unsigned nr_insn;
   /* Byte offset at which the next instruction will be emitted. */
   unsigned int next_insn_offset;

   /* Memory allocation context; presumably a ralloc context — see
    * brw_init_codegen.
    */
   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer(an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   int loop_stack_depth;
   int loop_stack_array_size;

   /* Growable array of relocations recorded via brw_add_reloc. */
   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
128
/* Node in a singly linked list of disassembly labels: a numbered label at a
 * byte offset within the program.
 */
struct brw_label {
   int offset;              /* Byte offset of the label in the program. */
   int number;              /* Sequential label number. */
   struct brw_label *next;  /* Next label in the list, or NULL. */
};
134
135 void brw_pop_insn_state( struct brw_codegen *p );
136 void brw_push_insn_state( struct brw_codegen *p );
137 unsigned brw_get_default_exec_size(struct brw_codegen *p);
138 unsigned brw_get_default_group(struct brw_codegen *p);
139 unsigned brw_get_default_access_mode(struct brw_codegen *p);
140 struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
141 void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
142 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
143 void brw_set_default_saturate( struct brw_codegen *p, bool enable );
144 void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
145 void brw_eu_inst_set_group(const struct intel_device_info *devinfo,
146 brw_eu_inst *inst, unsigned group);
147 void brw_set_default_group(struct brw_codegen *p, unsigned group);
148 void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
149 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
150 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
151 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
152 void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);
153
154 void brw_init_codegen(const struct brw_isa_info *isa,
155 struct brw_codegen *p, void *mem_ctx);
156 bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
157 bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
158 bool brw_has_branch_ctrl(const struct intel_device_info *devinfo, enum opcode opcode);
159 const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
160 unsigned *num_relocs);
161 const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );
162
163 bool brw_should_dump_shader_bin(void);
164 void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
165 const char *identifier);
166
167 bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
168 const char *identifier);
169
170 void brw_realign(struct brw_codegen *p, unsigned alignment);
171 int brw_append_data(struct brw_codegen *p, void *data,
172 unsigned size, unsigned alignment);
173 brw_eu_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
174 void brw_add_reloc(struct brw_codegen *p, uint32_t id,
175 enum brw_shader_reloc_type type,
176 uint32_t offset, uint32_t delta);
177 void brw_set_dest(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg dest);
178 void brw_set_src0(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg);
179
/* Helpers for regular instructions:
 */

/* Declare the prototype of an emitter for a one-source ALU instruction;
 * each brw_<OP>() appends the instruction to the stream and returns it.
 */
#define ALU1(OP)                                \
brw_eu_inst *brw_##OP(struct brw_codegen *p,    \
              struct brw_reg dest,              \
              struct brw_reg src0);

/* Same as ALU1, for two-source instructions. */
#define ALU2(OP)                                \
brw_eu_inst *brw_##OP(struct brw_codegen *p,    \
              struct brw_reg dest,              \
              struct brw_reg src0,              \
              struct brw_reg src1);

/* Same as ALU1, for three-source instructions. */
#define ALU3(OP)                                \
brw_eu_inst *brw_##OP(struct brw_codegen *p,    \
              struct brw_reg dest,              \
              struct brw_reg src0,              \
              struct brw_reg src1,              \
              struct brw_reg src2);

/* Prototypes for all the regular instruction emitters. */
ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

/* The ALUn macros are only used for the declarations above. */
#undef ALU1
#undef ALU2
#undef ALU3
249
250 static inline unsigned
251 reg_unit(const struct intel_device_info *devinfo)
252 {
253 return devinfo->ver >= 20 ? 2 : 1;
254 }
255
256
257 /* Helpers for SEND instruction:
258 */
259
260 /**
261 * Construct a message descriptor immediate with the specified common
262 * descriptor controls.
263 */
static inline uint32_t
brw_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   /* Lengths are given in native GRFs but encoded in register units. */
   assert(msg_length % reg_unit(devinfo) == 0);
   assert(response_length % reg_unit(devinfo) == 0);
   return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
           SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
           SET_BITS(header_present, 19, 19));
}
276
/* Extract the message length (in GRFs) from a message descriptor. */
static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

/* Extract the response length (in GRFs) from a message descriptor. */
static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

/* Whether the header-present bit (19) of a message descriptor is set. */
static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 19, 19);
}
296
/* Construct an extended message descriptor with the given extended message
 * length.  The field is one bit wider on ver >= 20.
 */
static inline unsigned
brw_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return devinfo->ver >= 20 ?
          SET_BITS(ex_msg_length / reg_unit(devinfo), 10, 6) :
          SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

/* Extract the extended message length (in GRFs) from an extended message
 * descriptor.
 */
static inline unsigned
brw_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return devinfo->ver >= 20 ?
          GET_BITS(ex_desc, 10, 6) * reg_unit(devinfo) :
          GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}
315
/* Construct a URB message descriptor with the given controls. */
static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   return (SET_BITS(per_slot_offset_present, 17, 17) |
           SET_BITS(channel_mask_present, 15, 15) |
           SET_BITS(global_offset, 14, 4) |
           SET_BITS(msg_type, 3, 0));
}

/* Extract the message type from a URB message descriptor. */
static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   return GET_BITS(desc, 3, 0);
}

/* Descriptor for a URB fence message; only valid with LSC (Gfx12.5+). */
static inline uint32_t
brw_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}
342
343 /**
344 * Construct a message descriptor immediate with the specified sampler
345 * function controls.
346 */
static inline uint32_t
brw_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   /* BTI and sampler index are packed the same way on all platforms. */
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From GFX20 Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   Message Type[5]  31  This bit represents the upper bit of message type
    *                        6-bit encoding (c.f. [16:12]). This bit is set
    *                        for messages with programmable offsets.
    */
   if (devinfo->ver >= 20)
      return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30) |
             SET_BITS(msg_type >> 5, 31, 31);

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29  This field is the upper bit of the 3-bit
    *                     SIMD Mode field.
    */
   return desc | SET_BITS(msg_type, 16, 12) |
          SET_BITS(simd_mode & 0x3, 18, 17) |
          SET_BITS(simd_mode >> 2, 29, 29) |
          SET_BITS(return_format, 30, 30);
}
383
/* Extract the binding table index from a sampler message descriptor. */
static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the sampler index from a sampler message descriptor. */
static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

/* Extract the message type; on ver >= 20 the 6-bit type is reassembled
 * from bit 31 (upper bit) and bits [16:12] — see brw_sampler_desc().
 */
static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else
      return GET_BITS(desc, 16, 12);
}

/* Extract the 3-bit SIMD mode (upper bit stored at bit 29). */
static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
}

/* Extract the return-format bit from a sampler message descriptor. */
static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   return GET_BITS(desc, 30, 30);
}
421
422 /**
423 * Construct a message descriptor for the dataport
424 */
static inline uint32_t
brw_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 18, 14);
}

/* Extract the binding table index from a dataport message descriptor. */
static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the message type from a dataport message descriptor. */
static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 18, 14);
}

/* Extract the message control field from a dataport message descriptor. */
static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}
454
455 /**
456 * Construct a message descriptor immediate with the specified dataport read
457 * function controls.
458 */
static inline uint32_t
brw_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   /* NOTE(review): target_cache is unused here; presumably retained for
    * interface compatibility with older callers.
    */
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
}

/* Extract the message type from a dataport read descriptor. */
static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

/* Extract the message control field from a dataport read descriptor. */
static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}
482
483 /**
484 * Construct a message descriptor immediate with the specified dataport write
485 * function controls.
486 */
static inline uint32_t
brw_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   /* send_commit_msg must be 0; the SET_BITS below documents where the bit
    * would live but always contributes nothing given the assert.
    */
   assert(!send_commit_msg);
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(send_commit_msg, 17, 17);
}

/* Extract the message type from a dataport write descriptor. */
static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

/* Extract the message control field from a dataport write descriptor. */
static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}
512
513 /**
514 * Construct a message descriptor immediate with the specified dataport
515 * surface function controls.
516 */
static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   /* We'll OR in the binding table index later */
   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
}
525
/* Descriptor for an untyped atomic dataport message. */
static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (exec_size > 0) {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
   }

   /* Bit 4 selects SIMD8 mode (set only for 1..8 channels). */
   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
548
/* Descriptor for an untyped float atomic dataport message (no SIMD4x2). */
static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   /* Float atomics only have a 2-bit opcode field; bit 4 selects SIMD8. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
567
/* Build the MDC_CMASK field: within the low 4 bits, a bit is set for each
 * RGBA channel that is NOT enabled.  See also MDC_CMASK in the SKL PRM
 * Vol 2d.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   const unsigned all_channels = 0xf;
   return (all_channels << num_channels) & all_channels;
}
574
/* Descriptor for an untyped surface read or write dataport message. */
static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
597
/* Map a data bit-width (8/16/32) to the byte-scattered data-size encoding. */
static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}
612
/* Descriptor for a byte-scattered read or write dataport message. */
static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   /* No SIMD4x2 variant exists for byte-scattered messages. */
   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
632
/* Descriptor for a DWord-scattered read or write dataport message. */
static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   const unsigned msg_type =
      write ? GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE :
              GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
650
/* Descriptor for an OWord block read or write dataport message. */
static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?      GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ?  GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                   GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
670
/* Descriptor for an A64 (stateless) untyped surface read or write. */
static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
694
/* Descriptor for an A64 (stateless) OWord block read or write. */
static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   /* Bits 4:3 select the unaligned sub-type for reads. */
   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
715
716 /**
717 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
718 * Skylake PRM).
719 */
720 static inline uint32_t
brw_mdc_a64_ds(unsigned elems)721 brw_mdc_a64_ds(unsigned elems)
722 {
723 switch (elems) {
724 case 1: return 0;
725 case 2: return 1;
726 case 4: return 2;
727 case 8: return 3;
728 default:
729 unreachable("Unsupported elmeent count for A64 scattered message");
730 }
731 }
732
/* Descriptor for an A64 (stateless) byte-scattered read or write. */
static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   /* Data size is encoded as element count of the native 8-bit subtype. */
   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
753
/* Descriptor for an A64 (stateless) untyped integer atomic. */
static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   /* 16-bit A64 atomics require the Gfx12 half-int message. */
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
777
/* Descriptor for an A64 (stateless) untyped float atomic. */
static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32);
   /* 16-bit float atomics require the Gfx12 half-float message. */
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   /* Float atomics only have a 2-bit opcode field. */
   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
801
/* Descriptor for a typed atomic dataport message. */
static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   const unsigned msg_type =
      exec_size == 0 ? HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 :
                       HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;

   /* Odd 8-channel group within a SIMD16 dispatch uses the high half of
    * the sample mask.
    */
   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
825
/* Descriptor for a typed surface read or write dataport message. */
static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                               1 + ((exec_group / 8) % 2);

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(slot_group, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
853
/* Construct a render-target (framebuffer) message descriptor.  Note the
 * message type field is 4 bits here ([17:14]), unlike the 5-bit dataport
 * field in brw_dp_desc().
 */
static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 17, 14);
}

/* Extract the binding table index from a render-target descriptor. */
static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/* Extract the message control field from a render-target descriptor. */
static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

/* Extract the message type from a render-target descriptor. */
static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}
883
/* Descriptor for a render-target read message. */
static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(exec_size == 8 || exec_size == 16);

   /* per_sample (bit 13) and the SIMD8 subtype (bit 8) are OR'd into the
    * message-control field of the base descriptor.
    */
   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}
898
/* Descriptor for a render-target write message. */
static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type = GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   /* Coarse-rate writes are only available on gfx ver 10+. */
   assert(devinfo->ver >= 10 || !coarse_write);

   return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(last_render_target, 12, 12) |
          SET_BITS(coarse_write, 18, 18);
}
914
static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   /* "Last Render Target Select" is bit 12 of the FB write descriptor. */
   return GET_BITS(desc, 12, 12) != 0;
}
921
922 static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info * devinfo,uint32_t desc)923 brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
924 uint32_t desc)
925 {
926 assert(devinfo->ver >= 10);
927 return GET_BITS(desc, 18, 18);
928 }
929
930 static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)931 lsc_opcode_has_cmask(enum lsc_opcode opcode)
932 {
933 return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
934 }
935
936 static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)937 lsc_opcode_has_transpose(enum lsc_opcode opcode)
938 {
939 return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
940 }
941
942 static inline bool
lsc_opcode_is_store(enum lsc_opcode opcode)943 lsc_opcode_is_store(enum lsc_opcode opcode)
944 {
945 return opcode == LSC_OP_STORE ||
946 opcode == LSC_OP_STORE_CMASK;
947 }
948
/* True for every LSC atomic opcode, both integer and floating-point. */
static inline bool
lsc_opcode_is_atomic(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}
978
/* True only for the floating-point subset of the LSC atomic opcodes. */
static inline bool
lsc_opcode_is_atomic_float(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}
994
/* Number of data operands the message payload carries per channel:
 * compare-exchange atomics need two (compare value and swap value);
 * loads, fences and operand-less atomics (INC/DEC) need none;
 * everything else needs exactly one.
 */
static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}
1015
/* Translate an LSC atomic opcode into the legacy HDC BRW_AOP_* encoding.
 * Aborts on LSC atomics that have no legacy counterpart (ATOMIC_LOAD,
 * ATOMIC_FSUB) and on non-atomic opcodes.
 */
static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return BRW_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return BRW_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return BRW_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return BRW_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return BRW_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return BRW_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return BRW_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return BRW_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return BRW_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return BRW_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return BRW_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return BRW_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return BRW_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return BRW_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return BRW_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return BRW_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return BRW_AOP_XOR;
   /* No LSC op maps to BRW_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}
1070
/* Size in bytes of one data element of the given LSC data size.
 * Note the D8U32/D16U32/D16BF32 variants count as 4 bytes — presumably
 * the 32-bit container size rather than the in-memory size; confirm
 * against the LSC documentation before relying on that distinction.
 */
static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}
1090
1091 static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)1092 lsc_addr_size_bytes(enum lsc_addr_size addr_size)
1093 {
1094 switch (addr_size) {
1095 case LSC_ADDR_SIZE_A16: return 2;
1096 case LSC_ADDR_SIZE_A32: return 4;
1097 case LSC_ADDR_SIZE_A64: return 8;
1098 default:
1099 unreachable("Unsupported address size.");
1100 }
1101 }
1102
/* Decode an LSC vector-size enum into the number of elements it denotes. */
static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}
1119
/* Encode an element count as an LSC vector-size enum; only the counts
 * listed here (1-4, 8, 16, 32, 64) are representable.
 */
static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1: return LSC_VECT_SIZE_V1;
   case 2: return LSC_VECT_SIZE_V2;
   case 3: return LSC_VECT_SIZE_V3;
   case 4: return LSC_VECT_SIZE_V4;
   case 8: return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}
1136
1137 static inline uint32_t
lsc_msg_desc(const struct intel_device_info * devinfo,enum lsc_opcode opcode,enum lsc_addr_surface_type addr_type,enum lsc_addr_size addr_sz,enum lsc_data_size data_sz,unsigned num_channels_or_cmask,bool transpose,unsigned cache_ctrl)1138 lsc_msg_desc(const struct intel_device_info *devinfo,
1139 enum lsc_opcode opcode,
1140 enum lsc_addr_surface_type addr_type,
1141 enum lsc_addr_size addr_sz,
1142 enum lsc_data_size data_sz, unsigned num_channels_or_cmask,
1143 bool transpose, unsigned cache_ctrl)
1144 {
1145 assert(devinfo->has_lsc);
1146 assert(!transpose || lsc_opcode_has_transpose(opcode));
1147
1148 unsigned msg_desc =
1149 SET_BITS(opcode, 5, 0) |
1150 SET_BITS(addr_sz, 8, 7) |
1151 SET_BITS(data_sz, 11, 9) |
1152 SET_BITS(transpose, 15, 15) |
1153 (devinfo->ver >= 20 ? SET_BITS(cache_ctrl, 19, 16) :
1154 SET_BITS(cache_ctrl, 19, 17)) |
1155 SET_BITS(addr_type, 30, 29);
1156
1157 if (lsc_opcode_has_cmask(opcode))
1158 msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12);
1159 else
1160 msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12);
1161
1162 return msg_desc;
1163 }
1164
1165 static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1166 lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
1167 uint32_t desc)
1168 {
1169 assert(devinfo->has_lsc);
1170 return (enum lsc_opcode) GET_BITS(desc, 5, 0);
1171 }
1172
1173 static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1174 lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
1175 uint32_t desc)
1176 {
1177 assert(devinfo->has_lsc);
1178 return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
1179 }
1180
1181 static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1182 lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
1183 uint32_t desc)
1184 {
1185 assert(devinfo->has_lsc);
1186 return (enum lsc_data_size) GET_BITS(desc, 11, 9);
1187 }
1188
1189 static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1190 lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
1191 uint32_t desc)
1192 {
1193 assert(devinfo->has_lsc);
1194 assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1195 return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
1196 }
1197
1198 static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1199 lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
1200 uint32_t desc)
1201 {
1202 assert(devinfo->has_lsc);
1203 assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
1204 return (enum lsc_cmask) GET_BITS(desc, 15, 12);
1205 }
1206
1207 static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1208 lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
1209 uint32_t desc)
1210 {
1211 assert(devinfo->has_lsc);
1212 return GET_BITS(desc, 15, 15);
1213 }
1214
1215 static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1216 lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
1217 uint32_t desc)
1218 {
1219 assert(devinfo->has_lsc);
1220 return devinfo->ver >= 20 ? GET_BITS(desc, 19, 16) : GET_BITS(desc, 19, 17);
1221 }
1222
1223 static inline unsigned
lsc_msg_dest_len(const struct intel_device_info * devinfo,enum lsc_data_size data_sz,unsigned n)1224 lsc_msg_dest_len(const struct intel_device_info *devinfo,
1225 enum lsc_data_size data_sz, unsigned n)
1226 {
1227 return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n,
1228 reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
1229 }
1230
1231 static inline unsigned
lsc_msg_addr_len(const struct intel_device_info * devinfo,enum lsc_addr_size addr_sz,unsigned n)1232 lsc_msg_addr_len(const struct intel_device_info *devinfo,
1233 enum lsc_addr_size addr_sz, unsigned n)
1234 {
1235 return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n,
1236 reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
1237 }
1238
1239 static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1240 lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
1241 uint32_t desc)
1242 {
1243 assert(devinfo->has_lsc);
1244 return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
1245 }
1246
1247 static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info * devinfo,enum lsc_fence_scope scope,enum lsc_flush_type flush_type,bool route_to_lsc)1248 lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
1249 enum lsc_fence_scope scope,
1250 enum lsc_flush_type flush_type,
1251 bool route_to_lsc)
1252 {
1253 assert(devinfo->has_lsc);
1254
1255 #if INTEL_NEEDS_WA_22017182272
1256 assert(flush_type != LSC_FLUSH_TYPE_DISCARD);
1257 #endif
1258
1259 return SET_BITS(LSC_OP_FENCE, 5, 0) |
1260 SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
1261 SET_BITS(scope, 11, 9) |
1262 SET_BITS(flush_type, 14, 12) |
1263 SET_BITS(route_to_lsc, 18, 18) |
1264 SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
1265 }
1266
1267 static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1268 lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
1269 uint32_t desc)
1270 {
1271 assert(devinfo->has_lsc);
1272 return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
1273 }
1274
1275 static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1276 lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
1277 uint32_t desc)
1278 {
1279 assert(devinfo->has_lsc);
1280 return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
1281 }
1282
1283 static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1284 lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
1285 uint32_t desc)
1286 {
1287 assert(devinfo->has_lsc);
1288 return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
1289 }
1290
1291 static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info * devinfo,unsigned bti)1292 lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
1293 {
1294 assert(devinfo->has_lsc);
1295 return SET_BITS(bti, 31, 24) |
1296 SET_BITS(0, 23, 12); /* base offset */
1297 }
1298
1299 static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1300 lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
1301 uint32_t ex_desc)
1302 {
1303 assert(devinfo->has_lsc);
1304 return GET_BITS(ex_desc, 23, 12);
1305 }
1306
1307 static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1308 lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
1309 uint32_t ex_desc)
1310 {
1311 assert(devinfo->has_lsc);
1312 return GET_BITS(ex_desc, 31, 24);
1313 }
1314
1315 static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info * devinfo,uint32_t ex_desc)1316 lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
1317 uint32_t ex_desc)
1318 {
1319 assert(devinfo->has_lsc);
1320 return GET_BITS(ex_desc, 31, 12);
1321 }
1322
1323 static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info * devinfo,unsigned surface_state_index)1324 lsc_bss_ex_desc(const struct intel_device_info *devinfo,
1325 unsigned surface_state_index)
1326 {
1327 assert(devinfo->has_lsc);
1328 return SET_BITS(surface_state_index, 31, 6);
1329 }
1330
1331 static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info * devinfo,uint32_t ex_desc)1332 lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
1333 uint32_t ex_desc)
1334 {
1335 assert(devinfo->has_lsc);
1336 return GET_BITS(ex_desc, 31, 6);
1337 }
1338
/* Encode an execution size as the one-bit SIMD-mode (SM2) field:
 * 0 for SIMD8, 1 for SIMD16.
 */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size == 16 ? 1 : 0;
}
1345
/* Decode the one-bit SIMD-mode (SM2) field back into an execution size:
 * 0 -> SIMD8, 1 -> SIMD16.
 */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return sm2 ? 16 : 8;
}
1352
1353 static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info * devinfo,unsigned exec_size,unsigned msg_type)1354 brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
1355 unsigned exec_size, unsigned msg_type)
1356 {
1357 assert(devinfo->has_ray_tracing);
1358 assert(devinfo->ver < 20 || exec_size == 16);
1359
1360 return SET_BITS(0, 19, 19) | /* No header */
1361 SET_BITS(msg_type, 17, 14) |
1362 SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1363 }
1364
1365 static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1366 brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
1367 uint32_t desc)
1368 {
1369 return GET_BITS(desc, 17, 14);
1370 }
1371
1372 static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1373 brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
1374 uint32_t desc)
1375 {
1376 return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1377 }
1378
1379 static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info * devinfo,unsigned exec_size)1380 brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
1381 unsigned exec_size)
1382 {
1383 assert(devinfo->has_ray_tracing);
1384 assert(devinfo->ver < 20 || exec_size == 16);
1385
1386 return SET_BITS(0, 19, 19) | /* No header */
1387 SET_BITS(0, 17, 14) | /* Message type */
1388 SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
1389 }
1390
1391 static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info * devinfo,uint32_t desc)1392 brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
1393 uint32_t desc)
1394 {
1395 return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
1396 }
1397
1398 /**
1399 * Construct a message descriptor immediate with the specified pixel
1400 * interpolator function controls.
1401 */
1402 static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info * devinfo,unsigned msg_type,bool noperspective,bool coarse_pixel_rate,unsigned exec_size,unsigned group)1403 brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
1404 unsigned msg_type,
1405 bool noperspective,
1406 bool coarse_pixel_rate,
1407 unsigned exec_size,
1408 unsigned group)
1409 {
1410 assert(exec_size == 8 || exec_size == 16);
1411 const bool simd_mode = exec_size == 16;
1412 const bool slot_group = group >= 16;
1413
1414 assert(devinfo->ver >= 10 || !coarse_pixel_rate);
1415 return (SET_BITS(slot_group, 11, 11) |
1416 SET_BITS(msg_type, 13, 12) |
1417 SET_BITS(!!noperspective, 14, 14) |
1418 SET_BITS(coarse_pixel_rate, 15, 15) |
1419 SET_BITS(simd_mode, 16, 16));
1420 }
1421
1422 /**
1423 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
1424 * desc. If \p desc is not an immediate it will be transparently loaded to an
1425 * address register using an OR instruction.
1426 */
1427 void
1428 brw_send_indirect_message(struct brw_codegen *p,
1429 unsigned sfid,
1430 struct brw_reg dst,
1431 struct brw_reg payload,
1432 struct brw_reg desc,
1433 bool eot);
1434
1435 void
1436 brw_send_indirect_split_message(struct brw_codegen *p,
1437 unsigned sfid,
1438 struct brw_reg dst,
1439 struct brw_reg payload0,
1440 struct brw_reg payload1,
1441 struct brw_reg desc,
1442 struct brw_reg ex_desc,
1443 unsigned ex_mlen,
1444 bool ex_bso,
1445 bool eot);
1446
1447 void gfx6_math(struct brw_codegen *p,
1448 struct brw_reg dest,
1449 unsigned function,
1450 struct brw_reg src0,
1451 struct brw_reg src1);
1452
1453 /**
1454 * Return the generation-specific jump distance scaling factor.
1455 *
1456 * Given the number of instructions to jump, we need to scale by
1457 * some number to obtain the actual jump distance to program in an
1458 * instruction.
1459 */
1460 static inline unsigned
brw_jump_scale(const struct intel_device_info * devinfo)1461 brw_jump_scale(const struct intel_device_info *devinfo)
1462 {
1463 /* Broadwell measures jump targets in bytes. */
1464 return 16;
1465 }
1466
1467 void brw_barrier(struct brw_codegen *p, struct brw_reg src);
1468
1469 /* If/else/endif. Works by manipulating the execution flags on each
1470 * channel.
1471 */
1472 brw_eu_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
1473
1474 void brw_ELSE(struct brw_codegen *p);
1475 void brw_ENDIF(struct brw_codegen *p);
1476
1477 /* DO/WHILE loops:
1478 */
1479 brw_eu_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);
1480
1481 brw_eu_inst *brw_WHILE(struct brw_codegen *p);
1482
1483 brw_eu_inst *brw_BREAK(struct brw_codegen *p);
1484 brw_eu_inst *brw_CONT(struct brw_codegen *p);
1485 brw_eu_inst *brw_HALT(struct brw_codegen *p);
1486
1487 /* Forward jumps:
1488 */
1489 brw_eu_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
1490 unsigned predicate_control);
1491
1492 void brw_NOP(struct brw_codegen *p);
1493
1494 void brw_WAIT(struct brw_codegen *p);
1495
1496 void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);
1497
1498 /* Special case: there is never a destination, execution size will be
1499 * taken from src0:
1500 */
1501 void brw_CMP(struct brw_codegen *p,
1502 struct brw_reg dest,
1503 unsigned conditional,
1504 struct brw_reg src0,
1505 struct brw_reg src1);
1506
1507 void brw_CMPN(struct brw_codegen *p,
1508 struct brw_reg dest,
1509 unsigned conditional,
1510 struct brw_reg src0,
1511 struct brw_reg src1);
1512
1513 brw_eu_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
1514 unsigned rcount, struct brw_reg dest, struct brw_reg src0,
1515 struct brw_reg src1, struct brw_reg src2);
1516
1517 void
1518 brw_memory_fence(struct brw_codegen *p,
1519 struct brw_reg dst,
1520 struct brw_reg src,
1521 enum opcode send_op,
1522 enum brw_message_target sfid,
1523 uint32_t desc,
1524 bool commit_enable,
1525 unsigned bti);
1526
1527 void
1528 brw_broadcast(struct brw_codegen *p,
1529 struct brw_reg dst,
1530 struct brw_reg src,
1531 struct brw_reg idx);
1532
1533 void
1534 brw_float_controls_mode(struct brw_codegen *p,
1535 unsigned mode, unsigned mask);
1536
1537 void
1538 brw_update_reloc_imm(const struct brw_isa_info *isa,
1539 brw_eu_inst *inst,
1540 uint32_t value);
1541
1542 void
1543 brw_MOV_reloc_imm(struct brw_codegen *p,
1544 struct brw_reg dst,
1545 enum brw_reg_type src_type,
1546 uint32_t id, uint32_t base);
1547
1548 unsigned
1549 brw_num_sources_from_inst(const struct brw_isa_info *isa,
1550 const brw_eu_inst *inst);
1551
1552 void brw_set_src1(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg);
1553
1554 void brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *insn,
1555 unsigned desc, unsigned ex_desc);
1556
/* Set the message descriptor on an instruction with no extended
 * descriptor (ex_desc = 0); convenience wrapper for brw_set_desc_ex().
 */
static inline void
brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}
1562
1563 void brw_set_uip_jip(struct brw_codegen *p, int start_offset);
1564
1565 enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
1566 enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);
1567
1568 /* brw_eu_compact.c */
1569 void brw_compact_instructions(struct brw_codegen *p, int start_offset,
1570 struct disasm_info *disasm);
1571 void brw_uncompact_instruction(const struct brw_isa_info *isa,
1572 brw_eu_inst *dst, brw_eu_compact_inst *src);
1573 bool brw_try_compact_instruction(const struct brw_isa_info *isa,
1574 brw_eu_compact_inst *dst, const brw_eu_inst *src);
1575
1576 void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
1577 brw_eu_inst *orig, brw_eu_inst *uncompacted);
1578
1579 /* brw_eu_validate.c */
1580 bool brw_validate_instruction(const struct brw_isa_info *isa,
1581 const brw_eu_inst *inst, int offset,
1582 unsigned inst_size,
1583 struct disasm_info *disasm);
1584 bool brw_validate_instructions(const struct brw_isa_info *isa,
1585 const void *assembly, int start_offset, int end_offset,
1586 struct disasm_info *disasm);
1587
1588 static inline int
next_offset(const struct intel_device_info * devinfo,void * store,int offset)1589 next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1590 {
1591 brw_eu_inst *insn = (brw_eu_inst *)((char *)store + offset);
1592
1593 if (brw_eu_inst_cmpt_control(devinfo, insn))
1594 return offset + 8;
1595 else
1596 return offset + 16;
1597 }
1598
1599 /** Maximum SEND message length */
1600 #define BRW_MAX_MSG_LENGTH 15
1601
1602 #ifdef __cplusplus
1603 }
1604 #endif
1605