/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#pragma once

#include <stdbool.h>
#include <stdio.h>
#include "brw_eu_inst.h"
#include "brw_compiler.h"
#include "brw_eu_defines.h"
#include "brw_isa_info.h"
#include "brw_reg.h"

#include "intel_wa.h"
#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct disasm_info;

#define BRW_EU_MAX_INSN_STACK 5

struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};


/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])

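/* Illustrative sketch (not compiled): after emitting an instruction, fields
 * on it can be adjusted through brw_last_inst without keeping the returned
 * pointer around, e.g. with the brw_eu_inst_set_group() helper declared
 * below.
 */
#if 0
   brw_ADD(p, dst, src0, src1);
   brw_eu_inst_set_group(p->devinfo, brw_last_inst, 8);
#endif
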
struct brw_codegen {
   brw_eu_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   const struct brw_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};

struct brw_label {
   int offset;
   int number;
   struct brw_label *next;
};

void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_eu_inst_set_group(const struct intel_device_info *devinfo,
                        brw_eu_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);

void brw_init_codegen(const struct brw_isa_info *isa,
                      struct brw_codegen *p, void *mem_ctx);
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_branch_ctrl(const struct intel_device_info *devinfo, enum opcode opcode);
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );

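/* Illustrative sketch (not compiled): typical lifecycle of a brw_codegen,
 * assuming `isa` and `mem_ctx` come from the caller and that ralloc is used
 * for the allocation, as elsewhere in the compiler.
 */
#if 0
   struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
   brw_init_codegen(isa, p, mem_ctx);

   brw_push_insn_state(p);
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_MOV(p, dst, src);
   brw_pop_insn_state(p);

   unsigned sz;
   const unsigned *assembly = brw_get_program(p, &sz);
#endif
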
bool brw_should_dump_shader_bin(void);
void brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

void brw_realign(struct brw_codegen *p, unsigned alignment);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned alignment);
brw_eu_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void brw_set_dest(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg);

/* Helpers for regular instructions:
 */
#define ALU1(OP)				\
brw_eu_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0);

#define ALU2(OP)				\
brw_eu_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1);

#define ALU3(OP)				\
brw_eu_inst *brw_##OP(struct brw_codegen *p,	\
	      struct brw_reg dest,		\
	      struct brw_reg src0,		\
	      struct brw_reg src1,		\
	      struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3
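
/* For reference, ALU2(ADD) above expands to the prototype:
 *
 *    brw_eu_inst *brw_ADD(struct brw_codegen *p, struct brw_reg dest,
 *                         struct brw_reg src0, struct brw_reg src1);
 */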

static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return devinfo->ver >= 20 ? 2 : 1;
}


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
brw_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   assert(msg_length % reg_unit(devinfo) == 0);
   assert(response_length % reg_unit(devinfo) == 0);
   return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
           SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
           SET_BITS(header_present, 19, 19));
}

static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 19, 19);
}

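/* Illustrative sketch (not compiled): the accessors above undo
 * brw_message_desc(), with lengths expressed in reg_unit() multiples, so a
 * descriptor built for a two-register payload and a one-register response
 * with a header round-trips as follows.
 */
#if 0
   const uint32_t desc = brw_message_desc(devinfo, 2 * reg_unit(devinfo),
                                          1 * reg_unit(devinfo), true);
   assert(brw_message_desc_mlen(devinfo, desc) == 2 * reg_unit(devinfo));
   assert(brw_message_desc_rlen(devinfo, desc) == 1 * reg_unit(devinfo));
   assert(brw_message_desc_header_present(devinfo, desc));
#endif
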
static inline unsigned
brw_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return devinfo->ver >= 20 ?
      SET_BITS(ex_msg_length / reg_unit(devinfo), 10, 6) :
      SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
brw_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return devinfo->ver >= 20 ?
      GET_BITS(ex_desc, 10, 6) * reg_unit(devinfo) :
      GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   return (SET_BITS(per_slot_offset_present, 17, 17) |
           SET_BITS(channel_mask_present, 15, 15) |
           SET_BITS(global_offset, 14, 4) |
           SET_BITS(msg_type, 3, 0));
}

static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   return GET_BITS(desc, 3, 0);
}

static inline uint32_t
brw_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return brw_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
brw_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From GFX20 Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *    Message Type[5]  31  This bit represents the upper bit of message type
    *                         6-bit encoding (c.f. [16:12]). This bit is set
    *                         for messages with programmable offsets.
    */
   if (devinfo->ver >= 20)
      return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30) |
             SET_BITS(msg_type >> 5, 31, 31);

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
    *                       SIMD Mode field.
    */
   return desc | SET_BITS(msg_type, 16, 12) |
          SET_BITS(simd_mode & 0x3, 18, 17) |
          SET_BITS(simd_mode >> 2, 29, 29) |
          SET_BITS(return_format, 30, 30);
}

static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else
      return GET_BITS(desc, 16, 12);
}

static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
}

static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   return GET_BITS(desc, 30, 30);
}

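/* Illustrative sketch (not compiled): building a SIMD16 sample message
 * descriptor, assuming the usual message-type/SIMD-mode defines from
 * brw_eu_defines.h.
 */
#if 0
   const uint32_t desc =
      brw_sampler_desc(devinfo, /* binding_table_index */ 0,
                       /* sampler */ 0,
                       GFX5_SAMPLER_MESSAGE_SAMPLE,
                       BRW_SAMPLER_SIMD_MODE_SIMD16,
                       BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
   assert(brw_sampler_desc_msg_type(devinfo, desc) ==
          GFX5_SAMPLER_MESSAGE_SAMPLE);
   assert(brw_sampler_desc_simd_mode(devinfo, desc) ==
          BRW_SAMPLER_SIMD_MODE_SIMD16);
#endif
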
/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
brw_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 18, 14);
}

static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 18, 14);
}

static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
brw_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
}

static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
brw_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(!send_commit_msg);
   return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(send_commit_msg, 17, 17);
}

static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   return brw_dp_desc_msg_type(devinfo, desc);
}

static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   return brw_dp_desc_msg_control(devinfo, desc);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   /* We'll OR in the binding table index later */
   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (exec_size > 0) {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
   } else {
      msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}

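/* For example, brw_mdc_cmask(1) == 0xe, brw_mdc_cmask(2) == 0xc and
 * brw_mdc_cmask(4) == 0x0: the low num_channels bits are cleared and the
 * rest of the 4-bit mask stays set.
 */
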
static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   const unsigned msg_type =
      write ? GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE :
              GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
brw_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
      unreachable("Unsupported element count for A64 scattered message");
   }
}

static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(bit_size == 16 || bit_size == 32);
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   const unsigned msg_type =
      exec_size == 0 ? HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2 :
                       HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE :
              HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                               1 + ((exec_group / 8) % 2);

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(slot_group, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   return SET_BITS(binding_table_index, 7, 0) |
          SET_BITS(msg_control, 13, 8) |
          SET_BITS(msg_type, 17, 14);
}

static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 13, 8);
}

static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type = GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
          SET_BITS(last_render_target, 12, 12) |
          SET_BITS(coarse_write, 18, 18);
}

static inline bool
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 12, 12);
}

static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
lsc_opcode_has_transpose(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
lsc_opcode_is_store(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
lsc_opcode_is_atomic(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
lsc_opcode_is_atomic_float(enum lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum lsc_opcode op = (enum lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return BRW_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return BRW_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return BRW_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return BRW_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return BRW_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return BRW_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return BRW_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return BRW_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return BRW_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return BRW_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return BRW_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return BRW_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return BRW_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return BRW_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return BRW_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return BRW_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return BRW_AOP_XOR;
   /* No LSC op maps to BRW_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

static inline uint32_t
lsc_msg_desc(const struct intel_device_info *devinfo,
             enum lsc_opcode opcode,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz,
             enum lsc_data_size data_sz, unsigned num_channels_or_cmask,
             bool transpose, unsigned cache_ctrl)
{
   assert(devinfo->has_lsc);
   assert(!transpose || lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      (devinfo->ver >= 20 ? SET_BITS(cache_ctrl, 19, 16) :
                            SET_BITS(cache_ctrl, 19, 17)) |
      SET_BITS(addr_type, 30, 29);

   if (lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(num_channels_or_cmask, 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels_or_cmask), 14, 12);

   return msg_desc;
}

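/* Illustrative sketch (not compiled): a transposed A32 flat load of four
 * dwords, assuming the lsc_* enums from brw_eu_defines.h and a cache_ctrl
 * value appropriate for the platform.
 */
#if 0
   const uint32_t desc =
      lsc_msg_desc(devinfo, LSC_OP_LOAD, LSC_ADDR_SURFTYPE_FLAT,
                   LSC_ADDR_SIZE_A32, LSC_DATA_SIZE_D32,
                   /* num_channels_or_cmask */ 4,
                   /* transpose */ true, cache_ctrl);
   assert(lsc_msg_desc_opcode(devinfo, desc) == LSC_OP_LOAD);
   assert(lsc_msg_desc_vect_size(devinfo, desc) == LSC_VECT_SIZE_V4);
#endif
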
static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return devinfo->ver >= 20 ? GET_BITS(desc, 19, 16) : GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_dest_len(const struct intel_device_info *devinfo,
                 enum lsc_data_size data_sz, unsigned n)
{
   return DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * n,
                       reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}

static inline unsigned
lsc_msg_addr_len(const struct intel_device_info *devinfo,
                 enum lsc_addr_size addr_sz, unsigned n)
{
   return DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * n,
                       reg_unit(devinfo) * REG_SIZE) * reg_unit(devinfo);
}

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);

#if INTEL_NEEDS_WA_22017182272
   assert(flush_type != LSC_FLUSH_TYPE_DISCARD);
#endif

   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}

static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc.  If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          bool eot);

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                struct brw_reg ex_desc,
                                unsigned ex_mlen,
                                bool ex_bso,
                                bool eot);

void gfx6_math(struct brw_codegen *p,
	       struct brw_reg dest,
	       unsigned function,
	       struct brw_reg src0,
	       struct brw_reg src1);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   return 16;
}

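/* For example, a branch that skips three full-size (16-byte) instructions is
 * programmed with a jump distance of 3 * brw_jump_scale(devinfo) == 48.
 */
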
void brw_barrier(struct brw_codegen *p, struct brw_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
brw_eu_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);

void brw_ELSE(struct brw_codegen *p);
void brw_ENDIF(struct brw_codegen *p);

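/* Illustrative sketch (not compiled): a minimal IF/ELSE/ENDIF block.  The
 * matching ENDIF patches the earlier IF and ELSE via p->if_stack, as
 * described in struct brw_codegen above.
 */
#if 0
   brw_IF(p, BRW_EXECUTE_8);
   /* ... instructions for the "then" side ... */
   brw_ELSE(p);
   /* ... instructions for the "else" side ... */
   brw_ENDIF(p);
#endif
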
/* DO/WHILE loops:
 */
brw_eu_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);

brw_eu_inst *brw_WHILE(struct brw_codegen *p);

brw_eu_inst *brw_BREAK(struct brw_codegen *p);
brw_eu_inst *brw_CONT(struct brw_codegen *p);
brw_eu_inst *brw_HALT(struct brw_codegen *p);

/* Forward jumps:
 */
brw_eu_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
                   unsigned predicate_control);

void brw_NOP(struct brw_codegen *p);

void brw_WAIT(struct brw_codegen *p);

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_codegen *p,
	     struct brw_reg dest,
	     unsigned conditional,
	     struct brw_reg src0,
	     struct brw_reg src1);

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1);

brw_eu_inst *brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
                   unsigned rcount, struct brw_reg dest, struct brw_reg src0,
                   struct brw_reg src1, struct brw_reg src2);

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask);

void
brw_update_reloc_imm(const struct brw_isa_info *isa,
                     brw_eu_inst *inst,
                     uint32_t value);

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id, uint32_t base);

unsigned
brw_num_sources_from_inst(const struct brw_isa_info *isa,
                          const brw_eu_inst *inst);

void brw_set_src1(struct brw_codegen *p, brw_eu_inst *insn, struct brw_reg reg);

void brw_set_desc_ex(struct brw_codegen *p, brw_eu_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
brw_set_desc(struct brw_codegen *p, brw_eu_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}

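/* Illustrative sketch (not compiled): the low-level way to assemble a SEND,
 * assuming `dst`, `payload`, `mlen` and `rlen` come from the caller; the
 * SFID and any indirect descriptor handling are taken care of by
 * brw_send_indirect_message() declared above.
 */
#if 0
   brw_eu_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND);
   brw_set_dest(p, insn, dst);
   brw_set_src0(p, insn, payload);
   brw_set_desc(p, insn, brw_message_desc(p->devinfo, mlen, rlen, false));
#endif
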
void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);

/* brw_eu_compact.c */
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
                              struct disasm_info *disasm);
void brw_uncompact_instruction(const struct brw_isa_info *isa,
                               brw_eu_inst *dst, brw_eu_compact_inst *src);
bool brw_try_compact_instruction(const struct brw_isa_info *isa,
                                 brw_eu_compact_inst *dst, const brw_eu_inst *src);

void brw_debug_compact_uncompact(const struct brw_isa_info *isa,
                                 brw_eu_inst *orig, brw_eu_inst *uncompacted);

/* brw_eu_validate.c */
bool brw_validate_instruction(const struct brw_isa_info *isa,
                              const brw_eu_inst *inst, int offset,
                              unsigned inst_size,
                              struct disasm_info *disasm);
bool brw_validate_instructions(const struct brw_isa_info *isa,
                               const void *assembly, int start_offset, int end_offset,
                               struct disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   brw_eu_inst *insn = (brw_eu_inst *)((char *)store + offset);

   if (brw_eu_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}

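/* Illustrative sketch (not compiled): walking a range of the instruction
 * store with next_offset(), which accounts for compacted (8-byte)
 * instructions.
 */
#if 0
   for (int offset = start_offset; offset < end_offset;
        offset = next_offset(devinfo, store, offset)) {
      brw_eu_inst *insn = (brw_eu_inst *)((char *)store + offset);
      /* ... inspect insn ... */
   }
#endif
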
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

#ifdef __cplusplus
}
#endif