/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */


#ifndef ELK_EU_H
#define ELK_EU_H

#include <stdbool.h>
#include <stdio.h>
#include "elk_inst.h"
#include "elk_compiler.h"
#include "elk_eu_defines.h"
#include "elk_isa_info.h"
#include "elk_reg.h"

#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct elk_disasm_info;

#define ELK_EU_MAX_INSN_STACK 5

struct elk_insn_state {
   /* One of ELK_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of ELK_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   bool saturate:1;

   /* One of ELK_ALIGN_* */
   unsigned access_mode:1;

   /* One of ELK_PREDICATE_* */
   enum elk_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};

/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define elk_last_inst (&p->store[p->nr_insn - 1])
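
/* A minimal usage sketch (illustrative, not part of this header): emit an
 * instruction, then patch fields on it through elk_last_inst, e.g.
 *
 *    elk_ADD(p, dst, src0, src1);
 *    elk_inst_set_pred_control(p->devinfo, elk_last_inst,
 *                              ELK_PREDICATE_NORMAL);
 *
 * elk_inst_set_pred_control is assumed here to be one of the field setters
 * from elk_inst.h; the point is only that elk_last_inst names the
 * instruction most recently written to p->store.
 */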

struct elk_codegen {
   elk_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct elk_insn_state stack[ELK_EU_MAX_INSN_STACK];
   struct elk_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register.  If false, it
    * will take whatever is set by elk_set_default_exec_size verbatim.
    *
    * This is set to true by default in elk_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct elk_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   /**
    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct elk_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};

struct elk_label {
   int offset;
   int number;
   struct elk_label *next;
};

void elk_pop_insn_state( struct elk_codegen *p );
void elk_push_insn_state( struct elk_codegen *p );
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
struct tgl_swsb elk_get_default_swsb(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode );
void elk_inst_set_compression(const struct intel_device_info *devinfo,
                              elk_inst *inst, bool on);
void elk_set_default_compression(struct elk_codegen *p, bool on);
void elk_inst_set_group(const struct intel_device_info *devinfo,
                        elk_inst *inst, unsigned group);
void elk_set_default_group(struct elk_codegen *p, unsigned group);
void elk_set_default_compression_control(struct elk_codegen *p, enum elk_compression c);
void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc);
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);
void elk_set_default_swsb(struct elk_codegen *p, struct tgl_swsb value);

void elk_init_codegen(const struct elk_isa_info *isa,
                      struct elk_codegen *p, void *mem_ctx);
bool elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
const struct elk_shader_reloc *elk_get_shader_relocs(struct elk_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *elk_get_program( struct elk_codegen *p, unsigned *sz );

bool elk_should_dump_shader_bin(void);
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier);

void elk_realign(struct elk_codegen *p, unsigned alignment);
int elk_append_data(struct elk_codegen *p, void *data,
                    unsigned size, unsigned alignment);
elk_inst *elk_next_insn(struct elk_codegen *p, unsigned opcode);
void elk_add_reloc(struct elk_codegen *p, uint32_t id,
                   enum elk_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void elk_set_dest(struct elk_codegen *p, elk_inst *insn, struct elk_reg dest);
void elk_set_src0(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_gfx6_resolve_implied_move(struct elk_codegen *p,
			       struct elk_reg *src,
			       unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
#define ALU1(OP)				\
elk_inst *elk_##OP(struct elk_codegen *p,	\
	      struct elk_reg dest,		\
	      struct elk_reg src0);

#define ALU2(OP)				\
elk_inst *elk_##OP(struct elk_codegen *p,	\
	      struct elk_reg dest,		\
	      struct elk_reg src0,		\
	      struct elk_reg src1);

#define ALU3(OP)				\
elk_inst *elk_##OP(struct elk_codegen *p,	\
	      struct elk_reg dest,		\
	      struct elk_reg src0,		\
	      struct elk_reg src1,		\
	      struct elk_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU3(ADD3)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3

static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return devinfo->ver >= 20 ? 2 : 1;
}
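
/* Note: Xe2 (ver >= 20) doubles the native GRF size to 64 bytes, so message
 * lengths that descriptors encode in native GRFs are divided or multiplied
 * by reg_unit() when converting from or to the legacy 32-byte-register
 * counts used throughout this file.  E.g. a 4-register payload is encoded
 * as 4 / reg_unit(devinfo) == 2 on Xe2 and as 4 everywhere else.
 */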


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
elk_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      assert(msg_length % reg_unit(devinfo) == 0);
      assert(response_length % reg_unit(devinfo) == 0);
      return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
              SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}
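
/* Round-trip sketch (illustrative, assuming a gfx9 devinfo where
 * reg_unit() == 1): a 3-GRF payload with a header and a 2-GRF response:
 *
 *    uint32_t desc = elk_message_desc(devinfo, 3, 2, true);
 *    assert(elk_message_desc_mlen(devinfo, desc) == 3);
 *    assert(elk_message_desc_rlen(devinfo, desc) == 2);
 *    assert(elk_message_desc_header_present(devinfo, desc));
 *
 * The getters below perform the matching decode.
 */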

static inline unsigned
elk_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 23, 20);
}

static inline unsigned
elk_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 19, 16);
}

static inline bool
elk_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
elk_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
elk_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
elk_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

static inline uint32_t
elk_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}

static inline uint32_t
elk_urb_fence_desc(const struct intel_device_info *devinfo)
{
   assert(devinfo->has_lsc);
   return elk_urb_desc(devinfo, GFX125_URB_OPCODE_FENCE, false, false, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
elk_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From GFX20 Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *    Message Type[5]  31  This bit represents the upper bit of message type
    *                         6-bit encoding (c.f. [16:12]). This bit is set
    *                         for messages with programmable offsets.
    */
   if (devinfo->ver >= 20)
      return desc | SET_BITS(msg_type & 0x1F, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30) |
             SET_BITS(msg_type >> 5, 31, 31);

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29    This field is the upper bit of the 3-bit
    *                       SIMD Mode field.
    */
   if (devinfo->ver >= 8)
      return desc | SET_BITS(msg_type, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30);
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->verx10 >= 45)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}
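
/* Illustrative example (gfx9 encoding): a SIMD16 "sample" message using
 * binding table entry 0 and sampler 0 could be built as
 *
 *    uint32_t desc = elk_sampler_desc(devinfo, 0, 0,
 *                                     GFX5_SAMPLER_MESSAGE_SAMPLE,
 *                                     ELK_SAMPLER_SIMD_MODE_SIMD16, 0);
 *
 * where GFX5_SAMPLER_MESSAGE_SAMPLE and ELK_SAMPLER_SIMD_MODE_SIMD16 are
 * assumed to come from elk_eu_defines.h.  The getters below recover each
 * field from the packed descriptor.
 */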

static inline unsigned
elk_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 20)
      return GET_BITS(desc, 31, 31) << 5 | GET_BITS(desc, 16, 12);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

static inline unsigned
elk_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

static inline unsigned
elk_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}

/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
elk_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}
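
/* Round-trip sketch for the gfx8+ layout (msg_type in bits 18:14,
 * msg_control in 13:8, binding table index in 7:0):
 *
 *    uint32_t desc = elk_dp_desc(devinfo, 5, msg_type, msg_control);
 *    assert(elk_dp_desc_binding_table_index(devinfo, desc) == 5);
 *    assert(elk_dp_desc_msg_type(devinfo, desc) == msg_type);
 *    assert(elk_dp_desc_msg_control(devinfo, desc) == msg_control);
 *
 * where msg_type and msg_control stand for any in-range values.
 */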

static inline unsigned
elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline unsigned
elk_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
elk_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->verx10 >= 45)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}

static inline unsigned
elk_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

static inline unsigned
elk_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
elk_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}

static inline unsigned
elk_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
elk_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return elk_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 9);

   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}

static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
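
/* Worked values for the two channel-mask encodings above: MDC_CMASK marks
 * the channels *not* written (the upper, disabled ones), while the LSC mask
 * marks the enabled ones.  For example:
 *
 *    elk_mdc_cmask(2) == 0xc     lsc_cmask(2) == 0x3
 *    elk_mdc_cmask(4) == 0x0     lsc_cmask(4) == 0xf
 */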

static inline uint32_t
elk_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
elk_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(elk_mdc_ds(bit_size), 3, 2);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = ELK_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = ELK_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
elk_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1:  return 0;
   case 2:  return 1;
   case 4:  return 2;
   case 8:  return 3;
   default:
912       unreachable("Unsupported elmeent count for A64 scattered message");
   }
}

static inline uint32_t
elk_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(elk_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 9);
   assert(bit_size == 16 || bit_size == 32);
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
elk_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

static inline unsigned
elk_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline uint32_t
elk_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   assert(devinfo->ver >= 9);
   assert(exec_size == 8 || exec_size == 16);

   return elk_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}

static inline uint32_t
elk_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(devinfo->ver >= 10 || !coarse_write);

   if (devinfo->ver >= 6) {
      return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}
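
/* Illustrative example (gfx6+ path): the final render-target write of a
 * shader, to binding table entry 0, could be described as
 *
 *    uint32_t desc = elk_fb_write_desc(devinfo, 0, msg_control,
 *                                      true, false);
 *
 * with last_render_target == true and coarse_write == false; msg_control
 * stands for a valid render-target message control value.  The getters
 * below decode these fields again, e.g.
 * elk_fb_write_desc_last_render_target(devinfo, desc) returns true.
 */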

static inline unsigned
elk_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

static inline bool
elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

static inline bool
elk_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}

static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_is_atomic(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return ELK_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return ELK_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return ELK_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return ELK_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return ELK_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return ELK_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return ELK_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return ELK_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return ELK_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return ELK_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return ELK_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return ELK_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return ELK_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return ELK_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return ELK_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return ELK_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return ELK_AOP_XOR;
   /* No LSC op maps to ELK_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}
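
/* Illustrative usage: lowering an LSC atomic opcode to the legacy dataport
 * AOP encoding, e.g. when emitting the pre-LSC untyped atomic messages
 * defined above:
 *
 *    unsigned aop = lsc_op_to_legacy_atomic(LSC_OP_ATOMIC_ADD);
 *    uint32_t desc = elk_dp_untyped_atomic_desc(devinfo, 8, aop, true);
 *
 * Here aop == ELK_AOP_ADD.  LSC_OP_ATOMIC_LOAD and LSC_OP_ATOMIC_FSUB have
 * no legacy equivalent and hit unreachable() instead.
 */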

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch (vect_size) {
   case 1:  return LSC_VECT_SIZE_V1;
   case 2:  return LSC_VECT_SIZE_V2;
   case 3:  return LSC_VECT_SIZE_V3;
   case 4:  return LSC_VECT_SIZE_V4;
   case 8:  return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}
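
/* lsc_vect_size() and lsc_vector_length() are inverses over the supported
 * vector lengths {1, 2, 3, 4, 8, 16, 32, 64}; for any such n,
 * lsc_vector_length(lsc_vect_size(n)) == n.
 */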
1416 
1417 static inline uint32_t
lsc_msg_desc_wcmask(UNUSED const struct intel_device_info * devinfo,enum elk_lsc_opcode opcode,unsigned simd_size,enum lsc_addr_surface_type addr_type,enum lsc_addr_size addr_sz,unsigned num_coordinates,enum lsc_data_size data_sz,unsigned num_channels,bool transpose,unsigned cache_ctrl,bool has_dest,unsigned cmask)1418 lsc_msg_desc_wcmask(UNUSED const struct intel_device_info *devinfo,
1419              enum elk_lsc_opcode opcode, unsigned simd_size,
1420              enum lsc_addr_surface_type addr_type,
1421              enum lsc_addr_size addr_sz, unsigned num_coordinates,
1422              enum lsc_data_size data_sz, unsigned num_channels,
1423              bool transpose, unsigned cache_ctrl, bool has_dest, unsigned cmask)
1424 {
1425    assert(devinfo->has_lsc);
1426 
1427    unsigned dest_length = !has_dest ? 0 :
1428       DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
1429                    reg_unit(devinfo) * REG_SIZE);
1430 
1431    unsigned src0_length =
1432       DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
1433                    reg_unit(devinfo) * REG_SIZE);
1434 
1435    assert(!transpose || elk_lsc_opcode_has_transpose(opcode));
1436 
1437    unsigned msg_desc =
1438       SET_BITS(opcode, 5, 0) |
1439       SET_BITS(addr_sz, 8, 7) |
1440       SET_BITS(data_sz, 11, 9) |
1441       SET_BITS(transpose, 15, 15) |
1442       SET_BITS(cache_ctrl, 19, 17) |
1443       SET_BITS(dest_length, 24, 20) |
1444       SET_BITS(src0_length, 28, 25) |
1445       SET_BITS(addr_type, 30, 29);
1446 
1447    if (elk_lsc_opcode_has_cmask(opcode))
1448       msg_desc |= SET_BITS(cmask ? cmask : lsc_cmask(num_channels), 15, 12);
1449    else
1450       msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);
1451 
1452    return msg_desc;
1453 }
1454 
1455 static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info * devinfo,enum elk_lsc_opcode opcode,unsigned simd_size,enum lsc_addr_surface_type addr_type,enum lsc_addr_size addr_sz,unsigned num_coordinates,enum lsc_data_size data_sz,unsigned num_channels,bool transpose,unsigned cache_ctrl,bool has_dest)1456 lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
1457              enum elk_lsc_opcode opcode, unsigned simd_size,
1458              enum lsc_addr_surface_type addr_type,
1459              enum lsc_addr_size addr_sz, unsigned num_coordinates,
1460              enum lsc_data_size data_sz, unsigned num_channels,
1461              bool transpose, unsigned cache_ctrl, bool has_dest)
1462 {
1463    return lsc_msg_desc_wcmask(devinfo, opcode, simd_size, addr_type, addr_sz,
1464          num_coordinates, data_sz, num_channels, transpose, cache_ctrl,
1465          has_dest, 0);
1466 }
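
/* Usage sketch (illustrative only): encode a descriptor for a SIMD16,
 * single-channel, 32-bit load from a flat 32-bit address.  LSC_OP_LOAD and
 * LSC_DATA_SIZE_D32 are assumed spellings in this enum family, and
 * cache_ctrl 0 stands in for a real cache-policy encoding:
 *
 *    uint32_t desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, 16,
 *                                 LSC_ADDR_SURFTYPE_FLAT, LSC_ADDR_SIZE_A32,
 *                                 1,                  // num_coordinates
 *                                 LSC_DATA_SIZE_D32,
 *                                 1,                  // num_channels
 *                                 false,              // transpose
 *                                 0,                  // cache_ctrl
 *                                 true);              // has_dest
 */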

static inline enum elk_lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum elk_lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}
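
/* These accessors invert lsc_msg_desc_wcmask() field by field; e.g. for the
 * load sketch above (illustrative, same assumed enumerant spellings):
 *
 *    assert(lsc_msg_desc_opcode(devinfo, desc) == LSC_OP_LOAD);
 *    assert(lsc_msg_desc_addr_size(devinfo, desc) == LSC_ADDR_SIZE_A32);
 *    assert(lsc_msg_desc_vect_size(devinfo, desc) == LSC_VECT_SIZE_V1);
 */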

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}
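
/* Minimal sketch: a fence descriptor routed to the LSC.  The scope and
 * flush-type enumerant spellings used here (LSC_FENCE_THREADGROUP,
 * LSC_FLUSH_TYPE_NONE) are assumptions about this enum family:
 *
 *    uint32_t fence = lsc_fence_msg_desc(devinfo, LSC_FENCE_THREADGROUP,
 *                                        LSC_FLUSH_TYPE_NONE, true);
 */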

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12);  /* base offset */
}
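
/* Round-trip sketch (illustrative): the binding table index lands in bits
 * 31:24 and the base offset is always encoded as zero here:
 *
 *    uint32_t ex_desc = lsc_bti_ex_desc(devinfo, 5);
 *    assert(lsc_bti_ex_desc_index(devinfo, ex_desc) == 5);
 *    assert(lsc_bti_ex_desc_base_offset(devinfo, ex_desc) == 0);
 */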

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

static inline uint32_t
elk_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
elk_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}
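
/* The SIMD mode bit and the execution size round-trip through these two
 * helpers: elk_mdc_sm2(8) == 0 and elk_mdc_sm2(16) == 1, while
 * elk_mdc_sm2_exec_size() maps them back, since 8 << 0 == 8 and
 * 8 << 1 == 16.
 */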

static inline uint32_t
elk_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
}

static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
elk_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return elk_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

static inline uint32_t
elk_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);
   assert(devinfo->ver < 20 || exec_size == 16);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(elk_mdc_sm2(exec_size), 8, 8);
}
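
/* Sketch (illustrative): a SIMD8 trace-ray descriptor on a pre-Gfx20 part
 * (Gfx20+ requires exec_size == 16 per the assert above); the message-type
 * field is always zero for trace-ray:
 *
 *    uint32_t desc = elk_rt_trace_ray_desc(devinfo, 8);
 *    assert(elk_btd_spawn_exec_size(devinfo, desc) == 8);
 */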

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}
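
/* Sketch (illustrative): a SIMD16 non-perspective query for the second
 * 16-channel slot group; msg_type encodes the interpolation mode and is
 * left symbolic here:
 *
 *    uint32_t desc = elk_pixel_interp_desc(devinfo, msg_type,
 *                                          true,   // noperspective
 *                                          false,  // coarse_pixel_rate
 *                                          16,     // exec_size
 *                                          16);    // group -> slot_group = 1
 */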

void elk_urb_WRITE(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   enum elk_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc.  If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
elk_send_indirect_message(struct elk_codegen *p,
                          unsigned sfid,
                          struct elk_reg dst,
                          struct elk_reg payload,
                          struct elk_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
elk_send_indirect_split_message(struct elk_codegen *p,
                                unsigned sfid,
                                struct elk_reg dst,
                                struct elk_reg payload0,
                                struct elk_reg payload1,
                                struct elk_reg desc,
                                unsigned desc_imm,
                                struct elk_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot);

void elk_ff_sync(struct elk_codegen *p,
                 struct elk_reg dest,
                 unsigned msg_reg_nr,
                 struct elk_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void elk_svb_write(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

elk_inst *elk_fb_WRITE(struct elk_codegen *p,
                       struct elk_reg payload,
                       struct elk_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

elk_inst *elk_gfx9_fb_READ(struct elk_codegen *p,
                           struct elk_reg dst,
                           struct elk_reg payload,
                           unsigned binding_table_index,
                           unsigned msg_length,
                           unsigned response_length,
                           bool per_sample);

void elk_SAMPLE(struct elk_codegen *p,
                struct elk_reg dest,
                unsigned msg_reg_nr,
                struct elk_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
                                      struct elk_reg header,
                                      struct elk_reg sampler_index);

void elk_gfx4_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   unsigned msg_reg_nr,
                   struct elk_reg src,
                   unsigned precision);

void elk_gfx6_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   struct elk_reg src0,
                   struct elk_reg src1);

void elk_oword_block_read(struct elk_codegen *p,
                          struct elk_reg dest,
                          struct elk_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned elk_scratch_surface_idx(const struct elk_codegen *p);

void elk_oword_block_read_scratch(struct elk_codegen *p,
                                  struct elk_reg dest,
                                  struct elk_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void elk_oword_block_write_scratch(struct elk_codegen *p,
                                   struct elk_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void elk_gfx7_block_read_scratch(struct elk_codegen *p,
                                 struct elk_reg dest,
                                 int num_regs,
                                 unsigned offset);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
elk_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
    * to support compaction), so each 128-bit instruction requires 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}
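
/* Worked example: a jump over 3 instructions is programmed as
 * 3 * elk_jump_scale(devinfo), i.e. 48 on Gfx8+ (bytes), 6 on Gfx5-7
 * (64-bit chunks), and 3 on Gfx4 (whole instructions).
 */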

void elk_barrier(struct elk_codegen *p, struct elk_reg src);

/* If/else/endif.  Works by manipulating the execution flags on each
 * channel.
 */
elk_inst *elk_IF(struct elk_codegen *p, unsigned execute_size);
elk_inst *elk_gfx6_IF(struct elk_codegen *p, enum elk_conditional_mod conditional,
                      struct elk_reg src0, struct elk_reg src1);

void elk_ELSE(struct elk_codegen *p);
void elk_ENDIF(struct elk_codegen *p);
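
/* Typical structured control-flow emission (sketch; ELK_EXECUTE_8 is
 * assumed to be one of the ELK_EXECUTE_* execution sizes):
 *
 *    elk_IF(p, ELK_EXECUTE_8);
 *    ...                        // "then" block instructions
 *    elk_ELSE(p);
 *    ...                        // "else" block instructions
 *    elk_ENDIF(p);
 */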

/* DO/WHILE loops:
 */
elk_inst *elk_DO(struct elk_codegen *p, unsigned execute_size);

elk_inst *elk_WHILE(struct elk_codegen *p);

elk_inst *elk_BREAK(struct elk_codegen *p);
elk_inst *elk_CONT(struct elk_codegen *p);
elk_inst *elk_HALT(struct elk_codegen *p);

/* Forward jumps:
 */
void elk_land_fwd_jump(struct elk_codegen *p, int jmp_insn_idx);

elk_inst *elk_JMPI(struct elk_codegen *p, struct elk_reg index,
                   unsigned predicate_control);

void elk_NOP(struct elk_codegen *p);

void elk_WAIT(struct elk_codegen *p);

void elk_SYNC(struct elk_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void elk_CMP(struct elk_codegen *p,
             struct elk_reg dest,
             unsigned conditional,
             struct elk_reg src0,
             struct elk_reg src1);

void elk_CMPN(struct elk_codegen *p,
              struct elk_reg dest,
              unsigned conditional,
              struct elk_reg src0,
              struct elk_reg src1);

elk_inst *elk_DPAS(struct elk_codegen *p, enum elk_gfx12_systolic_depth sdepth,
                   unsigned rcount, struct elk_reg dest, struct elk_reg src0,
                   struct elk_reg src1, struct elk_reg src2);

void
elk_untyped_atomic(struct elk_codegen *p,
                   struct elk_reg dst,
                   struct elk_reg payload,
                   struct elk_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
elk_untyped_surface_read(struct elk_codegen *p,
                         struct elk_reg dst,
                         struct elk_reg payload,
                         struct elk_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
elk_untyped_surface_write(struct elk_codegen *p,
                          struct elk_reg payload,
                          struct elk_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
elk_memory_fence(struct elk_codegen *p,
                 struct elk_reg dst,
                 struct elk_reg src,
                 enum elk_opcode send_op,
                 enum elk_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
elk_pixel_interpolator_query(struct elk_codegen *p,
                             struct elk_reg dest,
                             struct elk_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct elk_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
elk_find_live_channel(struct elk_codegen *p,
                      struct elk_reg dst,
                      bool last);

void
elk_broadcast(struct elk_codegen *p,
              struct elk_reg dst,
              struct elk_reg src,
              struct elk_reg idx);

void
elk_float_controls_mode(struct elk_codegen *p,
                        unsigned mode, unsigned mask);

void
elk_update_reloc_imm(const struct elk_isa_info *isa,
                     elk_inst *inst,
                     uint32_t value);

void
elk_MOV_reloc_imm(struct elk_codegen *p,
                  struct elk_reg dst,
                  enum elk_reg_type src_type,
                  uint32_t id);

unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst);

/***********************************************************************
 * elk_eu_util.c:
 */

void elk_copy_indirect_to_indirect(struct elk_codegen *p,
                                   struct elk_indirect dst_ptr,
                                   struct elk_indirect src_ptr,
                                   unsigned count);

void elk_copy_from_indirect(struct elk_codegen *p,
                            struct elk_reg dst,
                            struct elk_indirect ptr,
                            unsigned count);

void elk_copy4(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_copy8(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_math_invert(struct elk_codegen *p,
                     struct elk_reg dst,
                     struct elk_reg src);

void elk_set_src1(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_set_desc_ex(struct elk_codegen *p, elk_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
elk_set_desc(struct elk_codegen *p, elk_inst *insn, unsigned desc)
{
   elk_set_desc_ex(p, insn, desc, 0);
}

void elk_set_uip_jip(struct elk_codegen *p, int start_offset);

enum elk_conditional_mod elk_negate_cmod(enum elk_conditional_mod cmod);
enum elk_conditional_mod elk_swap_cmod(enum elk_conditional_mod cmod);

/* elk_eu_compact.c */
void elk_compact_instructions(struct elk_codegen *p, int start_offset,
                              struct elk_disasm_info *disasm);
void elk_uncompact_instruction(const struct elk_isa_info *isa,
                               elk_inst *dst, elk_compact_inst *src);
bool elk_try_compact_instruction(const struct elk_isa_info *isa,
                                 elk_compact_inst *dst, const elk_inst *src);

void elk_debug_compact_uncompact(const struct elk_isa_info *isa,
                                 elk_inst *orig, elk_inst *uncompacted);

/* elk_eu_validate.c */
bool elk_validate_instruction(const struct elk_isa_info *isa,
                              const elk_inst *inst, int offset,
                              unsigned inst_size,
                              struct elk_disasm_info *disasm);
bool elk_validate_instructions(const struct elk_isa_info *isa,
                               const void *assembly, int start_offset, int end_offset,
                               struct elk_disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   elk_inst *insn = (elk_inst *)((char *)store + offset);

   if (elk_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}
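
/* Illustrative walk over an assembled program, stepping 8 bytes for
 * compacted instructions and 16 for full-size ones:
 *
 *    for (int offset = start; offset < end;
 *         offset = next_offset(devinfo, store, offset)) {
 *       elk_inst *insn = (elk_inst *)((char *)store + offset);
 *       ...
 *    }
 */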

/** Maximum SEND message length */
#define ELK_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

#ifdef __cplusplus
}
#endif

#endif