/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#pragma once

#include <stdbool.h>
#include <stdio.h>
#include "elk_inst.h"
#include "elk_compiler.h"
#include "elk_eu_defines.h"
#include "elk_isa_info.h"
#include "elk_reg.h"

#include "util/bitset.h"

#ifdef __cplusplus
extern "C" {
#endif

struct elk_disasm_info;

#define ELK_EU_MAX_INSN_STACK 5

struct elk_insn_state {
   /* One of ELK_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of ELK_MASK_* */
   unsigned mask_control:1;

   bool saturate:1;

   /* One of ELK_ALIGN_* */
   unsigned access_mode:1;

   /* One of ELK_PREDICATE_* */
   enum elk_predicate predicate:4;

   bool pred_inv:1;

   /* Flag subreg. Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   bool acc_wr_control:1;
};


/* A helper for accessing the last instruction emitted. This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.
 */
#define elk_last_inst (&p->store[p->nr_insn - 1])
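
/* Illustrative usage (a sketch; elk_ADD and the elk_inst_set_* setter shown
 * here are assumed from the rest of the elk EU API): emit an instruction,
 * then tweak a bit on it through elk_last_inst.
 *
 *    elk_ADD(p, dst, src0, src1);
 *    elk_inst_set_cond_modifier(p->devinfo, elk_last_inst,
 *                               ELK_CONDITIONAL_NZ);
 */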

struct elk_codegen {
   elk_inst *store;
   int store_size;
   unsigned nr_insn;
   unsigned int next_insn_offset;

   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct elk_insn_state stack[ELK_EU_MAX_INSN_STACK];
   struct elk_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register. If false, it
    * will take whatever is set by elk_set_default_exec_size verbatim.
    *
    * This is set to true by default in elk_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct elk_isa_info *isa;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops
    * which must be patched (and popped) once the matching WHILE instruction
    * is encountered.
    */
   int *loop_stack;
   /**
    * Pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack. This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   struct elk_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};

struct elk_label {
   int offset;
   int number;
   struct elk_label *next;
};

void elk_pop_insn_state( struct elk_codegen *p );
void elk_push_insn_state( struct elk_codegen *p );
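
/* Typical usage (a sketch; ELK_MASK_DISABLE is assumed to be one of the
 * ELK_MASK_* values referenced above): temporarily override the default
 * instruction state, emit, then restore the previous defaults.
 *
 *    elk_push_insn_state(p);
 *    elk_set_default_mask_control(p, ELK_MASK_DISABLE);
 *    elk_MOV(p, dst, src);
 *    elk_pop_insn_state(p);
 */
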
unsigned elk_get_default_exec_size(struct elk_codegen *p);
unsigned elk_get_default_group(struct elk_codegen *p);
unsigned elk_get_default_access_mode(struct elk_codegen *p);
void elk_set_default_exec_size(struct elk_codegen *p, unsigned value);
void elk_set_default_mask_control( struct elk_codegen *p, unsigned value );
void elk_set_default_saturate( struct elk_codegen *p, bool enable );
void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode );
void elk_inst_set_compression(const struct intel_device_info *devinfo,
                              elk_inst *inst, bool on);
void elk_set_default_compression(struct elk_codegen *p, bool on);
void elk_inst_set_group(const struct intel_device_info *devinfo,
                        elk_inst *inst, unsigned group);
void elk_set_default_group(struct elk_codegen *p, unsigned group);
void elk_set_default_compression_control(struct elk_codegen *p, enum elk_compression c);
void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc);
void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse);
void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg);
void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value);

void elk_init_codegen(const struct elk_isa_info *isa,
                      struct elk_codegen *p, void *mem_ctx);
bool elk_has_jip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_uip(const struct intel_device_info *devinfo, enum elk_opcode opcode);
bool elk_has_branch_ctrl(const struct intel_device_info *devinfo, enum elk_opcode opcode);
const struct elk_shader_reloc *elk_get_shader_relocs(struct elk_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *elk_get_program( struct elk_codegen *p, unsigned *sz );

bool elk_should_dump_shader_bin(void);
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier);

bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier);

void elk_realign(struct elk_codegen *p, unsigned alignment);
int elk_append_data(struct elk_codegen *p, void *data,
                    unsigned size, unsigned alignment);
elk_inst *elk_next_insn(struct elk_codegen *p, unsigned opcode);
void elk_add_reloc(struct elk_codegen *p, uint32_t id,
                   enum elk_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void elk_set_dest(struct elk_codegen *p, elk_inst *insn, struct elk_reg dest);
void elk_set_src0(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_gfx6_resolve_implied_move(struct elk_codegen *p,
                                   struct elk_reg *src,
                                   unsigned msg_reg_nr);

/* Helpers for regular instructions:
 */
#define ALU1(OP)                                  \
elk_inst *elk_##OP(struct elk_codegen *p,         \
                   struct elk_reg dest,           \
                   struct elk_reg src0);

#define ALU2(OP)                                  \
elk_inst *elk_##OP(struct elk_codegen *p,         \
                   struct elk_reg dest,           \
                   struct elk_reg src0,           \
                   struct elk_reg src1);

#define ALU3(OP)                                  \
elk_inst *elk_##OP(struct elk_codegen *p,         \
                   struct elk_reg dest,           \
                   struct elk_reg src0,           \
                   struct elk_reg src1,           \
                   struct elk_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3

static inline unsigned
reg_unit(const struct intel_device_info *devinfo)
{
   return 1;
}


/* Helpers for SEND instruction:
 */

/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
elk_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      assert(msg_length % reg_unit(devinfo) == 0);
      assert(response_length % reg_unit(devinfo) == 0);
      return (SET_BITS(msg_length / reg_unit(devinfo), 28, 25) |
              SET_BITS(response_length / reg_unit(devinfo), 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}
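
/* Worked example (illustrative): on gfx5+ with reg_unit() == 1, a message
 * with msg_length == 2, response_length == 1 and a header present gives
 *
 *    (2 << 25) | (1 << 20) | (1 << 19) == 0x04180000
 *
 * i.e. mlen in bits 28:25, rlen in bits 24:20, header-present in bit 19.
 */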

static inline unsigned
elk_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 23, 20);
}

static inline unsigned
elk_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
   else
      return GET_BITS(desc, 19, 16);
}

static inline bool
elk_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

static inline unsigned
elk_message_ex_desc(const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   assert(ex_msg_length % reg_unit(devinfo) == 0);
   return SET_BITS(ex_msg_length / reg_unit(devinfo), 9, 6);
}

static inline unsigned
elk_message_ex_desc_ex_mlen(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6) * reg_unit(devinfo);
}

static inline uint32_t
elk_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

static inline uint32_t
elk_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}

/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 */
static inline uint32_t
elk_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));

   /* From the CHV Bspec: Shared Functions - Message Descriptor -
    * Sampling Engine:
    *
    *   SIMD Mode[2]  29  This field is the upper bit of the 3-bit
    *                     SIMD Mode field.
    */
   if (devinfo->ver >= 8)
      return desc | SET_BITS(msg_type, 16, 12) |
             SET_BITS(simd_mode & 0x3, 18, 17) |
             SET_BITS(simd_mode >> 2, 29, 29) |
             SET_BITS(return_format, 30, 30);
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->verx10 >= 45)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}
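
/* Worked example (illustrative): on gfx7, binding_table_index == 1,
 * sampler == 0, msg_type == 0 and simd_mode == 2 (SIMD16 in the sampler's
 * SIMD-mode encoding) give
 *
 *    SET_BITS(1, 7, 0) | SET_BITS(0, 16, 12) | SET_BITS(2, 18, 17) == 0x40001
 */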

static inline unsigned
elk_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

static inline unsigned
elk_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

static inline unsigned
elk_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 17) | GET_BITS(desc, 29, 29) << 2;
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

static inline unsigned
elk_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->verx10 == 40 || devinfo->ver >= 8);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 30, 30);
   else
      return GET_BITS(desc, 13, 12);
}

/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
elk_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}
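
/* Worked example (illustrative, arbitrary numeric controls): on gfx8,
 * elk_dp_desc(devinfo, 5, 4, 2) packs the binding table index into bits 7:0,
 * msg_control into bits 13:8 and msg_type into bits 18:14:
 *
 *    5 | (2 << 8) | (4 << 14) == 0x10205
 */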

static inline unsigned
elk_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline unsigned
elk_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline unsigned
elk_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 */
static inline uint32_t
elk_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->verx10 >= 45)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}

static inline unsigned
elk_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

static inline unsigned
elk_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->verx10 >= 45)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}

/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 */
static inline uint32_t
elk_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return elk_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}

static inline unsigned
elk_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
elk_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return elk_dp_desc(devinfo, 0, msg_type, msg_control);
}

static inline uint32_t
elk_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_cmask(unsigned num_channels)
{
   /* See also MDC_CMASK in the SKL PRM Vol 2d. */
   return 0xf & (0xf << num_channels);
}
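
/* For example, num_channels == 2 (an RG access) yields
 * 0xf & (0xf << 2) == 0xc; in the MDC_CMASK encoding a set bit marks a
 * channel that is skipped, so B and A are not read/written.
 */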

static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
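
/* By contrast, the LSC channel mask is an enable mask: num_channels == 3
 * yields BITSET_MASK(3) == 0x7, i.e. R, G and B enabled.
 */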

static inline uint32_t
elk_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline unsigned
elk_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}

static inline uint32_t
elk_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(elk_mdc_ds(bit_size), 3, 2);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = ELK_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->verx10 >= 45) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = ELK_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(ELK_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

/**
 * Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
 * Skylake PRM).
 */
static inline uint32_t
elk_mdc_a64_ds(unsigned elems)
{
   switch (elems) {
   case 1: return 0;
   case 2: return 1;
   case 4: return 2;
   case 8: return 3;
   default:
861 unreachable("Unsupported elmeent count for A64 scattered message");
   }
}

static inline uint32_t
elk_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(elk_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 32 || bit_size == 64);

   const unsigned msg_type = GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(elk_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return elk_dp_surface_desc(devinfo, msg_type, msg_control);
}

static inline uint32_t
elk_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}

static inline unsigned
elk_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

static inline uint32_t
elk_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

static inline unsigned
elk_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

static inline uint32_t
elk_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      ELK_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   assert(!coarse_write);

   if (devinfo->ver >= 6) {
      return elk_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}

static inline unsigned
elk_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

static inline unsigned
elk_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return elk_fb_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

static inline bool
elk_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 12, 12);
   else
      return GET_BITS(desc, 11, 11);
}

static inline bool
elk_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}

static inline bool
elk_lsc_opcode_has_cmask(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_has_transpose(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD || opcode == LSC_OP_STORE;
}

static inline bool
elk_lsc_opcode_is_store(enum elk_lsc_opcode opcode)
{
   return opcode == LSC_OP_STORE ||
          opcode == LSC_OP_STORE_CMASK;
}

static inline bool
elk_lsc_opcode_is_atomic(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_STORE:
   case LSC_OP_ATOMIC_ADD:
   case LSC_OP_ATOMIC_SUB:
   case LSC_OP_ATOMIC_MIN:
   case LSC_OP_ATOMIC_MAX:
   case LSC_OP_ATOMIC_UMIN:
   case LSC_OP_ATOMIC_UMAX:
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
   case LSC_OP_ATOMIC_AND:
   case LSC_OP_ATOMIC_OR:
   case LSC_OP_ATOMIC_XOR:
      return true;

   default:
      return false;
   }
}

static inline bool
elk_lsc_opcode_is_atomic_float(enum elk_lsc_opcode opcode)
{
   switch (opcode) {
   case LSC_OP_ATOMIC_FADD:
   case LSC_OP_ATOMIC_FSUB:
   case LSC_OP_ATOMIC_FMIN:
   case LSC_OP_ATOMIC_FMAX:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return true;

   default:
      return false;
   }
}

static inline unsigned
lsc_op_num_data_values(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_CMPXCHG:
   case LSC_OP_ATOMIC_FCMPXCHG:
      return 2;
   case LSC_OP_ATOMIC_INC:
   case LSC_OP_ATOMIC_DEC:
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_FENCE:
      /* XXX: actually check docs */
      return 0;
   default:
      return 1;
   }
}

static inline unsigned
lsc_op_to_legacy_atomic(unsigned _op)
{
   enum elk_lsc_opcode op = (enum elk_lsc_opcode) _op;

   switch (op) {
   case LSC_OP_ATOMIC_INC:
      return ELK_AOP_INC;
   case LSC_OP_ATOMIC_DEC:
      return ELK_AOP_DEC;
   case LSC_OP_ATOMIC_STORE:
      return ELK_AOP_MOV;
   case LSC_OP_ATOMIC_ADD:
      return ELK_AOP_ADD;
   case LSC_OP_ATOMIC_SUB:
      return ELK_AOP_SUB;
   case LSC_OP_ATOMIC_MIN:
      return ELK_AOP_IMIN;
   case LSC_OP_ATOMIC_MAX:
      return ELK_AOP_IMAX;
   case LSC_OP_ATOMIC_UMIN:
      return ELK_AOP_UMIN;
   case LSC_OP_ATOMIC_UMAX:
      return ELK_AOP_UMAX;
   case LSC_OP_ATOMIC_CMPXCHG:
      return ELK_AOP_CMPWR;
   case LSC_OP_ATOMIC_FADD:
      return ELK_AOP_FADD;
   case LSC_OP_ATOMIC_FMIN:
      return ELK_AOP_FMIN;
   case LSC_OP_ATOMIC_FMAX:
      return ELK_AOP_FMAX;
   case LSC_OP_ATOMIC_FCMPXCHG:
      return ELK_AOP_FCMPWR;
   case LSC_OP_ATOMIC_AND:
      return ELK_AOP_AND;
   case LSC_OP_ATOMIC_OR:
      return ELK_AOP_OR;
   case LSC_OP_ATOMIC_XOR:
      return ELK_AOP_XOR;
   /* No LSC op maps to ELK_AOP_PREDEC */
   case LSC_OP_ATOMIC_LOAD:
   case LSC_OP_ATOMIC_FSUB:
      unreachable("no corresponding legacy atomic operation");
   case LSC_OP_LOAD:
   case LSC_OP_LOAD_CMASK:
   case LSC_OP_STORE:
   case LSC_OP_STORE_CMASK:
   case LSC_OP_FENCE:
      unreachable("not an atomic op");
   }

   unreachable("invalid LSC op");
}

static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}

static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}

static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}

static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch (vect_size) {
   case 1: return LSC_VECT_SIZE_V1;
   case 2: return LSC_VECT_SIZE_V2;
   case 3: return LSC_VECT_SIZE_V3;
   case 4: return LSC_VECT_SIZE_V4;
   case 8: return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}

static inline uint32_t
lsc_msg_desc_wcmask(UNUSED const struct intel_device_info *devinfo,
                    enum elk_lsc_opcode opcode, unsigned simd_size,
                    enum lsc_addr_surface_type addr_type,
                    enum lsc_addr_size addr_sz, unsigned num_coordinates,
                    enum lsc_data_size data_sz, unsigned num_channels,
                    bool transpose, unsigned cache_ctrl, bool has_dest,
                    unsigned cmask)
{
   assert(devinfo->has_lsc);

   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   reg_unit(devinfo) * REG_SIZE);

   assert(!transpose || elk_lsc_opcode_has_transpose(opcode));

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   if (elk_lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(cmask ? cmask : lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}

static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum elk_lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   return lsc_msg_desc_wcmask(devinfo, opcode, simd_size, addr_type, addr_sz,
                              num_coordinates, data_sz, num_channels,
                              transpose, cache_ctrl, has_dest, 0);
}

static inline enum elk_lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum elk_lsc_opcode) GET_BITS(desc, 5, 0);
}

static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}

static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}

static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}

static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(elk_lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}

static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}

static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}

static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20) * reg_unit(devinfo);
}

static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25) * reg_unit(devinfo);
}

static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}

static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}

static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}

static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}

static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}

static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12); /* base offset */
}

static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}

static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}

static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}

static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}

static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}

static inline uint32_t
elk_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size > 8;
}

static inline uint32_t
elk_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return 8 << sm2;
}

static inline uint32_t
elk_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}

static inline uint32_t
elk_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return elk_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}

/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
elk_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned exec_size,
                      unsigned group)
{
   assert(exec_size == 8 || exec_size == 16);
   const bool simd_mode = exec_size == 16;
   const bool slot_group = group >= 16;

   assert(!coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}

void elk_urb_WRITE(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   enum elk_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc. If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
elk_send_indirect_message(struct elk_codegen *p,
                          unsigned sfid,
                          struct elk_reg dst,
                          struct elk_reg payload,
                          struct elk_reg desc,
                          unsigned desc_imm,
                          bool eot);
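
/* Illustrative call (a sketch; ELK_SFID_SAMPLER and elk_imm_ud() are assumed
 * from elk_eu_defines.h/elk_reg.h): passing an immediate (here zero) as the
 * \p desc register and the full descriptor in \p desc_imm keeps the
 * descriptor immediate, so no address-register load is generated.
 *
 *    elk_send_indirect_message(p, ELK_SFID_SAMPLER, dst, payload,
 *                              elk_imm_ud(0), desc_imm, false);
 */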

void
elk_send_indirect_split_message(struct elk_codegen *p,
                                unsigned sfid,
                                struct elk_reg dst,
                                struct elk_reg payload0,
                                struct elk_reg payload1,
                                struct elk_reg desc,
                                unsigned desc_imm,
                                struct elk_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool ex_desc_scratch,
                                bool ex_bso,
                                bool eot);

void elk_ff_sync(struct elk_codegen *p,
                 struct elk_reg dest,
                 unsigned msg_reg_nr,
                 struct elk_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void elk_svb_write(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned msg_reg_nr,
                   struct elk_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

elk_inst *elk_fb_WRITE(struct elk_codegen *p,
                       struct elk_reg payload,
                       struct elk_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

void elk_SAMPLE(struct elk_codegen *p,
                struct elk_reg dest,
                unsigned msg_reg_nr,
                struct elk_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void elk_adjust_sampler_state_pointer(struct elk_codegen *p,
                                      struct elk_reg header,
                                      struct elk_reg sampler_index);

void elk_gfx4_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   unsigned msg_reg_nr,
                   struct elk_reg src,
                   unsigned precision);

void elk_gfx6_math(struct elk_codegen *p,
                   struct elk_reg dest,
                   unsigned function,
                   struct elk_reg src0,
                   struct elk_reg src1);

void elk_oword_block_read(struct elk_codegen *p,
                          struct elk_reg dest,
                          struct elk_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned elk_scratch_surface_idx(const struct elk_codegen *p);

void elk_oword_block_read_scratch(struct elk_codegen *p,
                                  struct elk_reg dest,
                                  struct elk_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void elk_oword_block_write_scratch(struct elk_codegen *p,
                                   struct elk_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void elk_gfx7_block_read_scratch(struct elk_codegen *p,
                                 struct elk_reg dest,
                                 int num_regs,
                                 unsigned offset);

/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
elk_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell and later measure jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in
    * order to support compaction), so each 128-bit instruction requires 2
    * chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}
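
/* Example (illustrative sketch, not from the original header): converting
 * an instruction-count distance into the raw value programmed in a jump
 * field.
 *
 *    int insns_to_jump = ...;   // e.g. computed while patching an ENDIF
 *    int jump_field = insns_to_jump * elk_jump_scale(p->devinfo);
 */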

void elk_barrier(struct elk_codegen *p, struct elk_reg src);

/* If/else/endif. Works by manipulating the execution flags on each
 * channel.
 */
elk_inst *elk_IF(struct elk_codegen *p, unsigned execute_size);
elk_inst *elk_gfx6_IF(struct elk_codegen *p, enum elk_conditional_mod conditional,
                      struct elk_reg src0, struct elk_reg src1);

void elk_ELSE(struct elk_codegen *p);
void elk_ENDIF(struct elk_codegen *p);
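
/* Example (illustrative sketch, not from the original header): a typical
 * structured-branch sequence.  ELK_EXECUTE_8 is assumed to be one of the
 * ELK_EXECUTE_* values, and predication for the IF is assumed to have
 * been set up through the default instruction state.
 *
 *    elk_IF(p, ELK_EXECUTE_8);
 *    ...                     // emitted when the channel predicate is set
 *    elk_ELSE(p);
 *    ...                     // emitted for the remaining channels
 *    elk_ENDIF(p);           // patches the pending IF/ELSE jump targets
 */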

/* DO/WHILE loops:
 */
elk_inst *elk_DO(struct elk_codegen *p, unsigned execute_size);

elk_inst *elk_WHILE(struct elk_codegen *p);

elk_inst *elk_BREAK(struct elk_codegen *p);
elk_inst *elk_CONT(struct elk_codegen *p);
elk_inst *elk_HALT(struct elk_codegen *p);
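
/* Example (illustrative sketch, not from the original header): a loop
 * skeleton.  WHILE branches back to the instruction following the DO;
 * elk_BREAK and elk_CONT may be emitted inside the body to leave or
 * restart the loop for the active channels.
 *
 *    elk_DO(p, ELK_EXECUTE_8);
 *    ...                     // loop body
 *    elk_WHILE(p);
 */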

/* Forward jumps:
 */
void elk_land_fwd_jump(struct elk_codegen *p, int jmp_insn_idx);

elk_inst *elk_JMPI(struct elk_codegen *p, struct elk_reg index,
                   unsigned predicate_control);
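
/* Example (illustrative sketch, not from the original header): emitting a
 * predicated forward jump and patching it once the landing point is
 * known.  elk_imm_d() is assumed to come from elk_reg.h, and
 * ELK_PREDICATE_NORMAL to be one of the ELK_PREDICATE_* values.
 *
 *    int jmp_idx = p->nr_insn;    // index of the JMPI about to be emitted
 *    elk_JMPI(p, elk_imm_d(0), ELK_PREDICATE_NORMAL);
 *    ...                          // instructions that may be skipped
 *    elk_land_fwd_jump(p, jmp_idx);   // point the JMPI here
 */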

void elk_NOP(struct elk_codegen *p);

void elk_WAIT(struct elk_codegen *p);

/* Special case: the destination is typically the null register (only the
 * flag register is updated), and the execution size is taken from src0:
 */
void elk_CMP(struct elk_codegen *p,
             struct elk_reg dest,
             unsigned conditional,
             struct elk_reg src0,
             struct elk_reg src1);
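
/* Example (illustrative sketch, not from the original header): comparing
 * two registers and keeping only the flag result.  elk_null_reg() and
 * ELK_CONDITIONAL_GE are assumed to come from elk_reg.h and
 * elk_eu_defines.h respectively.
 *
 *    elk_CMP(p, elk_null_reg(), ELK_CONDITIONAL_GE, src0, src1);
 */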

void elk_CMPN(struct elk_codegen *p,
              struct elk_reg dest,
              unsigned conditional,
              struct elk_reg src0,
              struct elk_reg src1);

void
elk_untyped_atomic(struct elk_codegen *p,
                   struct elk_reg dst,
                   struct elk_reg payload,
                   struct elk_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
elk_untyped_surface_read(struct elk_codegen *p,
                         struct elk_reg dst,
                         struct elk_reg payload,
                         struct elk_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
elk_untyped_surface_write(struct elk_codegen *p,
                          struct elk_reg payload,
                          struct elk_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
elk_memory_fence(struct elk_codegen *p,
                 struct elk_reg dst,
                 struct elk_reg src,
                 enum elk_opcode send_op,
                 enum elk_message_target sfid,
                 uint32_t desc,
                 bool commit_enable,
                 unsigned bti);

void
elk_pixel_interpolator_query(struct elk_codegen *p,
                             struct elk_reg dest,
                             struct elk_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct elk_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
elk_find_live_channel(struct elk_codegen *p,
                      struct elk_reg dst,
                      bool last);

void
elk_broadcast(struct elk_codegen *p,
              struct elk_reg dst,
              struct elk_reg src,
              struct elk_reg idx);

void
elk_float_controls_mode(struct elk_codegen *p,
                        unsigned mode, unsigned mask);

void
elk_update_reloc_imm(const struct elk_isa_info *isa,
                     elk_inst *inst,
                     uint32_t value);

void
elk_MOV_reloc_imm(struct elk_codegen *p,
                  struct elk_reg dst,
                  enum elk_reg_type src_type,
                  uint32_t id);

unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst);

/***********************************************************************
 * elk_eu_util.c:
 */

void elk_copy_indirect_to_indirect(struct elk_codegen *p,
                                   struct elk_indirect dst_ptr,
                                   struct elk_indirect src_ptr,
                                   unsigned count);

void elk_copy_from_indirect(struct elk_codegen *p,
                            struct elk_reg dst,
                            struct elk_indirect ptr,
                            unsigned count);

void elk_copy4(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_copy8(struct elk_codegen *p,
               struct elk_reg dst,
               struct elk_reg src,
               unsigned count);

void elk_math_invert(struct elk_codegen *p,
                     struct elk_reg dst,
                     struct elk_reg src);

void elk_set_src1(struct elk_codegen *p, elk_inst *insn, struct elk_reg reg);

void elk_set_desc_ex(struct elk_codegen *p, elk_inst *insn,
                     unsigned desc, unsigned ex_desc);

static inline void
elk_set_desc(struct elk_codegen *p, elk_inst *insn, unsigned desc)
{
   elk_set_desc_ex(p, insn, desc, 0);
}

void elk_set_uip_jip(struct elk_codegen *p, int start_offset);

enum elk_conditional_mod elk_negate_cmod(enum elk_conditional_mod cmod);
enum elk_conditional_mod elk_swap_cmod(enum elk_conditional_mod cmod);

/* elk_eu_compact.c */
void elk_compact_instructions(struct elk_codegen *p, int start_offset,
                              struct elk_disasm_info *disasm);
void elk_uncompact_instruction(const struct elk_isa_info *isa,
                               elk_inst *dst, elk_compact_inst *src);
bool elk_try_compact_instruction(const struct elk_isa_info *isa,
                                 elk_compact_inst *dst, const elk_inst *src);

void elk_debug_compact_uncompact(const struct elk_isa_info *isa,
                                 elk_inst *orig, elk_inst *uncompacted);

/* elk_eu_validate.c */
bool elk_validate_instruction(const struct elk_isa_info *isa,
                              const elk_inst *inst, int offset,
                              unsigned inst_size,
                              struct elk_disasm_info *disasm);
bool elk_validate_instructions(const struct elk_isa_info *isa,
                               const void *assembly, int start_offset, int end_offset,
                               struct elk_disasm_info *disasm);

static inline int
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
{
   elk_inst *insn = (elk_inst *)((char *)store + offset);

   if (elk_inst_cmpt_control(devinfo, insn))
      return offset + 8;
   else
      return offset + 16;
}
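
/* Example (illustrative sketch, not from the original header): walking the
 * instruction store, stepping 8 bytes over compacted instructions and 16
 * bytes over full-size ones.
 *
 *    for (int offset = 0; offset < (int)p->next_insn_offset;
 *         offset = next_offset(p->devinfo, p->store, offset)) {
 *       elk_inst *insn = (elk_inst *)((char *)p->store + offset);
 *       ...                      // inspect or validate insn
 *    }
 */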

/** Maximum SEND message length */
#define ELK_MAX_MSG_LENGTH 15

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)

#ifdef __cplusplus
}
#endif