1 /*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file elk_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
72 * Gen 8 adds support for compacting 3-src instructions.
73 *
 * Gfx12 reduces the number of bits available to compacted immediates from
 * 13 to 12, but improves the compaction of floating-point immediates by
 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
 * three most significant bits of the mantissa), rather than the lowest bits of
 * the mantissa.
79 */
80
81 #include "elk_eu.h"
82 #include "elk_disasm.h"
83 #include "elk_shader.h"
84 #include "elk_disasm_info.h"
85 #include "dev/intel_debug.h"
86
/* G45 control index table: 32 entries, each written as a 17-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100,
};
121
/* G45 datatype table: 32 entries, each written as an 18-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000,
};
156
/* G45 subreg table: 32 entries, each written as a 15-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000,
};
191
/* G45 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100,
};
226
/* Gfx6 control index table: 32 entries, each written as a 17-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001,
};
261
/* Gfx6 datatype table: 32 entries, each written as an 18-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 *
 * NOTE(review): entries 23 and 25 hold the same value; kept as found --
 * confirm against the hardware documentation before changing either.
 */
static const uint32_t gfx6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
296
/* Gfx6 subreg table: 32 entries, each written as a 15-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
331
/* Gfx6 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
366
/* Gfx7 control index table: 32 entries, each written as a 19-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
401
/* Gfx7 datatype table: 32 entries, each written as an 18-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000,
};
436
/* Gfx7 subreg table: 32 entries, each written as a 15-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
471
/* Gfx7 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
506
/* Gfx8 control index table: 32 entries, each written as a 19-bit pattern.
 *
 * The entries listed here match gfx7_control_index_table one for one.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000,
};
541
/* Gfx8 datatype table: 32 entries, each written as a 21-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
576
/* Gfx8 subreg table: 32 entries, each written as a 15-bit pattern.
 *
 * As listed in this file, it differs from gfx7_subreg_table only at
 * index 10.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100,
};
611
/* Gfx8 source index table: 32 entries, each written as a 12-bit pattern.
 *
 * The entries listed here match gfx7_src_index_table one for one.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000,
};
646
/* Gfx11 datatype table: 32 entries, each written as a 21-bit pattern.
 *
 * One of the lookup tables that the compaction scheme described in the file
 * header relies on.  NOTE(review): the code that indexes this table is not
 * visible here -- presumably a compacted instruction stores an index into
 * this table in place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx11_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101100101,
   0b001000000101111100101,
   0b001000000100101000001,
   0b001000000100101000101,
   0b001000000100101100101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001100100100101100101,
   0b001100101100100100101,
   0b001100101100101100100,
   0b001100101100101100101,
   0b001100111100101100100,
   0b000000000010000001100,
   0b001000000000001100101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001101111100101100101,
   0b001100111100101100101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000,
};
681
/* Gfx12 control index table: 32 entries, each written as a 21-bit pattern.
 *
 * The trailing comment on each entry gives the instruction controls the
 * pattern stands for in disassembly-like notation, e.g. "(W)", the
 * "(width|Mn)" pair, "(sat)" and the flag register.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx12_control_index_table[32] = {
   0b000000000000000000100, /* (16|M0) */
   0b000000000000000000011, /* (8|M0) */
   0b000000010000000000000, /* (W) (1|M0) */
   0b000000010000000000100, /* (W) (16|M0) */
   0b000000010000000000011, /* (W) (8|M0) */
   0b010000000000000000100, /* (16|M0) (ge)f0.0 */
   0b000000000000000100100, /* (16|M16) */
   0b010100000000000000100, /* (16|M0) (lt)f0.0 */
   0b000000000000000000000, /* (1|M0) */
   0b000010000000000000100, /* (16|M0) (sat) */
   0b000000000000000010011, /* (8|M8) */
   0b001100000000000000100, /* (16|M0) (gt)f0.0 */
   0b000100000000000000100, /* (16|M0) (eq)f0.0 */
   0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
   0b001000000000000000100, /* (16|M0) (ne)f0.0 */
   0b000000000000100000100, /* (f0.0) (16|M0) */
   0b010100000000000000011, /* (8|M0) (lt)f0.0 */
   0b000000000000110000100, /* (f1.0) (16|M0) */
   0b000000010000000000001, /* (W) (2|M0) */
   0b000000000000101000100, /* (f0.1) (16|M0) */
   0b000000000000111000100, /* (f1.1) (16|M0) */
   0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
   0b000000000000000100011, /* (8|M16) */
   0b000000000000000110011, /* (8|M24) */
   0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
   0b010000000000000000011, /* (8|M0) (ge)f0.0 */
   0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
   0b000010000000000000011, /* (8|M0) (sat) */
   0b010100000000010000100, /* (16|M0) (lt)f1.0 */
   0b000100000000000000011, /* (8|M0) (eq)f0.0 */
   0b000001000000000000011, /* (8|M0) {AccWrEn} */
   0b000000010000000100100, /* (W) (16|M16) */
};
716
/* Gfx12 datatype table: 32 entries, each written as a 20-bit pattern.
 *
 * The trailing comment on each entry names the register file and type of
 * the destination and the two sources that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint32_t gfx12_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
   0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f */
   0b00110110110011001100, /* grf<1>:d grf:d imm:d */
   0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
   0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w */
   0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
   0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
   0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
   0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
   0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
   0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
   0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
   0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
   0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
   0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
   0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
   0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
};
751
/* Gfx12 subreg table: 32 entries, each written as a 15-bit pattern.
 *
 * The trailing comment on each entry lists three ".N" sub-register offsets.
 * Judging by the bit positions that change, the first offset sits in the
 * lowest bits and the last one in the highest bits of the pattern.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx12_subreg_table[32] = {
   0b000000000000000, /* .0 .0 .0 */
   0b100000000000000, /* .0 .0 .16 */
   0b001000000000000, /* .0 .0 .4 */
   0b011000000000000, /* .0 .0 .12 */
   0b000000010000000, /* .0 .4 .0 */
   0b010000000000000, /* .0 .0 .8 */
   0b101000000000000, /* .0 .0 .20 */
   0b000000000001000, /* .8 .0 .0 */
   0b000000100000000, /* .0 .8 .0 */
   0b110000000000000, /* .0 .0 .24 */
   0b111000000000000, /* .0 .0 .28 */
   0b000001000000000, /* .0 .16 .0 */
   0b000000000000100, /* .4 .0 .0 */
   0b000001100000000, /* .0 .24 .0 */
   0b000001010000000, /* .0 .20 .0 */
   0b000000110000000, /* .0 .12 .0 */
   0b000001110000000, /* .0 .28 .0 */
   0b000000000011100, /* .28 .0 .0 */
   0b000000000010000, /* .16 .0 .0 */
   0b000000000001100, /* .12 .0 .0 */
   0b000000000011000, /* .24 .0 .0 */
   0b000000000010100, /* .20 .0 .0 */
   0b000000000000010, /* .2 .0 .0 */
   0b000000101000000, /* .0 .10 .0 */
   0b000000001000000, /* .0 .2 .0 */
   0b000000010000100, /* .4 .4 .0 */
   0b000000001011100, /* .28 .2 .0 */
   0b000000001000010, /* .2 .2 .0 */
   0b000000110001100, /* .12 .12 .0 */
   0b000000000100000, /* .0 .1 .0 */
   0b000000001100000, /* .0 .3 .0 */
   0b110001100000000, /* .0 .24 .24 */
};
786
/* Gfx12 src0 index table: 16 entries, each written as a 12-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier (negate /
 * abs) and the <vstride;width,hstride> region that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx12_src0_index_table[16] = {
   0b010001100100, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b010001100110, /* -r<8;8,1> */
   0b010001100101, /* (abs)r<8;8,1> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001000100100, /* r<2;2,1> */
   0b001100000000, /* r<4;1,0> */
   0b001000100110, /* -r<2;2,1> */
   0b001101000100, /* r<4;4,1> */
   0b010001100111, /* -(abs)r<8;8,1> */
   0b000100000000, /* r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
};
805
/* Gfx12 src1 index table: 16 entries, each written as a 12-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier (negate /
 * abs) and the <vstride;width,hstride> region that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t gfx12_src1_index_table[16] = {
   0b000100011001, /* r<8;8,1> */
   0b000000000000, /* r<0;1,0> */
   0b100100011001, /* -r<8;8,1> */
   0b100000000000, /* -r<0;1,0> */
   0b010100011001, /* (abs)r<8;8,1> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000010001001, /* r<2;2,1> */
   0b100010001001, /* -r<2;2,1> */
   0b000011010000, /* r<4;4,0> */
   0b000011010001, /* r<4;4,1> */
   0b000011000000, /* r<4;1,0> */
   0b110100011001, /* -(abs)r<8;8,1> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b100011010001, /* -r<4;4,1> */
};
824
/* XeHP src0 index table: 16 entries, each written as a 12-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier (negate /
 * abs) and the <vstride;width,hstride> region that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t xehp_src0_index_table[16] = {
   0b000100000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b000100000010, /* -r<1;1,0> */
   0b000100000001, /* (abs)r<1;1,0> */
   0b000000000010, /* -r<0;1,0> */
   0b001000000000, /* r<2;1,0> */
   0b001001000000, /* r<2;4,0> */
   0b001101000000, /* r<4;4,0> */
   0b001100000000, /* r<4;1,0> */
   0b000100000011, /* -(abs)r<1;1,0> */
   0b000000000001, /* (abs)r<0;1,0> */
   0b111100010000, /* r[a]<1,0> */
   0b010001100000, /* r<8;8,0> */
   0b000101000000, /* r<1;4,0> */
   0b010001001000, /* r<8;4,2> */
   0b001000000010, /* -r<2;1,0> */
};
843
/* XeHP src1 index table: 16 entries, each written as a 12-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier (negate /
 * abs) and the <vstride;width,hstride> region that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t xehp_src1_index_table[16] = {
   0b000001000000, /* r<1;1,0> */
   0b000000000000, /* r<0;1,0> */
   0b100001000000, /* -r<1;1,0> */
   0b100000000000, /* -r<0;1,0> */
   0b010001000000, /* (abs)r<1;1,0> */
   0b100011010000, /* -r<4;4,0> */
   0b000010000000, /* r<2;1,0> */
   0b000011010000, /* r<4;4,0> */
   0b000011000000, /* r<4;1,0> */
   0b110001000000, /* -(abs)r<1;1,0> */
   0b010000000000, /* (abs)r<0;1,0> */
   0b110000000000, /* -(abs)r<0;1,0> */
   0b000100011000, /* r<8;8,0> */
   0b100010000000, /* -r<2;1,0> */
   0b100000001001, /* -r<0;2,1> */
   0b100001000100, /* -r[a]<1;1,0> */
};
862
/* Xe2 control index table: 32 entries, each written as an 18-bit pattern.
 *
 * The trailing comment on each entry gives the instruction controls the
 * pattern stands for in disassembly-like notation, e.g. "(W)", the
 * "(width|Mn)" pair, "(sat)" and the flag register.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint32_t xe2_control_index_table[32] = {
   0b000000000000000100, /* (16|M0) */
   0b000000100000000000, /* (W) (1|M0) */
   0b000000000010000100, /* (16|M16) */
   0b000000000000000000, /* (1|M0) */
   0b000000100000000100, /* (W) (16|M0) */
   0b010000000000000100, /* (16|M0) (.ge)f0.0 */
   0b010100000000000100, /* (16|M0) (.lt)f0.0 */
   0b000000100000000010, /* (W) (4|M0) */
   0b000000000000000101, /* (32|M0) */
   0b000000100000000011, /* (W) (8|M0) */
   0b001100100000000000, /* (W) (1|M0) (.gt)f0.0 */
   0b000010000000000100, /* (16|M0) (sat) */
   0b000100000000000100, /* (16|M0) (.eq)f0.0 */
   0b000000100000000001, /* (W) (2|M0) */
   0b001100000000000100, /* (16|M0) (.gt)f0.0 */
   0b000100100000000000, /* (W) (1|M0) (.eq)f0.0 */
   0b010100100000000010, /* (W) (4|M0) (.lt)f0.0 */
   0b010000100000000000, /* (W) (1|M0) (.ge)f0.0 */
   0b010000100000000010, /* (W) (4|M0) (.ge)f0.0 */
   0b010100100000000000, /* (W) (1|M0) (.lt)f0.0 */
   0b001000000000000100, /* (16|M0) (.ne)f0.0 */
   0b000000000100100100, /* (f2.0) (16|M0) */
   0b010100100000000011, /* (W) (8|M0) (.lt)f0.0 */
   0b000000000100011100, /* (f1.1) (16|M0) */
   0b010000100000000011, /* (W) (8|M0) (.ge)f0.0 */
   0b000000000100001100, /* (f0.1) (16|M0) */
   0b000000000100010100, /* (f1.0) (16|M0) */
   0b000000000100110100, /* (f3.0) (16|M0) */
   0b000000000100111100, /* (f3.1) (16|M0) */
   0b000000000100101100, /* (f2.1) (16|M0) */
   0b000000000100000100, /* (f0.0) (16|M0) */
   0b010100000000100100, /* (16|M0) (.lt)f2.0 */
};
897
/* Xe2 datatype table: 32 entries, each written as a 20-bit pattern.
 *
 * The trailing comment on each entry names the register file and type of
 * the destination and the sources that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint32_t xe2_datatype_table[32] = {
   0b11010110100101010100, /* grf<1>:f grf:f grf:f */
   0b11010100100101010100, /* arf<1>:f grf:f grf:f */
   0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
   0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
   0b01010110110101010100, /* grf<1>:f grf:f imm:f */
   0b11010010100101010100, /* grf<1>:f arf:f grf:f */
   0b10111110100011101110, /* grf<1>:q grf:q grf:q */
   0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
   0b01010110100101010100, /* grf<1>:f grf:f arf:f */
   0b00000010101001000100, /* grf<1>:ud imm:ud */
   0b00101110110011001100, /* grf<1>:d grf:d imm:w */
   0b11010000100101010100, /* arf<1>:f arf:f grf:f */
   0b01010100100101010100, /* arf<1>:f grf:f arf:f */
   0b01010100110101010100, /* arf<1>:f grf:f imm:f */
   0b00000010101101010100, /* grf<1>:f imm:f */
   0b00000110100011001100, /* grf<1>:d grf:d arf:ub */
   0b00101110110011101110, /* grf<1>:q grf:q imm:w */
   0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
   0b01010000100101010100, /* arf<1>:f arf:f arf:f */
   0b10110110100011001100, /* grf<1>:d grf:d grf:d */
   0b01010010100101010100, /* grf<1>:f arf:f arf:f */
   0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
   0b00110110110011001110, /* grf<1>:q grf:d imm:d */
   0b00101100110011001100, /* arf<1>:d grf:d imm:w */
   0b11011110100101110110, /* grf<1>:df grf:df grf:df */
   0b01010010110101010100, /* grf<1>:f arf:f imm:f */
   0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
   0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
   0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
   0b00000010101010101100, /* grf<1>:d imm:w */
   0b01010000110101010100, /* arf<1>:f arf:f imm:f */
   0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
};
932
/* Xe2 subreg table: 16 entries, each written as a 12-bit pattern.
 *
 * The trailing comment on each entry lists two ".N" sub-register offsets.
 * Judging by the bit positions that change, the first offset sits in the
 * low bits and the second in the high bits of the pattern.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t xe2_subreg_table[16] = {
   0b000000000000, /* .0 .0 */
   0b000010000000, /* .0 .4 */
   0b000000000100, /* .4 .0 */
   0b010000000000, /* .0 .32 */
   0b001000000000, /* .0 .16 */
   0b000000001000, /* .8 .0 */
   0b000100000000, /* .0 .8 */
   0b010100000000, /* .0 .40 */
   0b011000000000, /* .0 .48 */
   0b000110000000, /* .0 .12 */
   0b000000010000, /* .16 .0 */
   0b011010000000, /* .0 .52 */
   0b001100000000, /* .0 .24 */
   0b011100000000, /* .0 .56 */
   0b010110000000, /* .0 .44 */
   0b010010000000, /* .0 .36 */
};
951
/* Xe2 src0 index table: 8 entries, each written as an 11-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier (negate /
 * abs) and the <vstride;width,hstride> region that the pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t xe2_src0_index_table[8] = {
   0b00100000000, /* r<1;1,0> */
   0b00000000000, /* r<0;1,0> */
   0b01000000000, /* r<2;1,0> */
   0b00100000010, /* -r<1;1,0> */
   0b01100000000, /* r<4;1,0> */
   0b00100000001, /* (abs)r<1;1,0> */
   0b00000000010, /* -r<0;1,0> */
   0b01001000000, /* r<2;4,0> */
};
962
/* Xe2 src1 index table: 16 entries, each written as a 16-bit pattern.
 *
 * The trailing comment on each entry gives the source modifier, the
 * <vstride;width,hstride> region and a ".N" sub-register offset that the
 * pattern stands for.
 *
 * NOTE(review): the code that indexes this table is not visible here --
 * presumably a compacted instruction stores an index into this table in
 * place of the full field; confirm against the lookup code.
 */
static const uint16_t xe2_src1_index_table[16] = {
   0b0000100000000000, /* r<1;1,0>.0 */
   0b0000000000000000, /* r<0;1,0>.0 */
   0b1000100000000000, /* -r<1;1,0>.0 */
   0b0000000000010000, /* r<0;1,0>.8 */
   0b0000000000001000, /* r<0;1,0>.4 */
   0b0000000000011000, /* r<0;1,0>.12 */
   0b0000000001010000, /* r<0;1,0>.40 */
   0b0000000001000000, /* r<0;1,0>.32 */
   0b0000000000100000, /* r<0;1,0>.16 */
   0b0000000001111000, /* r<0;1,0>.60 */
   0b0000000000111000, /* r<0;1,0>.28 */
   0b0000000000101000, /* r<0;1,0>.20 */
   0b0000000001011000, /* r<0;1,0>.44 */
   0b0000000001001000, /* r<0;1,0>.36 */
   0b0000000001110000, /* r<0;1,0>.56 */
   0b0000000000110000, /* r<0;1,0>.24 */
};
981
/* Gfx8 3-source control index table (4 entries).
 *
 * This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
994
/* Gfx8 3-source source index table (4 entries).
 *
 * This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * NOTE(review): "low 44 bits" does not line up with the 46-bit Broadwell
 * width quoted above -- confirm against the hardware documentation.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
1008
/* Gfx12 3-src control index table: 32 entries of 36 bits each.
 *
 * set_3src_control_index() searches this table when ver >= 12 but
 * verx10 < 125.  The key is assembled from these instruction bit ranges,
 * listed from most to least significant:
 *
 *    95:92, 90:88, 82:80, 50, 48, 42:40, 39, 38:36, 34, 33, 32, 31, 28,
 *    27:24, 23, 22, 21:19, 18:16
 */
static const uint64_t gfx12_3src_control_index_table[32] = {
   0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
   0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
   0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
   0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
   0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
   0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
   0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
   0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
   0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
   0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
   0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
   0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
   0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
   0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
   0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
   0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
   0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
   0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
   0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
   0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
   0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
   0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
   0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
   0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
   0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
   0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
   0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
   0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
   0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
   0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
};
1043
/* XeHP 3-src control index table: 32 entries of 37 bits each.
 *
 * set_3src_control_index() searches this table when verx10 >= 125 but
 * ver < 20.  The key is assembled from these instruction bit ranges, listed
 * from most to least significant:
 *
 *    95:92, 90:88, 82:80, 50, 49:48, 42:40, 39, 38:36, 34, 33, 32, 31, 28,
 *    27:24, 23, 22, 21:19, 18:16
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
   0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
   0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
   0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
   0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
   0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
   0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
   0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
   0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
   0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
   0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
   0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
   0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */
};
1078
/* Xe2 3-src control index table for non-DPAS instructions: 16 entries of
 * 34 bits each.
 *
 * set_3src_control_index() searches this table when ver >= 20 and is_dpas is
 * false.  The key is assembled from these instruction bit ranges, listed from
 * most to least significant:
 *
 *    95:92, 90:88, 82:80, 50, 49:48, 42:40, 39, 38:36, 34, 32, 31, 28,
 *    27:26, 25:24, 23:21, 20:18
 */
static const uint64_t xe2_3src_control_index_table[16] = {
   0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f */
   0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f */
   0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df */
   0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */
   0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df */
   0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f */
   0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f */
   0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
   0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */
   0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f */
   0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud */
   0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d */
   0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df */
};
1097
/* Xe2 3-src control index table for DPAS instructions: 16 entries of 34 bits
 * each.
 *
 * set_3src_control_index() searches this table when ver >= 20 and is_dpas is
 * true.  The key uses the same instruction bits as the non-DPAS Xe2 lookup;
 * only the table differs.
 */
static const uint64_t xe2_3src_dpas_control_index_table[16] = {
   0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */
   0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */
   0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */
   0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */
   0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */
   0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */
   0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */
   0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic */
   0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */
   0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */
   0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */
   0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */
   0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */
   0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */
   0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */
   0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */
};
1116
/* Gfx12 3-src source index table: 32 entries of 21 bits each.
 *
 * set_3src_source_index() searches this table when ver >= 12 but
 * verx10 < 125.  The key is assembled from these instruction bit ranges,
 * listed from most to least significant:
 *
 *    114, 113:112, 98, 97:96, 91, 87:86, 85:84, 83, 66, 65:64, 47, 46,
 *    45:44, 43, 35
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */
   0b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */
   0b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */
   0b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */
   0b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */
   0b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */
   0b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */
   0b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */
   0b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */
   0b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */
   0b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */
   0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
   0b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */
   0b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */
   0b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */
   0b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */
   0b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */
   0b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
   0b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */
   0b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */
   0b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */
   0b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */
   0b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */
   0b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */
   0b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */
   0b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */
   0b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */
   0b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */
};
1151
/* XeHP 3-src source index table: 32 entries of 21 bits each.
 *
 * set_3src_source_index() searches this table when verx10 >= 125 but
 * ver < 20.  The key is assembled from these instruction bit ranges, listed
 * from most to least significant:
 *
 *    114, 113:112, 98, 97:96, 91, 87:86, 85:84, 83, 66, 65:64, 47, 46,
 *    45:44, 43, 35
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */
   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */
   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */
   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */
   0b101100000000100000001, /* grf<1;0> grf<0;0> grf<1> */
   0b101100000001100001001, /* -grf<1;0> grf<1;0> grf<1> */
   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */
   0b101100000001100000000, /* grf<0;0> grf<1;0> grf<1> */
   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */
   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
   0b101100010001100000001, /* grf<1;0> -grf<1;0> grf<1> */
   0b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */
   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */
   0b100100010001100000000, /* grf<0;0> -grf<1;0> grf<0> */
   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
   0b100100000001100001001, /* -grf<1;0> grf<1;0> grf<0> */
   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */
   0b100100000001100001000, /* -grf<0;0> grf<1;0> grf<0> */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>
                             * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:f grf:bf grf:bf
                             */
   0b101100010001100000000, /* grf<0;0> -grf<1;0> grf<1> */
   0b100100000101100000000, /* grf<0;0> grf<1;0> -grf<0> */
   0b101000000001100000000, /* grf<0;0> arf<1;0> grf<1> */
   0b100100000101100000001, /* grf<1;0> grf<1;0> -grf<0> */
   0b101100000101100001001, /* -grf<1;0> grf<1;0> -grf<1> */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
};
1189
/* Xe2 3-src source index table for non-DPAS instructions: 16 entries of
 * 21 bits each.
 *
 * set_3src_source_index() searches this table when ver >= 20 and is_dpas is
 * false.  The key is assembled from these instruction bit ranges, listed from
 * most to least significant:
 *
 *    114, 113:112, 98, 97:96, 91, 87:86, 85:84, 83, 66, 65:64, 47, 46,
 *    45:44, 43, 35
 */
static const uint32_t xe2_3src_source_index_table[16] = {
   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */
   0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1> */
   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */
   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */
   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */
   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */
   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */
   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */
   0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1> */
   0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0> */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0> */
   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */
   0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */
   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
   0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */
};
1208
/* Xe2 3-src source index table for DPAS instructions: 16 entries of 21 bits
 * each.
 *
 * set_3src_source_index() searches this table when ver >= 20 and is_dpas is
 * true.  The key uses the same instruction bits as the non-DPAS lookup; only
 * the table differs.
 */
static const uint32_t xe2_3src_dpas_source_index_table[16] = {
   0b100100000000100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:[f,bf] grf:bf grf:bf
                             * dpas.*x1 grf:[f,hf] grf:hf grf:hf
                             */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */
};
1230
/* Gfx12 3-src subregister table: 32 entries of 20 bits each.
 *
 * Each entry packs four 5-bit fields.  The four columns of the per-entry
 * comments list those fields from least significant (first column) to most
 * significant (last column); in this table the number shown equals the raw
 * field value.
 *
 * NOTE(review): the code that searches this table is outside this section;
 * presumably it is the 3-src subreg index lookup -- confirm which operand
 * each column corresponds to there.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0 .0 .0 .0 */
   0b00100000000000000000, /* .0 .0 .0 .4 */
   0b00000000000110000000, /* .0 .12 .0 .0 */
   0b10100000000000000000, /* .0 .0 .0 .20 */
   0b10000000001110000000, /* .0 .28 .0 .16 */
   0b01100000000000000000, /* .0 .0 .0 .12 */
   0b01000000000000000000, /* .0 .0 .0 .8 */
   0b00000010000000000000, /* .0 .0 .8 .0 */
   0b00000001000000000000, /* .0 .0 .4 .0 */
   0b11000000000000000000, /* .0 .0 .0 .24 */
   0b10000000000000000000, /* .0 .0 .0 .16 */
   0b11100000000000000000, /* .0 .0 .0 .28 */
   0b00000110000000000000, /* .0 .0 .24 .0 */
   0b00000000000010000000, /* .0 .4 .0 .0 */
   0b00000100000000000000, /* .0 .0 .16 .0 */
   0b00000011000000000000, /* .0 .0 .12 .0 */
   0b00000101000000000000, /* .0 .0 .20 .0 */
   0b00000111000000000000, /* .0 .0 .28 .0 */
   0b00000000000100000000, /* .0 .8 .0 .0 */
   0b00000000001000000000, /* .0 .16 .0 .0 */
   0b00000000001100000000, /* .0 .24 .0 .0 */
   0b00000000001010000000, /* .0 .20 .0 .0 */
   0b00000000001110000000, /* .0 .28 .0 .0 */
   0b11000000001110000000, /* .0 .28 .0 .24 */
   0b00100000000100000000, /* .0 .8 .0 .4 */
   0b00100000000110000000, /* .0 .12 .0 .4 */
   0b01000000000110000000, /* .0 .12 .0 .8 */
   0b10000000001100000000, /* .0 .24 .0 .16 */
   0b10000000001010000000, /* .0 .20 .0 .16 */
   0b01100000000010000000, /* .0 .4 .0 .12 */
   0b10100000001110000000, /* .0 .28 .0 .20 */
   0b01000000000010000000, /* .0 .4 .0 .8 */
};
1265
/* Xe2 3-src subregister table: 32 entries of 20 bits each.
 *
 * Each entry packs four 5-bit fields.  The four columns of the per-entry
 * comments list those fields from least significant (first column) to most
 * significant (last column); in this table the number shown is twice the raw
 * field value (e.g. a field of 0b00100 is annotated as .8).
 *
 * NOTE(review): the code that searches this table is outside this section;
 * presumably it is the 3-src subreg index lookup on Xe2 -- confirm which
 * operand each column corresponds to there.
 */
static const uint32_t xe2_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0 .0 .0 .0 */
   0b00100000000000000000, /* .0 .0 .0 .8 */
   0b10000000000000000000, /* .0 .0 .0 .32 */
   0b00010000000000000000, /* .0 .0 .0 .4 */
   0b11100000000000000000, /* .0 .0 .0 .56 */
   0b01010000000000000000, /* .0 .0 .0 .20 */
   0b10110000000000000000, /* .0 .0 .0 .44 */
   0b01000000000011000000, /* .0 .12 .0 .16 */
   0b01100000000000000000, /* .0 .0 .0 .24 */
   0b10100000000000000000, /* .0 .0 .0 .40 */
   0b11000000000000000000, /* .0 .0 .0 .48 */
   0b01000000000000000000, /* .0 .0 .0 .16 */
   0b01110000000110000000, /* .0 .24 .0 .28 */
   0b10100000001001000000, /* .0 .36 .0 .40 */
   0b11010000001100000000, /* .0 .48 .0 .52 */
   0b01110000000000000000, /* .0 .0 .0 .28 */
   0b11110000000000000000, /* .0 .0 .0 .60 */
   0b10010000000000000000, /* .0 .0 .0 .36 */
   0b00110000000000000000, /* .0 .0 .0 .12 */
   0b00100000000010000000, /* .0 .8 .0 .8 */
   0b00010000000001000000, /* .0 .4 .0 .4 */
   0b00110000000011000000, /* .0 .12 .0 .12 */
   0b11010000000000000000, /* .0 .0 .0 .52 */
   0b00000000000001000000, /* .0 .4 .0 .0 */
   0b00000101100000000000, /* .0 .0 .44 .0 */
   0b00000100000000000000, /* .0 .0 .32 .0 */
   0b00000000000010000000, /* .0 .8 .0 .0 */
   0b00000000001100000000, /* .0 .48 .0 .0 */
   0b00000000001101000000, /* .0 .52 .0 .0 */
   0b00000110100000000000, /* .0 .0 .52 .0 */
   0b00000000001000000000, /* .0 .32 .0 .0 */
   0b00000000001111000000, /* .0 .60 .0 .0 */
};
1300
/* Lookup tables consulted while compacting a single instruction.
 *
 * The set_*_index() helpers gather bits from the uncompacted instruction and
 * search the matching table here for that bit pattern.
 */
struct compaction_state {
   /* ISA description; isa->devinfo selects the generation-specific paths. */
   const struct elk_isa_info *isa;
   /* Searched by set_control_index(), which scans 32 entries. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), which scans 32 entries. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(). */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index() when src1 is not an immediate. */
   const uint16_t *src1_index_table;
};
1309
/* Forward declaration; the definition lives elsewhere in this file.
 *
 * NOTE(review): presumably fills in *c with the tables matching isa --
 * confirm against the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct elk_isa_info *isa);
1312
1313 static bool
set_control_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1314 set_control_index(const struct compaction_state *c,
1315 elk_compact_inst *dst, const elk_inst *src)
1316 {
1317 const struct intel_device_info *devinfo = c->isa->devinfo;
1318 uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1319
1320 if (devinfo->ver >= 20) {
1321 uncompacted = (elk_inst_bits(src, 95, 92) << 14) | /* 4b */
1322 (elk_inst_bits(src, 34, 34) << 13) | /* 1b */
1323 (elk_inst_bits(src, 32, 32) << 12) | /* 1b */
1324 (elk_inst_bits(src, 31, 31) << 11) | /* 1b */
1325 (elk_inst_bits(src, 28, 28) << 10) | /* 1b */
1326 (elk_inst_bits(src, 27, 26) << 8) | /* 2b */
1327 (elk_inst_bits(src, 25, 24) << 6) | /* 2b */
1328 (elk_inst_bits(src, 23, 21) << 3) | /* 3b */
1329 (elk_inst_bits(src, 20, 18)); /* 3b */
1330 } else if (devinfo->ver >= 12) {
1331 uncompacted = (elk_inst_bits(src, 95, 92) << 17) | /* 4b */
1332 (elk_inst_bits(src, 34, 34) << 16) | /* 1b */
1333 (elk_inst_bits(src, 33, 33) << 15) | /* 1b */
1334 (elk_inst_bits(src, 32, 32) << 14) | /* 1b */
1335 (elk_inst_bits(src, 31, 31) << 13) | /* 1b */
1336 (elk_inst_bits(src, 28, 28) << 12) | /* 1b */
1337 (elk_inst_bits(src, 27, 24) << 8) | /* 4b */
1338 (elk_inst_bits(src, 23, 22) << 6) | /* 2b */
1339 (elk_inst_bits(src, 21, 19) << 3) | /* 3b */
1340 (elk_inst_bits(src, 18, 16)); /* 3b */
1341 } else if (devinfo->ver >= 8) {
1342 uncompacted = (elk_inst_bits(src, 33, 31) << 16) | /* 3b */
1343 (elk_inst_bits(src, 23, 12) << 4) | /* 12b */
1344 (elk_inst_bits(src, 10, 9) << 2) | /* 2b */
1345 (elk_inst_bits(src, 34, 34) << 1) | /* 1b */
1346 (elk_inst_bits(src, 8, 8)); /* 1b */
1347 } else {
1348 uncompacted = (elk_inst_bits(src, 31, 31) << 16) | /* 1b */
1349 (elk_inst_bits(src, 23, 8)); /* 16b */
1350
1351 /* On gfx7, the flag register and subregister numbers are integrated into
1352 * the control index.
1353 */
1354 if (devinfo->ver == 7)
1355 uncompacted |= elk_inst_bits(src, 90, 89) << 17; /* 2b */
1356 }
1357
1358 for (int i = 0; i < 32; i++) {
1359 if (c->control_index_table[i] == uncompacted) {
1360 elk_compact_inst_set_control_index(devinfo, dst, i);
1361 return true;
1362 }
1363 }
1364
1365 return false;
1366 }
1367
1368 static bool
set_datatype_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1369 set_datatype_index(const struct compaction_state *c, elk_compact_inst *dst,
1370 const elk_inst *src, bool is_immediate)
1371 {
1372 const struct intel_device_info *devinfo = c->isa->devinfo;
1373 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1374
1375 if (devinfo->ver >= 12) {
1376 uncompacted = (elk_inst_bits(src, 91, 88) << 15) | /* 4b */
1377 (elk_inst_bits(src, 66, 66) << 14) | /* 1b */
1378 (elk_inst_bits(src, 50, 50) << 13) | /* 1b */
1379 (elk_inst_bits(src, 49, 48) << 11) | /* 2b */
1380 (elk_inst_bits(src, 47, 47) << 10) | /* 1b */
1381 (elk_inst_bits(src, 46, 46) << 9) | /* 1b */
1382 (elk_inst_bits(src, 43, 40) << 5) | /* 4b */
1383 (elk_inst_bits(src, 39, 36) << 1) | /* 4b */
1384 (elk_inst_bits(src, 35, 35)); /* 1b */
1385
1386 /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1387 * is present
1388 */
1389 if (!is_immediate) {
1390 uncompacted |= elk_inst_bits(src, 98, 98) << 19; /* 1b */
1391 }
1392 } else if (devinfo->ver >= 8) {
1393 uncompacted = (elk_inst_bits(src, 63, 61) << 18) | /* 3b */
1394 (elk_inst_bits(src, 94, 89) << 12) | /* 6b */
1395 (elk_inst_bits(src, 46, 35)); /* 12b */
1396 } else {
1397 uncompacted = (elk_inst_bits(src, 63, 61) << 15) | /* 3b */
1398 (elk_inst_bits(src, 46, 32)); /* 15b */
1399 }
1400
1401 for (int i = 0; i < 32; i++) {
1402 if (c->datatype_table[i] == uncompacted) {
1403 elk_compact_inst_set_datatype_index(devinfo, dst, i);
1404 return true;
1405 }
1406 }
1407
1408 return false;
1409 }
1410
1411 static bool
set_subreg_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1412 set_subreg_index(const struct compaction_state *c, elk_compact_inst *dst,
1413 const elk_inst *src, bool is_immediate)
1414 {
1415 const struct intel_device_info *devinfo = c->isa->devinfo;
1416 const unsigned table_len = devinfo->ver >= 20 ?
1417 ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table);
1418 uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */
1419
1420 if (devinfo->ver >= 20) {
1421 uncompacted = (elk_inst_bits(src, 33, 33) << 0) | /* 1b */
1422 (elk_inst_bits(src, 55, 51) << 1) | /* 5b */
1423 (elk_inst_bits(src, 71, 67) << 6) | /* 5b */
1424 (elk_inst_bits(src, 87, 87) << 11); /* 1b */
1425 } else if (devinfo->ver >= 12) {
1426 uncompacted = (elk_inst_bits(src, 55, 51) << 0) | /* 5b */
1427 (elk_inst_bits(src, 71, 67) << 5); /* 5b */
1428
1429 if (!is_immediate)
1430 uncompacted |= elk_inst_bits(src, 103, 99) << 10; /* 5b */
1431 } else {
1432 uncompacted = (elk_inst_bits(src, 52, 48) << 0) | /* 5b */
1433 (elk_inst_bits(src, 68, 64) << 5); /* 5b */
1434
1435 if (!is_immediate)
1436 uncompacted |= elk_inst_bits(src, 100, 96) << 10; /* 5b */
1437 }
1438
1439 for (int i = 0; i < table_len; i++) {
1440 if (c->subreg_table[i] == uncompacted) {
1441 elk_compact_inst_set_subreg_index(devinfo, dst, i);
1442 return true;
1443 }
1444 }
1445
1446 return false;
1447 }
1448
1449 static bool
set_src0_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1450 set_src0_index(const struct compaction_state *c, elk_compact_inst *dst,
1451 const elk_inst *src)
1452 {
1453 const struct intel_device_info *devinfo = c->isa->devinfo;
1454 uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */
1455 int table_len;
1456
1457 if (devinfo->ver >= 12) {
1458 table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) :
1459 ARRAY_SIZE(gfx12_src0_index_table));
1460 uncompacted = (devinfo->ver >= 20 ? 0 :
1461 elk_inst_bits(src, 87, 87) << 11) | /* 1b */
1462 (elk_inst_bits(src, 86, 84) << 8) | /* 3b */
1463 (elk_inst_bits(src, 83, 81) << 5) | /* 3b */
1464 (elk_inst_bits(src, 80, 80) << 4) | /* 1b */
1465 (elk_inst_bits(src, 65, 64) << 2) | /* 2b */
1466 (elk_inst_bits(src, 45, 44)); /* 2b */
1467 } else {
1468 table_len = ARRAY_SIZE(gfx8_src_index_table);
1469 uncompacted = elk_inst_bits(src, 88, 77); /* 12b */
1470 }
1471
1472 for (int i = 0; i < table_len; i++) {
1473 if (c->src0_index_table[i] == uncompacted) {
1474 elk_compact_inst_set_src0_index(devinfo, dst, i);
1475 return true;
1476 }
1477 }
1478
1479 return false;
1480 }
1481
1482 static bool
set_src1_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate,unsigned imm)1483 set_src1_index(const struct compaction_state *c, elk_compact_inst *dst,
1484 const elk_inst *src, bool is_immediate, unsigned imm)
1485 {
1486 const struct intel_device_info *devinfo = c->isa->devinfo;
1487 if (is_immediate) {
1488 if (devinfo->ver >= 12) {
1489 /* src1 index takes the low 4 bits of the 12-bit compacted value */
1490 elk_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1491 } else {
1492 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1493 elk_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1494 }
1495 return true;
1496 } else {
1497 uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */
1498 int table_len;
1499
1500 if (devinfo->ver >= 20) {
1501 table_len = ARRAY_SIZE(xe2_src1_index_table);
1502 uncompacted = (elk_inst_bits(src, 121, 120) << 14) | /* 2b */
1503 (elk_inst_bits(src, 118, 116) << 11) | /* 3b */
1504 (elk_inst_bits(src, 115, 113) << 8) | /* 3b */
1505 (elk_inst_bits(src, 112, 112) << 7) | /* 1b */
1506 (elk_inst_bits(src, 103, 99) << 2) | /* 5b */
1507 (elk_inst_bits(src, 97, 96)); /* 2b */
1508 } else if (devinfo->ver >= 12) {
1509 table_len = ARRAY_SIZE(gfx12_src0_index_table);
1510 uncompacted = (elk_inst_bits(src, 121, 120) << 10) | /* 2b */
1511 (elk_inst_bits(src, 119, 116) << 6) | /* 4b */
1512 (elk_inst_bits(src, 115, 113) << 3) | /* 3b */
1513 (elk_inst_bits(src, 112, 112) << 2) | /* 1b */
1514 (elk_inst_bits(src, 97, 96)); /* 2b */
1515 } else {
1516 table_len = ARRAY_SIZE(gfx8_src_index_table);
1517 uncompacted = elk_inst_bits(src, 120, 109); /* 12b */
1518 }
1519
1520 for (int i = 0; i < table_len; i++) {
1521 if (c->src1_index_table[i] == uncompacted) {
1522 elk_compact_inst_set_src1_index(devinfo, dst, i);
1523 return true;
1524 }
1525 }
1526 }
1527
1528 return false;
1529 }
1530
1531 static bool
set_3src_control_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src,bool is_dpas)1532 set_3src_control_index(const struct intel_device_info *devinfo,
1533 elk_compact_inst *dst, const elk_inst *src,
1534 bool is_dpas)
1535 {
1536 assert(devinfo->ver >= 8);
1537
1538 if (devinfo->ver >= 20) {
1539 assert(is_dpas || !elk_inst_bits(src, 49, 49));
1540
1541 const uint64_t uncompacted = /* 34b/Xe2+ */
1542 (elk_inst_bits(src, 95, 92) << 30) | /* 4b */
1543 (elk_inst_bits(src, 90, 88) << 27) | /* 3b */
1544 (elk_inst_bits(src, 82, 80) << 24) | /* 3b */
1545 (elk_inst_bits(src, 50, 50) << 23) | /* 1b */
1546 (elk_inst_bits(src, 49, 48) << 21) | /* 2b */
1547 (elk_inst_bits(src, 42, 40) << 18) | /* 3b */
1548 (elk_inst_bits(src, 39, 39) << 17) | /* 1b */
1549 (elk_inst_bits(src, 38, 36) << 14) | /* 3b */
1550 (elk_inst_bits(src, 34, 34) << 13) | /* 1b */
1551 (elk_inst_bits(src, 32, 32) << 12) | /* 1b */
1552 (elk_inst_bits(src, 31, 31) << 11) | /* 1b */
1553 (elk_inst_bits(src, 28, 28) << 10) | /* 1b */
1554 (elk_inst_bits(src, 27, 26) << 8) | /* 2b */
1555 (elk_inst_bits(src, 25, 24) << 6) | /* 2b */
1556 (elk_inst_bits(src, 23, 21) << 3) | /* 3b */
1557 (elk_inst_bits(src, 20, 18)); /* 3b */
1558
1559 /* The bits used to index the tables for 3src and 3src-dpas
1560 * are the same, so just need to pick the right one.
1561 */
1562 const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table :
1563 xe2_3src_control_index_table;
1564 const unsigned size = is_dpas ? ARRAY_SIZE(xe2_3src_dpas_control_index_table) :
1565 ARRAY_SIZE(xe2_3src_control_index_table);
1566 for (unsigned i = 0; i < size; i++) {
1567 if (table[i] == uncompacted) {
1568 elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1569 return true;
1570 }
1571 }
1572 } else if (devinfo->verx10 >= 125) {
1573 uint64_t uncompacted = /* 37b/XeHP+ */
1574 (elk_inst_bits(src, 95, 92) << 33) | /* 4b */
1575 (elk_inst_bits(src, 90, 88) << 30) | /* 3b */
1576 (elk_inst_bits(src, 82, 80) << 27) | /* 3b */
1577 (elk_inst_bits(src, 50, 50) << 26) | /* 1b */
1578 (elk_inst_bits(src, 49, 48) << 24) | /* 2b */
1579 (elk_inst_bits(src, 42, 40) << 21) | /* 3b */
1580 (elk_inst_bits(src, 39, 39) << 20) | /* 1b */
1581 (elk_inst_bits(src, 38, 36) << 17) | /* 3b */
1582 (elk_inst_bits(src, 34, 34) << 16) | /* 1b */
1583 (elk_inst_bits(src, 33, 33) << 15) | /* 1b */
1584 (elk_inst_bits(src, 32, 32) << 14) | /* 1b */
1585 (elk_inst_bits(src, 31, 31) << 13) | /* 1b */
1586 (elk_inst_bits(src, 28, 28) << 12) | /* 1b */
1587 (elk_inst_bits(src, 27, 24) << 8) | /* 4b */
1588 (elk_inst_bits(src, 23, 23) << 7) | /* 1b */
1589 (elk_inst_bits(src, 22, 22) << 6) | /* 1b */
1590 (elk_inst_bits(src, 21, 19) << 3) | /* 3b */
1591 (elk_inst_bits(src, 18, 16)); /* 3b */
1592
1593 for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1594 if (xehp_3src_control_index_table[i] == uncompacted) {
1595 elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1596 return true;
1597 }
1598 }
1599 } else if (devinfo->ver >= 12) {
1600 uint64_t uncompacted = /* 36b/TGL+ */
1601 (elk_inst_bits(src, 95, 92) << 32) | /* 4b */
1602 (elk_inst_bits(src, 90, 88) << 29) | /* 3b */
1603 (elk_inst_bits(src, 82, 80) << 26) | /* 3b */
1604 (elk_inst_bits(src, 50, 50) << 25) | /* 1b */
1605 (elk_inst_bits(src, 48, 48) << 24) | /* 1b */
1606 (elk_inst_bits(src, 42, 40) << 21) | /* 3b */
1607 (elk_inst_bits(src, 39, 39) << 20) | /* 1b */
1608 (elk_inst_bits(src, 38, 36) << 17) | /* 3b */
1609 (elk_inst_bits(src, 34, 34) << 16) | /* 1b */
1610 (elk_inst_bits(src, 33, 33) << 15) | /* 1b */
1611 (elk_inst_bits(src, 32, 32) << 14) | /* 1b */
1612 (elk_inst_bits(src, 31, 31) << 13) | /* 1b */
1613 (elk_inst_bits(src, 28, 28) << 12) | /* 1b */
1614 (elk_inst_bits(src, 27, 24) << 8) | /* 4b */
1615 (elk_inst_bits(src, 23, 23) << 7) | /* 1b */
1616 (elk_inst_bits(src, 22, 22) << 6) | /* 1b */
1617 (elk_inst_bits(src, 21, 19) << 3) | /* 3b */
1618 (elk_inst_bits(src, 18, 16)); /* 3b */
1619
1620 for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1621 if (gfx12_3src_control_index_table[i] == uncompacted) {
1622 elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1623 return true;
1624 }
1625 }
1626 } else {
1627 uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1628 (elk_inst_bits(src, 34, 32) << 21) | /* 3b */
1629 (elk_inst_bits(src, 28, 8)); /* 21b */
1630
1631 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1632 uncompacted |=
1633 elk_inst_bits(src, 36, 35) << 24; /* 2b */
1634 }
1635
1636 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1637 if (gfx8_3src_control_index_table[i] == uncompacted) {
1638 elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1639 return true;
1640 }
1641 }
1642 }
1643
1644 return false;
1645 }
1646
1647 static bool
set_3src_source_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src,bool is_dpas)1648 set_3src_source_index(const struct intel_device_info *devinfo,
1649 elk_compact_inst *dst, const elk_inst *src,
1650 bool is_dpas)
1651 {
1652 assert(devinfo->ver >= 8);
1653
1654 if (devinfo->ver >= 12) {
1655 uint32_t uncompacted = /* 21b/TGL+ */
1656 (elk_inst_bits(src, 114, 114) << 20) | /* 1b */
1657 (elk_inst_bits(src, 113, 112) << 18) | /* 2b */
1658 (elk_inst_bits(src, 98, 98) << 17) | /* 1b */
1659 (elk_inst_bits(src, 97, 96) << 15) | /* 2b */
1660 (elk_inst_bits(src, 91, 91) << 14) | /* 1b */
1661 (elk_inst_bits(src, 87, 86) << 12) | /* 2b */
1662 (elk_inst_bits(src, 85, 84) << 10) | /* 2b */
1663 (elk_inst_bits(src, 83, 83) << 9) | /* 1b */
1664 (elk_inst_bits(src, 66, 66) << 8) | /* 1b */
1665 (elk_inst_bits(src, 65, 64) << 6) | /* 2b */
1666 (elk_inst_bits(src, 47, 47) << 5) | /* 1b */
1667 (elk_inst_bits(src, 46, 46) << 4) | /* 1b */
1668 (elk_inst_bits(src, 45, 44) << 2) | /* 2b */
1669 (elk_inst_bits(src, 43, 43) << 1) | /* 1b */
1670 (elk_inst_bits(src, 35, 35)); /* 1b */
1671
1672 /* In Xe2, the bits used to index the tables for 3src and 3src-dpas
1673 * are the same, so just need to pick the right one.
1674 */
1675 const uint32_t *three_src_source_index_table =
1676 devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
1677 xe2_3src_source_index_table) :
1678 devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
1679 gfx12_3src_source_index_table;
1680 const uint32_t three_src_source_index_table_len =
1681 devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) :
1682 ARRAY_SIZE(xe2_3src_source_index_table)) :
1683 devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1684 ARRAY_SIZE(gfx12_3src_source_index_table);
1685
1686 for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1687 if (three_src_source_index_table[i] == uncompacted) {
1688 elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1689 return true;
1690 }
1691 }
1692 } else {
1693 uint64_t uncompacted = /* 46b/BDW; 49b/CHV/SKL+ */
1694 (elk_inst_bits(src, 83, 83) << 43) | /* 1b */
1695 (elk_inst_bits(src, 114, 107) << 35) | /* 8b */
1696 (elk_inst_bits(src, 93, 86) << 27) | /* 8b */
1697 (elk_inst_bits(src, 72, 65) << 19) | /* 8b */
1698 (elk_inst_bits(src, 55, 37)); /* 19b */
1699
1700 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1701 uncompacted |=
1702 (elk_inst_bits(src, 126, 125) << 47) | /* 2b */
1703 (elk_inst_bits(src, 105, 104) << 45) | /* 2b */
1704 (elk_inst_bits(src, 84, 84) << 44); /* 1b */
1705 } else {
1706 uncompacted |=
1707 (elk_inst_bits(src, 125, 125) << 45) | /* 1b */
1708 (elk_inst_bits(src, 104, 104) << 44); /* 1b */
1709 }
1710
1711 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1712 if (gfx8_3src_source_index_table[i] == uncompacted) {
1713 elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1714 return true;
1715 }
1716 }
1717 }
1718
1719 return false;
1720 }
1721
1722 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1723 set_3src_subreg_index(const struct intel_device_info *devinfo,
1724 elk_compact_inst *dst, const elk_inst *src)
1725 {
1726 assert(devinfo->ver >= 12);
1727
1728 uint32_t uncompacted = /* 20b/TGL+ */
1729 (elk_inst_bits(src, 119, 115) << 15) | /* 5b */
1730 (elk_inst_bits(src, 103, 99) << 10) | /* 5b */
1731 (elk_inst_bits(src, 71, 67) << 5) | /* 5b */
1732 (elk_inst_bits(src, 55, 51)); /* 5b */
1733
1734 const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table :
1735 gfx12_3src_subreg_table;
1736 const uint32_t len =
1737 devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) :
1738 ARRAY_SIZE(gfx12_3src_subreg_table);
1739
1740 for (unsigned i = 0; i < len; i++) {
1741 if (table[i] == uncompacted) {
1742 elk_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1743 return true;
1744 }
1745 }
1746
1747 return false;
1748 }
1749
1750 static bool
has_unmapped_bits(const struct elk_isa_info * isa,const elk_inst * src)1751 has_unmapped_bits(const struct elk_isa_info *isa, const elk_inst *src)
1752 {
1753 const struct intel_device_info *devinfo = isa->devinfo;
1754
1755 /* EOT can only be mapped on a send if the src1 is an immediate */
1756 if ((elk_inst_opcode(isa, src) == ELK_OPCODE_SENDC ||
1757 elk_inst_opcode(isa, src) == ELK_OPCODE_SEND) &&
1758 elk_inst_eot(devinfo, src))
1759 return true;
1760
1761 /* Check for instruction bits that don't map to any of the fields of the
1762 * compacted instruction. The instruction cannot be compacted if any of
1763 * them are set. They overlap with:
1764 * - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1765 * - Dst.AddrImm[9] (bit 47 on Gfx8)
1766 * - Src0.AddrImm[9] (bit 95 on Gfx8)
1767 * - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1768 * - UIP[31] (bit 95 on Gfx8)
1769 */
1770 if (devinfo->ver >= 12) {
1771 assert(!elk_inst_bits(src, 7, 7));
1772 return false;
1773 } else if (devinfo->ver >= 8) {
1774 assert(!elk_inst_bits(src, 7, 7));
1775 return elk_inst_bits(src, 95, 95) ||
1776 elk_inst_bits(src, 47, 47) ||
1777 elk_inst_bits(src, 11, 11);
1778 } else {
1779 assert(!elk_inst_bits(src, 7, 7) &&
1780 !(devinfo->ver < 7 && elk_inst_bits(src, 90, 90)));
1781 return elk_inst_bits(src, 95, 91) ||
1782 elk_inst_bits(src, 47, 47);
1783 }
1784 }
1785
1786 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const elk_inst * src,bool is_dpas)1787 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1788 const elk_inst *src, bool is_dpas)
1789 {
1790 /* Check for three-source instruction bits that don't map to any of the
1791 * fields of the compacted instruction. All of them seem to be reserved
1792 * bits currently.
1793 */
1794 if (devinfo->ver >= 20) {
1795 assert(is_dpas || !elk_inst_bits(src, 49, 49));
1796 assert(!elk_inst_bits(src, 33, 33));
1797 assert(!elk_inst_bits(src, 7, 7));
1798 } else if (devinfo->ver >= 12) {
1799 assert(is_dpas || !elk_inst_bits(src, 49, 49));
1800 assert(!elk_inst_bits(src, 7, 7));
1801 } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1802 assert(!elk_inst_bits(src, 127, 127) &&
1803 !elk_inst_bits(src, 7, 7));
1804 } else {
1805 assert(devinfo->ver >= 8);
1806 assert(!elk_inst_bits(src, 127, 126) &&
1807 !elk_inst_bits(src, 105, 105) &&
1808 !elk_inst_bits(src, 84, 84) &&
1809 !elk_inst_bits(src, 7, 7));
1810
1811 /* Src1Type and Src2Type, used for mixed-precision floating point */
1812 if (elk_inst_bits(src, 36, 35))
1813 return true;
1814 }
1815
1816 return false;
1817 }
1818
1819 static bool
elk_try_compact_3src_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1820 elk_try_compact_3src_instruction(const struct elk_isa_info *isa,
1821 elk_compact_inst *dst, const elk_inst *src)
1822 {
1823 const struct intel_device_info *devinfo = isa->devinfo;
1824 assert(devinfo->ver >= 8);
1825
1826 bool is_dpas = elk_inst_opcode(isa, src) == ELK_OPCODE_DPAS;
1827 if (has_3src_unmapped_bits(devinfo, src, is_dpas))
1828 return false;
1829
1830 #define compact(field) \
1831 elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_##field(devinfo, src))
1832 #define compact_a16(field) \
1833 elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_a16_##field(devinfo, src))
1834
1835 compact(hw_opcode);
1836
1837 if (!set_3src_control_index(devinfo, dst, src, is_dpas))
1838 return false;
1839
1840 if (!set_3src_source_index(devinfo, dst, src, is_dpas))
1841 return false;
1842
1843 if (devinfo->ver >= 12) {
1844 if (!set_3src_subreg_index(devinfo, dst, src))
1845 return false;
1846
1847 compact(swsb);
1848 compact(debug_control);
1849 compact(dst_reg_nr);
1850 compact(src0_reg_nr);
1851 compact(src1_reg_nr);
1852 compact(src2_reg_nr);
1853 } else {
1854 compact(dst_reg_nr);
1855 compact_a16(src0_rep_ctrl);
1856 compact(debug_control);
1857 compact(saturate);
1858 compact_a16(src1_rep_ctrl);
1859 compact_a16(src2_rep_ctrl);
1860 compact(src0_reg_nr);
1861 compact(src1_reg_nr);
1862 compact(src2_reg_nr);
1863 compact_a16(src0_subreg_nr);
1864 compact_a16(src1_subreg_nr);
1865 compact_a16(src2_subreg_nr);
1866 }
1867 elk_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1868
1869 #undef compact
1870 #undef compact_a16
1871
1872 return true;
1873 }
1874
1875 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1876 * sources, and a 13th bit that's replicated through the high 20 bits.
1877 *
1878 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1879 * of packed vectors as compactable immediates.
1880 *
1881 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1882 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1883 * while for unsigned integers it is not.
1884 *
1885 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1886 */
1887 static int
compact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned imm)1888 compact_immediate(const struct intel_device_info *devinfo,
1889 enum elk_reg_type type, unsigned imm)
1890 {
1891 if (devinfo->ver >= 12) {
1892 /* 16-bit immediates need to be replicated through the 32-bit immediate
1893 * field
1894 */
1895 switch (type) {
1896 case ELK_REGISTER_TYPE_W:
1897 case ELK_REGISTER_TYPE_UW:
1898 case ELK_REGISTER_TYPE_HF:
1899 if ((imm >> 16) != (imm & 0xffff))
1900 return -1;
1901 break;
1902 default:
1903 break;
1904 }
1905
1906 switch (type) {
1907 case ELK_REGISTER_TYPE_F:
1908 /* We get the high 12-bits as-is; rest must be zero */
1909 if ((imm & 0xfffff) == 0)
1910 return (imm >> 20) & 0xfff;
1911 break;
1912 case ELK_REGISTER_TYPE_HF:
1913 /* We get the high 12-bits as-is; rest must be zero */
1914 if ((imm & 0xf) == 0)
1915 return (imm >> 4) & 0xfff;
1916 break;
1917 case ELK_REGISTER_TYPE_UD:
1918 case ELK_REGISTER_TYPE_VF:
1919 case ELK_REGISTER_TYPE_UV:
1920 case ELK_REGISTER_TYPE_V:
1921 /* We get the low 12-bits as-is; rest must be zero */
1922 if ((imm & 0xfffff000) == 0)
1923 return imm & 0xfff;
1924 break;
1925 case ELK_REGISTER_TYPE_UW:
1926 /* We get the low 12-bits as-is; rest must be zero */
1927 if ((imm & 0xf000) == 0)
1928 return imm & 0xfff;
1929 break;
1930 case ELK_REGISTER_TYPE_D:
1931 /* We get the low 11-bits as-is; 12th is replicated */
1932 if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1933 return imm & 0xfff;
1934 break;
1935 case ELK_REGISTER_TYPE_W:
1936 /* We get the low 11-bits as-is; 12th is replicated */
1937 if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1938 return imm & 0xfff;
1939 break;
1940 case ELK_REGISTER_TYPE_NF:
1941 case ELK_REGISTER_TYPE_DF:
1942 case ELK_REGISTER_TYPE_Q:
1943 case ELK_REGISTER_TYPE_UQ:
1944 case ELK_REGISTER_TYPE_B:
1945 case ELK_REGISTER_TYPE_UB:
1946 return -1;
1947 }
1948 } else {
1949 /* We get the low 12 bits as-is; 13th is replicated */
1950 if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1951 return imm & 0x1fff;
1952 }
1953 }
1954
1955 return -1;
1956 }
1957
1958 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned compact_imm)1959 uncompact_immediate(const struct intel_device_info *devinfo,
1960 enum elk_reg_type type, unsigned compact_imm)
1961 {
1962 if (devinfo->ver >= 12) {
1963 switch (type) {
1964 case ELK_REGISTER_TYPE_F:
1965 return compact_imm << 20;
1966 case ELK_REGISTER_TYPE_HF:
1967 return (compact_imm << 20) | (compact_imm << 4);
1968 case ELK_REGISTER_TYPE_UD:
1969 case ELK_REGISTER_TYPE_VF:
1970 case ELK_REGISTER_TYPE_UV:
1971 case ELK_REGISTER_TYPE_V:
1972 return compact_imm;
1973 case ELK_REGISTER_TYPE_UW:
1974 /* Replicate */
1975 return compact_imm << 16 | compact_imm;
1976 case ELK_REGISTER_TYPE_D:
1977 /* Extend the 12th bit into the high 20 bits */
1978 return (int)(compact_imm << 20) >> 20;
1979 case ELK_REGISTER_TYPE_W:
1980 /* Extend the 12th bit into the high 4 bits and replicate */
1981 return ((int)(compact_imm << 20) >> 4) |
1982 ((unsigned short)((short)(compact_imm << 4) >> 4));
1983 case ELK_REGISTER_TYPE_NF:
1984 case ELK_REGISTER_TYPE_DF:
1985 case ELK_REGISTER_TYPE_Q:
1986 case ELK_REGISTER_TYPE_UQ:
1987 case ELK_REGISTER_TYPE_B:
1988 case ELK_REGISTER_TYPE_UB:
1989 unreachable("not reached");
1990 }
1991 } else {
1992 /* Replicate the 13th bit into the high 19 bits */
1993 return (int)(compact_imm << 19) >> 19;
1994 }
1995
1996 unreachable("not reached");
1997 }
1998
1999 static bool
has_immediate(const struct intel_device_info * devinfo,const elk_inst * inst,enum elk_reg_type * type)2000 has_immediate(const struct intel_device_info *devinfo, const elk_inst *inst,
2001 enum elk_reg_type *type)
2002 {
2003 if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
2004 *type = elk_inst_src0_type(devinfo, inst);
2005 return *type != INVALID_REG_TYPE;
2006 } else if (elk_inst_src1_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
2007 *type = elk_inst_src1_type(devinfo, inst);
2008 return *type != INVALID_REG_TYPE;
2009 }
2010
2011 return false;
2012 }
2013
2014 /**
2015 * Applies some small changes to instruction types to increase chances of
2016 * compaction.
2017 */
2018 static elk_inst
precompact(const struct elk_isa_info * isa,elk_inst inst)2019 precompact(const struct elk_isa_info *isa, elk_inst inst)
2020 {
2021 const struct intel_device_info *devinfo = isa->devinfo;
2022
2023 /* In XeHP the compaction tables removed the entries for source regions
2024 * <8;8,1> giving preference to <1;1,0> as the way to indicate
2025 * sequential elements, so convert to those before compacting.
2026 */
2027 if (devinfo->verx10 >= 125) {
2028 if (elk_inst_src0_reg_file(devinfo, &inst) == ELK_GENERAL_REGISTER_FILE &&
2029 elk_inst_src0_vstride(devinfo, &inst) > ELK_VERTICAL_STRIDE_1 &&
2030 elk_inst_src0_vstride(devinfo, &inst) == (elk_inst_src0_width(devinfo, &inst) + 1) &&
2031 elk_inst_src0_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2032 elk_inst_set_src0_vstride(devinfo, &inst, ELK_VERTICAL_STRIDE_1);
2033 elk_inst_set_src0_width(devinfo, &inst, ELK_WIDTH_1);
2034 elk_inst_set_src0_hstride(devinfo, &inst, ELK_HORIZONTAL_STRIDE_0);
2035 }
2036
2037 if (elk_inst_src1_reg_file(devinfo, &inst) == ELK_GENERAL_REGISTER_FILE &&
2038 elk_inst_src1_vstride(devinfo, &inst) > ELK_VERTICAL_STRIDE_1 &&
2039 elk_inst_src1_vstride(devinfo, &inst) == (elk_inst_src1_width(devinfo, &inst) + 1) &&
2040 elk_inst_src1_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2041 elk_inst_set_src1_vstride(devinfo, &inst, ELK_VERTICAL_STRIDE_1);
2042 elk_inst_set_src1_width(devinfo, &inst, ELK_WIDTH_1);
2043 elk_inst_set_src1_hstride(devinfo, &inst, ELK_HORIZONTAL_STRIDE_0);
2044 }
2045 }
2046
2047 if (elk_inst_src0_reg_file(devinfo, &inst) != ELK_IMMEDIATE_VALUE)
2048 return inst;
2049
2050 /* The Bspec's section titled "Non-present Operands" claims that if src0
2051 * is an immediate that src1's type must be the same as that of src0.
2052 *
2053 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
2054 * that do not follow this rule. E.g., from the IVB/HSW table:
2055 *
2056 * DataTypeIndex 18-Bit Mapping Mapped Meaning
2057 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
2058 *
2059 * And from the SNB table:
2060 *
2061 * DataTypeIndex 18-Bit Mapping Mapped Meaning
2062 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
2063 *
2064 * Neither of these cause warnings from the simulator when used,
2065 * compacted or otherwise. In fact, all compaction mappings that have an
2066 * immediate in src0 use a:ud for src1.
2067 *
2068 * The GM45 instruction compaction tables do not contain mapped meanings
2069 * so it's not clear whether it has the restriction. We'll assume it was
2070 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
2071 *
2072 * Don't do any of this for 64-bit immediates, since the src1 fields
2073 * overlap with the immediate and setting them would overwrite the
2074 * immediate we set.
2075 */
2076 if (devinfo->ver >= 6 &&
2077 !(devinfo->platform == INTEL_PLATFORM_HSW &&
2078 elk_inst_opcode(isa, &inst) == ELK_OPCODE_DIM) &&
2079 !(devinfo->ver >= 8 &&
2080 (elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_DF ||
2081 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_UQ ||
2082 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_Q))) {
2083 elk_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
2084 }
2085
2086 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
2087 * for immediate values. Presumably the hardware engineers realized
2088 * that the only useful floating-point value that could be represented
2089 * in this format is 0.0, which can also be represented as a VF-typed
2090 * immediate, so they gave us the previously mentioned mapping on IVB+.
2091 *
2092 * Strangely, we do have a mapping for imm:f in src1, so we don't need
2093 * to do this there.
2094 *
2095 * If we see a 0.0:F, change the type to VF so that it can be compacted.
2096 *
2097 * Compaction of floating-point immediates is improved on Gfx12, thus
2098 * removing the need for this.
2099 */
2100 if (devinfo->ver < 12 &&
2101 elk_inst_imm_ud(devinfo, &inst) == 0x0 &&
2102 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_F &&
2103 elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_F &&
2104 elk_inst_dst_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2105 enum elk_reg_file file = elk_inst_src0_reg_file(devinfo, &inst);
2106 elk_inst_set_src0_file_type(devinfo, &inst, file, ELK_REGISTER_TYPE_VF);
2107 }
2108
2109 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
2110 * set the types to :UD so the instruction can be compacted.
2111 *
2112 * FINISHME: Use dst:f | imm:f on Gfx12
2113 */
2114 if (devinfo->ver < 12 &&
2115 compact_immediate(devinfo, ELK_REGISTER_TYPE_D,
2116 elk_inst_imm_ud(devinfo, &inst)) != -1 &&
2117 elk_inst_cond_modifier(devinfo, &inst) == ELK_CONDITIONAL_NONE &&
2118 elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_D &&
2119 elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_D) {
2120 enum elk_reg_file src_file = elk_inst_src0_reg_file(devinfo, &inst);
2121 enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, &inst);
2122
2123 elk_inst_set_src0_file_type(devinfo, &inst, src_file, ELK_REGISTER_TYPE_UD);
2124 elk_inst_set_dst_file_type(devinfo, &inst, dst_file, ELK_REGISTER_TYPE_UD);
2125 }
2126
2127 return inst;
2128 }
2129
2130 /**
2131 * Tries to compact instruction src into dst.
2132 *
2133 * It doesn't modify dst unless src is compactable, which is relied on by
2134 * elk_compact_instructions().
2135 */
2136 static bool
try_compact_instruction(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)2137 try_compact_instruction(const struct compaction_state *c,
2138 elk_compact_inst *dst, const elk_inst *src)
2139 {
2140 const struct intel_device_info *devinfo = c->isa->devinfo;
2141 elk_compact_inst temp;
2142
2143 assert(elk_inst_cmpt_control(devinfo, src) == 0);
2144
2145 if (elk_is_3src(c->isa, elk_inst_opcode(c->isa, src))) {
2146 if (devinfo->ver >= 8) {
2147 memset(&temp, 0, sizeof(temp));
2148 if (elk_try_compact_3src_instruction(c->isa, &temp, src)) {
2149 *dst = temp;
2150 return true;
2151 } else {
2152 return false;
2153 }
2154 } else {
2155 return false;
2156 }
2157 }
2158
2159 enum elk_reg_type type;
2160 bool is_immediate = has_immediate(devinfo, src, &type);
2161
2162 unsigned compacted_imm = 0;
2163
2164 if (is_immediate) {
2165 /* Instructions with immediates cannot be compacted on Gen < 6 */
2166 if (devinfo->ver < 6)
2167 return false;
2168
2169 compacted_imm = compact_immediate(devinfo, type,
2170 elk_inst_imm_ud(devinfo, src));
2171 if (compacted_imm == -1)
2172 return false;
2173 }
2174
2175 if (has_unmapped_bits(c->isa, src))
2176 return false;
2177
2178 memset(&temp, 0, sizeof(temp));
2179
2180 #define compact(field) \
2181 elk_compact_inst_set_##field(devinfo, &temp, elk_inst_##field(devinfo, src))
2182 #define compact_reg(field) \
2183 elk_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
2184 elk_inst_##field##_da_reg_nr(devinfo, src))
2185
2186 compact(hw_opcode);
2187 compact(debug_control);
2188
2189 if (!set_control_index(c, &temp, src))
2190 return false;
2191 if (!set_datatype_index(c, &temp, src, is_immediate))
2192 return false;
2193 if (!set_subreg_index(c, &temp, src, is_immediate))
2194 return false;
2195 if (!set_src0_index(c, &temp, src))
2196 return false;
2197 if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
2198 return false;
2199
2200 if (devinfo->ver >= 12) {
2201 compact(swsb);
2202 compact_reg(dst);
2203 compact_reg(src0);
2204
2205 if (is_immediate) {
2206 /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
2207 elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
2208 } else {
2209 compact_reg(src1);
2210 }
2211 } else {
2212 if (devinfo->ver >= 6) {
2213 compact(acc_wr_control);
2214 } else {
2215 compact(mask_control_ex);
2216 }
2217
2218 if (devinfo->ver <= 6)
2219 compact(flag_subreg_nr);
2220
2221 compact(cond_modifier);
2222
2223 compact_reg(dst);
2224 compact_reg(src0);
2225
2226 if (is_immediate) {
2227 /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
2228 elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
2229 } else {
2230 compact_reg(src1);
2231 }
2232 }
2233 elk_compact_inst_set_cmpt_control(devinfo, &temp, true);
2234
2235 #undef compact
2236 #undef compact_reg
2237
2238 *dst = temp;
2239
2240 return true;
2241 }
2242
2243 bool
elk_try_compact_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)2244 elk_try_compact_instruction(const struct elk_isa_info *isa,
2245 elk_compact_inst *dst, const elk_inst *src)
2246 {
2247 struct compaction_state c;
2248 compaction_state_init(&c, isa);
2249 return try_compact_instruction(&c, dst, src);
2250 }
2251
2252 static void
set_uncompacted_control(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2253 set_uncompacted_control(const struct compaction_state *c, elk_inst *dst,
2254 elk_compact_inst *src)
2255 {
2256 const struct intel_device_info *devinfo = c->isa->devinfo;
2257 uint32_t uncompacted =
2258 c->control_index_table[elk_compact_inst_control_index(devinfo, src)];
2259
2260 if (devinfo->ver >= 20) {
2261 elk_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf);
2262 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
2263 elk_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
2264 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
2265 elk_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
2266 elk_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3);
2267 elk_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3);
2268 elk_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7);
2269 elk_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7);
2270 } else if (devinfo->ver >= 12) {
2271 elk_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
2272 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2273 elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2274 elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2275 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2276 elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2277 elk_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2278 elk_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);
2279 elk_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2280 elk_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2281 } else if (devinfo->ver >= 8) {
2282 elk_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
2283 elk_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
2284 elk_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
2285 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
2286 elk_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
2287 } else {
2288 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
2289 elk_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
2290
2291 if (devinfo->ver == 7)
2292 elk_inst_set_bits(dst, 90, 89, uncompacted >> 17);
2293 }
2294 }
2295
2296 static void
set_uncompacted_datatype(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2297 set_uncompacted_datatype(const struct compaction_state *c, elk_inst *dst,
2298 elk_compact_inst *src)
2299 {
2300 const struct intel_device_info *devinfo = c->isa->devinfo;
2301 uint32_t uncompacted =
2302 c->datatype_table[elk_compact_inst_datatype_index(devinfo, src)];
2303
2304 if (devinfo->ver >= 12) {
2305 elk_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
2306 elk_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
2307 elk_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
2308 elk_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
2309 elk_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
2310 elk_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
2311 elk_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);
2312 elk_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);
2313 elk_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf);
2314 elk_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
2315 } else if (devinfo->ver >= 8) {
2316 elk_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
2317 elk_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
2318 elk_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
2319 } else {
2320 elk_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
2321 elk_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
2322 }
2323 }
2324
2325 static void
set_uncompacted_subreg(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2326 set_uncompacted_subreg(const struct compaction_state *c, elk_inst *dst,
2327 elk_compact_inst *src)
2328 {
2329 const struct intel_device_info *devinfo = c->isa->devinfo;
2330 uint16_t uncompacted =
2331 c->subreg_table[elk_compact_inst_subreg_index(devinfo, src)];
2332
2333 if (devinfo->ver >= 20) {
2334 elk_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1);
2335 elk_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f);
2336 elk_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f);
2337 elk_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
2338 } else if (devinfo->ver >= 12) {
2339 elk_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
2340 elk_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2341 elk_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2342 } else {
2343 elk_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
2344 elk_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
2345 elk_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
2346 }
2347 }
2348
2349 static void
set_uncompacted_src0(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2350 set_uncompacted_src0(const struct compaction_state *c, elk_inst *dst,
2351 elk_compact_inst *src)
2352 {
2353 const struct intel_device_info *devinfo = c->isa->devinfo;
2354 uint32_t compacted = elk_compact_inst_src0_index(devinfo, src);
2355 uint16_t uncompacted = c->src0_index_table[compacted];
2356
2357 if (devinfo->ver >= 12) {
2358 if (devinfo->ver < 20)
2359 elk_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
2360 elk_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7);
2361 elk_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
2362 elk_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2363 elk_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2364 elk_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2365 } else {
2366 elk_inst_set_bits(dst, 88, 77, uncompacted);
2367 }
2368 }
2369
2370 static void
set_uncompacted_src1(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2371 set_uncompacted_src1(const struct compaction_state *c, elk_inst *dst,
2372 elk_compact_inst *src)
2373 {
2374 const struct intel_device_info *devinfo = c->isa->devinfo;
2375 uint16_t uncompacted =
2376 c->src1_index_table[elk_compact_inst_src1_index(devinfo, src)];
2377
2378 if (devinfo->ver >= 20) {
2379 elk_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3);
2380 elk_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7);
2381 elk_inst_set_bits(dst, 115, 113, (uncompacted >> 8) & 0x7);
2382 elk_inst_set_bits(dst, 112, 112, (uncompacted >> 7) & 0x1);
2383 elk_inst_set_bits(dst, 103, 99, (uncompacted >> 2) & 0x1f);
2384 elk_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
2385 } else if (devinfo->ver >= 12) {
2386 elk_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2387 elk_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);
2388 elk_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);
2389 elk_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);
2390 elk_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
2391 } else {
2392 elk_inst_set_bits(dst, 120, 109, uncompacted);
2393 }
2394 }
2395
2396 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2397 set_uncompacted_3src_control_index(const struct compaction_state *c,
2398 elk_inst *dst, elk_compact_inst *src,
2399 bool is_dpas)
2400 {
2401 const struct intel_device_info *devinfo = c->isa->devinfo;
2402 assert(devinfo->ver >= 8);
2403
2404 if (devinfo->ver >= 20) {
2405 uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2406 uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] :
2407 xe2_3src_control_index_table[compacted];
2408
2409 elk_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf);
2410 elk_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7);
2411 elk_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7);
2412 elk_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1);
2413 elk_inst_set_bits(dst, 49, 48, (uncompacted >> 21) & 0x3);
2414 elk_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7);
2415 elk_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1);
2416 elk_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7);
2417 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
2418 elk_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
2419 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
2420 elk_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
2421 elk_inst_set_bits(dst, 27, 26, (uncompacted >> 8) & 0x3);
2422 elk_inst_set_bits(dst, 25, 24, (uncompacted >> 6) & 0x3);
2423 elk_inst_set_bits(dst, 23, 21, (uncompacted >> 3) & 0x7);
2424 elk_inst_set_bits(dst, 20, 18, (uncompacted >> 0) & 0x7);
2425
2426 } else if (devinfo->verx10 >= 125) {
2427 uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2428 uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2429
2430 elk_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2431 elk_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2432 elk_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2433 elk_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2434 elk_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2435 elk_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2436 elk_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2437 elk_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2438 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2439 elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2440 elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2441 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2442 elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2443 elk_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2444 elk_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2445 elk_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2446 elk_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2447 elk_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2448
2449 } else if (devinfo->ver >= 12) {
2450 uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2451 uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2452
2453 elk_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2454 elk_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2455 elk_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2456 elk_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2457 elk_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2458 elk_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2459 elk_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2460 elk_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2461 elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2462 elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2463 elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2464 elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2465 elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2466 elk_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2467 elk_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2468 elk_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2469 elk_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2470 elk_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2471 } else {
2472 uint32_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2473 uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2474
2475 elk_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2476 elk_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
2477
2478 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2479 elk_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2480 }
2481 }
2482
2483 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2484 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2485 elk_inst *dst, elk_compact_inst *src,
2486 bool is_dpas)
2487 {
2488 assert(devinfo->ver >= 8);
2489
2490 uint32_t compacted = elk_compact_inst_3src_source_index(devinfo, src);
2491
2492 if (devinfo->ver >= 12) {
2493 const uint32_t *three_src_source_index_table =
2494 devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
2495 xe2_3src_source_index_table) :
2496 devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
2497 gfx12_3src_source_index_table;
2498 uint32_t uncompacted = three_src_source_index_table[compacted];
2499
2500 elk_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2501 elk_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2502 elk_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);
2503 elk_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);
2504 elk_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);
2505 elk_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);
2506 elk_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);
2507 elk_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);
2508 elk_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);
2509 elk_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);
2510 elk_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);
2511 elk_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);
2512 elk_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);
2513 elk_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
2514 elk_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
2515 } else {
2516 uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2517
2518 elk_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
2519 elk_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2520 elk_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
2521 elk_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
2522 elk_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
2523
2524 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2525 elk_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2526 elk_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2527 elk_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
2528 } else {
2529 elk_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2530 elk_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2531 }
2532 }
2533 }
2534
2535 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src)2536 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2537 elk_inst *dst, elk_compact_inst *src)
2538 {
2539 assert(devinfo->ver >= 12);
2540
2541 uint32_t compacted = elk_compact_inst_3src_subreg_index(devinfo, src);
2542 uint32_t uncompacted = (devinfo->ver >= 20 ? xe2_3src_subreg_table[compacted]:
2543 gfx12_3src_subreg_table[compacted]);
2544
2545 elk_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2546 elk_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
2547 elk_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2548 elk_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2549 }
2550
2551 static void
elk_uncompact_3src_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2552 elk_uncompact_3src_instruction(const struct compaction_state *c,
2553 elk_inst *dst, elk_compact_inst *src, bool is_dpas)
2554 {
2555 const struct intel_device_info *devinfo = c->isa->devinfo;
2556 assert(devinfo->ver >= 8);
2557
2558 #define uncompact(field) \
2559 elk_inst_set_3src_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
2560 #define uncompact_a16(field) \
2561 elk_inst_set_3src_a16_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
2562
2563 uncompact(hw_opcode);
2564
2565 if (devinfo->ver >= 12) {
2566 set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2567 set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2568 set_uncompacted_3src_subreg_index(devinfo, dst, src);
2569
2570 uncompact(debug_control);
2571 uncompact(swsb);
2572 uncompact(dst_reg_nr);
2573 uncompact(src0_reg_nr);
2574 uncompact(src1_reg_nr);
2575 uncompact(src2_reg_nr);
2576 } else {
2577 set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2578 set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2579
2580 uncompact(dst_reg_nr);
2581 uncompact_a16(src0_rep_ctrl);
2582 uncompact(debug_control);
2583 uncompact(saturate);
2584 uncompact_a16(src1_rep_ctrl);
2585 uncompact_a16(src2_rep_ctrl);
2586 uncompact(src0_reg_nr);
2587 uncompact(src1_reg_nr);
2588 uncompact(src2_reg_nr);
2589 uncompact_a16(src0_subreg_nr);
2590 uncompact_a16(src1_subreg_nr);
2591 uncompact_a16(src2_subreg_nr);
2592 }
2593 elk_inst_set_3src_cmpt_control(devinfo, dst, false);
2594
2595 #undef uncompact
2596 #undef uncompact_a16
2597 }
2598
2599 static void
uncompact_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2600 uncompact_instruction(const struct compaction_state *c, elk_inst *dst,
2601 elk_compact_inst *src)
2602 {
2603 const struct intel_device_info *devinfo = c->isa->devinfo;
2604 memset(dst, 0, sizeof(*dst));
2605
2606 if (devinfo->ver >= 8) {
2607 const enum elk_opcode opcode =
2608 elk_opcode_decode(c->isa, elk_compact_inst_3src_hw_opcode(devinfo, src));
2609 if (elk_is_3src(c->isa, opcode)) {
2610 const bool is_dpas = opcode == ELK_OPCODE_DPAS;
2611 elk_uncompact_3src_instruction(c, dst, src, is_dpas);
2612 return;
2613 }
2614 }
2615
2616 #define uncompact(field) \
2617 elk_inst_set_##field(devinfo, dst, elk_compact_inst_##field(devinfo, src))
2618 #define uncompact_reg(field) \
2619 elk_inst_set_##field##_da_reg_nr(devinfo, dst, \
2620 elk_compact_inst_##field##_reg_nr(devinfo, src))
2621
2622 uncompact(hw_opcode);
2623 uncompact(debug_control);
2624
2625 set_uncompacted_control(c, dst, src);
2626 set_uncompacted_datatype(c, dst, src);
2627 set_uncompacted_subreg(c, dst, src);
2628 set_uncompacted_src0(c, dst, src);
2629
2630 enum elk_reg_type type;
2631 if (has_immediate(devinfo, dst, &type)) {
2632 unsigned imm = uncompact_immediate(devinfo, type,
2633 elk_compact_inst_imm(devinfo, src));
2634 elk_inst_set_imm_ud(devinfo, dst, imm);
2635 } else {
2636 set_uncompacted_src1(c, dst, src);
2637 uncompact_reg(src1);
2638 }
2639
2640 if (devinfo->ver >= 12) {
2641 uncompact(swsb);
2642 uncompact_reg(dst);
2643 uncompact_reg(src0);
2644 } else {
2645 if (devinfo->ver >= 6) {
2646 uncompact(acc_wr_control);
2647 } else {
2648 uncompact(mask_control_ex);
2649 }
2650
2651 uncompact(cond_modifier);
2652
2653 if (devinfo->ver <= 6)
2654 uncompact(flag_subreg_nr);
2655
2656 uncompact_reg(dst);
2657 uncompact_reg(src0);
2658 }
2659 elk_inst_set_cmpt_control(devinfo, dst, false);
2660
2661 #undef uncompact
2662 #undef uncompact_reg
2663 }
2664
2665 void
elk_uncompact_instruction(const struct elk_isa_info * isa,elk_inst * dst,elk_compact_inst * src)2666 elk_uncompact_instruction(const struct elk_isa_info *isa,
2667 elk_inst *dst, elk_compact_inst *src)
2668 {
2669 struct compaction_state c;
2670 compaction_state_init(&c, isa);
2671 uncompact_instruction(&c, dst, src);
2672 }
2673
2674 void
elk_debug_compact_uncompact(const struct elk_isa_info * isa,elk_inst * orig,elk_inst * uncompacted)2675 elk_debug_compact_uncompact(const struct elk_isa_info *isa,
2676 elk_inst *orig,
2677 elk_inst *uncompacted)
2678 {
2679 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2680 isa->devinfo->ver);
2681
2682 fprintf(stderr, " before: ");
2683 elk_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2684
2685 fprintf(stderr, " after: ");
2686 elk_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2687
2688 uint32_t *before_bits = (uint32_t *)orig;
2689 uint32_t *after_bits = (uint32_t *)uncompacted;
2690 fprintf(stderr, " changed bits:\n");
2691 for (int i = 0; i < 128; i++) {
2692 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2693 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2694
2695 if (before != after) {
2696 fprintf(stderr, " bit %d, %s to %s\n", i,
2697 before ? "set" : "unset",
2698 after ? "set" : "unset");
2699 }
2700 }
2701 }
2702
/**
 * Return the difference between the compacted-instruction counts recorded
 * for \p old_target_ip and \p old_ip in \p compacted_counts.
 *
 * The result is negative when the count at the target is smaller than the
 * count at \p old_ip.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2710
2711 static void
update_uip_jip(const struct elk_isa_info * isa,elk_inst * insn,int this_old_ip,int * compacted_counts)2712 update_uip_jip(const struct elk_isa_info *isa, elk_inst *insn,
2713 int this_old_ip, int *compacted_counts)
2714 {
2715 const struct intel_device_info *devinfo = isa->devinfo;
2716
2717 /* JIP and UIP are in units of:
2718 * - bytes on Gfx8+; and
2719 * - compacted instructions on Gfx6+.
2720 */
2721 int shift = devinfo->ver >= 8 ? 3 : 0;
2722
2723 int32_t jip_compacted = elk_inst_jip(devinfo, insn) >> shift;
2724 jip_compacted -= compacted_between(this_old_ip,
2725 this_old_ip + (jip_compacted / 2),
2726 compacted_counts);
2727 elk_inst_set_jip(devinfo, insn, jip_compacted << shift);
2728
2729 if (elk_inst_opcode(isa, insn) == ELK_OPCODE_ENDIF ||
2730 elk_inst_opcode(isa, insn) == ELK_OPCODE_WHILE ||
2731 (elk_inst_opcode(isa, insn) == ELK_OPCODE_ELSE && devinfo->ver <= 7))
2732 return;
2733
2734 int32_t uip_compacted = elk_inst_uip(devinfo, insn) >> shift;
2735 uip_compacted -= compacted_between(this_old_ip,
2736 this_old_ip + (uip_compacted / 2),
2737 compacted_counts);
2738 elk_inst_set_uip(devinfo, insn, uip_compacted << shift);
2739 }
2740
2741 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,elk_inst * insn,int this_old_ip,int * compacted_counts)2742 update_gfx4_jump_count(const struct intel_device_info *devinfo, elk_inst *insn,
2743 int this_old_ip, int *compacted_counts)
2744 {
2745 assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2746
2747 /* Jump Count is in units of:
2748 * - uncompacted instructions on G45; and
2749 * - compacted instructions on Gfx5.
2750 */
2751 int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2752
2753 int jump_count_compacted = elk_inst_gfx4_jump_count(devinfo, insn) << shift;
2754
2755 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2756
2757 int this_compacted_count = compacted_counts[this_old_ip];
2758 int target_compacted_count = compacted_counts[target_old_ip];
2759
2760 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2761 elk_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2762 }
2763
2764 static void
compaction_state_init(struct compaction_state * c,const struct elk_isa_info * isa)2765 compaction_state_init(struct compaction_state *c,
2766 const struct elk_isa_info *isa)
2767 {
2768 const struct intel_device_info *devinfo = isa->devinfo;
2769
2770 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2771 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2772 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2773 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2774 assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2775 assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2776 assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2777 assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2778 assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2779 assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2780 assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2781 assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2782 assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2783 assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2784 assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2785 assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2786 assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2787 assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2788 assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2789 assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2790 assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2791 assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2792 assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2793 assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2794 assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
2795 assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
2796 assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
2797 assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
2798 assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
2799
2800 c->isa = isa;
2801 switch (devinfo->ver) {
2802 case 20:
2803 c->control_index_table = xe2_control_index_table;
2804 c->datatype_table = xe2_datatype_table;
2805 c->subreg_table = xe2_subreg_table;
2806 c->src0_index_table = xe2_src0_index_table;
2807 c->src1_index_table = xe2_src1_index_table;
2808 break;
2809 case 12:
2810 c->control_index_table = gfx12_control_index_table;;
2811 c->datatype_table = gfx12_datatype_table;
2812 c->subreg_table = gfx12_subreg_table;
2813 if (devinfo->verx10 >= 125) {
2814 c->src0_index_table = xehp_src0_index_table;
2815 c->src1_index_table = xehp_src1_index_table;
2816 } else {
2817 c->src0_index_table = gfx12_src0_index_table;
2818 c->src1_index_table = gfx12_src1_index_table;
2819 }
2820 break;
2821 case 11:
2822 c->control_index_table = gfx8_control_index_table;
2823 c->datatype_table = gfx11_datatype_table;
2824 c->subreg_table = gfx8_subreg_table;
2825 c->src0_index_table = gfx8_src_index_table;
2826 c->src1_index_table = gfx8_src_index_table;
2827 break;
2828 case 9:
2829 case 8:
2830 c->control_index_table = gfx8_control_index_table;
2831 c->datatype_table = gfx8_datatype_table;
2832 c->subreg_table = gfx8_subreg_table;
2833 c->src0_index_table = gfx8_src_index_table;
2834 c->src1_index_table = gfx8_src_index_table;
2835 break;
2836 case 7:
2837 c->control_index_table = gfx7_control_index_table;
2838 c->datatype_table = gfx7_datatype_table;
2839 c->subreg_table = gfx7_subreg_table;
2840 c->src0_index_table = gfx7_src_index_table;
2841 c->src1_index_table = gfx7_src_index_table;
2842 break;
2843 case 6:
2844 c->control_index_table = gfx6_control_index_table;
2845 c->datatype_table = gfx6_datatype_table;
2846 c->subreg_table = gfx6_subreg_table;
2847 c->src0_index_table = gfx6_src_index_table;
2848 c->src1_index_table = gfx6_src_index_table;
2849 break;
2850 case 5:
2851 case 4:
2852 c->control_index_table = g45_control_index_table;
2853 c->datatype_table = g45_datatype_table;
2854 c->subreg_table = g45_subreg_table;
2855 c->src0_index_table = g45_src_index_table;
2856 c->src1_index_table = g45_src_index_table;
2857 break;
2858 default:
2859 unreachable("unknown generation");
2860 }
2861 }
2862
2863 void
elk_compact_instructions(struct elk_codegen * p,int start_offset,struct elk_disasm_info * disasm)2864 elk_compact_instructions(struct elk_codegen *p, int start_offset,
2865 struct elk_disasm_info *disasm)
2866 {
2867 if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2868 return;
2869
2870 const struct intel_device_info *devinfo = p->devinfo;
2871 if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2872 return;
2873
2874 void *store = p->store + start_offset / 16;
2875 /* For an instruction at byte offset 16*i before compaction, this is the
2876 * number of compacted instructions minus the number of padding NOP/NENOPs
2877 * that preceded it.
2878 */
2879 unsigned num_compacted_counts =
2880 (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2881 int *compacted_counts =
2882 calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2883
2884 /* For an instruction at byte offset 8*i after compaction, this was its IP
2885 * (in 16-byte units) before compaction.
2886 */
2887 unsigned num_old_ip =
2888 (p->next_insn_offset - start_offset) / sizeof(elk_compact_inst) + 1;
2889 int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2890
2891 struct compaction_state c;
2892 compaction_state_init(&c, p->isa);
2893
2894 int offset = 0;
2895 int compacted_count = 0;
2896 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2897 src_offset += sizeof(elk_inst)) {
2898 elk_inst *src = store + src_offset;
2899 void *dst = store + offset;
2900
2901 old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
2902 compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
2903
2904 elk_inst inst = precompact(p->isa, *src);
2905 elk_inst saved = inst;
2906
2907 if (try_compact_instruction(&c, dst, &inst)) {
2908 compacted_count++;
2909
2910 if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS |
2911 DEBUG_WM | DEBUG_CS | DEBUG_TES)) {
2912 elk_inst uncompacted;
2913 uncompact_instruction(&c, &uncompacted, dst);
2914 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2915 elk_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2916 }
2917 }
2918
2919 offset += sizeof(elk_compact_inst);
2920 } else {
2921 /* All uncompacted instructions need to be aligned on G45. */
2922 if ((offset & sizeof(elk_compact_inst)) != 0 &&
2923 devinfo->platform == INTEL_PLATFORM_G4X) {
2924 elk_compact_inst *align = store + offset;
2925 memset(align, 0, sizeof(*align));
2926 elk_compact_inst_set_hw_opcode(
2927 devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NENOP));
2928 elk_compact_inst_set_cmpt_control(devinfo, align, true);
2929 offset += sizeof(elk_compact_inst);
2930 compacted_count--;
2931 compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
2932 old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
2933
2934 dst = store + offset;
2935 }
2936
2937 /* If we didn't compact this instruction, we need to move it down into
2938 * place.
2939 */
2940 if (offset != src_offset) {
2941 memmove(dst, src, sizeof(elk_inst));
2942 }
2943 offset += sizeof(elk_inst);
2944 }
2945 }
2946
2947 /* Add an entry for the ending offset of the program. This greatly
2948 * simplifies the linked list walk at the end of the function.
2949 */
2950 old_ip[offset / sizeof(elk_compact_inst)] =
2951 (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2952
2953 /* Fix up control flow offsets. */
2954 p->next_insn_offset = start_offset + offset;
2955 for (offset = 0; offset < p->next_insn_offset - start_offset;
2956 offset = next_offset(devinfo, store, offset)) {
2957 elk_inst *insn = store + offset;
2958 int this_old_ip = old_ip[offset / sizeof(elk_compact_inst)];
2959 int this_compacted_count = compacted_counts[this_old_ip];
2960
2961 switch (elk_inst_opcode(p->isa, insn)) {
2962 case ELK_OPCODE_BREAK:
2963 case ELK_OPCODE_CONTINUE:
2964 case ELK_OPCODE_HALT:
2965 if (devinfo->ver >= 6) {
2966 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2967 } else {
2968 update_gfx4_jump_count(devinfo, insn, this_old_ip,
2969 compacted_counts);
2970 }
2971 break;
2972
2973 case ELK_OPCODE_IF:
2974 case ELK_OPCODE_IFF:
2975 case ELK_OPCODE_ELSE:
2976 case ELK_OPCODE_ENDIF:
2977 case ELK_OPCODE_WHILE:
2978 if (devinfo->ver >= 7) {
2979 if (elk_inst_cmpt_control(devinfo, insn)) {
2980 elk_inst uncompacted;
2981 uncompact_instruction(&c, &uncompacted,
2982 (elk_compact_inst *)insn);
2983
2984 update_uip_jip(p->isa, &uncompacted, this_old_ip,
2985 compacted_counts);
2986
2987 bool ret = try_compact_instruction(&c, (elk_compact_inst *)insn,
2988 &uncompacted);
2989 assert(ret); (void)ret;
2990 } else {
2991 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2992 }
2993 } else if (devinfo->ver == 6) {
2994 assert(!elk_inst_cmpt_control(devinfo, insn));
2995
2996 /* Jump Count is in units of compacted instructions on Gfx6. */
2997 int jump_count_compacted = elk_inst_gfx6_jump_count(devinfo, insn);
2998
2999 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
3000 int target_compacted_count = compacted_counts[target_old_ip];
3001 jump_count_compacted -= (target_compacted_count - this_compacted_count);
3002 elk_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
3003 } else {
3004 update_gfx4_jump_count(devinfo, insn, this_old_ip,
3005 compacted_counts);
3006 }
3007 break;
3008
3009 case ELK_OPCODE_ADD:
3010 /* Add instructions modifying the IP register use an immediate src1,
3011 * and Gens that use this cannot compact instructions with immediate
3012 * operands.
3013 */
3014 if (elk_inst_cmpt_control(devinfo, insn))
3015 break;
3016
3017 if (elk_inst_dst_reg_file(devinfo, insn) == ELK_ARCHITECTURE_REGISTER_FILE &&
3018 elk_inst_dst_da_reg_nr(devinfo, insn) == ELK_ARF_IP) {
3019 assert(elk_inst_src1_reg_file(devinfo, insn) == ELK_IMMEDIATE_VALUE);
3020
3021 int shift = 3;
3022 int jump_compacted = elk_inst_imm_d(devinfo, insn) >> shift;
3023
3024 int target_old_ip = this_old_ip + (jump_compacted / 2);
3025 int target_compacted_count = compacted_counts[target_old_ip];
3026 jump_compacted -= (target_compacted_count - this_compacted_count);
3027 elk_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
3028 }
3029 break;
3030
3031 default:
3032 break;
3033 }
3034 }
3035
3036 /* p->nr_insn is counting the number of uncompacted instructions still, so
3037 * divide. We do want to be sure there's a valid instruction in any
3038 * alignment padding, so that the next compression pass (for the FS 8/16
3039 * compile passes) parses correctly.
3040 */
3041 if (p->next_insn_offset & sizeof(elk_compact_inst)) {
3042 elk_compact_inst *align = store + offset;
3043 memset(align, 0, sizeof(*align));
3044 elk_compact_inst_set_hw_opcode(
3045 devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NOP));
3046 elk_compact_inst_set_cmpt_control(devinfo, align, true);
3047 p->next_insn_offset += sizeof(elk_compact_inst);
3048 }
3049 p->nr_insn = p->next_insn_offset / sizeof(elk_inst);
3050
3051 for (int i = 0; i < p->num_relocs; i++) {
3052 if (p->relocs[i].offset < (uint32_t)start_offset)
3053 continue;
3054
3055 assert(p->relocs[i].offset % 16 == 0);
3056 unsigned idx = (p->relocs[i].offset - start_offset) / 16;
3057 p->relocs[i].offset -= compacted_counts[idx] * 8;
3058 }
3059
3060 /* Update the instruction offsets for each group. */
3061 if (disasm) {
3062 int offset = 0;
3063
3064 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
3065 while (start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
3066 sizeof(elk_inst) != group->offset) {
3067 assert(start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
3068 sizeof(elk_inst) < group->offset);
3069 offset = next_offset(devinfo, store, offset);
3070 }
3071
3072 group->offset = start_offset + offset;
3073
3074 offset = next_offset(devinfo, store, offset);
3075 }
3076 }
3077
3078 free(compacted_counts);
3079 free(old_ip);
3080 }
3081