1 /*
2 * Copyright © 2012-2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_eu_compact.c
25 *
26 * Instruction compaction is a feature of G45 and newer hardware that allows
27 * for a smaller instruction encoding.
28 *
29 * The instruction cache is on the order of 32KB, and many programs generate
30 * far more instructions than that. The instruction cache is built to barely
31 * keep up with instruction dispatch ability in cache hit cases -- L1
32 * instruction cache misses that still hit in the next level could limit
33 * throughput by around 50%.
34 *
35 * The idea of instruction compaction is that most instructions use a tiny
36 * subset of the GPU functionality, so we can encode what would be a 16 byte
37 * instruction in 8 bytes using some lookup tables for various fields.
38 *
39 *
40 * Instruction compaction capabilities vary subtly by generation.
41 *
42 * G45's support for instruction compaction is very limited. Jump counts on
43 * this generation are in units of 16-byte uncompacted instructions. As such,
44 * all jump targets must be 16-byte aligned. Also, all instructions must be
45 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46 * A G45-only instruction, NENOP, must be used to provide padding to align
47 * uncompacted instructions.
48 *
49 * Gfx5 removes these restrictions and changes jump counts to be in units of
50 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52 *
53 * Gfx6 adds the ability to compact instructions with a limited range of
54 * immediate values. Compactable immediates have 12 unrestricted bits, and a
55 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56 * value of DW3 in the uncompacted instruction word.
57 *
58 * On Gfx7 we can compact some control flow instructions with a small positive
59 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60 * control flow instructions with UIP cannot be compacted, because of the
61 * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62 * since the jump count field is not in DW3.
63 *
64 * break JIP/UIP
65 * cont JIP/UIP
66 * halt JIP/UIP
67 * if JIP/UIP
68 * else JIP (plus UIP on BDW+)
69 * endif JIP
70 * while JIP (must be negative)
71 *
 * Gfx8 adds support for compacting 3-src instructions.
73 *
 * Gfx12 reduces the number of bits that are available to compacted immediates
 * from 13 to 12, but improves the compaction of floating-point immediates by
76 * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77 * three most significant bits of the mantissa), rather than the lowest bits of
78 * the mantissa.
79 */
80
81 #include "brw_eu.h"
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/intel_debug.h"
85
86 static const uint32_t g45_control_index_table[32] = {
87 0b00000000000000000,
88 0b01000000000000000,
89 0b00110000000000000,
90 0b00000000000000010,
91 0b00100000000000000,
92 0b00010000000000000,
93 0b01000000000100000,
94 0b01000000100000000,
95 0b01010000000100000,
96 0b00000000100000010,
97 0b11000000000000000,
98 0b00001000100000010,
99 0b01001000100000000,
100 0b00000000100000000,
101 0b11000000000100000,
102 0b00001000100000000,
103 0b10110000000000000,
104 0b11010000000100000,
105 0b00110000100000000,
106 0b00100000100000000,
107 0b01000000000001000,
108 0b01000000000000100,
109 0b00111100000000000,
110 0b00101011000000000,
111 0b00110000000010000,
112 0b00010000100000000,
113 0b01000000000100100,
114 0b01000000000101000,
115 0b00110000000000110,
116 0b00000000000001010,
117 0b01010000000101000,
118 0b01010000000100100,
119 };
120
121 static const uint32_t g45_datatype_table[32] = {
122 0b001000000000100001,
123 0b001011010110101101,
124 0b001000001000110001,
125 0b001111011110111101,
126 0b001011010110101100,
127 0b001000000110101101,
128 0b001000000000100000,
129 0b010100010110110001,
130 0b001100011000101101,
131 0b001000000000100010,
132 0b001000001000110110,
133 0b010000001000110001,
134 0b001000001000110010,
135 0b011000001000110010,
136 0b001111011110111100,
137 0b001000000100101000,
138 0b010100011000110001,
139 0b001010010100101001,
140 0b001000001000101001,
141 0b010000001000110110,
142 0b101000001000110001,
143 0b001011011000101101,
144 0b001000000100001001,
145 0b001011011000101100,
146 0b110100011000110001,
147 0b001000001110111101,
148 0b110000001000110001,
149 0b011000000100101010,
150 0b101000001000101001,
151 0b001011010110001100,
152 0b001000000110100001,
153 0b001010010100001000,
154 };
155
156 static const uint16_t g45_subreg_table[32] = {
157 0b000000000000000,
158 0b000000010000000,
159 0b000001000000000,
160 0b000100000000000,
161 0b000000000100000,
162 0b100000000000000,
163 0b000000000010000,
164 0b001100000000000,
165 0b001010000000000,
166 0b000000100000000,
167 0b001000000000000,
168 0b000000000001000,
169 0b000000001000000,
170 0b000000000000001,
171 0b000010000000000,
172 0b000000010100000,
173 0b000000000000111,
174 0b000001000100000,
175 0b011000000000000,
176 0b000000110000000,
177 0b000000000000010,
178 0b000000000000100,
179 0b000000001100000,
180 0b000100000000010,
181 0b001110011000110,
182 0b001110100001000,
183 0b000110011000110,
184 0b000001000011000,
185 0b000110010000100,
186 0b001100000000110,
187 0b000000010000110,
188 0b000001000110000,
189 };
190
191 static const uint16_t g45_src_index_table[32] = {
192 0b000000000000,
193 0b010001101000,
194 0b010110001000,
195 0b011010010000,
196 0b001101001000,
197 0b010110001010,
198 0b010101110000,
199 0b011001111000,
200 0b001000101000,
201 0b000000101000,
202 0b010001010000,
203 0b111101101100,
204 0b010110001100,
205 0b010001101100,
206 0b011010010100,
207 0b010001001100,
208 0b001100101000,
209 0b000000000010,
210 0b111101001100,
211 0b011001101000,
212 0b010101001000,
213 0b000000000100,
214 0b000000101100,
215 0b010001101010,
216 0b000000111000,
217 0b010101011000,
218 0b000100100000,
219 0b010110000000,
220 0b010000000100,
221 0b010000111000,
222 0b000101100000,
223 0b111101110100,
224 };
225
226 static const uint32_t gfx6_control_index_table[32] = {
227 0b00000000000000000,
228 0b01000000000000000,
229 0b00110000000000000,
230 0b00000000100000000,
231 0b00010000000000000,
232 0b00001000100000000,
233 0b00000000100000010,
234 0b00000000000000010,
235 0b01000000100000000,
236 0b01010000000000000,
237 0b10110000000000000,
238 0b00100000000000000,
239 0b11010000000000000,
240 0b11000000000000000,
241 0b01001000100000000,
242 0b01000000000001000,
243 0b01000000000000100,
244 0b00000000000001000,
245 0b00000000000000100,
246 0b00111000100000000,
247 0b00001000100000010,
248 0b00110000100000000,
249 0b00110000000000001,
250 0b00100000000000001,
251 0b00110000000000010,
252 0b00110000000000101,
253 0b00110000000001001,
254 0b00110000000010000,
255 0b00110000000000011,
256 0b00110000000000100,
257 0b00110000100001000,
258 0b00100000000001001,
259 };
260
261 static const uint32_t gfx6_datatype_table[32] = {
262 0b001001110000000000,
263 0b001000110000100000,
264 0b001001110000000001,
265 0b001000000001100000,
266 0b001010110100101001,
267 0b001000000110101101,
268 0b001100011000101100,
269 0b001011110110101101,
270 0b001000000111101100,
271 0b001000000001100001,
272 0b001000110010100101,
273 0b001000000001000001,
274 0b001000001000110001,
275 0b001000001000101001,
276 0b001000000000100000,
277 0b001000001000110010,
278 0b001010010100101001,
279 0b001011010010100101,
280 0b001000000110100101,
281 0b001100011000101001,
282 0b001011011000101100,
283 0b001011010110100101,
284 0b001011110110100101,
285 0b001111011110111101,
286 0b001111011110111100,
287 0b001111011110111101,
288 0b001111011110011101,
289 0b001111011110111110,
290 0b001000000000100001,
291 0b001000000000100010,
292 0b001001111111011101,
293 0b001000001110111110,
294 };
295
296 static const uint16_t gfx6_subreg_table[32] = {
297 0b000000000000000,
298 0b000000000000100,
299 0b000000110000000,
300 0b111000000000000,
301 0b011110000001000,
302 0b000010000000000,
303 0b000000000010000,
304 0b000110000001100,
305 0b001000000000000,
306 0b000001000000000,
307 0b000001010010100,
308 0b000000001010110,
309 0b010000000000000,
310 0b110000000000000,
311 0b000100000000000,
312 0b000000010000000,
313 0b000000000001000,
314 0b100000000000000,
315 0b000001010000000,
316 0b001010000000000,
317 0b001100000000000,
318 0b000000001010100,
319 0b101101010010100,
320 0b010100000000000,
321 0b000000010001111,
322 0b011000000000000,
323 0b111110000000000,
324 0b101000000000000,
325 0b000000000001111,
326 0b000100010001111,
327 0b001000010001111,
328 0b000110000000000,
329 };
330
331 static const uint16_t gfx6_src_index_table[32] = {
332 0b000000000000,
333 0b010110001000,
334 0b010001101000,
335 0b001000101000,
336 0b011010010000,
337 0b000100100000,
338 0b010001101100,
339 0b010101110000,
340 0b011001111000,
341 0b001100101000,
342 0b010110001100,
343 0b001000100000,
344 0b010110001010,
345 0b000000000010,
346 0b010101010000,
347 0b010101101000,
348 0b111101001100,
349 0b111100101100,
350 0b011001110000,
351 0b010110001001,
352 0b010101011000,
353 0b001101001000,
354 0b010000101100,
355 0b010000000000,
356 0b001101110000,
357 0b001100010000,
358 0b001100000000,
359 0b010001101010,
360 0b001101111000,
361 0b000001110000,
362 0b001100100000,
363 0b001101010000,
364 };
365
366 static const uint32_t gfx7_control_index_table[32] = {
367 0b0000000000000000010,
368 0b0000100000000000000,
369 0b0000100000000000001,
370 0b0000100000000000010,
371 0b0000100000000000011,
372 0b0000100000000000100,
373 0b0000100000000000101,
374 0b0000100000000000111,
375 0b0000100000000001000,
376 0b0000100000000001001,
377 0b0000100000000001101,
378 0b0000110000000000000,
379 0b0000110000000000001,
380 0b0000110000000000010,
381 0b0000110000000000011,
382 0b0000110000000000100,
383 0b0000110000000000101,
384 0b0000110000000000111,
385 0b0000110000000001001,
386 0b0000110000000001101,
387 0b0000110000000010000,
388 0b0000110000100000000,
389 0b0001000000000000000,
390 0b0001000000000000010,
391 0b0001000000000000100,
392 0b0001000000100000000,
393 0b0010110000000000000,
394 0b0010110000000010000,
395 0b0011000000000000000,
396 0b0011000000100000000,
397 0b0101000000000000000,
398 0b0101000000100000000,
399 };
400
401 static const uint32_t gfx7_datatype_table[32] = {
402 0b001000000000000001,
403 0b001000000000100000,
404 0b001000000000100001,
405 0b001000000001100001,
406 0b001000000010111101,
407 0b001000001011111101,
408 0b001000001110100001,
409 0b001000001110100101,
410 0b001000001110111101,
411 0b001000010000100001,
412 0b001000110000100000,
413 0b001000110000100001,
414 0b001001010010100101,
415 0b001001110010100100,
416 0b001001110010100101,
417 0b001111001110111101,
418 0b001111011110011101,
419 0b001111011110111100,
420 0b001111011110111101,
421 0b001111111110111100,
422 0b000000001000001100,
423 0b001000000000111101,
424 0b001000000010100101,
425 0b001000010000100000,
426 0b001001010010100100,
427 0b001001110010000100,
428 0b001010010100001001,
429 0b001101111110111101,
430 0b001111111110111101,
431 0b001011110110101100,
432 0b001010010100101000,
433 0b001010110100101000,
434 };
435
436 static const uint16_t gfx7_subreg_table[32] = {
437 0b000000000000000,
438 0b000000000000001,
439 0b000000000001000,
440 0b000000000001111,
441 0b000000000010000,
442 0b000000010000000,
443 0b000000100000000,
444 0b000000110000000,
445 0b000001000000000,
446 0b000001000010000,
447 0b000010100000000,
448 0b001000000000000,
449 0b001000000000001,
450 0b001000010000001,
451 0b001000010000010,
452 0b001000010000011,
453 0b001000010000100,
454 0b001000010000111,
455 0b001000010001000,
456 0b001000010001110,
457 0b001000010001111,
458 0b001000110000000,
459 0b001000111101000,
460 0b010000000000000,
461 0b010000110000000,
462 0b011000000000000,
463 0b011110010000111,
464 0b100000000000000,
465 0b101000000000000,
466 0b110000000000000,
467 0b111000000000000,
468 0b111000000011100,
469 };
470
471 static const uint16_t gfx7_src_index_table[32] = {
472 0b000000000000,
473 0b000000000010,
474 0b000000010000,
475 0b000000010010,
476 0b000000011000,
477 0b000000100000,
478 0b000000101000,
479 0b000001001000,
480 0b000001010000,
481 0b000001110000,
482 0b000001111000,
483 0b001100000000,
484 0b001100000010,
485 0b001100001000,
486 0b001100010000,
487 0b001100010010,
488 0b001100100000,
489 0b001100101000,
490 0b001100111000,
491 0b001101000000,
492 0b001101000010,
493 0b001101001000,
494 0b001101010000,
495 0b001101100000,
496 0b001101101000,
497 0b001101110000,
498 0b001101110001,
499 0b001101111000,
500 0b010001101000,
501 0b010001101001,
502 0b010001101010,
503 0b010110001000,
504 };
505
506 static const uint32_t gfx8_control_index_table[32] = {
507 0b0000000000000000010,
508 0b0000100000000000000,
509 0b0000100000000000001,
510 0b0000100000000000010,
511 0b0000100000000000011,
512 0b0000100000000000100,
513 0b0000100000000000101,
514 0b0000100000000000111,
515 0b0000100000000001000,
516 0b0000100000000001001,
517 0b0000100000000001101,
518 0b0000110000000000000,
519 0b0000110000000000001,
520 0b0000110000000000010,
521 0b0000110000000000011,
522 0b0000110000000000100,
523 0b0000110000000000101,
524 0b0000110000000000111,
525 0b0000110000000001001,
526 0b0000110000000001101,
527 0b0000110000000010000,
528 0b0000110000100000000,
529 0b0001000000000000000,
530 0b0001000000000000010,
531 0b0001000000000000100,
532 0b0001000000100000000,
533 0b0010110000000000000,
534 0b0010110000000010000,
535 0b0011000000000000000,
536 0b0011000000100000000,
537 0b0101000000000000000,
538 0b0101000000100000000,
539 };
540
541 static const uint32_t gfx8_datatype_table[32] = {
542 0b001000000000000000001,
543 0b001000000000001000000,
544 0b001000000000001000001,
545 0b001000000000011000001,
546 0b001000000000101011101,
547 0b001000000010111011101,
548 0b001000000011101000001,
549 0b001000000011101000101,
550 0b001000000011101011101,
551 0b001000001000001000001,
552 0b001000011000001000000,
553 0b001000011000001000001,
554 0b001000101000101000101,
555 0b001000111000101000100,
556 0b001000111000101000101,
557 0b001011100011101011101,
558 0b001011101011100011101,
559 0b001011101011101011100,
560 0b001011101011101011101,
561 0b001011111011101011100,
562 0b000000000010000001100,
563 0b001000000000001011101,
564 0b001000000000101000101,
565 0b001000001000001000000,
566 0b001000101000101000100,
567 0b001000111000100000100,
568 0b001001001001000001001,
569 0b001010111011101011101,
570 0b001011111011101011101,
571 0b001001111001101001100,
572 0b001001001001001001000,
573 0b001001011001001001000,
574 };
575
576 static const uint16_t gfx8_subreg_table[32] = {
577 0b000000000000000,
578 0b000000000000001,
579 0b000000000001000,
580 0b000000000001111,
581 0b000000000010000,
582 0b000000010000000,
583 0b000000100000000,
584 0b000000110000000,
585 0b000001000000000,
586 0b000001000010000,
587 0b000001010000000,
588 0b001000000000000,
589 0b001000000000001,
590 0b001000010000001,
591 0b001000010000010,
592 0b001000010000011,
593 0b001000010000100,
594 0b001000010000111,
595 0b001000010001000,
596 0b001000010001110,
597 0b001000010001111,
598 0b001000110000000,
599 0b001000111101000,
600 0b010000000000000,
601 0b010000110000000,
602 0b011000000000000,
603 0b011110010000111,
604 0b100000000000000,
605 0b101000000000000,
606 0b110000000000000,
607 0b111000000000000,
608 0b111000000011100,
609 };
610
611 static const uint16_t gfx8_src_index_table[32] = {
612 0b000000000000,
613 0b000000000010,
614 0b000000010000,
615 0b000000010010,
616 0b000000011000,
617 0b000000100000,
618 0b000000101000,
619 0b000001001000,
620 0b000001010000,
621 0b000001110000,
622 0b000001111000,
623 0b001100000000,
624 0b001100000010,
625 0b001100001000,
626 0b001100010000,
627 0b001100010010,
628 0b001100100000,
629 0b001100101000,
630 0b001100111000,
631 0b001101000000,
632 0b001101000010,
633 0b001101001000,
634 0b001101010000,
635 0b001101100000,
636 0b001101101000,
637 0b001101110000,
638 0b001101110001,
639 0b001101111000,
640 0b010001101000,
641 0b010001101001,
642 0b010001101010,
643 0b010110001000,
644 };
645
646 static const uint32_t gfx11_datatype_table[32] = {
647 0b001000000000000000001,
648 0b001000000000001000000,
649 0b001000000000001000001,
650 0b001000000000011000001,
651 0b001000000000101100101,
652 0b001000000101111100101,
653 0b001000000100101000001,
654 0b001000000100101000101,
655 0b001000000100101100101,
656 0b001000001000001000001,
657 0b001000011000001000000,
658 0b001000011000001000001,
659 0b001000101000101000101,
660 0b001000111000101000100,
661 0b001000111000101000101,
662 0b001100100100101100101,
663 0b001100101100100100101,
664 0b001100101100101100100,
665 0b001100101100101100101,
666 0b001100111100101100100,
667 0b000000000010000001100,
668 0b001000000000001100101,
669 0b001000000000101000101,
670 0b001000001000001000000,
671 0b001000101000101000100,
672 0b001000111000100000100,
673 0b001001001001000001001,
674 0b001101111100101100101,
675 0b001100111100101100101,
676 0b001001111001101001100,
677 0b001001001001001001000,
678 0b001001011001001001000,
679 };
680
681 static const uint32_t gfx12_control_index_table[32] = {
682 0b000000000000000000100, /* (16|M0) */
683 0b000000000000000000011, /* (8|M0) */
684 0b000000010000000000000, /* (W) (1|M0) */
685 0b000000010000000000100, /* (W) (16|M0) */
686 0b000000010000000000011, /* (W) (8|M0) */
687 0b010000000000000000100, /* (16|M0) (ge)f0.0 */
688 0b000000000000000100100, /* (16|M16) */
689 0b010100000000000000100, /* (16|M0) (lt)f0.0 */
690 0b000000000000000000000, /* (1|M0) */
691 0b000010000000000000100, /* (16|M0) (sat) */
692 0b000000000000000010011, /* (8|M8) */
693 0b001100000000000000100, /* (16|M0) (gt)f0.0 */
694 0b000100000000000000100, /* (16|M0) (eq)f0.0 */
695 0b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */
696 0b001000000000000000100, /* (16|M0) (ne)f0.0 */
697 0b000000000000100000100, /* (f0.0) (16|M0) */
698 0b010100000000000000011, /* (8|M0) (lt)f0.0 */
699 0b000000000000110000100, /* (f1.0) (16|M0) */
700 0b000000010000000000001, /* (W) (2|M0) */
701 0b000000000000101000100, /* (f0.1) (16|M0) */
702 0b000000000000111000100, /* (f1.1) (16|M0) */
703 0b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */
704 0b000000000000000100011, /* (8|M16) */
705 0b000000000000000110011, /* (8|M24) */
706 0b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */
707 0b010000000000000000011, /* (8|M0) (ge)f0.0 */
708 0b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */
709 0b000010000000000000011, /* (8|M0) (sat) */
710 0b010100000000010000100, /* (16|M0) (lt)f1.0 */
711 0b000100000000000000011, /* (8|M0) (eq)f0.0 */
712 0b000001000000000000011, /* (8|M0) {AccWrEn} */
713 0b000000010000000100100, /* (W) (16|M16) */
714 };
715
716 static const uint32_t gfx12_datatype_table[32] = {
717 0b11010110100101010100, /* grf<1>:f grf:f grf:f */
718 0b00000110100101010100, /* grf<1>:f grf:f arf:ub */
719 0b00000010101101010100, /* grf<1>:f imm:f arf:ub */
720 0b01010110110101010100, /* grf<1>:f grf:f imm:f */
721 0b11010100100101010100, /* arf<1>:f grf:f grf:f */
722 0b11010010100101010100, /* grf<1>:f arf:f grf:f */
723 0b01010100110101010100, /* arf<1>:f grf:f imm:f */
724 0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725 0b11010000100101010100, /* arf<1>:f arf:f grf:f */
726 0b00101110110011001100, /* grf<1>:d grf:d imm:w */
727 0b10110110100011001100, /* grf<1>:d grf:d grf:d */
728 0b01010010110101010100, /* grf<1>:f arf:f imm:f */
729 0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730 0b01010000110101010100, /* arf<1>:f arf:f imm:f */
731 0b00110110110011001100, /* grf<1>:d grf:d imm:d */
732 0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733 0b00000111000101010100, /* grf<2>:f grf:f arf:ub */
734 0b00101100110011001100, /* arf<1>:d grf:d imm:w */
735 0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736 0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737 0b00100110110000101010, /* grf<1>:w grf:uw imm:uv */
738 0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739 0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740 0b00000110100101001100, /* grf<1>:d grf:f arf:ub */
741 0b10001100100011001100, /* arf<1>:d grf:d grf:uw */
742 0b00000110100001010100, /* grf<1>:f grf:ud arf:ub */
743 0b00101110110001001100, /* grf<1>:d grf:ud imm:w */
744 0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745 0b00000110100000110100, /* grf<1>:f grf:uw arf:ub */
746 0b00000110100000010100, /* grf<1>:f grf:ub arf:ub */
747 0b00000110100011010100, /* grf<1>:f grf:d arf:ub */
748 0b00000010100101010100, /* grf<1>:f arf:f arf:ub */
749 };
750
751 static const uint16_t gfx12_subreg_table[32] = {
752 0b000000000000000, /* .0 .0 .0 */
753 0b100000000000000, /* .0 .0 .16 */
754 0b001000000000000, /* .0 .0 .4 */
755 0b011000000000000, /* .0 .0 .12 */
756 0b000000010000000, /* .0 .4 .0 */
757 0b010000000000000, /* .0 .0 .8 */
758 0b101000000000000, /* .0 .0 .20 */
759 0b000000000001000, /* .8 .0 .0 */
760 0b000000100000000, /* .0 .8 .0 */
761 0b110000000000000, /* .0 .0 .24 */
762 0b111000000000000, /* .0 .0 .28 */
763 0b000001000000000, /* .0 .16 .0 */
764 0b000000000000100, /* .4 .0 .0 */
765 0b000001100000000, /* .0 .24 .0 */
766 0b000001010000000, /* .0 .20 .0 */
767 0b000000110000000, /* .0 .12 .0 */
768 0b000001110000000, /* .0 .28 .0 */
769 0b000000000011100, /* .28 .0 .0 */
770 0b000000000010000, /* .16 .0 .0 */
771 0b000000000001100, /* .12 .0 .0 */
772 0b000000000011000, /* .24 .0 .0 */
773 0b000000000010100, /* .20 .0 .0 */
774 0b000000000000010, /* .2 .0 .0 */
775 0b000000101000000, /* .0 .10 .0 */
776 0b000000001000000, /* .0 .2 .0 */
777 0b000000010000100, /* .4 .4 .0 */
778 0b000000001011100, /* .28 .2 .0 */
779 0b000000001000010, /* .2 .2 .0 */
780 0b000000110001100, /* .12 .12 .0 */
781 0b000000000100000, /* .0 .1 .0 */
782 0b000000001100000, /* .0 .3 .0 */
783 0b110001100000000, /* .0 .24 .24 */
784 };
785
786 static const uint16_t gfx12_src0_index_table[16] = {
787 0b010001100100, /* r<8;8,1> */
788 0b000000000000, /* r<0;1,0> */
789 0b010001100110, /* -r<8;8,1> */
790 0b010001100101, /* (abs)r<8;8,1> */
791 0b000000000010, /* -r<0;1,0> */
792 0b001000000000, /* r<2;1,0> */
793 0b001001000000, /* r<2;4,0> */
794 0b001101000000, /* r<4;4,0> */
795 0b001000100100, /* r<2;2,1> */
796 0b001100000000, /* r<4;1,0> */
797 0b001000100110, /* -r<2;2,1> */
798 0b001101000100, /* r<4;4,1> */
799 0b010001100111, /* -(abs)r<8;8,1> */
800 0b000100000000, /* r<1;1,0> */
801 0b000000000001, /* (abs)r<0;1,0> */
802 0b111100010000, /* r[a]<1,0> */
803 };
804
805 static const uint16_t gfx12_src1_index_table[16] = {
806 0b000100011001, /* r<8;8,1> */
807 0b000000000000, /* r<0;1,0> */
808 0b100100011001, /* -r<8;8,1> */
809 0b100000000000, /* -r<0;1,0> */
810 0b010100011001, /* (abs)r<8;8,1> */
811 0b100011010000, /* -r<4;4,0> */
812 0b000010000000, /* r<2;1,0> */
813 0b000010001001, /* r<2;2,1> */
814 0b100010001001, /* -r<2;2,1> */
815 0b000011010000, /* r<4;4,0> */
816 0b000011010001, /* r<4;4,1> */
817 0b000011000000, /* r<4;1,0> */
818 0b110100011001, /* -(abs)r<8;8,1> */
819 0b010000000000, /* (abs)r<0;1,0> */
820 0b110000000000, /* -(abs)r<0;1,0> */
821 0b100011010001, /* -r<4;4,1> */
822 };
823
824 static const uint16_t xehp_src0_index_table[16] = {
825 0b000100000000, /* r<1;1,0> */
826 0b000000000000, /* r<0;1,0> */
827 0b000100000010, /* -r<1;1,0> */
828 0b000100000001, /* (abs)r<1;1,0> */
829 0b000000000010, /* -r<0;1,0> */
830 0b001000000000, /* r<2;1,0> */
831 0b001001000000, /* r<2;4,0> */
832 0b001101000000, /* r<4;4,0> */
833 0b001100000000, /* r<4;1,0> */
834 0b000100000011, /* -(abs)r<1;1,0> */
835 0b000000000001, /* (abs)r<0;1,0> */
836 0b111100010000, /* r[a]<1,0> */
837 0b010001100000, /* r<8;8,0> */
838 0b000101000000, /* r<1;4,0> */
839 0b010001001000, /* r<8;4,2> */
840 0b001000000010, /* -r<2;1,0> */
841 };
842
843 static const uint16_t xehp_src1_index_table[16] = {
844 0b000001000000, /* r<1;1,0> */
845 0b000000000000, /* r<0;1,0> */
846 0b100001000000, /* -r<1;1,0> */
847 0b100000000000, /* -r<0;1,0> */
848 0b010001000000, /* (abs)r<1;1,0> */
849 0b100011010000, /* -r<4;4,0> */
850 0b000010000000, /* r<2;1,0> */
851 0b000011010000, /* r<4;4,0> */
852 0b000011000000, /* r<4;1,0> */
853 0b110001000000, /* -(abs)r<1;1,0> */
854 0b010000000000, /* (abs)r<0;1,0> */
855 0b110000000000, /* -(abs)r<0;1,0> */
856 0b000100011000, /* r<8;8,0> */
857 0b100010000000, /* -r<2;1,0> */
858 0b100000001001, /* -r<0;2,1> */
859 0b100001000100, /* -r[a]<1;1,0> */
860 };
861
862 /* This is actually the control index table for Cherryview (26 bits), but the
863 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
864 * the start.
865 *
866 * The low 24 bits have the same mappings on both hardware.
867 */
868 static const uint32_t gfx8_3src_control_index_table[4] = {
869 0b00100000000110000000000001,
870 0b00000000000110000000000001,
871 0b00000000001000000000000001,
872 0b00000000001000000000100001,
873 };
874
875 /* This is actually the control index table for Cherryview (49 bits), but the
876 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
877 * at the start.
878 *
879 * The low 44 bits have the same mappings on both hardware, and since the high
880 * three bits on Broadwell are zero, we can reuse Cherryview's table.
881 */
882 static const uint64_t gfx8_3src_source_index_table[4] = {
883 0b0000001110010011100100111001000001111000000000000,
884 0b0000001110010011100100111001000001111000000000010,
885 0b0000001110010011100100111001000001111000000001000,
886 0b0000001110010011100100111001000001111000000100000,
887 };
888
889 static const uint64_t gfx12_3src_control_index_table[32] = {
890 0b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
891 0b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
892 0b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
893 0b000001001010010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
894 0b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
895 0b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
896 0b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
897 0b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
898 0b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
899 0b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
900 0b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
901 0b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
902 0b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
903 0b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
904 0b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
905 0b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
906 0b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
907 0b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
908 0b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
909 0b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
910 0b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
911 0b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
912 0b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */
913 0b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */
914 0b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
915 0b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */
916 0b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */
917 0b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */
918 0b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */
919 0b000001001010010101010000000000100100, /* (16|M16) (sat)grf<1>:f :f :f :f */
920 0b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */
921 0b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */
922 };
923
924 static const uint64_t xehp_3src_control_index_table[32] = {
925 0b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */
926 0b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */
927 0b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */
928 0b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */
929 0b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */
930 0b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */
931 0b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */
932 0b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */
933 0b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */
934 0b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */
935 0b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */
936 0b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
937 0b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */
938 0b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */
939 0b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */
940 0b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */
941 0b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */
942 0b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */
943 0b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */
944 0b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */
945 0b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */
946 0b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */
947 0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b */
948 0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */
949 0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */
950 0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */
951 0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */
952 0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */
953 0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */
954 0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */
955 0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */
956 0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */
957 };
958
/**
 * Gfx12 source-index lookup table for compacted three-source instructions.
 *
 * Each entry is a 21-bit key laid out exactly as set_3src_source_index()
 * packs it from the uncompacted instruction (key bit <- instruction bit):
 *
 *    20    <- 114        13:12 <- 87:86       5   <- 47
 *    19:18 <- 113:112    11:10 <- 85:84       4   <- 46
 *    17    <- 98          9    <- 83          3:2 <- 45:44
 *    16:15 <- 97:96       8    <- 66          1   <- 43
 *    14    <- 91          7:6  <- 65:64       0   <- 35
 *
 * The position of a matching entry is what gets stored as the compacted
 * source index, so the order of the entries must not change.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>   grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>   grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>   grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>   grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>   grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>   grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>   grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>   grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>   grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1>  -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>   grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>   grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>   grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>   grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>   grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>   grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>   grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>   grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>   grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>   grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1>  -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>   grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1>  -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1>  -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>   grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>   grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>   grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0>  -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>   grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1>  -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0>  -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1>  -grf<1> */
};
993
/**
 * Source-index lookup table for compacted three-source instructions, used
 * by set_3src_source_index() when devinfo->verx10 >= 125.
 *
 * Entries are 21-bit keys using the same bit packing as the Gfx12 table
 * (see the packing expression in set_3src_source_index()).  The position
 * of a matching entry becomes the compacted source index, so the order of
 * the entries must not change.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*  grf<0;0>   grf<1;0>   grf<0> */
   0b100100000001000000001, /*  arf<1;0>   grf<1;0>   grf<0> */
   0b101100000001100000001, /*  grf<1;0>   grf<1;0>   grf<1> */
   0b100100000001100000001, /*  grf<1;0>   grf<1;0>   grf<0> */
   0b101100000000100000001, /*  grf<1;0>   grf<0;0>   grf<1> */
   0b101100000001100001001, /* -grf<1;0>   grf<1;0>   grf<1> */
   0b101000000001100000001, /*  grf<1;0>   arf<1;0>   grf<1> */
   0b101100000001100000000, /*  grf<0;0>   grf<1;0>   grf<1> */
   0b100000000001100000000, /*  grf<0;0>   arf<1;0>   grf<0> */
   0b101100000101100000001, /*  grf<1;0>   grf<1;0>  -grf<1> */
   0b101100010001100000001, /*  grf<1;0>  -grf<1;0>   grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>   grf<1> */
   0b100000000001100000001, /*  grf<1;0>   arf<1;0>   grf<0> */
   0b100100010001100000000, /*  grf<0;0>  -grf<1;0>   grf<0> */
   0b100100010001100000001, /*  grf<1;0>  -grf<1;0>   grf<0> */
   0b100100000001100001001, /* -grf<1;0>   grf<1;0>   grf<0> */
   0b100100000000100000001, /*  grf<1;0>   grf<0;0>   grf<0> */
   0b100100000001100001000, /* -grf<0;0>   grf<1;0>   grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>   grf<0>
                             * dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:f grf:bf     grf:bf
                             */
   0b101100010001100000000, /*  grf<0;0>  -grf<1;0>   grf<1> */
   0b100100000101100000000, /*  grf<0;0>   grf<1;0>  -grf<0> */
   0b101000000001100000000, /*  grf<0;0>   arf<1;0>   grf<1> */
   0b100100000101100000001, /*  grf<1;0>   grf<1;0>  -grf<0> */
   0b101100000101100001001, /* -grf<1;0>   grf<1;0>  -grf<1> */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b]  grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
};
1031
/**
 * Gfx12+ sub-register lookup table for compacted three-source instructions.
 *
 * Each entry is a 20-bit key made of four 5-bit fields, packed the way
 * set_3src_subreg_index() builds it (key bits <- instruction bits):
 *
 *    19:15 <- 119:115    14:10 <- 103:99    9:5 <- 71:67    4:0 <- 55:51
 *
 * The per-entry comments list the four field values starting with the least
 * significant field.  The position of a matching entry becomes the compacted
 * sub-register index, so the order of the entries must not change.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
1066
/**
 * Lookup tables used by the two-source compaction helpers, together with the
 * ISA description they apply to.
 */
struct compaction_state {
   /* ISA description; the helpers read the device info through isa->devinfo */
   const struct brw_isa_info *isa;

   /* Scanned by set_control_index() (32 entries) */
   const uint32_t *control_index_table;

   /* Scanned by set_datatype_index() (32 entries) */
   const uint32_t *datatype_table;

   /* Scanned by set_subreg_index() (32 entries) */
   const uint16_t *subreg_table;

   /* Scanned by set_src0_index() */
   const uint16_t *src0_index_table;

   /* Scanned by set_src1_index() when src1 is not an immediate */
   const uint16_t *src1_index_table;
};
1075
/* Forward declaration; the definition lives elsewhere in this file. */
static void
compaction_state_init(struct compaction_state *c,
                      const struct brw_isa_info *isa);
1078
1079 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1080 set_control_index(const struct compaction_state *c,
1081 brw_compact_inst *dst, const brw_inst *src)
1082 {
1083 const struct intel_device_info *devinfo = c->isa->devinfo;
1084 uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1085
1086 if (devinfo->ver >= 12) {
1087 uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /* 4b */
1088 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1089 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1090 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1091 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1092 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1093 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1094 (brw_inst_bits(src, 23, 22) << 6) | /* 2b */
1095 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1096 (brw_inst_bits(src, 18, 16)); /* 3b */
1097 } else if (devinfo->ver >= 8) {
1098 uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /* 3b */
1099 (brw_inst_bits(src, 23, 12) << 4) | /* 12b */
1100 (brw_inst_bits(src, 10, 9) << 2) | /* 2b */
1101 (brw_inst_bits(src, 34, 34) << 1) | /* 1b */
1102 (brw_inst_bits(src, 8, 8)); /* 1b */
1103 } else {
1104 uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /* 1b */
1105 (brw_inst_bits(src, 23, 8)); /* 16b */
1106
1107 /* On gfx7, the flag register and subregister numbers are integrated into
1108 * the control index.
1109 */
1110 if (devinfo->ver == 7)
1111 uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1112 }
1113
1114 for (int i = 0; i < 32; i++) {
1115 if (c->control_index_table[i] == uncompacted) {
1116 brw_compact_inst_set_control_index(devinfo, dst, i);
1117 return true;
1118 }
1119 }
1120
1121 return false;
1122 }
1123
1124 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1125 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126 const brw_inst *src, bool is_immediate)
1127 {
1128 const struct intel_device_info *devinfo = c->isa->devinfo;
1129 uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1130
1131 if (devinfo->ver >= 12) {
1132 uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /* 4b */
1133 (brw_inst_bits(src, 66, 66) << 14) | /* 1b */
1134 (brw_inst_bits(src, 50, 50) << 13) | /* 1b */
1135 (brw_inst_bits(src, 49, 48) << 11) | /* 2b */
1136 (brw_inst_bits(src, 47, 47) << 10) | /* 1b */
1137 (brw_inst_bits(src, 46, 46) << 9) | /* 1b */
1138 (brw_inst_bits(src, 43, 40) << 5) | /* 4b */
1139 (brw_inst_bits(src, 39, 36) << 1) | /* 4b */
1140 (brw_inst_bits(src, 35, 35)); /* 1b */
1141
1142 /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1143 * is present
1144 */
1145 if (!is_immediate) {
1146 uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1147 }
1148 } else if (devinfo->ver >= 8) {
1149 uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */
1150 (brw_inst_bits(src, 94, 89) << 12) | /* 6b */
1151 (brw_inst_bits(src, 46, 35)); /* 12b */
1152 } else {
1153 uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /* 3b */
1154 (brw_inst_bits(src, 46, 32)); /* 15b */
1155 }
1156
1157 for (int i = 0; i < 32; i++) {
1158 if (c->datatype_table[i] == uncompacted) {
1159 brw_compact_inst_set_datatype_index(devinfo, dst, i);
1160 return true;
1161 }
1162 }
1163
1164 return false;
1165 }
1166
1167 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1168 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169 const brw_inst *src, bool is_immediate)
1170 {
1171 const struct intel_device_info *devinfo = c->isa->devinfo;
1172 uint16_t uncompacted; /* 15b */
1173
1174 if (devinfo->ver >= 12) {
1175 uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */
1176 (brw_inst_bits(src, 71, 67) << 5); /* 5b */
1177
1178 if (!is_immediate)
1179 uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1180 } else {
1181 uncompacted = (brw_inst_bits(src, 52, 48) << 0) | /* 5b */
1182 (brw_inst_bits(src, 68, 64) << 5); /* 5b */
1183
1184 if (!is_immediate)
1185 uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1186 }
1187
1188 for (int i = 0; i < 32; i++) {
1189 if (c->subreg_table[i] == uncompacted) {
1190 brw_compact_inst_set_subreg_index(devinfo, dst, i);
1191 return true;
1192 }
1193 }
1194
1195 return false;
1196 }
1197
1198 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1199 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200 const brw_inst *src)
1201 {
1202 const struct intel_device_info *devinfo = c->isa->devinfo;
1203 uint16_t uncompacted; /* 12b */
1204 int table_len;
1205
1206 if (devinfo->ver >= 12) {
1207 table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208 uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */
1209 (brw_inst_bits(src, 83, 81) << 5) | /* 3b */
1210 (brw_inst_bits(src, 80, 80) << 4) | /* 1b */
1211 (brw_inst_bits(src, 65, 64) << 2) | /* 2b */
1212 (brw_inst_bits(src, 45, 44)); /* 2b */
1213 } else {
1214 table_len = ARRAY_SIZE(gfx8_src_index_table);
1215 uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
1216 }
1217
1218 for (int i = 0; i < table_len; i++) {
1219 if (c->src0_index_table[i] == uncompacted) {
1220 brw_compact_inst_set_src0_index(devinfo, dst, i);
1221 return true;
1222 }
1223 }
1224
1225 return false;
1226 }
1227
1228 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1229 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230 const brw_inst *src, bool is_immediate, unsigned imm)
1231 {
1232 const struct intel_device_info *devinfo = c->isa->devinfo;
1233 if (is_immediate) {
1234 if (devinfo->ver >= 12) {
1235 /* src1 index takes the low 4 bits of the 12-bit compacted value */
1236 brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1237 } else {
1238 /* src1 index takes the high 5 bits of the 13-bit compacted value */
1239 brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1240 }
1241 return true;
1242 } else {
1243 uint16_t uncompacted; /* 12b */
1244 int table_len;
1245
1246 if (devinfo->ver >= 12) {
1247 table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248 uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */
1249 (brw_inst_bits(src, 119, 116) << 6) | /* 4b */
1250 (brw_inst_bits(src, 115, 113) << 3) | /* 3b */
1251 (brw_inst_bits(src, 112, 112) << 2) | /* 1b */
1252 (brw_inst_bits(src, 97, 96)); /* 2b */
1253 } else {
1254 table_len = ARRAY_SIZE(gfx8_src_index_table);
1255 uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
1256 }
1257
1258 for (int i = 0; i < table_len; i++) {
1259 if (c->src1_index_table[i] == uncompacted) {
1260 brw_compact_inst_set_src1_index(devinfo, dst, i);
1261 return true;
1262 }
1263 }
1264 }
1265
1266 return false;
1267 }
1268
1269 static bool
set_3src_control_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1270 set_3src_control_index(const struct intel_device_info *devinfo,
1271 brw_compact_inst *dst, const brw_inst *src)
1272 {
1273 assert(devinfo->ver >= 8);
1274
1275 if (devinfo->verx10 >= 125) {
1276 uint64_t uncompacted = /* 37b/XeHP+ */
1277 (brw_inst_bits(src, 95, 92) << 33) | /* 4b */
1278 (brw_inst_bits(src, 90, 88) << 30) | /* 3b */
1279 (brw_inst_bits(src, 82, 80) << 27) | /* 3b */
1280 (brw_inst_bits(src, 50, 50) << 26) | /* 1b */
1281 (brw_inst_bits(src, 49, 48) << 24) | /* 2b */
1282 (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1283 (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1284 (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1285 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1286 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1287 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1288 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1289 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1290 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1291 (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1292 (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1293 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1294 (brw_inst_bits(src, 18, 16)); /* 3b */
1295
1296 for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297 if (xehp_3src_control_index_table[i] == uncompacted) {
1298 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1299 return true;
1300 }
1301 }
1302 } else if (devinfo->ver >= 12) {
1303 uint64_t uncompacted = /* 36b/TGL+ */
1304 (brw_inst_bits(src, 95, 92) << 32) | /* 4b */
1305 (brw_inst_bits(src, 90, 88) << 29) | /* 3b */
1306 (brw_inst_bits(src, 82, 80) << 26) | /* 3b */
1307 (brw_inst_bits(src, 50, 50) << 25) | /* 1b */
1308 (brw_inst_bits(src, 48, 48) << 24) | /* 1b */
1309 (brw_inst_bits(src, 42, 40) << 21) | /* 3b */
1310 (brw_inst_bits(src, 39, 39) << 20) | /* 1b */
1311 (brw_inst_bits(src, 38, 36) << 17) | /* 3b */
1312 (brw_inst_bits(src, 34, 34) << 16) | /* 1b */
1313 (brw_inst_bits(src, 33, 33) << 15) | /* 1b */
1314 (brw_inst_bits(src, 32, 32) << 14) | /* 1b */
1315 (brw_inst_bits(src, 31, 31) << 13) | /* 1b */
1316 (brw_inst_bits(src, 28, 28) << 12) | /* 1b */
1317 (brw_inst_bits(src, 27, 24) << 8) | /* 4b */
1318 (brw_inst_bits(src, 23, 23) << 7) | /* 1b */
1319 (brw_inst_bits(src, 22, 22) << 6) | /* 1b */
1320 (brw_inst_bits(src, 21, 19) << 3) | /* 3b */
1321 (brw_inst_bits(src, 18, 16)); /* 3b */
1322
1323 for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324 if (gfx12_3src_control_index_table[i] == uncompacted) {
1325 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1326 return true;
1327 }
1328 }
1329 } else {
1330 uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331 (brw_inst_bits(src, 34, 32) << 21) | /* 3b */
1332 (brw_inst_bits(src, 28, 8)); /* 21b */
1333
1334 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1335 uncompacted |=
1336 brw_inst_bits(src, 36, 35) << 24; /* 2b */
1337 }
1338
1339 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340 if (gfx8_3src_control_index_table[i] == uncompacted) {
1341 brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1342 return true;
1343 }
1344 }
1345 }
1346
1347 return false;
1348 }
1349
1350 static bool
set_3src_source_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1351 set_3src_source_index(const struct intel_device_info *devinfo,
1352 brw_compact_inst *dst, const brw_inst *src)
1353 {
1354 assert(devinfo->ver >= 8);
1355
1356 if (devinfo->ver >= 12) {
1357 uint32_t uncompacted = /* 21b/TGL+ */
1358 (brw_inst_bits(src, 114, 114) << 20) | /* 1b */
1359 (brw_inst_bits(src, 113, 112) << 18) | /* 2b */
1360 (brw_inst_bits(src, 98, 98) << 17) | /* 1b */
1361 (brw_inst_bits(src, 97, 96) << 15) | /* 2b */
1362 (brw_inst_bits(src, 91, 91) << 14) | /* 1b */
1363 (brw_inst_bits(src, 87, 86) << 12) | /* 2b */
1364 (brw_inst_bits(src, 85, 84) << 10) | /* 2b */
1365 (brw_inst_bits(src, 83, 83) << 9) | /* 1b */
1366 (brw_inst_bits(src, 66, 66) << 8) | /* 1b */
1367 (brw_inst_bits(src, 65, 64) << 6) | /* 2b */
1368 (brw_inst_bits(src, 47, 47) << 5) | /* 1b */
1369 (brw_inst_bits(src, 46, 46) << 4) | /* 1b */
1370 (brw_inst_bits(src, 45, 44) << 2) | /* 2b */
1371 (brw_inst_bits(src, 43, 43) << 1) | /* 1b */
1372 (brw_inst_bits(src, 35, 35)); /* 1b */
1373
1374 const uint32_t *three_src_source_index_table =
1375 devinfo->verx10 >= 125 ?
1376 xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377 const uint32_t three_src_source_index_table_len =
1378 devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379 ARRAY_SIZE(gfx12_3src_source_index_table);
1380
1381 for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382 if (three_src_source_index_table[i] == uncompacted) {
1383 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1384 return true;
1385 }
1386 }
1387 } else {
1388 uint64_t uncompacted = /* 46b/BDW; 49b/CHV/SKL+ */
1389 (brw_inst_bits(src, 83, 83) << 43) | /* 1b */
1390 (brw_inst_bits(src, 114, 107) << 35) | /* 8b */
1391 (brw_inst_bits(src, 93, 86) << 27) | /* 8b */
1392 (brw_inst_bits(src, 72, 65) << 19) | /* 8b */
1393 (brw_inst_bits(src, 55, 37)); /* 19b */
1394
1395 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1396 uncompacted |=
1397 (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398 (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399 (brw_inst_bits(src, 84, 84) << 44); /* 1b */
1400 } else {
1401 uncompacted |=
1402 (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403 (brw_inst_bits(src, 104, 104) << 44); /* 1b */
1404 }
1405
1406 for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407 if (gfx8_3src_source_index_table[i] == uncompacted) {
1408 brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1409 return true;
1410 }
1411 }
1412 }
1413
1414 return false;
1415 }
1416
1417 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1418 set_3src_subreg_index(const struct intel_device_info *devinfo,
1419 brw_compact_inst *dst, const brw_inst *src)
1420 {
1421 assert(devinfo->ver >= 12);
1422
1423 uint32_t uncompacted = /* 20b/TGL+ */
1424 (brw_inst_bits(src, 119, 115) << 15) | /* 5b */
1425 (brw_inst_bits(src, 103, 99) << 10) | /* 5b */
1426 (brw_inst_bits(src, 71, 67) << 5) | /* 5b */
1427 (brw_inst_bits(src, 55, 51)); /* 5b */
1428
1429 for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430 if (gfx12_3src_subreg_table[i] == uncompacted) {
1431 brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1432 return true;
1433 }
1434 }
1435
1436 return false;
1437 }
1438
1439 static bool
has_unmapped_bits(const struct brw_isa_info * isa,const brw_inst * src)1440 has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
1441 {
1442 const struct intel_device_info *devinfo = isa->devinfo;
1443
1444 /* EOT can only be mapped on a send if the src1 is an immediate */
1445 if ((brw_inst_opcode(isa, src) == BRW_OPCODE_SENDC ||
1446 brw_inst_opcode(isa, src) == BRW_OPCODE_SEND) &&
1447 brw_inst_eot(devinfo, src))
1448 return true;
1449
1450 /* Check for instruction bits that don't map to any of the fields of the
1451 * compacted instruction. The instruction cannot be compacted if any of
1452 * them are set. They overlap with:
1453 * - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1454 * - Dst.AddrImm[9] (bit 47 on Gfx8)
1455 * - Src0.AddrImm[9] (bit 95 on Gfx8)
1456 * - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1457 * - UIP[31] (bit 95 on Gfx8)
1458 */
1459 if (devinfo->ver >= 12) {
1460 assert(!brw_inst_bits(src, 7, 7));
1461 return false;
1462 } else if (devinfo->ver >= 8) {
1463 assert(!brw_inst_bits(src, 7, 7));
1464 return brw_inst_bits(src, 95, 95) ||
1465 brw_inst_bits(src, 47, 47) ||
1466 brw_inst_bits(src, 11, 11);
1467 } else {
1468 assert(!brw_inst_bits(src, 7, 7) &&
1469 !(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1470 return brw_inst_bits(src, 95, 91) ||
1471 brw_inst_bits(src, 47, 47);
1472 }
1473 }
1474
1475 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src)1476 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1477 const brw_inst *src)
1478 {
1479 /* Check for three-source instruction bits that don't map to any of the
1480 * fields of the compacted instruction. All of them seem to be reserved
1481 * bits currently.
1482 */
1483 if (devinfo->ver >= 12) {
1484 assert(!brw_inst_bits(src, 7, 7));
1485 } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1486 assert(!brw_inst_bits(src, 127, 127) &&
1487 !brw_inst_bits(src, 7, 7));
1488 } else {
1489 assert(devinfo->ver >= 8);
1490 assert(!brw_inst_bits(src, 127, 126) &&
1491 !brw_inst_bits(src, 105, 105) &&
1492 !brw_inst_bits(src, 84, 84) &&
1493 !brw_inst_bits(src, 7, 7));
1494
1495 /* Src1Type and Src2Type, used for mixed-precision floating point */
1496 if (brw_inst_bits(src, 36, 35))
1497 return true;
1498 }
1499
1500 return false;
1501 }
1502
1503 static bool
brw_try_compact_3src_instruction(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1504 brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1505 brw_compact_inst *dst, const brw_inst *src)
1506 {
1507 assert(devinfo->ver >= 8);
1508
1509 if (has_3src_unmapped_bits(devinfo, src))
1510 return false;
1511
1512 #define compact(field) \
1513 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1514 #define compact_a16(field) \
1515 brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1516
1517 compact(hw_opcode);
1518
1519 if (!set_3src_control_index(devinfo, dst, src))
1520 return false;
1521
1522 if (!set_3src_source_index(devinfo, dst, src))
1523 return false;
1524
1525 if (devinfo->ver >= 12) {
1526 if (!set_3src_subreg_index(devinfo, dst, src))
1527 return false;
1528
1529 compact(swsb);
1530 compact(debug_control);
1531 compact(dst_reg_nr);
1532 compact(src0_reg_nr);
1533 compact(src1_reg_nr);
1534 compact(src2_reg_nr);
1535 } else {
1536 compact(dst_reg_nr);
1537 compact_a16(src0_rep_ctrl);
1538 compact(debug_control);
1539 compact(saturate);
1540 compact_a16(src1_rep_ctrl);
1541 compact_a16(src2_rep_ctrl);
1542 compact(src0_reg_nr);
1543 compact(src1_reg_nr);
1544 compact(src2_reg_nr);
1545 compact_a16(src0_subreg_nr);
1546 compact_a16(src1_subreg_nr);
1547 compact_a16(src2_subreg_nr);
1548 }
1549 brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1550
1551 #undef compact
1552 #undef compact_a16
1553
1554 return true;
1555 }
1556
1557 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1558 * sources, and a 13th bit that's replicated through the high 20 bits.
1559 *
1560 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1561 * of packed vectors as compactable immediates.
1562 *
1563 * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1564 * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1565 * while for unsigned integers it is not.
1566 *
1567 * Returns the compacted immediate, or -1 if immediate cannot be compacted
1568 */
1569 static int
compact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned imm)1570 compact_immediate(const struct intel_device_info *devinfo,
1571 enum brw_reg_type type, unsigned imm)
1572 {
1573 if (devinfo->ver >= 12) {
1574 /* 16-bit immediates need to be replicated through the 32-bit immediate
1575 * field
1576 */
1577 switch (type) {
1578 case BRW_REGISTER_TYPE_W:
1579 case BRW_REGISTER_TYPE_UW:
1580 case BRW_REGISTER_TYPE_HF:
1581 if ((imm >> 16) != (imm & 0xffff))
1582 return -1;
1583 break;
1584 default:
1585 break;
1586 }
1587
1588 switch (type) {
1589 case BRW_REGISTER_TYPE_F:
1590 /* We get the high 12-bits as-is; rest must be zero */
1591 if ((imm & 0xfffff) == 0)
1592 return (imm >> 20) & 0xfff;
1593 break;
1594 case BRW_REGISTER_TYPE_HF:
1595 /* We get the high 12-bits as-is; rest must be zero */
1596 if ((imm & 0xf) == 0)
1597 return (imm >> 4) & 0xfff;
1598 break;
1599 case BRW_REGISTER_TYPE_UD:
1600 case BRW_REGISTER_TYPE_VF:
1601 case BRW_REGISTER_TYPE_UV:
1602 case BRW_REGISTER_TYPE_V:
1603 /* We get the low 12-bits as-is; rest must be zero */
1604 if ((imm & 0xfffff000) == 0)
1605 return imm & 0xfff;
1606 break;
1607 case BRW_REGISTER_TYPE_UW:
1608 /* We get the low 12-bits as-is; rest must be zero */
1609 if ((imm & 0xf000) == 0)
1610 return imm & 0xfff;
1611 break;
1612 case BRW_REGISTER_TYPE_D:
1613 /* We get the low 11-bits as-is; 12th is replicated */
1614 if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1615 return imm & 0xfff;
1616 break;
1617 case BRW_REGISTER_TYPE_W:
1618 /* We get the low 11-bits as-is; 12th is replicated */
1619 if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1620 return imm & 0xfff;
1621 break;
1622 case BRW_REGISTER_TYPE_NF:
1623 case BRW_REGISTER_TYPE_DF:
1624 case BRW_REGISTER_TYPE_Q:
1625 case BRW_REGISTER_TYPE_UQ:
1626 case BRW_REGISTER_TYPE_B:
1627 case BRW_REGISTER_TYPE_UB:
1628 return -1;
1629 }
1630 } else {
1631 /* We get the low 12 bits as-is; 13th is replicated */
1632 if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1633 return imm & 0x1fff;
1634 }
1635 }
1636
1637 return -1;
1638 }
1639
1640 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1641 uncompact_immediate(const struct intel_device_info *devinfo,
1642 enum brw_reg_type type, unsigned compact_imm)
1643 {
1644 if (devinfo->ver >= 12) {
1645 switch (type) {
1646 case BRW_REGISTER_TYPE_F:
1647 return compact_imm << 20;
1648 case BRW_REGISTER_TYPE_HF:
1649 return (compact_imm << 20) | (compact_imm << 4);
1650 case BRW_REGISTER_TYPE_UD:
1651 case BRW_REGISTER_TYPE_VF:
1652 case BRW_REGISTER_TYPE_UV:
1653 case BRW_REGISTER_TYPE_V:
1654 return compact_imm;
1655 case BRW_REGISTER_TYPE_UW:
1656 /* Replicate */
1657 return compact_imm << 16 | compact_imm;
1658 case BRW_REGISTER_TYPE_D:
1659 /* Extend the 12th bit into the high 20 bits */
1660 return (int)(compact_imm << 20) >> 20;
1661 case BRW_REGISTER_TYPE_W:
1662 /* Extend the 12th bit into the high 4 bits and replicate */
1663 return ((int)(compact_imm << 20) >> 4) |
1664 ((unsigned short)((short)(compact_imm << 4) >> 4));
1665 case BRW_REGISTER_TYPE_NF:
1666 case BRW_REGISTER_TYPE_DF:
1667 case BRW_REGISTER_TYPE_Q:
1668 case BRW_REGISTER_TYPE_UQ:
1669 case BRW_REGISTER_TYPE_B:
1670 case BRW_REGISTER_TYPE_UB:
1671 unreachable("not reached");
1672 }
1673 } else {
1674 /* Replicate the 13th bit into the high 19 bits */
1675 return (int)(compact_imm << 19) >> 19;
1676 }
1677
1678 unreachable("not reached");
1679 }
1680
1681 static bool
has_immediate(const struct intel_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1682 has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1683 enum brw_reg_type *type)
1684 {
1685 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1686 *type = brw_inst_src0_type(devinfo, inst);
1687 return *type != INVALID_REG_TYPE;
1688 } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1689 *type = brw_inst_src1_type(devinfo, inst);
1690 return *type != INVALID_REG_TYPE;
1691 }
1692
1693 return false;
1694 }
1695
1696 /**
1697 * Applies some small changes to instruction types to increase chances of
1698 * compaction.
1699 */
1700 static brw_inst
precompact(const struct brw_isa_info * isa,brw_inst inst)1701 precompact(const struct brw_isa_info *isa, brw_inst inst)
1702 {
1703 const struct intel_device_info *devinfo = isa->devinfo;
1704
1705 /* In XeHP the compaction tables removed the entries for source regions
1706 * <8;8,1> giving preference to <1;1,0> as the way to indicate
1707 * sequential elements, so convert to those before compacting.
1708 */
1709 if (devinfo->verx10 >= 125) {
1710 if (brw_inst_src0_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1711 brw_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1712 brw_inst_src0_vstride(devinfo, &inst) == (brw_inst_src0_width(devinfo, &inst) + 1) &&
1713 brw_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1714 brw_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1715 brw_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1);
1716 brw_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1717 }
1718
1719 if (brw_inst_src1_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1720 brw_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1721 brw_inst_src1_vstride(devinfo, &inst) == (brw_inst_src1_width(devinfo, &inst) + 1) &&
1722 brw_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1723 brw_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1724 brw_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1);
1725 brw_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1726 }
1727 }
1728
1729 if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1730 return inst;
1731
1732 /* The Bspec's section titled "Non-present Operands" claims that if src0
1733 * is an immediate that src1's type must be the same as that of src0.
1734 *
1735 * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1736 * that do not follow this rule. E.g., from the IVB/HSW table:
1737 *
1738 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1739 * 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |
1740 *
1741 * And from the SNB table:
1742 *
1743 * DataTypeIndex 18-Bit Mapping Mapped Meaning
1744 * 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |
1745 *
1746 * Neither of these cause warnings from the simulator when used,
1747 * compacted or otherwise. In fact, all compaction mappings that have an
1748 * immediate in src0 use a:ud for src1.
1749 *
1750 * The GM45 instruction compaction tables do not contain mapped meanings
1751 * so it's not clear whether it has the restriction. We'll assume it was
1752 * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1753 *
1754 * Don't do any of this for 64-bit immediates, since the src1 fields
1755 * overlap with the immediate and setting them would overwrite the
1756 * immediate we set.
1757 */
1758 if (devinfo->ver >= 6 &&
1759 !(devinfo->platform == INTEL_PLATFORM_HSW &&
1760 brw_inst_opcode(isa, &inst) == BRW_OPCODE_DIM) &&
1761 !(devinfo->ver >= 8 &&
1762 (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1763 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1764 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1765 brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1766 }
1767
1768 /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1769 * for immediate values. Presumably the hardware engineers realized
1770 * that the only useful floating-point value that could be represented
1771 * in this format is 0.0, which can also be represented as a VF-typed
1772 * immediate, so they gave us the previously mentioned mapping on IVB+.
1773 *
1774 * Strangely, we do have a mapping for imm:f in src1, so we don't need
1775 * to do this there.
1776 *
1777 * If we see a 0.0:F, change the type to VF so that it can be compacted.
1778 *
1779 * Compaction of floating-point immediates is improved on Gfx12, thus
1780 * removing the need for this.
1781 */
1782 if (devinfo->ver < 12 &&
1783 brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1784 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1785 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1786 brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1787 enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1788 brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1789 }
1790
1791 /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1792 * set the types to :UD so the instruction can be compacted.
1793 *
1794 * FINISHME: Use dst:f | imm:f on Gfx12
1795 */
1796 if (devinfo->ver < 12 &&
1797 compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1798 brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1799 brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1800 brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1801 brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1802 enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1803 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1804
1805 brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1806 brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1807 }
1808
1809 return inst;
1810 }
1811
1812 /**
1813 * Tries to compact instruction src into dst.
1814 *
1815 * It doesn't modify dst unless src is compactable, which is relied on by
1816 * brw_compact_instructions().
1817 */
1818 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1819 try_compact_instruction(const struct compaction_state *c,
1820 brw_compact_inst *dst, const brw_inst *src)
1821 {
1822 const struct intel_device_info *devinfo = c->isa->devinfo;
1823 brw_compact_inst temp;
1824
1825 assert(brw_inst_cmpt_control(devinfo, src) == 0);
1826
1827 if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
1828 if (devinfo->ver >= 8) {
1829 memset(&temp, 0, sizeof(temp));
1830 if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1831 *dst = temp;
1832 return true;
1833 } else {
1834 return false;
1835 }
1836 } else {
1837 return false;
1838 }
1839 }
1840
1841 enum brw_reg_type type;
1842 bool is_immediate = has_immediate(devinfo, src, &type);
1843
1844 unsigned compacted_imm = 0;
1845
1846 if (is_immediate) {
1847 /* Instructions with immediates cannot be compacted on Gen < 6 */
1848 if (devinfo->ver < 6)
1849 return false;
1850
1851 compacted_imm = compact_immediate(devinfo, type,
1852 brw_inst_imm_ud(devinfo, src));
1853 if (compacted_imm == -1)
1854 return false;
1855 }
1856
1857 if (has_unmapped_bits(c->isa, src))
1858 return false;
1859
1860 memset(&temp, 0, sizeof(temp));
1861
1862 #define compact(field) \
1863 brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1864 #define compact_reg(field) \
1865 brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1866 brw_inst_##field##_da_reg_nr(devinfo, src))
1867
1868 compact(hw_opcode);
1869 compact(debug_control);
1870
1871 if (!set_control_index(c, &temp, src))
1872 return false;
1873 if (!set_datatype_index(c, &temp, src, is_immediate))
1874 return false;
1875 if (!set_subreg_index(c, &temp, src, is_immediate))
1876 return false;
1877 if (!set_src0_index(c, &temp, src))
1878 return false;
1879 if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1880 return false;
1881
1882 if (devinfo->ver >= 12) {
1883 compact(swsb);
1884 compact_reg(dst);
1885 compact_reg(src0);
1886
1887 if (is_immediate) {
1888 /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1889 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1890 } else {
1891 compact_reg(src1);
1892 }
1893 } else {
1894 if (devinfo->ver >= 6) {
1895 compact(acc_wr_control);
1896 } else {
1897 compact(mask_control_ex);
1898 }
1899
1900 if (devinfo->ver <= 6)
1901 compact(flag_subreg_nr);
1902
1903 compact(cond_modifier);
1904
1905 compact_reg(dst);
1906 compact_reg(src0);
1907
1908 if (is_immediate) {
1909 /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1910 brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1911 } else {
1912 compact_reg(src1);
1913 }
1914 }
1915 brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1916
1917 #undef compact
1918 #undef compact_reg
1919
1920 *dst = temp;
1921
1922 return true;
1923 }
1924
1925 bool
brw_try_compact_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1926 brw_try_compact_instruction(const struct brw_isa_info *isa,
1927 brw_compact_inst *dst, const brw_inst *src)
1928 {
1929 struct compaction_state c;
1930 compaction_state_init(&c, isa);
1931 return try_compact_instruction(&c, dst, src);
1932 }
1933
1934 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1935 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1936 brw_compact_inst *src)
1937 {
1938 const struct intel_device_info *devinfo = c->isa->devinfo;
1939 uint32_t uncompacted =
1940 c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1941
1942 if (devinfo->ver >= 12) {
1943 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1944 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1945 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1946 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1947 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1948 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1949 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
1950 brw_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);
1951 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
1952 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
1953 } else if (devinfo->ver >= 8) {
1954 brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1955 brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);
1956 brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);
1957 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);
1958 brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);
1959 } else {
1960 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1961 brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));
1962
1963 if (devinfo->ver == 7)
1964 brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1965 }
1966 }
1967
1968 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1969 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1970 brw_compact_inst *src)
1971 {
1972 const struct intel_device_info *devinfo = c->isa->devinfo;
1973 uint32_t uncompacted =
1974 c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1975
1976 if (devinfo->ver >= 12) {
1977 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1978 brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1979 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1980 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1981 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1982 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1983 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);
1984 brw_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);
1985 brw_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 0xf);
1986 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
1987 } else if (devinfo->ver >= 8) {
1988 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1989 brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1990 brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);
1991 } else {
1992 brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1993 brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1994 }
1995 }
1996
1997 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1998 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1999 brw_compact_inst *src)
2000 {
2001 const struct intel_device_info *devinfo = c->isa->devinfo;
2002 uint16_t uncompacted =
2003 c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
2004
2005 if (devinfo->ver >= 12) {
2006 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
2007 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2008 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2009 } else {
2010 brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
2011 brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);
2012 brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);
2013 }
2014 }
2015
2016 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2017 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
2018 brw_compact_inst *src)
2019 {
2020 const struct intel_device_info *devinfo = c->isa->devinfo;
2021 uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
2022 uint16_t uncompacted = c->src0_index_table[compacted];
2023
2024 if (devinfo->ver >= 12) {
2025 brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
2026 brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
2027 brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2028 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2029 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2030 } else {
2031 brw_inst_set_bits(dst, 88, 77, uncompacted);
2032 }
2033 }
2034
2035 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2036 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2037 brw_compact_inst *src)
2038 {
2039 const struct intel_device_info *devinfo = c->isa->devinfo;
2040 uint16_t uncompacted =
2041 c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2042
2043 if (devinfo->ver >= 12) {
2044 brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2045 brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);
2046 brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);
2047 brw_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);
2048 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);
2049 } else {
2050 brw_inst_set_bits(dst, 120, 109, uncompacted);
2051 }
2052 }
2053
2054 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2055 set_uncompacted_3src_control_index(const struct compaction_state *c,
2056 brw_inst *dst, brw_compact_inst *src)
2057 {
2058 const struct intel_device_info *devinfo = c->isa->devinfo;
2059 assert(devinfo->ver >= 8);
2060
2061 if (devinfo->verx10 >= 125) {
2062 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2063 uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2064
2065 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2066 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2067 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2068 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2069 brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2070 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2071 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2072 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2073 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2074 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2075 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2076 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2077 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2078 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2079 brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2080 brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2081 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2082 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2083
2084 } else if (devinfo->ver >= 12) {
2085 uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2086 uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2087
2088 brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2089 brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2090 brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2091 brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2092 brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2093 brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2094 brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2095 brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2096 brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2097 brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2098 brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2099 brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2100 brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2101 brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);
2102 brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);
2103 brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);
2104 brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);
2105 brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);
2106 } else {
2107 uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2108 uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2109
2110 brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2111 brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);
2112
2113 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2114 brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2115 }
2116 }
2117
2118 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2119 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2120 brw_inst *dst, brw_compact_inst *src)
2121 {
2122 assert(devinfo->ver >= 8);
2123
2124 uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2125
2126 if (devinfo->ver >= 12) {
2127 const uint32_t *three_src_source_index_table =
2128 devinfo->verx10 >= 125 ?
2129 xehp_3src_source_index_table : gfx12_3src_source_index_table;
2130 uint32_t uncompacted = three_src_source_index_table[compacted];
2131
2132 brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2133 brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2134 brw_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);
2135 brw_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);
2136 brw_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);
2137 brw_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);
2138 brw_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);
2139 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);
2140 brw_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);
2141 brw_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);
2142 brw_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);
2143 brw_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);
2144 brw_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);
2145 brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);
2146 brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);
2147 } else {
2148 uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2149
2150 brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);
2151 brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2152 brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);
2153 brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);
2154 brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);
2155
2156 if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2157 brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2158 brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2159 brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);
2160 } else {
2161 brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2162 brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2163 }
2164 }
2165 }
2166
2167 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2168 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2169 brw_inst *dst, brw_compact_inst *src)
2170 {
2171 assert(devinfo->ver >= 12);
2172
2173 uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2174 uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2175
2176 brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2177 brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);
2178 brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);
2179 brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);
2180 }
2181
2182 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2183 brw_uncompact_3src_instruction(const struct compaction_state *c,
2184 brw_inst *dst, brw_compact_inst *src)
2185 {
2186 const struct intel_device_info *devinfo = c->isa->devinfo;
2187 assert(devinfo->ver >= 8);
2188
2189 #define uncompact(field) \
2190 brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2191 #define uncompact_a16(field) \
2192 brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2193
2194 uncompact(hw_opcode);
2195
2196 if (devinfo->ver >= 12) {
2197 set_uncompacted_3src_control_index(c, dst, src);
2198 set_uncompacted_3src_source_index(devinfo, dst, src);
2199 set_uncompacted_3src_subreg_index(devinfo, dst, src);
2200
2201 uncompact(debug_control);
2202 uncompact(swsb);
2203 uncompact(dst_reg_nr);
2204 uncompact(src0_reg_nr);
2205 uncompact(src1_reg_nr);
2206 uncompact(src2_reg_nr);
2207 } else {
2208 set_uncompacted_3src_control_index(c, dst, src);
2209 set_uncompacted_3src_source_index(devinfo, dst, src);
2210
2211 uncompact(dst_reg_nr);
2212 uncompact_a16(src0_rep_ctrl);
2213 uncompact(debug_control);
2214 uncompact(saturate);
2215 uncompact_a16(src1_rep_ctrl);
2216 uncompact_a16(src2_rep_ctrl);
2217 uncompact(src0_reg_nr);
2218 uncompact(src1_reg_nr);
2219 uncompact(src2_reg_nr);
2220 uncompact_a16(src0_subreg_nr);
2221 uncompact_a16(src1_subreg_nr);
2222 uncompact_a16(src2_subreg_nr);
2223 }
2224 brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2225
2226 #undef uncompact
2227 #undef uncompact_a16
2228 }
2229
/**
 * Rebuilds the full-size instruction dst from the compacted instruction src.
 *
 * dst is cleared first, so every bit of the result comes from this function.
 * On Gfx8+ three-source opcodes are handed to
 * brw_uncompact_3src_instruction(); everything else is decoded here from the
 * per-generation index tables held in c plus the directly-encoded fields.
 * The result is marked as not compacted.
 */
static void
uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
                      brw_compact_inst *src)
{
   const struct intel_device_info *devinfo = c->isa->devinfo;
   memset(dst, 0, sizeof(*dst));

   /* The opcode is read from the compacted encoding to pick the decoder. */
   if (devinfo->ver >= 8 &&
       is_3src(c->isa, brw_opcode_decode(c->isa,
                                         brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
      brw_uncompact_3src_instruction(c, dst, src);
      return;
   }

   /* uncompact(field): copy a same-named field from src to dst.
    * uncompact_reg(field): copy a compacted register number into the
    * direct-addressing register-number field of dst.
    */
#define uncompact(field) \
   brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
#define uncompact_reg(field) \
   brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
                                    brw_compact_inst_##field##_reg_nr(devinfo, src))

   uncompact(hw_opcode);
   uncompact(debug_control);

   /* Expand the table-indexed field groups. */
   set_uncompacted_control(c, dst, src);
   set_uncompacted_datatype(c, dst, src);
   set_uncompacted_subreg(c, dst, src);
   set_uncompacted_src0(c, dst, src);

   /* has_immediate() is evaluated on the partially rebuilt dst, so it has to
    * come after the fields above have been written.  With an immediate the
    * src1 index and register number are not decoded; the expanded immediate
    * is stored instead.
    */
   enum brw_reg_type type;
   if (has_immediate(devinfo, dst, &type)) {
      unsigned imm = uncompact_immediate(devinfo, type,
                                         brw_compact_inst_imm(devinfo, src));
      brw_inst_set_imm_ud(devinfo, dst, imm);
   } else {
      set_uncompacted_src1(c, dst, src);
      uncompact_reg(src1);
   }

   /* Directly-encoded fields; the set differs by generation. */
   if (devinfo->ver >= 12) {
      uncompact(swsb);
      uncompact_reg(dst);
      uncompact_reg(src0);
   } else {
      if (devinfo->ver >= 6) {
         uncompact(acc_wr_control);
      } else {
         uncompact(mask_control_ex);
      }

      uncompact(cond_modifier);

      if (devinfo->ver <= 6)
         uncompact(flag_subreg_nr);

      uncompact_reg(dst);
      uncompact_reg(src0);
   }
   brw_inst_set_cmpt_control(devinfo, dst, false);

#undef uncompact
#undef uncompact_reg
}
2292
2293 void
brw_uncompact_instruction(const struct brw_isa_info * isa,brw_inst * dst,brw_compact_inst * src)2294 brw_uncompact_instruction(const struct brw_isa_info *isa,
2295 brw_inst *dst, brw_compact_inst *src)
2296 {
2297 struct compaction_state c;
2298 compaction_state_init(&c, isa);
2299 uncompact_instruction(&c, dst, src);
2300 }
2301
2302 void
brw_debug_compact_uncompact(const struct brw_isa_info * isa,brw_inst * orig,brw_inst * uncompacted)2303 brw_debug_compact_uncompact(const struct brw_isa_info *isa,
2304 brw_inst *orig,
2305 brw_inst *uncompacted)
2306 {
2307 fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2308 isa->devinfo->ver);
2309
2310 fprintf(stderr, " before: ");
2311 brw_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2312
2313 fprintf(stderr, " after: ");
2314 brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2315
2316 uint32_t *before_bits = (uint32_t *)orig;
2317 uint32_t *after_bits = (uint32_t *)uncompacted;
2318 fprintf(stderr, " changed bits:\n");
2319 for (int i = 0; i < 128; i++) {
2320 uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2321 uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2322
2323 if (before != after) {
2324 fprintf(stderr, " bit %d, %s to %s\n", i,
2325 before ? "set" : "unset",
2326 after ? "set" : "unset");
2327 }
2328 }
2329 }
2330
/**
 * Returns how much the compacted count grows from old_ip to old_target_ip,
 * i.e. compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * The result is negative when the target's count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2338
2339 static void
update_uip_jip(const struct brw_isa_info * isa,brw_inst * insn,int this_old_ip,int * compacted_counts)2340 update_uip_jip(const struct brw_isa_info *isa, brw_inst *insn,
2341 int this_old_ip, int *compacted_counts)
2342 {
2343 const struct intel_device_info *devinfo = isa->devinfo;
2344
2345 /* JIP and UIP are in units of:
2346 * - bytes on Gfx8+; and
2347 * - compacted instructions on Gfx6+.
2348 */
2349 int shift = devinfo->ver >= 8 ? 3 : 0;
2350
2351 int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2352 jip_compacted -= compacted_between(this_old_ip,
2353 this_old_ip + (jip_compacted / 2),
2354 compacted_counts);
2355 brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2356
2357 if (brw_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF ||
2358 brw_inst_opcode(isa, insn) == BRW_OPCODE_WHILE ||
2359 (brw_inst_opcode(isa, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2360 return;
2361
2362 int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2363 uip_compacted -= compacted_between(this_old_ip,
2364 this_old_ip + (uip_compacted / 2),
2365 compacted_counts);
2366 brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2367 }
2368
2369 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2370 update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2371 int this_old_ip, int *compacted_counts)
2372 {
2373 assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2374
2375 /* Jump Count is in units of:
2376 * - uncompacted instructions on G45; and
2377 * - compacted instructions on Gfx5.
2378 */
2379 int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2380
2381 int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2382
2383 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2384
2385 int this_compacted_count = compacted_counts[this_old_ip];
2386 int target_compacted_count = compacted_counts[target_old_ip];
2387
2388 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2389 brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2390 }
2391
2392 static void
compaction_state_init(struct compaction_state * c,const struct brw_isa_info * isa)2393 compaction_state_init(struct compaction_state *c,
2394 const struct brw_isa_info *isa)
2395 {
2396 const struct intel_device_info *devinfo = isa->devinfo;
2397
2398 assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2399 assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2400 assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2401 assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2402 assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2403 assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2404 assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2405 assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2406 assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2407 assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2408 assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2409 assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2410 assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2411 assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2412 assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2413 assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2414 assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2415 assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2416 assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2417 assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2418 assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2419 assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2420 assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2421 assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2422
2423 c->isa = isa;
2424 switch (devinfo->ver) {
2425 case 12:
2426 c->control_index_table = gfx12_control_index_table;;
2427 c->datatype_table = gfx12_datatype_table;
2428 c->subreg_table = gfx12_subreg_table;
2429 if (devinfo->verx10 >= 125) {
2430 c->src0_index_table = xehp_src0_index_table;
2431 c->src1_index_table = xehp_src1_index_table;
2432 } else {
2433 c->src0_index_table = gfx12_src0_index_table;
2434 c->src1_index_table = gfx12_src1_index_table;
2435 }
2436 break;
2437 case 11:
2438 c->control_index_table = gfx8_control_index_table;
2439 c->datatype_table = gfx11_datatype_table;
2440 c->subreg_table = gfx8_subreg_table;
2441 c->src0_index_table = gfx8_src_index_table;
2442 c->src1_index_table = gfx8_src_index_table;
2443 break;
2444 case 9:
2445 case 8:
2446 c->control_index_table = gfx8_control_index_table;
2447 c->datatype_table = gfx8_datatype_table;
2448 c->subreg_table = gfx8_subreg_table;
2449 c->src0_index_table = gfx8_src_index_table;
2450 c->src1_index_table = gfx8_src_index_table;
2451 break;
2452 case 7:
2453 c->control_index_table = gfx7_control_index_table;
2454 c->datatype_table = gfx7_datatype_table;
2455 c->subreg_table = gfx7_subreg_table;
2456 c->src0_index_table = gfx7_src_index_table;
2457 c->src1_index_table = gfx7_src_index_table;
2458 break;
2459 case 6:
2460 c->control_index_table = gfx6_control_index_table;
2461 c->datatype_table = gfx6_datatype_table;
2462 c->subreg_table = gfx6_subreg_table;
2463 c->src0_index_table = gfx6_src_index_table;
2464 c->src1_index_table = gfx6_src_index_table;
2465 break;
2466 case 5:
2467 case 4:
2468 c->control_index_table = g45_control_index_table;
2469 c->datatype_table = g45_datatype_table;
2470 c->subreg_table = g45_subreg_table;
2471 c->src0_index_table = g45_src_index_table;
2472 c->src1_index_table = g45_src_index_table;
2473 break;
2474 default:
2475 unreachable("unknown generation");
2476 }
2477 }
2478
2479 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2480 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2481 struct disasm_info *disasm)
2482 {
2483 if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2484 return;
2485
2486 const struct intel_device_info *devinfo = p->devinfo;
2487 if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2488 return;
2489
2490 void *store = p->store + start_offset / 16;
2491 /* For an instruction at byte offset 16*i before compaction, this is the
2492 * number of compacted instructions minus the number of padding NOP/NENOPs
2493 * that preceded it.
2494 */
2495 unsigned num_compacted_counts =
2496 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2497 int *compacted_counts =
2498 calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2499
2500 /* For an instruction at byte offset 8*i after compaction, this was its IP
2501 * (in 16-byte units) before compaction.
2502 */
2503 unsigned num_old_ip =
2504 (p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1;
2505 int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2506
2507 struct compaction_state c;
2508 compaction_state_init(&c, p->isa);
2509
2510 int offset = 0;
2511 int compacted_count = 0;
2512 for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2513 src_offset += sizeof(brw_inst)) {
2514 brw_inst *src = store + src_offset;
2515 void *dst = store + offset;
2516
2517 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2518 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2519
2520 brw_inst inst = precompact(p->isa, *src);
2521 brw_inst saved = inst;
2522
2523 if (try_compact_instruction(&c, dst, &inst)) {
2524 compacted_count++;
2525
2526 if (INTEL_DEBUG(DEBUG_ANY)) {
2527 brw_inst uncompacted;
2528 uncompact_instruction(&c, &uncompacted, dst);
2529 if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2530 brw_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2531 }
2532 }
2533
2534 offset += sizeof(brw_compact_inst);
2535 } else {
2536 /* All uncompacted instructions need to be aligned on G45. */
2537 if ((offset & sizeof(brw_compact_inst)) != 0 &&
2538 devinfo->platform == INTEL_PLATFORM_G4X) {
2539 brw_compact_inst *align = store + offset;
2540 memset(align, 0, sizeof(*align));
2541 brw_compact_inst_set_hw_opcode(
2542 devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NENOP));
2543 brw_compact_inst_set_cmpt_control(devinfo, align, true);
2544 offset += sizeof(brw_compact_inst);
2545 compacted_count--;
2546 compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2547 old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2548
2549 dst = store + offset;
2550 }
2551
2552 /* If we didn't compact this instruction, we need to move it down into
2553 * place.
2554 */
2555 if (offset != src_offset) {
2556 memmove(dst, src, sizeof(brw_inst));
2557 }
2558 offset += sizeof(brw_inst);
2559 }
2560 }
2561
2562 /* Add an entry for the ending offset of the program. This greatly
2563 * simplifies the linked list walk at the end of the function.
2564 */
2565 old_ip[offset / sizeof(brw_compact_inst)] =
2566 (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2567
2568 /* Fix up control flow offsets. */
2569 p->next_insn_offset = start_offset + offset;
2570 for (offset = 0; offset < p->next_insn_offset - start_offset;
2571 offset = next_offset(devinfo, store, offset)) {
2572 brw_inst *insn = store + offset;
2573 int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2574 int this_compacted_count = compacted_counts[this_old_ip];
2575
2576 switch (brw_inst_opcode(p->isa, insn)) {
2577 case BRW_OPCODE_BREAK:
2578 case BRW_OPCODE_CONTINUE:
2579 case BRW_OPCODE_HALT:
2580 if (devinfo->ver >= 6) {
2581 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2582 } else {
2583 update_gfx4_jump_count(devinfo, insn, this_old_ip,
2584 compacted_counts);
2585 }
2586 break;
2587
2588 case BRW_OPCODE_IF:
2589 case BRW_OPCODE_IFF:
2590 case BRW_OPCODE_ELSE:
2591 case BRW_OPCODE_ENDIF:
2592 case BRW_OPCODE_WHILE:
2593 if (devinfo->ver >= 7) {
2594 if (brw_inst_cmpt_control(devinfo, insn)) {
2595 brw_inst uncompacted;
2596 uncompact_instruction(&c, &uncompacted,
2597 (brw_compact_inst *)insn);
2598
2599 update_uip_jip(p->isa, &uncompacted, this_old_ip,
2600 compacted_counts);
2601
2602 bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2603 &uncompacted);
2604 assert(ret); (void)ret;
2605 } else {
2606 update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2607 }
2608 } else if (devinfo->ver == 6) {
2609 assert(!brw_inst_cmpt_control(devinfo, insn));
2610
2611 /* Jump Count is in units of compacted instructions on Gfx6. */
2612 int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2613
2614 int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2615 int target_compacted_count = compacted_counts[target_old_ip];
2616 jump_count_compacted -= (target_compacted_count - this_compacted_count);
2617 brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2618 } else {
2619 update_gfx4_jump_count(devinfo, insn, this_old_ip,
2620 compacted_counts);
2621 }
2622 break;
2623
2624 case BRW_OPCODE_ADD:
2625 /* Add instructions modifying the IP register use an immediate src1,
2626 * and Gens that use this cannot compact instructions with immediate
2627 * operands.
2628 */
2629 if (brw_inst_cmpt_control(devinfo, insn))
2630 break;
2631
2632 if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2633 brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2634 assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2635
2636 int shift = 3;
2637 int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2638
2639 int target_old_ip = this_old_ip + (jump_compacted / 2);
2640 int target_compacted_count = compacted_counts[target_old_ip];
2641 jump_compacted -= (target_compacted_count - this_compacted_count);
2642 brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2643 }
2644 break;
2645
2646 default:
2647 break;
2648 }
2649 }
2650
2651 /* p->nr_insn is counting the number of uncompacted instructions still, so
2652 * divide. We do want to be sure there's a valid instruction in any
2653 * alignment padding, so that the next compression pass (for the FS 8/16
2654 * compile passes) parses correctly.
2655 */
2656 if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2657 brw_compact_inst *align = store + offset;
2658 memset(align, 0, sizeof(*align));
2659 brw_compact_inst_set_hw_opcode(
2660 devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP));
2661 brw_compact_inst_set_cmpt_control(devinfo, align, true);
2662 p->next_insn_offset += sizeof(brw_compact_inst);
2663 }
2664 p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2665
2666 for (int i = 0; i < p->num_relocs; i++) {
2667 if (p->relocs[i].offset < (uint32_t)start_offset)
2668 continue;
2669
2670 assert(p->relocs[i].offset % 16 == 0);
2671 unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2672 p->relocs[i].offset -= compacted_counts[idx] * 8;
2673 }
2674
2675 /* Update the instruction offsets for each group. */
2676 if (disasm) {
2677 int offset = 0;
2678
2679 foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2680 while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2681 sizeof(brw_inst) != group->offset) {
2682 assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2683 sizeof(brw_inst) < group->offset);
2684 offset = next_offset(devinfo, store, offset);
2685 }
2686
2687 group->offset = start_offset + offset;
2688
2689 offset = next_offset(devinfo, store, offset);
2690 }
2691 }
2692
2693 free(compacted_counts);
2694 free(old_ip);
2695 }
2696