• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file brw_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gfx5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gfx6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gfx7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gfx8 adds support for compacting 3-src instructions.
73  *
74  * Gfx12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "brw_eu.h"
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/intel_debug.h"
85 
/* G45 control-index lookup table: 32 entries, each written as a 17-bit
 * pattern.  Going by the file header, a compacted instruction refers to one
 * of these patterns by table index instead of carrying the full field.
 * NOTE(review): the exact bit layout of an entry is defined by the code that
 * consumes this table, which is not visible here -- confirm before editing.
 */
86 static const uint32_t g45_control_index_table[32] = {
87    0b00000000000000000,
88    0b01000000000000000,
89    0b00110000000000000,
90    0b00000000000000010,
91    0b00100000000000000,
92    0b00010000000000000,
93    0b01000000000100000,
94    0b01000000100000000,
95    0b01010000000100000,
96    0b00000000100000010,
97    0b11000000000000000,
98    0b00001000100000010,
99    0b01001000100000000,
100    0b00000000100000000,
101    0b11000000000100000,
102    0b00001000100000000,
103    0b10110000000000000,
104    0b11010000000100000,
105    0b00110000100000000,
106    0b00100000100000000,
107    0b01000000000001000,
108    0b01000000000000100,
109    0b00111100000000000,
110    0b00101011000000000,
111    0b00110000000010000,
112    0b00010000100000000,
113    0b01000000000100100,
114    0b01000000000101000,
115    0b00110000000000110,
116    0b00000000000001010,
117    0b01010000000101000,
118    0b01010000000100100,
119 };
120 
/* G45 datatype lookup table: 32 entries, each written as an 18-bit pattern.
 * Presumably indexed by the datatype-index field of a compacted instruction
 * -- NOTE(review): confirm against the compaction code outside this view.
 */
121 static const uint32_t g45_datatype_table[32] = {
122    0b001000000000100001,
123    0b001011010110101101,
124    0b001000001000110001,
125    0b001111011110111101,
126    0b001011010110101100,
127    0b001000000110101101,
128    0b001000000000100000,
129    0b010100010110110001,
130    0b001100011000101101,
131    0b001000000000100010,
132    0b001000001000110110,
133    0b010000001000110001,
134    0b001000001000110010,
135    0b011000001000110010,
136    0b001111011110111100,
137    0b001000000100101000,
138    0b010100011000110001,
139    0b001010010100101001,
140    0b001000001000101001,
141    0b010000001000110110,
142    0b101000001000110001,
143    0b001011011000101101,
144    0b001000000100001001,
145    0b001011011000101100,
146    0b110100011000110001,
147    0b001000001110111101,
148    0b110000001000110001,
149    0b011000000100101010,
150    0b101000001000101001,
151    0b001011010110001100,
152    0b001000000110100001,
153    0b001010010100001000,
154 };
155 
/* G45 sub-register lookup table: 32 entries, each written as a 15-bit
 * pattern.  Presumably indexed by the subreg-index field of a compacted
 * instruction -- NOTE(review): confirm against the consuming code.
 */
156 static const uint16_t g45_subreg_table[32] = {
157    0b000000000000000,
158    0b000000010000000,
159    0b000001000000000,
160    0b000100000000000,
161    0b000000000100000,
162    0b100000000000000,
163    0b000000000010000,
164    0b001100000000000,
165    0b001010000000000,
166    0b000000100000000,
167    0b001000000000000,
168    0b000000000001000,
169    0b000000001000000,
170    0b000000000000001,
171    0b000010000000000,
172    0b000000010100000,
173    0b000000000000111,
174    0b000001000100000,
175    0b011000000000000,
176    0b000000110000000,
177    0b000000000000010,
178    0b000000000000100,
179    0b000000001100000,
180    0b000100000000010,
181    0b001110011000110,
182    0b001110100001000,
183    0b000110011000110,
184    0b000001000011000,
185    0b000110010000100,
186    0b001100000000110,
187    0b000000010000110,
188    0b000001000110000,
189 };
190 
/* G45 source-index lookup table: 32 entries, each written as a 12-bit
 * pattern.  Presumably shared by the source operands of a compacted
 * instruction -- NOTE(review): confirm which operands index this table.
 */
191 static const uint16_t g45_src_index_table[32] = {
192    0b000000000000,
193    0b010001101000,
194    0b010110001000,
195    0b011010010000,
196    0b001101001000,
197    0b010110001010,
198    0b010101110000,
199    0b011001111000,
200    0b001000101000,
201    0b000000101000,
202    0b010001010000,
203    0b111101101100,
204    0b010110001100,
205    0b010001101100,
206    0b011010010100,
207    0b010001001100,
208    0b001100101000,
209    0b000000000010,
210    0b111101001100,
211    0b011001101000,
212    0b010101001000,
213    0b000000000100,
214    0b000000101100,
215    0b010001101010,
216    0b000000111000,
217    0b010101011000,
218    0b000100100000,
219    0b010110000000,
220    0b010000000100,
221    0b010000111000,
222    0b000101100000,
223    0b111101110100,
224 };
225 
/* Gfx6 control-index lookup table: 32 entries, each written as a 17-bit
 * pattern (the same width as the G45 table, with different contents).
 * NOTE(review): the meaning of individual bits is defined by the consuming
 * code, which is outside this view.
 */
226 static const uint32_t gfx6_control_index_table[32] = {
227    0b00000000000000000,
228    0b01000000000000000,
229    0b00110000000000000,
230    0b00000000100000000,
231    0b00010000000000000,
232    0b00001000100000000,
233    0b00000000100000010,
234    0b00000000000000010,
235    0b01000000100000000,
236    0b01010000000000000,
237    0b10110000000000000,
238    0b00100000000000000,
239    0b11010000000000000,
240    0b11000000000000000,
241    0b01001000100000000,
242    0b01000000000001000,
243    0b01000000000000100,
244    0b00000000000001000,
245    0b00000000000000100,
246    0b00111000100000000,
247    0b00001000100000010,
248    0b00110000100000000,
249    0b00110000000000001,
250    0b00100000000000001,
251    0b00110000000000010,
252    0b00110000000000101,
253    0b00110000000001001,
254    0b00110000000010000,
255    0b00110000000000011,
256    0b00110000000000100,
257    0b00110000100001000,
258    0b00100000000001001,
259 };
260 
/* Gfx6 datatype lookup table: 32 entries, each written as an 18-bit pattern.
 *
 * NOTE(review): the entries at indices 23 and 25 hold the same value
 * (0b001111011110111101), so one of the 32 slots is redundant.  This is
 * presumably how the hardware defines the table -- confirm against the
 * hardware documentation before "fixing" it.
 */
261 static const uint32_t gfx6_datatype_table[32] = {
262    0b001001110000000000,
263    0b001000110000100000,
264    0b001001110000000001,
265    0b001000000001100000,
266    0b001010110100101001,
267    0b001000000110101101,
268    0b001100011000101100,
269    0b001011110110101101,
270    0b001000000111101100,
271    0b001000000001100001,
272    0b001000110010100101,
273    0b001000000001000001,
274    0b001000001000110001,
275    0b001000001000101001,
276    0b001000000000100000,
277    0b001000001000110010,
278    0b001010010100101001,
279    0b001011010010100101,
280    0b001000000110100101,
281    0b001100011000101001,
282    0b001011011000101100,
283    0b001011010110100101,
284    0b001011110110100101,
285    0b001111011110111101,
286    0b001111011110111100,
287    0b001111011110111101,
288    0b001111011110011101,
289    0b001111011110111110,
290    0b001000000000100001,
291    0b001000000000100010,
292    0b001001111111011101,
293    0b001000001110111110,
294 };
295 
/* Gfx6 sub-register lookup table: 32 entries, each written as a 15-bit
 * pattern.  Presumably indexed by the subreg-index field of a compacted
 * instruction -- NOTE(review): confirm against the consuming code.
 */
296 static const uint16_t gfx6_subreg_table[32] = {
297    0b000000000000000,
298    0b000000000000100,
299    0b000000110000000,
300    0b111000000000000,
301    0b011110000001000,
302    0b000010000000000,
303    0b000000000010000,
304    0b000110000001100,
305    0b001000000000000,
306    0b000001000000000,
307    0b000001010010100,
308    0b000000001010110,
309    0b010000000000000,
310    0b110000000000000,
311    0b000100000000000,
312    0b000000010000000,
313    0b000000000001000,
314    0b100000000000000,
315    0b000001010000000,
316    0b001010000000000,
317    0b001100000000000,
318    0b000000001010100,
319    0b101101010010100,
320    0b010100000000000,
321    0b000000010001111,
322    0b011000000000000,
323    0b111110000000000,
324    0b101000000000000,
325    0b000000000001111,
326    0b000100010001111,
327    0b001000010001111,
328    0b000110000000000,
329 };
330 
/* Gfx6 source-index lookup table: 32 entries, each written as a 12-bit
 * pattern.  Presumably shared by the source operands of a compacted
 * instruction -- NOTE(review): confirm which operands index this table.
 */
331 static const uint16_t gfx6_src_index_table[32] = {
332    0b000000000000,
333    0b010110001000,
334    0b010001101000,
335    0b001000101000,
336    0b011010010000,
337    0b000100100000,
338    0b010001101100,
339    0b010101110000,
340    0b011001111000,
341    0b001100101000,
342    0b010110001100,
343    0b001000100000,
344    0b010110001010,
345    0b000000000010,
346    0b010101010000,
347    0b010101101000,
348    0b111101001100,
349    0b111100101100,
350    0b011001110000,
351    0b010110001001,
352    0b010101011000,
353    0b001101001000,
354    0b010000101100,
355    0b010000000000,
356    0b001101110000,
357    0b001100010000,
358    0b001100000000,
359    0b010001101010,
360    0b001101111000,
361    0b000001110000,
362    0b001100100000,
363    0b001101010000,
364 };
365 
/* Gfx7 control-index lookup table: 32 entries, each written as a 19-bit
 * pattern (two bits wider than the G45/Gfx6 tables).  Entries are listed in
 * ascending numeric order except for the first one.
 * NOTE(review): the meaning of individual bits is defined by the consuming
 * code, which is outside this view.
 */
366 static const uint32_t gfx7_control_index_table[32] = {
367    0b0000000000000000010,
368    0b0000100000000000000,
369    0b0000100000000000001,
370    0b0000100000000000010,
371    0b0000100000000000011,
372    0b0000100000000000100,
373    0b0000100000000000101,
374    0b0000100000000000111,
375    0b0000100000000001000,
376    0b0000100000000001001,
377    0b0000100000000001101,
378    0b0000110000000000000,
379    0b0000110000000000001,
380    0b0000110000000000010,
381    0b0000110000000000011,
382    0b0000110000000000100,
383    0b0000110000000000101,
384    0b0000110000000000111,
385    0b0000110000000001001,
386    0b0000110000000001101,
387    0b0000110000000010000,
388    0b0000110000100000000,
389    0b0001000000000000000,
390    0b0001000000000000010,
391    0b0001000000000000100,
392    0b0001000000100000000,
393    0b0010110000000000000,
394    0b0010110000000010000,
395    0b0011000000000000000,
396    0b0011000000100000000,
397    0b0101000000000000000,
398    0b0101000000100000000,
399 };
400 
/* Gfx7 datatype lookup table: 32 entries, each written as an 18-bit pattern.
 * Presumably indexed by the datatype-index field of a compacted instruction
 * -- NOTE(review): confirm against the compaction code outside this view.
 */
401 static const uint32_t gfx7_datatype_table[32] = {
402    0b001000000000000001,
403    0b001000000000100000,
404    0b001000000000100001,
405    0b001000000001100001,
406    0b001000000010111101,
407    0b001000001011111101,
408    0b001000001110100001,
409    0b001000001110100101,
410    0b001000001110111101,
411    0b001000010000100001,
412    0b001000110000100000,
413    0b001000110000100001,
414    0b001001010010100101,
415    0b001001110010100100,
416    0b001001110010100101,
417    0b001111001110111101,
418    0b001111011110011101,
419    0b001111011110111100,
420    0b001111011110111101,
421    0b001111111110111100,
422    0b000000001000001100,
423    0b001000000000111101,
424    0b001000000010100101,
425    0b001000010000100000,
426    0b001001010010100100,
427    0b001001110010000100,
428    0b001010010100001001,
429    0b001101111110111101,
430    0b001111111110111101,
431    0b001011110110101100,
432    0b001010010100101000,
433    0b001010110100101000,
434 };
435 
/* Gfx7 sub-register lookup table: 32 entries, each written as a 15-bit
 * pattern.  Presumably indexed by the subreg-index field of a compacted
 * instruction -- NOTE(review): confirm against the consuming code.
 */
436 static const uint16_t gfx7_subreg_table[32] = {
437    0b000000000000000,
438    0b000000000000001,
439    0b000000000001000,
440    0b000000000001111,
441    0b000000000010000,
442    0b000000010000000,
443    0b000000100000000,
444    0b000000110000000,
445    0b000001000000000,
446    0b000001000010000,
447    0b000010100000000,
448    0b001000000000000,
449    0b001000000000001,
450    0b001000010000001,
451    0b001000010000010,
452    0b001000010000011,
453    0b001000010000100,
454    0b001000010000111,
455    0b001000010001000,
456    0b001000010001110,
457    0b001000010001111,
458    0b001000110000000,
459    0b001000111101000,
460    0b010000000000000,
461    0b010000110000000,
462    0b011000000000000,
463    0b011110010000111,
464    0b100000000000000,
465    0b101000000000000,
466    0b110000000000000,
467    0b111000000000000,
468    0b111000000011100,
469 };
470 
/* Gfx7 source-index lookup table: 32 entries, each written as a 12-bit
 * pattern, listed in ascending numeric order.  Presumably shared by the
 * source operands of a compacted instruction -- NOTE(review): confirm which
 * operands index this table.
 */
471 static const uint16_t gfx7_src_index_table[32] = {
472    0b000000000000,
473    0b000000000010,
474    0b000000010000,
475    0b000000010010,
476    0b000000011000,
477    0b000000100000,
478    0b000000101000,
479    0b000001001000,
480    0b000001010000,
481    0b000001110000,
482    0b000001111000,
483    0b001100000000,
484    0b001100000010,
485    0b001100001000,
486    0b001100010000,
487    0b001100010010,
488    0b001100100000,
489    0b001100101000,
490    0b001100111000,
491    0b001101000000,
492    0b001101000010,
493    0b001101001000,
494    0b001101010000,
495    0b001101100000,
496    0b001101101000,
497    0b001101110000,
498    0b001101110001,
499    0b001101111000,
500    0b010001101000,
501    0b010001101001,
502    0b010001101010,
503    0b010110001000,
504 };
505 
/* Gfx8 control-index lookup table: 32 entries, each written as a 19-bit
 * pattern.  The entries are the same values, in the same order, as
 * gfx7_control_index_table.
 * NOTE(review): the meaning of individual bits is defined by the consuming
 * code, which is outside this view.
 */
506 static const uint32_t gfx8_control_index_table[32] = {
507    0b0000000000000000010,
508    0b0000100000000000000,
509    0b0000100000000000001,
510    0b0000100000000000010,
511    0b0000100000000000011,
512    0b0000100000000000100,
513    0b0000100000000000101,
514    0b0000100000000000111,
515    0b0000100000000001000,
516    0b0000100000000001001,
517    0b0000100000000001101,
518    0b0000110000000000000,
519    0b0000110000000000001,
520    0b0000110000000000010,
521    0b0000110000000000011,
522    0b0000110000000000100,
523    0b0000110000000000101,
524    0b0000110000000000111,
525    0b0000110000000001001,
526    0b0000110000000001101,
527    0b0000110000000010000,
528    0b0000110000100000000,
529    0b0001000000000000000,
530    0b0001000000000000010,
531    0b0001000000000000100,
532    0b0001000000100000000,
533    0b0010110000000000000,
534    0b0010110000000010000,
535    0b0011000000000000000,
536    0b0011000000100000000,
537    0b0101000000000000000,
538    0b0101000000100000000,
539 };
540 
/* Gfx8 datatype lookup table: 32 entries, each written as a 21-bit pattern
 * (three bits wider than the Gfx7 table).  Presumably indexed by the
 * datatype-index field of a compacted instruction -- NOTE(review): confirm
 * against the compaction code outside this view.
 */
541 static const uint32_t gfx8_datatype_table[32] = {
542    0b001000000000000000001,
543    0b001000000000001000000,
544    0b001000000000001000001,
545    0b001000000000011000001,
546    0b001000000000101011101,
547    0b001000000010111011101,
548    0b001000000011101000001,
549    0b001000000011101000101,
550    0b001000000011101011101,
551    0b001000001000001000001,
552    0b001000011000001000000,
553    0b001000011000001000001,
554    0b001000101000101000101,
555    0b001000111000101000100,
556    0b001000111000101000101,
557    0b001011100011101011101,
558    0b001011101011100011101,
559    0b001011101011101011100,
560    0b001011101011101011101,
561    0b001011111011101011100,
562    0b000000000010000001100,
563    0b001000000000001011101,
564    0b001000000000101000101,
565    0b001000001000001000000,
566    0b001000101000101000100,
567    0b001000111000100000100,
568    0b001001001001000001001,
569    0b001010111011101011101,
570    0b001011111011101011101,
571    0b001001111001101001100,
572    0b001001001001001001000,
573    0b001001011001001001000,
574 };
575 
/* Gfx8 sub-register lookup table: 32 entries, each written as a 15-bit
 * pattern.
 *
 * NOTE(review): this matches gfx7_subreg_table everywhere except index 10,
 * which is 0b000001010000000 here and 0b000010100000000 in the Gfx7 table.
 * Confirm against the hardware documentation that the difference is
 * intentional.
 */
576 static const uint16_t gfx8_subreg_table[32] = {
577    0b000000000000000,
578    0b000000000000001,
579    0b000000000001000,
580    0b000000000001111,
581    0b000000000010000,
582    0b000000010000000,
583    0b000000100000000,
584    0b000000110000000,
585    0b000001000000000,
586    0b000001000010000,
587    0b000001010000000,
588    0b001000000000000,
589    0b001000000000001,
590    0b001000010000001,
591    0b001000010000010,
592    0b001000010000011,
593    0b001000010000100,
594    0b001000010000111,
595    0b001000010001000,
596    0b001000010001110,
597    0b001000010001111,
598    0b001000110000000,
599    0b001000111101000,
600    0b010000000000000,
601    0b010000110000000,
602    0b011000000000000,
603    0b011110010000111,
604    0b100000000000000,
605    0b101000000000000,
606    0b110000000000000,
607    0b111000000000000,
608    0b111000000011100,
609 };
610 
/* Gfx8 source-index lookup table: 32 entries, each written as a 12-bit
 * pattern.  The entries are the same values, in the same order, as
 * gfx7_src_index_table.
 * NOTE(review): confirm which source operands index this table; the
 * consuming code is outside this view.
 */
611 static const uint16_t gfx8_src_index_table[32] = {
612    0b000000000000,
613    0b000000000010,
614    0b000000010000,
615    0b000000010010,
616    0b000000011000,
617    0b000000100000,
618    0b000000101000,
619    0b000001001000,
620    0b000001010000,
621    0b000001110000,
622    0b000001111000,
623    0b001100000000,
624    0b001100000010,
625    0b001100001000,
626    0b001100010000,
627    0b001100010010,
628    0b001100100000,
629    0b001100101000,
630    0b001100111000,
631    0b001101000000,
632    0b001101000010,
633    0b001101001000,
634    0b001101010000,
635    0b001101100000,
636    0b001101101000,
637    0b001101110000,
638    0b001101110001,
639    0b001101111000,
640    0b010001101000,
641    0b010001101001,
642    0b010001101010,
643    0b010110001000,
644 };
645 
/* Gfx11 datatype lookup table: 32 entries, each written as a 21-bit pattern
 * (the same width as gfx8_datatype_table, with different contents).  Only a
 * datatype table is defined for Gfx11 in this part of the file.
 * NOTE(review): presumably the other Gfx11 fields reuse the Gfx8 tables --
 * confirm against the table-selection code outside this view.
 */
646 static const uint32_t gfx11_datatype_table[32] = {
647    0b001000000000000000001,
648    0b001000000000001000000,
649    0b001000000000001000001,
650    0b001000000000011000001,
651    0b001000000000101100101,
652    0b001000000101111100101,
653    0b001000000100101000001,
654    0b001000000100101000101,
655    0b001000000100101100101,
656    0b001000001000001000001,
657    0b001000011000001000000,
658    0b001000011000001000001,
659    0b001000101000101000101,
660    0b001000111000101000100,
661    0b001000111000101000101,
662    0b001100100100101100101,
663    0b001100101100100100101,
664    0b001100101100101100100,
665    0b001100101100101100101,
666    0b001100111100101100100,
667    0b000000000010000001100,
668    0b001000000000001100101,
669    0b001000000000101000101,
670    0b001000001000001000000,
671    0b001000101000101000100,
672    0b001000111000100000100,
673    0b001001001001000001001,
674    0b001101111100101100101,
675    0b001100111100101100101,
676    0b001001111001101001100,
677    0b001001001001001001000,
678    0b001001011001001001000,
679 };
680 
/* Gfx12 control-index lookup table: 32 entries, each written as a 21-bit
 * pattern.  The trailing comment on each entry gives a disassembly-style
 * reading of the pattern: write-enable "(W)", predicate flag, execution
 * size and mask offset, conditional modifier with its flag register,
 * saturate, and AccWrEn.
 */
681 static const uint32_t gfx12_control_index_table[32] = {
682    0b000000000000000000100, /* 	       (16|M0)                            */
683    0b000000000000000000011, /* 	       (8|M0)                             */
684    0b000000010000000000000, /* 	(W)    (1|M0)                             */
685    0b000000010000000000100, /* 	(W)    (16|M0)                            */
686    0b000000010000000000011, /* 	(W)    (8|M0)                             */
687    0b010000000000000000100, /* 	       (16|M0)  (ge)f0.0                  */
688    0b000000000000000100100, /* 	       (16|M16)                           */
689    0b010100000000000000100, /* 	       (16|M0)  (lt)f0.0                  */
690    0b000000000000000000000, /* 	       (1|M0)                             */
691    0b000010000000000000100, /* 	       (16|M0)           (sat)            */
692    0b000000000000000010011, /* 	       (8|M8)                             */
693    0b001100000000000000100, /* 	       (16|M0)  (gt)f0.0                  */
694    0b000100000000000000100, /* 	       (16|M0)  (eq)f0.0                  */
695    0b000100010000000000100, /* 	(W)    (16|M0)  (eq)f0.0                  */
696    0b001000000000000000100, /* 	       (16|M0)  (ne)f0.0                  */
697    0b000000000000100000100, /* 	(f0.0) (16|M0)                            */
698    0b010100000000000000011, /* 	       (8|M0)   (lt)f0.0                  */
699    0b000000000000110000100, /* 	(f1.0) (16|M0)                            */
700    0b000000010000000000001, /* 	(W)    (2|M0)                             */
701    0b000000000000101000100, /* 	(f0.1) (16|M0)                            */
702    0b000000000000111000100, /* 	(f1.1) (16|M0)                            */
703    0b010000010000000000100, /* 	(W)    (16|M0)  (ge)f0.0                  */
704    0b000000000000000100011, /* 	       (8|M16)                            */
705    0b000000000000000110011, /* 	       (8|M24)                            */
706    0b010100010000000000100, /* 	(W)    (16|M0)  (lt)f0.0                  */
707    0b010000000000000000011, /* 	       (8|M0)   (ge)f0.0                  */
708    0b000100010000000000000, /* 	(W)    (1|M0)   (eq)f0.0                  */
709    0b000010000000000000011, /* 	       (8|M0)            (sat)            */
710    0b010100000000010000100, /* 	       (16|M0)  (lt)f1.0                  */
711    0b000100000000000000011, /* 	       (8|M0)   (eq)f0.0                  */
712    0b000001000000000000011, /* 	       (8|M0)                   {AccWrEn} */
713    0b000000010000000100100, /* 	(W)    (16|M16)                           */
714 };
715 
/* Gfx12 datatype lookup table: 32 entries, each written as a 20-bit pattern.
 * The trailing comment on each entry lists three operands with register
 * file (grf/arf/imm), a stride on the first operand, and a type suffix.
 * NOTE(review): the three columns are presumably dst, src0 and src1 in that
 * order -- confirm against the consuming code.
 */
716 static const uint32_t gfx12_datatype_table[32] = {
717    0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
718    0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
719    0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
720    0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
721    0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
722    0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
723    0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
724    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725    0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
726    0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
727    0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
728    0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
729    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730    0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
731    0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
732    0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733    0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
734    0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
735    0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737    0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
738    0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739    0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740    0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
741    0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
742    0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
743    0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
744    0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745    0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
746    0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
747    0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
748    0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
749 };
750 
/* Gfx12 sub-register lookup table: 32 entries, each written as a 15-bit
 * pattern made of three 5-bit sub-register numbers.  The three columns of
 * each trailing comment are listed low bits first: bits 4:0, then bits 9:5,
 * then bits 14:10.
 * NOTE(review): the columns are presumably dst, src0 and src1 in that order
 * -- confirm against the consuming code.
 */
751 static const uint16_t gfx12_subreg_table[32] = {
752    0b000000000000000, /* .0  .0  .0  */
753    0b100000000000000, /* .0  .0  .16 */
754    0b001000000000000, /* .0  .0  .4  */
755    0b011000000000000, /* .0  .0  .12 */
756    0b000000010000000, /* .0  .4  .0  */
757    0b010000000000000, /* .0  .0  .8  */
758    0b101000000000000, /* .0  .0  .20 */
759    0b000000000001000, /* .8  .0  .0  */
760    0b000000100000000, /* .0  .8  .0  */
761    0b110000000000000, /* .0  .0  .24 */
762    0b111000000000000, /* .0  .0  .28 */
763    0b000001000000000, /* .0  .16 .0  */
764    0b000000000000100, /* .4  .0  .0  */
765    0b000001100000000, /* .0  .24 .0  */
766    0b000001010000000, /* .0  .20 .0  */
767    0b000000110000000, /* .0  .12 .0  */
768    0b000001110000000, /* .0  .28 .0  */
769    0b000000000011100, /* .28 .0  .0  */
770    0b000000000010000, /* .16 .0  .0  */
771    0b000000000001100, /* .12 .0  .0  */
772    0b000000000011000, /* .24 .0  .0  */
773    0b000000000010100, /* .20 .0  .0  */
774    0b000000000000010, /* .2  .0  .0  */
775    0b000000101000000, /* .0  .10 .0  */
776    0b000000001000000, /* .0  .2  .0  */
777    0b000000010000100, /* .4  .4  .0  */
778    0b000000001011100, /* .28 .2  .0  */
779    0b000000001000010, /* .2  .2  .0  */
780    0b000000110001100, /* .12 .12 .0  */
781    0b000000000100000, /* .0  .1  .0  */
782    0b000000001100000, /* .0  .3  .0  */
783    0b110001100000000, /* .0  .24 .24 */
784 };
785 
/* Gfx12 src0-index lookup table: 16 entries (half the size of the pre-Gfx12
 * source-index tables), each written as a 12-bit pattern.  The trailing
 * comment on each entry shows the source modifier (negate / abs) and the
 * region <vstride;width,hstride> it stands for; the last entry is a
 * register-indirect r[a] form.
 */
786 static const uint16_t gfx12_src0_index_table[16] = {
787    0b010001100100, /*       r<8;8,1>  */
788    0b000000000000, /*       r<0;1,0>  */
789    0b010001100110, /*      -r<8;8,1>  */
790    0b010001100101, /*  (abs)r<8;8,1>  */
791    0b000000000010, /*      -r<0;1,0>  */
792    0b001000000000, /*       r<2;1,0>  */
793    0b001001000000, /*       r<2;4,0>  */
794    0b001101000000, /*       r<4;4,0>  */
795    0b001000100100, /*       r<2;2,1>  */
796    0b001100000000, /*       r<4;1,0>  */
797    0b001000100110, /*      -r<2;2,1>  */
798    0b001101000100, /*       r<4;4,1>  */
799    0b010001100111, /* -(abs)r<8;8,1>  */
800    0b000100000000, /*       r<1;1,0>  */
801    0b000000000001, /*  (abs)r<0;1,0>  */
802    0b111100010000, /*       r[a]<1,0> */
803 };
804 
/* Gfx12 src1-index lookup table: 16 entries, each written as a 12-bit
 * pattern.  The trailing comment on each entry shows the source modifier and
 * region it stands for.  The bit layout differs from the src0 table: here
 * the negate/abs modifiers occupy the two highest bits, whereas in
 * gfx12_src0_index_table they occupy the two lowest.
 */
805 static const uint16_t gfx12_src1_index_table[16] = {
806    0b000100011001, /*       r<8;8,1> */
807    0b000000000000, /*       r<0;1,0> */
808    0b100100011001, /*      -r<8;8,1> */
809    0b100000000000, /*      -r<0;1,0> */
810    0b010100011001, /*  (abs)r<8;8,1> */
811    0b100011010000, /*      -r<4;4,0> */
812    0b000010000000, /*       r<2;1,0> */
813    0b000010001001, /*       r<2;2,1> */
814    0b100010001001, /*      -r<2;2,1> */
815    0b000011010000, /*       r<4;4,0> */
816    0b000011010001, /*       r<4;4,1> */
817    0b000011000000, /*       r<4;1,0> */
818    0b110100011001, /* -(abs)r<8;8,1> */
819    0b010000000000, /*  (abs)r<0;1,0> */
820    0b110000000000, /* -(abs)r<0;1,0> */
821    0b100011010001, /*      -r<4;4,1> */
822 };
823 
/* XeHP src0-index lookup table: 16 entries, each written as a 12-bit
 * pattern, with the source modifier and region noted per entry.  Compared
 * with gfx12_src0_index_table, the <8;8,1> and <2;2,1> regions are gone and
 * <1;1,0>, <8;8,0>, <1;4,0> and <8;4,2> forms appear instead.
 */
824 static const uint16_t xehp_src0_index_table[16] = {
825    0b000100000000, /*       r<1;1,0>  */
826    0b000000000000, /*       r<0;1,0>  */
827    0b000100000010, /*      -r<1;1,0>  */
828    0b000100000001, /*  (abs)r<1;1,0>  */
829    0b000000000010, /*      -r<0;1,0>  */
830    0b001000000000, /*       r<2;1,0>  */
831    0b001001000000, /*       r<2;4,0>  */
832    0b001101000000, /*       r<4;4,0>  */
833    0b001100000000, /*       r<4;1,0>  */
834    0b000100000011, /* -(abs)r<1;1,0>  */
835    0b000000000001, /*  (abs)r<0;1,0>  */
836    0b111100010000, /*       r[a]<1,0> */
837    0b010001100000, /*       r<8;8,0>  */
838    0b000101000000, /*       r<1;4,0>  */
839    0b010001001000, /*       r<8;4,2>  */
840    0b001000000010, /*      -r<2;1,0>  */
841 };
842 
/* XeHP src1-index lookup table: 16 entries, each written as a 12-bit
 * pattern, with the source modifier and region noted per entry.  As in
 * gfx12_src1_index_table, the negate/abs modifiers occupy the two highest
 * bits of each pattern.
 */
843 static const uint16_t xehp_src1_index_table[16] = {
844    0b000001000000, /*       r<1;1,0>    */
845    0b000000000000, /*       r<0;1,0>    */
846    0b100001000000, /*      -r<1;1,0>    */
847    0b100000000000, /*      -r<0;1,0>    */
848    0b010001000000, /*  (abs)r<1;1,0>    */
849    0b100011010000, /*      -r<4;4,0>    */
850    0b000010000000, /*       r<2;1,0>    */
851    0b000011010000, /*       r<4;4,0>    */
852    0b000011000000, /*       r<4;1,0>    */
853    0b110001000000, /* -(abs)r<1;1,0>    */
854    0b010000000000, /*  (abs)r<0;1,0>    */
855    0b110000000000, /* -(abs)r<0;1,0>    */
856    0b000100011000, /*       r<8;8,0>    */
857    0b100010000000, /*      -r<2;1,0>    */
858    0b100000001001, /*      -r<0;2,1>    */
859    0b100001000100, /*      -r[a]<1;1,0> */
860 };
861 
862 /* This is actually the control index table for Cherryview (26 bits), but the
863  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
864  * the start.
865  *
866  * The low 24 bits have the same mappings on both hardware.
867  */
/* Four entries, each written as a 26-bit pattern (the Cherryview width);
 * the top two bits are zero in every entry.  NOTE(review): presumably
 * indexed by a 2-bit field of a compacted 3-src instruction -- confirm
 * against the consuming code.
 */
868 static const uint32_t gfx8_3src_control_index_table[4] = {
869    0b00100000000110000000000001,
870    0b00000000000110000000000001,
871    0b00000000001000000000000001,
872    0b00000000001000000000100001,
873 };
874 
875 /* This is actually the source index table for Cherryview (49 bits), but the
876  * only difference from Broadwell (46 bits) is that it has three extra 0-bits
877  * at the start.
878  *
879  * The low 44 bits have the same mappings on both hardware, and since the high
880  * three bits on Broadwell are zero, we can reuse Cherryview's table.
881  */
/* Four entries, each written as a 49-bit pattern, hence the uint64_t element
 * type.  The entries differ from one another only in their lowest six bits.
 */
882 static const uint64_t gfx8_3src_source_index_table[4] = {
883    0b0000001110010011100100111001000001111000000000000,
884    0b0000001110010011100100111001000001111000000000010,
885    0b0000001110010011100100111001000001111000000001000,
886    0b0000001110010011100100111001000001111000000100000,
887 };
888 
/* Gfx12 3-src control-index lookup table: 32 entries, each written as a
 * 36-bit pattern, hence the uint64_t element type.  The trailing comment on
 * each entry gives a disassembly-style reading: write-enable "(W)",
 * execution size and mask offset, saturate, the destination register file
 * with its stride and type, then three further types.
 * NOTE(review): the three trailing types are presumably the three source
 * operands -- confirm against the consuming code.
 */
889 static const uint64_t gfx12_3src_control_index_table[32] = {
890    0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
891    0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
892    0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
893    0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
894    0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
895    0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
896    0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
897    0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
898    0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
899    0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
900    0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
901    0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
902    0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
903    0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
904    0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
905    0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
906    0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
907    0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
908    0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
909    0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
910    0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
911    0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
912    0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
913    0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
914    0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
915    0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
916    0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
917    0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
918    0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
919    0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
920    0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
921    0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
922 };
923 
/* 37-bit uncompacted control words for three-source instructions.
 * set_3src_control_index() searches this table when verx10 >= 125; the
 * position of the matching entry is stored as the compacted 3src control
 * index.  The comment after each entry gives the assembly-level reading of
 * that bit pattern.
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
};
958 
/* 21-bit uncompacted source words for three-source instructions.
 * set_3src_source_index() searches this table on ver >= 12 hardware below
 * verx10 125; the position of the matching entry is stored as the compacted
 * 3src source index.  The comment after each entry gives the assembly-level
 * reading of that bit pattern.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
993 
/* 21-bit uncompacted source words for three-source instructions.
 * set_3src_source_index() searches this table when verx10 >= 125; the
 * position of the matching entry is stored as the compacted 3src source
 * index.  The comment after each entry gives the assembly-level reading of
 * that bit pattern; some patterns are shared with dpas encodings.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
   0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
   0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
   0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
   0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
   0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
   0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
   0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
   0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
   0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
   0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
   0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
   0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
   0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
   0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
   0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
   0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
                             * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
                             * dpas.*x1  grf:f      grf:bf       grf:bf
                             */
   0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
   0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
   0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
   0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
   0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
   0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
   0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
   0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
};
1031 
/* 20-bit uncompacted subregister words for three-source instructions on
 * ver >= 12, searched by set_3src_subreg_index().  Each word packs four 5-bit
 * subregister fields; the comment after each entry lists the four subregister
 * offsets that the pattern encodes.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
1066 
/* Per-device lookup tables consulted by the set_*_index() helpers when they
 * translate an uncompacted instruction field group into a compacted table
 * index.
 */
struct compaction_state {
   /* Device description; the helpers branch on its ver field. */
   const struct intel_device_info *devinfo;
   /* Uncompacted control words; the first 32 entries are searched. */
   const uint32_t *control_index_table;
   /* Uncompacted datatype words; the first 32 entries are searched. */
   const uint32_t *datatype_table;
   /* Uncompacted subregister words; the first 32 entries are searched. */
   const uint16_t *subreg_table;
   /* Uncompacted src0 words (12 bits each). */
   const uint16_t *src0_index_table;
   /* Uncompacted src1 words (12 bits each). */
   const uint16_t *src1_index_table;
};
1075 
/* Forward declaration; the definition appears later in this file. */
static void compaction_state_init(struct compaction_state *c,
                                  const struct intel_device_info *devinfo);
1078 
1079 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1080 set_control_index(const struct compaction_state *c,
1081                   brw_compact_inst *dst, const brw_inst *src)
1082 {
1083    const struct intel_device_info *devinfo = c->devinfo;
1084    uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1085 
1086    if (devinfo->ver >= 12) {
1087       uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
1088                     (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1089                     (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1090                     (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1091                     (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1092                     (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1093                     (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1094                     (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
1095                     (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1096                     (brw_inst_bits(src, 18, 16));        /*  3b */
1097    } else if (devinfo->ver >= 8) {
1098       uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
1099                     (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
1100                     (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
1101                     (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
1102                     (brw_inst_bits(src,  8,  8));        /*  1b */
1103    } else {
1104       uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
1105                     (brw_inst_bits(src, 23,  8));        /* 16b */
1106 
1107       /* On gfx7, the flag register and subregister numbers are integrated into
1108        * the control index.
1109        */
1110       if (devinfo->ver == 7)
1111          uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1112    }
1113 
1114    for (int i = 0; i < 32; i++) {
1115       if (c->control_index_table[i] == uncompacted) {
1116          brw_compact_inst_set_control_index(devinfo, dst, i);
1117 	 return true;
1118       }
1119    }
1120 
1121    return false;
1122 }
1123 
1124 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1125 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126                    const brw_inst *src, bool is_immediate)
1127 {
1128    const struct intel_device_info *devinfo = c->devinfo;
1129    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1130 
1131    if (devinfo->ver >= 12) {
1132       uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
1133                     (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
1134                     (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
1135                     (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
1136                     (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
1137                     (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
1138                     (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
1139                     (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
1140                     (brw_inst_bits(src, 35, 35));        /*  1b */
1141 
1142       /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1143        * is present
1144        */
1145       if (!is_immediate) {
1146          uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1147       }
1148    } else if (devinfo->ver >= 8) {
1149       uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
1150                     (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1151                     (brw_inst_bits(src, 46, 35));        /* 12b */
1152    } else {
1153       uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
1154                     (brw_inst_bits(src, 46, 32));        /* 15b */
1155    }
1156 
1157    for (int i = 0; i < 32; i++) {
1158       if (c->datatype_table[i] == uncompacted) {
1159          brw_compact_inst_set_datatype_index(devinfo, dst, i);
1160 	 return true;
1161       }
1162    }
1163 
1164    return false;
1165 }
1166 
1167 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1168 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169                  const brw_inst *src, bool is_immediate)
1170 {
1171    const struct intel_device_info *devinfo = c->devinfo;
1172    uint16_t uncompacted; /* 15b */
1173 
1174    if (devinfo->ver >= 12) {
1175       uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1176                     (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1177 
1178       if (!is_immediate)
1179          uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1180    } else {
1181       uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1182                     (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1183 
1184       if (!is_immediate)
1185          uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1186    }
1187 
1188    for (int i = 0; i < 32; i++) {
1189       if (c->subreg_table[i] == uncompacted) {
1190          brw_compact_inst_set_subreg_index(devinfo, dst, i);
1191 	 return true;
1192       }
1193    }
1194 
1195    return false;
1196 }
1197 
1198 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1199 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200                const brw_inst *src)
1201 {
1202    const struct intel_device_info *devinfo = c->devinfo;
1203    uint16_t uncompacted; /* 12b */
1204    int table_len;
1205 
1206    if (devinfo->ver >= 12) {
1207       table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208       uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
1209                     (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1210                     (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1211                     (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1212                     (brw_inst_bits(src, 45, 44));       /*  2b */
1213    } else {
1214       table_len = ARRAY_SIZE(gfx8_src_index_table);
1215       uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1216    }
1217 
1218    for (int i = 0; i < table_len; i++) {
1219       if (c->src0_index_table[i] == uncompacted) {
1220          brw_compact_inst_set_src0_index(devinfo, dst, i);
1221 	 return true;
1222       }
1223    }
1224 
1225    return false;
1226 }
1227 
1228 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1229 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230                const brw_inst *src, bool is_immediate, unsigned imm)
1231 {
1232    const struct intel_device_info *devinfo = c->devinfo;
1233    if (is_immediate) {
1234       if (devinfo->ver >= 12) {
1235          /* src1 index takes the low 4 bits of the 12-bit compacted value */
1236          brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1237       } else {
1238          /* src1 index takes the high 5 bits of the 13-bit compacted value */
1239          brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1240       }
1241       return true;
1242    } else {
1243       uint16_t uncompacted; /* 12b */
1244       int table_len;
1245 
1246       if (devinfo->ver >= 12) {
1247          table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248          uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1249                        (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1250                        (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1251                        (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1252                        (brw_inst_bits(src,  97,  96));        /*  2b */
1253       } else {
1254          table_len = ARRAY_SIZE(gfx8_src_index_table);
1255          uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1256       }
1257 
1258       for (int i = 0; i < table_len; i++) {
1259          if (c->src1_index_table[i] == uncompacted) {
1260             brw_compact_inst_set_src1_index(devinfo, dst, i);
1261             return true;
1262          }
1263       }
1264    }
1265 
1266    return false;
1267 }
1268 
1269 static bool
set_3src_control_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1270 set_3src_control_index(const struct intel_device_info *devinfo,
1271                        brw_compact_inst *dst, const brw_inst *src)
1272 {
1273    assert(devinfo->ver >= 8);
1274 
1275    if (devinfo->verx10 >= 125) {
1276       uint64_t uncompacted =             /* 37b/XeHP+ */
1277          (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
1278          (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
1279          (brw_inst_bits(src, 82, 80) << 27) | /*  3b */
1280          (brw_inst_bits(src, 50, 50) << 26) | /*  1b */
1281          (brw_inst_bits(src, 49, 48) << 24) | /*  2b */
1282          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1283          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1284          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1285          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1286          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1287          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1288          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1289          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1290          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1291          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1292          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1293          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1294          (brw_inst_bits(src, 18, 16));        /*  3b */
1295 
1296       for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297          if (xehp_3src_control_index_table[i] == uncompacted) {
1298             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1299             return true;
1300          }
1301       }
1302    } else if (devinfo->ver >= 12) {
1303       uint64_t uncompacted =             /* 36b/TGL+ */
1304          (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1305          (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1306          (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1307          (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1308          (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1309          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1310          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1311          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1312          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1313          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1314          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1315          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1316          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1317          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1318          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1319          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1320          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1321          (brw_inst_bits(src, 18, 16));        /*  3b */
1322 
1323       for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324          if (gfx12_3src_control_index_table[i] == uncompacted) {
1325             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1326             return true;
1327          }
1328       }
1329    } else {
1330       uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331          (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1332          (brw_inst_bits(src, 28,  8));         /* 21b */
1333 
1334       if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1335          uncompacted |=
1336             brw_inst_bits(src, 36, 35) << 24;  /*  2b */
1337       }
1338 
1339       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340          if (gfx8_3src_control_index_table[i] == uncompacted) {
1341             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1342             return true;
1343          }
1344       }
1345    }
1346 
1347    return false;
1348 }
1349 
1350 static bool
set_3src_source_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1351 set_3src_source_index(const struct intel_device_info *devinfo,
1352                       brw_compact_inst *dst, const brw_inst *src)
1353 {
1354    assert(devinfo->ver >= 8);
1355 
1356    if (devinfo->ver >= 12) {
1357       uint32_t uncompacted =               /* 21b/TGL+ */
1358          (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1359          (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1360          (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1361          (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1362          (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1363          (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1364          (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1365          (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1366          (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1367          (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1368          (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1369          (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1370          (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1371          (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1372          (brw_inst_bits(src,  35,  35));        /*  1b */
1373 
1374       const uint32_t *three_src_source_index_table =
1375          devinfo->verx10 >= 125 ?
1376          xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377       const uint32_t three_src_source_index_table_len =
1378          devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379                                   ARRAY_SIZE(gfx12_3src_source_index_table);
1380 
1381       for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382          if (three_src_source_index_table[i] == uncompacted) {
1383             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1384             return true;
1385          }
1386       }
1387    } else {
1388       uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1389          (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1390          (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1391          (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1392          (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1393          (brw_inst_bits(src,  55,  37));          /* 19b */
1394 
1395       if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1396          uncompacted |=
1397             (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398             (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399             (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
1400       } else {
1401          uncompacted |=
1402             (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403             (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
1404       }
1405 
1406       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407          if (gfx8_3src_source_index_table[i] == uncompacted) {
1408             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1409             return true;
1410          }
1411       }
1412    }
1413 
1414    return false;
1415 }
1416 
1417 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1418 set_3src_subreg_index(const struct intel_device_info *devinfo,
1419                       brw_compact_inst *dst, const brw_inst *src)
1420 {
1421    assert(devinfo->ver >= 12);
1422 
1423    uint32_t uncompacted =               /* 20b/TGL+ */
1424       (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1425       (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1426       (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1427       (brw_inst_bits(src,  55,  51));        /*  5b */
1428 
1429    for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430       if (gfx12_3src_subreg_table[i] == uncompacted) {
1431          brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1432 	 return true;
1433       }
1434    }
1435 
1436    return false;
1437 }
1438 
1439 static bool
has_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src)1440 has_unmapped_bits(const struct intel_device_info *devinfo, const brw_inst *src)
1441 {
1442    /* EOT can only be mapped on a send if the src1 is an immediate */
1443    if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
1444         brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
1445        brw_inst_eot(devinfo, src))
1446       return true;
1447 
1448    /* Check for instruction bits that don't map to any of the fields of the
1449     * compacted instruction.  The instruction cannot be compacted if any of
1450     * them are set.  They overlap with:
1451     *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1452     *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1453     *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1454     *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1455     *  - UIP[31] (bit 95 on Gfx8)
1456     */
1457    if (devinfo->ver >= 12) {
1458       assert(!brw_inst_bits(src, 7,  7));
1459       return false;
1460    } else if (devinfo->ver >= 8) {
1461       assert(!brw_inst_bits(src, 7,  7));
1462       return brw_inst_bits(src, 95, 95) ||
1463              brw_inst_bits(src, 47, 47) ||
1464              brw_inst_bits(src, 11, 11);
1465    } else {
1466       assert(!brw_inst_bits(src, 7,  7) &&
1467              !(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1468       return brw_inst_bits(src, 95, 91) ||
1469              brw_inst_bits(src, 47, 47);
1470    }
1471 }
1472 
1473 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src)1474 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1475                        const brw_inst *src)
1476 {
1477    /* Check for three-source instruction bits that don't map to any of the
1478     * fields of the compacted instruction.  All of them seem to be reserved
1479     * bits currently.
1480     */
1481    if (devinfo->ver >= 12) {
1482       assert(!brw_inst_bits(src, 7, 7));
1483    } else if (devinfo->ver >= 9 || devinfo->is_cherryview) {
1484       assert(!brw_inst_bits(src, 127, 127) &&
1485              !brw_inst_bits(src, 7,  7));
1486    } else {
1487       assert(devinfo->ver >= 8);
1488       assert(!brw_inst_bits(src, 127, 126) &&
1489              !brw_inst_bits(src, 105, 105) &&
1490              !brw_inst_bits(src, 84, 84) &&
1491              !brw_inst_bits(src, 7,  7));
1492 
1493       /* Src1Type and Src2Type, used for mixed-precision floating point */
1494       if (brw_inst_bits(src, 36, 35))
1495          return true;
1496    }
1497 
1498    return false;
1499 }
1500 
1501 static bool
brw_try_compact_3src_instruction(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1502 brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1503                                  brw_compact_inst *dst, const brw_inst *src)
1504 {
1505    assert(devinfo->ver >= 8);
1506 
1507    if (has_3src_unmapped_bits(devinfo, src))
1508       return false;
1509 
1510 #define compact(field) \
1511    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1512 #define compact_a16(field) \
1513    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1514 
1515    compact(hw_opcode);
1516 
1517    if (!set_3src_control_index(devinfo, dst, src))
1518       return false;
1519 
1520    if (!set_3src_source_index(devinfo, dst, src))
1521       return false;
1522 
1523    if (devinfo->ver >= 12) {
1524       if (!set_3src_subreg_index(devinfo, dst, src))
1525          return false;
1526 
1527       compact(swsb);
1528       compact(debug_control);
1529       compact(dst_reg_nr);
1530       compact(src0_reg_nr);
1531       compact(src1_reg_nr);
1532       compact(src2_reg_nr);
1533    } else {
1534       compact(dst_reg_nr);
1535       compact_a16(src0_rep_ctrl);
1536       compact(debug_control);
1537       compact(saturate);
1538       compact_a16(src1_rep_ctrl);
1539       compact_a16(src2_rep_ctrl);
1540       compact(src0_reg_nr);
1541       compact(src1_reg_nr);
1542       compact(src2_reg_nr);
1543       compact_a16(src0_subreg_nr);
1544       compact_a16(src1_subreg_nr);
1545       compact_a16(src2_subreg_nr);
1546    }
1547    brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1548 
1549 #undef compact
1550 #undef compact_a16
1551 
1552    return true;
1553 }
1554 
1555 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1556  * sources, and a 13th bit that's replicated through the high 20 bits.
1557  *
1558  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1559  * of packed vectors as compactable immediates.
1560  *
1561  * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1562  * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1563  * while for unsigned integers it is not.
1564  *
1565  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1566  */
1567 static int
compact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned imm)1568 compact_immediate(const struct intel_device_info *devinfo,
1569                   enum brw_reg_type type, unsigned imm)
1570 {
1571    if (devinfo->ver >= 12) {
1572       /* 16-bit immediates need to be replicated through the 32-bit immediate
1573        * field
1574        */
1575       switch (type) {
1576       case BRW_REGISTER_TYPE_W:
1577       case BRW_REGISTER_TYPE_UW:
1578       case BRW_REGISTER_TYPE_HF:
1579          if ((imm >> 16) != (imm & 0xffff))
1580             return -1;
1581          break;
1582       default:
1583          break;
1584       }
1585 
1586       switch (type) {
1587       case BRW_REGISTER_TYPE_F:
1588          /* We get the high 12-bits as-is; rest must be zero */
1589          if ((imm & 0xfffff) == 0)
1590             return (imm >> 20) & 0xfff;
1591          break;
1592       case BRW_REGISTER_TYPE_HF:
1593          /* We get the high 12-bits as-is; rest must be zero */
1594          if ((imm & 0xf) == 0)
1595             return (imm >> 4) & 0xfff;
1596          break;
1597       case BRW_REGISTER_TYPE_UD:
1598       case BRW_REGISTER_TYPE_VF:
1599       case BRW_REGISTER_TYPE_UV:
1600       case BRW_REGISTER_TYPE_V:
1601          /* We get the low 12-bits as-is; rest must be zero */
1602          if ((imm & 0xfffff000) == 0)
1603             return imm & 0xfff;
1604          break;
1605       case BRW_REGISTER_TYPE_UW:
1606          /* We get the low 12-bits as-is; rest must be zero */
1607          if ((imm & 0xf000) == 0)
1608             return imm & 0xfff;
1609          break;
1610       case BRW_REGISTER_TYPE_D:
1611          /* We get the low 11-bits as-is; 12th is replicated */
1612          if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1613             return imm & 0xfff;
1614          break;
1615       case BRW_REGISTER_TYPE_W:
1616          /* We get the low 11-bits as-is; 12th is replicated */
1617          if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1618             return imm & 0xfff;
1619          break;
1620       case BRW_REGISTER_TYPE_NF:
1621       case BRW_REGISTER_TYPE_DF:
1622       case BRW_REGISTER_TYPE_Q:
1623       case BRW_REGISTER_TYPE_UQ:
1624       case BRW_REGISTER_TYPE_B:
1625       case BRW_REGISTER_TYPE_UB:
1626          return -1;
1627       }
1628    } else {
1629       /* We get the low 12 bits as-is; 13th is replicated */
1630       if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1631          return imm & 0x1fff;
1632       }
1633    }
1634 
1635    return -1;
1636 }
1637 
1638 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1639 uncompact_immediate(const struct intel_device_info *devinfo,
1640                     enum brw_reg_type type, unsigned compact_imm)
1641 {
1642    if (devinfo->ver >= 12) {
1643       switch (type) {
1644       case BRW_REGISTER_TYPE_F:
1645          return compact_imm << 20;
1646       case BRW_REGISTER_TYPE_HF:
1647          return (compact_imm << 20) | (compact_imm << 4);
1648       case BRW_REGISTER_TYPE_UD:
1649       case BRW_REGISTER_TYPE_VF:
1650       case BRW_REGISTER_TYPE_UV:
1651       case BRW_REGISTER_TYPE_V:
1652          return compact_imm;
1653       case BRW_REGISTER_TYPE_UW:
1654          /* Replicate */
1655          return compact_imm << 16 | compact_imm;
1656       case BRW_REGISTER_TYPE_D:
1657          /* Extend the 12th bit into the high 20 bits */
1658          return (int)(compact_imm << 20) >> 20;
1659       case BRW_REGISTER_TYPE_W:
1660          /* Extend the 12th bit into the high 4 bits and replicate */
1661          return (  (int)(compact_imm << 20) >> 4) |
1662                 ((short)(compact_imm <<  4) >> 4);
1663       case BRW_REGISTER_TYPE_NF:
1664       case BRW_REGISTER_TYPE_DF:
1665       case BRW_REGISTER_TYPE_Q:
1666       case BRW_REGISTER_TYPE_UQ:
1667       case BRW_REGISTER_TYPE_B:
1668       case BRW_REGISTER_TYPE_UB:
1669          unreachable("not reached");
1670       }
1671    } else {
1672       /* Replicate the 13th bit into the high 19 bits */
1673       return (int)(compact_imm << 19) >> 19;
1674    }
1675 
1676    unreachable("not reached");
1677 }
1678 
1679 static bool
has_immediate(const struct intel_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1680 has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1681               enum brw_reg_type *type)
1682 {
1683    if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1684       *type = brw_inst_src0_type(devinfo, inst);
1685       return *type != INVALID_REG_TYPE;
1686    } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1687       *type = brw_inst_src1_type(devinfo, inst);
1688       return *type != INVALID_REG_TYPE;
1689    }
1690 
1691    return false;
1692 }
1693 
1694 /**
1695  * Applies some small changes to instruction types to increase chances of
1696  * compaction.
1697  */
1698 static brw_inst
precompact(const struct intel_device_info * devinfo,brw_inst inst)1699 precompact(const struct intel_device_info *devinfo, brw_inst inst)
1700 {
1701    if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1702       return inst;
1703 
1704    /* The Bspec's section titled "Non-present Operands" claims that if src0
1705     * is an immediate that src1's type must be the same as that of src0.
1706     *
1707     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1708     * that do not follow this rule. E.g., from the IVB/HSW table:
1709     *
1710     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1711     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1712     *
1713     * And from the SNB table:
1714     *
1715     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1716     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1717     *
1718     * Neither of these cause warnings from the simulator when used,
1719     * compacted or otherwise. In fact, all compaction mappings that have an
1720     * immediate in src0 use a:ud for src1.
1721     *
1722     * The GM45 instruction compaction tables do not contain mapped meanings
1723     * so it's not clear whether it has the restriction. We'll assume it was
1724     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1725     *
1726     * Don't do any of this for 64-bit immediates, since the src1 fields
1727     * overlap with the immediate and setting them would overwrite the
1728     * immediate we set.
1729     */
1730    if (devinfo->ver >= 6 &&
1731        !(devinfo->is_haswell &&
1732          brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1733        !(devinfo->ver >= 8 &&
1734          (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1735           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1736           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1737       brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1738    }
1739 
1740    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1741     * for immediate values. Presumably the hardware engineers realized
1742     * that the only useful floating-point value that could be represented
1743     * in this format is 0.0, which can also be represented as a VF-typed
1744     * immediate, so they gave us the previously mentioned mapping on IVB+.
1745     *
1746     * Strangely, we do have a mapping for imm:f in src1, so we don't need
1747     * to do this there.
1748     *
1749     * If we see a 0.0:F, change the type to VF so that it can be compacted.
1750     *
1751     * Compaction of floating-point immediates is improved on Gfx12, thus
1752     * removing the need for this.
1753     */
1754    if (devinfo->ver < 12 &&
1755        brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1756        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1757        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1758        brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1759       enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1760       brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1761    }
1762 
1763    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1764     * set the types to :UD so the instruction can be compacted.
1765     *
1766     * FINISHME: Use dst:f | imm:f on Gfx12
1767     */
1768    if (devinfo->ver < 12 &&
1769        compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1770                          brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1771        brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1772        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1773        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1774       enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1775       enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1776 
1777       brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1778       brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1779    }
1780 
1781    return inst;
1782 }
1783 
1784 /**
1785  * Tries to compact instruction src into dst.
1786  *
1787  * It doesn't modify dst unless src is compactable, which is relied on by
1788  * brw_compact_instructions().
1789  */
1790 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1791 try_compact_instruction(const struct compaction_state *c,
1792                         brw_compact_inst *dst, const brw_inst *src)
1793 {
1794    const struct intel_device_info *devinfo = c->devinfo;
1795    brw_compact_inst temp;
1796 
1797    assert(brw_inst_cmpt_control(devinfo, src) == 0);
1798 
1799    if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1800       if (devinfo->ver >= 8) {
1801          memset(&temp, 0, sizeof(temp));
1802          if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1803             *dst = temp;
1804             return true;
1805          } else {
1806             return false;
1807          }
1808       } else {
1809          return false;
1810       }
1811    }
1812 
1813    enum brw_reg_type type;
1814    bool is_immediate = has_immediate(devinfo, src, &type);
1815 
1816    unsigned compacted_imm = 0;
1817 
1818    if (is_immediate) {
1819       /* Instructions with immediates cannot be compacted on Gen < 6 */
1820       if (devinfo->ver < 6)
1821          return false;
1822 
1823       compacted_imm = compact_immediate(devinfo, type,
1824                                         brw_inst_imm_ud(devinfo, src));
1825       if (compacted_imm == -1)
1826          return false;
1827    }
1828 
1829    if (has_unmapped_bits(devinfo, src))
1830       return false;
1831 
1832    memset(&temp, 0, sizeof(temp));
1833 
1834 #define compact(field) \
1835    brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1836 #define compact_reg(field) \
1837    brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1838                                        brw_inst_##field##_da_reg_nr(devinfo, src))
1839 
1840    compact(hw_opcode);
1841    compact(debug_control);
1842 
1843    if (!set_control_index(c, &temp, src))
1844       return false;
1845    if (!set_datatype_index(c, &temp, src, is_immediate))
1846       return false;
1847    if (!set_subreg_index(c, &temp, src, is_immediate))
1848       return false;
1849    if (!set_src0_index(c, &temp, src))
1850       return false;
1851    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1852       return false;
1853 
1854    if (devinfo->ver >= 12) {
1855       compact(swsb);
1856       compact_reg(dst);
1857       compact_reg(src0);
1858 
1859       if (is_immediate) {
1860          /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1861          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1862       } else {
1863          compact_reg(src1);
1864       }
1865    } else {
1866       if (devinfo->ver >= 6) {
1867          compact(acc_wr_control);
1868       } else {
1869          compact(mask_control_ex);
1870       }
1871 
1872       if (devinfo->ver <= 6)
1873          compact(flag_subreg_nr);
1874 
1875       compact(cond_modifier);
1876 
1877       compact_reg(dst);
1878       compact_reg(src0);
1879 
1880       if (is_immediate) {
1881          /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1882          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1883       } else {
1884          compact_reg(src1);
1885       }
1886    }
1887    brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1888 
1889 #undef compact
1890 #undef compact_reg
1891 
1892    *dst = temp;
1893 
1894    return true;
1895 }
1896 
1897 bool
brw_try_compact_instruction(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1898 brw_try_compact_instruction(const struct intel_device_info *devinfo,
1899                             brw_compact_inst *dst, const brw_inst *src)
1900 {
1901    struct compaction_state c;
1902    compaction_state_init(&c, devinfo);
1903    return try_compact_instruction(&c, dst, src);
1904 }
1905 
1906 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1907 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1908                         brw_compact_inst *src)
1909 {
1910    const struct intel_device_info *devinfo = c->devinfo;
1911    uint32_t uncompacted =
1912       c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1913 
1914    if (devinfo->ver >= 12) {
1915       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1916       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1917       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1918       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1919       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1920       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1921       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1922       brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1923       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1924       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1925    } else if (devinfo->ver >= 8) {
1926       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1927       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1928       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1929       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1930       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1931    } else {
1932       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1933       brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1934 
1935       if (devinfo->ver == 7)
1936          brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1937    }
1938 }
1939 
1940 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1941 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1942                          brw_compact_inst *src)
1943 {
1944    const struct intel_device_info *devinfo = c->devinfo;
1945    uint32_t uncompacted =
1946       c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1947 
1948    if (devinfo->ver >= 12) {
1949       brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1950       brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1951       brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1952       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1953       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1954       brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1955       brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1956       brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1957       brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1958       brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1959    } else if (devinfo->ver >= 8) {
1960       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1961       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1962       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1963    } else {
1964       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1965       brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1966    }
1967 }
1968 
1969 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1970 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1971                        brw_compact_inst *src)
1972 {
1973    const struct intel_device_info *devinfo = c->devinfo;
1974    uint16_t uncompacted =
1975       c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1976 
1977    if (devinfo->ver >= 12) {
1978       brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1979       brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
1980       brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
1981    } else {
1982       brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1983       brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1984       brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1985    }
1986 }
1987 
1988 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1989 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1990                      brw_compact_inst *src)
1991 {
1992    const struct intel_device_info *devinfo = c->devinfo;
1993    uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1994    uint16_t uncompacted = c->src0_index_table[compacted];
1995 
1996    if (devinfo->ver >= 12) {
1997       brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
1998       brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1999       brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2000       brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2001       brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2002    } else {
2003       brw_inst_set_bits(dst, 88, 77, uncompacted);
2004    }
2005 }
2006 
2007 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2008 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2009                      brw_compact_inst *src)
2010 {
2011    const struct intel_device_info *devinfo = c->devinfo;
2012    uint16_t uncompacted =
2013       c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2014 
2015    if (devinfo->ver >= 12) {
2016       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2017       brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
2018       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
2019       brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
2020       brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2021    } else {
2022       brw_inst_set_bits(dst, 120, 109, uncompacted);
2023    }
2024 }
2025 
2026 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2027 set_uncompacted_3src_control_index(const struct compaction_state *c,
2028                                    brw_inst *dst, brw_compact_inst *src)
2029 {
2030    const struct intel_device_info *devinfo = c->devinfo;
2031    assert(devinfo->ver >= 8);
2032 
2033    if (devinfo->verx10 >= 125) {
2034       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2035       uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2036 
2037       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2038       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2039       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2040       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2041       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2042       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2043       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2044       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2045       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2046       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2047       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2048       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2049       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2050       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2051       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2052       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2053       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2054       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2055 
2056    } else if (devinfo->ver >= 12) {
2057       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2058       uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2059 
2060       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2061       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2062       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2063       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2064       brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2065       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2066       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2067       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2068       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2069       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2070       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2071       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2072       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2073       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2074       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2075       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2076       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2077       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2078    } else {
2079       uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2080       uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2081 
2082       brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2083       brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2084 
2085       if (devinfo->ver >= 9 || devinfo->is_cherryview)
2086          brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2087    }
2088 }
2089 
2090 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2091 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2092                                   brw_inst *dst, brw_compact_inst *src)
2093 {
2094    assert(devinfo->ver >= 8);
2095 
2096    uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2097 
2098    if (devinfo->ver >= 12) {
2099       const uint32_t *three_src_source_index_table =
2100          devinfo->verx10 >= 125 ?
2101          xehp_3src_source_index_table : gfx12_3src_source_index_table;
2102       uint32_t uncompacted = three_src_source_index_table[compacted];
2103 
2104       brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2105       brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2106       brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2107       brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2108       brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2109       brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2110       brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2111       brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2112       brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2113       brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2114       brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2115       brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2116       brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2117       brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2118       brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2119    } else {
2120       uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2121 
2122       brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2123       brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2124       brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2125       brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2126       brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2127 
2128       if (devinfo->ver >= 9 || devinfo->is_cherryview) {
2129          brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2130          brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2131          brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2132       } else {
2133          brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2134          brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2135       }
2136    }
2137 }
2138 
2139 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2140 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2141                                   brw_inst *dst, brw_compact_inst *src)
2142 {
2143    assert(devinfo->ver >= 12);
2144 
2145    uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2146    uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2147 
2148    brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2149    brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2150    brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2151    brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2152 }
2153 
2154 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2155 brw_uncompact_3src_instruction(const struct compaction_state *c,
2156                                brw_inst *dst, brw_compact_inst *src)
2157 {
2158    const struct intel_device_info *devinfo = c->devinfo;
2159    assert(devinfo->ver >= 8);
2160 
2161 #define uncompact(field) \
2162    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2163 #define uncompact_a16(field) \
2164    brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2165 
2166    uncompact(hw_opcode);
2167 
2168    if (devinfo->ver >= 12) {
2169       set_uncompacted_3src_control_index(c, dst, src);
2170       set_uncompacted_3src_source_index(devinfo, dst, src);
2171       set_uncompacted_3src_subreg_index(devinfo, dst, src);
2172 
2173       uncompact(debug_control);
2174       uncompact(swsb);
2175       uncompact(dst_reg_nr);
2176       uncompact(src0_reg_nr);
2177       uncompact(src1_reg_nr);
2178       uncompact(src2_reg_nr);
2179    } else {
2180       set_uncompacted_3src_control_index(c, dst, src);
2181       set_uncompacted_3src_source_index(devinfo, dst, src);
2182 
2183       uncompact(dst_reg_nr);
2184       uncompact_a16(src0_rep_ctrl);
2185       uncompact(debug_control);
2186       uncompact(saturate);
2187       uncompact_a16(src1_rep_ctrl);
2188       uncompact_a16(src2_rep_ctrl);
2189       uncompact(src0_reg_nr);
2190       uncompact(src1_reg_nr);
2191       uncompact(src2_reg_nr);
2192       uncompact_a16(src0_subreg_nr);
2193       uncompact_a16(src1_subreg_nr);
2194       uncompact_a16(src2_subreg_nr);
2195    }
2196    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2197 
2198 #undef uncompact
2199 #undef uncompact_a16
2200 }
2201 
2202 static void
uncompact_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2203 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2204                       brw_compact_inst *src)
2205 {
2206    const struct intel_device_info *devinfo = c->devinfo;
2207    memset(dst, 0, sizeof(*dst));
2208 
2209    if (devinfo->ver >= 8 &&
2210        is_3src(devinfo, brw_opcode_decode(
2211                   devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2212       brw_uncompact_3src_instruction(c, dst, src);
2213       return;
2214    }
2215 
2216 #define uncompact(field) \
2217    brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2218 #define uncompact_reg(field) \
2219    brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2220                                     brw_compact_inst_##field##_reg_nr(devinfo, src))
2221 
2222    uncompact(hw_opcode);
2223    uncompact(debug_control);
2224 
2225    set_uncompacted_control(c, dst, src);
2226    set_uncompacted_datatype(c, dst, src);
2227    set_uncompacted_subreg(c, dst, src);
2228    set_uncompacted_src0(c, dst, src);
2229 
2230    enum brw_reg_type type;
2231    if (has_immediate(devinfo, dst, &type)) {
2232       unsigned imm = uncompact_immediate(devinfo, type,
2233                                          brw_compact_inst_imm(devinfo, src));
2234       brw_inst_set_imm_ud(devinfo, dst, imm);
2235    } else {
2236       set_uncompacted_src1(c, dst, src);
2237       uncompact_reg(src1);
2238    }
2239 
2240    if (devinfo->ver >= 12) {
2241       uncompact(swsb);
2242       uncompact_reg(dst);
2243       uncompact_reg(src0);
2244    } else {
2245       if (devinfo->ver >= 6) {
2246          uncompact(acc_wr_control);
2247       } else {
2248          uncompact(mask_control_ex);
2249       }
2250 
2251       uncompact(cond_modifier);
2252 
2253       if (devinfo->ver <= 6)
2254          uncompact(flag_subreg_nr);
2255 
2256       uncompact_reg(dst);
2257       uncompact_reg(src0);
2258    }
2259    brw_inst_set_cmpt_control(devinfo, dst, false);
2260 
2261 #undef uncompact
2262 #undef uncompact_reg
2263 }
2264 
2265 void
brw_uncompact_instruction(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2266 brw_uncompact_instruction(const struct intel_device_info *devinfo,
2267                           brw_inst *dst, brw_compact_inst *src)
2268 {
2269    struct compaction_state c;
2270    compaction_state_init(&c, devinfo);
2271    uncompact_instruction(&c, dst, src);
2272 }
2273 
/**
 * Report, on stderr, a mismatch between an instruction and the result of
 * compacting and then uncompacting it.
 *
 * Prints the hardware generation, a disassembly of \p orig and of
 * \p uncompacted, and then one line for every one of the 128 instruction
 * bits whose value differs between the two.
 */
void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
                                 brw_inst *orig,
                                 brw_inst *uncompacted)
{
   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
           devinfo->ver);

   /* NOTE(review): the fourth argument differs between the two calls below;
    * confirm against brw_disassemble_inst()'s signature that this is intended.
    */
   fprintf(stderr, "  before: ");
   brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);

   fprintf(stderr, "  after:  ");
   brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);

   /* View both instructions as four 32-bit words for the bitwise diff. */
   const uint32_t *before_bits = (const uint32_t *)orig;
   const uint32_t *after_bits = (const uint32_t *)uncompacted;
   fprintf(stderr, "  changed bits:\n");
   for (int i = 0; i < 128; i++) {
      /* The mask must be unsigned: shifting a signed 1 into bit 31 is
       * undefined behavior.
       */
      const uint32_t mask = 1u << (i & 31);
      const uint32_t before = before_bits[i / 32] & mask;
      const uint32_t after = after_bits[i / 32] & mask;

      if (before != after) {
         fprintf(stderr, "  bit %d, %s to %s\n", i,
                 before ? "set" : "unset",
                 after ? "set" : "unset");
      }
   }
}
2301 
/**
 * Return the difference between the compacted-instruction counts recorded
 * for two pre-compaction instruction indices.
 *
 * \param old_ip            index of the jumping instruction
 * \param old_target_ip     index of the jump target
 * \param compacted_counts  per-instruction running counts, indexed by
 *                          pre-compaction instruction index; only read
 *
 * The result is negative when the target's count is lower than the
 * source's, e.g. for a backward jump over compacted instructions.
 */
static int
compacted_between(int old_ip, int old_target_ip, const int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2309 
2310 static void
update_uip_jip(const struct intel_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2311 update_uip_jip(const struct intel_device_info *devinfo, brw_inst *insn,
2312                int this_old_ip, int *compacted_counts)
2313 {
2314    /* JIP and UIP are in units of:
2315     *    - bytes on Gfx8+; and
2316     *    - compacted instructions on Gfx6+.
2317     */
2318    int shift = devinfo->ver >= 8 ? 3 : 0;
2319 
2320    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2321    jip_compacted -= compacted_between(this_old_ip,
2322                                       this_old_ip + (jip_compacted / 2),
2323                                       compacted_counts);
2324    brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2325 
2326    if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
2327        brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
2328        (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2329       return;
2330 
2331    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2332    uip_compacted -= compacted_between(this_old_ip,
2333                                       this_old_ip + (uip_compacted / 2),
2334                                       compacted_counts);
2335    brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2336 }
2337 
2338 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2339 update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2340                        int this_old_ip, int *compacted_counts)
2341 {
2342    assert(devinfo->ver == 5 || devinfo->is_g4x);
2343 
2344    /* Jump Count is in units of:
2345     *    - uncompacted instructions on G45; and
2346     *    - compacted instructions on Gfx5.
2347     */
2348    int shift = devinfo->is_g4x ? 1 : 0;
2349 
2350    int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2351 
2352    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2353 
2354    int this_compacted_count = compacted_counts[this_old_ip];
2355    int target_compacted_count = compacted_counts[target_old_ip];
2356 
2357    jump_count_compacted -= (target_compacted_count - this_compacted_count);
2358    brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2359 }
2360 
2361 static void
compaction_state_init(struct compaction_state * c,const struct intel_device_info * devinfo)2362 compaction_state_init(struct compaction_state *c,
2363                       const struct intel_device_info *devinfo)
2364 {
2365    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2366    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2367    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2368    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2369    assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2370    assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2371    assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2372    assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2373    assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2374    assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2375    assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2376    assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2377    assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2378    assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2379    assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2380    assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2381    assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2382    assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2383    assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2384    assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2385    assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2386    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2387    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2388    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2389 
2390    c->devinfo = devinfo;
2391    switch (devinfo->ver) {
2392    case 12:
2393       c->control_index_table = gfx12_control_index_table;;
2394       c->datatype_table = gfx12_datatype_table;
2395       c->subreg_table = gfx12_subreg_table;
2396       if (devinfo->verx10 >= 125) {
2397          c->src0_index_table = xehp_src0_index_table;
2398          c->src1_index_table = xehp_src1_index_table;
2399       } else {
2400          c->src0_index_table = gfx12_src0_index_table;
2401          c->src1_index_table = gfx12_src1_index_table;
2402       }
2403       break;
2404    case 11:
2405       c->control_index_table = gfx8_control_index_table;
2406       c->datatype_table = gfx11_datatype_table;
2407       c->subreg_table = gfx8_subreg_table;
2408       c->src0_index_table = gfx8_src_index_table;
2409       c->src1_index_table = gfx8_src_index_table;
2410       break;
2411    case 9:
2412    case 8:
2413       c->control_index_table = gfx8_control_index_table;
2414       c->datatype_table = gfx8_datatype_table;
2415       c->subreg_table = gfx8_subreg_table;
2416       c->src0_index_table = gfx8_src_index_table;
2417       c->src1_index_table = gfx8_src_index_table;
2418       break;
2419    case 7:
2420       c->control_index_table = gfx7_control_index_table;
2421       c->datatype_table = gfx7_datatype_table;
2422       c->subreg_table = gfx7_subreg_table;
2423       c->src0_index_table = gfx7_src_index_table;
2424       c->src1_index_table = gfx7_src_index_table;
2425       break;
2426    case 6:
2427       c->control_index_table = gfx6_control_index_table;
2428       c->datatype_table = gfx6_datatype_table;
2429       c->subreg_table = gfx6_subreg_table;
2430       c->src0_index_table = gfx6_src_index_table;
2431       c->src1_index_table = gfx6_src_index_table;
2432       break;
2433    case 5:
2434    case 4:
2435       c->control_index_table = g45_control_index_table;
2436       c->datatype_table = g45_datatype_table;
2437       c->subreg_table = g45_subreg_table;
2438       c->src0_index_table = g45_src_index_table;
2439       c->src1_index_table = g45_src_index_table;
2440       break;
2441    default:
2442       unreachable("unknown generation");
2443    }
2444 }
2445 
2446 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2447 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2448                          struct disasm_info *disasm)
2449 {
2450    if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2451       return;
2452 
2453    const struct intel_device_info *devinfo = p->devinfo;
2454    void *store = p->store + start_offset / 16;
2455    /* For an instruction at byte offset 16*i before compaction, this is the
2456     * number of compacted instructions minus the number of padding NOP/NENOPs
2457     * that preceded it.
2458     */
2459    int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
2460    /* For an instruction at byte offset 8*i after compaction, this was its IP
2461     * (in 16-byte units) before compaction.
2462     */
2463    int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
2464 
2465    if (devinfo->ver == 4 && !devinfo->is_g4x)
2466       return;
2467 
2468    struct compaction_state c;
2469    compaction_state_init(&c, devinfo);
2470 
2471    int offset = 0;
2472    int compacted_count = 0;
2473    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2474         src_offset += sizeof(brw_inst)) {
2475       brw_inst *src = store + src_offset;
2476       void *dst = store + offset;
2477 
2478       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2479       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2480 
2481       brw_inst inst = precompact(devinfo, *src);
2482       brw_inst saved = inst;
2483 
2484       if (try_compact_instruction(&c, dst, &inst)) {
2485          compacted_count++;
2486 
2487          if (INTEL_DEBUG(DEBUG_ANY)) {
2488             brw_inst uncompacted;
2489             uncompact_instruction(&c, &uncompacted, dst);
2490             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2491                brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
2492             }
2493          }
2494 
2495          offset += sizeof(brw_compact_inst);
2496       } else {
2497          /* All uncompacted instructions need to be aligned on G45. */
2498          if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
2499             brw_compact_inst *align = store + offset;
2500             memset(align, 0, sizeof(*align));
2501             brw_compact_inst_set_hw_opcode(
2502                devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
2503             brw_compact_inst_set_cmpt_control(devinfo, align, true);
2504             offset += sizeof(brw_compact_inst);
2505             compacted_count--;
2506             compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2507             old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2508 
2509             dst = store + offset;
2510          }
2511 
2512          /* If we didn't compact this intruction, we need to move it down into
2513           * place.
2514           */
2515          if (offset != src_offset) {
2516             memmove(dst, src, sizeof(brw_inst));
2517          }
2518          offset += sizeof(brw_inst);
2519       }
2520    }
2521 
2522    /* Add an entry for the ending offset of the program. This greatly
2523     * simplifies the linked list walk at the end of the function.
2524     */
2525    old_ip[offset / sizeof(brw_compact_inst)] =
2526       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2527 
2528    /* Fix up control flow offsets. */
2529    p->next_insn_offset = start_offset + offset;
2530    for (offset = 0; offset < p->next_insn_offset - start_offset;
2531         offset = next_offset(devinfo, store, offset)) {
2532       brw_inst *insn = store + offset;
2533       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2534       int this_compacted_count = compacted_counts[this_old_ip];
2535 
2536       switch (brw_inst_opcode(devinfo, insn)) {
2537       case BRW_OPCODE_BREAK:
2538       case BRW_OPCODE_CONTINUE:
2539       case BRW_OPCODE_HALT:
2540          if (devinfo->ver >= 6) {
2541             update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2542          } else {
2543             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2544                                    compacted_counts);
2545          }
2546          break;
2547 
2548       case BRW_OPCODE_IF:
2549       case BRW_OPCODE_IFF:
2550       case BRW_OPCODE_ELSE:
2551       case BRW_OPCODE_ENDIF:
2552       case BRW_OPCODE_WHILE:
2553          if (devinfo->ver >= 7) {
2554             if (brw_inst_cmpt_control(devinfo, insn)) {
2555                brw_inst uncompacted;
2556                uncompact_instruction(&c, &uncompacted,
2557                                      (brw_compact_inst *)insn);
2558 
2559                update_uip_jip(devinfo, &uncompacted, this_old_ip,
2560                               compacted_counts);
2561 
2562                bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2563                                                   &uncompacted);
2564                assert(ret); (void)ret;
2565             } else {
2566                update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2567             }
2568          } else if (devinfo->ver == 6) {
2569             assert(!brw_inst_cmpt_control(devinfo, insn));
2570 
2571             /* Jump Count is in units of compacted instructions on Gfx6. */
2572             int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2573 
2574             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2575             int target_compacted_count = compacted_counts[target_old_ip];
2576             jump_count_compacted -= (target_compacted_count - this_compacted_count);
2577             brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2578          } else {
2579             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2580                                    compacted_counts);
2581          }
2582          break;
2583 
2584       case BRW_OPCODE_ADD:
2585          /* Add instructions modifying the IP register use an immediate src1,
2586           * and Gens that use this cannot compact instructions with immediate
2587           * operands.
2588           */
2589          if (brw_inst_cmpt_control(devinfo, insn))
2590             break;
2591 
2592          if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2593              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2594             assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2595 
2596             int shift = 3;
2597             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2598 
2599             int target_old_ip = this_old_ip + (jump_compacted / 2);
2600             int target_compacted_count = compacted_counts[target_old_ip];
2601             jump_compacted -= (target_compacted_count - this_compacted_count);
2602             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2603          }
2604          break;
2605 
2606       default:
2607          break;
2608       }
2609    }
2610 
2611    /* p->nr_insn is counting the number of uncompacted instructions still, so
2612     * divide.  We do want to be sure there's a valid instruction in any
2613     * alignment padding, so that the next compression pass (for the FS 8/16
2614     * compile passes) parses correctly.
2615     */
2616    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2617       brw_compact_inst *align = store + offset;
2618       memset(align, 0, sizeof(*align));
2619       brw_compact_inst_set_hw_opcode(
2620          devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
2621       brw_compact_inst_set_cmpt_control(devinfo, align, true);
2622       p->next_insn_offset += sizeof(brw_compact_inst);
2623    }
2624    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2625 
2626    for (int i = 0; i < p->num_relocs; i++) {
2627       if (p->relocs[i].offset < (uint32_t)start_offset)
2628          continue;
2629 
2630       assert(p->relocs[i].offset % 16 == 0);
2631       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2632       p->relocs[i].offset -= compacted_counts[idx] * 8;
2633    }
2634 
2635    /* Update the instruction offsets for each group. */
2636    if (disasm) {
2637       int offset = 0;
2638 
2639       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2640          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2641                 sizeof(brw_inst) != group->offset) {
2642             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2643                    sizeof(brw_inst) < group->offset);
2644             offset = next_offset(devinfo, store, offset);
2645          }
2646 
2647          group->offset = start_offset + offset;
2648 
2649          offset = next_offset(devinfo, store, offset);
2650       }
2651    }
2652 }
2653