• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file elk_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gfx5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gfx6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gfx7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gfx8 adds support for compacting 3-src instructions.
73  *
74  * Gfx12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "elk_eu.h"
82 #include "elk_disasm.h"
83 #include "elk_shader.h"
84 #include "elk_disasm_info.h"
85 #include "dev/intel_debug.h"
86 
/* G45 "control index" compaction table: 32 entries, each written as a
 * 17-digit binary literal.  Per the file header, compacted instructions use
 * lookup tables for various fields; presumably the array position is the
 * index that gets encoded.  The consuming code is not in view here, so
 * confirm there before reordering or editing entries.
 */
87 static const uint32_t g45_control_index_table[32] = {
88    0b00000000000000000,
89    0b01000000000000000,
90    0b00110000000000000,
91    0b00000000000000010,
92    0b00100000000000000,
93    0b00010000000000000,
94    0b01000000000100000,
95    0b01000000100000000,
96    0b01010000000100000,
97    0b00000000100000010,
98    0b11000000000000000,
99    0b00001000100000010,
100    0b01001000100000000,
101    0b00000000100000000,
102    0b11000000000100000,
103    0b00001000100000000,
104    0b10110000000000000,
105    0b11010000000100000,
106    0b00110000100000000,
107    0b00100000100000000,
108    0b01000000000001000,
109    0b01000000000000100,
110    0b00111100000000000,
111    0b00101011000000000,
112    0b00110000000010000,
113    0b00010000100000000,
114    0b01000000000100100,
115    0b01000000000101000,
116    0b00110000000000110,
117    0b00000000000001010,
118    0b01010000000101000,
119    0b01010000000100100,
120 };
121 
/* G45 "datatype" compaction table: 32 entries, each written as an 18-digit
 * binary literal.  Presumably indexed by the datatype index stored in a
 * compacted instruction (see the file header on lookup tables); the consumer
 * is outside this view -- confirm before changing entry order.
 */
122 static const uint32_t g45_datatype_table[32] = {
123    0b001000000000100001,
124    0b001011010110101101,
125    0b001000001000110001,
126    0b001111011110111101,
127    0b001011010110101100,
128    0b001000000110101101,
129    0b001000000000100000,
130    0b010100010110110001,
131    0b001100011000101101,
132    0b001000000000100010,
133    0b001000001000110110,
134    0b010000001000110001,
135    0b001000001000110010,
136    0b011000001000110010,
137    0b001111011110111100,
138    0b001000000100101000,
139    0b010100011000110001,
140    0b001010010100101001,
141    0b001000001000101001,
142    0b010000001000110110,
143    0b101000001000110001,
144    0b001011011000101101,
145    0b001000000100001001,
146    0b001011011000101100,
147    0b110100011000110001,
148    0b001000001110111101,
149    0b110000001000110001,
150    0b011000000100101010,
151    0b101000001000101001,
152    0b001011010110001100,
153    0b001000000110100001,
154    0b001010010100001000,
155 };
156 
/* G45 "subreg" compaction table: 32 entries, each written as a 15-digit
 * binary literal (fits the uint16_t element type).  Presumably indexed by
 * the subregister index of a compacted instruction; the consumer is not
 * visible in this chunk -- confirm before editing.
 */
157 static const uint16_t g45_subreg_table[32] = {
158    0b000000000000000,
159    0b000000010000000,
160    0b000001000000000,
161    0b000100000000000,
162    0b000000000100000,
163    0b100000000000000,
164    0b000000000010000,
165    0b001100000000000,
166    0b001010000000000,
167    0b000000100000000,
168    0b001000000000000,
169    0b000000000001000,
170    0b000000001000000,
171    0b000000000000001,
172    0b000010000000000,
173    0b000000010100000,
174    0b000000000000111,
175    0b000001000100000,
176    0b011000000000000,
177    0b000000110000000,
178    0b000000000000010,
179    0b000000000000100,
180    0b000000001100000,
181    0b000100000000010,
182    0b001110011000110,
183    0b001110100001000,
184    0b000110011000110,
185    0b000001000011000,
186    0b000110010000100,
187    0b001100000000110,
188    0b000000010000110,
189    0b000001000110000,
190 };
191 
/* G45 "src index" compaction table: 32 entries, each written as a 12-digit
 * binary literal.  Presumably indexed by a source-operand index stored in a
 * compacted instruction; the consuming code is outside this view -- confirm
 * before reordering entries.
 */
192 static const uint16_t g45_src_index_table[32] = {
193    0b000000000000,
194    0b010001101000,
195    0b010110001000,
196    0b011010010000,
197    0b001101001000,
198    0b010110001010,
199    0b010101110000,
200    0b011001111000,
201    0b001000101000,
202    0b000000101000,
203    0b010001010000,
204    0b111101101100,
205    0b010110001100,
206    0b010001101100,
207    0b011010010100,
208    0b010001001100,
209    0b001100101000,
210    0b000000000010,
211    0b111101001100,
212    0b011001101000,
213    0b010101001000,
214    0b000000000100,
215    0b000000101100,
216    0b010001101010,
217    0b000000111000,
218    0b010101011000,
219    0b000100100000,
220    0b010110000000,
221    0b010000000100,
222    0b010000111000,
223    0b000101100000,
224    0b111101110100,
225 };
226 
/* Gfx6 "control index" compaction table: 32 entries, each written as a
 * 17-digit binary literal.  Presumably the array position is the index
 * encoded in a compacted instruction (see the file header on lookup
 * tables); the consumer is not in view -- confirm before editing.
 */
227 static const uint32_t gfx6_control_index_table[32] = {
228    0b00000000000000000,
229    0b01000000000000000,
230    0b00110000000000000,
231    0b00000000100000000,
232    0b00010000000000000,
233    0b00001000100000000,
234    0b00000000100000010,
235    0b00000000000000010,
236    0b01000000100000000,
237    0b01010000000000000,
238    0b10110000000000000,
239    0b00100000000000000,
240    0b11010000000000000,
241    0b11000000000000000,
242    0b01001000100000000,
243    0b01000000000001000,
244    0b01000000000000100,
245    0b00000000000001000,
246    0b00000000000000100,
247    0b00111000100000000,
248    0b00001000100000010,
249    0b00110000100000000,
250    0b00110000000000001,
251    0b00100000000000001,
252    0b00110000000000010,
253    0b00110000000000101,
254    0b00110000000001001,
255    0b00110000000010000,
256    0b00110000000000011,
257    0b00110000000000100,
258    0b00110000100001000,
259    0b00100000000001001,
260 };
261 
/* Gfx6 "datatype" compaction table: 32 entries, each written as an 18-digit
 * binary literal.  Presumably indexed by the datatype index of a compacted
 * instruction; the consumer is outside this view.
 *
 * NOTE(review): entries 23 and 25 hold the same pattern
 * (0b001111011110111101).  Confirm against the hardware documentation
 * whether the duplicate is intentional before "fixing" it.
 */
262 static const uint32_t gfx6_datatype_table[32] = {
263    0b001001110000000000,
264    0b001000110000100000,
265    0b001001110000000001,
266    0b001000000001100000,
267    0b001010110100101001,
268    0b001000000110101101,
269    0b001100011000101100,
270    0b001011110110101101,
271    0b001000000111101100,
272    0b001000000001100001,
273    0b001000110010100101,
274    0b001000000001000001,
275    0b001000001000110001,
276    0b001000001000101001,
277    0b001000000000100000,
278    0b001000001000110010,
279    0b001010010100101001,
280    0b001011010010100101,
281    0b001000000110100101,
282    0b001100011000101001,
283    0b001011011000101100,
284    0b001011010110100101,
285    0b001011110110100101,
286    0b001111011110111101,
287    0b001111011110111100,
288    0b001111011110111101,
289    0b001111011110011101,
290    0b001111011110111110,
291    0b001000000000100001,
292    0b001000000000100010,
293    0b001001111111011101,
294    0b001000001110111110,
295 };
296 
/* Gfx6 "subreg" compaction table: 32 entries, each written as a 15-digit
 * binary literal (fits the uint16_t element type).  Presumably indexed by
 * the subregister index of a compacted instruction; the consuming code is
 * not visible here -- confirm before reordering.
 */
297 static const uint16_t gfx6_subreg_table[32] = {
298    0b000000000000000,
299    0b000000000000100,
300    0b000000110000000,
301    0b111000000000000,
302    0b011110000001000,
303    0b000010000000000,
304    0b000000000010000,
305    0b000110000001100,
306    0b001000000000000,
307    0b000001000000000,
308    0b000001010010100,
309    0b000000001010110,
310    0b010000000000000,
311    0b110000000000000,
312    0b000100000000000,
313    0b000000010000000,
314    0b000000000001000,
315    0b100000000000000,
316    0b000001010000000,
317    0b001010000000000,
318    0b001100000000000,
319    0b000000001010100,
320    0b101101010010100,
321    0b010100000000000,
322    0b000000010001111,
323    0b011000000000000,
324    0b111110000000000,
325    0b101000000000000,
326    0b000000000001111,
327    0b000100010001111,
328    0b001000010001111,
329    0b000110000000000,
330 };
331 
/* Gfx6 "src index" compaction table: 32 entries, each written as a 12-digit
 * binary literal.  Presumably indexed by a source-operand index stored in a
 * compacted instruction; the consumer is outside this view -- confirm
 * before editing.
 */
332 static const uint16_t gfx6_src_index_table[32] = {
333    0b000000000000,
334    0b010110001000,
335    0b010001101000,
336    0b001000101000,
337    0b011010010000,
338    0b000100100000,
339    0b010001101100,
340    0b010101110000,
341    0b011001111000,
342    0b001100101000,
343    0b010110001100,
344    0b001000100000,
345    0b010110001010,
346    0b000000000010,
347    0b010101010000,
348    0b010101101000,
349    0b111101001100,
350    0b111100101100,
351    0b011001110000,
352    0b010110001001,
353    0b010101011000,
354    0b001101001000,
355    0b010000101100,
356    0b010000000000,
357    0b001101110000,
358    0b001100010000,
359    0b001100000000,
360    0b010001101010,
361    0b001101111000,
362    0b000001110000,
363    0b001100100000,
364    0b001101010000,
365 };
366 
/* Gfx7 "control index" compaction table: 32 entries, each written as a
 * 19-digit binary literal (two digits wider than the G45/Gfx6 tables in
 * this file).  Presumably the array position is the index encoded in a
 * compacted instruction; the consumer is not in view -- confirm before
 * editing.
 */
367 static const uint32_t gfx7_control_index_table[32] = {
368    0b0000000000000000010,
369    0b0000100000000000000,
370    0b0000100000000000001,
371    0b0000100000000000010,
372    0b0000100000000000011,
373    0b0000100000000000100,
374    0b0000100000000000101,
375    0b0000100000000000111,
376    0b0000100000000001000,
377    0b0000100000000001001,
378    0b0000100000000001101,
379    0b0000110000000000000,
380    0b0000110000000000001,
381    0b0000110000000000010,
382    0b0000110000000000011,
383    0b0000110000000000100,
384    0b0000110000000000101,
385    0b0000110000000000111,
386    0b0000110000000001001,
387    0b0000110000000001101,
388    0b0000110000000010000,
389    0b0000110000100000000,
390    0b0001000000000000000,
391    0b0001000000000000010,
392    0b0001000000000000100,
393    0b0001000000100000000,
394    0b0010110000000000000,
395    0b0010110000000010000,
396    0b0011000000000000000,
397    0b0011000000100000000,
398    0b0101000000000000000,
399    0b0101000000100000000,
400 };
401 
/* Gfx7 "datatype" compaction table: 32 entries, each written as an 18-digit
 * binary literal.  Presumably indexed by the datatype index of a compacted
 * instruction; the consuming code is outside this view -- confirm before
 * reordering entries.
 */
402 static const uint32_t gfx7_datatype_table[32] = {
403    0b001000000000000001,
404    0b001000000000100000,
405    0b001000000000100001,
406    0b001000000001100001,
407    0b001000000010111101,
408    0b001000001011111101,
409    0b001000001110100001,
410    0b001000001110100101,
411    0b001000001110111101,
412    0b001000010000100001,
413    0b001000110000100000,
414    0b001000110000100001,
415    0b001001010010100101,
416    0b001001110010100100,
417    0b001001110010100101,
418    0b001111001110111101,
419    0b001111011110011101,
420    0b001111011110111100,
421    0b001111011110111101,
422    0b001111111110111100,
423    0b000000001000001100,
424    0b001000000000111101,
425    0b001000000010100101,
426    0b001000010000100000,
427    0b001001010010100100,
428    0b001001110010000100,
429    0b001010010100001001,
430    0b001101111110111101,
431    0b001111111110111101,
432    0b001011110110101100,
433    0b001010010100101000,
434    0b001010110100101000,
435 };
436 
/* Gfx7 "subreg" compaction table: 32 entries, each written as a 15-digit
 * binary literal (fits the uint16_t element type).  Presumably indexed by
 * the subregister index of a compacted instruction; the consumer is not
 * visible in this chunk -- confirm before editing.
 */
437 static const uint16_t gfx7_subreg_table[32] = {
438    0b000000000000000,
439    0b000000000000001,
440    0b000000000001000,
441    0b000000000001111,
442    0b000000000010000,
443    0b000000010000000,
444    0b000000100000000,
445    0b000000110000000,
446    0b000001000000000,
447    0b000001000010000,
448    0b000010100000000,
449    0b001000000000000,
450    0b001000000000001,
451    0b001000010000001,
452    0b001000010000010,
453    0b001000010000011,
454    0b001000010000100,
455    0b001000010000111,
456    0b001000010001000,
457    0b001000010001110,
458    0b001000010001111,
459    0b001000110000000,
460    0b001000111101000,
461    0b010000000000000,
462    0b010000110000000,
463    0b011000000000000,
464    0b011110010000111,
465    0b100000000000000,
466    0b101000000000000,
467    0b110000000000000,
468    0b111000000000000,
469    0b111000000011100,
470 };
471 
/* Gfx7 "src index" compaction table: 32 entries, each written as a 12-digit
 * binary literal.  Presumably indexed by a source-operand index stored in a
 * compacted instruction; the consuming code is outside this view -- confirm
 * before reordering entries.
 */
472 static const uint16_t gfx7_src_index_table[32] = {
473    0b000000000000,
474    0b000000000010,
475    0b000000010000,
476    0b000000010010,
477    0b000000011000,
478    0b000000100000,
479    0b000000101000,
480    0b000001001000,
481    0b000001010000,
482    0b000001110000,
483    0b000001111000,
484    0b001100000000,
485    0b001100000010,
486    0b001100001000,
487    0b001100010000,
488    0b001100010010,
489    0b001100100000,
490    0b001100101000,
491    0b001100111000,
492    0b001101000000,
493    0b001101000010,
494    0b001101001000,
495    0b001101010000,
496    0b001101100000,
497    0b001101101000,
498    0b001101110000,
499    0b001101110001,
500    0b001101111000,
501    0b010001101000,
502    0b010001101001,
503    0b010001101010,
504    0b010110001000,
505 };
506 
/* Gfx8 "control index" compaction table: 32 entries, each written as a
 * 19-digit binary literal.  The values currently match
 * gfx7_control_index_table entry for entry.  Presumably the array position
 * is the index encoded in a compacted instruction; the consumer is not in
 * view -- confirm before editing.
 */
507 static const uint32_t gfx8_control_index_table[32] = {
508    0b0000000000000000010,
509    0b0000100000000000000,
510    0b0000100000000000001,
511    0b0000100000000000010,
512    0b0000100000000000011,
513    0b0000100000000000100,
514    0b0000100000000000101,
515    0b0000100000000000111,
516    0b0000100000000001000,
517    0b0000100000000001001,
518    0b0000100000000001101,
519    0b0000110000000000000,
520    0b0000110000000000001,
521    0b0000110000000000010,
522    0b0000110000000000011,
523    0b0000110000000000100,
524    0b0000110000000000101,
525    0b0000110000000000111,
526    0b0000110000000001001,
527    0b0000110000000001101,
528    0b0000110000000010000,
529    0b0000110000100000000,
530    0b0001000000000000000,
531    0b0001000000000000010,
532    0b0001000000000000100,
533    0b0001000000100000000,
534    0b0010110000000000000,
535    0b0010110000000010000,
536    0b0011000000000000000,
537    0b0011000000100000000,
538    0b0101000000000000000,
539    0b0101000000100000000,
540 };
541 
/* Gfx8 "datatype" compaction table: 32 entries, each written as a 21-digit
 * binary literal (three digits wider than the Gfx7 datatype table in this
 * file).  Presumably indexed by the datatype index of a compacted
 * instruction; the consumer is outside this view -- confirm before editing.
 */
542 static const uint32_t gfx8_datatype_table[32] = {
543    0b001000000000000000001,
544    0b001000000000001000000,
545    0b001000000000001000001,
546    0b001000000000011000001,
547    0b001000000000101011101,
548    0b001000000010111011101,
549    0b001000000011101000001,
550    0b001000000011101000101,
551    0b001000000011101011101,
552    0b001000001000001000001,
553    0b001000011000001000000,
554    0b001000011000001000001,
555    0b001000101000101000101,
556    0b001000111000101000100,
557    0b001000111000101000101,
558    0b001011100011101011101,
559    0b001011101011100011101,
560    0b001011101011101011100,
561    0b001011101011101011101,
562    0b001011111011101011100,
563    0b000000000010000001100,
564    0b001000000000001011101,
565    0b001000000000101000101,
566    0b001000001000001000000,
567    0b001000101000101000100,
568    0b001000111000100000100,
569    0b001001001001000001001,
570    0b001010111011101011101,
571    0b001011111011101011101,
572    0b001001111001101001100,
573    0b001001001001001001000,
574    0b001001011001001001000,
575 };
576 
/* Gfx8 "subreg" compaction table: 32 entries, each written as a 15-digit
 * binary literal (fits the uint16_t element type).  It differs from
 * gfx7_subreg_table only at index 10 (0b000001010000000 here).  Presumably
 * indexed by the subregister index of a compacted instruction; the consumer
 * is not visible in this chunk -- confirm before editing.
 */
577 static const uint16_t gfx8_subreg_table[32] = {
578    0b000000000000000,
579    0b000000000000001,
580    0b000000000001000,
581    0b000000000001111,
582    0b000000000010000,
583    0b000000010000000,
584    0b000000100000000,
585    0b000000110000000,
586    0b000001000000000,
587    0b000001000010000,
588    0b000001010000000,
589    0b001000000000000,
590    0b001000000000001,
591    0b001000010000001,
592    0b001000010000010,
593    0b001000010000011,
594    0b001000010000100,
595    0b001000010000111,
596    0b001000010001000,
597    0b001000010001110,
598    0b001000010001111,
599    0b001000110000000,
600    0b001000111101000,
601    0b010000000000000,
602    0b010000110000000,
603    0b011000000000000,
604    0b011110010000111,
605    0b100000000000000,
606    0b101000000000000,
607    0b110000000000000,
608    0b111000000000000,
609    0b111000000011100,
610 };
611 
/* Gfx8 "src index" compaction table: 32 entries, each written as a 12-digit
 * binary literal.  The values currently match gfx7_src_index_table entry
 * for entry.  Presumably indexed by a source-operand index stored in a
 * compacted instruction; the consumer is outside this view -- confirm
 * before editing.
 */
612 static const uint16_t gfx8_src_index_table[32] = {
613    0b000000000000,
614    0b000000000010,
615    0b000000010000,
616    0b000000010010,
617    0b000000011000,
618    0b000000100000,
619    0b000000101000,
620    0b000001001000,
621    0b000001010000,
622    0b000001110000,
623    0b000001111000,
624    0b001100000000,
625    0b001100000010,
626    0b001100001000,
627    0b001100010000,
628    0b001100010010,
629    0b001100100000,
630    0b001100101000,
631    0b001100111000,
632    0b001101000000,
633    0b001101000010,
634    0b001101001000,
635    0b001101010000,
636    0b001101100000,
637    0b001101101000,
638    0b001101110000,
639    0b001101110001,
640    0b001101111000,
641    0b010001101000,
642    0b010001101001,
643    0b010001101010,
644    0b010110001000,
645 };
646 
/* Gfx11 "datatype" compaction table: 32 entries, each written as a 21-digit
 * binary literal.  Only a datatype table is defined for Gfx11 in the part
 * of the file visible here.  Presumably indexed by the datatype index of a
 * compacted instruction; the consumer is outside this view -- confirm
 * before editing.
 */
647 static const uint32_t gfx11_datatype_table[32] = {
648    0b001000000000000000001,
649    0b001000000000001000000,
650    0b001000000000001000001,
651    0b001000000000011000001,
652    0b001000000000101100101,
653    0b001000000101111100101,
654    0b001000000100101000001,
655    0b001000000100101000101,
656    0b001000000100101100101,
657    0b001000001000001000001,
658    0b001000011000001000000,
659    0b001000011000001000001,
660    0b001000101000101000101,
661    0b001000111000101000100,
662    0b001000111000101000101,
663    0b001100100100101100101,
664    0b001100101100100100101,
665    0b001100101100101100100,
666    0b001100101100101100101,
667    0b001100111100101100100,
668    0b000000000010000001100,
669    0b001000000000001100101,
670    0b001000000000101000101,
671    0b001000001000001000000,
672    0b001000101000101000100,
673    0b001000111000100000100,
674    0b001001001001000001001,
675    0b001101111100101100101,
676    0b001100111100101100101,
677    0b001001111001101001100,
678    0b001001001001001001000,
679    0b001001011001001001000,
680 };
681 
/* Gfx12 "control index" compaction table: 32 entries, each written as a
 * 21-digit binary literal.  The trailing comment on each entry gives the
 * instruction controls that pattern stands for, in disassembly-like
 * notation.  Presumably the array position is the index encoded in a
 * compacted instruction; the consumer is not in view -- confirm before
 * editing.
 */
682 static const uint32_t gfx12_control_index_table[32] = {
683    0b000000000000000000100, /* 	       (16|M0)                            */
684    0b000000000000000000011, /* 	       (8|M0)                             */
685    0b000000010000000000000, /* 	(W)    (1|M0)                             */
686    0b000000010000000000100, /* 	(W)    (16|M0)                            */
687    0b000000010000000000011, /* 	(W)    (8|M0)                             */
688    0b010000000000000000100, /* 	       (16|M0)  (ge)f0.0                  */
689    0b000000000000000100100, /* 	       (16|M16)                           */
690    0b010100000000000000100, /* 	       (16|M0)  (lt)f0.0                  */
691    0b000000000000000000000, /* 	       (1|M0)                             */
692    0b000010000000000000100, /* 	       (16|M0)           (sat)            */
693    0b000000000000000010011, /* 	       (8|M8)                             */
694    0b001100000000000000100, /* 	       (16|M0)  (gt)f0.0                  */
695    0b000100000000000000100, /* 	       (16|M0)  (eq)f0.0                  */
696    0b000100010000000000100, /* 	(W)    (16|M0)  (eq)f0.0                  */
697    0b001000000000000000100, /* 	       (16|M0)  (ne)f0.0                  */
698    0b000000000000100000100, /* 	(f0.0) (16|M0)                            */
699    0b010100000000000000011, /* 	       (8|M0)   (lt)f0.0                  */
700    0b000000000000110000100, /* 	(f1.0) (16|M0)                            */
701    0b000000010000000000001, /* 	(W)    (2|M0)                             */
702    0b000000000000101000100, /* 	(f0.1) (16|M0)                            */
703    0b000000000000111000100, /* 	(f1.1) (16|M0)                            */
704    0b010000010000000000100, /* 	(W)    (16|M0)  (ge)f0.0                  */
705    0b000000000000000100011, /* 	       (8|M16)                            */
706    0b000000000000000110011, /* 	       (8|M24)                            */
707    0b010100010000000000100, /* 	(W)    (16|M0)  (lt)f0.0                  */
708    0b010000000000000000011, /* 	       (8|M0)   (ge)f0.0                  */
709    0b000100010000000000000, /* 	(W)    (1|M0)   (eq)f0.0                  */
710    0b000010000000000000011, /* 	       (8|M0)            (sat)            */
711    0b010100000000010000100, /* 	       (16|M0)  (lt)f1.0                  */
712    0b000100000000000000011, /* 	       (8|M0)   (eq)f0.0                  */
713    0b000001000000000000011, /* 	       (8|M0)                   {AccWrEn} */
714    0b000000010000000100100, /* 	(W)    (16|M16)                           */
715 };
716 
/* Gfx12 "datatype" compaction table: 32 entries, each written as a 20-digit
 * binary literal.  The trailing comment on each entry lists three operands
 * (register file, an optional <stride> on the first, and a type suffix) that
 * the pattern stands for.  Presumably indexed by the datatype index of a
 * compacted instruction; the consumer is outside this view -- confirm
 * before editing.
 */
717 static const uint32_t gfx12_datatype_table[32] = {
718    0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
719    0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
720    0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
721    0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
722    0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
723    0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
724    0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
725    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
726    0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
727    0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
728    0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
729    0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
730    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
731    0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
732    0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
733    0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
734    0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
735    0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
736    0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
737    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
738    0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
739    0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
740    0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
741    0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
742    0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
743    0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
744    0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
745    0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
746    0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
747    0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
748    0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
749    0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
750 };
751 
/* Gfx12 "subreg" compaction table: 32 entries, each written as a 15-digit
 * binary literal.  Each pattern is three 5-bit values; the trailing comment
 * lists them from the least-significant field (first column) to the
 * most-significant (third column).  Presumably indexed by the subregister
 * index of a compacted instruction; the consumer is not visible in this
 * chunk -- confirm before editing.
 */
752 static const uint16_t gfx12_subreg_table[32] = {
753    0b000000000000000, /* .0  .0  .0  */
754    0b100000000000000, /* .0  .0  .16 */
755    0b001000000000000, /* .0  .0  .4  */
756    0b011000000000000, /* .0  .0  .12 */
757    0b000000010000000, /* .0  .4  .0  */
758    0b010000000000000, /* .0  .0  .8  */
759    0b101000000000000, /* .0  .0  .20 */
760    0b000000000001000, /* .8  .0  .0  */
761    0b000000100000000, /* .0  .8  .0  */
762    0b110000000000000, /* .0  .0  .24 */
763    0b111000000000000, /* .0  .0  .28 */
764    0b000001000000000, /* .0  .16 .0  */
765    0b000000000000100, /* .4  .0  .0  */
766    0b000001100000000, /* .0  .24 .0  */
767    0b000001010000000, /* .0  .20 .0  */
768    0b000000110000000, /* .0  .12 .0  */
769    0b000001110000000, /* .0  .28 .0  */
770    0b000000000011100, /* .28 .0  .0  */
771    0b000000000010000, /* .16 .0  .0  */
772    0b000000000001100, /* .12 .0  .0  */
773    0b000000000011000, /* .24 .0  .0  */
774    0b000000000010100, /* .20 .0  .0  */
775    0b000000000000010, /* .2  .0  .0  */
776    0b000000101000000, /* .0  .10 .0  */
777    0b000000001000000, /* .0  .2  .0  */
778    0b000000010000100, /* .4  .4  .0  */
779    0b000000001011100, /* .28 .2  .0  */
780    0b000000001000010, /* .2  .2  .0  */
781    0b000000110001100, /* .12 .12 .0  */
782    0b000000000100000, /* .0  .1  .0  */
783    0b000000001100000, /* .0  .3  .0  */
784    0b110001100000000, /* .0  .24 .24 */
785 };
786 
/* Gfx12 "src0 index" compaction table: 16 entries, each written as a
 * 12-digit binary literal.  The trailing comment on each entry shows the
 * source modifier (-, (abs)) and region the pattern stands for.  Presumably
 * indexed by the src0 index of a compacted instruction; the consumer is
 * outside this view -- confirm before editing.
 */
787 static const uint16_t gfx12_src0_index_table[16] = {
788    0b010001100100, /*       r<8;8,1>  */
789    0b000000000000, /*       r<0;1,0>  */
790    0b010001100110, /*      -r<8;8,1>  */
791    0b010001100101, /*  (abs)r<8;8,1>  */
792    0b000000000010, /*      -r<0;1,0>  */
793    0b001000000000, /*       r<2;1,0>  */
794    0b001001000000, /*       r<2;4,0>  */
795    0b001101000000, /*       r<4;4,0>  */
796    0b001000100100, /*       r<2;2,1>  */
797    0b001100000000, /*       r<4;1,0>  */
798    0b001000100110, /*      -r<2;2,1>  */
799    0b001101000100, /*       r<4;4,1>  */
800    0b010001100111, /* -(abs)r<8;8,1>  */
801    0b000100000000, /*       r<1;1,0>  */
802    0b000000000001, /*  (abs)r<0;1,0>  */
803    0b111100010000, /*       r[a]<1,0> */
804 };
805 
/* Gfx12 "src1 index" compaction table: 16 entries, each written as a
 * 12-digit binary literal.  The trailing comment on each entry shows the
 * source modifier (-, (abs)) and region the pattern stands for.  Note the
 * bit layout is not the same as gfx12_src0_index_table: e.g. r<8;8,1> is
 * 0b000100011001 here but 0b010001100100 there.  Presumably indexed by the
 * src1 index of a compacted instruction; the consumer is outside this view
 * -- confirm before editing.
 */
806 static const uint16_t gfx12_src1_index_table[16] = {
807    0b000100011001, /*       r<8;8,1> */
808    0b000000000000, /*       r<0;1,0> */
809    0b100100011001, /*      -r<8;8,1> */
810    0b100000000000, /*      -r<0;1,0> */
811    0b010100011001, /*  (abs)r<8;8,1> */
812    0b100011010000, /*      -r<4;4,0> */
813    0b000010000000, /*       r<2;1,0> */
814    0b000010001001, /*       r<2;2,1> */
815    0b100010001001, /*      -r<2;2,1> */
816    0b000011010000, /*       r<4;4,0> */
817    0b000011010001, /*       r<4;4,1> */
818    0b000011000000, /*       r<4;1,0> */
819    0b110100011001, /* -(abs)r<8;8,1> */
820    0b010000000000, /*  (abs)r<0;1,0> */
821    0b110000000000, /* -(abs)r<0;1,0> */
822    0b100011010001, /*      -r<4;4,1> */
823 };
824 
/* XeHP "src0 index" compaction table: 16 entries, each written as a
 * 12-digit binary literal.  The trailing comment on each entry shows the
 * source modifier (-, (abs)) and region the pattern stands for.  Presumably
 * indexed by the src0 index of a compacted instruction on XeHP; the
 * consumer and the platform selection logic are outside this view --
 * confirm before editing.
 */
825 static const uint16_t xehp_src0_index_table[16] = {
826    0b000100000000, /*       r<1;1,0>  */
827    0b000000000000, /*       r<0;1,0>  */
828    0b000100000010, /*      -r<1;1,0>  */
829    0b000100000001, /*  (abs)r<1;1,0>  */
830    0b000000000010, /*      -r<0;1,0>  */
831    0b001000000000, /*       r<2;1,0>  */
832    0b001001000000, /*       r<2;4,0>  */
833    0b001101000000, /*       r<4;4,0>  */
834    0b001100000000, /*       r<4;1,0>  */
835    0b000100000011, /* -(abs)r<1;1,0>  */
836    0b000000000001, /*  (abs)r<0;1,0>  */
837    0b111100010000, /*       r[a]<1,0> */
838    0b010001100000, /*       r<8;8,0>  */
839    0b000101000000, /*       r<1;4,0>  */
840    0b010001001000, /*       r<8;4,2>  */
841    0b001000000010, /*      -r<2;1,0>  */
842 };
843 
/* XeHP "src1 index" compaction table: 16 entries, each written as a
 * 12-digit binary literal.  The trailing comment on each entry shows the
 * source modifier (-, (abs)) and region the pattern stands for.  Presumably
 * indexed by the src1 index of a compacted instruction on XeHP; the
 * consumer and the platform selection logic are outside this view --
 * confirm before editing.
 */
844 static const uint16_t xehp_src1_index_table[16] = {
845    0b000001000000, /*       r<1;1,0>    */
846    0b000000000000, /*       r<0;1,0>    */
847    0b100001000000, /*      -r<1;1,0>    */
848    0b100000000000, /*      -r<0;1,0>    */
849    0b010001000000, /*  (abs)r<1;1,0>    */
850    0b100011010000, /*      -r<4;4,0>    */
851    0b000010000000, /*       r<2;1,0>    */
852    0b000011010000, /*       r<4;4,0>    */
853    0b000011000000, /*       r<4;1,0>    */
854    0b110001000000, /* -(abs)r<1;1,0>    */
855    0b010000000000, /*  (abs)r<0;1,0>    */
856    0b110000000000, /* -(abs)r<0;1,0>    */
857    0b000100011000, /*       r<8;8,0>    */
858    0b100010000000, /*      -r<2;1,0>    */
859    0b100000001001, /*      -r<0;2,1>    */
860    0b100001000100, /*      -r[a]<1;1,0> */
861 };
862 
/* Xe2 "control index" compaction table: 32 entries, each written as an
 * 18-digit binary literal.  The trailing comment on each entry gives the
 * instruction controls that pattern stands for, in disassembly-like
 * notation.  Presumably the array position is the index encoded in a
 * compacted instruction; the consumer is not in view -- confirm before
 * editing.
 */
863 static const uint32_t xe2_control_index_table[32] = {
864    0b000000000000000100, /* (16|M0)               */
865    0b000000100000000000, /* (W) (1|M0)            */
866    0b000000000010000100, /* (16|M16)              */
867    0b000000000000000000, /* (1|M0)                */
868    0b000000100000000100, /* (W) (16|M0)           */
869    0b010000000000000100, /* (16|M0) (.ge)f0.0     */
870    0b010100000000000100, /* (16|M0) (.lt)f0.0     */
871    0b000000100000000010, /* (W) (4|M0)            */
872    0b000000000000000101, /* (32|M0)               */
873    0b000000100000000011, /* (W) (8|M0)            */
874    0b001100100000000000, /* (W) (1|M0) (.gt)f0.0  */
875    0b000010000000000100, /* (16|M0) (sat)         */
876    0b000100000000000100, /* (16|M0) (.eq)f0.0     */
877    0b000000100000000001, /* (W) (2|M0)            */
878    0b001100000000000100, /* (16|M0) (.gt)f0.0     */
879    0b000100100000000000, /* (W) (1|M0) (.eq)f0.0  */
880    0b010100100000000010, /* (W) (4|M0) (.lt)f0.0  */
881    0b010000100000000000, /* (W) (1|M0) (.ge)f0.0  */
882    0b010000100000000010, /* (W) (4|M0) (.ge)f0.0  */
883    0b010100100000000000, /* (W) (1|M0) (.lt)f0.0  */
884    0b001000000000000100, /* (16|M0) (.ne)f0.0     */
885    0b000000000100100100, /* (f2.0) (16|M0)        */
886    0b010100100000000011, /* (W) (8|M0) (.lt)f0.0  */
887    0b000000000100011100, /* (f1.1) (16|M0)        */
888    0b010000100000000011, /* (W) (8|M0) (.ge)f0.0  */
889    0b000000000100001100, /* (f0.1) (16|M0)        */
890    0b000000000100010100, /* (f1.0) (16|M0)        */
891    0b000000000100110100, /* (f3.0) (16|M0)        */
892    0b000000000100111100, /* (f3.1) (16|M0)        */
893    0b000000000100101100, /* (f2.1) (16|M0)        */
894    0b000000000100000100, /* (f0.0) (16|M0)        */
895    0b010100000000100100, /* (16|M0) (.lt)f2.0     */
896 };
897 
/* Xe2 "datatype" compaction table: 32 entries, each written as a 20-digit
 * binary literal.  The trailing comment on each entry lists the operands
 * (register file, an optional <stride> on the first, and a type suffix) the
 * pattern stands for; a few entries list only two operands.  Presumably
 * indexed by the datatype index of a compacted instruction; the consumer is
 * outside this view -- confirm before editing.
 */
898 static const uint32_t xe2_datatype_table[32] = {
899    0b11010110100101010100, /* grf<1>:f grf:f grf:f    */
900    0b11010100100101010100, /* arf<1>:f grf:f grf:f    */
901    0b00000110100101010100, /* grf<1>:f grf:f arf:ub   */
902    0b00000110100001000100, /* grf<1>:ud grf:ud arf:ub */
903    0b01010110110101010100, /* grf<1>:f grf:f imm:f    */
904    0b11010010100101010100, /* grf<1>:f arf:f grf:f    */
905    0b10111110100011101110, /* grf<1>:q grf:q grf:q    */
906    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
907    0b01010110100101010100, /* grf<1>:f grf:f arf:f    */
908    0b00000010101001000100, /* grf<1>:ud imm:ud        */
909    0b00101110110011001100, /* grf<1>:d grf:d imm:w    */
910    0b11010000100101010100, /* arf<1>:f arf:f grf:f    */
911    0b01010100100101010100, /* arf<1>:f grf:f arf:f    */
912    0b01010100110101010100, /* arf<1>:f grf:f imm:f    */
913    0b00000010101101010100, /* grf<1>:f imm:f          */
914    0b00000110100011001100, /* grf<1>:d grf:d arf:ub   */
915    0b00101110110011101110, /* grf<1>:q grf:q imm:w    */
916    0b00000110100001100110, /* grf<1>:uq grf:uq arf:ub */
917    0b01010000100101010100, /* arf<1>:f arf:f arf:f    */
918    0b10110110100011001100, /* grf<1>:d grf:d grf:d    */
919    0b01010010100101010100, /* grf<1>:f arf:f arf:f    */
920    0b00000111000001000100, /* grf<2>:ud grf:ud arf:ub */
921    0b00110110110011001110, /* grf<1>:q grf:d imm:d    */
922    0b00101100110011001100, /* arf<1>:d grf:d imm:w    */
923    0b11011110100101110110, /* grf<1>:df grf:df grf:df */
924    0b01010010110101010100, /* grf<1>:f arf:f imm:f    */
925    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
926    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
927    0b00001110110001000100, /* grf<1>:ud grf:ud imm:uw */
928    0b00000010101010101100, /* grf<1>:d imm:w          */
929    0b01010000110101010100, /* arf<1>:f arf:f imm:f    */
930    0b00000100100001000100, /* arf<1>:ud grf:ud arf:ub */
931 };
932 
/* Sub-register lookup table for Xe2: 16 entries of 12-bit patterns.  The
 * trailing comment on each entry gives the two sub-register byte offsets the
 * pattern encodes.
 *
 * set_subreg_index() sizes its search with ARRAY_SIZE() of this table when
 * ver >= 20 and stores the position of the matching entry, so the order of
 * the entries is significant.
 */
static const uint16_t xe2_subreg_table[16] = {
   0b000000000000, /* .0 .0  */
   0b000010000000, /* .0 .4  */
   0b000000000100, /* .4 .0  */
   0b010000000000, /* .0 .32 */
   0b001000000000, /* .0 .16 */
   0b000000001000, /* .8 .0  */
   0b000100000000, /* .0 .8  */
   0b010100000000, /* .0 .40 */
   0b011000000000, /* .0 .48 */
   0b000110000000, /* .0 .12 */
   0b000000010000, /* .16 .0 */
   0b011010000000, /* .0 .52 */
   0b001100000000, /* .0 .24 */
   0b011100000000, /* .0 .56 */
   0b010110000000, /* .0 .44 */
   0b010010000000, /* .0 .36 */
};
951 
/* Source 0 lookup table for Xe2: 8 entries of 11-bit patterns.  The trailing
 * comment on each entry shows the source modifier (negate / abs) and region
 * the pattern encodes.
 *
 * set_src0_index() sizes its search with ARRAY_SIZE() of this table when
 * ver >= 20 and stores the position of the matching entry, so the order of
 * the entries is significant.
 */
static const uint16_t xe2_src0_index_table[8] = {
   0b00100000000, /* r<1;1,0>      */
   0b00000000000, /* r<0;1,0>      */
   0b01000000000, /* r<2;1,0>      */
   0b00100000010, /* -r<1;1,0>     */
   0b01100000000, /* r<4;1,0>      */
   0b00100000001, /* (abs)r<1;1,0> */
   0b00000000010, /* -r<0;1,0>     */
   0b01001000000, /* r<2;4,0>      */
};
962 
/* Source 1 lookup table for Xe2: 16 entries of 16-bit patterns.  The
 * trailing comment on each entry shows the source modifier, region and
 * sub-register byte offset the pattern encodes.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; presumably a src1 counterpart of set_src0_index() -- confirm.
 */
static const uint16_t xe2_src1_index_table[16] = {
   0b0000100000000000, /* r<1;1,0>.0  */
   0b0000000000000000, /* r<0;1,0>.0  */
   0b1000100000000000, /* -r<1;1,0>.0 */
   0b0000000000010000, /* r<0;1,0>.8  */
   0b0000000000001000, /* r<0;1,0>.4  */
   0b0000000000011000, /* r<0;1,0>.12 */
   0b0000000001010000, /* r<0;1,0>.40 */
   0b0000000001000000, /* r<0;1,0>.32 */
   0b0000000000100000, /* r<0;1,0>.16 */
   0b0000000001111000, /* r<0;1,0>.60 */
   0b0000000000111000, /* r<0;1,0>.28 */
   0b0000000000101000, /* r<0;1,0>.20 */
   0b0000000001011000, /* r<0;1,0>.44 */
   0b0000000001001000, /* r<0;1,0>.36 */
   0b0000000001110000, /* r<0;1,0>.56 */
   0b0000000000110000, /* r<0;1,0>.24 */
};
981 
/* 3-source control index table shared by Broadwell and Cherryview.
 *
 * This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware, so the same four
 * entries (each written out as a 26-bit binary pattern) are used for both.
 */
static const uint32_t gfx8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001,
};
994 
/* 3-source source index table shared by Broadwell and Cherryview.
 *
 * This is actually the control index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * NOTE(review): "low 44 bits" does not line up with the 46-bit Broadwell
 * width quoted above (49 - 3 = 46) -- confirm which number is intended.
 * Each entry is written out as a 49-bit binary pattern, hence uint64_t.
 */
static const uint64_t gfx8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000,
};
1008 
/* 3-source control index table for gfx12 (32 entries).  The trailing comment
 * on each entry decodes the pattern: write-enable "(W)", execution size and
 * channel offset, saturate, destination register file / stride, and the
 * destination and source types.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint64_t gfx12_3src_control_index_table[32] = {
   0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
   0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
   0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
   0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
   0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
   0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
   0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
   0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
   0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
};
1043 
/* 3-source control index table for XeHP (32 entries).  The first 22 entries
 * are annotated like the gfx12 table (write-enable, execution size / channel
 * offset, saturate, destination and operand types); the last 10 entries are
 * annotated as dpas encodings, some of them with {Atomic}.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint64_t xehp_3src_control_index_table[32] = {
   0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
   0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
   0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
   0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
   0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
   0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
   0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
   0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
   0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
   0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
   0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
   0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
   0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
   0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
};
1078 
/* 3-source control index table for Xe2 (16 entries, non-dpas encodings; the
 * dpas encodings live in xe2_3src_dpas_control_index_table).  The trailing
 * comment on each entry decodes the pattern: write-enable "(W)", execution
 * size / channel offset, saturate, destination register file / stride and
 * the operand types.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint64_t xe2_3src_control_index_table[16] = {
   0b0000010010100010101000000000000100, /* (16|M0) grf<1>:f :f :f :f      */
   0b0000010010000010101000000000000100, /* (16|M0) arf<1>:f :f :f :f      */
   0b0000010010100010101000100000000100, /* (W)(16|M0) grf<1>:f :f :f :f   */
   0b0000010010000010101000100000000100, /* (W)(16|M0) arf<1>:f :f :f :f   */
   0b0000011011100011101100000000000100, /* (16|M0) grf<1>:df :df :df :df  */
   0b0000011011100011101100000010000100, /* (16|M16) grf<1>:df :df :df :df */
   0b0000011011000011101100000000000100, /* (16|M0) arf<1>:df :df :df :df  */
   0b0000010010100010101000000000000101, /* (32|M0) grf<1>:f :f :f :f      */
   0b0000010010000010101000000000000101, /* (32|M0) arf<1>:f :f :f :f      */
   0b0000010010000010101010000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */
   0b0000010010100010101010000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */
   0b0000011011000011101100000010000100, /* (16|M16) arf<1>:df :df :df :df */
   0b0000010010100010101000100000000000, /* (W)(1|M0) grf<1>:f :f :f :f    */
   0b0000010010100010001000000000000100, /* (16|M0) grf<1>:ud :ud :ud :ud  */
   0b0000110110100110011000000000000101, /* (32|M0) grf<1>:d :d :d :d      */
   0b0000011011000011101100000000000011, /* (8|M0) arf<1>:df :df :df :df   */
};
1097 
/* Control index table for Xe2 dpas encodings (16 entries).  Every entry is
 * annotated as "dpas.8x* (16|M0)"; the entries differ in the destination and
 * source types and in whether the Atomic annotation is present.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint64_t xe2_3src_dpas_control_index_table[16] = {
   0b0000000000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub Atomic */
   0b0000000100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :b Atomic */
   0b0000100000111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :ub Atomic */
   0b0000100100111110011001000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b Atomic */
   0b0000000000111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :ub :ub */
   0b0000100100111110011000000000000100, /* dpas.8x* (16|M0) grf:d :d :b :b */
   0b0000101101111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf Atomic */
   0b0000101101111101101001000000000100, /* dpas.8x* (16|M0) grf:f :bf :bf :bf Atomic */
   0b0000101101111010110101000000000100, /* dpas.8x* (16|M0) grf:bf :f :bf :bf Atomic */
   0b0000101101111101110101000000000100, /* dpas.8x* (16|M0) grf:bf :bf :bf :bf Atomic */
   0b0000101101111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :bf :bf */
   0b0000001001111010101001000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf Atomic */
   0b0000001001111001101001000000000100, /* dpas.8x* (16|M0) grf:f :hf :hf :hf Atomic */
   0b0000001001111010100101000000000100, /* dpas.8x* (16|M0) grf:hf :f :hf :hf Atomic */
   0b0000001001111001100101000000000100, /* dpas.8x* (16|M0) grf:hf :hf :hf :hf Atomic */
   0b0000001001111010101000000000000100, /* dpas.8x* (16|M0) grf:f :f :hf :hf */
};
1116 
/* 3-source source index table for gfx12: 32 entries of 21-bit patterns.  The
 * trailing comment on each entry lists, for the three sources, the register
 * file, an optional negate ("-") and the region.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t gfx12_3src_source_index_table[32] = {
   0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
1151 
/* 3-source source index table for XeHP: 32 entries of 21-bit patterns.  The
 * first 24 entries are annotated with the register file, optional negate and
 * region of the three sources; the last 8 are annotated only as dpas.*x1
 * encodings with sub-byte source types.  One entry (grf<0;0> grf<0;0> grf<0>)
 * carries both kinds of annotation because the same pattern serves several
 * encodings.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t xehp_3src_source_index_table[32] = {
   0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
   0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
   0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
   0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
   0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
   0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
   0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
   0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
   0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
   0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
   0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
   0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
   0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
   0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
   0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
   0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
   0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
   0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
                             * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
                             * dpas.*x1  grf:f      grf:bf       grf:bf
                             */
   0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
   0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
   0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
   0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
   0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
   0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
   0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
   0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
   0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
   0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
   0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
   0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
};
1189 
/* 3-source source index table for Xe2 (non-dpas encodings): 16 entries of
 * 21-bit patterns.  The trailing comment on each entry lists, for the three
 * sources, the register file, an optional negate ("-") and the region.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t xe2_3src_source_index_table[16] = {
   0b101100000001100000001, /* grf<1;0> grf<1;0> grf<1>  */
   0b101100000001000000001, /* arf<1;0> grf<1;0> grf<1>  */
   0b100100000001100000000, /* grf<0;0> grf<1;0> grf<0>  */
   0b100100000001000000001, /* arf<1;0> grf<1;0> grf<0>  */
   0b100100000001100000001, /* grf<1;0> grf<1;0> grf<0>  */
   0b100000000001100000000, /* grf<0;0> arf<1;0> grf<0>  */
   0b100000000001100000001, /* grf<1;0> arf<1;0> grf<0>  */
   0b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */
   0b101000000001100000001, /* grf<1;0> arf<1;0> grf<1>  */
   0b101000000001000000001, /* arf<1;0> arf<1;0> grf<1>  */
   0b100000000001000000001, /* arf<1;0> arf<1;0> grf<0>  */
   0b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>  */
   0b100100000000100000001, /* grf<1;0> grf<0;0> grf<0>  */
   0b101100000101000000001, /* arf<1;0> grf<1;0> -grf<1> */
   0b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */
   0b100100010001000000001, /* arf<1;0> -grf<1;0> grf<0> */
};
1208 
/* Source index table for Xe2 dpas encodings: 16 entries of 21-bit patterns.
 * The trailing comments name the dpas variant (*x1, *x2 or *x8) and the
 * destination / source types each pattern covers; the first entry is shared
 * by several type combinations.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t xe2_3src_dpas_source_index_table[16] = {
   0b100100000000100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[ub,b]
                             * dpas.*x1 grf:[f,bf] grf:bf grf:bf
                             * dpas.*x1 grf:[f,hf] grf:hf grf:hf
                             */
   0b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */
   0b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */
   0b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */
   0b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */
   0b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100000010100000010, /* dpas.*x2 grf:d grf:[ub,b] grf:[u4,s4] */
   0b100100001000100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[ub,b] */
   0b100100001010100000010, /* dpas.*x2 grf:d grf:[u4,s4] grf:[u4,s4] */
   0b100100010100100000010, /* dpas.*x2 grf:d grf:[u2,s2] grf:[u2,s2] */
   0b100100000000100001110, /* dpas.*x8 grf:d grf:[ub,b] grf:[ub,b] */
   0b100100001010100001110, /* dpas.*x8 grf:d grf:[u4,s4] grf:[u4,s4] */
};
1230 
/* 3-source sub-register table for gfx12: 32 entries of 20-bit patterns.  The
 * trailing comment on each entry lists the four sub-register byte offsets
 * the pattern encodes; in every entry the first one is .0.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t gfx12_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0  .0  .0  .0  */
   0b00100000000000000000, /* .0  .0  .0  .4  */
   0b00000000000110000000, /* .0  .12 .0  .0  */
   0b10100000000000000000, /* .0  .0  .0  .20 */
   0b10000000001110000000, /* .0  .28 .0  .16 */
   0b01100000000000000000, /* .0  .0  .0  .12 */
   0b01000000000000000000, /* .0  .0  .0  .8  */
   0b00000010000000000000, /* .0  .0  .8  .0  */
   0b00000001000000000000, /* .0  .0  .4  .0  */
   0b11000000000000000000, /* .0  .0  .0  .24 */
   0b10000000000000000000, /* .0  .0  .0  .16 */
   0b11100000000000000000, /* .0  .0  .0  .28 */
   0b00000110000000000000, /* .0  .0  .24 .0  */
   0b00000000000010000000, /* .0  .4  .0  .0  */
   0b00000100000000000000, /* .0  .0  .16 .0  */
   0b00000011000000000000, /* .0  .0  .12 .0  */
   0b00000101000000000000, /* .0  .0  .20 .0  */
   0b00000111000000000000, /* .0  .0  .28 .0  */
   0b00000000000100000000, /* .0  .8  .0  .0  */
   0b00000000001000000000, /* .0  .16 .0  .0  */
   0b00000000001100000000, /* .0  .24 .0  .0  */
   0b00000000001010000000, /* .0  .20 .0  .0  */
   0b00000000001110000000, /* .0  .28 .0  .0  */
   0b11000000001110000000, /* .0  .28 .0  .24 */
   0b00100000000100000000, /* .0  .8  .0  .4  */
   0b00100000000110000000, /* .0  .12 .0  .4  */
   0b01000000000110000000, /* .0  .12 .0  .8  */
   0b10000000001100000000, /* .0  .24 .0  .16 */
   0b10000000001010000000, /* .0  .20 .0  .16 */
   0b01100000000010000000, /* .0  .4  .0  .12 */
   0b10100000001110000000, /* .0  .28 .0  .20 */
   0b01000000000010000000, /* .0  .4  .0  .8  */
};
1265 
/* 3-source sub-register table for Xe2: 32 entries of 20-bit patterns.  The
 * trailing comment on each entry lists the four sub-register byte offsets
 * the pattern encodes; in every entry the first one is .0.  Offsets here go
 * up to .60, whereas the gfx12 table above stops at .28.
 *
 * NOTE(review): the code that searches this table is not in this part of the
 * file; entry order presumably defines the compacted index -- confirm.
 */
static const uint32_t xe2_3src_subreg_table[32] = {
   0b00000000000000000000, /* .0 .0 .0 .0   */
   0b00100000000000000000, /* .0 .0 .0 .8   */
   0b10000000000000000000, /* .0 .0 .0 .32  */
   0b00010000000000000000, /* .0 .0 .0 .4   */
   0b11100000000000000000, /* .0 .0 .0 .56  */
   0b01010000000000000000, /* .0 .0 .0 .20  */
   0b10110000000000000000, /* .0 .0 .0 .44  */
   0b01000000000011000000, /* .0 .12 .0 .16 */
   0b01100000000000000000, /* .0 .0 .0 .24  */
   0b10100000000000000000, /* .0 .0 .0 .40  */
   0b11000000000000000000, /* .0 .0 .0 .48  */
   0b01000000000000000000, /* .0 .0 .0 .16  */
   0b01110000000110000000, /* .0 .24 .0 .28 */
   0b10100000001001000000, /* .0 .36 .0 .40 */
   0b11010000001100000000, /* .0 .48 .0 .52 */
   0b01110000000000000000, /* .0 .0 .0 .28  */
   0b11110000000000000000, /* .0 .0 .0 .60  */
   0b10010000000000000000, /* .0 .0 .0 .36  */
   0b00110000000000000000, /* .0 .0 .0 .12  */
   0b00100000000010000000, /* .0 .8 .0 .8   */
   0b00010000000001000000, /* .0 .4 .0 .4   */
   0b00110000000011000000, /* .0 .12 .0 .12 */
   0b11010000000000000000, /* .0 .0 .0 .52  */
   0b00000000000001000000, /* .0 .4 .0 .0   */
   0b00000101100000000000, /* .0 .0 .44 .0  */
   0b00000100000000000000, /* .0 .0 .32 .0  */
   0b00000000000010000000, /* .0 .8 .0 .0   */
   0b00000000001100000000, /* .0 .48 .0 .0  */
   0b00000000001101000000, /* .0 .52 .0 .0  */
   0b00000110100000000000, /* .0 .0 .52 .0  */
   0b00000000001000000000, /* .0 .32 .0 .0  */
   0b00000000001111000000, /* .0 .60 .0 .0  */
};
1300 
/* Bundle of lookup tables consulted by the set_*_index() helpers.  The
 * struct only holds pointers; it does not own the tables.
 */
struct compaction_state {
   /* ISA description; the helpers read isa->devinfo to pick a bit layout. */
   const struct elk_isa_info *isa;
   /* Searched by set_control_index(), which scans the first 32 entries. */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), which scans the first 32 entries. */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(). */
   const uint16_t *src0_index_table;
   /* NOTE(review): presumably searched by a src1 counterpart of
    * set_src0_index(); that code is not in this part of the file.
    */
   const uint16_t *src1_index_table;
};
1309 
/* Forward declaration; the definition is elsewhere in this file.
 * NOTE(review): presumably fills in the table pointers of *c for the
 * generation described by isa -- confirm against the definition.
 */
static void compaction_state_init(struct compaction_state *c,
                                  const struct elk_isa_info *isa);
1312 
1313 static bool
set_control_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1314 set_control_index(const struct compaction_state *c,
1315                   elk_compact_inst *dst, const elk_inst *src)
1316 {
1317    const struct intel_device_info *devinfo = c->isa->devinfo;
1318    uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1319 
1320    if (devinfo->ver >= 20) {
1321       uncompacted = (elk_inst_bits(src, 95, 92) << 14) | /*  4b */
1322                     (elk_inst_bits(src, 34, 34) << 13) | /*  1b */
1323                     (elk_inst_bits(src, 32, 32) << 12) | /*  1b */
1324                     (elk_inst_bits(src, 31, 31) << 11) | /*  1b */
1325                     (elk_inst_bits(src, 28, 28) << 10) | /*  1b */
1326                     (elk_inst_bits(src, 27, 26) <<  8) | /*  2b */
1327                     (elk_inst_bits(src, 25, 24) <<  6) | /*  2b */
1328                     (elk_inst_bits(src, 23, 21) <<  3) | /*  3b */
1329                     (elk_inst_bits(src, 20, 18));        /*  3b */
1330    } else if (devinfo->ver >= 12) {
1331       uncompacted = (elk_inst_bits(src, 95, 92) << 17) | /*  4b */
1332                     (elk_inst_bits(src, 34, 34) << 16) | /*  1b */
1333                     (elk_inst_bits(src, 33, 33) << 15) | /*  1b */
1334                     (elk_inst_bits(src, 32, 32) << 14) | /*  1b */
1335                     (elk_inst_bits(src, 31, 31) << 13) | /*  1b */
1336                     (elk_inst_bits(src, 28, 28) << 12) | /*  1b */
1337                     (elk_inst_bits(src, 27, 24) <<  8) | /*  4b */
1338                     (elk_inst_bits(src, 23, 22) <<  6) | /*  2b */
1339                     (elk_inst_bits(src, 21, 19) <<  3) | /*  3b */
1340                     (elk_inst_bits(src, 18, 16));        /*  3b */
1341    } else if (devinfo->ver >= 8) {
1342       uncompacted = (elk_inst_bits(src, 33, 31) << 16) | /*  3b */
1343                     (elk_inst_bits(src, 23, 12) <<  4) | /* 12b */
1344                     (elk_inst_bits(src, 10,  9) <<  2) | /*  2b */
1345                     (elk_inst_bits(src, 34, 34) <<  1) | /*  1b */
1346                     (elk_inst_bits(src,  8,  8));        /*  1b */
1347    } else {
1348       uncompacted = (elk_inst_bits(src, 31, 31) << 16) | /*  1b */
1349                     (elk_inst_bits(src, 23,  8));        /* 16b */
1350 
1351       /* On gfx7, the flag register and subregister numbers are integrated into
1352        * the control index.
1353        */
1354       if (devinfo->ver == 7)
1355          uncompacted |= elk_inst_bits(src, 90, 89) << 17; /* 2b */
1356    }
1357 
1358    for (int i = 0; i < 32; i++) {
1359       if (c->control_index_table[i] == uncompacted) {
1360          elk_compact_inst_set_control_index(devinfo, dst, i);
1361 	 return true;
1362       }
1363    }
1364 
1365    return false;
1366 }
1367 
1368 static bool
set_datatype_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1369 set_datatype_index(const struct compaction_state *c, elk_compact_inst *dst,
1370                    const elk_inst *src, bool is_immediate)
1371 {
1372    const struct intel_device_info *devinfo = c->isa->devinfo;
1373    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1374 
1375    if (devinfo->ver >= 12) {
1376       uncompacted = (elk_inst_bits(src, 91, 88) << 15) | /*  4b */
1377                     (elk_inst_bits(src, 66, 66) << 14) | /*  1b */
1378                     (elk_inst_bits(src, 50, 50) << 13) | /*  1b */
1379                     (elk_inst_bits(src, 49, 48) << 11) | /*  2b */
1380                     (elk_inst_bits(src, 47, 47) << 10) | /*  1b */
1381                     (elk_inst_bits(src, 46, 46) <<  9) | /*  1b */
1382                     (elk_inst_bits(src, 43, 40) <<  5) | /*  4b */
1383                     (elk_inst_bits(src, 39, 36) <<  1) | /*  4b */
1384                     (elk_inst_bits(src, 35, 35));        /*  1b */
1385 
1386       /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1387        * is present
1388        */
1389       if (!is_immediate) {
1390          uncompacted |= elk_inst_bits(src, 98, 98) << 19; /* 1b */
1391       }
1392    } else if (devinfo->ver >= 8) {
1393       uncompacted = (elk_inst_bits(src, 63, 61) << 18) | /*  3b */
1394                     (elk_inst_bits(src, 94, 89) << 12) | /*  6b */
1395                     (elk_inst_bits(src, 46, 35));        /* 12b */
1396    } else {
1397       uncompacted = (elk_inst_bits(src, 63, 61) << 15) | /*  3b */
1398                     (elk_inst_bits(src, 46, 32));        /* 15b */
1399    }
1400 
1401    for (int i = 0; i < 32; i++) {
1402       if (c->datatype_table[i] == uncompacted) {
1403          elk_compact_inst_set_datatype_index(devinfo, dst, i);
1404 	 return true;
1405       }
1406    }
1407 
1408    return false;
1409 }
1410 
1411 static bool
set_subreg_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate)1412 set_subreg_index(const struct compaction_state *c, elk_compact_inst *dst,
1413                  const elk_inst *src, bool is_immediate)
1414 {
1415    const struct intel_device_info *devinfo = c->isa->devinfo;
1416    const unsigned table_len = devinfo->ver >= 20 ?
1417       ARRAY_SIZE(xe2_subreg_table) : ARRAY_SIZE(g45_subreg_table);
1418    uint16_t uncompacted; /* 15b/G45+; 12b/Xe2+ */
1419 
1420    if (devinfo->ver >= 20) {
1421       uncompacted = (elk_inst_bits(src, 33, 33) << 0) |    /* 1b */
1422                     (elk_inst_bits(src, 55, 51) << 1) |    /* 5b */
1423                     (elk_inst_bits(src, 71, 67) << 6) |    /* 5b */
1424                     (elk_inst_bits(src, 87, 87) << 11);    /* 1b */
1425    } else if (devinfo->ver >= 12) {
1426       uncompacted = (elk_inst_bits(src, 55, 51) << 0) |    /* 5b */
1427                     (elk_inst_bits(src, 71, 67) << 5);     /* 5b */
1428 
1429       if (!is_immediate)
1430          uncompacted |= elk_inst_bits(src, 103, 99) << 10; /* 5b */
1431    } else {
1432       uncompacted = (elk_inst_bits(src, 52, 48) << 0) |    /* 5b */
1433                     (elk_inst_bits(src, 68, 64) << 5);     /* 5b */
1434 
1435       if (!is_immediate)
1436          uncompacted |= elk_inst_bits(src, 100, 96) << 10; /* 5b */
1437    }
1438 
1439    for (int i = 0; i < table_len; i++) {
1440       if (c->subreg_table[i] == uncompacted) {
1441          elk_compact_inst_set_subreg_index(devinfo, dst, i);
1442 	 return true;
1443       }
1444    }
1445 
1446    return false;
1447 }
1448 
1449 static bool
set_src0_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)1450 set_src0_index(const struct compaction_state *c, elk_compact_inst *dst,
1451                const elk_inst *src)
1452 {
1453    const struct intel_device_info *devinfo = c->isa->devinfo;
1454    uint16_t uncompacted; /* 12b/G45+; 11b/Xe2+ */
1455    int table_len;
1456 
1457    if (devinfo->ver >= 12) {
1458       table_len = (devinfo->ver >= 20 ? ARRAY_SIZE(xe2_src0_index_table) :
1459                    ARRAY_SIZE(gfx12_src0_index_table));
1460       uncompacted = (devinfo->ver >= 20 ? 0 :
1461                      elk_inst_bits(src, 87, 87) << 11) | /*  1b */
1462                     (elk_inst_bits(src, 86, 84) << 8) | /*  3b */
1463                     (elk_inst_bits(src, 83, 81) << 5) | /*  3b */
1464                     (elk_inst_bits(src, 80, 80) << 4) | /*  1b */
1465                     (elk_inst_bits(src, 65, 64) << 2) | /*  2b */
1466                     (elk_inst_bits(src, 45, 44));       /*  2b */
1467    } else {
1468       table_len = ARRAY_SIZE(gfx8_src_index_table);
1469       uncompacted = elk_inst_bits(src, 88, 77);         /* 12b */
1470    }
1471 
1472    for (int i = 0; i < table_len; i++) {
1473       if (c->src0_index_table[i] == uncompacted) {
1474          elk_compact_inst_set_src0_index(devinfo, dst, i);
1475 	 return true;
1476       }
1477    }
1478 
1479    return false;
1480 }
1481 
1482 static bool
set_src1_index(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src,bool is_immediate,unsigned imm)1483 set_src1_index(const struct compaction_state *c, elk_compact_inst *dst,
1484                const elk_inst *src, bool is_immediate, unsigned imm)
1485 {
1486    const struct intel_device_info *devinfo = c->isa->devinfo;
1487    if (is_immediate) {
1488       if (devinfo->ver >= 12) {
1489          /* src1 index takes the low 4 bits of the 12-bit compacted value */
1490          elk_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1491       } else {
1492          /* src1 index takes the high 5 bits of the 13-bit compacted value */
1493          elk_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1494       }
1495       return true;
1496    } else {
1497       uint16_t uncompacted; /* 12b/G45+ 16b/Xe2+ */
1498       int table_len;
1499 
1500       if (devinfo->ver >= 20) {
1501          table_len = ARRAY_SIZE(xe2_src1_index_table);
1502          uncompacted = (elk_inst_bits(src, 121, 120) << 14) | /*  2b */
1503                        (elk_inst_bits(src, 118, 116) << 11) | /*  3b */
1504                        (elk_inst_bits(src, 115, 113) <<  8) | /*  3b */
1505                        (elk_inst_bits(src, 112, 112) <<  7) | /*  1b */
1506                        (elk_inst_bits(src, 103,  99) <<  2) | /*  5b */
1507                        (elk_inst_bits(src,  97,  96));        /*  2b */
1508       } else if (devinfo->ver >= 12) {
1509          table_len = ARRAY_SIZE(gfx12_src0_index_table);
1510          uncompacted = (elk_inst_bits(src, 121, 120) << 10) | /*  2b */
1511                        (elk_inst_bits(src, 119, 116) <<  6) | /*  4b */
1512                        (elk_inst_bits(src, 115, 113) <<  3) | /*  3b */
1513                        (elk_inst_bits(src, 112, 112) <<  2) | /*  1b */
1514                        (elk_inst_bits(src,  97,  96));        /*  2b */
1515       } else {
1516          table_len = ARRAY_SIZE(gfx8_src_index_table);
1517          uncompacted = elk_inst_bits(src, 120, 109);          /* 12b */
1518       }
1519 
1520       for (int i = 0; i < table_len; i++) {
1521          if (c->src1_index_table[i] == uncompacted) {
1522             elk_compact_inst_set_src1_index(devinfo, dst, i);
1523             return true;
1524          }
1525       }
1526    }
1527 
1528    return false;
1529 }
1530 
1531 static bool
set_3src_control_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src,bool is_dpas)1532 set_3src_control_index(const struct intel_device_info *devinfo,
1533                        elk_compact_inst *dst, const elk_inst *src,
1534                        bool is_dpas)
1535 {
1536    assert(devinfo->ver >= 8);
1537 
1538    if (devinfo->ver >= 20) {
1539       assert(is_dpas || !elk_inst_bits(src, 49, 49));
1540 
1541       const uint64_t uncompacted =        /* 34b/Xe2+ */
1542          (elk_inst_bits(src, 95, 92) << 30) | /*  4b */
1543          (elk_inst_bits(src, 90, 88) << 27) | /*  3b */
1544          (elk_inst_bits(src, 82, 80) << 24) | /*  3b */
1545          (elk_inst_bits(src, 50, 50) << 23) | /*  1b */
1546          (elk_inst_bits(src, 49, 48) << 21) | /*  2b */
1547          (elk_inst_bits(src, 42, 40) << 18) | /*  3b */
1548          (elk_inst_bits(src, 39, 39) << 17) | /*  1b */
1549          (elk_inst_bits(src, 38, 36) << 14) | /*  3b */
1550          (elk_inst_bits(src, 34, 34) << 13) | /*  1b */
1551          (elk_inst_bits(src, 32, 32) << 12) | /*  1b */
1552          (elk_inst_bits(src, 31, 31) << 11) | /*  1b */
1553          (elk_inst_bits(src, 28, 28) << 10) | /*  1b */
1554          (elk_inst_bits(src, 27, 26) <<  8) | /*  2b */
1555          (elk_inst_bits(src, 25, 24) <<  6) | /*  2b */
1556          (elk_inst_bits(src, 23, 21) <<  3) | /*  3b */
1557          (elk_inst_bits(src, 20, 18));        /*  3b */
1558 
1559       /* The bits used to index the tables for 3src and 3src-dpas
1560        * are the same, so just need to pick the right one.
1561        */
1562       const uint64_t *table = is_dpas ? xe2_3src_dpas_control_index_table :
1563                                         xe2_3src_control_index_table;
1564       const unsigned size = is_dpas ? ARRAY_SIZE(xe2_3src_dpas_control_index_table) :
1565                                       ARRAY_SIZE(xe2_3src_control_index_table);
1566       for (unsigned i = 0; i < size; i++) {
1567          if (table[i] == uncompacted) {
1568             elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1569             return true;
1570          }
1571       }
1572    } else if (devinfo->verx10 >= 125) {
1573       uint64_t uncompacted =             /* 37b/XeHP+ */
1574          (elk_inst_bits(src, 95, 92) << 33) | /*  4b */
1575          (elk_inst_bits(src, 90, 88) << 30) | /*  3b */
1576          (elk_inst_bits(src, 82, 80) << 27) | /*  3b */
1577          (elk_inst_bits(src, 50, 50) << 26) | /*  1b */
1578          (elk_inst_bits(src, 49, 48) << 24) | /*  2b */
1579          (elk_inst_bits(src, 42, 40) << 21) | /*  3b */
1580          (elk_inst_bits(src, 39, 39) << 20) | /*  1b */
1581          (elk_inst_bits(src, 38, 36) << 17) | /*  3b */
1582          (elk_inst_bits(src, 34, 34) << 16) | /*  1b */
1583          (elk_inst_bits(src, 33, 33) << 15) | /*  1b */
1584          (elk_inst_bits(src, 32, 32) << 14) | /*  1b */
1585          (elk_inst_bits(src, 31, 31) << 13) | /*  1b */
1586          (elk_inst_bits(src, 28, 28) << 12) | /*  1b */
1587          (elk_inst_bits(src, 27, 24) <<  8) | /*  4b */
1588          (elk_inst_bits(src, 23, 23) <<  7) | /*  1b */
1589          (elk_inst_bits(src, 22, 22) <<  6) | /*  1b */
1590          (elk_inst_bits(src, 21, 19) <<  3) | /*  3b */
1591          (elk_inst_bits(src, 18, 16));        /*  3b */
1592 
1593       for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1594          if (xehp_3src_control_index_table[i] == uncompacted) {
1595             elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1596             return true;
1597          }
1598       }
1599    } else if (devinfo->ver >= 12) {
1600       uint64_t uncompacted =             /* 36b/TGL+ */
1601          (elk_inst_bits(src, 95, 92) << 32) | /*  4b */
1602          (elk_inst_bits(src, 90, 88) << 29) | /*  3b */
1603          (elk_inst_bits(src, 82, 80) << 26) | /*  3b */
1604          (elk_inst_bits(src, 50, 50) << 25) | /*  1b */
1605          (elk_inst_bits(src, 48, 48) << 24) | /*  1b */
1606          (elk_inst_bits(src, 42, 40) << 21) | /*  3b */
1607          (elk_inst_bits(src, 39, 39) << 20) | /*  1b */
1608          (elk_inst_bits(src, 38, 36) << 17) | /*  3b */
1609          (elk_inst_bits(src, 34, 34) << 16) | /*  1b */
1610          (elk_inst_bits(src, 33, 33) << 15) | /*  1b */
1611          (elk_inst_bits(src, 32, 32) << 14) | /*  1b */
1612          (elk_inst_bits(src, 31, 31) << 13) | /*  1b */
1613          (elk_inst_bits(src, 28, 28) << 12) | /*  1b */
1614          (elk_inst_bits(src, 27, 24) <<  8) | /*  4b */
1615          (elk_inst_bits(src, 23, 23) <<  7) | /*  1b */
1616          (elk_inst_bits(src, 22, 22) <<  6) | /*  1b */
1617          (elk_inst_bits(src, 21, 19) <<  3) | /*  3b */
1618          (elk_inst_bits(src, 18, 16));        /*  3b */
1619 
1620       for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1621          if (gfx12_3src_control_index_table[i] == uncompacted) {
1622             elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1623             return true;
1624          }
1625       }
1626    } else {
1627       uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1628          (elk_inst_bits(src, 34, 32) << 21) |  /*  3b */
1629          (elk_inst_bits(src, 28,  8));         /* 21b */
1630 
1631       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1632          uncompacted |=
1633             elk_inst_bits(src, 36, 35) << 24;  /*  2b */
1634       }
1635 
1636       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1637          if (gfx8_3src_control_index_table[i] == uncompacted) {
1638             elk_compact_inst_set_3src_control_index(devinfo, dst, i);
1639             return true;
1640          }
1641       }
1642    }
1643 
1644    return false;
1645 }
1646 
1647 static bool
set_3src_source_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src,bool is_dpas)1648 set_3src_source_index(const struct intel_device_info *devinfo,
1649                       elk_compact_inst *dst, const elk_inst *src,
1650                       bool is_dpas)
1651 {
1652    assert(devinfo->ver >= 8);
1653 
1654    if (devinfo->ver >= 12) {
1655       uint32_t uncompacted =               /* 21b/TGL+ */
1656          (elk_inst_bits(src, 114, 114) << 20) | /*  1b */
1657          (elk_inst_bits(src, 113, 112) << 18) | /*  2b */
1658          (elk_inst_bits(src,  98,  98) << 17) | /*  1b */
1659          (elk_inst_bits(src,  97,  96) << 15) | /*  2b */
1660          (elk_inst_bits(src,  91,  91) << 14) | /*  1b */
1661          (elk_inst_bits(src,  87,  86) << 12) | /*  2b */
1662          (elk_inst_bits(src,  85,  84) << 10) | /*  2b */
1663          (elk_inst_bits(src,  83,  83) <<  9) | /*  1b */
1664          (elk_inst_bits(src,  66,  66) <<  8) | /*  1b */
1665          (elk_inst_bits(src,  65,  64) <<  6) | /*  2b */
1666          (elk_inst_bits(src,  47,  47) <<  5) | /*  1b */
1667          (elk_inst_bits(src,  46,  46) <<  4) | /*  1b */
1668          (elk_inst_bits(src,  45,  44) <<  2) | /*  2b */
1669          (elk_inst_bits(src,  43,  43) <<  1) | /*  1b */
1670          (elk_inst_bits(src,  35,  35));        /*  1b */
1671 
1672       /* In Xe2, the bits used to index the tables for 3src and 3src-dpas
1673        * are the same, so just need to pick the right one.
1674        */
1675       const uint32_t *three_src_source_index_table =
1676          devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
1677                                          xe2_3src_source_index_table) :
1678          devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
1679          gfx12_3src_source_index_table;
1680       const uint32_t three_src_source_index_table_len =
1681          devinfo->ver >= 20 ? (is_dpas ? ARRAY_SIZE(xe2_3src_dpas_source_index_table) :
1682                                          ARRAY_SIZE(xe2_3src_source_index_table)) :
1683          devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1684          ARRAY_SIZE(gfx12_3src_source_index_table);
1685 
1686       for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1687          if (three_src_source_index_table[i] == uncompacted) {
1688             elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1689             return true;
1690          }
1691       }
1692    } else {
1693       uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1694          (elk_inst_bits(src,  83,  83) << 43) |   /*  1b */
1695          (elk_inst_bits(src, 114, 107) << 35) |   /*  8b */
1696          (elk_inst_bits(src,  93,  86) << 27) |   /*  8b */
1697          (elk_inst_bits(src,  72,  65) << 19) |   /*  8b */
1698          (elk_inst_bits(src,  55,  37));          /* 19b */
1699 
1700       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1701          uncompacted |=
1702             (elk_inst_bits(src, 126, 125) << 47) | /* 2b */
1703             (elk_inst_bits(src, 105, 104) << 45) | /* 2b */
1704             (elk_inst_bits(src,  84,  84) << 44);  /* 1b */
1705       } else {
1706          uncompacted |=
1707             (elk_inst_bits(src, 125, 125) << 45) | /* 1b */
1708             (elk_inst_bits(src, 104, 104) << 44);  /* 1b */
1709       }
1710 
1711       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1712          if (gfx8_3src_source_index_table[i] == uncompacted) {
1713             elk_compact_inst_set_3src_source_index(devinfo, dst, i);
1714             return true;
1715          }
1716       }
1717    }
1718 
1719    return false;
1720 }
1721 
1722 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,elk_compact_inst * dst,const elk_inst * src)1723 set_3src_subreg_index(const struct intel_device_info *devinfo,
1724                       elk_compact_inst *dst, const elk_inst *src)
1725 {
1726    assert(devinfo->ver >= 12);
1727 
1728    uint32_t uncompacted =               /* 20b/TGL+ */
1729       (elk_inst_bits(src, 119, 115) << 15) | /*  5b */
1730       (elk_inst_bits(src, 103,  99) << 10) | /*  5b */
1731       (elk_inst_bits(src,  71,  67) <<  5) | /*  5b */
1732       (elk_inst_bits(src,  55,  51));        /*  5b */
1733 
1734    const uint32_t *table = devinfo->ver >= 20 ? xe2_3src_subreg_table :
1735                            gfx12_3src_subreg_table;
1736    const uint32_t len =
1737       devinfo->ver >= 20 ? ARRAY_SIZE(xe2_3src_subreg_table) :
1738       ARRAY_SIZE(gfx12_3src_subreg_table);
1739 
1740    for (unsigned i = 0; i < len; i++) {
1741       if (table[i] == uncompacted) {
1742          elk_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1743 	 return true;
1744       }
1745    }
1746 
1747    return false;
1748 }
1749 
1750 static bool
has_unmapped_bits(const struct elk_isa_info * isa,const elk_inst * src)1751 has_unmapped_bits(const struct elk_isa_info *isa, const elk_inst *src)
1752 {
1753    const struct intel_device_info *devinfo = isa->devinfo;
1754 
1755    /* EOT can only be mapped on a send if the src1 is an immediate */
1756    if ((elk_inst_opcode(isa, src) == ELK_OPCODE_SENDC ||
1757         elk_inst_opcode(isa, src) == ELK_OPCODE_SEND) &&
1758        elk_inst_eot(devinfo, src))
1759       return true;
1760 
1761    /* Check for instruction bits that don't map to any of the fields of the
1762     * compacted instruction.  The instruction cannot be compacted if any of
1763     * them are set.  They overlap with:
1764     *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1765     *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1766     *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1767     *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1768     *  - UIP[31] (bit 95 on Gfx8)
1769     */
1770    if (devinfo->ver >= 12) {
1771       assert(!elk_inst_bits(src, 7,  7));
1772       return false;
1773    } else if (devinfo->ver >= 8) {
1774       assert(!elk_inst_bits(src, 7,  7));
1775       return elk_inst_bits(src, 95, 95) ||
1776              elk_inst_bits(src, 47, 47) ||
1777              elk_inst_bits(src, 11, 11);
1778    } else {
1779       assert(!elk_inst_bits(src, 7,  7) &&
1780              !(devinfo->ver < 7 && elk_inst_bits(src, 90, 90)));
1781       return elk_inst_bits(src, 95, 91) ||
1782              elk_inst_bits(src, 47, 47);
1783    }
1784 }
1785 
1786 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const elk_inst * src,bool is_dpas)1787 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1788                        const elk_inst *src, bool is_dpas)
1789 {
1790    /* Check for three-source instruction bits that don't map to any of the
1791     * fields of the compacted instruction.  All of them seem to be reserved
1792     * bits currently.
1793     */
1794    if (devinfo->ver >= 20) {
1795       assert(is_dpas || !elk_inst_bits(src, 49, 49));
1796       assert(!elk_inst_bits(src, 33, 33));
1797       assert(!elk_inst_bits(src, 7, 7));
1798    } else if (devinfo->ver >= 12) {
1799       assert(is_dpas || !elk_inst_bits(src, 49, 49));
1800       assert(!elk_inst_bits(src, 7, 7));
1801    } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1802       assert(!elk_inst_bits(src, 127, 127) &&
1803              !elk_inst_bits(src, 7,  7));
1804    } else {
1805       assert(devinfo->ver >= 8);
1806       assert(!elk_inst_bits(src, 127, 126) &&
1807              !elk_inst_bits(src, 105, 105) &&
1808              !elk_inst_bits(src, 84, 84) &&
1809              !elk_inst_bits(src, 7,  7));
1810 
1811       /* Src1Type and Src2Type, used for mixed-precision floating point */
1812       if (elk_inst_bits(src, 36, 35))
1813          return true;
1814    }
1815 
1816    return false;
1817 }
1818 
1819 static bool
elk_try_compact_3src_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)1820 elk_try_compact_3src_instruction(const struct elk_isa_info *isa,
1821                                  elk_compact_inst *dst, const elk_inst *src)
1822 {
1823    const struct intel_device_info *devinfo = isa->devinfo;
1824    assert(devinfo->ver >= 8);
1825 
1826    bool is_dpas = elk_inst_opcode(isa, src) == ELK_OPCODE_DPAS;
1827    if (has_3src_unmapped_bits(devinfo, src, is_dpas))
1828       return false;
1829 
1830 #define compact(field) \
1831    elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_##field(devinfo, src))
1832 #define compact_a16(field) \
1833    elk_compact_inst_set_3src_##field(devinfo, dst, elk_inst_3src_a16_##field(devinfo, src))
1834 
1835    compact(hw_opcode);
1836 
1837    if (!set_3src_control_index(devinfo, dst, src, is_dpas))
1838       return false;
1839 
1840    if (!set_3src_source_index(devinfo, dst, src, is_dpas))
1841       return false;
1842 
1843    if (devinfo->ver >= 12) {
1844       if (!set_3src_subreg_index(devinfo, dst, src))
1845          return false;
1846 
1847       compact(swsb);
1848       compact(debug_control);
1849       compact(dst_reg_nr);
1850       compact(src0_reg_nr);
1851       compact(src1_reg_nr);
1852       compact(src2_reg_nr);
1853    } else {
1854       compact(dst_reg_nr);
1855       compact_a16(src0_rep_ctrl);
1856       compact(debug_control);
1857       compact(saturate);
1858       compact_a16(src1_rep_ctrl);
1859       compact_a16(src2_rep_ctrl);
1860       compact(src0_reg_nr);
1861       compact(src1_reg_nr);
1862       compact(src2_reg_nr);
1863       compact_a16(src0_subreg_nr);
1864       compact_a16(src1_subreg_nr);
1865       compact_a16(src2_subreg_nr);
1866    }
1867    elk_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1868 
1869 #undef compact
1870 #undef compact_a16
1871 
1872    return true;
1873 }
1874 
1875 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1876  * sources, and a 13th bit that's replicated through the high 20 bits.
1877  *
1878  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1879  * of packed vectors as compactable immediates.
1880  *
1881  * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1882  * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1883  * while for unsigned integers it is not.
1884  *
1885  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1886  */
1887 static int
compact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned imm)1888 compact_immediate(const struct intel_device_info *devinfo,
1889                   enum elk_reg_type type, unsigned imm)
1890 {
1891    if (devinfo->ver >= 12) {
1892       /* 16-bit immediates need to be replicated through the 32-bit immediate
1893        * field
1894        */
1895       switch (type) {
1896       case ELK_REGISTER_TYPE_W:
1897       case ELK_REGISTER_TYPE_UW:
1898       case ELK_REGISTER_TYPE_HF:
1899          if ((imm >> 16) != (imm & 0xffff))
1900             return -1;
1901          break;
1902       default:
1903          break;
1904       }
1905 
1906       switch (type) {
1907       case ELK_REGISTER_TYPE_F:
1908          /* We get the high 12-bits as-is; rest must be zero */
1909          if ((imm & 0xfffff) == 0)
1910             return (imm >> 20) & 0xfff;
1911          break;
1912       case ELK_REGISTER_TYPE_HF:
1913          /* We get the high 12-bits as-is; rest must be zero */
1914          if ((imm & 0xf) == 0)
1915             return (imm >> 4) & 0xfff;
1916          break;
1917       case ELK_REGISTER_TYPE_UD:
1918       case ELK_REGISTER_TYPE_VF:
1919       case ELK_REGISTER_TYPE_UV:
1920       case ELK_REGISTER_TYPE_V:
1921          /* We get the low 12-bits as-is; rest must be zero */
1922          if ((imm & 0xfffff000) == 0)
1923             return imm & 0xfff;
1924          break;
1925       case ELK_REGISTER_TYPE_UW:
1926          /* We get the low 12-bits as-is; rest must be zero */
1927          if ((imm & 0xf000) == 0)
1928             return imm & 0xfff;
1929          break;
1930       case ELK_REGISTER_TYPE_D:
1931          /* We get the low 11-bits as-is; 12th is replicated */
1932          if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1933             return imm & 0xfff;
1934          break;
1935       case ELK_REGISTER_TYPE_W:
1936          /* We get the low 11-bits as-is; 12th is replicated */
1937          if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1938             return imm & 0xfff;
1939          break;
1940       case ELK_REGISTER_TYPE_NF:
1941       case ELK_REGISTER_TYPE_DF:
1942       case ELK_REGISTER_TYPE_Q:
1943       case ELK_REGISTER_TYPE_UQ:
1944       case ELK_REGISTER_TYPE_B:
1945       case ELK_REGISTER_TYPE_UB:
1946          return -1;
1947       }
1948    } else {
1949       /* We get the low 12 bits as-is; 13th is replicated */
1950       if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1951          return imm & 0x1fff;
1952       }
1953    }
1954 
1955    return -1;
1956 }
1957 
1958 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum elk_reg_type type,unsigned compact_imm)1959 uncompact_immediate(const struct intel_device_info *devinfo,
1960                     enum elk_reg_type type, unsigned compact_imm)
1961 {
1962    if (devinfo->ver >= 12) {
1963       switch (type) {
1964       case ELK_REGISTER_TYPE_F:
1965          return compact_imm << 20;
1966       case ELK_REGISTER_TYPE_HF:
1967          return (compact_imm << 20) | (compact_imm << 4);
1968       case ELK_REGISTER_TYPE_UD:
1969       case ELK_REGISTER_TYPE_VF:
1970       case ELK_REGISTER_TYPE_UV:
1971       case ELK_REGISTER_TYPE_V:
1972          return compact_imm;
1973       case ELK_REGISTER_TYPE_UW:
1974          /* Replicate */
1975          return compact_imm << 16 | compact_imm;
1976       case ELK_REGISTER_TYPE_D:
1977          /* Extend the 12th bit into the high 20 bits */
1978          return (int)(compact_imm << 20) >> 20;
1979       case ELK_REGISTER_TYPE_W:
1980          /* Extend the 12th bit into the high 4 bits and replicate */
1981          return ((int)(compact_imm << 20) >> 4) |
1982                 ((unsigned short)((short)(compact_imm << 4) >> 4));
1983       case ELK_REGISTER_TYPE_NF:
1984       case ELK_REGISTER_TYPE_DF:
1985       case ELK_REGISTER_TYPE_Q:
1986       case ELK_REGISTER_TYPE_UQ:
1987       case ELK_REGISTER_TYPE_B:
1988       case ELK_REGISTER_TYPE_UB:
1989          unreachable("not reached");
1990       }
1991    } else {
1992       /* Replicate the 13th bit into the high 19 bits */
1993       return (int)(compact_imm << 19) >> 19;
1994    }
1995 
1996    unreachable("not reached");
1997 }
1998 
1999 static bool
has_immediate(const struct intel_device_info * devinfo,const elk_inst * inst,enum elk_reg_type * type)2000 has_immediate(const struct intel_device_info *devinfo, const elk_inst *inst,
2001               enum elk_reg_type *type)
2002 {
2003    if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
2004       *type = elk_inst_src0_type(devinfo, inst);
2005       return *type != INVALID_REG_TYPE;
2006    } else if (elk_inst_src1_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
2007       *type = elk_inst_src1_type(devinfo, inst);
2008       return *type != INVALID_REG_TYPE;
2009    }
2010 
2011    return false;
2012 }
2013 
2014 /**
2015  * Applies some small changes to instruction types to increase chances of
2016  * compaction.
2017  */
2018 static elk_inst
precompact(const struct elk_isa_info * isa,elk_inst inst)2019 precompact(const struct elk_isa_info *isa, elk_inst inst)
2020 {
2021    const struct intel_device_info *devinfo = isa->devinfo;
2022 
2023    /* In XeHP the compaction tables removed the entries for source regions
2024     * <8;8,1> giving preference to <1;1,0> as the way to indicate
2025     * sequential elements, so convert to those before compacting.
2026     */
2027    if (devinfo->verx10 >= 125) {
2028       if (elk_inst_src0_reg_file(devinfo, &inst) == ELK_GENERAL_REGISTER_FILE &&
2029           elk_inst_src0_vstride(devinfo, &inst) > ELK_VERTICAL_STRIDE_1 &&
2030           elk_inst_src0_vstride(devinfo, &inst) == (elk_inst_src0_width(devinfo, &inst) + 1) &&
2031           elk_inst_src0_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2032          elk_inst_set_src0_vstride(devinfo, &inst, ELK_VERTICAL_STRIDE_1);
2033          elk_inst_set_src0_width(devinfo, &inst, ELK_WIDTH_1);
2034          elk_inst_set_src0_hstride(devinfo, &inst, ELK_HORIZONTAL_STRIDE_0);
2035       }
2036 
2037       if (elk_inst_src1_reg_file(devinfo, &inst) == ELK_GENERAL_REGISTER_FILE &&
2038           elk_inst_src1_vstride(devinfo, &inst) > ELK_VERTICAL_STRIDE_1 &&
2039           elk_inst_src1_vstride(devinfo, &inst) == (elk_inst_src1_width(devinfo, &inst) + 1) &&
2040           elk_inst_src1_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2041          elk_inst_set_src1_vstride(devinfo, &inst, ELK_VERTICAL_STRIDE_1);
2042          elk_inst_set_src1_width(devinfo, &inst, ELK_WIDTH_1);
2043          elk_inst_set_src1_hstride(devinfo, &inst, ELK_HORIZONTAL_STRIDE_0);
2044       }
2045    }
2046 
2047    if (elk_inst_src0_reg_file(devinfo, &inst) != ELK_IMMEDIATE_VALUE)
2048       return inst;
2049 
2050    /* The Bspec's section titled "Non-present Operands" claims that if src0
2051     * is an immediate that src1's type must be the same as that of src0.
2052     *
2053     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
2054     * that do not follow this rule. E.g., from the IVB/HSW table:
2055     *
2056     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
2057     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
2058     *
2059     * And from the SNB table:
2060     *
2061     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
2062     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
2063     *
2064     * Neither of these cause warnings from the simulator when used,
2065     * compacted or otherwise. In fact, all compaction mappings that have an
2066     * immediate in src0 use a:ud for src1.
2067     *
2068     * The GM45 instruction compaction tables do not contain mapped meanings
2069     * so it's not clear whether it has the restriction. We'll assume it was
2070     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
2071     *
2072     * Don't do any of this for 64-bit immediates, since the src1 fields
2073     * overlap with the immediate and setting them would overwrite the
2074     * immediate we set.
2075     */
2076    if (devinfo->ver >= 6 &&
2077        !(devinfo->platform == INTEL_PLATFORM_HSW &&
2078          elk_inst_opcode(isa, &inst) == ELK_OPCODE_DIM) &&
2079        !(devinfo->ver >= 8 &&
2080          (elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_DF ||
2081           elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_UQ ||
2082           elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_Q))) {
2083       elk_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
2084    }
2085 
2086    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
2087     * for immediate values. Presumably the hardware engineers realized
2088     * that the only useful floating-point value that could be represented
2089     * in this format is 0.0, which can also be represented as a VF-typed
2090     * immediate, so they gave us the previously mentioned mapping on IVB+.
2091     *
2092     * Strangely, we do have a mapping for imm:f in src1, so we don't need
2093     * to do this there.
2094     *
2095     * If we see a 0.0:F, change the type to VF so that it can be compacted.
2096     *
2097     * Compaction of floating-point immediates is improved on Gfx12, thus
2098     * removing the need for this.
2099     */
2100    if (devinfo->ver < 12 &&
2101        elk_inst_imm_ud(devinfo, &inst) == 0x0 &&
2102        elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_F &&
2103        elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_F &&
2104        elk_inst_dst_hstride(devinfo, &inst) == ELK_HORIZONTAL_STRIDE_1) {
2105       enum elk_reg_file file = elk_inst_src0_reg_file(devinfo, &inst);
2106       elk_inst_set_src0_file_type(devinfo, &inst, file, ELK_REGISTER_TYPE_VF);
2107    }
2108 
2109    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
2110     * set the types to :UD so the instruction can be compacted.
2111     *
2112     * FINISHME: Use dst:f | imm:f on Gfx12
2113     */
2114    if (devinfo->ver < 12 &&
2115        compact_immediate(devinfo, ELK_REGISTER_TYPE_D,
2116                          elk_inst_imm_ud(devinfo, &inst)) != -1 &&
2117        elk_inst_cond_modifier(devinfo, &inst) == ELK_CONDITIONAL_NONE &&
2118        elk_inst_src0_type(devinfo, &inst) == ELK_REGISTER_TYPE_D &&
2119        elk_inst_dst_type(devinfo, &inst) == ELK_REGISTER_TYPE_D) {
2120       enum elk_reg_file src_file = elk_inst_src0_reg_file(devinfo, &inst);
2121       enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, &inst);
2122 
2123       elk_inst_set_src0_file_type(devinfo, &inst, src_file, ELK_REGISTER_TYPE_UD);
2124       elk_inst_set_dst_file_type(devinfo, &inst, dst_file, ELK_REGISTER_TYPE_UD);
2125    }
2126 
2127    return inst;
2128 }
2129 
2130 /**
2131  * Tries to compact instruction src into dst.
2132  *
2133  * It doesn't modify dst unless src is compactable, which is relied on by
2134  * elk_compact_instructions().
2135  */
2136 static bool
try_compact_instruction(const struct compaction_state * c,elk_compact_inst * dst,const elk_inst * src)2137 try_compact_instruction(const struct compaction_state *c,
2138                         elk_compact_inst *dst, const elk_inst *src)
2139 {
2140    const struct intel_device_info *devinfo = c->isa->devinfo;
2141    elk_compact_inst temp;
2142 
2143    assert(elk_inst_cmpt_control(devinfo, src) == 0);
2144 
2145    if (elk_is_3src(c->isa, elk_inst_opcode(c->isa, src))) {
2146       if (devinfo->ver >= 8) {
2147          memset(&temp, 0, sizeof(temp));
2148          if (elk_try_compact_3src_instruction(c->isa, &temp, src)) {
2149             *dst = temp;
2150             return true;
2151          } else {
2152             return false;
2153          }
2154       } else {
2155          return false;
2156       }
2157    }
2158 
2159    enum elk_reg_type type;
2160    bool is_immediate = has_immediate(devinfo, src, &type);
2161 
2162    unsigned compacted_imm = 0;
2163 
2164    if (is_immediate) {
2165       /* Instructions with immediates cannot be compacted on Gen < 6 */
2166       if (devinfo->ver < 6)
2167          return false;
2168 
2169       compacted_imm = compact_immediate(devinfo, type,
2170                                         elk_inst_imm_ud(devinfo, src));
2171       if (compacted_imm == -1)
2172          return false;
2173    }
2174 
2175    if (has_unmapped_bits(c->isa, src))
2176       return false;
2177 
2178    memset(&temp, 0, sizeof(temp));
2179 
2180 #define compact(field) \
2181    elk_compact_inst_set_##field(devinfo, &temp, elk_inst_##field(devinfo, src))
2182 #define compact_reg(field) \
2183    elk_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
2184                                        elk_inst_##field##_da_reg_nr(devinfo, src))
2185 
2186    compact(hw_opcode);
2187    compact(debug_control);
2188 
2189    if (!set_control_index(c, &temp, src))
2190       return false;
2191    if (!set_datatype_index(c, &temp, src, is_immediate))
2192       return false;
2193    if (!set_subreg_index(c, &temp, src, is_immediate))
2194       return false;
2195    if (!set_src0_index(c, &temp, src))
2196       return false;
2197    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
2198       return false;
2199 
2200    if (devinfo->ver >= 12) {
2201       compact(swsb);
2202       compact_reg(dst);
2203       compact_reg(src0);
2204 
2205       if (is_immediate) {
2206          /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
2207          elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
2208       } else {
2209          compact_reg(src1);
2210       }
2211    } else {
2212       if (devinfo->ver >= 6) {
2213          compact(acc_wr_control);
2214       } else {
2215          compact(mask_control_ex);
2216       }
2217 
2218       if (devinfo->ver <= 6)
2219          compact(flag_subreg_nr);
2220 
2221       compact(cond_modifier);
2222 
2223       compact_reg(dst);
2224       compact_reg(src0);
2225 
2226       if (is_immediate) {
2227          /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
2228          elk_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
2229       } else {
2230          compact_reg(src1);
2231       }
2232    }
2233    elk_compact_inst_set_cmpt_control(devinfo, &temp, true);
2234 
2235 #undef compact
2236 #undef compact_reg
2237 
2238    *dst = temp;
2239 
2240    return true;
2241 }
2242 
2243 bool
elk_try_compact_instruction(const struct elk_isa_info * isa,elk_compact_inst * dst,const elk_inst * src)2244 elk_try_compact_instruction(const struct elk_isa_info *isa,
2245                             elk_compact_inst *dst, const elk_inst *src)
2246 {
2247    struct compaction_state c;
2248    compaction_state_init(&c, isa);
2249    return try_compact_instruction(&c, dst, src);
2250 }
2251 
2252 static void
set_uncompacted_control(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2253 set_uncompacted_control(const struct compaction_state *c, elk_inst *dst,
2254                         elk_compact_inst *src)
2255 {
2256    const struct intel_device_info *devinfo = c->isa->devinfo;
2257    uint32_t uncompacted =
2258       c->control_index_table[elk_compact_inst_control_index(devinfo, src)];
2259 
2260    if (devinfo->ver >= 20) {
2261       elk_inst_set_bits(dst, 95, 92, (uncompacted >> 14) & 0xf);
2262       elk_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
2263       elk_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
2264       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
2265       elk_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
2266       elk_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
2267       elk_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
2268       elk_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
2269       elk_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
2270    } else if (devinfo->ver >= 12) {
2271       elk_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
2272       elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2273       elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2274       elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2275       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2276       elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2277       elk_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2278       elk_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
2279       elk_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2280       elk_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2281    } else if (devinfo->ver >= 8) {
2282       elk_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
2283       elk_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
2284       elk_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
2285       elk_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
2286       elk_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
2287    } else {
2288       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
2289       elk_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
2290 
2291       if (devinfo->ver == 7)
2292          elk_inst_set_bits(dst, 90, 89, uncompacted >> 17);
2293    }
2294 }
2295 
2296 static void
set_uncompacted_datatype(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2297 set_uncompacted_datatype(const struct compaction_state *c, elk_inst *dst,
2298                          elk_compact_inst *src)
2299 {
2300    const struct intel_device_info *devinfo = c->isa->devinfo;
2301    uint32_t uncompacted =
2302       c->datatype_table[elk_compact_inst_datatype_index(devinfo, src)];
2303 
2304    if (devinfo->ver >= 12) {
2305       elk_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
2306       elk_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
2307       elk_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
2308       elk_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
2309       elk_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
2310       elk_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
2311       elk_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
2312       elk_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
2313       elk_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
2314       elk_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
2315    } else if (devinfo->ver >= 8) {
2316       elk_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
2317       elk_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
2318       elk_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
2319    } else {
2320       elk_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
2321       elk_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
2322    }
2323 }
2324 
2325 static void
set_uncompacted_subreg(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2326 set_uncompacted_subreg(const struct compaction_state *c, elk_inst *dst,
2327                        elk_compact_inst *src)
2328 {
2329    const struct intel_device_info *devinfo = c->isa->devinfo;
2330    uint16_t uncompacted =
2331       c->subreg_table[elk_compact_inst_subreg_index(devinfo, src)];
2332 
2333    if (devinfo->ver >= 20) {
2334       elk_inst_set_bits(dst, 33, 33, (uncompacted >> 0) & 0x1);
2335       elk_inst_set_bits(dst, 55, 51, (uncompacted >> 1) & 0x1f);
2336       elk_inst_set_bits(dst, 71, 67, (uncompacted >> 6) & 0x1f);
2337       elk_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
2338    } else if (devinfo->ver >= 12) {
2339       elk_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
2340       elk_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
2341       elk_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
2342    } else {
2343       elk_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
2344       elk_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
2345       elk_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
2346    }
2347 }
2348 
2349 static void
set_uncompacted_src0(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2350 set_uncompacted_src0(const struct compaction_state *c, elk_inst *dst,
2351                      elk_compact_inst *src)
2352 {
2353    const struct intel_device_info *devinfo = c->isa->devinfo;
2354    uint32_t compacted = elk_compact_inst_src0_index(devinfo, src);
2355    uint16_t uncompacted = c->src0_index_table[compacted];
2356 
2357    if (devinfo->ver >= 12) {
2358       if (devinfo->ver < 20)
2359          elk_inst_set_bits(dst, 87, 87, (uncompacted >> 11) & 0x1);
2360       elk_inst_set_bits(dst, 86, 84, (uncompacted >> 8) & 0x7);
2361       elk_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
2362       elk_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2363       elk_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2364       elk_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2365    } else {
2366       elk_inst_set_bits(dst, 88, 77, uncompacted);
2367    }
2368 }
2369 
2370 static void
set_uncompacted_src1(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2371 set_uncompacted_src1(const struct compaction_state *c, elk_inst *dst,
2372                      elk_compact_inst *src)
2373 {
2374    const struct intel_device_info *devinfo = c->isa->devinfo;
2375    uint16_t uncompacted =
2376       c->src1_index_table[elk_compact_inst_src1_index(devinfo, src)];
2377 
2378    if (devinfo->ver >= 20) {
2379       elk_inst_set_bits(dst, 121, 120, (uncompacted >> 14) & 0x3);
2380       elk_inst_set_bits(dst, 118, 116, (uncompacted >> 11) & 0x7);
2381       elk_inst_set_bits(dst, 115, 113, (uncompacted >>  8) & 0x7);
2382       elk_inst_set_bits(dst, 112, 112, (uncompacted >>  7) & 0x1);
2383       elk_inst_set_bits(dst, 103,  99, (uncompacted >>  2) & 0x1f);
2384       elk_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2385    } else if (devinfo->ver >= 12) {
2386       elk_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2387       elk_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
2388       elk_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
2389       elk_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
2390       elk_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2391    } else {
2392       elk_inst_set_bits(dst, 120, 109, uncompacted);
2393    }
2394 }
2395 
2396 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2397 set_uncompacted_3src_control_index(const struct compaction_state *c,
2398                                    elk_inst *dst, elk_compact_inst *src,
2399                                    bool is_dpas)
2400 {
2401    const struct intel_device_info *devinfo = c->isa->devinfo;
2402    assert(devinfo->ver >= 8);
2403 
2404    if (devinfo->ver >= 20) {
2405       uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2406       uint64_t uncompacted = is_dpas ? xe2_3src_dpas_control_index_table[compacted] :
2407                                        xe2_3src_control_index_table[compacted];
2408 
2409       elk_inst_set_bits(dst, 95, 92, (uncompacted >> 30) & 0xf);
2410       elk_inst_set_bits(dst, 90, 88, (uncompacted >> 27) & 0x7);
2411       elk_inst_set_bits(dst, 82, 80, (uncompacted >> 24) & 0x7);
2412       elk_inst_set_bits(dst, 50, 50, (uncompacted >> 23) & 0x1);
2413       elk_inst_set_bits(dst, 49, 48, (uncompacted >> 21) & 0x3);
2414       elk_inst_set_bits(dst, 42, 40, (uncompacted >> 18) & 0x7);
2415       elk_inst_set_bits(dst, 39, 39, (uncompacted >> 17) & 0x1);
2416       elk_inst_set_bits(dst, 38, 36, (uncompacted >> 14) & 0x7);
2417       elk_inst_set_bits(dst, 34, 34, (uncompacted >> 13) & 0x1);
2418       elk_inst_set_bits(dst, 32, 32, (uncompacted >> 12) & 0x1);
2419       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 11) & 0x1);
2420       elk_inst_set_bits(dst, 28, 28, (uncompacted >> 10) & 0x1);
2421       elk_inst_set_bits(dst, 27, 26, (uncompacted >>  8) & 0x3);
2422       elk_inst_set_bits(dst, 25, 24, (uncompacted >>  6) & 0x3);
2423       elk_inst_set_bits(dst, 23, 21, (uncompacted >>  3) & 0x7);
2424       elk_inst_set_bits(dst, 20, 18, (uncompacted >>  0) & 0x7);
2425 
2426    } else if (devinfo->verx10 >= 125) {
2427       uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2428       uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2429 
2430       elk_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2431       elk_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2432       elk_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2433       elk_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2434       elk_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2435       elk_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2436       elk_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2437       elk_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2438       elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2439       elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2440       elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2441       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2442       elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2443       elk_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2444       elk_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2445       elk_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2446       elk_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2447       elk_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2448 
2449    } else if (devinfo->ver >= 12) {
2450       uint64_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2451       uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2452 
2453       elk_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2454       elk_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2455       elk_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2456       elk_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2457       elk_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2458       elk_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2459       elk_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2460       elk_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2461       elk_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2462       elk_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2463       elk_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2464       elk_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2465       elk_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2466       elk_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2467       elk_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2468       elk_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2469       elk_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2470       elk_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2471    } else {
2472       uint32_t compacted = elk_compact_inst_3src_control_index(devinfo, src);
2473       uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2474 
2475       elk_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2476       elk_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2477 
2478       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2479          elk_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2480    }
2481 }
2482 
2483 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2484 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2485                                   elk_inst *dst, elk_compact_inst *src,
2486                                   bool is_dpas)
2487 {
2488    assert(devinfo->ver >= 8);
2489 
2490    uint32_t compacted = elk_compact_inst_3src_source_index(devinfo, src);
2491 
2492    if (devinfo->ver >= 12) {
2493       const uint32_t *three_src_source_index_table =
2494          devinfo->ver >= 20 ? (is_dpas ? xe2_3src_dpas_source_index_table :
2495                                          xe2_3src_source_index_table) :
2496          devinfo->verx10 >= 125 ? xehp_3src_source_index_table :
2497                                   gfx12_3src_source_index_table;
2498       uint32_t uncompacted = three_src_source_index_table[compacted];
2499 
2500       elk_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2501       elk_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2502       elk_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2503       elk_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2504       elk_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2505       elk_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2506       elk_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2507       elk_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2508       elk_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2509       elk_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2510       elk_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2511       elk_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2512       elk_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2513       elk_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2514       elk_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2515    } else {
2516       uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2517 
2518       elk_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2519       elk_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2520       elk_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2521       elk_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2522       elk_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2523 
2524       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2525          elk_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2526          elk_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2527          elk_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2528       } else {
2529          elk_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2530          elk_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2531       }
2532    }
2533 }
2534 
2535 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,elk_inst * dst,elk_compact_inst * src)2536 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2537                                   elk_inst *dst, elk_compact_inst *src)
2538 {
2539    assert(devinfo->ver >= 12);
2540 
2541    uint32_t compacted = elk_compact_inst_3src_subreg_index(devinfo, src);
2542    uint32_t uncompacted = (devinfo->ver >= 20 ? xe2_3src_subreg_table[compacted]:
2543                            gfx12_3src_subreg_table[compacted]);
2544 
2545    elk_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2546    elk_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2547    elk_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2548    elk_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2549 }
2550 
2551 static void
elk_uncompact_3src_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src,bool is_dpas)2552 elk_uncompact_3src_instruction(const struct compaction_state *c,
2553                                elk_inst *dst, elk_compact_inst *src, bool is_dpas)
2554 {
2555    const struct intel_device_info *devinfo = c->isa->devinfo;
2556    assert(devinfo->ver >= 8);
2557 
2558 #define uncompact(field) \
2559    elk_inst_set_3src_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
2560 #define uncompact_a16(field) \
2561    elk_inst_set_3src_a16_##field(devinfo, dst, elk_compact_inst_3src_##field(devinfo, src))
2562 
2563    uncompact(hw_opcode);
2564 
2565    if (devinfo->ver >= 12) {
2566       set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2567       set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2568       set_uncompacted_3src_subreg_index(devinfo, dst, src);
2569 
2570       uncompact(debug_control);
2571       uncompact(swsb);
2572       uncompact(dst_reg_nr);
2573       uncompact(src0_reg_nr);
2574       uncompact(src1_reg_nr);
2575       uncompact(src2_reg_nr);
2576    } else {
2577       set_uncompacted_3src_control_index(c, dst, src, is_dpas);
2578       set_uncompacted_3src_source_index(devinfo, dst, src, is_dpas);
2579 
2580       uncompact(dst_reg_nr);
2581       uncompact_a16(src0_rep_ctrl);
2582       uncompact(debug_control);
2583       uncompact(saturate);
2584       uncompact_a16(src1_rep_ctrl);
2585       uncompact_a16(src2_rep_ctrl);
2586       uncompact(src0_reg_nr);
2587       uncompact(src1_reg_nr);
2588       uncompact(src2_reg_nr);
2589       uncompact_a16(src0_subreg_nr);
2590       uncompact_a16(src1_subreg_nr);
2591       uncompact_a16(src2_subreg_nr);
2592    }
2593    elk_inst_set_3src_cmpt_control(devinfo, dst, false);
2594 
2595 #undef uncompact
2596 #undef uncompact_a16
2597 }
2598 
2599 static void
uncompact_instruction(const struct compaction_state * c,elk_inst * dst,elk_compact_inst * src)2600 uncompact_instruction(const struct compaction_state *c, elk_inst *dst,
2601                       elk_compact_inst *src)
2602 {
2603    const struct intel_device_info *devinfo = c->isa->devinfo;
2604    memset(dst, 0, sizeof(*dst));
2605 
2606    if (devinfo->ver >= 8) {
2607       const enum elk_opcode opcode =
2608          elk_opcode_decode(c->isa, elk_compact_inst_3src_hw_opcode(devinfo, src));
2609       if (elk_is_3src(c->isa, opcode)) {
2610          const bool is_dpas = opcode == ELK_OPCODE_DPAS;
2611          elk_uncompact_3src_instruction(c, dst, src, is_dpas);
2612          return;
2613       }
2614    }
2615 
2616 #define uncompact(field) \
2617    elk_inst_set_##field(devinfo, dst, elk_compact_inst_##field(devinfo, src))
2618 #define uncompact_reg(field) \
2619    elk_inst_set_##field##_da_reg_nr(devinfo, dst, \
2620                                     elk_compact_inst_##field##_reg_nr(devinfo, src))
2621 
2622    uncompact(hw_opcode);
2623    uncompact(debug_control);
2624 
2625    set_uncompacted_control(c, dst, src);
2626    set_uncompacted_datatype(c, dst, src);
2627    set_uncompacted_subreg(c, dst, src);
2628    set_uncompacted_src0(c, dst, src);
2629 
2630    enum elk_reg_type type;
2631    if (has_immediate(devinfo, dst, &type)) {
2632       unsigned imm = uncompact_immediate(devinfo, type,
2633                                          elk_compact_inst_imm(devinfo, src));
2634       elk_inst_set_imm_ud(devinfo, dst, imm);
2635    } else {
2636       set_uncompacted_src1(c, dst, src);
2637       uncompact_reg(src1);
2638    }
2639 
2640    if (devinfo->ver >= 12) {
2641       uncompact(swsb);
2642       uncompact_reg(dst);
2643       uncompact_reg(src0);
2644    } else {
2645       if (devinfo->ver >= 6) {
2646          uncompact(acc_wr_control);
2647       } else {
2648          uncompact(mask_control_ex);
2649       }
2650 
2651       uncompact(cond_modifier);
2652 
2653       if (devinfo->ver <= 6)
2654          uncompact(flag_subreg_nr);
2655 
2656       uncompact_reg(dst);
2657       uncompact_reg(src0);
2658    }
2659    elk_inst_set_cmpt_control(devinfo, dst, false);
2660 
2661 #undef uncompact
2662 #undef uncompact_reg
2663 }
2664 
2665 void
elk_uncompact_instruction(const struct elk_isa_info * isa,elk_inst * dst,elk_compact_inst * src)2666 elk_uncompact_instruction(const struct elk_isa_info *isa,
2667                           elk_inst *dst, elk_compact_inst *src)
2668 {
2669    struct compaction_state c;
2670    compaction_state_init(&c, isa);
2671    uncompact_instruction(&c, dst, src);
2672 }
2673 
2674 void
elk_debug_compact_uncompact(const struct elk_isa_info * isa,elk_inst * orig,elk_inst * uncompacted)2675 elk_debug_compact_uncompact(const struct elk_isa_info *isa,
2676                             elk_inst *orig,
2677                             elk_inst *uncompacted)
2678 {
2679    fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2680            isa->devinfo->ver);
2681 
2682    fprintf(stderr, "  before: ");
2683    elk_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2684 
2685    fprintf(stderr, "  after:  ");
2686    elk_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2687 
2688    uint32_t *before_bits = (uint32_t *)orig;
2689    uint32_t *after_bits = (uint32_t *)uncompacted;
2690    fprintf(stderr, "  changed bits:\n");
2691    for (int i = 0; i < 128; i++) {
2692       uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2693       uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2694 
2695       if (before != after) {
2696          fprintf(stderr, "  bit %d, %s to %s\n", i,
2697                  before ? "set" : "unset",
2698                  after ? "set" : "unset");
2699       }
2700    }
2701 }
2702 
/**
 * Returns compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * Presumably compacted_counts[] is a running count of compacted instructions
 * indexed by pre-compaction ip, making the result the number of compacted
 * instructions between the two positions (negative for a backward target);
 * confirm against the code that fills the array.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2710 
2711 static void
update_uip_jip(const struct elk_isa_info * isa,elk_inst * insn,int this_old_ip,int * compacted_counts)2712 update_uip_jip(const struct elk_isa_info *isa, elk_inst *insn,
2713                int this_old_ip, int *compacted_counts)
2714 {
2715    const struct intel_device_info *devinfo = isa->devinfo;
2716 
2717    /* JIP and UIP are in units of:
2718     *    - bytes on Gfx8+; and
2719     *    - compacted instructions on Gfx6+.
2720     */
2721    int shift = devinfo->ver >= 8 ? 3 : 0;
2722 
2723    int32_t jip_compacted = elk_inst_jip(devinfo, insn) >> shift;
2724    jip_compacted -= compacted_between(this_old_ip,
2725                                       this_old_ip + (jip_compacted / 2),
2726                                       compacted_counts);
2727    elk_inst_set_jip(devinfo, insn, jip_compacted << shift);
2728 
2729    if (elk_inst_opcode(isa, insn) == ELK_OPCODE_ENDIF ||
2730        elk_inst_opcode(isa, insn) == ELK_OPCODE_WHILE ||
2731        (elk_inst_opcode(isa, insn) == ELK_OPCODE_ELSE && devinfo->ver <= 7))
2732       return;
2733 
2734    int32_t uip_compacted = elk_inst_uip(devinfo, insn) >> shift;
2735    uip_compacted -= compacted_between(this_old_ip,
2736                                       this_old_ip + (uip_compacted / 2),
2737                                       compacted_counts);
2738    elk_inst_set_uip(devinfo, insn, uip_compacted << shift);
2739 }
2740 
2741 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,elk_inst * insn,int this_old_ip,int * compacted_counts)2742 update_gfx4_jump_count(const struct intel_device_info *devinfo, elk_inst *insn,
2743                        int this_old_ip, int *compacted_counts)
2744 {
2745    assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2746 
2747    /* Jump Count is in units of:
2748     *    - uncompacted instructions on G45; and
2749     *    - compacted instructions on Gfx5.
2750     */
2751    int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2752 
2753    int jump_count_compacted = elk_inst_gfx4_jump_count(devinfo, insn) << shift;
2754 
2755    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2756 
2757    int this_compacted_count = compacted_counts[this_old_ip];
2758    int target_compacted_count = compacted_counts[target_old_ip];
2759 
2760    jump_count_compacted -= (target_compacted_count - this_compacted_count);
2761    elk_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2762 }
2763 
2764 static void
compaction_state_init(struct compaction_state * c,const struct elk_isa_info * isa)2765 compaction_state_init(struct compaction_state *c,
2766                       const struct elk_isa_info *isa)
2767 {
2768    const struct intel_device_info *devinfo = isa->devinfo;
2769 
2770    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2771    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2772    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2773    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2774    assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2775    assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2776    assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2777    assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2778    assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2779    assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2780    assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2781    assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2782    assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2783    assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2784    assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2785    assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2786    assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2787    assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2788    assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2789    assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2790    assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2791    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2792    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2793    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2794    assert(xe2_control_index_table[ARRAY_SIZE(xe2_control_index_table) - 1] != 0);
2795    assert(xe2_datatype_table[ARRAY_SIZE(xe2_datatype_table) - 1] != 0);
2796    assert(xe2_subreg_table[ARRAY_SIZE(xe2_subreg_table) - 1] != 0);
2797    assert(xe2_src0_index_table[ARRAY_SIZE(xe2_src0_index_table) - 1] != 0);
2798    assert(xe2_src1_index_table[ARRAY_SIZE(xe2_src1_index_table) - 1] != 0);
2799 
2800    c->isa = isa;
2801    switch (devinfo->ver) {
2802    case 20:
2803       c->control_index_table = xe2_control_index_table;
2804       c->datatype_table = xe2_datatype_table;
2805       c->subreg_table = xe2_subreg_table;
2806       c->src0_index_table = xe2_src0_index_table;
2807       c->src1_index_table = xe2_src1_index_table;
2808       break;
2809    case 12:
2810       c->control_index_table = gfx12_control_index_table;;
2811       c->datatype_table = gfx12_datatype_table;
2812       c->subreg_table = gfx12_subreg_table;
2813       if (devinfo->verx10 >= 125) {
2814          c->src0_index_table = xehp_src0_index_table;
2815          c->src1_index_table = xehp_src1_index_table;
2816       } else {
2817          c->src0_index_table = gfx12_src0_index_table;
2818          c->src1_index_table = gfx12_src1_index_table;
2819       }
2820       break;
2821    case 11:
2822       c->control_index_table = gfx8_control_index_table;
2823       c->datatype_table = gfx11_datatype_table;
2824       c->subreg_table = gfx8_subreg_table;
2825       c->src0_index_table = gfx8_src_index_table;
2826       c->src1_index_table = gfx8_src_index_table;
2827       break;
2828    case 9:
2829    case 8:
2830       c->control_index_table = gfx8_control_index_table;
2831       c->datatype_table = gfx8_datatype_table;
2832       c->subreg_table = gfx8_subreg_table;
2833       c->src0_index_table = gfx8_src_index_table;
2834       c->src1_index_table = gfx8_src_index_table;
2835       break;
2836    case 7:
2837       c->control_index_table = gfx7_control_index_table;
2838       c->datatype_table = gfx7_datatype_table;
2839       c->subreg_table = gfx7_subreg_table;
2840       c->src0_index_table = gfx7_src_index_table;
2841       c->src1_index_table = gfx7_src_index_table;
2842       break;
2843    case 6:
2844       c->control_index_table = gfx6_control_index_table;
2845       c->datatype_table = gfx6_datatype_table;
2846       c->subreg_table = gfx6_subreg_table;
2847       c->src0_index_table = gfx6_src_index_table;
2848       c->src1_index_table = gfx6_src_index_table;
2849       break;
2850    case 5:
2851    case 4:
2852       c->control_index_table = g45_control_index_table;
2853       c->datatype_table = g45_datatype_table;
2854       c->subreg_table = g45_subreg_table;
2855       c->src0_index_table = g45_src_index_table;
2856       c->src1_index_table = g45_src_index_table;
2857       break;
2858    default:
2859       unreachable("unknown generation");
2860    }
2861 }
2862 
2863 void
elk_compact_instructions(struct elk_codegen * p,int start_offset,struct elk_disasm_info * disasm)2864 elk_compact_instructions(struct elk_codegen *p, int start_offset,
2865                          struct elk_disasm_info *disasm)
2866 {
2867    if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2868       return;
2869 
2870    const struct intel_device_info *devinfo = p->devinfo;
2871    if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2872       return;
2873 
2874    void *store = p->store + start_offset / 16;
2875    /* For an instruction at byte offset 16*i before compaction, this is the
2876     * number of compacted instructions minus the number of padding NOP/NENOPs
2877     * that preceded it.
2878     */
2879    unsigned num_compacted_counts =
2880       (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2881    int *compacted_counts =
2882       calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2883 
2884    /* For an instruction at byte offset 8*i after compaction, this was its IP
2885     * (in 16-byte units) before compaction.
2886     */
2887    unsigned num_old_ip =
2888       (p->next_insn_offset - start_offset) / sizeof(elk_compact_inst) + 1;
2889    int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2890 
2891    struct compaction_state c;
2892    compaction_state_init(&c, p->isa);
2893 
2894    int offset = 0;
2895    int compacted_count = 0;
2896    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2897         src_offset += sizeof(elk_inst)) {
2898       elk_inst *src = store + src_offset;
2899       void *dst = store + offset;
2900 
2901       old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
2902       compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
2903 
2904       elk_inst inst = precompact(p->isa, *src);
2905       elk_inst saved = inst;
2906 
2907       if (try_compact_instruction(&c, dst, &inst)) {
2908          compacted_count++;
2909 
2910          if (INTEL_DEBUG(DEBUG_VS | DEBUG_GS | DEBUG_TCS |
2911                          DEBUG_WM | DEBUG_CS | DEBUG_TES)) {
2912             elk_inst uncompacted;
2913             uncompact_instruction(&c, &uncompacted, dst);
2914             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2915                elk_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2916             }
2917          }
2918 
2919          offset += sizeof(elk_compact_inst);
2920       } else {
2921          /* All uncompacted instructions need to be aligned on G45. */
2922          if ((offset & sizeof(elk_compact_inst)) != 0 &&
2923              devinfo->platform == INTEL_PLATFORM_G4X) {
2924             elk_compact_inst *align = store + offset;
2925             memset(align, 0, sizeof(*align));
2926             elk_compact_inst_set_hw_opcode(
2927                devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NENOP));
2928             elk_compact_inst_set_cmpt_control(devinfo, align, true);
2929             offset += sizeof(elk_compact_inst);
2930             compacted_count--;
2931             compacted_counts[src_offset / sizeof(elk_inst)] = compacted_count;
2932             old_ip[offset / sizeof(elk_compact_inst)] = src_offset / sizeof(elk_inst);
2933 
2934             dst = store + offset;
2935          }
2936 
2937          /* If we didn't compact this instruction, we need to move it down into
2938           * place.
2939           */
2940          if (offset != src_offset) {
2941             memmove(dst, src, sizeof(elk_inst));
2942          }
2943          offset += sizeof(elk_inst);
2944       }
2945    }
2946 
2947    /* Add an entry for the ending offset of the program. This greatly
2948     * simplifies the linked list walk at the end of the function.
2949     */
2950    old_ip[offset / sizeof(elk_compact_inst)] =
2951       (p->next_insn_offset - start_offset) / sizeof(elk_inst);
2952 
2953    /* Fix up control flow offsets. */
2954    p->next_insn_offset = start_offset + offset;
2955    for (offset = 0; offset < p->next_insn_offset - start_offset;
2956         offset = next_offset(devinfo, store, offset)) {
2957       elk_inst *insn = store + offset;
2958       int this_old_ip = old_ip[offset / sizeof(elk_compact_inst)];
2959       int this_compacted_count = compacted_counts[this_old_ip];
2960 
2961       switch (elk_inst_opcode(p->isa, insn)) {
2962       case ELK_OPCODE_BREAK:
2963       case ELK_OPCODE_CONTINUE:
2964       case ELK_OPCODE_HALT:
2965          if (devinfo->ver >= 6) {
2966             update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2967          } else {
2968             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2969                                    compacted_counts);
2970          }
2971          break;
2972 
2973       case ELK_OPCODE_IF:
2974       case ELK_OPCODE_IFF:
2975       case ELK_OPCODE_ELSE:
2976       case ELK_OPCODE_ENDIF:
2977       case ELK_OPCODE_WHILE:
2978          if (devinfo->ver >= 7) {
2979             if (elk_inst_cmpt_control(devinfo, insn)) {
2980                elk_inst uncompacted;
2981                uncompact_instruction(&c, &uncompacted,
2982                                      (elk_compact_inst *)insn);
2983 
2984                update_uip_jip(p->isa, &uncompacted, this_old_ip,
2985                               compacted_counts);
2986 
2987                bool ret = try_compact_instruction(&c, (elk_compact_inst *)insn,
2988                                                   &uncompacted);
2989                assert(ret); (void)ret;
2990             } else {
2991                update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2992             }
2993          } else if (devinfo->ver == 6) {
2994             assert(!elk_inst_cmpt_control(devinfo, insn));
2995 
2996             /* Jump Count is in units of compacted instructions on Gfx6. */
2997             int jump_count_compacted = elk_inst_gfx6_jump_count(devinfo, insn);
2998 
2999             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
3000             int target_compacted_count = compacted_counts[target_old_ip];
3001             jump_count_compacted -= (target_compacted_count - this_compacted_count);
3002             elk_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
3003          } else {
3004             update_gfx4_jump_count(devinfo, insn, this_old_ip,
3005                                    compacted_counts);
3006          }
3007          break;
3008 
3009       case ELK_OPCODE_ADD:
3010          /* Add instructions modifying the IP register use an immediate src1,
3011           * and Gens that use this cannot compact instructions with immediate
3012           * operands.
3013           */
3014          if (elk_inst_cmpt_control(devinfo, insn))
3015             break;
3016 
3017          if (elk_inst_dst_reg_file(devinfo, insn) == ELK_ARCHITECTURE_REGISTER_FILE &&
3018              elk_inst_dst_da_reg_nr(devinfo, insn) == ELK_ARF_IP) {
3019             assert(elk_inst_src1_reg_file(devinfo, insn) == ELK_IMMEDIATE_VALUE);
3020 
3021             int shift = 3;
3022             int jump_compacted = elk_inst_imm_d(devinfo, insn) >> shift;
3023 
3024             int target_old_ip = this_old_ip + (jump_compacted / 2);
3025             int target_compacted_count = compacted_counts[target_old_ip];
3026             jump_compacted -= (target_compacted_count - this_compacted_count);
3027             elk_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
3028          }
3029          break;
3030 
3031       default:
3032          break;
3033       }
3034    }
3035 
3036    /* p->nr_insn is counting the number of uncompacted instructions still, so
3037     * divide.  We do want to be sure there's a valid instruction in any
3038     * alignment padding, so that the next compression pass (for the FS 8/16
3039     * compile passes) parses correctly.
3040     */
3041    if (p->next_insn_offset & sizeof(elk_compact_inst)) {
3042       elk_compact_inst *align = store + offset;
3043       memset(align, 0, sizeof(*align));
3044       elk_compact_inst_set_hw_opcode(
3045          devinfo, align, elk_opcode_encode(p->isa, ELK_OPCODE_NOP));
3046       elk_compact_inst_set_cmpt_control(devinfo, align, true);
3047       p->next_insn_offset += sizeof(elk_compact_inst);
3048    }
3049    p->nr_insn = p->next_insn_offset / sizeof(elk_inst);
3050 
3051    for (int i = 0; i < p->num_relocs; i++) {
3052       if (p->relocs[i].offset < (uint32_t)start_offset)
3053          continue;
3054 
3055       assert(p->relocs[i].offset % 16 == 0);
3056       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
3057       p->relocs[i].offset -= compacted_counts[idx] * 8;
3058    }
3059 
3060    /* Update the instruction offsets for each group. */
3061    if (disasm) {
3062       int offset = 0;
3063 
3064       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
3065          while (start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
3066                 sizeof(elk_inst) != group->offset) {
3067             assert(start_offset + old_ip[offset / sizeof(elk_compact_inst)] *
3068                    sizeof(elk_inst) < group->offset);
3069             offset = next_offset(devinfo, store, offset);
3070          }
3071 
3072          group->offset = start_offset + offset;
3073 
3074          offset = next_offset(devinfo, store, offset);
3075       }
3076    }
3077 
3078    free(compacted_counts);
3079    free(old_ip);
3080 }
3081