• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file brw_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gfx5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gfx6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gfx7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gfx6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gfx8 adds support for compacting 3-src instructions.
73  *
74  * Gfx12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "brw_eu.h"
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/intel_debug.h"
85 
/*
 * G45 control-index lookup table: 32 entries, each written as a 17-digit
 * binary literal.  This is one of the per-field lookup tables that the file
 * header describes for instruction compaction.  Presumably a compacted
 * instruction stores an index into this table instead of the full control
 * bits; the lookup code is not visible here, so confirm against its users.
 * The entry order is significant (the position is the encoded index), so
 * entries must not be sorted or deduplicated.
 */
86 static const uint32_t g45_control_index_table[32] = {
87    0b00000000000000000,
88    0b01000000000000000,
89    0b00110000000000000,
90    0b00000000000000010,
91    0b00100000000000000,
92    0b00010000000000000,
93    0b01000000000100000,
94    0b01000000100000000,
95    0b01010000000100000,
96    0b00000000100000010,
97    0b11000000000000000,
98    0b00001000100000010,
99    0b01001000100000000,
100    0b00000000100000000,
101    0b11000000000100000,
102    0b00001000100000000,
103    0b10110000000000000,
104    0b11010000000100000,
105    0b00110000100000000,
106    0b00100000100000000,
107    0b01000000000001000,
108    0b01000000000000100,
109    0b00111100000000000,
110    0b00101011000000000,
111    0b00110000000010000,
112    0b00010000100000000,
113    0b01000000000100100,
114    0b01000000000101000,
115    0b00110000000000110,
116    0b00000000000001010,
117    0b01010000000101000,
118    0b01010000000100100,
119 };
120 
/*
 * G45 datatype lookup table: 32 entries, each written as an 18-digit binary
 * literal.  Presumably indexed by the datatype index field of a compacted
 * instruction (the lookup code is outside this view -- confirm there).
 * Entry position is the encoded index; do not reorder.
 */
121 static const uint32_t g45_datatype_table[32] = {
122    0b001000000000100001,
123    0b001011010110101101,
124    0b001000001000110001,
125    0b001111011110111101,
126    0b001011010110101100,
127    0b001000000110101101,
128    0b001000000000100000,
129    0b010100010110110001,
130    0b001100011000101101,
131    0b001000000000100010,
132    0b001000001000110110,
133    0b010000001000110001,
134    0b001000001000110010,
135    0b011000001000110010,
136    0b001111011110111100,
137    0b001000000100101000,
138    0b010100011000110001,
139    0b001010010100101001,
140    0b001000001000101001,
141    0b010000001000110110,
142    0b101000001000110001,
143    0b001011011000101101,
144    0b001000000100001001,
145    0b001011011000101100,
146    0b110100011000110001,
147    0b001000001110111101,
148    0b110000001000110001,
149    0b011000000100101010,
150    0b101000001000101001,
151    0b001011010110001100,
152    0b001000000110100001,
153    0b001010010100001000,
154 };
155 
/*
 * G45 sub-register lookup table: 32 entries, each written as a 15-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the subreg index
 * field of a compacted instruction -- confirm against the lookup code, which
 * is not visible here.  Entry position is the encoded index; do not reorder.
 */
156 static const uint16_t g45_subreg_table[32] = {
157    0b000000000000000,
158    0b000000010000000,
159    0b000001000000000,
160    0b000100000000000,
161    0b000000000100000,
162    0b100000000000000,
163    0b000000000010000,
164    0b001100000000000,
165    0b001010000000000,
166    0b000000100000000,
167    0b001000000000000,
168    0b000000000001000,
169    0b000000001000000,
170    0b000000000000001,
171    0b000010000000000,
172    0b000000010100000,
173    0b000000000000111,
174    0b000001000100000,
175    0b011000000000000,
176    0b000000110000000,
177    0b000000000000010,
178    0b000000000000100,
179    0b000000001100000,
180    0b000100000000010,
181    0b001110011000110,
182    0b001110100001000,
183    0b000110011000110,
184    0b000001000011000,
185    0b000110010000100,
186    0b001100000000110,
187    0b000000010000110,
188    0b000001000110000,
189 };
190 
/*
 * G45 source-index lookup table: 32 entries, each written as a 12-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the source index
 * field(s) of a compacted instruction -- confirm against the lookup code,
 * which is not visible here.  Entry position is the encoded index; do not
 * reorder.
 */
191 static const uint16_t g45_src_index_table[32] = {
192    0b000000000000,
193    0b010001101000,
194    0b010110001000,
195    0b011010010000,
196    0b001101001000,
197    0b010110001010,
198    0b010101110000,
199    0b011001111000,
200    0b001000101000,
201    0b000000101000,
202    0b010001010000,
203    0b111101101100,
204    0b010110001100,
205    0b010001101100,
206    0b011010010100,
207    0b010001001100,
208    0b001100101000,
209    0b000000000010,
210    0b111101001100,
211    0b011001101000,
212    0b010101001000,
213    0b000000000100,
214    0b000000101100,
215    0b010001101010,
216    0b000000111000,
217    0b010101011000,
218    0b000100100000,
219    0b010110000000,
220    0b010000000100,
221    0b010000111000,
222    0b000101100000,
223    0b111101110100,
224 };
225 
/*
 * Gfx6 control-index lookup table: 32 entries, each written as a 17-digit
 * binary literal.  Same shape as the G45 control table but with different
 * contents.  Presumably indexed by the control index field of a compacted
 * instruction -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
226 static const uint32_t gfx6_control_index_table[32] = {
227    0b00000000000000000,
228    0b01000000000000000,
229    0b00110000000000000,
230    0b00000000100000000,
231    0b00010000000000000,
232    0b00001000100000000,
233    0b00000000100000010,
234    0b00000000000000010,
235    0b01000000100000000,
236    0b01010000000000000,
237    0b10110000000000000,
238    0b00100000000000000,
239    0b11010000000000000,
240    0b11000000000000000,
241    0b01001000100000000,
242    0b01000000000001000,
243    0b01000000000000100,
244    0b00000000000001000,
245    0b00000000000000100,
246    0b00111000100000000,
247    0b00001000100000010,
248    0b00110000100000000,
249    0b00110000000000001,
250    0b00100000000000001,
251    0b00110000000000010,
252    0b00110000000000101,
253    0b00110000000001001,
254    0b00110000000010000,
255    0b00110000000000011,
256    0b00110000000000100,
257    0b00110000100001000,
258    0b00100000000001001,
259 };
260 
/*
 * Gfx6 datatype lookup table: 32 entries, each written as an 18-digit binary
 * literal.  Presumably indexed by the datatype index field of a compacted
 * instruction -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 *
 * NOTE(review): entries 23 and 25 (zero-based) hold the same value,
 * 0b001111011110111101.  Confirm against the hardware documentation whether
 * the duplicate is intentional before changing either entry.
 */
261 static const uint32_t gfx6_datatype_table[32] = {
262    0b001001110000000000,
263    0b001000110000100000,
264    0b001001110000000001,
265    0b001000000001100000,
266    0b001010110100101001,
267    0b001000000110101101,
268    0b001100011000101100,
269    0b001011110110101101,
270    0b001000000111101100,
271    0b001000000001100001,
272    0b001000110010100101,
273    0b001000000001000001,
274    0b001000001000110001,
275    0b001000001000101001,
276    0b001000000000100000,
277    0b001000001000110010,
278    0b001010010100101001,
279    0b001011010010100101,
280    0b001000000110100101,
281    0b001100011000101001,
282    0b001011011000101100,
283    0b001011010110100101,
284    0b001011110110100101,
285    0b001111011110111101,
286    0b001111011110111100,
287    0b001111011110111101,
288    0b001111011110011101,
289    0b001111011110111110,
290    0b001000000000100001,
291    0b001000000000100010,
292    0b001001111111011101,
293    0b001000001110111110,
294 };
295 
/*
 * Gfx6 sub-register lookup table: 32 entries, each written as a 15-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the subreg index
 * field of a compacted instruction -- confirm against the lookup code, which
 * is not visible here.  Entry position is the encoded index; do not reorder.
 */
296 static const uint16_t gfx6_subreg_table[32] = {
297    0b000000000000000,
298    0b000000000000100,
299    0b000000110000000,
300    0b111000000000000,
301    0b011110000001000,
302    0b000010000000000,
303    0b000000000010000,
304    0b000110000001100,
305    0b001000000000000,
306    0b000001000000000,
307    0b000001010010100,
308    0b000000001010110,
309    0b010000000000000,
310    0b110000000000000,
311    0b000100000000000,
312    0b000000010000000,
313    0b000000000001000,
314    0b100000000000000,
315    0b000001010000000,
316    0b001010000000000,
317    0b001100000000000,
318    0b000000001010100,
319    0b101101010010100,
320    0b010100000000000,
321    0b000000010001111,
322    0b011000000000000,
323    0b111110000000000,
324    0b101000000000000,
325    0b000000000001111,
326    0b000100010001111,
327    0b001000010001111,
328    0b000110000000000,
329 };
330 
/*
 * Gfx6 source-index lookup table: 32 entries, each written as a 12-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the source index
 * field(s) of a compacted instruction -- confirm against the lookup code,
 * which is not visible here.  Entry position is the encoded index; do not
 * reorder.
 */
331 static const uint16_t gfx6_src_index_table[32] = {
332    0b000000000000,
333    0b010110001000,
334    0b010001101000,
335    0b001000101000,
336    0b011010010000,
337    0b000100100000,
338    0b010001101100,
339    0b010101110000,
340    0b011001111000,
341    0b001100101000,
342    0b010110001100,
343    0b001000100000,
344    0b010110001010,
345    0b000000000010,
346    0b010101010000,
347    0b010101101000,
348    0b111101001100,
349    0b111100101100,
350    0b011001110000,
351    0b010110001001,
352    0b010101011000,
353    0b001101001000,
354    0b010000101100,
355    0b010000000000,
356    0b001101110000,
357    0b001100010000,
358    0b001100000000,
359    0b010001101010,
360    0b001101111000,
361    0b000001110000,
362    0b001100100000,
363    0b001101010000,
364 };
365 
/*
 * Gfx7 control-index lookup table: 32 entries, each written as a 19-digit
 * binary literal (two digits wider than the G45/Gfx6 control tables).
 * Presumably indexed by the control index field of a compacted instruction
 * -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
366 static const uint32_t gfx7_control_index_table[32] = {
367    0b0000000000000000010,
368    0b0000100000000000000,
369    0b0000100000000000001,
370    0b0000100000000000010,
371    0b0000100000000000011,
372    0b0000100000000000100,
373    0b0000100000000000101,
374    0b0000100000000000111,
375    0b0000100000000001000,
376    0b0000100000000001001,
377    0b0000100000000001101,
378    0b0000110000000000000,
379    0b0000110000000000001,
380    0b0000110000000000010,
381    0b0000110000000000011,
382    0b0000110000000000100,
383    0b0000110000000000101,
384    0b0000110000000000111,
385    0b0000110000000001001,
386    0b0000110000000001101,
387    0b0000110000000010000,
388    0b0000110000100000000,
389    0b0001000000000000000,
390    0b0001000000000000010,
391    0b0001000000000000100,
392    0b0001000000100000000,
393    0b0010110000000000000,
394    0b0010110000000010000,
395    0b0011000000000000000,
396    0b0011000000100000000,
397    0b0101000000000000000,
398    0b0101000000100000000,
399 };
400 
/*
 * Gfx7 datatype lookup table: 32 entries, each written as an 18-digit binary
 * literal.  Presumably indexed by the datatype index field of a compacted
 * instruction -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
401 static const uint32_t gfx7_datatype_table[32] = {
402    0b001000000000000001,
403    0b001000000000100000,
404    0b001000000000100001,
405    0b001000000001100001,
406    0b001000000010111101,
407    0b001000001011111101,
408    0b001000001110100001,
409    0b001000001110100101,
410    0b001000001110111101,
411    0b001000010000100001,
412    0b001000110000100000,
413    0b001000110000100001,
414    0b001001010010100101,
415    0b001001110010100100,
416    0b001001110010100101,
417    0b001111001110111101,
418    0b001111011110011101,
419    0b001111011110111100,
420    0b001111011110111101,
421    0b001111111110111100,
422    0b000000001000001100,
423    0b001000000000111101,
424    0b001000000010100101,
425    0b001000010000100000,
426    0b001001010010100100,
427    0b001001110010000100,
428    0b001010010100001001,
429    0b001101111110111101,
430    0b001111111110111101,
431    0b001011110110101100,
432    0b001010010100101000,
433    0b001010110100101000,
434 };
435 
/*
 * Gfx7 sub-register lookup table: 32 entries, each written as a 15-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the subreg index
 * field of a compacted instruction -- confirm against the lookup code, which
 * is not visible here.  Entry position is the encoded index; do not reorder.
 */
436 static const uint16_t gfx7_subreg_table[32] = {
437    0b000000000000000,
438    0b000000000000001,
439    0b000000000001000,
440    0b000000000001111,
441    0b000000000010000,
442    0b000000010000000,
443    0b000000100000000,
444    0b000000110000000,
445    0b000001000000000,
446    0b000001000010000,
447    0b000010100000000,
448    0b001000000000000,
449    0b001000000000001,
450    0b001000010000001,
451    0b001000010000010,
452    0b001000010000011,
453    0b001000010000100,
454    0b001000010000111,
455    0b001000010001000,
456    0b001000010001110,
457    0b001000010001111,
458    0b001000110000000,
459    0b001000111101000,
460    0b010000000000000,
461    0b010000110000000,
462    0b011000000000000,
463    0b011110010000111,
464    0b100000000000000,
465    0b101000000000000,
466    0b110000000000000,
467    0b111000000000000,
468    0b111000000011100,
469 };
470 
/*
 * Gfx7 source-index lookup table: 32 entries, each written as a 12-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the source index
 * field(s) of a compacted instruction -- confirm against the lookup code,
 * which is not visible here.  Entry position is the encoded index; do not
 * reorder.
 */
471 static const uint16_t gfx7_src_index_table[32] = {
472    0b000000000000,
473    0b000000000010,
474    0b000000010000,
475    0b000000010010,
476    0b000000011000,
477    0b000000100000,
478    0b000000101000,
479    0b000001001000,
480    0b000001010000,
481    0b000001110000,
482    0b000001111000,
483    0b001100000000,
484    0b001100000010,
485    0b001100001000,
486    0b001100010000,
487    0b001100010010,
488    0b001100100000,
489    0b001100101000,
490    0b001100111000,
491    0b001101000000,
492    0b001101000010,
493    0b001101001000,
494    0b001101010000,
495    0b001101100000,
496    0b001101101000,
497    0b001101110000,
498    0b001101110001,
499    0b001101111000,
500    0b010001101000,
501    0b010001101001,
502    0b010001101010,
503    0b010110001000,
504 };
505 
/*
 * Gfx8 control-index lookup table: 32 entries, each written as a 19-digit
 * binary literal.  Presumably indexed by the control index field of a
 * compacted instruction -- confirm against the lookup code, which is not
 * visible here.  Entry position is the encoded index; do not reorder.
 *
 * NOTE(review): the contents appear to match gfx7_control_index_table entry
 * for entry -- confirm before relying on that equivalence.
 */
506 static const uint32_t gfx8_control_index_table[32] = {
507    0b0000000000000000010,
508    0b0000100000000000000,
509    0b0000100000000000001,
510    0b0000100000000000010,
511    0b0000100000000000011,
512    0b0000100000000000100,
513    0b0000100000000000101,
514    0b0000100000000000111,
515    0b0000100000000001000,
516    0b0000100000000001001,
517    0b0000100000000001101,
518    0b0000110000000000000,
519    0b0000110000000000001,
520    0b0000110000000000010,
521    0b0000110000000000011,
522    0b0000110000000000100,
523    0b0000110000000000101,
524    0b0000110000000000111,
525    0b0000110000000001001,
526    0b0000110000000001101,
527    0b0000110000000010000,
528    0b0000110000100000000,
529    0b0001000000000000000,
530    0b0001000000000000010,
531    0b0001000000000000100,
532    0b0001000000100000000,
533    0b0010110000000000000,
534    0b0010110000000010000,
535    0b0011000000000000000,
536    0b0011000000100000000,
537    0b0101000000000000000,
538    0b0101000000100000000,
539 };
540 
/*
 * Gfx8 datatype lookup table: 32 entries, each written as a 21-digit binary
 * literal (three digits wider than the G45/Gfx6/Gfx7 datatype tables).
 * Presumably indexed by the datatype index field of a compacted instruction
 * -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
541 static const uint32_t gfx8_datatype_table[32] = {
542    0b001000000000000000001,
543    0b001000000000001000000,
544    0b001000000000001000001,
545    0b001000000000011000001,
546    0b001000000000101011101,
547    0b001000000010111011101,
548    0b001000000011101000001,
549    0b001000000011101000101,
550    0b001000000011101011101,
551    0b001000001000001000001,
552    0b001000011000001000000,
553    0b001000011000001000001,
554    0b001000101000101000101,
555    0b001000111000101000100,
556    0b001000111000101000101,
557    0b001011100011101011101,
558    0b001011101011100011101,
559    0b001011101011101011100,
560    0b001011101011101011101,
561    0b001011111011101011100,
562    0b000000000010000001100,
563    0b001000000000001011101,
564    0b001000000000101000101,
565    0b001000001000001000000,
566    0b001000101000101000100,
567    0b001000111000100000100,
568    0b001001001001000001001,
569    0b001010111011101011101,
570    0b001011111011101011101,
571    0b001001111001101001100,
572    0b001001001001001001000,
573    0b001001011001001001000,
574 };
575 
/*
 * Gfx8 sub-register lookup table: 32 entries, each written as a 15-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the subreg index
 * field of a compacted instruction -- confirm against the lookup code, which
 * is not visible here.  Entry position is the encoded index; do not reorder.
 *
 * NOTE(review): this appears to differ from gfx7_subreg_table only at
 * entry 10 (zero-based): 0b000001010000000 here versus 0b000010100000000
 * there.  Confirm against the hardware documentation before merging the two.
 */
576 static const uint16_t gfx8_subreg_table[32] = {
577    0b000000000000000,
578    0b000000000000001,
579    0b000000000001000,
580    0b000000000001111,
581    0b000000000010000,
582    0b000000010000000,
583    0b000000100000000,
584    0b000000110000000,
585    0b000001000000000,
586    0b000001000010000,
587    0b000001010000000,
588    0b001000000000000,
589    0b001000000000001,
590    0b001000010000001,
591    0b001000010000010,
592    0b001000010000011,
593    0b001000010000100,
594    0b001000010000111,
595    0b001000010001000,
596    0b001000010001110,
597    0b001000010001111,
598    0b001000110000000,
599    0b001000111101000,
600    0b010000000000000,
601    0b010000110000000,
602    0b011000000000000,
603    0b011110010000111,
604    0b100000000000000,
605    0b101000000000000,
606    0b110000000000000,
607    0b111000000000000,
608    0b111000000011100,
609 };
610 
/*
 * Gfx8 source-index lookup table: 32 entries, each written as a 12-digit
 * binary literal (fits in uint16_t).  Presumably indexed by the source index
 * field(s) of a compacted instruction -- confirm against the lookup code,
 * which is not visible here.  Entry position is the encoded index; do not
 * reorder.
 *
 * NOTE(review): the contents appear to match gfx7_src_index_table entry for
 * entry -- confirm before relying on that equivalence.
 */
611 static const uint16_t gfx8_src_index_table[32] = {
612    0b000000000000,
613    0b000000000010,
614    0b000000010000,
615    0b000000010010,
616    0b000000011000,
617    0b000000100000,
618    0b000000101000,
619    0b000001001000,
620    0b000001010000,
621    0b000001110000,
622    0b000001111000,
623    0b001100000000,
624    0b001100000010,
625    0b001100001000,
626    0b001100010000,
627    0b001100010010,
628    0b001100100000,
629    0b001100101000,
630    0b001100111000,
631    0b001101000000,
632    0b001101000010,
633    0b001101001000,
634    0b001101010000,
635    0b001101100000,
636    0b001101101000,
637    0b001101110000,
638    0b001101110001,
639    0b001101111000,
640    0b010001101000,
641    0b010001101001,
642    0b010001101010,
643    0b010110001000,
644 };
645 
/*
 * Gfx11 datatype lookup table: 32 entries, each written as a 21-digit binary
 * literal (same width as the Gfx8 datatype table, different contents).
 * Presumably indexed by the datatype index field of a compacted instruction
 * -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
646 static const uint32_t gfx11_datatype_table[32] = {
647    0b001000000000000000001,
648    0b001000000000001000000,
649    0b001000000000001000001,
650    0b001000000000011000001,
651    0b001000000000101100101,
652    0b001000000101111100101,
653    0b001000000100101000001,
654    0b001000000100101000101,
655    0b001000000100101100101,
656    0b001000001000001000001,
657    0b001000011000001000000,
658    0b001000011000001000001,
659    0b001000101000101000101,
660    0b001000111000101000100,
661    0b001000111000101000101,
662    0b001100100100101100101,
663    0b001100101100100100101,
664    0b001100101100101100100,
665    0b001100101100101100101,
666    0b001100111100101100100,
667    0b000000000010000001100,
668    0b001000000000001100101,
669    0b001000000000101000101,
670    0b001000001000001000000,
671    0b001000101000101000100,
672    0b001000111000100000100,
673    0b001001001001000001001,
674    0b001101111100101100101,
675    0b001100111100101100101,
676    0b001001111001101001100,
677    0b001001001001001001000,
678    0b001001011001001001000,
679 };
680 
/*
 * Gfx12 control-index lookup table: 32 entries, each written as a 21-digit
 * binary literal.  The trailing comment on each entry gives a
 * disassembly-style reading of that bit pattern: an optional (W) marker or
 * flag predicate, the execution size and mask offset such as (16|M0), an
 * optional condition modifier with its flag register, and (sat) / {AccWrEn}
 * where present.
 * Presumably indexed by the control index field of a compacted instruction
 * -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
681 static const uint32_t gfx12_control_index_table[32] = {
682    0b000000000000000000100, /* 	       (16|M0)                            */
683    0b000000000000000000011, /* 	       (8|M0)                             */
684    0b000000010000000000000, /* 	(W)    (1|M0)                             */
685    0b000000010000000000100, /* 	(W)    (16|M0)                            */
686    0b000000010000000000011, /* 	(W)    (8|M0)                             */
687    0b010000000000000000100, /* 	       (16|M0)  (ge)f0.0                  */
688    0b000000000000000100100, /* 	       (16|M16)                           */
689    0b010100000000000000100, /* 	       (16|M0)  (lt)f0.0                  */
690    0b000000000000000000000, /* 	       (1|M0)                             */
691    0b000010000000000000100, /* 	       (16|M0)           (sat)            */
692    0b000000000000000010011, /* 	       (8|M8)                             */
693    0b001100000000000000100, /* 	       (16|M0)  (gt)f0.0                  */
694    0b000100000000000000100, /* 	       (16|M0)  (eq)f0.0                  */
695    0b000100010000000000100, /* 	(W)    (16|M0)  (eq)f0.0                  */
696    0b001000000000000000100, /* 	       (16|M0)  (ne)f0.0                  */
697    0b000000000000100000100, /* 	(f0.0) (16|M0)                            */
698    0b010100000000000000011, /* 	       (8|M0)   (lt)f0.0                  */
699    0b000000000000110000100, /* 	(f1.0) (16|M0)                            */
700    0b000000010000000000001, /* 	(W)    (2|M0)                             */
701    0b000000000000101000100, /* 	(f0.1) (16|M0)                            */
702    0b000000000000111000100, /* 	(f1.1) (16|M0)                            */
703    0b010000010000000000100, /* 	(W)    (16|M0)  (ge)f0.0                  */
704    0b000000000000000100011, /* 	       (8|M16)                            */
705    0b000000000000000110011, /* 	       (8|M24)                            */
706    0b010100010000000000100, /* 	(W)    (16|M0)  (lt)f0.0                  */
707    0b010000000000000000011, /* 	       (8|M0)   (ge)f0.0                  */
708    0b000100010000000000000, /* 	(W)    (1|M0)   (eq)f0.0                  */
709    0b000010000000000000011, /* 	       (8|M0)            (sat)            */
710    0b010100000000010000100, /* 	       (16|M0)  (lt)f1.0                  */
711    0b000100000000000000011, /* 	       (8|M0)   (eq)f0.0                  */
712    0b000001000000000000011, /* 	       (8|M0)                   {AccWrEn} */
713    0b000000010000000100100, /* 	(W)    (16|M16)                           */
714 };
715 
/*
 * Gfx12 datatype lookup table: 32 entries, each written as a 20-digit binary
 * literal.  The trailing comment on each entry lists three operands, each as
 * a register file (grf/arf/imm) with a type suffix; the first also carries a
 * <N> annotation.  These are presumably destination, src0 and src1 in that
 * order -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
716 static const uint32_t gfx12_datatype_table[32] = {
717    0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
718    0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
719    0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
720    0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
721    0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
722    0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
723    0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
724    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725    0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
726    0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
727    0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
728    0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
729    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730    0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
731    0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
732    0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733    0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
734    0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
735    0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737    0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
738    0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739    0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740    0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
741    0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
742    0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
743    0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
744    0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745    0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
746    0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
747    0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
748    0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
749 };
750 
/*
 * Gfx12 sub-register lookup table: 32 entries, each written as a 15-digit
 * binary literal (fits in uint16_t).  The trailing comment on each entry
 * lists three sub-register values; judging by the literals, the first
 * occupies the five least-significant digits, the second the middle five,
 * and the third the five most-significant.  Which operand each column
 * belongs to is not shown here -- presumably dst, src0, src1; confirm
 * against the lookup code.
 * Entry position is the encoded index; do not reorder.
 */
751 static const uint16_t gfx12_subreg_table[32] = {
752    0b000000000000000, /* .0  .0  .0  */
753    0b100000000000000, /* .0  .0  .16 */
754    0b001000000000000, /* .0  .0  .4  */
755    0b011000000000000, /* .0  .0  .12 */
756    0b000000010000000, /* .0  .4  .0  */
757    0b010000000000000, /* .0  .0  .8  */
758    0b101000000000000, /* .0  .0  .20 */
759    0b000000000001000, /* .8  .0  .0  */
760    0b000000100000000, /* .0  .8  .0  */
761    0b110000000000000, /* .0  .0  .24 */
762    0b111000000000000, /* .0  .0  .28 */
763    0b000001000000000, /* .0  .16 .0  */
764    0b000000000000100, /* .4  .0  .0  */
765    0b000001100000000, /* .0  .24 .0  */
766    0b000001010000000, /* .0  .20 .0  */
767    0b000000110000000, /* .0  .12 .0  */
768    0b000001110000000, /* .0  .28 .0  */
769    0b000000000011100, /* .28 .0  .0  */
770    0b000000000010000, /* .16 .0  .0  */
771    0b000000000001100, /* .12 .0  .0  */
772    0b000000000011000, /* .24 .0  .0  */
773    0b000000000010100, /* .20 .0  .0  */
774    0b000000000000010, /* .2  .0  .0  */
775    0b000000101000000, /* .0  .10 .0  */
776    0b000000001000000, /* .0  .2  .0  */
777    0b000000010000100, /* .4  .4  .0  */
778    0b000000001011100, /* .28 .2  .0  */
779    0b000000001000010, /* .2  .2  .0  */
780    0b000000110001100, /* .12 .12 .0  */
781    0b000000000100000, /* .0  .1  .0  */
782    0b000000001100000, /* .0  .3  .0  */
783    0b110001100000000, /* .0  .24 .24 */
784 };
785 
/*
 * Gfx12 src0 index lookup table: 16 entries (half the size of the 32-entry
 * tables used for earlier generations), each written as a 12-digit binary
 * literal.  The trailing comment on each entry gives the source modifier
 * (negate / abs) and region description, e.g. r<8;8,1>, that the pattern
 * represents.  Presumably indexed by the src0 index field of a compacted
 * instruction -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
786 static const uint16_t gfx12_src0_index_table[16] = {
787    0b010001100100, /*       r<8;8,1>  */
788    0b000000000000, /*       r<0;1,0>  */
789    0b010001100110, /*      -r<8;8,1>  */
790    0b010001100101, /*  (abs)r<8;8,1>  */
791    0b000000000010, /*      -r<0;1,0>  */
792    0b001000000000, /*       r<2;1,0>  */
793    0b001001000000, /*       r<2;4,0>  */
794    0b001101000000, /*       r<4;4,0>  */
795    0b001000100100, /*       r<2;2,1>  */
796    0b001100000000, /*       r<4;1,0>  */
797    0b001000100110, /*      -r<2;2,1>  */
798    0b001101000100, /*       r<4;4,1>  */
799    0b010001100111, /* -(abs)r<8;8,1>  */
800    0b000100000000, /*       r<1;1,0>  */
801    0b000000000001, /*  (abs)r<0;1,0>  */
802    0b111100010000, /*       r[a]<1,0> */
803 };
804 
/*
 * Gfx12 src1 index lookup table: 16 entries, each written as a 12-digit
 * binary literal.  The trailing comment on each entry gives the source
 * modifier (negate / abs) and region description that the pattern
 * represents.  Note that the bit layout differs from the src0 table: for
 * example r<8;8,1> is 0b000100011001 here but 0b010001100100 in
 * gfx12_src0_index_table, so the two tables are not interchangeable.
 * Presumably indexed by the src1 index field of a compacted instruction --
 * confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
805 static const uint16_t gfx12_src1_index_table[16] = {
806    0b000100011001, /*       r<8;8,1> */
807    0b000000000000, /*       r<0;1,0> */
808    0b100100011001, /*      -r<8;8,1> */
809    0b100000000000, /*      -r<0;1,0> */
810    0b010100011001, /*  (abs)r<8;8,1> */
811    0b100011010000, /*      -r<4;4,0> */
812    0b000010000000, /*       r<2;1,0> */
813    0b000010001001, /*       r<2;2,1> */
814    0b100010001001, /*      -r<2;2,1> */
815    0b000011010000, /*       r<4;4,0> */
816    0b000011010001, /*       r<4;4,1> */
817    0b000011000000, /*       r<4;1,0> */
818    0b110100011001, /* -(abs)r<8;8,1> */
819    0b010000000000, /*  (abs)r<0;1,0> */
820    0b110000000000, /* -(abs)r<0;1,0> */
821    0b100011010001, /*      -r<4;4,1> */
822 };
823 
/*
 * XeHP src0 index lookup table: 16 entries, each written as a 12-digit
 * binary literal.  The trailing comment on each entry gives the source
 * modifier (negate / abs) and region description that the pattern
 * represents.  Presumably the XeHP counterpart of gfx12_src0_index_table
 * and indexed the same way -- confirm against the lookup code, which is not
 * visible here.  Entry position is the encoded index; do not reorder.
 */
824 static const uint16_t xehp_src0_index_table[16] = {
825    0b000100000000, /*       r<1;1,0>  */
826    0b000000000000, /*       r<0;1,0>  */
827    0b000100000010, /*      -r<1;1,0>  */
828    0b000100000001, /*  (abs)r<1;1,0>  */
829    0b000000000010, /*      -r<0;1,0>  */
830    0b001000000000, /*       r<2;1,0>  */
831    0b001001000000, /*       r<2;4,0>  */
832    0b001101000000, /*       r<4;4,0>  */
833    0b001100000000, /*       r<4;1,0>  */
834    0b000100000011, /* -(abs)r<1;1,0>  */
835    0b000000000001, /*  (abs)r<0;1,0>  */
836    0b111100010000, /*       r[a]<1,0> */
837    0b010001100000, /*       r<8;8,0>  */
838    0b000101000000, /*       r<1;4,0>  */
839    0b010001001000, /*       r<8;4,2>  */
840    0b001000000010, /*      -r<2;1,0>  */
841 };
842 
/*
 * XeHP src1 index lookup table: 16 entries, each written as a 12-digit
 * binary literal.  The trailing comment on each entry gives the source
 * modifier (negate / abs) and region description that the pattern
 * represents.  Presumably the XeHP counterpart of gfx12_src1_index_table
 * and indexed the same way -- confirm against the lookup code, which is not
 * visible here.  Entry position is the encoded index; do not reorder.
 */
843 static const uint16_t xehp_src1_index_table[16] = {
844    0b000001000000, /*       r<1;1,0>    */
845    0b000000000000, /*       r<0;1,0>    */
846    0b100001000000, /*      -r<1;1,0>    */
847    0b100000000000, /*      -r<0;1,0>    */
848    0b010001000000, /*  (abs)r<1;1,0>    */
849    0b100011010000, /*      -r<4;4,0>    */
850    0b000010000000, /*       r<2;1,0>    */
851    0b000011010000, /*       r<4;4,0>    */
852    0b000011000000, /*       r<4;1,0>    */
853    0b110001000000, /* -(abs)r<1;1,0>    */
854    0b010000000000, /*  (abs)r<0;1,0>    */
855    0b110000000000, /* -(abs)r<0;1,0>    */
856    0b000100011000, /*       r<8;8,0>    */
857    0b100010000000, /*      -r<2;1,0>    */
858    0b100000001001, /*      -r<0;2,1>    */
859    0b100001000100, /*      -r[a]<1;1,0> */
860 };
861 
862 /* This is actually the control index table for Cherryview (26 bits), but the
863  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
864  * the start.
865  *
866  * The low 24 bits have the same mappings on both hardware.
867  */
/* Four entries, each written as a 26-digit binary literal.  The two leading
 * digits are 0 in every entry, which is what lets the one table serve both
 * the 24-bit and the 26-bit layout described above.
 */
868 static const uint32_t gfx8_3src_control_index_table[4] = {
869    0b00100000000110000000000001,
870    0b00000000000110000000000001,
871    0b00000000001000000000000001,
872    0b00000000001000000000100001,
873 };
874 
875 /* This is actually the control index table for Cherryview (49 bits), but the
876  * only difference from Broadwell (46 bits) is that it has three extra 0-bits
877  * at the start.
878  *
879  * The low 44 bits have the same mappings on both hardware, and since the high
880  * three bits on Broadwell are zero, we can reuse Cherryview's table.
881  */
/* Four entries, each written as a 49-digit binary literal, hence uint64_t.
 *
 * NOTE(review): the comment above calls this a "control index table", but
 * the array is the 3-src *source* index table.  It also gives the widths as
 * 49 vs 46 bits (a three-bit difference) while saying only the "low 44 bits"
 * share mappings.  Confirm the intended figures against the hardware
 * documentation.
 */
882 static const uint64_t gfx8_3src_source_index_table[4] = {
883    0b0000001110010011100100111001000001111000000000000,
884    0b0000001110010011100100111001000001111000000000010,
885    0b0000001110010011100100111001000001111000000001000,
886    0b0000001110010011100100111001000001111000000100000,
887 };
888 
/*
 * Gfx12 3-src control-index lookup table: 32 entries, each written as a
 * 36-digit binary literal, hence uint64_t storage.  The trailing comment on
 * each entry gives a disassembly-style reading of the pattern: an optional
 * (W) marker, the execution size and mask offset such as (8|M0), optional
 * (sat), the destination register file with <1>, and four type suffixes
 * (presumably the destination followed by the three sources -- confirm).
 * Presumably indexed by the control index field of a compacted 3-src
 * instruction -- confirm against the lookup code, which is not visible here.
 * Entry position is the encoded index; do not reorder.
 */
889 static const uint64_t gfx12_3src_control_index_table[32] = {
890    0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
891    0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
892    0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
893    0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
894    0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
895    0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
896    0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
897    0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
898    0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
899    0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
900    0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
901    0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
902    0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
903    0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
904    0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
905    0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
906    0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
907    0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
908    0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
909    0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
910    0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
911    0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
912    0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
913    0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
914    0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
915    0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
916    0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
917    0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
918    0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
919    0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
920    0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
921    0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
922 };
923 
/* Uncompacted 37-bit 3-source control words for hardware with verx10 >= 125.
 * set_3src_control_index() gathers the same bits from a native instruction,
 * searches this table for an exact match and stores the position of the
 * matching entry as the compacted control index.  The trailing comment on
 * each entry spells out the instruction form that bit pattern stands for.
 */
924 static const uint64_t xehp_3src_control_index_table[32] = {
925    0b0000010010100010101000000000000000100, /*          (16|M0)       grf<1>:f   :f   :f   :f          */
926    0b0000010010100010101000000000000000011, /*          (8|M0)        grf<1>:f   :f   :f   :f          */
927    0b0000010010000010101000000000000000011, /*          (8|M0)        arf<1>:f   :f   :f   :f          */
928    0b0000010010100010101000010000000000011, /*     (W)  (8|M0)        grf<1>:f   :f   :f   :f          */
929    0b0000010010000010101000010000000000011, /*     (W)  (8|M0)        arf<1>:f   :f   :f   :f          */
930    0b0000010010000010101000000000000010011, /*          (8|M8)        arf<1>:f   :f   :f   :f          */
931    0b0000010010100010101000000000000010011, /*          (8|M8)        grf<1>:f   :f   :f   :f          */
932    0b0000010010000010101000010000000010011, /*     (W)  (8|M8)        arf<1>:f   :f   :f   :f          */
933    0b0000010010100010101000010000000010011, /*     (W)  (8|M8)        grf<1>:f   :f   :f   :f          */
934    0b0000010010100010101000010000000000100, /*     (W)  (16|M0)       grf<1>:f   :f   :f   :f          */
935    0b0000010010000010101000000000000000100, /*          (16|M0)       arf<1>:f   :f   :f   :f          */
936    0b0000010010100010101010000000000000100, /*          (16|M0)  (sat)grf<1>:f   :f   :f   :f          */
937    0b0000010010100010101000000000000100100, /*          (16|M16)      grf<1>:f   :f   :f   :f          */
938    0b0000010010000010101000010000000000100, /*     (W)  (16|M0)       arf<1>:f   :f   :f   :f          */
939    0b0000010010100010101000010000000000000, /*     (W)  (1|M0)        grf<1>:f   :f   :f   :f          */
940    0b0000010010100010101010000000000000011, /*          (8|M0)   (sat)grf<1>:f   :f   :f   :f          */
941    0b0000010010000010101000010000000100011, /*     (W)  (8|M16)       arf<1>:f   :f   :f   :f          */
942    0b0000010010000010101000010000000110011, /*     (W)  (8|M24)       arf<1>:f   :f   :f   :f          */
943    0b0000010010100010101000010000000100011, /*     (W)  (8|M16)       grf<1>:f   :f   :f   :f          */
944    0b0000010010100010101000010000000110011, /*     (W)  (8|M24)       grf<1>:f   :f   :f   :f          */
945    0b0000010010000010101000000000000110011, /*          (8|M24)       arf<1>:f   :f   :f   :f          */
946    0b0000010010000010101000000000000100011, /*          (8|M16)       arf<1>:f   :f   :f   :f          */
947    0b0000000100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b          */
948    0b0000000000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub {Atomic} */
949    0b0000100100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b {Atomic} */
950    0b0000100000111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub {Atomic} */
951    0b0000100100111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b   :b          */
952    0b0000000000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub  :ub          */
953    0b0000000100111110011000100000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d  :ub   :b {Atomic} */
954    0b0000100000111110011000000000000000011, /* dpas.8x* (8|M0)        grf<1>:d   :d   :b  :ub          */
955    0b0000101101111010101000100000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf {Atomic} */
956    0b0000101101111010101000000000000000011, /* dpas.8x* (8|M0)        grf<1>:f   :f  :bf  :bf          */
957 };
958 
/* Uncompacted 21-bit 3-source source words for Gfx12 hardware below
 * verx10 125.  set_3src_source_index() searches this table for an exact match
 * and stores the position of the matching entry as the compacted source
 * index.  The trailing comment on each entry spells out the operand form that
 * bit pattern stands for.
 */
959 static const uint32_t gfx12_3src_source_index_table[32] = {
960    0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
961    0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
962    0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
963    0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
964    0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
965    0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
966    0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
967    0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
968    0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
969    0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
970    0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
971    0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
972    0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
973    0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
974    0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
975    0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
976    0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
977    0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
978    0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
979    0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
980    0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
981    0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
982    0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
983    0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
984    0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
985    0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
986    0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
987    0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
988    0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
989    0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
990    0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
991    0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
992 };
993 
/* Uncompacted 21-bit 3-source source words for hardware with verx10 >= 125.
 * set_3src_source_index() searches this table for an exact match and stores
 * the position of the matching entry as the compacted source index.  The
 * trailing comment on each entry spells out the operand form that bit pattern
 * stands for; one entry is annotated with several forms.
 */
994 static const uint32_t xehp_3src_source_index_table[32] = {
995    0b100100000001100000000, /*           grf<0;0>   grf<1;0>     grf<0>      */
996    0b100100000001000000001, /*           arf<1;0>   grf<1;0>     grf<0>      */
997    0b101100000001100000001, /*           grf<1;0>   grf<1;0>     grf<1>      */
998    0b100100000001100000001, /*           grf<1;0>   grf<1;0>     grf<0>      */
999    0b101100000000100000001, /*           grf<1;0>   grf<0;0>     grf<1>      */
1000    0b101100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<1>      */
1001    0b101000000001100000001, /*           grf<1;0>   arf<1;0>     grf<1>      */
1002    0b101100000001100000000, /*           grf<0;0>   grf<1;0>     grf<1>      */
1003    0b100000000001100000000, /*           grf<0;0>   arf<1;0>     grf<0>      */
1004    0b101100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<1>      */
1005    0b101100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<1>      */
1006    0b101100000000100000000, /*           grf<0;0>   grf<0;0>     grf<1>      */
1007    0b100000000001100000001, /*           grf<1;0>   arf<1;0>     grf<0>      */
1008    0b100100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<0>      */
1009    0b100100010001100000001, /*           grf<1;0>  -grf<1;0>     grf<0>      */
1010    0b100100000001100001001, /*          -grf<1;0>   grf<1;0>     grf<0>      */
1011    0b100100000000100000001, /*           grf<1;0>   grf<0;0>     grf<0>      */
1012    0b100100000001100001000, /*          -grf<0;0>   grf<1;0>     grf<0>      */
1013    0b100100000000100000000, /*           grf<0;0>   grf<0;0>     grf<0>
1014                              * dpas.*x1  grf:d      grf:[ub,b]   grf:[ub,b]
1015                              * dpas.*x1  grf:f      grf:bf       grf:bf
1016                              */
1017    0b101100010001100000000, /*           grf<0;0>  -grf<1;0>     grf<1>      */
1018    0b100100000101100000000, /*           grf<0;0>   grf<1;0>    -grf<0>      */
1019    0b101000000001100000000, /*           grf<0;0>   arf<1;0>     grf<1>      */
1020    0b100100000101100000001, /*           grf<1;0>   grf<1;0>    -grf<0>      */
1021    0b101100000101100001001, /*          -grf<1;0>   grf<1;0>    -grf<1>      */
1022    0b100100010000100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[ub,b]  */
1023    0b100100000100100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u2,s2] */
1024    0b100100010100100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u2,s2] */
1025    0b100100001000100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[ub,b]  */
1026    0b100100001100100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u2,s2] */
1027    0b100100000010100000000, /* dpas.*x1  grf:d      grf:[ub,b]   grf:[u4,s4] */
1028    0b100100001010100000000, /* dpas.*x1  grf:d      grf:[u4,s4]  grf:[u4,s4] */
1029    0b100100010010100000000, /* dpas.*x1  grf:d      grf:[u2,s2]  grf:[u4,s4] */
1030 };
1031 
/* Uncompacted 20-bit 3-source subregister words for Gfx12+.  Each entry is
 * four 5-bit fields; set_3src_subreg_index() assembles them from instruction
 * bits 119:115, 103:99, 71:67 and 55:51 (most significant field first),
 * searches this table for an exact match and stores the position of the
 * matching entry as the compacted subregister index.  The trailing comment on
 * each entry lists the four subregister offsets that pattern stands for.
 */
1032 static const uint32_t gfx12_3src_subreg_table[32] = {
1033    0b00000000000000000000, /* .0  .0  .0  .0  */
1034    0b00100000000000000000, /* .0  .0  .0  .4  */
1035    0b00000000000110000000, /* .0  .12 .0  .0  */
1036    0b10100000000000000000, /* .0  .0  .0  .20 */
1037    0b10000000001110000000, /* .0  .28 .0  .16 */
1038    0b01100000000000000000, /* .0  .0  .0  .12 */
1039    0b01000000000000000000, /* .0  .0  .0  .8  */
1040    0b00000010000000000000, /* .0  .0  .8  .0  */
1041    0b00000001000000000000, /* .0  .0  .4  .0  */
1042    0b11000000000000000000, /* .0  .0  .0  .24 */
1043    0b10000000000000000000, /* .0  .0  .0  .16 */
1044    0b11100000000000000000, /* .0  .0  .0  .28 */
1045    0b00000110000000000000, /* .0  .0  .24 .0  */
1046    0b00000000000010000000, /* .0  .4  .0  .0  */
1047    0b00000100000000000000, /* .0  .0  .16 .0  */
1048    0b00000011000000000000, /* .0  .0  .12 .0  */
1049    0b00000101000000000000, /* .0  .0  .20 .0  */
1050    0b00000111000000000000, /* .0  .0  .28 .0  */
1051    0b00000000000100000000, /* .0  .8  .0  .0  */
1052    0b00000000001000000000, /* .0  .16 .0  .0  */
1053    0b00000000001100000000, /* .0  .24 .0  .0  */
1054    0b00000000001010000000, /* .0  .20 .0  .0  */
1055    0b00000000001110000000, /* .0  .28 .0  .0  */
1056    0b11000000001110000000, /* .0  .28 .0  .24 */
1057    0b00100000000100000000, /* .0  .8  .0  .4  */
1058    0b00100000000110000000, /* .0  .12 .0  .4  */
1059    0b01000000000110000000, /* .0  .12 .0  .8  */
1060    0b10000000001100000000, /* .0  .24 .0  .16 */
1061    0b10000000001010000000, /* .0  .20 .0  .16 */
1062    0b01100000000010000000, /* .0  .4  .0  .12 */
1063    0b10100000001110000000, /* .0  .28 .0  .20 */
1064    0b01000000000010000000, /* .0  .4  .0  .8  */
1065 };
1066 
/* Lookup tables used by the set_*_index() helpers for one- and two-source
 * instructions.  Each helper packs bits of the native instruction into a key
 * and scans the matching table for an entry equal to that key.
 */
1067 struct compaction_state {
1068    const struct brw_isa_info *isa;          /* isa->devinfo supplies ver/verx10/platform */
1069    const uint32_t *control_index_table;     /* set_control_index() reads entries 0..31 */
1070    const uint32_t *datatype_table;          /* set_datatype_index() reads entries 0..31 */
1071    const uint16_t *subreg_table;            /* set_subreg_index() reads entries 0..31 */
1072    const uint16_t *src0_index_table;        /* scanned by set_src0_index() */
1073    const uint16_t *src1_index_table;        /* scanned by set_src1_index() for non-immediates */
1074 };
1075 
/* Forward declaration; the definition is outside this part of the file.
 * NOTE(review): presumably selects the tables above for the given ISA --
 * confirm at the definition.
 */
1076 static void compaction_state_init(struct compaction_state *c,
1077                                   const struct brw_isa_info *isa);
1078 
1079 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1080 set_control_index(const struct compaction_state *c,
1081                   brw_compact_inst *dst, const brw_inst *src)
1082 {
1083    const struct intel_device_info *devinfo = c->isa->devinfo;
1084    uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
1085 
1086    if (devinfo->ver >= 12) {
1087       uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
1088                     (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1089                     (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1090                     (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1091                     (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1092                     (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1093                     (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1094                     (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
1095                     (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1096                     (brw_inst_bits(src, 18, 16));        /*  3b */
1097    } else if (devinfo->ver >= 8) {
1098       uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
1099                     (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
1100                     (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
1101                     (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
1102                     (brw_inst_bits(src,  8,  8));        /*  1b */
1103    } else {
1104       uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
1105                     (brw_inst_bits(src, 23,  8));        /* 16b */
1106 
1107       /* On gfx7, the flag register and subregister numbers are integrated into
1108        * the control index.
1109        */
1110       if (devinfo->ver == 7)
1111          uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1112    }
1113 
1114    for (int i = 0; i < 32; i++) {
1115       if (c->control_index_table[i] == uncompacted) {
1116          brw_compact_inst_set_control_index(devinfo, dst, i);
1117 	 return true;
1118       }
1119    }
1120 
1121    return false;
1122 }
1123 
1124 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1125 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1126                    const brw_inst *src, bool is_immediate)
1127 {
1128    const struct intel_device_info *devinfo = c->isa->devinfo;
1129    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1130 
1131    if (devinfo->ver >= 12) {
1132       uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
1133                     (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
1134                     (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
1135                     (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
1136                     (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
1137                     (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
1138                     (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
1139                     (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
1140                     (brw_inst_bits(src, 35, 35));        /*  1b */
1141 
1142       /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1143        * is present
1144        */
1145       if (!is_immediate) {
1146          uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1147       }
1148    } else if (devinfo->ver >= 8) {
1149       uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
1150                     (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1151                     (brw_inst_bits(src, 46, 35));        /* 12b */
1152    } else {
1153       uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
1154                     (brw_inst_bits(src, 46, 32));        /* 15b */
1155    }
1156 
1157    for (int i = 0; i < 32; i++) {
1158       if (c->datatype_table[i] == uncompacted) {
1159          brw_compact_inst_set_datatype_index(devinfo, dst, i);
1160 	 return true;
1161       }
1162    }
1163 
1164    return false;
1165 }
1166 
1167 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1168 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1169                  const brw_inst *src, bool is_immediate)
1170 {
1171    const struct intel_device_info *devinfo = c->isa->devinfo;
1172    uint16_t uncompacted; /* 15b */
1173 
1174    if (devinfo->ver >= 12) {
1175       uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1176                     (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1177 
1178       if (!is_immediate)
1179          uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1180    } else {
1181       uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1182                     (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1183 
1184       if (!is_immediate)
1185          uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1186    }
1187 
1188    for (int i = 0; i < 32; i++) {
1189       if (c->subreg_table[i] == uncompacted) {
1190          brw_compact_inst_set_subreg_index(devinfo, dst, i);
1191 	 return true;
1192       }
1193    }
1194 
1195    return false;
1196 }
1197 
1198 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1199 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1200                const brw_inst *src)
1201 {
1202    const struct intel_device_info *devinfo = c->isa->devinfo;
1203    uint16_t uncompacted; /* 12b */
1204    int table_len;
1205 
1206    if (devinfo->ver >= 12) {
1207       table_len = ARRAY_SIZE(gfx12_src0_index_table);
1208       uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
1209                     (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1210                     (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1211                     (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1212                     (brw_inst_bits(src, 45, 44));       /*  2b */
1213    } else {
1214       table_len = ARRAY_SIZE(gfx8_src_index_table);
1215       uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1216    }
1217 
1218    for (int i = 0; i < table_len; i++) {
1219       if (c->src0_index_table[i] == uncompacted) {
1220          brw_compact_inst_set_src0_index(devinfo, dst, i);
1221 	 return true;
1222       }
1223    }
1224 
1225    return false;
1226 }
1227 
1228 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1229 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1230                const brw_inst *src, bool is_immediate, unsigned imm)
1231 {
1232    const struct intel_device_info *devinfo = c->isa->devinfo;
1233    if (is_immediate) {
1234       if (devinfo->ver >= 12) {
1235          /* src1 index takes the low 4 bits of the 12-bit compacted value */
1236          brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1237       } else {
1238          /* src1 index takes the high 5 bits of the 13-bit compacted value */
1239          brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1240       }
1241       return true;
1242    } else {
1243       uint16_t uncompacted; /* 12b */
1244       int table_len;
1245 
1246       if (devinfo->ver >= 12) {
1247          table_len = ARRAY_SIZE(gfx12_src0_index_table);
1248          uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1249                        (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1250                        (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1251                        (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1252                        (brw_inst_bits(src,  97,  96));        /*  2b */
1253       } else {
1254          table_len = ARRAY_SIZE(gfx8_src_index_table);
1255          uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1256       }
1257 
1258       for (int i = 0; i < table_len; i++) {
1259          if (c->src1_index_table[i] == uncompacted) {
1260             brw_compact_inst_set_src1_index(devinfo, dst, i);
1261             return true;
1262          }
1263       }
1264    }
1265 
1266    return false;
1267 }
1268 
1269 static bool
set_3src_control_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1270 set_3src_control_index(const struct intel_device_info *devinfo,
1271                        brw_compact_inst *dst, const brw_inst *src)
1272 {
1273    assert(devinfo->ver >= 8);
1274 
1275    if (devinfo->verx10 >= 125) {
1276       uint64_t uncompacted =             /* 37b/XeHP+ */
1277          (brw_inst_bits(src, 95, 92) << 33) | /*  4b */
1278          (brw_inst_bits(src, 90, 88) << 30) | /*  3b */
1279          (brw_inst_bits(src, 82, 80) << 27) | /*  3b */
1280          (brw_inst_bits(src, 50, 50) << 26) | /*  1b */
1281          (brw_inst_bits(src, 49, 48) << 24) | /*  2b */
1282          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1283          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1284          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1285          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1286          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1287          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1288          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1289          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1290          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1291          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1292          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1293          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1294          (brw_inst_bits(src, 18, 16));        /*  3b */
1295 
1296       for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {
1297          if (xehp_3src_control_index_table[i] == uncompacted) {
1298             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1299             return true;
1300          }
1301       }
1302    } else if (devinfo->ver >= 12) {
1303       uint64_t uncompacted =             /* 36b/TGL+ */
1304          (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1305          (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1306          (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1307          (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1308          (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1309          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1310          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1311          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1312          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1313          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1314          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1315          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1316          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1317          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1318          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1319          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1320          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1321          (brw_inst_bits(src, 18, 16));        /*  3b */
1322 
1323       for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {
1324          if (gfx12_3src_control_index_table[i] == uncompacted) {
1325             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1326             return true;
1327          }
1328       }
1329    } else {
1330       uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1331          (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1332          (brw_inst_bits(src, 28,  8));         /* 21b */
1333 
1334       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1335          uncompacted |=
1336             brw_inst_bits(src, 36, 35) << 24;  /*  2b */
1337       }
1338 
1339       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {
1340          if (gfx8_3src_control_index_table[i] == uncompacted) {
1341             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1342             return true;
1343          }
1344       }
1345    }
1346 
1347    return false;
1348 }
1349 
1350 static bool
set_3src_source_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1351 set_3src_source_index(const struct intel_device_info *devinfo,
1352                       brw_compact_inst *dst, const brw_inst *src)
1353 {
1354    assert(devinfo->ver >= 8);
1355 
1356    if (devinfo->ver >= 12) {
1357       uint32_t uncompacted =               /* 21b/TGL+ */
1358          (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1359          (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1360          (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1361          (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1362          (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1363          (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1364          (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1365          (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1366          (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1367          (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1368          (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1369          (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1370          (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1371          (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1372          (brw_inst_bits(src,  35,  35));        /*  1b */
1373 
1374       const uint32_t *three_src_source_index_table =
1375          devinfo->verx10 >= 125 ?
1376          xehp_3src_source_index_table : gfx12_3src_source_index_table;
1377       const uint32_t three_src_source_index_table_len =
1378          devinfo->verx10 >= 125 ? ARRAY_SIZE(xehp_3src_source_index_table) :
1379                                   ARRAY_SIZE(gfx12_3src_source_index_table);
1380 
1381       for (unsigned i = 0; i < three_src_source_index_table_len; i++) {
1382          if (three_src_source_index_table[i] == uncompacted) {
1383             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1384             return true;
1385          }
1386       }
1387    } else {
1388       uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1389          (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1390          (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1391          (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1392          (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1393          (brw_inst_bits(src,  55,  37));          /* 19b */
1394 
1395       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1396          uncompacted |=
1397             (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1398             (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1399             (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
1400       } else {
1401          uncompacted |=
1402             (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1403             (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
1404       }
1405 
1406       for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {
1407          if (gfx8_3src_source_index_table[i] == uncompacted) {
1408             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1409             return true;
1410          }
1411       }
1412    }
1413 
1414    return false;
1415 }
1416 
1417 static bool
set_3src_subreg_index(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1418 set_3src_subreg_index(const struct intel_device_info *devinfo,
1419                       brw_compact_inst *dst, const brw_inst *src)
1420 {
1421    assert(devinfo->ver >= 12);
1422 
1423    uint32_t uncompacted =               /* 20b/TGL+ */
1424       (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1425       (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1426       (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1427       (brw_inst_bits(src,  55,  51));        /*  5b */
1428 
1429    for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {
1430       if (gfx12_3src_subreg_table[i] == uncompacted) {
1431          brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1432 	 return true;
1433       }
1434    }
1435 
1436    return false;
1437 }
1438 
1439 static bool
has_unmapped_bits(const struct brw_isa_info * isa,const brw_inst * src)1440 has_unmapped_bits(const struct brw_isa_info *isa, const brw_inst *src)
1441 {
1442    const struct intel_device_info *devinfo = isa->devinfo;
1443 
1444    /* EOT can only be mapped on a send if the src1 is an immediate */
1445    if ((brw_inst_opcode(isa, src) == BRW_OPCODE_SENDC ||
1446         brw_inst_opcode(isa, src) == BRW_OPCODE_SEND) &&
1447        brw_inst_eot(devinfo, src))
1448       return true;
1449 
1450    /* Check for instruction bits that don't map to any of the fields of the
1451     * compacted instruction.  The instruction cannot be compacted if any of
1452     * them are set.  They overlap with:
1453     *  - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)
1454     *  - Dst.AddrImm[9] (bit 47 on Gfx8)
1455     *  - Src0.AddrImm[9] (bit 95 on Gfx8)
1456     *  - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)
1457     *  - UIP[31] (bit 95 on Gfx8)
1458     */
1459    if (devinfo->ver >= 12) {
1460       assert(!brw_inst_bits(src, 7,  7));
1461       return false;
1462    } else if (devinfo->ver >= 8) {
1463       assert(!brw_inst_bits(src, 7,  7));
1464       return brw_inst_bits(src, 95, 95) ||
1465              brw_inst_bits(src, 47, 47) ||
1466              brw_inst_bits(src, 11, 11);
1467    } else {
1468       assert(!brw_inst_bits(src, 7,  7) &&
1469              !(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));
1470       return brw_inst_bits(src, 95, 91) ||
1471              brw_inst_bits(src, 47, 47);
1472    }
1473 }
1474 
1475 static bool
has_3src_unmapped_bits(const struct intel_device_info * devinfo,const brw_inst * src)1476 has_3src_unmapped_bits(const struct intel_device_info *devinfo,
1477                        const brw_inst *src)
1478 {
1479    /* Check for three-source instruction bits that don't map to any of the
1480     * fields of the compacted instruction.  All of them seem to be reserved
1481     * bits currently.
1482     */
1483    if (devinfo->ver >= 12) {
1484       assert(!brw_inst_bits(src, 7, 7));
1485    } else if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
1486       assert(!brw_inst_bits(src, 127, 127) &&
1487              !brw_inst_bits(src, 7,  7));
1488    } else {
1489       assert(devinfo->ver >= 8);
1490       assert(!brw_inst_bits(src, 127, 126) &&
1491              !brw_inst_bits(src, 105, 105) &&
1492              !brw_inst_bits(src, 84, 84) &&
1493              !brw_inst_bits(src, 7,  7));
1494 
1495       /* Src1Type and Src2Type, used for mixed-precision floating point */
1496       if (brw_inst_bits(src, 36, 35))
1497          return true;
1498    }
1499 
1500    return false;
1501 }
1502 
1503 static bool
brw_try_compact_3src_instruction(const struct intel_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1504 brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,
1505                                  brw_compact_inst *dst, const brw_inst *src)
1506 {
1507    assert(devinfo->ver >= 8);
1508 
1509    if (has_3src_unmapped_bits(devinfo, src))
1510       return false;
1511 
1512 #define compact(field) \
1513    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1514 #define compact_a16(field) \
1515    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1516 
1517    compact(hw_opcode);
1518 
1519    if (!set_3src_control_index(devinfo, dst, src))
1520       return false;
1521 
1522    if (!set_3src_source_index(devinfo, dst, src))
1523       return false;
1524 
1525    if (devinfo->ver >= 12) {
1526       if (!set_3src_subreg_index(devinfo, dst, src))
1527          return false;
1528 
1529       compact(swsb);
1530       compact(debug_control);
1531       compact(dst_reg_nr);
1532       compact(src0_reg_nr);
1533       compact(src1_reg_nr);
1534       compact(src2_reg_nr);
1535    } else {
1536       compact(dst_reg_nr);
1537       compact_a16(src0_rep_ctrl);
1538       compact(debug_control);
1539       compact(saturate);
1540       compact_a16(src1_rep_ctrl);
1541       compact_a16(src2_rep_ctrl);
1542       compact(src0_reg_nr);
1543       compact(src1_reg_nr);
1544       compact(src2_reg_nr);
1545       compact_a16(src0_subreg_nr);
1546       compact_a16(src1_subreg_nr);
1547       compact_a16(src2_subreg_nr);
1548    }
1549    brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1550 
1551 #undef compact
1552 #undef compact_a16
1553 
1554    return true;
1555 }
1556 
1557 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1558  * sources, and a 13th bit that's replicated through the high 20 bits.
1559  *
1560  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1561  * of packed vectors as compactable immediates.
1562  *
1563  * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1564  * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1565  * while for unsigned integers it is not.
1566  *
1567  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1568  */
1569 static int
compact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned imm)1570 compact_immediate(const struct intel_device_info *devinfo,
1571                   enum brw_reg_type type, unsigned imm)
1572 {
1573    if (devinfo->ver >= 12) {
1574       /* 16-bit immediates need to be replicated through the 32-bit immediate
1575        * field
1576        */
1577       switch (type) {
1578       case BRW_REGISTER_TYPE_W:
1579       case BRW_REGISTER_TYPE_UW:
1580       case BRW_REGISTER_TYPE_HF:
1581          if ((imm >> 16) != (imm & 0xffff))
1582             return -1;
1583          break;
1584       default:
1585          break;
1586       }
1587 
1588       switch (type) {
1589       case BRW_REGISTER_TYPE_F:
1590          /* We get the high 12-bits as-is; rest must be zero */
1591          if ((imm & 0xfffff) == 0)
1592             return (imm >> 20) & 0xfff;
1593          break;
1594       case BRW_REGISTER_TYPE_HF:
1595          /* We get the high 12-bits as-is; rest must be zero */
1596          if ((imm & 0xf) == 0)
1597             return (imm >> 4) & 0xfff;
1598          break;
1599       case BRW_REGISTER_TYPE_UD:
1600       case BRW_REGISTER_TYPE_VF:
1601       case BRW_REGISTER_TYPE_UV:
1602       case BRW_REGISTER_TYPE_V:
1603          /* We get the low 12-bits as-is; rest must be zero */
1604          if ((imm & 0xfffff000) == 0)
1605             return imm & 0xfff;
1606          break;
1607       case BRW_REGISTER_TYPE_UW:
1608          /* We get the low 12-bits as-is; rest must be zero */
1609          if ((imm & 0xf000) == 0)
1610             return imm & 0xfff;
1611          break;
1612       case BRW_REGISTER_TYPE_D:
1613          /* We get the low 11-bits as-is; 12th is replicated */
1614          if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1615             return imm & 0xfff;
1616          break;
1617       case BRW_REGISTER_TYPE_W:
1618          /* We get the low 11-bits as-is; 12th is replicated */
1619          if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1620             return imm & 0xfff;
1621          break;
1622       case BRW_REGISTER_TYPE_NF:
1623       case BRW_REGISTER_TYPE_DF:
1624       case BRW_REGISTER_TYPE_Q:
1625       case BRW_REGISTER_TYPE_UQ:
1626       case BRW_REGISTER_TYPE_B:
1627       case BRW_REGISTER_TYPE_UB:
1628          return -1;
1629       }
1630    } else {
1631       /* We get the low 12 bits as-is; 13th is replicated */
1632       if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1633          return imm & 0x1fff;
1634       }
1635    }
1636 
1637    return -1;
1638 }
1639 
1640 static int
uncompact_immediate(const struct intel_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1641 uncompact_immediate(const struct intel_device_info *devinfo,
1642                     enum brw_reg_type type, unsigned compact_imm)
1643 {
1644    if (devinfo->ver >= 12) {
1645       switch (type) {
1646       case BRW_REGISTER_TYPE_F:
1647          return compact_imm << 20;
1648       case BRW_REGISTER_TYPE_HF:
1649          return (compact_imm << 20) | (compact_imm << 4);
1650       case BRW_REGISTER_TYPE_UD:
1651       case BRW_REGISTER_TYPE_VF:
1652       case BRW_REGISTER_TYPE_UV:
1653       case BRW_REGISTER_TYPE_V:
1654          return compact_imm;
1655       case BRW_REGISTER_TYPE_UW:
1656          /* Replicate */
1657          return compact_imm << 16 | compact_imm;
1658       case BRW_REGISTER_TYPE_D:
1659          /* Extend the 12th bit into the high 20 bits */
1660          return (int)(compact_imm << 20) >> 20;
1661       case BRW_REGISTER_TYPE_W:
1662          /* Extend the 12th bit into the high 4 bits and replicate */
1663          return ((int)(compact_imm << 20) >> 4) |
1664                 ((unsigned short)((short)(compact_imm << 4) >> 4));
1665       case BRW_REGISTER_TYPE_NF:
1666       case BRW_REGISTER_TYPE_DF:
1667       case BRW_REGISTER_TYPE_Q:
1668       case BRW_REGISTER_TYPE_UQ:
1669       case BRW_REGISTER_TYPE_B:
1670       case BRW_REGISTER_TYPE_UB:
1671          unreachable("not reached");
1672       }
1673    } else {
1674       /* Replicate the 13th bit into the high 19 bits */
1675       return (int)(compact_imm << 19) >> 19;
1676    }
1677 
1678    unreachable("not reached");
1679 }
1680 
1681 static bool
has_immediate(const struct intel_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1682 has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,
1683               enum brw_reg_type *type)
1684 {
1685    if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1686       *type = brw_inst_src0_type(devinfo, inst);
1687       return *type != INVALID_REG_TYPE;
1688    } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1689       *type = brw_inst_src1_type(devinfo, inst);
1690       return *type != INVALID_REG_TYPE;
1691    }
1692 
1693    return false;
1694 }
1695 
1696 /**
1697  * Applies some small changes to instruction types to increase chances of
1698  * compaction.
1699  */
1700 static brw_inst
precompact(const struct brw_isa_info * isa,brw_inst inst)1701 precompact(const struct brw_isa_info *isa, brw_inst inst)
1702 {
1703    const struct intel_device_info *devinfo = isa->devinfo;
1704 
1705    /* In XeHP the compaction tables removed the entries for source regions
1706     * <8;8,1> giving preference to <1;1,0> as the way to indicate
1707     * sequential elements, so convert to those before compacting.
1708     */
1709    if (devinfo->verx10 >= 125) {
1710       if (brw_inst_src0_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1711           brw_inst_src0_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1712           brw_inst_src0_vstride(devinfo, &inst) == (brw_inst_src0_width(devinfo, &inst) + 1) &&
1713           brw_inst_src0_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1714          brw_inst_set_src0_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1715          brw_inst_set_src0_width(devinfo, &inst, BRW_WIDTH_1);
1716          brw_inst_set_src0_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1717       }
1718 
1719       if (brw_inst_src1_reg_file(devinfo, &inst) == BRW_GENERAL_REGISTER_FILE &&
1720           brw_inst_src1_vstride(devinfo, &inst) > BRW_VERTICAL_STRIDE_1 &&
1721           brw_inst_src1_vstride(devinfo, &inst) == (brw_inst_src1_width(devinfo, &inst) + 1) &&
1722           brw_inst_src1_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1723          brw_inst_set_src1_vstride(devinfo, &inst, BRW_VERTICAL_STRIDE_1);
1724          brw_inst_set_src1_width(devinfo, &inst, BRW_WIDTH_1);
1725          brw_inst_set_src1_hstride(devinfo, &inst, BRW_HORIZONTAL_STRIDE_0);
1726       }
1727    }
1728 
1729    if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1730       return inst;
1731 
1732    /* The Bspec's section titled "Non-present Operands" claims that if src0
1733     * is an immediate that src1's type must be the same as that of src0.
1734     *
1735     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1736     * that do not follow this rule. E.g., from the IVB/HSW table:
1737     *
1738     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1739     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1740     *
1741     * And from the SNB table:
1742     *
1743     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1744     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1745     *
1746     * Neither of these cause warnings from the simulator when used,
1747     * compacted or otherwise. In fact, all compaction mappings that have an
1748     * immediate in src0 use a:ud for src1.
1749     *
1750     * The GM45 instruction compaction tables do not contain mapped meanings
1751     * so it's not clear whether it has the restriction. We'll assume it was
1752     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1753     *
1754     * Don't do any of this for 64-bit immediates, since the src1 fields
1755     * overlap with the immediate and setting them would overwrite the
1756     * immediate we set.
1757     */
1758    if (devinfo->ver >= 6 &&
1759        !(devinfo->platform == INTEL_PLATFORM_HSW &&
1760          brw_inst_opcode(isa, &inst) == BRW_OPCODE_DIM) &&
1761        !(devinfo->ver >= 8 &&
1762          (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1763           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1764           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1765       brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1766    }
1767 
1768    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1769     * for immediate values. Presumably the hardware engineers realized
1770     * that the only useful floating-point value that could be represented
1771     * in this format is 0.0, which can also be represented as a VF-typed
1772     * immediate, so they gave us the previously mentioned mapping on IVB+.
1773     *
1774     * Strangely, we do have a mapping for imm:f in src1, so we don't need
1775     * to do this there.
1776     *
1777     * If we see a 0.0:F, change the type to VF so that it can be compacted.
1778     *
1779     * Compaction of floating-point immediates is improved on Gfx12, thus
1780     * removing the need for this.
1781     */
1782    if (devinfo->ver < 12 &&
1783        brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1784        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1785        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1786        brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1787       enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1788       brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1789    }
1790 
1791    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1792     * set the types to :UD so the instruction can be compacted.
1793     *
1794     * FINISHME: Use dst:f | imm:f on Gfx12
1795     */
1796    if (devinfo->ver < 12 &&
1797        compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1798                          brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1799        brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1800        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1801        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1802       enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1803       enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1804 
1805       brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1806       brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1807    }
1808 
1809    return inst;
1810 }
1811 
1812 /**
1813  * Tries to compact instruction src into dst.
1814  *
1815  * It doesn't modify dst unless src is compactable, which is relied on by
1816  * brw_compact_instructions().
1817  */
1818 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1819 try_compact_instruction(const struct compaction_state *c,
1820                         brw_compact_inst *dst, const brw_inst *src)
1821 {
1822    const struct intel_device_info *devinfo = c->isa->devinfo;
1823    brw_compact_inst temp;
1824 
1825    assert(brw_inst_cmpt_control(devinfo, src) == 0);
1826 
1827    if (is_3src(c->isa, brw_inst_opcode(c->isa, src))) {
1828       if (devinfo->ver >= 8) {
1829          memset(&temp, 0, sizeof(temp));
1830          if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1831             *dst = temp;
1832             return true;
1833          } else {
1834             return false;
1835          }
1836       } else {
1837          return false;
1838       }
1839    }
1840 
1841    enum brw_reg_type type;
1842    bool is_immediate = has_immediate(devinfo, src, &type);
1843 
1844    unsigned compacted_imm = 0;
1845 
1846    if (is_immediate) {
1847       /* Instructions with immediates cannot be compacted on Gen < 6 */
1848       if (devinfo->ver < 6)
1849          return false;
1850 
1851       compacted_imm = compact_immediate(devinfo, type,
1852                                         brw_inst_imm_ud(devinfo, src));
1853       if (compacted_imm == -1)
1854          return false;
1855    }
1856 
1857    if (has_unmapped_bits(c->isa, src))
1858       return false;
1859 
1860    memset(&temp, 0, sizeof(temp));
1861 
1862 #define compact(field) \
1863    brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1864 #define compact_reg(field) \
1865    brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1866                                        brw_inst_##field##_da_reg_nr(devinfo, src))
1867 
1868    compact(hw_opcode);
1869    compact(debug_control);
1870 
1871    if (!set_control_index(c, &temp, src))
1872       return false;
1873    if (!set_datatype_index(c, &temp, src, is_immediate))
1874       return false;
1875    if (!set_subreg_index(c, &temp, src, is_immediate))
1876       return false;
1877    if (!set_src0_index(c, &temp, src))
1878       return false;
1879    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1880       return false;
1881 
1882    if (devinfo->ver >= 12) {
1883       compact(swsb);
1884       compact_reg(dst);
1885       compact_reg(src0);
1886 
1887       if (is_immediate) {
1888          /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1889          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1890       } else {
1891          compact_reg(src1);
1892       }
1893    } else {
1894       if (devinfo->ver >= 6) {
1895          compact(acc_wr_control);
1896       } else {
1897          compact(mask_control_ex);
1898       }
1899 
1900       if (devinfo->ver <= 6)
1901          compact(flag_subreg_nr);
1902 
1903       compact(cond_modifier);
1904 
1905       compact_reg(dst);
1906       compact_reg(src0);
1907 
1908       if (is_immediate) {
1909          /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1910          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1911       } else {
1912          compact_reg(src1);
1913       }
1914    }
1915    brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1916 
1917 #undef compact
1918 #undef compact_reg
1919 
1920    *dst = temp;
1921 
1922    return true;
1923 }
1924 
1925 bool
brw_try_compact_instruction(const struct brw_isa_info * isa,brw_compact_inst * dst,const brw_inst * src)1926 brw_try_compact_instruction(const struct brw_isa_info *isa,
1927                             brw_compact_inst *dst, const brw_inst *src)
1928 {
1929    struct compaction_state c;
1930    compaction_state_init(&c, isa);
1931    return try_compact_instruction(&c, dst, src);
1932 }
1933 
1934 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1935 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1936                         brw_compact_inst *src)
1937 {
1938    const struct intel_device_info *devinfo = c->isa->devinfo;
1939    uint32_t uncompacted =
1940       c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1941 
1942    if (devinfo->ver >= 12) {
1943       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1944       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1945       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1946       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1947       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1948       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1949       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1950       brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1951       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1952       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1953    } else if (devinfo->ver >= 8) {
1954       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1955       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1956       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1957       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1958       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1959    } else {
1960       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1961       brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1962 
1963       if (devinfo->ver == 7)
1964          brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1965    }
1966 }
1967 
1968 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1969 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1970                          brw_compact_inst *src)
1971 {
1972    const struct intel_device_info *devinfo = c->isa->devinfo;
1973    uint32_t uncompacted =
1974       c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1975 
1976    if (devinfo->ver >= 12) {
1977       brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1978       brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1979       brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1980       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1981       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1982       brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1983       brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1984       brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1985       brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1986       brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1987    } else if (devinfo->ver >= 8) {
1988       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1989       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1990       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1991    } else {
1992       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1993       brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1994    }
1995 }
1996 
1997 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1998 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1999                        brw_compact_inst *src)
2000 {
2001    const struct intel_device_info *devinfo = c->isa->devinfo;
2002    uint16_t uncompacted =
2003       c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
2004 
2005    if (devinfo->ver >= 12) {
2006       brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
2007       brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
2008       brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
2009    } else {
2010       brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
2011       brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
2012       brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
2013    }
2014 }
2015 
2016 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2017 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
2018                      brw_compact_inst *src)
2019 {
2020    const struct intel_device_info *devinfo = c->isa->devinfo;
2021    uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
2022    uint16_t uncompacted = c->src0_index_table[compacted];
2023 
2024    if (devinfo->ver >= 12) {
2025       brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
2026       brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
2027       brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
2028       brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
2029       brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
2030    } else {
2031       brw_inst_set_bits(dst, 88, 77, uncompacted);
2032    }
2033 }
2034 
2035 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2036 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
2037                      brw_compact_inst *src)
2038 {
2039    const struct intel_device_info *devinfo = c->isa->devinfo;
2040    uint16_t uncompacted =
2041       c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
2042 
2043    if (devinfo->ver >= 12) {
2044       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
2045       brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
2046       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
2047       brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
2048       brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
2049    } else {
2050       brw_inst_set_bits(dst, 120, 109, uncompacted);
2051    }
2052 }
2053 
2054 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2055 set_uncompacted_3src_control_index(const struct compaction_state *c,
2056                                    brw_inst *dst, brw_compact_inst *src)
2057 {
2058    const struct intel_device_info *devinfo = c->isa->devinfo;
2059    assert(devinfo->ver >= 8);
2060 
2061    if (devinfo->verx10 >= 125) {
2062       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2063       uint64_t uncompacted = xehp_3src_control_index_table[compacted];
2064 
2065       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));
2066       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);
2067       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);
2068       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);
2069       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);
2070       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2071       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2072       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2073       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2074       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2075       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2076       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2077       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2078       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2079       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2080       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2081       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2082       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2083 
2084    } else if (devinfo->ver >= 12) {
2085       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2086       uint64_t uncompacted = gfx12_3src_control_index_table[compacted];
2087 
2088       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
2089       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
2090       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
2091       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
2092       brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
2093       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
2094       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
2095       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
2096       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
2097       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
2098       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
2099       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
2100       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
2101       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
2102       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
2103       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
2104       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
2105       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
2106    } else {
2107       uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
2108       uint32_t uncompacted = gfx8_3src_control_index_table[compacted];
2109 
2110       brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
2111       brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
2112 
2113       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV)
2114          brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
2115    }
2116 }
2117 
2118 static void
set_uncompacted_3src_source_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2119 set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,
2120                                   brw_inst *dst, brw_compact_inst *src)
2121 {
2122    assert(devinfo->ver >= 8);
2123 
2124    uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
2125 
2126    if (devinfo->ver >= 12) {
2127       const uint32_t *three_src_source_index_table =
2128          devinfo->verx10 >= 125 ?
2129          xehp_3src_source_index_table : gfx12_3src_source_index_table;
2130       uint32_t uncompacted = three_src_source_index_table[compacted];
2131 
2132       brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
2133       brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
2134       brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
2135       brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
2136       brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
2137       brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
2138       brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
2139       brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
2140       brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
2141       brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
2142       brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
2143       brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
2144       brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
2145       brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
2146       brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
2147    } else {
2148       uint64_t uncompacted = gfx8_3src_source_index_table[compacted];
2149 
2150       brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
2151       brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
2152       brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
2153       brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
2154       brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
2155 
2156       if (devinfo->ver >= 9 || devinfo->platform == INTEL_PLATFORM_CHV) {
2157          brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
2158          brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
2159          brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
2160       } else {
2161          brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
2162          brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
2163       }
2164    }
2165 }
2166 
2167 static void
set_uncompacted_3src_subreg_index(const struct intel_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2168 set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,
2169                                   brw_inst *dst, brw_compact_inst *src)
2170 {
2171    assert(devinfo->ver >= 12);
2172 
2173    uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
2174    uint32_t uncompacted = gfx12_3src_subreg_table[compacted];
2175 
2176    brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
2177    brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
2178    brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
2179    brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
2180 }
2181 
2182 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2183 brw_uncompact_3src_instruction(const struct compaction_state *c,
2184                                brw_inst *dst, brw_compact_inst *src)
2185 {
2186    const struct intel_device_info *devinfo = c->isa->devinfo;
2187    assert(devinfo->ver >= 8);
2188 
2189 #define uncompact(field) \
2190    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2191 #define uncompact_a16(field) \
2192    brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
2193 
2194    uncompact(hw_opcode);
2195 
2196    if (devinfo->ver >= 12) {
2197       set_uncompacted_3src_control_index(c, dst, src);
2198       set_uncompacted_3src_source_index(devinfo, dst, src);
2199       set_uncompacted_3src_subreg_index(devinfo, dst, src);
2200 
2201       uncompact(debug_control);
2202       uncompact(swsb);
2203       uncompact(dst_reg_nr);
2204       uncompact(src0_reg_nr);
2205       uncompact(src1_reg_nr);
2206       uncompact(src2_reg_nr);
2207    } else {
2208       set_uncompacted_3src_control_index(c, dst, src);
2209       set_uncompacted_3src_source_index(devinfo, dst, src);
2210 
2211       uncompact(dst_reg_nr);
2212       uncompact_a16(src0_rep_ctrl);
2213       uncompact(debug_control);
2214       uncompact(saturate);
2215       uncompact_a16(src1_rep_ctrl);
2216       uncompact_a16(src2_rep_ctrl);
2217       uncompact(src0_reg_nr);
2218       uncompact(src1_reg_nr);
2219       uncompact(src2_reg_nr);
2220       uncompact_a16(src0_subreg_nr);
2221       uncompact_a16(src1_subreg_nr);
2222       uncompact_a16(src2_subreg_nr);
2223    }
2224    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2225 
2226 #undef uncompact
2227 #undef uncompact_a16
2228 }
2229 
2230 static void
uncompact_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2231 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2232                       brw_compact_inst *src)
2233 {
2234    const struct intel_device_info *devinfo = c->isa->devinfo;
2235    memset(dst, 0, sizeof(*dst));
2236 
2237    if (devinfo->ver >= 8 &&
2238        is_3src(c->isa, brw_opcode_decode(c->isa,
2239                   brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2240       brw_uncompact_3src_instruction(c, dst, src);
2241       return;
2242    }
2243 
2244 #define uncompact(field) \
2245    brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2246 #define uncompact_reg(field) \
2247    brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2248                                     brw_compact_inst_##field##_reg_nr(devinfo, src))
2249 
2250    uncompact(hw_opcode);
2251    uncompact(debug_control);
2252 
2253    set_uncompacted_control(c, dst, src);
2254    set_uncompacted_datatype(c, dst, src);
2255    set_uncompacted_subreg(c, dst, src);
2256    set_uncompacted_src0(c, dst, src);
2257 
2258    enum brw_reg_type type;
2259    if (has_immediate(devinfo, dst, &type)) {
2260       unsigned imm = uncompact_immediate(devinfo, type,
2261                                          brw_compact_inst_imm(devinfo, src));
2262       brw_inst_set_imm_ud(devinfo, dst, imm);
2263    } else {
2264       set_uncompacted_src1(c, dst, src);
2265       uncompact_reg(src1);
2266    }
2267 
2268    if (devinfo->ver >= 12) {
2269       uncompact(swsb);
2270       uncompact_reg(dst);
2271       uncompact_reg(src0);
2272    } else {
2273       if (devinfo->ver >= 6) {
2274          uncompact(acc_wr_control);
2275       } else {
2276          uncompact(mask_control_ex);
2277       }
2278 
2279       uncompact(cond_modifier);
2280 
2281       if (devinfo->ver <= 6)
2282          uncompact(flag_subreg_nr);
2283 
2284       uncompact_reg(dst);
2285       uncompact_reg(src0);
2286    }
2287    brw_inst_set_cmpt_control(devinfo, dst, false);
2288 
2289 #undef uncompact
2290 #undef uncompact_reg
2291 }
2292 
2293 void
brw_uncompact_instruction(const struct brw_isa_info * isa,brw_inst * dst,brw_compact_inst * src)2294 brw_uncompact_instruction(const struct brw_isa_info *isa,
2295                           brw_inst *dst, brw_compact_inst *src)
2296 {
2297    struct compaction_state c;
2298    compaction_state_init(&c, isa);
2299    uncompact_instruction(&c, dst, src);
2300 }
2301 
2302 void
brw_debug_compact_uncompact(const struct brw_isa_info * isa,brw_inst * orig,brw_inst * uncompacted)2303 brw_debug_compact_uncompact(const struct brw_isa_info *isa,
2304                             brw_inst *orig,
2305                             brw_inst *uncompacted)
2306 {
2307    fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
2308            isa->devinfo->ver);
2309 
2310    fprintf(stderr, "  before: ");
2311    brw_disassemble_inst(stderr, isa, orig, true, 0, NULL);
2312 
2313    fprintf(stderr, "  after:  ");
2314    brw_disassemble_inst(stderr, isa, uncompacted, false, 0, NULL);
2315 
2316    uint32_t *before_bits = (uint32_t *)orig;
2317    uint32_t *after_bits = (uint32_t *)uncompacted;
2318    fprintf(stderr, "  changed bits:\n");
2319    for (int i = 0; i < 128; i++) {
2320       uint32_t before = before_bits[i / 32] & (1 << (i & 31));
2321       uint32_t after = after_bits[i / 32] & (1 << (i & 31));
2322 
2323       if (before != after) {
2324          fprintf(stderr, "  bit %d, %s to %s\n", i,
2325                  before ? "set" : "unset",
2326                  after ? "set" : "unset");
2327       }
2328    }
2329 }
2330 
/**
 * Difference between the running compaction counts recorded for two
 * pre-compaction instruction indices.
 *
 * \param old_ip            Pre-compaction index of the referring instruction.
 * \param old_target_ip     Pre-compaction index of the target instruction.
 * \param compacted_counts  Per-instruction running counts, indexed by
 *                          pre-compaction instruction index.
 *
 * \return compacted_counts[old_target_ip] - compacted_counts[old_ip]; the
 *         value is negative when the target's count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2338 
2339 static void
update_uip_jip(const struct brw_isa_info * isa,brw_inst * insn,int this_old_ip,int * compacted_counts)2340 update_uip_jip(const struct brw_isa_info *isa, brw_inst *insn,
2341                int this_old_ip, int *compacted_counts)
2342 {
2343    const struct intel_device_info *devinfo = isa->devinfo;
2344 
2345    /* JIP and UIP are in units of:
2346     *    - bytes on Gfx8+; and
2347     *    - compacted instructions on Gfx6+.
2348     */
2349    int shift = devinfo->ver >= 8 ? 3 : 0;
2350 
2351    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2352    jip_compacted -= compacted_between(this_old_ip,
2353                                       this_old_ip + (jip_compacted / 2),
2354                                       compacted_counts);
2355    brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2356 
2357    if (brw_inst_opcode(isa, insn) == BRW_OPCODE_ENDIF ||
2358        brw_inst_opcode(isa, insn) == BRW_OPCODE_WHILE ||
2359        (brw_inst_opcode(isa, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))
2360       return;
2361 
2362    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2363    uip_compacted -= compacted_between(this_old_ip,
2364                                       this_old_ip + (uip_compacted / 2),
2365                                       compacted_counts);
2366    brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2367 }
2368 
2369 static void
update_gfx4_jump_count(const struct intel_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2370 update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,
2371                        int this_old_ip, int *compacted_counts)
2372 {
2373    assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X);
2374 
2375    /* Jump Count is in units of:
2376     *    - uncompacted instructions on G45; and
2377     *    - compacted instructions on Gfx5.
2378     */
2379    int shift = devinfo->platform == INTEL_PLATFORM_G4X ? 1 : 0;
2380 
2381    int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;
2382 
2383    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2384 
2385    int this_compacted_count = compacted_counts[this_old_ip];
2386    int target_compacted_count = compacted_counts[target_old_ip];
2387 
2388    jump_count_compacted -= (target_compacted_count - this_compacted_count);
2389    brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2390 }
2391 
2392 static void
compaction_state_init(struct compaction_state * c,const struct brw_isa_info * isa)2393 compaction_state_init(struct compaction_state *c,
2394                       const struct brw_isa_info *isa)
2395 {
2396    const struct intel_device_info *devinfo = isa->devinfo;
2397 
2398    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2399    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2400    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2401    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2402    assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);
2403    assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);
2404    assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);
2405    assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);
2406    assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);
2407    assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);
2408    assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);
2409    assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);
2410    assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);
2411    assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);
2412    assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);
2413    assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);
2414    assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);
2415    assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 0);
2416    assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);
2417    assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);
2418    assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);
2419    assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);
2420    assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);
2421    assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);
2422 
2423    c->isa = isa;
2424    switch (devinfo->ver) {
2425    case 12:
2426       c->control_index_table = gfx12_control_index_table;;
2427       c->datatype_table = gfx12_datatype_table;
2428       c->subreg_table = gfx12_subreg_table;
2429       if (devinfo->verx10 >= 125) {
2430          c->src0_index_table = xehp_src0_index_table;
2431          c->src1_index_table = xehp_src1_index_table;
2432       } else {
2433          c->src0_index_table = gfx12_src0_index_table;
2434          c->src1_index_table = gfx12_src1_index_table;
2435       }
2436       break;
2437    case 11:
2438       c->control_index_table = gfx8_control_index_table;
2439       c->datatype_table = gfx11_datatype_table;
2440       c->subreg_table = gfx8_subreg_table;
2441       c->src0_index_table = gfx8_src_index_table;
2442       c->src1_index_table = gfx8_src_index_table;
2443       break;
2444    case 9:
2445    case 8:
2446       c->control_index_table = gfx8_control_index_table;
2447       c->datatype_table = gfx8_datatype_table;
2448       c->subreg_table = gfx8_subreg_table;
2449       c->src0_index_table = gfx8_src_index_table;
2450       c->src1_index_table = gfx8_src_index_table;
2451       break;
2452    case 7:
2453       c->control_index_table = gfx7_control_index_table;
2454       c->datatype_table = gfx7_datatype_table;
2455       c->subreg_table = gfx7_subreg_table;
2456       c->src0_index_table = gfx7_src_index_table;
2457       c->src1_index_table = gfx7_src_index_table;
2458       break;
2459    case 6:
2460       c->control_index_table = gfx6_control_index_table;
2461       c->datatype_table = gfx6_datatype_table;
2462       c->subreg_table = gfx6_subreg_table;
2463       c->src0_index_table = gfx6_src_index_table;
2464       c->src1_index_table = gfx6_src_index_table;
2465       break;
2466    case 5:
2467    case 4:
2468       c->control_index_table = g45_control_index_table;
2469       c->datatype_table = g45_datatype_table;
2470       c->subreg_table = g45_subreg_table;
2471       c->src0_index_table = g45_src_index_table;
2472       c->src1_index_table = g45_src_index_table;
2473       break;
2474    default:
2475       unreachable("unknown generation");
2476    }
2477 }
2478 
2479 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2480 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2481                          struct disasm_info *disasm)
2482 {
2483    if (INTEL_DEBUG(DEBUG_NO_COMPACTION))
2484       return;
2485 
2486    const struct intel_device_info *devinfo = p->devinfo;
2487    if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X)
2488       return;
2489 
2490    void *store = p->store + start_offset / 16;
2491    /* For an instruction at byte offset 16*i before compaction, this is the
2492     * number of compacted instructions minus the number of padding NOP/NENOPs
2493     * that preceded it.
2494     */
2495    unsigned num_compacted_counts =
2496       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2497    int *compacted_counts =
2498       calloc(1, sizeof(*compacted_counts) * num_compacted_counts);
2499 
2500    /* For an instruction at byte offset 8*i after compaction, this was its IP
2501     * (in 16-byte units) before compaction.
2502     */
2503    unsigned num_old_ip =
2504       (p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1;
2505    int *old_ip = calloc(1, sizeof(*old_ip) * num_old_ip);
2506 
2507    struct compaction_state c;
2508    compaction_state_init(&c, p->isa);
2509 
2510    int offset = 0;
2511    int compacted_count = 0;
2512    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2513         src_offset += sizeof(brw_inst)) {
2514       brw_inst *src = store + src_offset;
2515       void *dst = store + offset;
2516 
2517       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2518       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2519 
2520       brw_inst inst = precompact(p->isa, *src);
2521       brw_inst saved = inst;
2522 
2523       if (try_compact_instruction(&c, dst, &inst)) {
2524          compacted_count++;
2525 
2526          if (INTEL_DEBUG(DEBUG_ANY)) {
2527             brw_inst uncompacted;
2528             uncompact_instruction(&c, &uncompacted, dst);
2529             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2530                brw_debug_compact_uncompact(p->isa, &saved, &uncompacted);
2531             }
2532          }
2533 
2534          offset += sizeof(brw_compact_inst);
2535       } else {
2536          /* All uncompacted instructions need to be aligned on G45. */
2537          if ((offset & sizeof(brw_compact_inst)) != 0 &&
2538              devinfo->platform == INTEL_PLATFORM_G4X) {
2539             brw_compact_inst *align = store + offset;
2540             memset(align, 0, sizeof(*align));
2541             brw_compact_inst_set_hw_opcode(
2542                devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NENOP));
2543             brw_compact_inst_set_cmpt_control(devinfo, align, true);
2544             offset += sizeof(brw_compact_inst);
2545             compacted_count--;
2546             compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2547             old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2548 
2549             dst = store + offset;
2550          }
2551 
2552          /* If we didn't compact this instruction, we need to move it down into
2553           * place.
2554           */
2555          if (offset != src_offset) {
2556             memmove(dst, src, sizeof(brw_inst));
2557          }
2558          offset += sizeof(brw_inst);
2559       }
2560    }
2561 
2562    /* Add an entry for the ending offset of the program. This greatly
2563     * simplifies the linked list walk at the end of the function.
2564     */
2565    old_ip[offset / sizeof(brw_compact_inst)] =
2566       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2567 
2568    /* Fix up control flow offsets. */
2569    p->next_insn_offset = start_offset + offset;
2570    for (offset = 0; offset < p->next_insn_offset - start_offset;
2571         offset = next_offset(devinfo, store, offset)) {
2572       brw_inst *insn = store + offset;
2573       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2574       int this_compacted_count = compacted_counts[this_old_ip];
2575 
2576       switch (brw_inst_opcode(p->isa, insn)) {
2577       case BRW_OPCODE_BREAK:
2578       case BRW_OPCODE_CONTINUE:
2579       case BRW_OPCODE_HALT:
2580          if (devinfo->ver >= 6) {
2581             update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2582          } else {
2583             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2584                                    compacted_counts);
2585          }
2586          break;
2587 
2588       case BRW_OPCODE_IF:
2589       case BRW_OPCODE_IFF:
2590       case BRW_OPCODE_ELSE:
2591       case BRW_OPCODE_ENDIF:
2592       case BRW_OPCODE_WHILE:
2593          if (devinfo->ver >= 7) {
2594             if (brw_inst_cmpt_control(devinfo, insn)) {
2595                brw_inst uncompacted;
2596                uncompact_instruction(&c, &uncompacted,
2597                                      (brw_compact_inst *)insn);
2598 
2599                update_uip_jip(p->isa, &uncompacted, this_old_ip,
2600                               compacted_counts);
2601 
2602                bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2603                                                   &uncompacted);
2604                assert(ret); (void)ret;
2605             } else {
2606                update_uip_jip(p->isa, insn, this_old_ip, compacted_counts);
2607             }
2608          } else if (devinfo->ver == 6) {
2609             assert(!brw_inst_cmpt_control(devinfo, insn));
2610 
2611             /* Jump Count is in units of compacted instructions on Gfx6. */
2612             int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);
2613 
2614             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2615             int target_compacted_count = compacted_counts[target_old_ip];
2616             jump_count_compacted -= (target_compacted_count - this_compacted_count);
2617             brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);
2618          } else {
2619             update_gfx4_jump_count(devinfo, insn, this_old_ip,
2620                                    compacted_counts);
2621          }
2622          break;
2623 
2624       case BRW_OPCODE_ADD:
2625          /* Add instructions modifying the IP register use an immediate src1,
2626           * and Gens that use this cannot compact instructions with immediate
2627           * operands.
2628           */
2629          if (brw_inst_cmpt_control(devinfo, insn))
2630             break;
2631 
2632          if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2633              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2634             assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2635 
2636             int shift = 3;
2637             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2638 
2639             int target_old_ip = this_old_ip + (jump_compacted / 2);
2640             int target_compacted_count = compacted_counts[target_old_ip];
2641             jump_compacted -= (target_compacted_count - this_compacted_count);
2642             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2643          }
2644          break;
2645 
2646       default:
2647          break;
2648       }
2649    }
2650 
2651    /* p->nr_insn is counting the number of uncompacted instructions still, so
2652     * divide.  We do want to be sure there's a valid instruction in any
2653     * alignment padding, so that the next compression pass (for the FS 8/16
2654     * compile passes) parses correctly.
2655     */
2656    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2657       brw_compact_inst *align = store + offset;
2658       memset(align, 0, sizeof(*align));
2659       brw_compact_inst_set_hw_opcode(
2660          devinfo, align, brw_opcode_encode(p->isa, BRW_OPCODE_NOP));
2661       brw_compact_inst_set_cmpt_control(devinfo, align, true);
2662       p->next_insn_offset += sizeof(brw_compact_inst);
2663    }
2664    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2665 
2666    for (int i = 0; i < p->num_relocs; i++) {
2667       if (p->relocs[i].offset < (uint32_t)start_offset)
2668          continue;
2669 
2670       assert(p->relocs[i].offset % 16 == 0);
2671       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2672       p->relocs[i].offset -= compacted_counts[idx] * 8;
2673    }
2674 
2675    /* Update the instruction offsets for each group. */
2676    if (disasm) {
2677       int offset = 0;
2678 
2679       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2680          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2681                 sizeof(brw_inst) != group->offset) {
2682             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2683                    sizeof(brw_inst) < group->offset);
2684             offset = next_offset(devinfo, store, offset);
2685          }
2686 
2687          group->offset = start_offset + offset;
2688 
2689          offset = next_offset(devinfo, store, offset);
2690       }
2691    }
2692 
2693    free(compacted_counts);
2694    free(old_ip);
2695 }
2696