• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012-2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file brw_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gen5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gen6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gen7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gen6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gen8 adds support for compacting 3-src instructions.
73  *
74  * Gen12 reduces the number of bits available to compacted immediates from
75  * 13 to 12, but improves the compaction of floating-point immediates by
76  * allowing the high bits to be encoded (the sign, 8-bit exponent, and the
77  * three most significant bits of the mantissa), rather than the lowest bits of
78  * the mantissa.
79  */
80 
81 #include "brw_eu.h"
82 #include "brw_shader.h"
83 #include "brw_disasm_info.h"
84 #include "dev/gen_debug.h"
85 
/* G45 compaction lookup table for the control-index field.
 *
 * 32 entries, each written as a 17-digit binary literal.
 * NOTE(review): the code that searches this table is outside this view;
 * presumably the position of the matching entry is what the compacted
 * encoding stores -- confirm there.
 */
86 static const uint32_t g45_control_index_table[32] = {
87    0b00000000000000000,
88    0b01000000000000000,
89    0b00110000000000000,
90    0b00000000000000010,
91    0b00100000000000000,
92    0b00010000000000000,
93    0b01000000000100000,
94    0b01000000100000000,
95    0b01010000000100000,
96    0b00000000100000010,
97    0b11000000000000000,
98    0b00001000100000010,
99    0b01001000100000000,
100    0b00000000100000000,
101    0b11000000000100000,
102    0b00001000100000000,
103    0b10110000000000000,
104    0b11010000000100000,
105    0b00110000100000000,
106    0b00100000100000000,
107    0b01000000000001000,
108    0b01000000000000100,
109    0b00111100000000000,
110    0b00101011000000000,
111    0b00110000000010000,
112    0b00010000100000000,
113    0b01000000000100100,
114    0b01000000000101000,
115    0b00110000000000110,
116    0b00000000000001010,
117    0b01010000000101000,
118    0b01010000000100100,
119 };
120 
/* G45 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as an 18-digit binary literal.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
121 static const uint32_t g45_datatype_table[32] = {
122    0b001000000000100001,
123    0b001011010110101101,
124    0b001000001000110001,
125    0b001111011110111101,
126    0b001011010110101100,
127    0b001000000110101101,
128    0b001000000000100000,
129    0b010100010110110001,
130    0b001100011000101101,
131    0b001000000000100010,
132    0b001000001000110110,
133    0b010000001000110001,
134    0b001000001000110010,
135    0b011000001000110010,
136    0b001111011110111100,
137    0b001000000100101000,
138    0b010100011000110001,
139    0b001010010100101001,
140    0b001000001000101001,
141    0b010000001000110110,
142    0b101000001000110001,
143    0b001011011000101101,
144    0b001000000100001001,
145    0b001011011000101100,
146    0b110100011000110001,
147    0b001000001110111101,
148    0b110000001000110001,
149    0b011000000100101010,
150    0b101000001000101001,
151    0b001011010110001100,
152    0b001000000110100001,
153    0b001010010100001000,
154 };
155 
/* G45 compaction lookup table for the subreg field.
 *
 * 32 entries, each written as a 15-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
156 static const uint16_t g45_subreg_table[32] = {
157    0b000000000000000,
158    0b000000010000000,
159    0b000001000000000,
160    0b000100000000000,
161    0b000000000100000,
162    0b100000000000000,
163    0b000000000010000,
164    0b001100000000000,
165    0b001010000000000,
166    0b000000100000000,
167    0b001000000000000,
168    0b000000000001000,
169    0b000000001000000,
170    0b000000000000001,
171    0b000010000000000,
172    0b000000010100000,
173    0b000000000000111,
174    0b000001000100000,
175    0b011000000000000,
176    0b000000110000000,
177    0b000000000000010,
178    0b000000000000100,
179    0b000000001100000,
180    0b000100000000010,
181    0b001110011000110,
182    0b001110100001000,
183    0b000110011000110,
184    0b000001000011000,
185    0b000110010000100,
186    0b001100000000110,
187    0b000000010000110,
188    0b000001000110000,
189 };
190 
/* G45 compaction lookup table for the source-index field.
 *
 * 32 entries, each written as a 12-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
191 static const uint16_t g45_src_index_table[32] = {
192    0b000000000000,
193    0b010001101000,
194    0b010110001000,
195    0b011010010000,
196    0b001101001000,
197    0b010110001010,
198    0b010101110000,
199    0b011001111000,
200    0b001000101000,
201    0b000000101000,
202    0b010001010000,
203    0b111101101100,
204    0b010110001100,
205    0b010001101100,
206    0b011010010100,
207    0b010001001100,
208    0b001100101000,
209    0b000000000010,
210    0b111101001100,
211    0b011001101000,
212    0b010101001000,
213    0b000000000100,
214    0b000000101100,
215    0b010001101010,
216    0b000000111000,
217    0b010101011000,
218    0b000100100000,
219    0b010110000000,
220    0b010000000100,
221    0b010000111000,
222    0b000101100000,
223    0b111101110100,
224 };
225 
/* Gen6 compaction lookup table for the control-index field.
 *
 * 32 entries, each written as a 17-digit binary literal.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
226 static const uint32_t gen6_control_index_table[32] = {
227    0b00000000000000000,
228    0b01000000000000000,
229    0b00110000000000000,
230    0b00000000100000000,
231    0b00010000000000000,
232    0b00001000100000000,
233    0b00000000100000010,
234    0b00000000000000010,
235    0b01000000100000000,
236    0b01010000000000000,
237    0b10110000000000000,
238    0b00100000000000000,
239    0b11010000000000000,
240    0b11000000000000000,
241    0b01001000100000000,
242    0b01000000000001000,
243    0b01000000000000100,
244    0b00000000000001000,
245    0b00000000000000100,
246    0b00111000100000000,
247    0b00001000100000010,
248    0b00110000100000000,
249    0b00110000000000001,
250    0b00100000000000001,
251    0b00110000000000010,
252    0b00110000000000101,
253    0b00110000000001001,
254    0b00110000000010000,
255    0b00110000000000011,
256    0b00110000000000100,
257    0b00110000100001000,
258    0b00100000000001001,
259 };
260 
/* Gen6 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as an 18-digit binary literal.
 * NOTE(review): entries 23 and 25 (0-based) hold the same value,
 * 0b001111011110111101; confirm against the hardware documentation
 * whether the duplicate is intentional.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
261 static const uint32_t gen6_datatype_table[32] = {
262    0b001001110000000000,
263    0b001000110000100000,
264    0b001001110000000001,
265    0b001000000001100000,
266    0b001010110100101001,
267    0b001000000110101101,
268    0b001100011000101100,
269    0b001011110110101101,
270    0b001000000111101100,
271    0b001000000001100001,
272    0b001000110010100101,
273    0b001000000001000001,
274    0b001000001000110001,
275    0b001000001000101001,
276    0b001000000000100000,
277    0b001000001000110010,
278    0b001010010100101001,
279    0b001011010010100101,
280    0b001000000110100101,
281    0b001100011000101001,
282    0b001011011000101100,
283    0b001011010110100101,
284    0b001011110110100101,
285    0b001111011110111101,
286    0b001111011110111100,
287    0b001111011110111101,
288    0b001111011110011101,
289    0b001111011110111110,
290    0b001000000000100001,
291    0b001000000000100010,
292    0b001001111111011101,
293    0b001000001110111110,
294 };
295 
/* Gen6 compaction lookup table for the subreg field.
 *
 * 32 entries, each written as a 15-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
296 static const uint16_t gen6_subreg_table[32] = {
297    0b000000000000000,
298    0b000000000000100,
299    0b000000110000000,
300    0b111000000000000,
301    0b011110000001000,
302    0b000010000000000,
303    0b000000000010000,
304    0b000110000001100,
305    0b001000000000000,
306    0b000001000000000,
307    0b000001010010100,
308    0b000000001010110,
309    0b010000000000000,
310    0b110000000000000,
311    0b000100000000000,
312    0b000000010000000,
313    0b000000000001000,
314    0b100000000000000,
315    0b000001010000000,
316    0b001010000000000,
317    0b001100000000000,
318    0b000000001010100,
319    0b101101010010100,
320    0b010100000000000,
321    0b000000010001111,
322    0b011000000000000,
323    0b111110000000000,
324    0b101000000000000,
325    0b000000000001111,
326    0b000100010001111,
327    0b001000010001111,
328    0b000110000000000,
329 };
330 
/* Gen6 compaction lookup table for the source-index field.
 *
 * 32 entries, each written as a 12-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
331 static const uint16_t gen6_src_index_table[32] = {
332    0b000000000000,
333    0b010110001000,
334    0b010001101000,
335    0b001000101000,
336    0b011010010000,
337    0b000100100000,
338    0b010001101100,
339    0b010101110000,
340    0b011001111000,
341    0b001100101000,
342    0b010110001100,
343    0b001000100000,
344    0b010110001010,
345    0b000000000010,
346    0b010101010000,
347    0b010101101000,
348    0b111101001100,
349    0b111100101100,
350    0b011001110000,
351    0b010110001001,
352    0b010101011000,
353    0b001101001000,
354    0b010000101100,
355    0b010000000000,
356    0b001101110000,
357    0b001100010000,
358    0b001100000000,
359    0b010001101010,
360    0b001101111000,
361    0b000001110000,
362    0b001100100000,
363    0b001101010000,
364 };
365 
/* Gen7 compaction lookup table for the control-index field.
 *
 * 32 entries, each written as a 19-digit binary literal (two digits wider
 * than the G45/Gen6 control-index literals in this file).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
366 static const uint32_t gen7_control_index_table[32] = {
367    0b0000000000000000010,
368    0b0000100000000000000,
369    0b0000100000000000001,
370    0b0000100000000000010,
371    0b0000100000000000011,
372    0b0000100000000000100,
373    0b0000100000000000101,
374    0b0000100000000000111,
375    0b0000100000000001000,
376    0b0000100000000001001,
377    0b0000100000000001101,
378    0b0000110000000000000,
379    0b0000110000000000001,
380    0b0000110000000000010,
381    0b0000110000000000011,
382    0b0000110000000000100,
383    0b0000110000000000101,
384    0b0000110000000000111,
385    0b0000110000000001001,
386    0b0000110000000001101,
387    0b0000110000000010000,
388    0b0000110000100000000,
389    0b0001000000000000000,
390    0b0001000000000000010,
391    0b0001000000000000100,
392    0b0001000000100000000,
393    0b0010110000000000000,
394    0b0010110000000010000,
395    0b0011000000000000000,
396    0b0011000000100000000,
397    0b0101000000000000000,
398    0b0101000000100000000,
399 };
400 
/* Gen7 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as an 18-digit binary literal.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
401 static const uint32_t gen7_datatype_table[32] = {
402    0b001000000000000001,
403    0b001000000000100000,
404    0b001000000000100001,
405    0b001000000001100001,
406    0b001000000010111101,
407    0b001000001011111101,
408    0b001000001110100001,
409    0b001000001110100101,
410    0b001000001110111101,
411    0b001000010000100001,
412    0b001000110000100000,
413    0b001000110000100001,
414    0b001001010010100101,
415    0b001001110010100100,
416    0b001001110010100101,
417    0b001111001110111101,
418    0b001111011110011101,
419    0b001111011110111100,
420    0b001111011110111101,
421    0b001111111110111100,
422    0b000000001000001100,
423    0b001000000000111101,
424    0b001000000010100101,
425    0b001000010000100000,
426    0b001001010010100100,
427    0b001001110010000100,
428    0b001010010100001001,
429    0b001101111110111101,
430    0b001111111110111101,
431    0b001011110110101100,
432    0b001010010100101000,
433    0b001010110100101000,
434 };
435 
/* Gen7 compaction lookup table for the subreg field.
 *
 * 32 entries, each written as a 15-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
436 static const uint16_t gen7_subreg_table[32] = {
437    0b000000000000000,
438    0b000000000000001,
439    0b000000000001000,
440    0b000000000001111,
441    0b000000000010000,
442    0b000000010000000,
443    0b000000100000000,
444    0b000000110000000,
445    0b000001000000000,
446    0b000001000010000,
447    0b000010100000000,
448    0b001000000000000,
449    0b001000000000001,
450    0b001000010000001,
451    0b001000010000010,
452    0b001000010000011,
453    0b001000010000100,
454    0b001000010000111,
455    0b001000010001000,
456    0b001000010001110,
457    0b001000010001111,
458    0b001000110000000,
459    0b001000111101000,
460    0b010000000000000,
461    0b010000110000000,
462    0b011000000000000,
463    0b011110010000111,
464    0b100000000000000,
465    0b101000000000000,
466    0b110000000000000,
467    0b111000000000000,
468    0b111000000011100,
469 };
470 
/* Gen7 compaction lookup table for the source-index field.
 *
 * 32 entries, each written as a 12-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
471 static const uint16_t gen7_src_index_table[32] = {
472    0b000000000000,
473    0b000000000010,
474    0b000000010000,
475    0b000000010010,
476    0b000000011000,
477    0b000000100000,
478    0b000000101000,
479    0b000001001000,
480    0b000001010000,
481    0b000001110000,
482    0b000001111000,
483    0b001100000000,
484    0b001100000010,
485    0b001100001000,
486    0b001100010000,
487    0b001100010010,
488    0b001100100000,
489    0b001100101000,
490    0b001100111000,
491    0b001101000000,
492    0b001101000010,
493    0b001101001000,
494    0b001101010000,
495    0b001101100000,
496    0b001101101000,
497    0b001101110000,
498    0b001101110001,
499    0b001101111000,
500    0b010001101000,
501    0b010001101001,
502    0b010001101010,
503    0b010110001000,
504 };
505 
/* Gen8 compaction lookup table for the control-index field.
 *
 * 32 entries, each written as a 19-digit binary literal.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
506 static const uint32_t gen8_control_index_table[32] = {
507    0b0000000000000000010,
508    0b0000100000000000000,
509    0b0000100000000000001,
510    0b0000100000000000010,
511    0b0000100000000000011,
512    0b0000100000000000100,
513    0b0000100000000000101,
514    0b0000100000000000111,
515    0b0000100000000001000,
516    0b0000100000000001001,
517    0b0000100000000001101,
518    0b0000110000000000000,
519    0b0000110000000000001,
520    0b0000110000000000010,
521    0b0000110000000000011,
522    0b0000110000000000100,
523    0b0000110000000000101,
524    0b0000110000000000111,
525    0b0000110000000001001,
526    0b0000110000000001101,
527    0b0000110000000010000,
528    0b0000110000100000000,
529    0b0001000000000000000,
530    0b0001000000000000010,
531    0b0001000000000000100,
532    0b0001000000100000000,
533    0b0010110000000000000,
534    0b0010110000000010000,
535    0b0011000000000000000,
536    0b0011000000100000000,
537    0b0101000000000000000,
538    0b0101000000100000000,
539 };
540 
/* Gen8 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as a 21-digit binary literal (three digits
 * wider than the Gen7 datatype literals in this file).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
541 static const uint32_t gen8_datatype_table[32] = {
542    0b001000000000000000001,
543    0b001000000000001000000,
544    0b001000000000001000001,
545    0b001000000000011000001,
546    0b001000000000101011101,
547    0b001000000010111011101,
548    0b001000000011101000001,
549    0b001000000011101000101,
550    0b001000000011101011101,
551    0b001000001000001000001,
552    0b001000011000001000000,
553    0b001000011000001000001,
554    0b001000101000101000101,
555    0b001000111000101000100,
556    0b001000111000101000101,
557    0b001011100011101011101,
558    0b001011101011100011101,
559    0b001011101011101011100,
560    0b001011101011101011101,
561    0b001011111011101011100,
562    0b000000000010000001100,
563    0b001000000000001011101,
564    0b001000000000101000101,
565    0b001000001000001000000,
566    0b001000101000101000100,
567    0b001000111000100000100,
568    0b001001001001000001001,
569    0b001010111011101011101,
570    0b001011111011101011101,
571    0b001001111001101001100,
572    0b001001001001001001000,
573    0b001001011001001001000,
574 };
575 
/* Gen8 compaction lookup table for the subreg field.
 *
 * 32 entries, each written as a 15-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
576 static const uint16_t gen8_subreg_table[32] = {
577    0b000000000000000,
578    0b000000000000001,
579    0b000000000001000,
580    0b000000000001111,
581    0b000000000010000,
582    0b000000010000000,
583    0b000000100000000,
584    0b000000110000000,
585    0b000001000000000,
586    0b000001000010000,
587    0b000001010000000,
588    0b001000000000000,
589    0b001000000000001,
590    0b001000010000001,
591    0b001000010000010,
592    0b001000010000011,
593    0b001000010000100,
594    0b001000010000111,
595    0b001000010001000,
596    0b001000010001110,
597    0b001000010001111,
598    0b001000110000000,
599    0b001000111101000,
600    0b010000000000000,
601    0b010000110000000,
602    0b011000000000000,
603    0b011110010000111,
604    0b100000000000000,
605    0b101000000000000,
606    0b110000000000000,
607    0b111000000000000,
608    0b111000000011100,
609 };
610 
/* Gen8 compaction lookup table for the source-index field.
 *
 * 32 entries, each written as a 12-digit binary literal (stored as
 * uint16_t).
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
611 static const uint16_t gen8_src_index_table[32] = {
612    0b000000000000,
613    0b000000000010,
614    0b000000010000,
615    0b000000010010,
616    0b000000011000,
617    0b000000100000,
618    0b000000101000,
619    0b000001001000,
620    0b000001010000,
621    0b000001110000,
622    0b000001111000,
623    0b001100000000,
624    0b001100000010,
625    0b001100001000,
626    0b001100010000,
627    0b001100010010,
628    0b001100100000,
629    0b001100101000,
630    0b001100111000,
631    0b001101000000,
632    0b001101000010,
633    0b001101001000,
634    0b001101010000,
635    0b001101100000,
636    0b001101101000,
637    0b001101110000,
638    0b001101110001,
639    0b001101111000,
640    0b010001101000,
641    0b010001101001,
642    0b010001101010,
643    0b010110001000,
644 };
645 
/* Gen11 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as a 21-digit binary literal.  Only a datatype
 * table is defined for Gen11 in the visible part of this file.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed, and which control/subreg/source
 * tables Gen11 pairs this with.
 */
646 static const uint32_t gen11_datatype_table[32] = {
647    0b001000000000000000001,
648    0b001000000000001000000,
649    0b001000000000001000001,
650    0b001000000000011000001,
651    0b001000000000101100101,
652    0b001000000101111100101,
653    0b001000000100101000001,
654    0b001000000100101000101,
655    0b001000000100101100101,
656    0b001000001000001000001,
657    0b001000011000001000000,
658    0b001000011000001000001,
659    0b001000101000101000101,
660    0b001000111000101000100,
661    0b001000111000101000101,
662    0b001100100100101100101,
663    0b001100101100100100101,
664    0b001100101100101100100,
665    0b001100101100101100101,
666    0b001100111100101100100,
667    0b000000000010000001100,
668    0b001000000000001100101,
669    0b001000000000101000101,
670    0b001000001000001000000,
671    0b001000101000101000100,
672    0b001000111000100000100,
673    0b001001001001000001001,
674    0b001101111100101100101,
675    0b001100111100101100101,
676    0b001001111001101001100,
677    0b001001001001001001000,
678    0b001001011001001001000,
679 };
680 
/* Gen12 compaction lookup table for the control-index field.
 *
 * 32 entries, each written as a 21-digit binary literal.  The trailing
 * comment on every entry spells out, in disassembly-like notation, what
 * that bit pattern stands for.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
681 static const uint32_t gen12_control_index_table[32] = {
682    0b000000000000000000100, /* 	       (16|M0)                            */
683    0b000000000000000000011, /* 	       (8|M0)                             */
684    0b000000010000000000000, /* 	(W)    (1|M0)                             */
685    0b000000010000000000100, /* 	(W)    (16|M0)                            */
686    0b000000010000000000011, /* 	(W)    (8|M0)                             */
687    0b010000000000000000100, /* 	       (16|M0)  (ge)f0.0                  */
688    0b000000000000000100100, /* 	       (16|M16)                           */
689    0b010100000000000000100, /* 	       (16|M0)  (lt)f0.0                  */
690    0b000000000000000000000, /* 	       (1|M0)                             */
691    0b000010000000000000100, /* 	       (16|M0)           (sat)            */
692    0b000000000000000010011, /* 	       (8|M8)                             */
693    0b001100000000000000100, /* 	       (16|M0)  (gt)f0.0                  */
694    0b000100000000000000100, /* 	       (16|M0)  (eq)f0.0                  */
695    0b000100010000000000100, /* 	(W)    (16|M0)  (eq)f0.0                  */
696    0b001000000000000000100, /* 	       (16|M0)  (ne)f0.0                  */
697    0b000000000000100000100, /* 	(f0.0) (16|M0)                            */
698    0b010100000000000000011, /* 	       (8|M0)   (lt)f0.0                  */
699    0b000000000000110000100, /* 	(f1.0) (16|M0)                            */
700    0b000000010000000000001, /* 	(W)    (2|M0)                             */
701    0b000000000000101000100, /* 	(f0.1) (16|M0)                            */
702    0b000000000000111000100, /* 	(f1.1) (16|M0)                            */
703    0b010000010000000000100, /* 	(W)    (16|M0)  (ge)f0.0                  */
704    0b000000000000000100011, /* 	       (8|M16)                            */
705    0b000000000000000110011, /* 	       (8|M24)                            */
706    0b010100010000000000100, /* 	(W)    (16|M0)  (lt)f0.0                  */
707    0b010000000000000000011, /* 	       (8|M0)   (ge)f0.0                  */
708    0b000100010000000000000, /* 	(W)    (1|M0)   (eq)f0.0                  */
709    0b000010000000000000011, /* 	       (8|M0)            (sat)            */
710    0b010100000000010000100, /* 	       (16|M0)  (lt)f1.0                  */
711    0b000100000000000000011, /* 	       (8|M0)   (eq)f0.0                  */
712    0b000001000000000000011, /* 	       (8|M0)                   {AccWrEn} */
713    0b000000010000000100100, /* 	(W)    (16|M16)                           */
714 };
715 
/* Gen12 compaction lookup table for the datatype field.
 *
 * 32 entries, each written as a 20-digit binary literal.  The trailing
 * comment on every entry lists the register file and type of three
 * operands, the first of which also carries a <N> stride.
 * NOTE(review): presumably the three columns are dst, src0 and src1 in
 * that order -- confirm against the compaction code, which is not visible
 * here.
 */
716 static const uint32_t gen12_datatype_table[32] = {
717    0b11010110100101010100, /* grf<1>:f  grf:f  grf:f  */
718    0b00000110100101010100, /* grf<1>:f  grf:f  arf:ub */
719    0b00000010101101010100, /* grf<1>:f  imm:f  arf:ub */
720    0b01010110110101010100, /* grf<1>:f  grf:f  imm:f  */
721    0b11010100100101010100, /* arf<1>:f  grf:f  grf:f  */
722    0b11010010100101010100, /* grf<1>:f  arf:f  grf:f  */
723    0b01010100110101010100, /* arf<1>:f  grf:f  imm:f  */
724    0b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */
725    0b11010000100101010100, /* arf<1>:f  arf:f  grf:f  */
726    0b00101110110011001100, /* grf<1>:d  grf:d  imm:w  */
727    0b10110110100011001100, /* grf<1>:d  grf:d  grf:d  */
728    0b01010010110101010100, /* grf<1>:f  arf:f  imm:f  */
729    0b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */
730    0b01010000110101010100, /* arf<1>:f  arf:f  imm:f  */
731    0b00110110110011001100, /* grf<1>:d  grf:d  imm:d  */
732    0b00010110110001000100, /* grf<1>:ud grf:ud imm:ud */
733    0b00000111000101010100, /* grf<2>:f  grf:f  arf:ub */
734    0b00101100110011001100, /* arf<1>:d  grf:d  imm:w  */
735    0b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */
736    0b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */
737    0b00100110110000101010, /* grf<1>:w  grf:uw imm:uv */
738    0b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */
739    0b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */
740    0b00000110100101001100, /* grf<1>:d  grf:f  arf:ub */
741    0b10001100100011001100, /* arf<1>:d  grf:d  grf:uw */
742    0b00000110100001010100, /* grf<1>:f  grf:ud arf:ub */
743    0b00101110110001001100, /* grf<1>:d  grf:ud imm:w  */
744    0b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */
745    0b00000110100000110100, /* grf<1>:f  grf:uw arf:ub */
746    0b00000110100000010100, /* grf<1>:f  grf:ub arf:ub */
747    0b00000110100011010100, /* grf<1>:f  grf:d  arf:ub */
748    0b00000010100101010100, /* grf<1>:f  arf:f  arf:ub */
749 };
750 
/* Gen12 compaction lookup table for the subreg field.
 *
 * 32 entries, each written as a 15-digit binary literal (stored as
 * uint16_t).  The trailing comment on every entry lists three ".N"
 * values for that pattern.
 * NOTE(review): which operand each of the three columns refers to is not
 * established by the visible code; confirm against the compaction code.
 */
751 static const uint16_t gen12_subreg_table[32] = {
752    0b000000000000000, /* .0  .0  .0  */
753    0b100000000000000, /* .0  .0  .16 */
754    0b001000000000000, /* .0  .0  .4  */
755    0b011000000000000, /* .0  .0  .12 */
756    0b000000010000000, /* .0  .4  .0  */
757    0b010000000000000, /* .0  .0  .8  */
758    0b101000000000000, /* .0  .0  .20 */
759    0b000000000001000, /* .8  .0  .0  */
760    0b000000100000000, /* .0  .8  .0  */
761    0b110000000000000, /* .0  .0  .24 */
762    0b111000000000000, /* .0  .0  .28 */
763    0b000001000000000, /* .0  .16 .0  */
764    0b000000000000100, /* .4  .0  .0  */
765    0b000001100000000, /* .0  .24 .0  */
766    0b000001010000000, /* .0  .20 .0  */
767    0b000000110000000, /* .0  .12 .0  */
768    0b000001110000000, /* .0  .28 .0  */
769    0b000000000011100, /* .28 .0  .0  */
770    0b000000000010000, /* .16 .0  .0  */
771    0b000000000001100, /* .12 .0  .0  */
772    0b000000000011000, /* .24 .0  .0  */
773    0b000000000010100, /* .20 .0  .0  */
774    0b000000000000010, /* .2  .0  .0  */
775    0b000000101000000, /* .0  .10 .0  */
776    0b000000001000000, /* .0  .2  .0  */
777    0b000000010000100, /* .4  .4  .0  */
778    0b000000001011100, /* .28 .2  .0  */
779    0b000000001000010, /* .2  .2  .0  */
780    0b000000110001100, /* .12 .12 .0  */
781    0b000000000100000, /* .0  .1  .0  */
782    0b000000001100000, /* .0  .3  .0  */
783    0b110001100000000, /* .0  .24 .24 */
784 };
785 
/* Gen12 compaction lookup table for the src0 index field.
 *
 * 16 entries (half the size of the pre-Gen12 src_index tables in this
 * file), each written as a 12-digit binary literal.  The trailing comment
 * on every entry shows the source modifier and region that the pattern
 * stands for.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
786 static const uint16_t gen12_src0_index_table[16] = {
787    0b010001100100, /*       r<8;8,1>  */
788    0b000000000000, /*       r<0;1,0>  */
789    0b010001100110, /*      -r<8;8,1>  */
790    0b010001100101, /*  (abs)r<8;8,1>  */
791    0b000000000010, /*      -r<0;1,0>  */
792    0b001000000000, /*       r<2;1,0>  */
793    0b001001000000, /*       r<2;4,0>  */
794    0b001101000000, /*       r<4;4,0>  */
795    0b001000100100, /*       r<2;2,1>  */
796    0b001100000000, /*       r<4;1,0>  */
797    0b001000100110, /*      -r<2;2,1>  */
798    0b001101000100, /*       r<4;4,1>  */
799    0b010001100111, /* -(abs)r<8;8,1>  */
800    0b000100000000, /*       r<1;1,0>  */
801    0b000000000001, /*  (abs)r<0;1,0>  */
802    0b111100010000, /*       r[a]<1,0> */
803 };
804 
/* Gen12 compaction lookup table for the src1 index field.
 *
 * 16 entries, each written as a 12-digit binary literal.  The trailing
 * comment on every entry shows the source modifier and region that the
 * pattern stands for.  The bit layout differs from
 * gen12_src0_index_table: the same annotated region (e.g. r<8;8,1>) maps
 * to a different literal in each table.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
805 static const uint16_t gen12_src1_index_table[16] = {
806    0b000100011001, /*       r<8;8,1> */
807    0b000000000000, /*       r<0;1,0> */
808    0b100100011001, /*      -r<8;8,1> */
809    0b100000000000, /*      -r<0;1,0> */
810    0b010100011001, /*  (abs)r<8;8,1> */
811    0b100011010000, /*      -r<4;4,0> */
812    0b000010000000, /*       r<2;1,0> */
813    0b000010001001, /*       r<2;2,1> */
814    0b100010001001, /*      -r<2;2,1> */
815    0b000011010000, /*       r<4;4,0> */
816    0b000011010001, /*       r<4;4,1> */
817    0b000011000000, /*       r<4;1,0> */
818    0b110100011001, /* -(abs)r<8;8,1> */
819    0b010000000000, /*  (abs)r<0;1,0> */
820    0b110000000000, /* -(abs)r<0;1,0> */
821    0b100011010001, /*      -r<4;4,1> */
822 };
823 
824 /* This is actually the control index table for Cherryview (26 bits), but the
825  * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
826  * the start.
827  *
828  * The low 24 bits have the same mappings on both hardware.
 *
 * Used for compacting 3-src instructions (Gen8+).  Four entries, each
 * written as a 26-digit binary literal, i.e. the Cherryview width.
829  */
830 static const uint32_t gen8_3src_control_index_table[4] = {
831    0b00100000000110000000000001,
832    0b00000000000110000000000001,
833    0b00000000001000000000000001,
834    0b00000000001000000000100001,
835 };
836 
837 /* This is actually the source index table for Cherryview (49 bits), but the
838  * only difference from Broadwell (46 bits) is that it has three extra 0-bits
839  * at the start.
840  *
841  * The low 44 bits have the same mappings on both hardware, and since the high
842  * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * Four entries, each written as a 49-digit binary literal, hence the
 * uint64_t element type.
 * NOTE(review): this comment cites both 46 and 44 bits for Broadwell;
 * confirm which width is intended.
843  */
844 static const uint64_t gen8_3src_source_index_table[4] = {
845    0b0000001110010011100100111001000001111000000000000,
846    0b0000001110010011100100111001000001111000000000010,
847    0b0000001110010011100100111001000001111000000001000,
848    0b0000001110010011100100111001000001111000000100000,
849 };
850 
/* Gen12 compaction lookup table for the control-index field of 3-src
 * instructions.
 *
 * 32 entries (the Gen8 3-src control table in this file has only four),
 * each written as a 36-digit binary literal, hence the uint64_t element
 * type.  The trailing comment on every entry spells out, in
 * disassembly-like notation, what that bit pattern stands for.
 * NOTE(review): the consumer of this table is not visible here; confirm
 * how entries are matched and indexed against the compaction code.
 */
851 static const uint64_t gen12_3src_control_index_table[32] = {
852    0b000001001010010101000000000000000100, /*      (16|M0)       grf<1>:f   :f  :f  :f */
853    0b000001001010010101000000000000000011, /*      (8|M0)        grf<1>:f   :f  :f  :f */
854    0b000001001000010101000000000000000011, /*      (8|M0)        arf<1>:f   :f  :f  :f */
855    0b000001001010010101000010000000000011, /* (W)  (8|M0)        grf<1>:f   :f  :f  :f */
856    0b000001001000010101000010000000000011, /* (W)  (8|M0)        arf<1>:f   :f  :f  :f */
857    0b000001001000010101000000000000010011, /*      (8|M8)        arf<1>:f   :f  :f  :f */
858    0b000001001010010101000000000000010011, /*      (8|M8)        grf<1>:f   :f  :f  :f */
859    0b000001001000010101000010000000010011, /* (W)  (8|M8)        arf<1>:f   :f  :f  :f */
860    0b000001001010010101000010000000010011, /* (W)  (8|M8)        grf<1>:f   :f  :f  :f */
861    0b000001001010010101000010000000000100, /* (W)  (16|M0)       grf<1>:f   :f  :f  :f */
862    0b000001001000010101000000000000000100, /*      (16|M0)       arf<1>:f   :f  :f  :f */
863    0b000001001010010101010000000000000100, /*      (16|M0)  (sat)grf<1>:f   :f  :f  :f */
864    0b000001001010010101000000000000100100, /*      (16|M16)      grf<1>:f   :f  :f  :f */
865    0b000001001000010101000010000000000100, /* (W)  (16|M0)       arf<1>:f   :f  :f  :f */
866    0b000001001010010101000010000000000000, /* (W)  (1|M0)        grf<1>:f   :f  :f  :f */
867    0b000001001010010101010000000000000011, /*      (8|M0)   (sat)grf<1>:f   :f  :f  :f */
868    0b000001001000010101000010000000110011, /* (W)  (8|M24)       arf<1>:f   :f  :f  :f */
869    0b000001001000010101000010000000100011, /* (W)  (8|M16)       arf<1>:f   :f  :f  :f */
870    0b000001001010010101000010000000110011, /* (W)  (8|M24)       grf<1>:f   :f  :f  :f */
871    0b000001001010010101000010000000100011, /* (W)  (8|M16)       grf<1>:f   :f  :f  :f */
872    0b000001001000010101000000000000100011, /*      (8|M16)       arf<1>:f   :f  :f  :f */
873    0b000001001000010101000000000000110011, /*      (8|M24)       arf<1>:f   :f  :f  :f */
874    0b000001001010010101000000000000100011, /*      (8|M16)       grf<1>:f   :f  :f  :f */
875    0b000001001010010101000000000000110011, /*      (8|M24)       grf<1>:f   :f  :f  :f */
876    0b000001001000010101010000000000000100, /*      (16|M0)  (sat)arf<1>:f   :f  :f  :f */
877    0b000001001010010101010010000000000100, /* (W)  (16|M0)  (sat)grf<1>:f   :f  :f  :f */
878    0b000001001010010101000010000000100100, /* (W)  (16|M16)      grf<1>:f   :f  :f  :f */
879    0b000001001010010001000010000000000000, /* (W)  (1|M0)        grf<1>:ud :ud :ud :ud */
880    0b000001001000010101000000000000100100, /*      (16|M16)      arf<1>:f   :f  :f  :f */
881    0b000001001010010101010000000000100100, /*      (16|M16) (sat)grf<1>:f   :f  :f  :f */
882    0b000001001010010101000010000000000010, /* (W)  (4|M0)        grf<1>:f   :f  :f  :f */
883    0b000001001000010101010000000000000011, /*      (8|M0)   (sat)arf<1>:f   :f  :f  :f */
884 };
885 
/**
 * Gen12 source-index patterns for three-source instructions.
 *
 * Each entry is a 21-bit pattern in the layout that set_3src_source_index()
 * assembles from an uncompacted instruction; the position of the matching
 * entry is what gets stored in the compacted instruction.  The trailing
 * comment on each entry describes the operands the pattern stands for.
 */
static const uint32_t gen12_3src_source_index_table[32] = {
   /*  0 */ 0b100101100001100000000, /*  grf<0;0>   grf<8;1>  grf<0> */
   /*  1 */ 0b100101100001001000010, /*  arf<4;1>   grf<8;1>  grf<0> */
   /*  2 */ 0b101101100001101000011, /*  grf<8;1>   grf<8;1>  grf<1> */
   /*  3 */ 0b100101100001101000011, /*  grf<8;1>   grf<8;1>  grf<0> */
   /*  4 */ 0b101100000000101000011, /*  grf<8;1>   grf<0;0>  grf<1> */
   /*  5 */ 0b101101100001101001011, /* -grf<8;1>   grf<8;1>  grf<1> */
   /*  6 */ 0b101001100001101000011, /*  grf<8;1>   arf<8;1>  grf<1> */
   /*  7 */ 0b100001100001100000000, /*  grf<0;0>   arf<8;1>  grf<0> */
   /*  8 */ 0b101101100001100000000, /*  grf<0;0>   grf<8;1>  grf<1> */
   /*  9 */ 0b101101100101101000011, /*  grf<8;1>   grf<8;1> -grf<1> */
   /* 10 */ 0b101101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<1> */
   /* 11 */ 0b101100000000100000000, /*  grf<0;0>   grf<0;0>  grf<1> */
   /* 12 */ 0b100001100001101000011, /*  grf<8;1>   arf<8;1>  grf<0> */
   /* 13 */ 0b100101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<0> */
   /* 14 */ 0b100101110001101000011, /*  grf<8;1>  -grf<8;1>  grf<0> */
   /* 15 */ 0b100101100001101001011, /* -grf<8;1>   grf<8;1>  grf<0> */
   /* 16 */ 0b100100000000101000011, /*  grf<8;1>   grf<0;0>  grf<0> */
   /* 17 */ 0b100101100001100001000, /* -grf<0;0>   grf<8;1>  grf<0> */
   /* 18 */ 0b100100000000100000000, /*  grf<0;0>   grf<0;0>  grf<0> */
   /* 19 */ 0b101101110001100000000, /*  grf<0;0>  -grf<8;1>  grf<1> */
   /* 20 */ 0b100101100101100000000, /*  grf<0;0>   grf<8;1> -grf<0> */
   /* 21 */ 0b101001100001100000000, /*  grf<0;0>   arf<8;1>  grf<1> */
   /* 22 */ 0b100101100101101000011, /*  grf<8;1>   grf<8;1> -grf<0> */
   /* 23 */ 0b101101100101101001011, /* -grf<8;1>   grf<8;1> -grf<1> */
   /* 24 */ 0b101001100001101001011, /* -grf<8;1>   arf<8;1>  grf<1> */
   /* 25 */ 0b101101110001101001011, /* -grf<8;1>  -grf<8;1>  grf<1> */
   /* 26 */ 0b101100010000101000011, /*  grf<8;1>  -grf<0;0>  grf<1> */
   /* 27 */ 0b101100000100101000011, /*  grf<8;1>   grf<0;0> -grf<1> */
   /* 28 */ 0b101101100001100001000, /* -grf<0;0>   grf<8;1>  grf<1> */
   /* 29 */ 0b101101100101100000000, /*  grf<0;0>   grf<8;1> -grf<1> */
   /* 30 */ 0b100100000100101000011, /*  grf<8;1>   grf<0;0> -grf<0> */
   /* 31 */ 0b101001100101101000011, /*  grf<8;1>   arf<8;1> -grf<1> */
};
920 
/**
 * Gen12 subregister patterns for three-source instructions.
 *
 * Each entry packs four 5-bit fields in the layout assembled by
 * set_3src_subreg_index(): bits 4:0, 9:5, 14:10 and 19:15.  The trailing
 * comment lists the four field values from the lowest field (leftmost) to
 * the highest field (rightmost).
 */
static const uint32_t gen12_3src_subreg_table[32] = {
   /*  0 */ 0b00000000000000000000, /* .0  .0  .0  .0  */
   /*  1 */ 0b00100000000000000000, /* .0  .0  .0  .4  */
   /*  2 */ 0b00000000000110000000, /* .0  .12 .0  .0  */
   /*  3 */ 0b10100000000000000000, /* .0  .0  .0  .20 */
   /*  4 */ 0b10000000001110000000, /* .0  .28 .0  .16 */
   /*  5 */ 0b01100000000000000000, /* .0  .0  .0  .12 */
   /*  6 */ 0b01000000000000000000, /* .0  .0  .0  .8  */
   /*  7 */ 0b00000010000000000000, /* .0  .0  .8  .0  */
   /*  8 */ 0b00000001000000000000, /* .0  .0  .4  .0  */
   /*  9 */ 0b11000000000000000000, /* .0  .0  .0  .24 */
   /* 10 */ 0b10000000000000000000, /* .0  .0  .0  .16 */
   /* 11 */ 0b11100000000000000000, /* .0  .0  .0  .28 */
   /* 12 */ 0b00000110000000000000, /* .0  .0  .24 .0  */
   /* 13 */ 0b00000000000010000000, /* .0  .4  .0  .0  */
   /* 14 */ 0b00000100000000000000, /* .0  .0  .16 .0  */
   /* 15 */ 0b00000011000000000000, /* .0  .0  .12 .0  */
   /* 16 */ 0b00000101000000000000, /* .0  .0  .20 .0  */
   /* 17 */ 0b00000111000000000000, /* .0  .0  .28 .0  */
   /* 18 */ 0b00000000000100000000, /* .0  .8  .0  .0  */
   /* 19 */ 0b00000000001000000000, /* .0  .16 .0  .0  */
   /* 20 */ 0b00000000001100000000, /* .0  .24 .0  .0  */
   /* 21 */ 0b00000000001010000000, /* .0  .20 .0  .0  */
   /* 22 */ 0b00000000001110000000, /* .0  .28 .0  .0  */
   /* 23 */ 0b11000000001110000000, /* .0  .28 .0  .24 */
   /* 24 */ 0b00100000000100000000, /* .0  .8  .0  .4  */
   /* 25 */ 0b00100000000110000000, /* .0  .12 .0  .4  */
   /* 26 */ 0b01000000000110000000, /* .0  .12 .0  .8  */
   /* 27 */ 0b10000000001100000000, /* .0  .24 .0  .16 */
   /* 28 */ 0b10000000001010000000, /* .0  .20 .0  .16 */
   /* 29 */ 0b01100000000010000000, /* .0  .4  .0  .12 */
   /* 30 */ 0b10100000001110000000, /* .0  .28 .0  .20 */
   /* 31 */ 0b01000000000010000000, /* .0  .4  .0  .8  */
};
955 
/**
 * Lookup tables and device description shared by the set_*_index() helpers.
 *
 * Each table pointer refers to the array that the corresponding helper scans
 * for a bit pattern equal to the one gathered from the uncompacted
 * instruction.
 */
struct compaction_state {
   /* Device description; the helpers branch on devinfo->gen. */
   const struct gen_device_info *devinfo;

   /* Scanned by set_control_index(). */
   const uint32_t *control_index_table;

   /* Scanned by set_datatype_index(). */
   const uint32_t *datatype_table;

   /* Scanned by set_subreg_index(). */
   const uint16_t *subreg_table;

   /* Scanned by set_src0_index(). */
   const uint16_t *src0_index_table;

   /* Scanned by set_src1_index() for non-immediate sources. */
   const uint16_t *src1_index_table;
};

/* Forward declaration; the definition is not adjacent to this prototype. */
static void
compaction_state_init(struct compaction_state *c,
                      const struct gen_device_info *devinfo);
967 
968 static bool
set_control_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)969 set_control_index(const struct compaction_state *c,
970                   brw_compact_inst *dst, const brw_inst *src)
971 {
972    const struct gen_device_info *devinfo = c->devinfo;
973    uint32_t uncompacted; /* 17b/G45; 19b/IVB+; 21b/TGL+ */
974 
975    if (devinfo->gen >= 12) {
976       uncompacted = (brw_inst_bits(src, 95, 92) << 17) | /*  4b */
977                     (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
978                     (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
979                     (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
980                     (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
981                     (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
982                     (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
983                     (brw_inst_bits(src, 23, 22) <<  6) | /*  2b */
984                     (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
985                     (brw_inst_bits(src, 18, 16));        /*  3b */
986    } else if (devinfo->gen >= 8) {
987       uncompacted = (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
988                     (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
989                     (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
990                     (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
991                     (brw_inst_bits(src,  8,  8));        /*  1b */
992    } else {
993       uncompacted = (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
994                     (brw_inst_bits(src, 23,  8));        /* 16b */
995 
996       /* On gen7, the flag register and subregister numbers are integrated into
997        * the control index.
998        */
999       if (devinfo->gen == 7)
1000          uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
1001    }
1002 
1003    for (int i = 0; i < 32; i++) {
1004       if (c->control_index_table[i] == uncompacted) {
1005          brw_compact_inst_set_control_index(devinfo, dst, i);
1006 	 return true;
1007       }
1008    }
1009 
1010    return false;
1011 }
1012 
1013 static bool
set_datatype_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1014 set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,
1015                    const brw_inst *src, bool is_immediate)
1016 {
1017    const struct gen_device_info *devinfo = c->devinfo;
1018    uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */
1019 
1020    if (devinfo->gen >= 12) {
1021       uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /*  4b */
1022                     (brw_inst_bits(src, 66, 66) << 14) | /*  1b */
1023                     (brw_inst_bits(src, 50, 50) << 13) | /*  1b */
1024                     (brw_inst_bits(src, 49, 48) << 11) | /*  2b */
1025                     (brw_inst_bits(src, 47, 47) << 10) | /*  1b */
1026                     (brw_inst_bits(src, 46, 46) <<  9) | /*  1b */
1027                     (brw_inst_bits(src, 43, 40) <<  5) | /*  4b */
1028                     (brw_inst_bits(src, 39, 36) <<  1) | /*  4b */
1029                     (brw_inst_bits(src, 35, 35));        /*  1b */
1030 
1031       /* Src1.RegFile overlaps with the immediate, so ignore it if an immediate
1032        * is present
1033        */
1034       if (!is_immediate) {
1035          uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */
1036       }
1037    } else if (devinfo->gen >= 8) {
1038       uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
1039                     (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
1040                     (brw_inst_bits(src, 46, 35));        /* 12b */
1041    } else {
1042       uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
1043                     (brw_inst_bits(src, 46, 32));        /* 15b */
1044    }
1045 
1046    for (int i = 0; i < 32; i++) {
1047       if (c->datatype_table[i] == uncompacted) {
1048          brw_compact_inst_set_datatype_index(devinfo, dst, i);
1049 	 return true;
1050       }
1051    }
1052 
1053    return false;
1054 }
1055 
1056 static bool
set_subreg_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)1057 set_subreg_index(const struct compaction_state *c, brw_compact_inst *dst,
1058                  const brw_inst *src, bool is_immediate)
1059 {
1060    const struct gen_device_info *devinfo = c->devinfo;
1061    uint16_t uncompacted; /* 15b */
1062 
1063    if (devinfo->gen >= 12) {
1064       uncompacted = (brw_inst_bits(src, 55, 51) << 0) |    /* 5b */
1065                     (brw_inst_bits(src, 71, 67) << 5);     /* 5b */
1066 
1067       if (!is_immediate)
1068          uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */
1069    } else {
1070       uncompacted = (brw_inst_bits(src, 52, 48) << 0) |    /* 5b */
1071                     (brw_inst_bits(src, 68, 64) << 5);     /* 5b */
1072 
1073       if (!is_immediate)
1074          uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
1075    }
1076 
1077    for (int i = 0; i < 32; i++) {
1078       if (c->subreg_table[i] == uncompacted) {
1079          brw_compact_inst_set_subreg_index(devinfo, dst, i);
1080 	 return true;
1081       }
1082    }
1083 
1084    return false;
1085 }
1086 
1087 static bool
set_src0_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1088 set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,
1089                const brw_inst *src)
1090 {
1091    const struct gen_device_info *devinfo = c->devinfo;
1092    uint16_t uncompacted; /* 12b */
1093    int table_len;
1094 
1095    if (devinfo->gen >= 12) {
1096       table_len = ARRAY_SIZE(gen12_src0_index_table);
1097       uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /*  4b */
1098                     (brw_inst_bits(src, 83, 81) << 5) | /*  3b */
1099                     (brw_inst_bits(src, 80, 80) << 4) | /*  1b */
1100                     (brw_inst_bits(src, 65, 64) << 2) | /*  2b */
1101                     (brw_inst_bits(src, 45, 44));       /*  2b */
1102    } else {
1103       table_len = ARRAY_SIZE(gen8_src_index_table);
1104       uncompacted = brw_inst_bits(src, 88, 77);         /* 12b */
1105    }
1106 
1107    for (int i = 0; i < table_len; i++) {
1108       if (c->src0_index_table[i] == uncompacted) {
1109          brw_compact_inst_set_src0_index(devinfo, dst, i);
1110 	 return true;
1111       }
1112    }
1113 
1114    return false;
1115 }
1116 
1117 static bool
set_src1_index(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src,bool is_immediate,unsigned imm)1118 set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,
1119                const brw_inst *src, bool is_immediate, unsigned imm)
1120 {
1121    const struct gen_device_info *devinfo = c->devinfo;
1122    if (is_immediate) {
1123       if (devinfo->gen >= 12) {
1124          /* src1 index takes the low 4 bits of the 12-bit compacted value */
1125          brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);
1126       } else {
1127          /* src1 index takes the high 5 bits of the 13-bit compacted value */
1128          brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);
1129       }
1130       return true;
1131    } else {
1132       uint16_t uncompacted; /* 12b */
1133       int table_len;
1134 
1135       if (devinfo->gen >= 12) {
1136          table_len = ARRAY_SIZE(gen12_src0_index_table);
1137          uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /*  2b */
1138                        (brw_inst_bits(src, 119, 116) <<  6) | /*  4b */
1139                        (brw_inst_bits(src, 115, 113) <<  3) | /*  3b */
1140                        (brw_inst_bits(src, 112, 112) <<  2) | /*  1b */
1141                        (brw_inst_bits(src,  97,  96));        /*  2b */
1142       } else {
1143          table_len = ARRAY_SIZE(gen8_src_index_table);
1144          uncompacted = brw_inst_bits(src, 120, 109);          /* 12b */
1145       }
1146 
1147       for (int i = 0; i < table_len; i++) {
1148          if (c->src1_index_table[i] == uncompacted) {
1149             brw_compact_inst_set_src1_index(devinfo, dst, i);
1150             return true;
1151          }
1152       }
1153    }
1154 
1155    return false;
1156 }
1157 
1158 static bool
set_3src_control_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1159 set_3src_control_index(const struct gen_device_info *devinfo,
1160                        brw_compact_inst *dst, const brw_inst *src)
1161 {
1162    assert(devinfo->gen >= 8);
1163 
1164    if (devinfo->gen >= 12) {
1165       uint64_t uncompacted =             /* 36b/TGL+ */
1166          (brw_inst_bits(src, 95, 92) << 32) | /*  4b */
1167          (brw_inst_bits(src, 90, 88) << 29) | /*  3b */
1168          (brw_inst_bits(src, 82, 80) << 26) | /*  3b */
1169          (brw_inst_bits(src, 50, 50) << 25) | /*  1b */
1170          (brw_inst_bits(src, 48, 48) << 24) | /*  1b */
1171          (brw_inst_bits(src, 42, 40) << 21) | /*  3b */
1172          (brw_inst_bits(src, 39, 39) << 20) | /*  1b */
1173          (brw_inst_bits(src, 38, 36) << 17) | /*  3b */
1174          (brw_inst_bits(src, 34, 34) << 16) | /*  1b */
1175          (brw_inst_bits(src, 33, 33) << 15) | /*  1b */
1176          (brw_inst_bits(src, 32, 32) << 14) | /*  1b */
1177          (brw_inst_bits(src, 31, 31) << 13) | /*  1b */
1178          (brw_inst_bits(src, 28, 28) << 12) | /*  1b */
1179          (brw_inst_bits(src, 27, 24) <<  8) | /*  4b */
1180          (brw_inst_bits(src, 23, 23) <<  7) | /*  1b */
1181          (brw_inst_bits(src, 22, 22) <<  6) | /*  1b */
1182          (brw_inst_bits(src, 21, 19) <<  3) | /*  3b */
1183          (brw_inst_bits(src, 18, 16));        /*  3b */
1184 
1185       for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_control_index_table); i++) {
1186          if (gen12_3src_control_index_table[i] == uncompacted) {
1187             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1188             return true;
1189          }
1190       }
1191    } else {
1192       uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */
1193          (brw_inst_bits(src, 34, 32) << 21) |  /*  3b */
1194          (brw_inst_bits(src, 28,  8));         /* 21b */
1195 
1196       if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1197          uncompacted |=
1198             brw_inst_bits(src, 36, 35) << 24;  /*  2b */
1199       }
1200 
1201       for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
1202          if (gen8_3src_control_index_table[i] == uncompacted) {
1203             brw_compact_inst_set_3src_control_index(devinfo, dst, i);
1204             return true;
1205          }
1206       }
1207    }
1208 
1209    return false;
1210 }
1211 
1212 static bool
set_3src_source_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1213 set_3src_source_index(const struct gen_device_info *devinfo,
1214                       brw_compact_inst *dst, const brw_inst *src)
1215 {
1216    assert(devinfo->gen >= 8);
1217 
1218    if (devinfo->gen >= 12) {
1219       uint32_t uncompacted =               /* 21b/TGL+ */
1220          (brw_inst_bits(src, 114, 114) << 20) | /*  1b */
1221          (brw_inst_bits(src, 113, 112) << 18) | /*  2b */
1222          (brw_inst_bits(src,  98,  98) << 17) | /*  1b */
1223          (brw_inst_bits(src,  97,  96) << 15) | /*  2b */
1224          (brw_inst_bits(src,  91,  91) << 14) | /*  1b */
1225          (brw_inst_bits(src,  87,  86) << 12) | /*  2b */
1226          (brw_inst_bits(src,  85,  84) << 10) | /*  2b */
1227          (brw_inst_bits(src,  83,  83) <<  9) | /*  1b */
1228          (brw_inst_bits(src,  66,  66) <<  8) | /*  1b */
1229          (brw_inst_bits(src,  65,  64) <<  6) | /*  2b */
1230          (brw_inst_bits(src,  47,  47) <<  5) | /*  1b */
1231          (brw_inst_bits(src,  46,  46) <<  4) | /*  1b */
1232          (brw_inst_bits(src,  45,  44) <<  2) | /*  2b */
1233          (brw_inst_bits(src,  43,  43) <<  1) | /*  1b */
1234          (brw_inst_bits(src,  35,  35));        /*  1b */
1235 
1236       for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_source_index_table); i++) {
1237          if (gen12_3src_source_index_table[i] == uncompacted) {
1238             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1239             return true;
1240          }
1241       }
1242    } else {
1243       uint64_t uncompacted =    /* 46b/BDW; 49b/CHV/SKL+ */
1244          (brw_inst_bits(src,  83,  83) << 43) |   /*  1b */
1245          (brw_inst_bits(src, 114, 107) << 35) |   /*  8b */
1246          (brw_inst_bits(src,  93,  86) << 27) |   /*  8b */
1247          (brw_inst_bits(src,  72,  65) << 19) |   /*  8b */
1248          (brw_inst_bits(src,  55,  37));          /* 19b */
1249 
1250       if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1251          uncompacted |=
1252             (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
1253             (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
1254             (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
1255       } else {
1256          uncompacted |=
1257             (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
1258             (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
1259       }
1260 
1261       for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
1262          if (gen8_3src_source_index_table[i] == uncompacted) {
1263             brw_compact_inst_set_3src_source_index(devinfo, dst, i);
1264             return true;
1265          }
1266       }
1267    }
1268 
1269    return false;
1270 }
1271 
1272 static bool
set_3src_subreg_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1273 set_3src_subreg_index(const struct gen_device_info *devinfo,
1274                       brw_compact_inst *dst, const brw_inst *src)
1275 {
1276    assert(devinfo->gen >= 12);
1277 
1278    uint32_t uncompacted =               /* 20b/TGL+ */
1279       (brw_inst_bits(src, 119, 115) << 15) | /*  5b */
1280       (brw_inst_bits(src, 103,  99) << 10) | /*  5b */
1281       (brw_inst_bits(src,  71,  67) <<  5) | /*  5b */
1282       (brw_inst_bits(src,  55,  51));        /*  5b */
1283 
1284    for (unsigned i = 0; i < ARRAY_SIZE(gen12_3src_subreg_table); i++) {
1285       if (gen12_3src_subreg_table[i] == uncompacted) {
1286          brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);
1287 	 return true;
1288       }
1289    }
1290 
1291    return false;
1292 }
1293 
1294 static bool
has_unmapped_bits(const struct gen_device_info * devinfo,const brw_inst * src)1295 has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
1296 {
1297    /* EOT can only be mapped on a send if the src1 is an immediate */
1298    if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
1299         brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
1300        brw_inst_eot(devinfo, src))
1301       return true;
1302 
1303    /* Check for instruction bits that don't map to any of the fields of the
1304     * compacted instruction.  The instruction cannot be compacted if any of
1305     * them are set.  They overlap with:
1306     *  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
1307     *  - Dst.AddrImm[9] (bit 47 on Gen8)
1308     *  - Src0.AddrImm[9] (bit 95 on Gen8)
1309     *  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
1310     *  - UIP[31] (bit 95 on Gen8)
1311     */
1312    if (devinfo->gen >= 12) {
1313       assert(!brw_inst_bits(src, 7,  7));
1314       return false;
1315    } else if (devinfo->gen >= 8) {
1316       assert(!brw_inst_bits(src, 7,  7));
1317       return brw_inst_bits(src, 95, 95) ||
1318              brw_inst_bits(src, 47, 47) ||
1319              brw_inst_bits(src, 11, 11);
1320    } else {
1321       assert(!brw_inst_bits(src, 7,  7) &&
1322              !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
1323       return brw_inst_bits(src, 95, 91) ||
1324              brw_inst_bits(src, 47, 47);
1325    }
1326 }
1327 
1328 static bool
has_3src_unmapped_bits(const struct gen_device_info * devinfo,const brw_inst * src)1329 has_3src_unmapped_bits(const struct gen_device_info *devinfo,
1330                        const brw_inst *src)
1331 {
1332    /* Check for three-source instruction bits that don't map to any of the
1333     * fields of the compacted instruction.  All of them seem to be reserved
1334     * bits currently.
1335     */
1336    if (devinfo->gen >= 12) {
1337       assert(!brw_inst_bits(src, 7, 7));
1338    } else if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1339       assert(!brw_inst_bits(src, 127, 127) &&
1340              !brw_inst_bits(src, 7,  7));
1341    } else {
1342       assert(devinfo->gen >= 8);
1343       assert(!brw_inst_bits(src, 127, 126) &&
1344              !brw_inst_bits(src, 105, 105) &&
1345              !brw_inst_bits(src, 84, 84) &&
1346              !brw_inst_bits(src, 7,  7));
1347 
1348       /* Src1Type and Src2Type, used for mixed-precision floating point */
1349       if (brw_inst_bits(src, 36, 35))
1350          return true;
1351    }
1352 
1353    return false;
1354 }
1355 
1356 static bool
brw_try_compact_3src_instruction(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1357 brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
1358                                  brw_compact_inst *dst, const brw_inst *src)
1359 {
1360    assert(devinfo->gen >= 8);
1361 
1362    if (has_3src_unmapped_bits(devinfo, src))
1363       return false;
1364 
1365 #define compact(field) \
1366    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
1367 #define compact_a16(field) \
1368    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
1369 
1370    compact(hw_opcode);
1371 
1372    if (!set_3src_control_index(devinfo, dst, src))
1373       return false;
1374 
1375    if (!set_3src_source_index(devinfo, dst, src))
1376       return false;
1377 
1378    if (devinfo->gen >= 12) {
1379       if (!set_3src_subreg_index(devinfo, dst, src))
1380          return false;
1381 
1382       compact(swsb);
1383       compact(debug_control);
1384       compact(dst_reg_nr);
1385       compact(src0_reg_nr);
1386       compact(src1_reg_nr);
1387       compact(src2_reg_nr);
1388    } else {
1389       compact(dst_reg_nr);
1390       compact_a16(src0_rep_ctrl);
1391       compact(debug_control);
1392       compact(saturate);
1393       compact_a16(src1_rep_ctrl);
1394       compact_a16(src2_rep_ctrl);
1395       compact(src0_reg_nr);
1396       compact(src1_reg_nr);
1397       compact(src2_reg_nr);
1398       compact_a16(src0_subreg_nr);
1399       compact_a16(src1_subreg_nr);
1400       compact_a16(src2_subreg_nr);
1401    }
1402    brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
1403 
1404 #undef compact
1405 #undef compact_a16
1406 
1407    return true;
1408 }
1409 
1410 /* On SNB through ICL, compacted instructions have 12-bits for immediate
1411  * sources, and a 13th bit that's replicated through the high 20 bits.
1412  *
1413  * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
1414  * of packed vectors as compactable immediates.
1415  *
1416  * On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded
1417  * rather than the low 12-bits. For signed integer the 12th bit is replicated,
1418  * while for unsigned integers it is not.
1419  *
1420  * Returns the compacted immediate, or -1 if immediate cannot be compacted
1421  */
1422 static int
compact_immediate(const struct gen_device_info * devinfo,enum brw_reg_type type,unsigned imm)1423 compact_immediate(const struct gen_device_info *devinfo,
1424                   enum brw_reg_type type, unsigned imm)
1425 {
1426    if (devinfo->gen >= 12) {
1427       /* 16-bit immediates need to be replicated through the 32-bit immediate
1428        * field
1429        */
1430       switch (type) {
1431       case BRW_REGISTER_TYPE_W:
1432       case BRW_REGISTER_TYPE_UW:
1433       case BRW_REGISTER_TYPE_HF:
1434          if ((imm >> 16) != (imm & 0xffff))
1435             return -1;
1436          break;
1437       default:
1438          break;
1439       }
1440 
1441       switch (type) {
1442       case BRW_REGISTER_TYPE_F:
1443          /* We get the high 12-bits as-is; rest must be zero */
1444          if ((imm & 0xfffff) == 0)
1445             return (imm >> 20) & 0xfff;
1446          break;
1447       case BRW_REGISTER_TYPE_HF:
1448          /* We get the high 12-bits as-is; rest must be zero */
1449          if ((imm & 0xf) == 0)
1450             return (imm >> 4) & 0xfff;
1451          break;
1452       case BRW_REGISTER_TYPE_UD:
1453       case BRW_REGISTER_TYPE_VF:
1454       case BRW_REGISTER_TYPE_UV:
1455       case BRW_REGISTER_TYPE_V:
1456          /* We get the low 12-bits as-is; rest must be zero */
1457          if ((imm & 0xfffff000) == 0)
1458             return imm & 0xfff;
1459          break;
1460       case BRW_REGISTER_TYPE_UW:
1461          /* We get the low 12-bits as-is; rest must be zero */
1462          if ((imm & 0xf000) == 0)
1463             return imm & 0xfff;
1464          break;
1465       case BRW_REGISTER_TYPE_D:
1466          /* We get the low 11-bits as-is; 12th is replicated */
1467          if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)
1468             return imm & 0xfff;
1469          break;
1470       case BRW_REGISTER_TYPE_W:
1471          /* We get the low 11-bits as-is; 12th is replicated */
1472          if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)
1473             return imm & 0xfff;
1474          break;
1475       case BRW_REGISTER_TYPE_NF:
1476       case BRW_REGISTER_TYPE_DF:
1477       case BRW_REGISTER_TYPE_Q:
1478       case BRW_REGISTER_TYPE_UQ:
1479       case BRW_REGISTER_TYPE_B:
1480       case BRW_REGISTER_TYPE_UB:
1481          return -1;
1482       }
1483    } else {
1484       /* We get the low 12 bits as-is; 13th is replicated */
1485       if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {
1486          return imm & 0x1fff;
1487       }
1488    }
1489 
1490    return -1;
1491 }
1492 
1493 static int
uncompact_immediate(const struct gen_device_info * devinfo,enum brw_reg_type type,unsigned compact_imm)1494 uncompact_immediate(const struct gen_device_info *devinfo,
1495                     enum brw_reg_type type, unsigned compact_imm)
1496 {
1497    if (devinfo->gen >= 12) {
1498       switch (type) {
1499       case BRW_REGISTER_TYPE_F:
1500          return compact_imm << 20;
1501       case BRW_REGISTER_TYPE_HF:
1502          return (compact_imm << 20) | (compact_imm << 4);
1503       case BRW_REGISTER_TYPE_UD:
1504       case BRW_REGISTER_TYPE_VF:
1505       case BRW_REGISTER_TYPE_UV:
1506       case BRW_REGISTER_TYPE_V:
1507          return compact_imm;
1508       case BRW_REGISTER_TYPE_UW:
1509          /* Replicate */
1510          return compact_imm << 16 | compact_imm;
1511       case BRW_REGISTER_TYPE_D:
1512          /* Extend the 12th bit into the high 20 bits */
1513          return (int)(compact_imm << 20) >> 20;
1514       case BRW_REGISTER_TYPE_W:
1515          /* Extend the 12th bit into the high 4 bits and replicate */
1516          return (  (int)(compact_imm << 20) >> 4) |
1517                 ((short)(compact_imm <<  4) >> 4);
1518       case BRW_REGISTER_TYPE_NF:
1519       case BRW_REGISTER_TYPE_DF:
1520       case BRW_REGISTER_TYPE_Q:
1521       case BRW_REGISTER_TYPE_UQ:
1522       case BRW_REGISTER_TYPE_B:
1523       case BRW_REGISTER_TYPE_UB:
1524          unreachable("not reached");
1525       }
1526    } else {
1527       /* Replicate the 13th bit into the high 19 bits */
1528       return (int)(compact_imm << 19) >> 19;
1529    }
1530 
1531    unreachable("not reached");
1532 }
1533 
1534 static bool
has_immediate(const struct gen_device_info * devinfo,const brw_inst * inst,enum brw_reg_type * type)1535 has_immediate(const struct gen_device_info *devinfo, const brw_inst *inst,
1536               enum brw_reg_type *type)
1537 {
1538    if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1539       *type = brw_inst_src0_type(devinfo, inst);
1540       return *type != INVALID_REG_TYPE;
1541    } else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {
1542       *type = brw_inst_src1_type(devinfo, inst);
1543       return *type != INVALID_REG_TYPE;
1544    }
1545 
1546    return false;
1547 }
1548 
1549 /**
1550  * Applies some small changes to instruction types to increase chances of
1551  * compaction.
1552  */
1553 static brw_inst
precompact(const struct gen_device_info * devinfo,brw_inst inst)1554 precompact(const struct gen_device_info *devinfo, brw_inst inst)
1555 {
1556    if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
1557       return inst;
1558 
1559    /* The Bspec's section titled "Non-present Operands" claims that if src0
1560     * is an immediate that src1's type must be the same as that of src0.
1561     *
1562     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
1563     * that do not follow this rule. E.g., from the IVB/HSW table:
1564     *
1565     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1566     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
1567     *
1568     * And from the SNB table:
1569     *
1570     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
1571     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
1572     *
1573     * Neither of these cause warnings from the simulator when used,
1574     * compacted or otherwise. In fact, all compaction mappings that have an
1575     * immediate in src0 use a:ud for src1.
1576     *
1577     * The GM45 instruction compaction tables do not contain mapped meanings
1578     * so it's not clear whether it has the restriction. We'll assume it was
1579     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
1580     *
1581     * Don't do any of this for 64-bit immediates, since the src1 fields
1582     * overlap with the immediate and setting them would overwrite the
1583     * immediate we set.
1584     */
1585    if (devinfo->gen >= 6 &&
1586        !(devinfo->is_haswell &&
1587          brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1588        !(devinfo->gen >= 8 &&
1589          (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1590           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1591           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1592       brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);
1593    }
1594 
1595    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1596     * for immediate values. Presumably the hardware engineers realized
1597     * that the only useful floating-point value that could be represented
1598     * in this format is 0.0, which can also be represented as a VF-typed
1599     * immediate, so they gave us the previously mentioned mapping on IVB+.
1600     *
1601     * Strangely, we do have a mapping for imm:f in src1, so we don't need
1602     * to do this there.
1603     *
1604     * If we see a 0.0:F, change the type to VF so that it can be compacted.
1605     *
1606     * Compaction of floating-point immediates is improved on Gen12, thus
1607     * removing the need for this.
1608     */
1609    if (devinfo->gen < 12 &&
1610        brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1611        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1612        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1613        brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1614       enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1615       brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1616    }
1617 
1618    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1619     * set the types to :UD so the instruction can be compacted.
1620     *
1621     * FINISHME: Use dst:f | imm:f on Gen12
1622     */
1623    if (devinfo->gen < 12 &&
1624        compact_immediate(devinfo, BRW_REGISTER_TYPE_D,
1625                          brw_inst_imm_ud(devinfo, &inst)) != -1 &&
1626        brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1627        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1628        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1629       enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1630       enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1631 
1632       brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1633       brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1634    }
1635 
1636    return inst;
1637 }
1638 
1639 /**
1640  * Tries to compact instruction src into dst.
1641  *
1642  * It doesn't modify dst unless src is compactable, which is relied on by
1643  * brw_compact_instructions().
1644  */
1645 static bool
try_compact_instruction(const struct compaction_state * c,brw_compact_inst * dst,const brw_inst * src)1646 try_compact_instruction(const struct compaction_state *c,
1647                         brw_compact_inst *dst, const brw_inst *src)
1648 {
1649    const struct gen_device_info *devinfo = c->devinfo;
1650    brw_compact_inst temp;
1651 
1652    assert(brw_inst_cmpt_control(devinfo, src) == 0);
1653 
1654    if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1655       if (devinfo->gen >= 8) {
1656          memset(&temp, 0, sizeof(temp));
1657          if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1658             *dst = temp;
1659             return true;
1660          } else {
1661             return false;
1662          }
1663       } else {
1664          return false;
1665       }
1666    }
1667 
1668    enum brw_reg_type type;
1669    bool is_immediate = has_immediate(devinfo, src, &type);
1670 
1671    unsigned compacted_imm = 0;
1672 
1673    if (is_immediate) {
1674       /* Instructions with immediates cannot be compacted on Gen < 6 */
1675       if (devinfo->gen < 6)
1676          return false;
1677 
1678       compacted_imm = compact_immediate(devinfo, type,
1679                                         brw_inst_imm_ud(devinfo, src));
1680       if (compacted_imm == -1)
1681          return false;
1682    }
1683 
1684    if (has_unmapped_bits(devinfo, src))
1685       return false;
1686 
1687    memset(&temp, 0, sizeof(temp));
1688 
1689 #define compact(field) \
1690    brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1691 #define compact_reg(field) \
1692    brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, \
1693                                        brw_inst_##field##_da_reg_nr(devinfo, src))
1694 
1695    compact(hw_opcode);
1696    compact(debug_control);
1697 
1698    if (!set_control_index(c, &temp, src))
1699       return false;
1700    if (!set_datatype_index(c, &temp, src, is_immediate))
1701       return false;
1702    if (!set_subreg_index(c, &temp, src, is_immediate))
1703       return false;
1704    if (!set_src0_index(c, &temp, src))
1705       return false;
1706    if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))
1707       return false;
1708 
1709    if (devinfo->gen >= 12) {
1710       compact(swsb);
1711       compact_reg(dst);
1712       compact_reg(src0);
1713 
1714       if (is_immediate) {
1715          /* src1 reg takes the high 8 bits (of the 12-bit compacted value) */
1716          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);
1717       } else {
1718          compact_reg(src1);
1719       }
1720    } else {
1721       if (devinfo->gen >= 6) {
1722          compact(acc_wr_control);
1723       } else {
1724          compact(mask_control_ex);
1725       }
1726 
1727       if (devinfo->gen <= 6)
1728          compact(flag_subreg_nr);
1729 
1730       compact(cond_modifier);
1731 
1732       compact_reg(dst);
1733       compact_reg(src0);
1734 
1735       if (is_immediate) {
1736          /* src1 reg takes the low 8 bits (of the 13-bit compacted value) */
1737          brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);
1738       } else {
1739          compact_reg(src1);
1740       }
1741    }
1742    brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1743 
1744 #undef compact
1745 #undef compact_reg
1746 
1747    *dst = temp;
1748 
1749    return true;
1750 }
1751 
1752 bool
brw_try_compact_instruction(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1753 brw_try_compact_instruction(const struct gen_device_info *devinfo,
1754                             brw_compact_inst *dst, const brw_inst *src)
1755 {
1756    struct compaction_state c;
1757    compaction_state_init(&c, devinfo);
1758    return try_compact_instruction(&c, dst, src);
1759 }
1760 
1761 static void
set_uncompacted_control(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1762 set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,
1763                         brw_compact_inst *src)
1764 {
1765    const struct gen_device_info *devinfo = c->devinfo;
1766    uint32_t uncompacted =
1767       c->control_index_table[brw_compact_inst_control_index(devinfo, src)];
1768 
1769    if (devinfo->gen >= 12) {
1770       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 17));
1771       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1772       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1773       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1774       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1775       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1776       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1777       brw_inst_set_bits(dst, 23, 22, (uncompacted >>  6) & 0x3);
1778       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1779       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1780    } else if (devinfo->gen >= 8) {
1781       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1782       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1783       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1784       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1785       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1786    } else {
1787       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1788       brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1789 
1790       if (devinfo->gen == 7)
1791          brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1792    }
1793 }
1794 
1795 static void
set_uncompacted_datatype(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1796 set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,
1797                          brw_compact_inst *src)
1798 {
1799    const struct gen_device_info *devinfo = c->devinfo;
1800    uint32_t uncompacted =
1801       c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1802 
1803    if (devinfo->gen >= 12) {
1804       brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));
1805       brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);
1806       brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);
1807       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);
1808       brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);
1809       brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);
1810       brw_inst_set_bits(dst, 46, 46, (uncompacted >>  9) & 0x1);
1811       brw_inst_set_bits(dst, 43, 40, (uncompacted >>  5) & 0xf);
1812       brw_inst_set_bits(dst, 39, 36, (uncompacted >>  1) & 0xf);
1813       brw_inst_set_bits(dst, 35, 35, (uncompacted >>  0) & 0x1);
1814    } else if (devinfo->gen >= 8) {
1815       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1816       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1817       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1818    } else {
1819       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1820       brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1821    }
1822 }
1823 
1824 static void
set_uncompacted_subreg(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1825 set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,
1826                        brw_compact_inst *src)
1827 {
1828    const struct gen_device_info *devinfo = c->devinfo;
1829    uint16_t uncompacted =
1830       c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1831 
1832    if (devinfo->gen >= 12) {
1833       brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));
1834       brw_inst_set_bits(dst,  71, 67, (uncompacted >>  5) & 0x1f);
1835       brw_inst_set_bits(dst,  55, 51, (uncompacted >>  0) & 0x1f);
1836    } else {
1837       brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1838       brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1839       brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1840    }
1841 }
1842 
1843 static void
set_uncompacted_src0(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1844 set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,
1845                      brw_compact_inst *src)
1846 {
1847    const struct gen_device_info *devinfo = c->devinfo;
1848    uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1849    uint16_t uncompacted = c->src0_index_table[compacted];
1850 
1851    if (devinfo->gen >= 12) {
1852       brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));
1853       brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);
1854       brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);
1855       brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);
1856       brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);
1857    } else {
1858       brw_inst_set_bits(dst, 88, 77, uncompacted);
1859    }
1860 }
1861 
1862 static void
set_uncompacted_src1(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1863 set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,
1864                      brw_compact_inst *src)
1865 {
1866    const struct gen_device_info *devinfo = c->devinfo;
1867    uint16_t uncompacted =
1868       c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];
1869 
1870    if (devinfo->gen >= 12) {
1871       brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));
1872       brw_inst_set_bits(dst, 119, 116, (uncompacted >>  6) & 0xf);
1873       brw_inst_set_bits(dst, 115, 113, (uncompacted >>  3) & 0x7);
1874       brw_inst_set_bits(dst, 112, 112, (uncompacted >>  2) & 0x1);
1875       brw_inst_set_bits(dst,  97,  96, (uncompacted >>  0) & 0x3);
1876    } else {
1877       brw_inst_set_bits(dst, 120, 109, uncompacted);
1878    }
1879 }
1880 
1881 static void
set_uncompacted_3src_control_index(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1882 set_uncompacted_3src_control_index(const struct compaction_state *c,
1883                                    brw_inst *dst, brw_compact_inst *src)
1884 {
1885    const struct gen_device_info *devinfo = c->devinfo;
1886    assert(devinfo->gen >= 8);
1887 
1888    if (devinfo->gen >= 12) {
1889       uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1890       uint64_t uncompacted = gen12_3src_control_index_table[compacted];
1891 
1892       brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));
1893       brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);
1894       brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);
1895       brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);
1896       brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);
1897       brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);
1898       brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);
1899       brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);
1900       brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);
1901       brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);
1902       brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);
1903       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);
1904       brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);
1905       brw_inst_set_bits(dst, 27, 24, (uncompacted >>  8) & 0xf);
1906       brw_inst_set_bits(dst, 23, 23, (uncompacted >>  7) & 0x1);
1907       brw_inst_set_bits(dst, 22, 22, (uncompacted >>  6) & 0x1);
1908       brw_inst_set_bits(dst, 21, 19, (uncompacted >>  3) & 0x7);
1909       brw_inst_set_bits(dst, 18, 16, (uncompacted >>  0) & 0x7);
1910    } else {
1911       uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1912       uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1913 
1914       brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1915       brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
1916 
1917       if (devinfo->gen >= 9 || devinfo->is_cherryview)
1918          brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1919    }
1920 }
1921 
1922 static void
set_uncompacted_3src_source_index(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1923 set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
1924                                   brw_inst *dst, brw_compact_inst *src)
1925 {
1926    assert(devinfo->gen >= 8);
1927 
1928    uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
1929 
1930    if (devinfo->gen >= 12) {
1931       uint32_t uncompacted = gen12_3src_source_index_table[compacted];
1932 
1933       brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));
1934       brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);
1935       brw_inst_set_bits(dst,  98,  98, (uncompacted >> 17) & 0x1);
1936       brw_inst_set_bits(dst,  97,  96, (uncompacted >> 15) & 0x3);
1937       brw_inst_set_bits(dst,  91,  91, (uncompacted >> 14) & 0x1);
1938       brw_inst_set_bits(dst,  87,  86, (uncompacted >> 12) & 0x3);
1939       brw_inst_set_bits(dst,  85,  84, (uncompacted >> 10) & 0x3);
1940       brw_inst_set_bits(dst,  83,  83, (uncompacted >>  9) & 0x1);
1941       brw_inst_set_bits(dst,  66,  66, (uncompacted >>  8) & 0x1);
1942       brw_inst_set_bits(dst,  65,  64, (uncompacted >>  6) & 0x3);
1943       brw_inst_set_bits(dst,  47,  47, (uncompacted >>  5) & 0x1);
1944       brw_inst_set_bits(dst,  46,  46, (uncompacted >>  4) & 0x1);
1945       brw_inst_set_bits(dst,  45,  44, (uncompacted >>  2) & 0x3);
1946       brw_inst_set_bits(dst,  43,  43, (uncompacted >>  1) & 0x1);
1947       brw_inst_set_bits(dst,  35,  35, (uncompacted >>  0) & 0x1);
1948    } else {
1949       uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1950 
1951       brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
1952       brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1953       brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
1954       brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
1955       brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
1956 
1957       if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1958          brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1959          brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1960          brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
1961       } else {
1962          brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1963          brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1964       }
1965    }
1966 }
1967 
1968 static void
set_uncompacted_3src_subreg_index(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1969 set_uncompacted_3src_subreg_index(const struct gen_device_info *devinfo,
1970                                   brw_inst *dst, brw_compact_inst *src)
1971 {
1972    assert(devinfo->gen >= 12);
1973 
1974    uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);
1975    uint32_t uncompacted = gen12_3src_subreg_table[compacted];
1976 
1977    brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));
1978    brw_inst_set_bits(dst, 103,  99, (uncompacted >> 10) & 0x1f);
1979    brw_inst_set_bits(dst,  71,  67, (uncompacted >>  5) & 0x1f);
1980    brw_inst_set_bits(dst,  55,  51, (uncompacted >>  0) & 0x1f);
1981 }
1982 
1983 static void
brw_uncompact_3src_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)1984 brw_uncompact_3src_instruction(const struct compaction_state *c,
1985                                brw_inst *dst, brw_compact_inst *src)
1986 {
1987    const struct gen_device_info *devinfo = c->devinfo;
1988    assert(devinfo->gen >= 8);
1989 
1990 #define uncompact(field) \
1991    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1992 #define uncompact_a16(field) \
1993    brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1994 
1995    uncompact(hw_opcode);
1996 
1997    if (devinfo->gen >= 12) {
1998       set_uncompacted_3src_control_index(c, dst, src);
1999       set_uncompacted_3src_source_index(devinfo, dst, src);
2000       set_uncompacted_3src_subreg_index(devinfo, dst, src);
2001 
2002       uncompact(debug_control);
2003       uncompact(swsb);
2004       uncompact(dst_reg_nr);
2005       uncompact(src0_reg_nr);
2006       uncompact(src1_reg_nr);
2007       uncompact(src2_reg_nr);
2008    } else {
2009       set_uncompacted_3src_control_index(c, dst, src);
2010       set_uncompacted_3src_source_index(devinfo, dst, src);
2011 
2012       uncompact(dst_reg_nr);
2013       uncompact_a16(src0_rep_ctrl);
2014       uncompact(debug_control);
2015       uncompact(saturate);
2016       uncompact_a16(src1_rep_ctrl);
2017       uncompact_a16(src2_rep_ctrl);
2018       uncompact(src0_reg_nr);
2019       uncompact(src1_reg_nr);
2020       uncompact(src2_reg_nr);
2021       uncompact_a16(src0_subreg_nr);
2022       uncompact_a16(src1_subreg_nr);
2023       uncompact_a16(src2_subreg_nr);
2024    }
2025    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
2026 
2027 #undef uncompact
2028 #undef uncompact_a16
2029 }
2030 
2031 static void
uncompact_instruction(const struct compaction_state * c,brw_inst * dst,brw_compact_inst * src)2032 uncompact_instruction(const struct compaction_state *c, brw_inst *dst,
2033                       brw_compact_inst *src)
2034 {
2035    const struct gen_device_info *devinfo = c->devinfo;
2036    memset(dst, 0, sizeof(*dst));
2037 
2038    if (devinfo->gen >= 8 &&
2039        is_3src(devinfo, brw_opcode_decode(
2040                   devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {
2041       brw_uncompact_3src_instruction(c, dst, src);
2042       return;
2043    }
2044 
2045 #define uncompact(field) \
2046    brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
2047 #define uncompact_reg(field) \
2048    brw_inst_set_##field##_da_reg_nr(devinfo, dst, \
2049                                     brw_compact_inst_##field##_reg_nr(devinfo, src))
2050 
2051    uncompact(hw_opcode);
2052    uncompact(debug_control);
2053 
2054    set_uncompacted_control(c, dst, src);
2055    set_uncompacted_datatype(c, dst, src);
2056    set_uncompacted_subreg(c, dst, src);
2057    set_uncompacted_src0(c, dst, src);
2058 
2059    enum brw_reg_type type;
2060    if (has_immediate(devinfo, dst, &type)) {
2061       unsigned imm = uncompact_immediate(devinfo, type,
2062                                          brw_compact_inst_imm(devinfo, src));
2063       brw_inst_set_imm_ud(devinfo, dst, imm);
2064    } else {
2065       set_uncompacted_src1(c, dst, src);
2066       uncompact_reg(src1);
2067    }
2068 
2069    if (devinfo->gen >= 12) {
2070       uncompact(swsb);
2071       uncompact_reg(dst);
2072       uncompact_reg(src0);
2073    } else {
2074       if (devinfo->gen >= 6) {
2075          uncompact(acc_wr_control);
2076       } else {
2077          uncompact(mask_control_ex);
2078       }
2079 
2080       uncompact(cond_modifier);
2081 
2082       if (devinfo->gen <= 6)
2083          uncompact(flag_subreg_nr);
2084 
2085       uncompact_reg(dst);
2086       uncompact_reg(src0);
2087    }
2088    brw_inst_set_cmpt_control(devinfo, dst, false);
2089 
2090 #undef uncompact
2091 #undef uncompact_reg
2092 }
2093 
2094 void
brw_uncompact_instruction(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)2095 brw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst,
2096                           brw_compact_inst *src)
2097 {
2098    struct compaction_state c;
2099    compaction_state_init(&c, devinfo);
2100    uncompact_instruction(&c, dst, src);
2101 }
2102 
/**
 * Reports on stderr how an instruction changed across a compact/uncompact
 * round trip.
 *
 * Prints the generation, disassembles orig and uncompacted, and then lists
 * each of the 128 instruction bits whose value differs between the two.
 */
void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
                                 brw_inst *orig,
                                 brw_inst *uncompacted)
{
   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
           devinfo->gen);

   fprintf(stderr, "  before: ");
   brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);

   fprintf(stderr, "  after:  ");
   brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);

   /* Copy the 128 instruction bits into 32-bit words instead of reading the
    * instructions through a casted uint32_t pointer.
    */
   uint32_t before_bits[4];
   uint32_t after_bits[4];
   memcpy(before_bits, orig, sizeof(before_bits));
   memcpy(after_bits, uncompacted, sizeof(after_bits));

   fprintf(stderr, "  changed bits:\n");
   for (int i = 0; i < 128; i++) {
      /* The mask must be unsigned: shifting a signed 1 left by 31 is
       * undefined behaviour.
       */
      const uint32_t mask = UINT32_C(1) << (i & 31);
      uint32_t before = before_bits[i / 32] & mask;
      uint32_t after = after_bits[i / 32] & mask;

      if (before != after) {
         fprintf(stderr, "  bit %d, %s to %s\n", i,
                 before ? "set" : "unset",
                 after ? "set" : "unset");
      }
   }
}
2130 
/**
 * Returns the difference between the compacted-instruction counts recorded
 * for old_target_ip and for old_ip, i.e.
 * compacted_counts[old_target_ip] - compacted_counts[old_ip].
 *
 * The result is negative when the target's recorded count is the smaller one.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
2138 
2139 static void
update_uip_jip(const struct gen_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2140 update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
2141                int this_old_ip, int *compacted_counts)
2142 {
2143    /* JIP and UIP are in units of:
2144     *    - bytes on Gen8+; and
2145     *    - compacted instructions on Gen6+.
2146     */
2147    int shift = devinfo->gen >= 8 ? 3 : 0;
2148 
2149    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
2150    jip_compacted -= compacted_between(this_old_ip,
2151                                       this_old_ip + (jip_compacted / 2),
2152                                       compacted_counts);
2153    brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
2154 
2155    if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
2156        brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
2157        (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
2158       return;
2159 
2160    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
2161    uip_compacted -= compacted_between(this_old_ip,
2162                                       this_old_ip + (uip_compacted / 2),
2163                                       compacted_counts);
2164    brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
2165 }
2166 
2167 static void
update_gen4_jump_count(const struct gen_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)2168 update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
2169                        int this_old_ip, int *compacted_counts)
2170 {
2171    assert(devinfo->gen == 5 || devinfo->is_g4x);
2172 
2173    /* Jump Count is in units of:
2174     *    - uncompacted instructions on G45; and
2175     *    - compacted instructions on Gen5.
2176     */
2177    int shift = devinfo->is_g4x ? 1 : 0;
2178 
2179    int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
2180 
2181    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2182 
2183    int this_compacted_count = compacted_counts[this_old_ip];
2184    int target_compacted_count = compacted_counts[target_old_ip];
2185 
2186    jump_count_compacted -= (target_compacted_count - this_compacted_count);
2187    brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
2188 }
2189 
2190 static void
compaction_state_init(struct compaction_state * c,const struct gen_device_info * devinfo)2191 compaction_state_init(struct compaction_state *c,
2192                       const struct gen_device_info *devinfo)
2193 {
2194    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
2195    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
2196    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
2197    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
2198    assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
2199    assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
2200    assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
2201    assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
2202    assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
2203    assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
2204    assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
2205    assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
2206    assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
2207    assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
2208    assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
2209    assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
2210    assert(gen11_datatype_table[ARRAY_SIZE(gen11_datatype_table) - 1] != 0);
2211    assert(gen12_control_index_table[ARRAY_SIZE(gen12_control_index_table) - 1] != 0);
2212    assert(gen12_datatype_table[ARRAY_SIZE(gen12_datatype_table) - 1] != 0);
2213    assert(gen12_subreg_table[ARRAY_SIZE(gen12_subreg_table) - 1] != 0);
2214    assert(gen12_src0_index_table[ARRAY_SIZE(gen12_src0_index_table) - 1] != 0);
2215    assert(gen12_src1_index_table[ARRAY_SIZE(gen12_src1_index_table) - 1] != 0);
2216 
2217    c->devinfo = devinfo;
2218    switch (devinfo->gen) {
2219    case 12:
2220       c->control_index_table = gen12_control_index_table;;
2221       c->datatype_table = gen12_datatype_table;
2222       c->subreg_table = gen12_subreg_table;
2223       c->src0_index_table = gen12_src0_index_table;
2224       c->src1_index_table = gen12_src1_index_table;
2225       break;
2226    case 11:
2227       c->control_index_table = gen8_control_index_table;
2228       c->datatype_table = gen11_datatype_table;
2229       c->subreg_table = gen8_subreg_table;
2230       c->src0_index_table = gen8_src_index_table;
2231       c->src1_index_table = gen8_src_index_table;
2232       break;
2233    case 9:
2234    case 8:
2235       c->control_index_table = gen8_control_index_table;
2236       c->datatype_table = gen8_datatype_table;
2237       c->subreg_table = gen8_subreg_table;
2238       c->src0_index_table = gen8_src_index_table;
2239       c->src1_index_table = gen8_src_index_table;
2240       break;
2241    case 7:
2242       c->control_index_table = gen7_control_index_table;
2243       c->datatype_table = gen7_datatype_table;
2244       c->subreg_table = gen7_subreg_table;
2245       c->src0_index_table = gen7_src_index_table;
2246       c->src1_index_table = gen7_src_index_table;
2247       break;
2248    case 6:
2249       c->control_index_table = gen6_control_index_table;
2250       c->datatype_table = gen6_datatype_table;
2251       c->subreg_table = gen6_subreg_table;
2252       c->src0_index_table = gen6_src_index_table;
2253       c->src1_index_table = gen6_src_index_table;
2254       break;
2255    case 5:
2256    case 4:
2257       c->control_index_table = g45_control_index_table;
2258       c->datatype_table = g45_datatype_table;
2259       c->subreg_table = g45_subreg_table;
2260       c->src0_index_table = g45_src_index_table;
2261       c->src1_index_table = g45_src_index_table;
2262       break;
2263    default:
2264       unreachable("unknown generation");
2265    }
2266 }
2267 
2268 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)2269 brw_compact_instructions(struct brw_codegen *p, int start_offset,
2270                          struct disasm_info *disasm)
2271 {
2272    if (INTEL_DEBUG & DEBUG_NO_COMPACTION)
2273       return;
2274 
2275    const struct gen_device_info *devinfo = p->devinfo;
2276    void *store = p->store + start_offset / 16;
2277    /* For an instruction at byte offset 16*i before compaction, this is the
2278     * number of compacted instructions minus the number of padding NOP/NENOPs
2279     * that preceded it.
2280     */
2281    int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
2282    /* For an instruction at byte offset 8*i after compaction, this was its IP
2283     * (in 16-byte units) before compaction.
2284     */
2285    int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
2286 
2287    if (devinfo->gen == 4 && !devinfo->is_g4x)
2288       return;
2289 
2290    struct compaction_state c;
2291    compaction_state_init(&c, devinfo);
2292 
2293    int offset = 0;
2294    int compacted_count = 0;
2295    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
2296         src_offset += sizeof(brw_inst)) {
2297       brw_inst *src = store + src_offset;
2298       void *dst = store + offset;
2299 
2300       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2301       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2302 
2303       brw_inst inst = precompact(devinfo, *src);
2304       brw_inst saved = inst;
2305 
2306       if (try_compact_instruction(&c, dst, &inst)) {
2307          compacted_count++;
2308 
2309          if (INTEL_DEBUG) {
2310             brw_inst uncompacted;
2311             uncompact_instruction(&c, &uncompacted, dst);
2312             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
2313                brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
2314             }
2315          }
2316 
2317          offset += sizeof(brw_compact_inst);
2318       } else {
2319          /* All uncompacted instructions need to be aligned on G45. */
2320          if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
2321             brw_compact_inst *align = store + offset;
2322             memset(align, 0, sizeof(*align));
2323             brw_compact_inst_set_hw_opcode(
2324                devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));
2325             brw_compact_inst_set_cmpt_control(devinfo, align, true);
2326             offset += sizeof(brw_compact_inst);
2327             compacted_count--;
2328             compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
2329             old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
2330 
2331             dst = store + offset;
2332          }
2333 
2334          /* If we didn't compact this intruction, we need to move it down into
2335           * place.
2336           */
2337          if (offset != src_offset) {
2338             memmove(dst, src, sizeof(brw_inst));
2339          }
2340          offset += sizeof(brw_inst);
2341       }
2342    }
2343 
2344    /* Add an entry for the ending offset of the program. This greatly
2345     * simplifies the linked list walk at the end of the function.
2346     */
2347    old_ip[offset / sizeof(brw_compact_inst)] =
2348       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
2349 
2350    /* Fix up control flow offsets. */
2351    p->next_insn_offset = start_offset + offset;
2352    for (offset = 0; offset < p->next_insn_offset - start_offset;
2353         offset = next_offset(devinfo, store, offset)) {
2354       brw_inst *insn = store + offset;
2355       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
2356       int this_compacted_count = compacted_counts[this_old_ip];
2357 
2358       switch (brw_inst_opcode(devinfo, insn)) {
2359       case BRW_OPCODE_BREAK:
2360       case BRW_OPCODE_CONTINUE:
2361       case BRW_OPCODE_HALT:
2362          if (devinfo->gen >= 6) {
2363             update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2364          } else {
2365             update_gen4_jump_count(devinfo, insn, this_old_ip,
2366                                    compacted_counts);
2367          }
2368          break;
2369 
2370       case BRW_OPCODE_IF:
2371       case BRW_OPCODE_IFF:
2372       case BRW_OPCODE_ELSE:
2373       case BRW_OPCODE_ENDIF:
2374       case BRW_OPCODE_WHILE:
2375          if (devinfo->gen >= 7) {
2376             if (brw_inst_cmpt_control(devinfo, insn)) {
2377                brw_inst uncompacted;
2378                uncompact_instruction(&c, &uncompacted,
2379                                      (brw_compact_inst *)insn);
2380 
2381                update_uip_jip(devinfo, &uncompacted, this_old_ip,
2382                               compacted_counts);
2383 
2384                bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,
2385                                                   &uncompacted);
2386                assert(ret); (void)ret;
2387             } else {
2388                update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
2389             }
2390          } else if (devinfo->gen == 6) {
2391             assert(!brw_inst_cmpt_control(devinfo, insn));
2392 
2393             /* Jump Count is in units of compacted instructions on Gen6. */
2394             int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
2395 
2396             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
2397             int target_compacted_count = compacted_counts[target_old_ip];
2398             jump_count_compacted -= (target_compacted_count - this_compacted_count);
2399             brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
2400          } else {
2401             update_gen4_jump_count(devinfo, insn, this_old_ip,
2402                                    compacted_counts);
2403          }
2404          break;
2405 
2406       case BRW_OPCODE_ADD:
2407          /* Add instructions modifying the IP register use an immediate src1,
2408           * and Gens that use this cannot compact instructions with immediate
2409           * operands.
2410           */
2411          if (brw_inst_cmpt_control(devinfo, insn))
2412             break;
2413 
2414          if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
2415              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
2416             assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
2417 
2418             int shift = 3;
2419             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
2420 
2421             int target_old_ip = this_old_ip + (jump_compacted / 2);
2422             int target_compacted_count = compacted_counts[target_old_ip];
2423             jump_compacted -= (target_compacted_count - this_compacted_count);
2424             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
2425          }
2426          break;
2427 
2428       default:
2429          break;
2430       }
2431    }
2432 
2433    /* p->nr_insn is counting the number of uncompacted instructions still, so
2434     * divide.  We do want to be sure there's a valid instruction in any
2435     * alignment padding, so that the next compression pass (for the FS 8/16
2436     * compile passes) parses correctly.
2437     */
2438    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
2439       brw_compact_inst *align = store + offset;
2440       memset(align, 0, sizeof(*align));
2441       brw_compact_inst_set_hw_opcode(
2442          devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));
2443       brw_compact_inst_set_cmpt_control(devinfo, align, true);
2444       p->next_insn_offset += sizeof(brw_compact_inst);
2445    }
2446    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
2447 
2448    for (int i = 0; i < p->num_relocs; i++) {
2449       if (p->relocs[i].offset < (uint32_t)start_offset)
2450          continue;
2451 
2452       assert(p->relocs[i].offset % 16 == 0);
2453       unsigned idx = (p->relocs[i].offset - start_offset) / 16;
2454       p->relocs[i].offset -= compacted_counts[idx] * 8;
2455    }
2456 
2457    /* Update the instruction offsets for each group. */
2458    if (disasm) {
2459       int offset = 0;
2460 
2461       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
2462          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2463                 sizeof(brw_inst) != group->offset) {
2464             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
2465                    sizeof(brw_inst) < group->offset);
2466             offset = next_offset(devinfo, store, offset);
2467          }
2468 
2469          group->offset = start_offset + offset;
2470 
2471          offset = next_offset(devinfo, store, offset);
2472       }
2473    }
2474 }
2475