• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file brw_eu_compact.c
25  *
26  * Instruction compaction is a feature of G45 and newer hardware that allows
27  * for a smaller instruction encoding.
28  *
29  * The instruction cache is on the order of 32KB, and many programs generate
30  * far more instructions than that.  The instruction cache is built to barely
31  * keep up with instruction dispatch ability in cache hit cases -- L1
32  * instruction cache misses that still hit in the next level could limit
33  * throughput by around 50%.
34  *
35  * The idea of instruction compaction is that most instructions use a tiny
36  * subset of the GPU functionality, so we can encode what would be a 16 byte
37  * instruction in 8 bytes using some lookup tables for various fields.
38  *
39  *
40  * Instruction compaction capabilities vary subtly by generation.
41  *
42  * G45's support for instruction compaction is very limited. Jump counts on
43  * this generation are in units of 16-byte uncompacted instructions. As such,
44  * all jump targets must be 16-byte aligned. Also, all instructions must be
45  * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
46  * A G45-only instruction, NENOP, must be used to provide padding to align
47  * uncompacted instructions.
48  *
49  * Gen5 removes these restrictions and changes jump counts to be in units of
50  * 8-byte compacted instructions, allowing jump targets to be only 8-byte
51  * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
52  *
53  * Gen6 adds the ability to compact instructions with a limited range of
54  * immediate values. Compactable immediates have 12 unrestricted bits, and a
55  * 13th bit that's replicated through the high 20 bits, to create the 32-bit
56  * value of DW3 in the uncompacted instruction word.
57  *
58  * On Gen7 we can compact some control flow instructions with a small positive
59  * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
60  * control flow instructions with UIP cannot be compacted, because of the
61  * replicated 13th bit. No control flow instructions can be compacted on Gen6
62  * since the jump count field is not in DW3.
63  *
64  *    break    JIP/UIP
65  *    cont     JIP/UIP
66  *    halt     JIP/UIP
67  *    if       JIP/UIP
68  *    else     JIP (plus UIP on BDW+)
69  *    endif    JIP
70  *    while    JIP (must be negative)
71  *
72  * Gen 8 adds support for compacting 3-src instructions.
73  */
74 
75 #include "brw_eu.h"
76 #include "brw_shader.h"
77 #include "brw_disasm_info.h"
78 #include "common/gen_debug.h"
79 
/**
 * G45 control-index compaction table (32 entries, 17 significant bits each).
 *
 * Each entry is an uncompacted control bit pattern in the layout assembled by
 * set_control_index() for pre-Gen8 hardware; the position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as control_index_table for G45/Gen5 by
 * setup code outside this part of the file -- confirm.
 */
static const uint32_t g45_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000000000010,
   0b00100000000000000,
   0b00010000000000000,
   0b01000000000100000,
   0b01000000100000000,
   0b01010000000100000,
   0b00000000100000010,
   0b11000000000000000,
   0b00001000100000010,
   0b01001000100000000,
   0b00000000100000000,
   0b11000000000100000,
   0b00001000100000000,
   0b10110000000000000,
   0b11010000000100000,
   0b00110000100000000,
   0b00100000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00111100000000000,
   0b00101011000000000,
   0b00110000000010000,
   0b00010000100000000,
   0b01000000000100100,
   0b01000000000101000,
   0b00110000000000110,
   0b00000000000001010,
   0b01010000000101000,
   0b01010000000100100
};
114 
/**
 * G45 datatype-index compaction table (32 entries, 18 significant bits each).
 *
 * Each entry is an uncompacted datatype bit pattern in the layout assembled
 * by set_datatype_index() for pre-Gen8 hardware; the position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as datatype_table for G45/Gen5 by setup
 * code outside this part of the file -- confirm.
 */
static const uint32_t g45_datatype_table[32] = {
   0b001000000000100001,
   0b001011010110101101,
   0b001000001000110001,
   0b001111011110111101,
   0b001011010110101100,
   0b001000000110101101,
   0b001000000000100000,
   0b010100010110110001,
   0b001100011000101101,
   0b001000000000100010,
   0b001000001000110110,
   0b010000001000110001,
   0b001000001000110010,
   0b011000001000110010,
   0b001111011110111100,
   0b001000000100101000,
   0b010100011000110001,
   0b001010010100101001,
   0b001000001000101001,
   0b010000001000110110,
   0b101000001000110001,
   0b001011011000101101,
   0b001000000100001001,
   0b001011011000101100,
   0b110100011000110001,
   0b001000001110111101,
   0b110000001000110001,
   0b011000000100101010,
   0b101000001000101001,
   0b001011010110001100,
   0b001000000110100001,
   0b001010010100001000
};
149 
/**
 * G45 subregister-index compaction table (32 entries, 15 significant bits
 * each).
 *
 * Each entry is an uncompacted subregister bit pattern in the layout
 * assembled by set_subreg_index(); the position of the matching entry is what
 * gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as subreg_table for G45/Gen5 by setup
 * code outside this part of the file -- confirm.
 */
static const uint16_t g45_subreg_table[32] = {
   0b000000000000000,
   0b000000010000000,
   0b000001000000000,
   0b000100000000000,
   0b000000000100000,
   0b100000000000000,
   0b000000000010000,
   0b001100000000000,
   0b001010000000000,
   0b000000100000000,
   0b001000000000000,
   0b000000000001000,
   0b000000001000000,
   0b000000000000001,
   0b000010000000000,
   0b000000010100000,
   0b000000000000111,
   0b000001000100000,
   0b011000000000000,
   0b000000110000000,
   0b000000000000010,
   0b000000000000100,
   0b000000001100000,
   0b000100000000010,
   0b001110011000110,
   0b001110100001000,
   0b000110011000110,
   0b000001000011000,
   0b000110010000100,
   0b001100000000110,
   0b000000010000110,
   0b000001000110000
};
184 
/**
 * G45 source-index compaction table (32 entries, 12 significant bits each).
 *
 * Each entry is an uncompacted 12-bit source descriptor of the kind that
 * set_src0_index()/set_src1_index() pass to get_src_index(); the position of
 * the matching entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as src_index_table for G45/Gen5 by setup
 * code outside this part of the file -- confirm.
 */
static const uint16_t g45_src_index_table[32] = {
   0b000000000000,
   0b010001101000,
   0b010110001000,
   0b011010010000,
   0b001101001000,
   0b010110001010,
   0b010101110000,
   0b011001111000,
   0b001000101000,
   0b000000101000,
   0b010001010000,
   0b111101101100,
   0b010110001100,
   0b010001101100,
   0b011010010100,
   0b010001001100,
   0b001100101000,
   0b000000000010,
   0b111101001100,
   0b011001101000,
   0b010101001000,
   0b000000000100,
   0b000000101100,
   0b010001101010,
   0b000000111000,
   0b010101011000,
   0b000100100000,
   0b010110000000,
   0b010000000100,
   0b010000111000,
   0b000101100000,
   0b111101110100
};
219 
/**
 * Gen6 control-index compaction table (32 entries, 17 significant bits each).
 *
 * Each entry is an uncompacted control bit pattern in the layout assembled by
 * set_control_index() for pre-Gen8 hardware; the position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as control_index_table for Gen6 by setup
 * code outside this part of the file -- confirm.
 */
static const uint32_t gen6_control_index_table[32] = {
   0b00000000000000000,
   0b01000000000000000,
   0b00110000000000000,
   0b00000000100000000,
   0b00010000000000000,
   0b00001000100000000,
   0b00000000100000010,
   0b00000000000000010,
   0b01000000100000000,
   0b01010000000000000,
   0b10110000000000000,
   0b00100000000000000,
   0b11010000000000000,
   0b11000000000000000,
   0b01001000100000000,
   0b01000000000001000,
   0b01000000000000100,
   0b00000000000001000,
   0b00000000000000100,
   0b00111000100000000,
   0b00001000100000010,
   0b00110000100000000,
   0b00110000000000001,
   0b00100000000000001,
   0b00110000000000010,
   0b00110000000000101,
   0b00110000000001001,
   0b00110000000010000,
   0b00110000000000011,
   0b00110000000000100,
   0b00110000100001000,
   0b00100000000001001
};
254 
/**
 * Gen6 datatype-index compaction table (32 entries, 18 significant bits
 * each).
 *
 * Each entry is an uncompacted datatype bit pattern in the layout assembled
 * by set_datatype_index() for pre-Gen8 hardware; the position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * Entries 23 and 25 hold the same value; because lookups stop at the first
 * match, index 25 is never produced by set_datatype_index().
 *
 * NOTE(review): presumably installed as datatype_table for Gen6 by setup code
 * outside this part of the file -- confirm.
 */
static const uint32_t gen6_datatype_table[32] = {
   0b001001110000000000,
   0b001000110000100000,
   0b001001110000000001,
   0b001000000001100000,
   0b001010110100101001,
   0b001000000110101101,
   0b001100011000101100,
   0b001011110110101101,
   0b001000000111101100,
   0b001000000001100001,
   0b001000110010100101,
   0b001000000001000001,
   0b001000001000110001,
   0b001000001000101001,
   0b001000000000100000,
   0b001000001000110010,
   0b001010010100101001,
   0b001011010010100101,
   0b001000000110100101,
   0b001100011000101001,
   0b001011011000101100,
   0b001011010110100101,
   0b001011110110100101,
   0b001111011110111101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111011110011101,
   0b001111011110111110,
   0b001000000000100001,
   0b001000000000100010,
   0b001001111111011101,
   0b001000001110111110,
};
289 
/**
 * Gen6 subregister-index compaction table (32 entries, 15 significant bits
 * each).
 *
 * Each entry is an uncompacted subregister bit pattern in the layout
 * assembled by set_subreg_index(); the position of the matching entry is what
 * gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as subreg_table for Gen6 by setup code
 * outside this part of the file -- confirm.
 */
static const uint16_t gen6_subreg_table[32] = {
   0b000000000000000,
   0b000000000000100,
   0b000000110000000,
   0b111000000000000,
   0b011110000001000,
   0b000010000000000,
   0b000000000010000,
   0b000110000001100,
   0b001000000000000,
   0b000001000000000,
   0b000001010010100,
   0b000000001010110,
   0b010000000000000,
   0b110000000000000,
   0b000100000000000,
   0b000000010000000,
   0b000000000001000,
   0b100000000000000,
   0b000001010000000,
   0b001010000000000,
   0b001100000000000,
   0b000000001010100,
   0b101101010010100,
   0b010100000000000,
   0b000000010001111,
   0b011000000000000,
   0b111110000000000,
   0b101000000000000,
   0b000000000001111,
   0b000100010001111,
   0b001000010001111,
   0b000110000000000,
};
324 
/**
 * Gen6 source-index compaction table (32 entries, 12 significant bits each).
 *
 * Each entry is an uncompacted 12-bit source descriptor of the kind that
 * set_src0_index()/set_src1_index() pass to get_src_index(); the position of
 * the matching entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as src_index_table for Gen6 by setup
 * code outside this part of the file -- confirm.
 */
static const uint16_t gen6_src_index_table[32] = {
   0b000000000000,
   0b010110001000,
   0b010001101000,
   0b001000101000,
   0b011010010000,
   0b000100100000,
   0b010001101100,
   0b010101110000,
   0b011001111000,
   0b001100101000,
   0b010110001100,
   0b001000100000,
   0b010110001010,
   0b000000000010,
   0b010101010000,
   0b010101101000,
   0b111101001100,
   0b111100101100,
   0b011001110000,
   0b010110001001,
   0b010101011000,
   0b001101001000,
   0b010000101100,
   0b010000000000,
   0b001101110000,
   0b001100010000,
   0b001100000000,
   0b010001101010,
   0b001101111000,
   0b000001110000,
   0b001100100000,
   0b001101010000,
};
359 
/**
 * Gen7 control-index compaction table (32 entries, 19 significant bits each).
 *
 * Each entry is an uncompacted control bit pattern in the layout assembled by
 * set_control_index() on Gen7, where two extra bits (instruction bits 90:89)
 * are placed above the 17-bit pre-Gen7 pattern.  The position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as control_index_table for Gen7 by setup
 * code outside this part of the file -- confirm.
 */
static const uint32_t gen7_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
394 
/**
 * Gen7 datatype-index compaction table (32 entries, 18 significant bits
 * each).
 *
 * Each entry is an uncompacted datatype bit pattern in the layout assembled
 * by set_datatype_index() for pre-Gen8 hardware; the position of the matching
 * entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as datatype_table for Gen7 by setup code
 * outside this part of the file -- confirm.
 */
static const uint32_t gen7_datatype_table[32] = {
   0b001000000000000001,
   0b001000000000100000,
   0b001000000000100001,
   0b001000000001100001,
   0b001000000010111101,
   0b001000001011111101,
   0b001000001110100001,
   0b001000001110100101,
   0b001000001110111101,
   0b001000010000100001,
   0b001000110000100000,
   0b001000110000100001,
   0b001001010010100101,
   0b001001110010100100,
   0b001001110010100101,
   0b001111001110111101,
   0b001111011110011101,
   0b001111011110111100,
   0b001111011110111101,
   0b001111111110111100,
   0b000000001000001100,
   0b001000000000111101,
   0b001000000010100101,
   0b001000010000100000,
   0b001001010010100100,
   0b001001110010000100,
   0b001010010100001001,
   0b001101111110111101,
   0b001111111110111101,
   0b001011110110101100,
   0b001010010100101000,
   0b001010110100101000
};
429 
/**
 * Gen7 subregister-index compaction table (32 entries, 15 significant bits
 * each).
 *
 * Each entry is an uncompacted subregister bit pattern in the layout
 * assembled by set_subreg_index(); the position of the matching entry is what
 * gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as subreg_table for Gen7 by setup code
 * outside this part of the file -- confirm.
 */
static const uint16_t gen7_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000010100000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
464 
/**
 * Gen7 source-index compaction table (32 entries, 12 significant bits each).
 *
 * Each entry is an uncompacted 12-bit source descriptor of the kind that
 * set_src0_index()/set_src1_index() pass to get_src_index(); the position of
 * the matching entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as src_index_table for Gen7 by setup
 * code outside this part of the file -- confirm.
 */
static const uint16_t gen7_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
499 
/**
 * Gen8 control-index compaction table (32 entries, 19 significant bits each).
 *
 * Each entry is an uncompacted control bit pattern in the Gen8+ layout
 * assembled by set_control_index(); the position of the matching entry is
 * what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as control_index_table for Gen8+ by
 * setup code outside this part of the file -- confirm.
 */
static const uint32_t gen8_control_index_table[32] = {
   0b0000000000000000010,
   0b0000100000000000000,
   0b0000100000000000001,
   0b0000100000000000010,
   0b0000100000000000011,
   0b0000100000000000100,
   0b0000100000000000101,
   0b0000100000000000111,
   0b0000100000000001000,
   0b0000100000000001001,
   0b0000100000000001101,
   0b0000110000000000000,
   0b0000110000000000001,
   0b0000110000000000010,
   0b0000110000000000011,
   0b0000110000000000100,
   0b0000110000000000101,
   0b0000110000000000111,
   0b0000110000000001001,
   0b0000110000000001101,
   0b0000110000000010000,
   0b0000110000100000000,
   0b0001000000000000000,
   0b0001000000000000010,
   0b0001000000000000100,
   0b0001000000100000000,
   0b0010110000000000000,
   0b0010110000000010000,
   0b0011000000000000000,
   0b0011000000100000000,
   0b0101000000000000000,
   0b0101000000100000000
};
534 
/**
 * Gen8 datatype-index compaction table (32 entries, 21 significant bits
 * each).
 *
 * Each entry is an uncompacted datatype bit pattern in the Gen8+ layout
 * assembled by set_datatype_index(); the position of the matching entry is
 * what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as datatype_table for Gen8+ by setup
 * code outside this part of the file -- confirm.
 */
static const uint32_t gen8_datatype_table[32] = {
   0b001000000000000000001,
   0b001000000000001000000,
   0b001000000000001000001,
   0b001000000000011000001,
   0b001000000000101011101,
   0b001000000010111011101,
   0b001000000011101000001,
   0b001000000011101000101,
   0b001000000011101011101,
   0b001000001000001000001,
   0b001000011000001000000,
   0b001000011000001000001,
   0b001000101000101000101,
   0b001000111000101000100,
   0b001000111000101000101,
   0b001011100011101011101,
   0b001011101011100011101,
   0b001011101011101011100,
   0b001011101011101011101,
   0b001011111011101011100,
   0b000000000010000001100,
   0b001000000000001011101,
   0b001000000000101000101,
   0b001000001000001000000,
   0b001000101000101000100,
   0b001000111000100000100,
   0b001001001001000001001,
   0b001010111011101011101,
   0b001011111011101011101,
   0b001001111001101001100,
   0b001001001001001001000,
   0b001001011001001001000
};
569 
/**
 * Gen8 subregister-index compaction table (32 entries, 15 significant bits
 * each).
 *
 * Each entry is an uncompacted subregister bit pattern in the layout
 * assembled by set_subreg_index(); the position of the matching entry is what
 * gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as subreg_table for Gen8+ by setup code
 * outside this part of the file -- confirm.
 */
static const uint16_t gen8_subreg_table[32] = {
   0b000000000000000,
   0b000000000000001,
   0b000000000001000,
   0b000000000001111,
   0b000000000010000,
   0b000000010000000,
   0b000000100000000,
   0b000000110000000,
   0b000001000000000,
   0b000001000010000,
   0b000001010000000,
   0b001000000000000,
   0b001000000000001,
   0b001000010000001,
   0b001000010000010,
   0b001000010000011,
   0b001000010000100,
   0b001000010000111,
   0b001000010001000,
   0b001000010001110,
   0b001000010001111,
   0b001000110000000,
   0b001000111101000,
   0b010000000000000,
   0b010000110000000,
   0b011000000000000,
   0b011110010000111,
   0b100000000000000,
   0b101000000000000,
   0b110000000000000,
   0b111000000000000,
   0b111000000011100
};
604 
/**
 * Gen8 source-index compaction table (32 entries, 12 significant bits each).
 *
 * Each entry is an uncompacted 12-bit source descriptor of the kind that
 * set_src0_index()/set_src1_index() pass to get_src_index(); the position of
 * the matching entry is what gets stored in the compacted instruction.
 *
 * NOTE(review): presumably installed as src_index_table for Gen8+ by setup
 * code outside this part of the file -- confirm.
 */
static const uint16_t gen8_src_index_table[32] = {
   0b000000000000,
   0b000000000010,
   0b000000010000,
   0b000000010010,
   0b000000011000,
   0b000000100000,
   0b000000101000,
   0b000001001000,
   0b000001010000,
   0b000001110000,
   0b000001111000,
   0b001100000000,
   0b001100000010,
   0b001100001000,
   0b001100010000,
   0b001100010010,
   0b001100100000,
   0b001100101000,
   0b001100111000,
   0b001101000000,
   0b001101000010,
   0b001101001000,
   0b001101010000,
   0b001101100000,
   0b001101101000,
   0b001101110000,
   0b001101110001,
   0b001101111000,
   0b010001101000,
   0b010001101001,
   0b010001101010,
   0b010110001000
};
639 
/* This is actually the control index table for Cherryview (26 bits), but the
 * only difference from Broadwell (24 bits) is that it has two extra 0-bits at
 * the start.
 *
 * The low 24 bits have the same mappings on both hardware.
 *
 * set_3src_control_index() searches this table; the position of the matching
 * entry (0-3) is stored in the compacted three-source instruction.
 */
static const uint32_t gen8_3src_control_index_table[4] = {
   0b00100000000110000000000001,
   0b00000000000110000000000001,
   0b00000000001000000000000001,
   0b00000000001000000000100001
};
652 
/* This is actually the source index table for Cherryview (49 bits), but the
 * only difference from Broadwell (46 bits) is that it has three extra 0-bits
 * at the start.
 *
 * The low 44 bits have the same mappings on both hardware, and since the high
 * three bits on Broadwell are zero, we can reuse Cherryview's table.
 *
 * set_3src_source_index() searches this table; the position of the matching
 * entry (0-3) is stored in the compacted three-source instruction.
 */
static const uint64_t gen8_3src_source_index_table[4] = {
   0b0000001110010011100100111001000001111000000000000,
   0b0000001110010011100100111001000001111000000000010,
   0b0000001110010011100100111001000001111000000001000,
   0b0000001110010011100100111001000001111000000100000
};
666 
/* Lookup tables currently in effect.  The set_*_index() helpers and
 * get_src_index() search the first 32 entries behind these pointers.  Nothing
 * in this part of the file assigns them.
 *
 * NOTE(review): presumably pointed at the per-generation tables above by
 * initialization code elsewhere in the file -- confirm.
 */
static const uint32_t *control_index_table;
static const uint32_t *datatype_table;
static const uint16_t *subreg_table;
static const uint16_t *src_index_table;
671 
672 static bool
set_control_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)673 set_control_index(const struct gen_device_info *devinfo,
674                   brw_compact_inst *dst, const brw_inst *src)
675 {
676    uint32_t uncompacted = devinfo->gen >= 8  /* 17b/G45; 19b/IVB+ */
677       ? (brw_inst_bits(src, 33, 31) << 16) | /*  3b */
678         (brw_inst_bits(src, 23, 12) <<  4) | /* 12b */
679         (brw_inst_bits(src, 10,  9) <<  2) | /*  2b */
680         (brw_inst_bits(src, 34, 34) <<  1) | /*  1b */
681         (brw_inst_bits(src,  8,  8))         /*  1b */
682       : (brw_inst_bits(src, 31, 31) << 16) | /*  1b */
683         (brw_inst_bits(src, 23,  8));        /* 16b */
684 
685    /* On gen7, the flag register and subregister numbers are integrated into
686     * the control index.
687     */
688    if (devinfo->gen == 7)
689       uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */
690 
691    for (int i = 0; i < 32; i++) {
692       if (control_index_table[i] == uncompacted) {
693          brw_compact_inst_set_control_index(devinfo, dst, i);
694 	 return true;
695       }
696    }
697 
698    return false;
699 }
700 
701 static bool
set_datatype_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)702 set_datatype_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
703                    const brw_inst *src)
704 {
705    uint32_t uncompacted = devinfo->gen >= 8  /* 18b/G45+; 21b/BDW+ */
706       ? (brw_inst_bits(src, 63, 61) << 18) | /*  3b */
707         (brw_inst_bits(src, 94, 89) << 12) | /*  6b */
708         (brw_inst_bits(src, 46, 35))         /* 12b */
709       : (brw_inst_bits(src, 63, 61) << 15) | /*  3b */
710         (brw_inst_bits(src, 46, 32));        /* 15b */
711 
712    for (int i = 0; i < 32; i++) {
713       if (datatype_table[i] == uncompacted) {
714          brw_compact_inst_set_datatype_index(devinfo, dst, i);
715 	 return true;
716       }
717    }
718 
719    return false;
720 }
721 
722 static bool
set_subreg_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)723 set_subreg_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
724                  const brw_inst *src, bool is_immediate)
725 {
726    uint16_t uncompacted =                 /* 15b */
727       (brw_inst_bits(src, 52, 48) << 0) | /*  5b */
728       (brw_inst_bits(src, 68, 64) << 5);  /*  5b */
729 
730    if (!is_immediate)
731       uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */
732 
733    for (int i = 0; i < 32; i++) {
734       if (subreg_table[i] == uncompacted) {
735          brw_compact_inst_set_subreg_index(devinfo, dst, i);
736 	 return true;
737       }
738    }
739 
740    return false;
741 }
742 
743 static bool
get_src_index(uint16_t uncompacted,uint16_t * compacted)744 get_src_index(uint16_t uncompacted,
745               uint16_t *compacted)
746 {
747    for (int i = 0; i < 32; i++) {
748       if (src_index_table[i] == uncompacted) {
749 	 *compacted = i;
750 	 return true;
751       }
752    }
753 
754    return false;
755 }
756 
757 static bool
set_src0_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)758 set_src0_index(const struct gen_device_info *devinfo,
759                brw_compact_inst *dst, const brw_inst *src)
760 {
761    uint16_t compacted;
762    uint16_t uncompacted = brw_inst_bits(src, 88, 77); /* 12b */
763 
764    if (!get_src_index(uncompacted, &compacted))
765       return false;
766 
767    brw_compact_inst_set_src0_index(devinfo, dst, compacted);
768 
769    return true;
770 }
771 
772 static bool
set_src1_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src,bool is_immediate)773 set_src1_index(const struct gen_device_info *devinfo, brw_compact_inst *dst,
774                const brw_inst *src, bool is_immediate)
775 {
776    uint16_t compacted;
777 
778    if (is_immediate) {
779       compacted = (brw_inst_imm_ud(devinfo, src) >> 8) & 0x1f;
780    } else {
781       uint16_t uncompacted = brw_inst_bits(src, 120, 109); /* 12b */
782 
783       if (!get_src_index(uncompacted, &compacted))
784          return false;
785    }
786 
787    brw_compact_inst_set_src1_index(devinfo, dst, compacted);
788 
789    return true;
790 }
791 
792 static bool
set_3src_control_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)793 set_3src_control_index(const struct gen_device_info *devinfo,
794                        brw_compact_inst *dst, const brw_inst *src)
795 {
796    assert(devinfo->gen >= 8);
797 
798    uint32_t uncompacted =                  /* 24b/BDW; 26b/CHV */
799       (brw_inst_bits(src, 34, 32) << 21) | /*  3b */
800       (brw_inst_bits(src, 28,  8));        /* 21b */
801 
802    if (devinfo->gen >= 9 || devinfo->is_cherryview)
803       uncompacted |= brw_inst_bits(src, 36, 35) << 24; /* 2b */
804 
805    for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_control_index_table); i++) {
806       if (gen8_3src_control_index_table[i] == uncompacted) {
807          brw_compact_inst_set_3src_control_index(devinfo, dst, i);
808 	 return true;
809       }
810    }
811 
812    return false;
813 }
814 
815 static bool
set_3src_source_index(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)816 set_3src_source_index(const struct gen_device_info *devinfo,
817                       brw_compact_inst *dst, const brw_inst *src)
818 {
819    assert(devinfo->gen >= 8);
820 
821    uint64_t uncompacted =                    /* 46b/BDW; 49b/CHV */
822       (brw_inst_bits(src,  83,  83) << 43) | /*  1b */
823       (brw_inst_bits(src, 114, 107) << 35) | /*  8b */
824       (brw_inst_bits(src,  93,  86) << 27) | /*  8b */
825       (brw_inst_bits(src,  72,  65) << 19) | /*  8b */
826       (brw_inst_bits(src,  55,  37));        /* 19b */
827 
828    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
829       uncompacted |=
830          (brw_inst_bits(src, 126, 125) << 47) | /* 2b */
831          (brw_inst_bits(src, 105, 104) << 45) | /* 2b */
832          (brw_inst_bits(src,  84,  84) << 44);  /* 1b */
833    } else {
834       uncompacted |=
835          (brw_inst_bits(src, 125, 125) << 45) | /* 1b */
836          (brw_inst_bits(src, 104, 104) << 44);  /* 1b */
837    }
838 
839    for (unsigned i = 0; i < ARRAY_SIZE(gen8_3src_source_index_table); i++) {
840       if (gen8_3src_source_index_table[i] == uncompacted) {
841          brw_compact_inst_set_3src_source_index(devinfo, dst, i);
842 	 return true;
843       }
844    }
845 
846    return false;
847 }
848 
849 static bool
has_unmapped_bits(const struct gen_device_info * devinfo,const brw_inst * src)850 has_unmapped_bits(const struct gen_device_info *devinfo, const brw_inst *src)
851 {
852    /* EOT can only be mapped on a send if the src1 is an immediate */
853    if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||
854         brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&
855        brw_inst_eot(devinfo, src))
856       return true;
857 
858    /* Check for instruction bits that don't map to any of the fields of the
859     * compacted instruction.  The instruction cannot be compacted if any of
860     * them are set.  They overlap with:
861     *  - NibCtrl (bit 47 on Gen7, bit 11 on Gen8)
862     *  - Dst.AddrImm[9] (bit 47 on Gen8)
863     *  - Src0.AddrImm[9] (bit 95 on Gen8)
864     *  - Imm64[27:31] (bits 91-95 on Gen7, bit 95 on Gen8)
865     *  - UIP[31] (bit 95 on Gen8)
866     */
867    if (devinfo->gen >= 8) {
868       assert(!brw_inst_bits(src, 7,  7));
869       return brw_inst_bits(src, 95, 95) ||
870              brw_inst_bits(src, 47, 47) ||
871              brw_inst_bits(src, 11, 11);
872    } else {
873       assert(!brw_inst_bits(src, 7,  7) &&
874              !(devinfo->gen < 7 && brw_inst_bits(src, 90, 90)));
875       return brw_inst_bits(src, 95, 91) ||
876              brw_inst_bits(src, 47, 47);
877    }
878 }
879 
880 static bool
has_3src_unmapped_bits(const struct gen_device_info * devinfo,const brw_inst * src)881 has_3src_unmapped_bits(const struct gen_device_info *devinfo,
882                        const brw_inst *src)
883 {
884    /* Check for three-source instruction bits that don't map to any of the
885     * fields of the compacted instruction.  All of them seem to be reserved
886     * bits currently.
887     */
888    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
889       assert(!brw_inst_bits(src, 127, 127) &&
890              !brw_inst_bits(src, 7,  7));
891    } else {
892       assert(devinfo->gen >= 8);
893       assert(!brw_inst_bits(src, 127, 126) &&
894              !brw_inst_bits(src, 105, 105) &&
895              !brw_inst_bits(src, 84, 84) &&
896              !brw_inst_bits(src, 36, 35) &&
897              !brw_inst_bits(src, 7,  7));
898    }
899 
900    return false;
901 }
902 
903 static bool
brw_try_compact_3src_instruction(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)904 brw_try_compact_3src_instruction(const struct gen_device_info *devinfo,
905                                  brw_compact_inst *dst, const brw_inst *src)
906 {
907    assert(devinfo->gen >= 8);
908 
909    if (has_3src_unmapped_bits(devinfo, src))
910       return false;
911 
912 #define compact(field) \
913    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))
914 #define compact_a16(field) \
915    brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))
916 
917    compact(opcode);
918 
919    if (!set_3src_control_index(devinfo, dst, src))
920       return false;
921 
922    if (!set_3src_source_index(devinfo, dst, src))
923       return false;
924 
925    compact(dst_reg_nr);
926    compact_a16(src0_rep_ctrl);
927    brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);
928    compact(debug_control);
929    compact(saturate);
930    compact_a16(src1_rep_ctrl);
931    compact_a16(src2_rep_ctrl);
932    compact(src0_reg_nr);
933    compact(src1_reg_nr);
934    compact(src2_reg_nr);
935    compact_a16(src0_subreg_nr);
936    compact_a16(src1_subreg_nr);
937    compact_a16(src2_subreg_nr);
938 
939 #undef compact
940 #undef compact_a16
941 
942    return true;
943 }
944 
/* Compacted instructions have 12-bits for immediate sources, and a 13th bit
 * that's replicated through the high 20 bits.
 *
 * Effectively this means we get 12-bit integers, 0.0f, and some limited uses
 * of packed vectors as compactable immediates.
 *
 * Returns true when the upper 20 bits of the 32-bit value imm are either all
 * zero or all one; the low 12 bits may hold anything.
 */
static bool
is_compactable_immediate(unsigned imm)
{
   /* We get the low 12 bits as-is, so only the bits above them matter. */
   const unsigned high_bits = imm & ~0xfffu;

   /* We get one bit replicated through the top 20 bits. */
   return high_bits == 0u || high_bits == 0xfffff000u;
}
960 
961 /**
962  * Applies some small changes to instruction types to increase chances of
963  * compaction.
964  */
965 static brw_inst
precompact(const struct gen_device_info * devinfo,brw_inst inst)966 precompact(const struct gen_device_info *devinfo, brw_inst inst)
967 {
968    if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)
969       return inst;
970 
971    /* The Bspec's section titled "Non-present Operands" claims that if src0
972     * is an immediate that src1's type must be the same as that of src0.
973     *
974     * The SNB+ DataTypeIndex instruction compaction tables contain mappings
975     * that do not follow this rule. E.g., from the IVB/HSW table:
976     *
977     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
978     *        3         001000001011111101   r:f | i:vf | a:ud | <1> | dir |
979     *
980     * And from the SNB table:
981     *
982     *  DataTypeIndex   18-Bit Mapping       Mapped Meaning
983     *        8         001000000111101100   a:w | i:w | a:ud | <1> | dir |
984     *
985     * Neither of these cause warnings from the simulator when used,
986     * compacted or otherwise. In fact, all compaction mappings that have an
987     * immediate in src0 use a:ud for src1.
988     *
989     * The GM45 instruction compaction tables do not contain mapped meanings
990     * so it's not clear whether it has the restriction. We'll assume it was
991     * lifted on SNB. (FINISHME: decode the GM45 tables and check.)
992     *
993     * Don't do any of this for 64-bit immediates, since the src1 fields
994     * overlap with the immediate and setting them would overwrite the
995     * immediate we set.
996     */
997    if (devinfo->gen >= 6 &&
998        !(devinfo->is_haswell &&
999          brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&
1000        !(devinfo->gen >= 8 &&
1001          (brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||
1002           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||
1003           brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {
1004       enum brw_reg_file file = brw_inst_src1_reg_file(devinfo, &inst);
1005       brw_inst_set_src1_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_UD);
1006    }
1007 
1008    /* Compacted instructions only have 12-bits (plus 1 for the other 20)
1009     * for immediate values. Presumably the hardware engineers realized
1010     * that the only useful floating-point value that could be represented
1011     * in this format is 0.0, which can also be represented as a VF-typed
1012     * immediate, so they gave us the previously mentioned mapping on IVB+.
1013     *
1014     * Strangely, we do have a mapping for imm:f in src1, so we don't need
1015     * to do this there.
1016     *
1017     * If we see a 0.0:F, change the type to VF so that it can be compacted.
1018     */
1019    if (brw_inst_imm_ud(devinfo, &inst) == 0x0 &&
1020        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1021        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&
1022        brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {
1023       enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);
1024       brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);
1025    }
1026 
1027    /* There are no mappings for dst:d | i:d, so if the immediate is suitable
1028     * set the types to :UD so the instruction can be compacted.
1029     */
1030    if (is_compactable_immediate(brw_inst_imm_ud(devinfo, &inst)) &&
1031        brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&
1032        brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&
1033        brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {
1034       enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);
1035       enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);
1036 
1037       brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);
1038       brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);
1039    }
1040 
1041    return inst;
1042 }
1043 
1044 /**
1045  * Tries to compact instruction src into dst.
1046  *
1047  * It doesn't modify dst unless src is compactable, which is relied on by
1048  * brw_compact_instructions().
1049  */
1050 bool
brw_try_compact_instruction(const struct gen_device_info * devinfo,brw_compact_inst * dst,const brw_inst * src)1051 brw_try_compact_instruction(const struct gen_device_info *devinfo,
1052                             brw_compact_inst *dst, const brw_inst *src)
1053 {
1054    brw_compact_inst temp;
1055 
1056    assert(brw_inst_cmpt_control(devinfo, src) == 0);
1057 
1058    if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {
1059       if (devinfo->gen >= 8) {
1060          memset(&temp, 0, sizeof(temp));
1061          if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {
1062             *dst = temp;
1063             return true;
1064          } else {
1065             return false;
1066          }
1067       } else {
1068          return false;
1069       }
1070    }
1071 
1072    bool is_immediate =
1073       brw_inst_src0_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE ||
1074       brw_inst_src1_reg_file(devinfo, src) == BRW_IMMEDIATE_VALUE;
1075    if (is_immediate &&
1076        (devinfo->gen < 6 ||
1077         !is_compactable_immediate(brw_inst_imm_ud(devinfo, src)))) {
1078       return false;
1079    }
1080 
1081    if (has_unmapped_bits(devinfo, src))
1082       return false;
1083 
1084    memset(&temp, 0, sizeof(temp));
1085 
1086 #define compact(field) \
1087    brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))
1088 
1089    compact(opcode);
1090    compact(debug_control);
1091 
1092    if (!set_control_index(devinfo, &temp, src))
1093       return false;
1094    if (!set_datatype_index(devinfo, &temp, src))
1095       return false;
1096    if (!set_subreg_index(devinfo, &temp, src, is_immediate))
1097       return false;
1098 
1099    if (devinfo->gen >= 6) {
1100       compact(acc_wr_control);
1101    } else {
1102       compact(mask_control_ex);
1103    }
1104 
1105    compact(cond_modifier);
1106 
1107    if (devinfo->gen <= 6)
1108       compact(flag_subreg_nr);
1109 
1110    brw_compact_inst_set_cmpt_control(devinfo, &temp, true);
1111 
1112    if (!set_src0_index(devinfo, &temp, src))
1113       return false;
1114    if (!set_src1_index(devinfo, &temp, src, is_immediate))
1115       return false;
1116 
1117    brw_compact_inst_set_dst_reg_nr(devinfo, &temp,
1118                                    brw_inst_dst_da_reg_nr(devinfo, src));
1119    brw_compact_inst_set_src0_reg_nr(devinfo, &temp,
1120                                     brw_inst_src0_da_reg_nr(devinfo, src));
1121 
1122    if (is_immediate) {
1123       brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1124                                        brw_inst_imm_ud(devinfo, src) & 0xff);
1125    } else {
1126       brw_compact_inst_set_src1_reg_nr(devinfo, &temp,
1127                                        brw_inst_src1_da_reg_nr(devinfo, src));
1128    }
1129 
1130 #undef compact
1131 
1132    *dst = temp;
1133 
1134    return true;
1135 }
1136 
1137 static void
set_uncompacted_control(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1138 set_uncompacted_control(const struct gen_device_info *devinfo, brw_inst *dst,
1139                         brw_compact_inst *src)
1140 {
1141    uint32_t uncompacted =
1142       control_index_table[brw_compact_inst_control_index(devinfo, src)];
1143 
1144    if (devinfo->gen >= 8) {
1145       brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));
1146       brw_inst_set_bits(dst, 23, 12, (uncompacted >>  4) & 0xfff);
1147       brw_inst_set_bits(dst, 10,  9, (uncompacted >>  2) & 0x3);
1148       brw_inst_set_bits(dst, 34, 34, (uncompacted >>  1) & 0x1);
1149       brw_inst_set_bits(dst,  8,  8, (uncompacted >>  0) & 0x1);
1150    } else {
1151       brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);
1152       brw_inst_set_bits(dst, 23,  8, (uncompacted & 0xffff));
1153 
1154       if (devinfo->gen == 7)
1155          brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);
1156    }
1157 }
1158 
1159 static void
set_uncompacted_datatype(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1160 set_uncompacted_datatype(const struct gen_device_info *devinfo, brw_inst *dst,
1161                          brw_compact_inst *src)
1162 {
1163    uint32_t uncompacted =
1164       datatype_table[brw_compact_inst_datatype_index(devinfo, src)];
1165 
1166    if (devinfo->gen >= 8) {
1167       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));
1168       brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);
1169       brw_inst_set_bits(dst, 46, 35, (uncompacted >>  0) & 0xfff);
1170    } else {
1171       brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));
1172       brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));
1173    }
1174 }
1175 
1176 static void
set_uncompacted_subreg(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1177 set_uncompacted_subreg(const struct gen_device_info *devinfo, brw_inst *dst,
1178                        brw_compact_inst *src)
1179 {
1180    uint16_t uncompacted =
1181       subreg_table[brw_compact_inst_subreg_index(devinfo, src)];
1182 
1183    brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));
1184    brw_inst_set_bits(dst,  68, 64, (uncompacted >>  5) & 0x1f);
1185    brw_inst_set_bits(dst,  52, 48, (uncompacted >>  0) & 0x1f);
1186 }
1187 
1188 static void
set_uncompacted_src0(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1189 set_uncompacted_src0(const struct gen_device_info *devinfo, brw_inst *dst,
1190                      brw_compact_inst *src)
1191 {
1192    uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);
1193    uint16_t uncompacted = src_index_table[compacted];
1194 
1195    brw_inst_set_bits(dst, 88, 77, uncompacted);
1196 }
1197 
1198 static void
set_uncompacted_src1(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src,bool is_immediate)1199 set_uncompacted_src1(const struct gen_device_info *devinfo, brw_inst *dst,
1200                      brw_compact_inst *src, bool is_immediate)
1201 {
1202    if (is_immediate) {
1203       signed high5 = brw_compact_inst_src1_index(devinfo, src);
1204       /* Replicate top bit of src1_index into high 20 bits of the immediate. */
1205       brw_inst_set_imm_ud(devinfo, dst, (high5 << 27) >> 19);
1206    } else {
1207       uint16_t uncompacted =
1208          src_index_table[brw_compact_inst_src1_index(devinfo, src)];
1209 
1210       brw_inst_set_bits(dst, 120, 109, uncompacted);
1211    }
1212 }
1213 
1214 static void
set_uncompacted_3src_control_index(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1215 set_uncompacted_3src_control_index(const struct gen_device_info *devinfo,
1216                                    brw_inst *dst, brw_compact_inst *src)
1217 {
1218    assert(devinfo->gen >= 8);
1219 
1220    uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);
1221    uint32_t uncompacted = gen8_3src_control_index_table[compacted];
1222 
1223    brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);
1224    brw_inst_set_bits(dst, 28,  8, (uncompacted >>  0) & 0x1fffff);
1225 
1226    if (devinfo->gen >= 9 || devinfo->is_cherryview)
1227       brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);
1228 }
1229 
1230 static void
set_uncompacted_3src_source_index(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1231 set_uncompacted_3src_source_index(const struct gen_device_info *devinfo,
1232                                   brw_inst *dst, brw_compact_inst *src)
1233 {
1234    assert(devinfo->gen >= 8);
1235 
1236    uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);
1237    uint64_t uncompacted = gen8_3src_source_index_table[compacted];
1238 
1239    brw_inst_set_bits(dst,  83,  83, (uncompacted >> 43) & 0x1);
1240    brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);
1241    brw_inst_set_bits(dst,  93,  86, (uncompacted >> 27) & 0xff);
1242    brw_inst_set_bits(dst,  72,  65, (uncompacted >> 19) & 0xff);
1243    brw_inst_set_bits(dst,  55,  37, (uncompacted >>  0) & 0x7ffff);
1244 
1245    if (devinfo->gen >= 9 || devinfo->is_cherryview) {
1246       brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);
1247       brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);
1248       brw_inst_set_bits(dst,  84,  84, (uncompacted >> 44) & 0x1);
1249    } else {
1250       brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);
1251       brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);
1252    }
1253 }
1254 
1255 static void
brw_uncompact_3src_instruction(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1256 brw_uncompact_3src_instruction(const struct gen_device_info *devinfo,
1257                                brw_inst *dst, brw_compact_inst *src)
1258 {
1259    assert(devinfo->gen >= 8);
1260 
1261 #define uncompact(field) \
1262    brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1263 #define uncompact_a16(field) \
1264    brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))
1265 
1266    uncompact(opcode);
1267 
1268    set_uncompacted_3src_control_index(devinfo, dst, src);
1269    set_uncompacted_3src_source_index(devinfo, dst, src);
1270 
1271    uncompact(dst_reg_nr);
1272    uncompact_a16(src0_rep_ctrl);
1273    brw_inst_set_3src_cmpt_control(devinfo, dst, false);
1274    uncompact(debug_control);
1275    uncompact(saturate);
1276    uncompact_a16(src1_rep_ctrl);
1277    uncompact_a16(src2_rep_ctrl);
1278    uncompact(src0_reg_nr);
1279    uncompact(src1_reg_nr);
1280    uncompact(src2_reg_nr);
1281    uncompact_a16(src0_subreg_nr);
1282    uncompact_a16(src1_subreg_nr);
1283    uncompact_a16(src2_subreg_nr);
1284 
1285 #undef uncompact
1286 #undef uncompact_a16
1287 }
1288 
1289 void
brw_uncompact_instruction(const struct gen_device_info * devinfo,brw_inst * dst,brw_compact_inst * src)1290 brw_uncompact_instruction(const struct gen_device_info *devinfo, brw_inst *dst,
1291                           brw_compact_inst *src)
1292 {
1293    memset(dst, 0, sizeof(*dst));
1294 
1295    if (devinfo->gen >= 8 &&
1296        is_3src(devinfo, brw_compact_inst_3src_opcode(devinfo, src))) {
1297       brw_uncompact_3src_instruction(devinfo, dst, src);
1298       return;
1299    }
1300 
1301 #define uncompact(field) \
1302    brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))
1303 
1304    uncompact(opcode);
1305    uncompact(debug_control);
1306 
1307    set_uncompacted_control(devinfo, dst, src);
1308    set_uncompacted_datatype(devinfo, dst, src);
1309 
1310    /* src0/1 register file fields are in the datatype table. */
1311    bool is_immediate = brw_inst_src0_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE ||
1312                        brw_inst_src1_reg_file(devinfo, dst) == BRW_IMMEDIATE_VALUE;
1313 
1314    set_uncompacted_subreg(devinfo, dst, src);
1315 
1316    if (devinfo->gen >= 6) {
1317       uncompact(acc_wr_control);
1318    } else {
1319       uncompact(mask_control_ex);
1320    }
1321 
1322    uncompact(cond_modifier);
1323 
1324    if (devinfo->gen <= 6)
1325       uncompact(flag_subreg_nr);
1326 
1327    set_uncompacted_src0(devinfo, dst, src);
1328    set_uncompacted_src1(devinfo, dst, src, is_immediate);
1329 
1330    brw_inst_set_dst_da_reg_nr(devinfo, dst,
1331                               brw_compact_inst_dst_reg_nr(devinfo, src));
1332    brw_inst_set_src0_da_reg_nr(devinfo, dst,
1333                                brw_compact_inst_src0_reg_nr(devinfo, src));
1334 
1335    if (is_immediate) {
1336       brw_inst_set_imm_ud(devinfo, dst,
1337                           brw_inst_imm_ud(devinfo, dst) |
1338                           brw_compact_inst_src1_reg_nr(devinfo, src));
1339    } else {
1340       brw_inst_set_src1_da_reg_nr(devinfo, dst,
1341                                   brw_compact_inst_src1_reg_nr(devinfo, src));
1342    }
1343 
1344 #undef uncompact
1345 }
1346 
/**
 * Reports, on stderr, a mismatch between an instruction and the result of
 * compacting and then uncompacting it.
 *
 * Prints the disassembly of both instructions followed by one line for each
 * of the 128 instruction bits whose value differs.
 *
 * @param devinfo      device the instructions were encoded for
 * @param orig         instruction before compaction
 * @param uncompacted  instruction after the compact/uncompact round trip
 */
void brw_debug_compact_uncompact(const struct gen_device_info *devinfo,
                                 brw_inst *orig,
                                 brw_inst *uncompacted)
{
   fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",
           devinfo->gen);

   fprintf(stderr, "  before: ");
   brw_disassemble_inst(stderr, devinfo, orig, true);

   fprintf(stderr, "  after:  ");
   brw_disassemble_inst(stderr, devinfo, uncompacted, false);

   /* View both instructions as four 32-bit words. */
   uint32_t *before_bits = (uint32_t *)orig;
   uint32_t *after_bits = (uint32_t *)uncompacted;
   fprintf(stderr, "  changed bits:\n");
   for (int i = 0; i < 128; i++) {
      /* Build the mask from an unsigned constant: "1 << 31" would overflow
       * a signed int, which is undefined behaviour.
       */
      const uint32_t mask = 1u << (i & 31);
      uint32_t before = before_bits[i / 32] & mask;
      uint32_t after = after_bits[i / 32] & mask;

      if (before != after) {
         fprintf(stderr, "  bit %d, %s to %s\n", i,
                 before ? "set" : "unset",
                 after ? "set" : "unset");
      }
   }
}
1374 
/**
 * Returns the difference between the compacted_counts entries for
 * old_target_ip and old_ip (target minus current).  The result is negative
 * when the target's entry is the smaller of the two.
 */
static int
compacted_between(int old_ip, int old_target_ip, int *compacted_counts)
{
   return compacted_counts[old_target_ip] - compacted_counts[old_ip];
}
1382 
1383 static void
update_uip_jip(const struct gen_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)1384 update_uip_jip(const struct gen_device_info *devinfo, brw_inst *insn,
1385                int this_old_ip, int *compacted_counts)
1386 {
1387    /* JIP and UIP are in units of:
1388     *    - bytes on Gen8+; and
1389     *    - compacted instructions on Gen6+.
1390     */
1391    int shift = devinfo->gen >= 8 ? 3 : 0;
1392 
1393    int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;
1394    jip_compacted -= compacted_between(this_old_ip,
1395                                       this_old_ip + (jip_compacted / 2),
1396                                       compacted_counts);
1397    brw_inst_set_jip(devinfo, insn, jip_compacted << shift);
1398 
1399    if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||
1400        brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||
1401        (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->gen <= 7))
1402       return;
1403 
1404    int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;
1405    uip_compacted -= compacted_between(this_old_ip,
1406                                       this_old_ip + (uip_compacted / 2),
1407                                       compacted_counts);
1408    brw_inst_set_uip(devinfo, insn, uip_compacted << shift);
1409 }
1410 
1411 static void
update_gen4_jump_count(const struct gen_device_info * devinfo,brw_inst * insn,int this_old_ip,int * compacted_counts)1412 update_gen4_jump_count(const struct gen_device_info *devinfo, brw_inst *insn,
1413                        int this_old_ip, int *compacted_counts)
1414 {
1415    assert(devinfo->gen == 5 || devinfo->is_g4x);
1416 
1417    /* Jump Count is in units of:
1418     *    - uncompacted instructions on G45; and
1419     *    - compacted instructions on Gen5.
1420     */
1421    int shift = devinfo->is_g4x ? 1 : 0;
1422 
1423    int jump_count_compacted = brw_inst_gen4_jump_count(devinfo, insn) << shift;
1424 
1425    int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1426 
1427    int this_compacted_count = compacted_counts[this_old_ip];
1428    int target_compacted_count = compacted_counts[target_old_ip];
1429 
1430    jump_count_compacted -= (target_compacted_count - this_compacted_count);
1431    brw_inst_set_gen4_jump_count(devinfo, insn, jump_count_compacted >> shift);
1432 }
1433 
1434 void
brw_init_compaction_tables(const struct gen_device_info * devinfo)1435 brw_init_compaction_tables(const struct gen_device_info *devinfo)
1436 {
1437    assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);
1438    assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);
1439    assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);
1440    assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);
1441    assert(gen6_control_index_table[ARRAY_SIZE(gen6_control_index_table) - 1] != 0);
1442    assert(gen6_datatype_table[ARRAY_SIZE(gen6_datatype_table) - 1] != 0);
1443    assert(gen6_subreg_table[ARRAY_SIZE(gen6_subreg_table) - 1] != 0);
1444    assert(gen6_src_index_table[ARRAY_SIZE(gen6_src_index_table) - 1] != 0);
1445    assert(gen7_control_index_table[ARRAY_SIZE(gen7_control_index_table) - 1] != 0);
1446    assert(gen7_datatype_table[ARRAY_SIZE(gen7_datatype_table) - 1] != 0);
1447    assert(gen7_subreg_table[ARRAY_SIZE(gen7_subreg_table) - 1] != 0);
1448    assert(gen7_src_index_table[ARRAY_SIZE(gen7_src_index_table) - 1] != 0);
1449    assert(gen8_control_index_table[ARRAY_SIZE(gen8_control_index_table) - 1] != 0);
1450    assert(gen8_datatype_table[ARRAY_SIZE(gen8_datatype_table) - 1] != 0);
1451    assert(gen8_subreg_table[ARRAY_SIZE(gen8_subreg_table) - 1] != 0);
1452    assert(gen8_src_index_table[ARRAY_SIZE(gen8_src_index_table) - 1] != 0);
1453 
1454    switch (devinfo->gen) {
1455    case 10:
1456    case 9:
1457    case 8:
1458       control_index_table = gen8_control_index_table;
1459       datatype_table = gen8_datatype_table;
1460       subreg_table = gen8_subreg_table;
1461       src_index_table = gen8_src_index_table;
1462       break;
1463    case 7:
1464       control_index_table = gen7_control_index_table;
1465       datatype_table = gen7_datatype_table;
1466       subreg_table = gen7_subreg_table;
1467       src_index_table = gen7_src_index_table;
1468       break;
1469    case 6:
1470       control_index_table = gen6_control_index_table;
1471       datatype_table = gen6_datatype_table;
1472       subreg_table = gen6_subreg_table;
1473       src_index_table = gen6_src_index_table;
1474       break;
1475    case 5:
1476    case 4:
1477       control_index_table = g45_control_index_table;
1478       datatype_table = g45_datatype_table;
1479       subreg_table = g45_subreg_table;
1480       src_index_table = g45_src_index_table;
1481       break;
1482    default:
1483       unreachable("unknown generation");
1484    }
1485 }
1486 
1487 void
brw_compact_instructions(struct brw_codegen * p,int start_offset,struct disasm_info * disasm)1488 brw_compact_instructions(struct brw_codegen *p, int start_offset,
1489                          struct disasm_info *disasm)
1490 {
1491    if (unlikely(INTEL_DEBUG & DEBUG_NO_COMPACTION))
1492       return;
1493 
1494    const struct gen_device_info *devinfo = p->devinfo;
1495    void *store = p->store + start_offset / 16;
1496    /* For an instruction at byte offset 16*i before compaction, this is the
1497     * number of compacted instructions minus the number of padding NOP/NENOPs
1498     * that preceded it.
1499     */
1500    int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];
1501    /* For an instruction at byte offset 8*i after compaction, this was its IP
1502     * (in 16-byte units) before compaction.
1503     */
1504    int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];
1505 
1506    if (devinfo->gen == 4 && !devinfo->is_g4x)
1507       return;
1508 
1509    int offset = 0;
1510    int compacted_count = 0;
1511    for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;
1512         src_offset += sizeof(brw_inst)) {
1513       brw_inst *src = store + src_offset;
1514       void *dst = store + offset;
1515 
1516       old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1517       compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1518 
1519       brw_inst inst = precompact(devinfo, *src);
1520       brw_inst saved = inst;
1521 
1522       if (brw_try_compact_instruction(devinfo, dst, &inst)) {
1523          compacted_count++;
1524 
1525          if (INTEL_DEBUG) {
1526             brw_inst uncompacted;
1527             brw_uncompact_instruction(devinfo, &uncompacted, dst);
1528             if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {
1529                brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);
1530             }
1531          }
1532 
1533          offset += sizeof(brw_compact_inst);
1534       } else {
1535          /* All uncompacted instructions need to be aligned on G45. */
1536          if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){
1537             brw_compact_inst *align = store + offset;
1538             memset(align, 0, sizeof(*align));
1539             brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NENOP);
1540             brw_compact_inst_set_cmpt_control(devinfo, align, true);
1541             offset += sizeof(brw_compact_inst);
1542             compacted_count--;
1543             compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;
1544             old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);
1545 
1546             dst = store + offset;
1547          }
1548 
1549          /* If we didn't compact this intruction, we need to move it down into
1550           * place.
1551           */
1552          if (offset != src_offset) {
1553             memmove(dst, src, sizeof(brw_inst));
1554          }
1555          offset += sizeof(brw_inst);
1556       }
1557    }
1558 
1559    /* Add an entry for the ending offset of the program. This greatly
1560     * simplifies the linked list walk at the end of the function.
1561     */
1562    old_ip[offset / sizeof(brw_compact_inst)] =
1563       (p->next_insn_offset - start_offset) / sizeof(brw_inst);
1564 
1565    /* Fix up control flow offsets. */
1566    p->next_insn_offset = start_offset + offset;
1567    for (offset = 0; offset < p->next_insn_offset - start_offset;
1568         offset = next_offset(devinfo, store, offset)) {
1569       brw_inst *insn = store + offset;
1570       int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];
1571       int this_compacted_count = compacted_counts[this_old_ip];
1572 
1573       switch (brw_inst_opcode(devinfo, insn)) {
1574       case BRW_OPCODE_BREAK:
1575       case BRW_OPCODE_CONTINUE:
1576       case BRW_OPCODE_HALT:
1577          if (devinfo->gen >= 6) {
1578             update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1579          } else {
1580             update_gen4_jump_count(devinfo, insn, this_old_ip,
1581                                    compacted_counts);
1582          }
1583          break;
1584 
1585       case BRW_OPCODE_IF:
1586       case BRW_OPCODE_IFF:
1587       case BRW_OPCODE_ELSE:
1588       case BRW_OPCODE_ENDIF:
1589       case BRW_OPCODE_WHILE:
1590          if (devinfo->gen >= 7) {
1591             if (brw_inst_cmpt_control(devinfo, insn)) {
1592                brw_inst uncompacted;
1593                brw_uncompact_instruction(devinfo, &uncompacted,
1594                                          (brw_compact_inst *)insn);
1595 
1596                update_uip_jip(devinfo, &uncompacted, this_old_ip,
1597                               compacted_counts);
1598 
1599                bool ret = brw_try_compact_instruction(devinfo,
1600                                                       (brw_compact_inst *)insn,
1601                                                       &uncompacted);
1602                assert(ret); (void)ret;
1603             } else {
1604                update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);
1605             }
1606          } else if (devinfo->gen == 6) {
1607             assert(!brw_inst_cmpt_control(devinfo, insn));
1608 
1609             /* Jump Count is in units of compacted instructions on Gen6. */
1610             int jump_count_compacted = brw_inst_gen6_jump_count(devinfo, insn);
1611 
1612             int target_old_ip = this_old_ip + (jump_count_compacted / 2);
1613             int target_compacted_count = compacted_counts[target_old_ip];
1614             jump_count_compacted -= (target_compacted_count - this_compacted_count);
1615             brw_inst_set_gen6_jump_count(devinfo, insn, jump_count_compacted);
1616          } else {
1617             update_gen4_jump_count(devinfo, insn, this_old_ip,
1618                                    compacted_counts);
1619          }
1620          break;
1621 
1622       case BRW_OPCODE_ADD:
1623          /* Add instructions modifying the IP register use an immediate src1,
1624           * and Gens that use this cannot compact instructions with immediate
1625           * operands.
1626           */
1627          if (brw_inst_cmpt_control(devinfo, insn))
1628             break;
1629 
1630          if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&
1631              brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {
1632             assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);
1633 
1634             int shift = 3;
1635             int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;
1636 
1637             int target_old_ip = this_old_ip + (jump_compacted / 2);
1638             int target_compacted_count = compacted_counts[target_old_ip];
1639             jump_compacted -= (target_compacted_count - this_compacted_count);
1640             brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);
1641          }
1642          break;
1643       }
1644    }
1645 
1646    /* p->nr_insn is counting the number of uncompacted instructions still, so
1647     * divide.  We do want to be sure there's a valid instruction in any
1648     * alignment padding, so that the next compression pass (for the FS 8/16
1649     * compile passes) parses correctly.
1650     */
1651    if (p->next_insn_offset & sizeof(brw_compact_inst)) {
1652       brw_compact_inst *align = store + offset;
1653       memset(align, 0, sizeof(*align));
1654       brw_compact_inst_set_opcode(devinfo, align, BRW_OPCODE_NOP);
1655       brw_compact_inst_set_cmpt_control(devinfo, align, true);
1656       p->next_insn_offset += sizeof(brw_compact_inst);
1657    }
1658    p->nr_insn = p->next_insn_offset / sizeof(brw_inst);
1659 
1660    /* Update the instruction offsets for each group. */
1661    if (disasm) {
1662       int offset = 0;
1663 
1664       foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {
1665          while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1666                 sizeof(brw_inst) != group->offset) {
1667             assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *
1668                    sizeof(brw_inst) < group->offset);
1669             offset = next_offset(devinfo, store, offset);
1670          }
1671 
1672          group->offset = start_offset + offset;
1673 
1674          offset = next_offset(devinfo, store, offset);
1675       }
1676    }
1677 }
1678