• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2012-2013 LunarG, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Chia-I Wu <olv@lunarg.com>
26  */
27 
28 #include "toy_compiler.h"
29 
/*
 * Operand origins are stored as (RegNum << CG_REG_SHIFT | SubRegNumInBytes);
 * CG_REG_NUM() extracts the register number from such an origin.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(reg_origin) ((reg_origin) >> CG_REG_SHIFT)
32 
/* Destination operand as consumed by the encoders below. */
struct codegen_dst {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   /* (RegNum << 5 | SubRegNumInBytes) */
   unsigned origin;

   unsigned horz_stride;

   unsigned writemask;
};

/* Source operand as consumed by the encoders below. */
struct codegen_src {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   /* (RegNum << 5 | SubRegNumInBytes) */
   unsigned origin;

   /* register region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   /* one channel select per component */
   unsigned swizzle[4];
   /* source modifiers */
   bool absolute;
   bool negate;
};

/*
 * Per-instruction code generation state: the device, the instruction being
 * encoded, the flag register selection, and the decoded operands (one
 * destination and up to three sources).
 */
struct codegen {
   const struct ilo_dev *dev;
   const struct toy_inst *inst;
   int pc;

   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
69 
70 /*
71  * From the Sandy Bridge PRM, volume 4 part 2, page 107-108:
72  *
73  *     "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up
74  *      result forms bits [88:77], the source 0 register region fields, of the
75  *      128-bit instruction word."
76  *
77  *     "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit
78  *      table-look-up result forms bits [100:96], [68,64] and [52,48] of the
79  *      128-bit instruction word."
80  *
81  *     "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit
82  *      table-look-up result forms bits [63:61] and [46, 32] of the 128-bit
83  *      instruction word."
84  *
85  *     "(ControlIndex) The 5-bit index for data type fields. The 17-bit
86  *      table-look-up result forms bits[31], and [23, 8] of the 128-bit
87  *      instruction word."
88  */
89 static const struct toy_compaction_table toy_compaction_table_gen6 = {
90    .control = {
91       [0]   = 0x00000,  /* 00000000000000000 */
92       [1]   = 0x08000,  /* 01000000000000000 */
93       [2]   = 0x06000,  /* 00110000000000000 */
94       [3]   = 0x00100,  /* 00000000100000000 */
95       [4]   = 0x02000,  /* 00010000000000000 */
96       [5]   = 0x01100,  /* 00001000100000000 */
97       [6]   = 0x00102,  /* 00000000100000010 */
98       [7]   = 0x00002,  /* 00000000000000010 */
99       [8]   = 0x08100,  /* 01000000100000000 */
100       [9]   = 0x0a000,  /* 01010000000000000 */
101       [10]  = 0x16000,  /* 10110000000000000 */
102       [11]  = 0x04000,  /* 00100000000000000 */
103       [12]  = 0x1a000,  /* 11010000000000000 */
104       [13]  = 0x18000,  /* 11000000000000000 */
105       [14]  = 0x09100,  /* 01001000100000000 */
106       [15]  = 0x08008,  /* 01000000000001000 */
107       [16]  = 0x08004,  /* 01000000000000100 */
108       [17]  = 0x00008,  /* 00000000000001000 */
109       [18]  = 0x00004,  /* 00000000000000100 */
110       [19]  = 0x01100,  /* 00111000100000000 */
111       [20]  = 0x01102,  /* 00001000100000010 */
112       [21]  = 0x06100,  /* 00110000100000000 */
113       [22]  = 0x06001,  /* 00110000000000001 */
114       [23]  = 0x04001,  /* 00100000000000001 */
115       [24]  = 0x06002,  /* 00110000000000010 */
116       [25]  = 0x06005,  /* 00110000000000101 */
117       [26]  = 0x06009,  /* 00110000000001001 */
118       [27]  = 0x06010,  /* 00110000000010000 */
119       [28]  = 0x06003,  /* 00110000000000011 */
120       [29]  = 0x06004,  /* 00110000000000100 */
121       [30]  = 0x06108,  /* 00110000100001000 */
122       [31]  = 0x04009,  /* 00100000000001001 */
123    },
124    .datatype = {
125       [0]   = 0x09c00,  /* 001001110000000000 */
126       [1]   = 0x08c20,  /* 001000110000100000 */
127       [2]   = 0x09c01,  /* 001001110000000001 */
128       [3]   = 0x08060,  /* 001000000001100000 */
129       [4]   = 0x0ad29,  /* 001010110100101001 */
130       [5]   = 0x081ad,  /* 001000000110101101 */
131       [6]   = 0x0c62c,  /* 001100011000101100 */
132       [7]   = 0x0bdad,  /* 001011110110101101 */
133       [8]   = 0x081ec,  /* 001000000111101100 */
134       [9]   = 0x08061,  /* 001000000001100001 */
135       [10]  = 0x08ca5,  /* 001000110010100101 */
136       [11]  = 0x08041,  /* 001000000001000001 */
137       [12]  = 0x08231,  /* 001000001000110001 */
138       [13]  = 0x08229,  /* 001000001000101001 */
139       [14]  = 0x08020,  /* 001000000000100000 */
140       [15]  = 0x08232,  /* 001000001000110010 */
141       [16]  = 0x0a529,  /* 001010010100101001 */
142       [17]  = 0x0b4a5,  /* 001011010010100101 */
143       [18]  = 0x081a5,  /* 001000000110100101 */
144       [19]  = 0x0c629,  /* 001100011000101001 */
145       [20]  = 0x0b62c,  /* 001011011000101100 */
146       [21]  = 0x0b5a5,  /* 001011010110100101 */
147       [22]  = 0x0bda5,  /* 001011110110100101 */
148       [23]  = 0x0f1bd,  /* 001111011110111101 */
149       [24]  = 0x0f1bc,  /* 001111011110111100 */
150       [25]  = 0x0f1bd,  /* 001111011110111101 */
151       [26]  = 0x0f19d,  /* 001111011110011101 */
152       [27]  = 0x0f1be,  /* 001111011110111110 */
153       [28]  = 0x08021,  /* 001000000000100001 */
154       [29]  = 0x08022,  /* 001000000000100010 */
155       [30]  = 0x09fdd,  /* 001001111111011101 */
156       [31]  = 0x083be,  /* 001000001110111110 */
157    },
158    .subreg = {
159       [0]   = 0x0000,   /* 000000000000000 */
160       [1]   = 0x0004,   /* 000000000000100 */
161       [2]   = 0x0180,   /* 000000110000000 */
162       [3]   = 0x1000,   /* 111000000000000 */
163       [4]   = 0x3c08,   /* 011110000001000 */
164       [5]   = 0x0400,   /* 000010000000000 */
165       [6]   = 0x0010,   /* 000000000010000 */
166       [7]   = 0x0c0c,   /* 000110000001100 */
167       [8]   = 0x1000,   /* 001000000000000 */
168       [9]   = 0x0200,   /* 000001000000000 */
169       [10]  = 0x0294,   /* 000001010010100 */
170       [11]  = 0x0056,   /* 000000001010110 */
171       [12]  = 0x2000,   /* 010000000000000 */
172       [13]  = 0x6000,   /* 110000000000000 */
173       [14]  = 0x0800,   /* 000100000000000 */
174       [15]  = 0x0080,   /* 000000010000000 */
175       [16]  = 0x0008,   /* 000000000001000 */
176       [17]  = 0x4000,   /* 100000000000000 */
177       [18]  = 0x0280,   /* 000001010000000 */
178       [19]  = 0x1400,   /* 001010000000000 */
179       [20]  = 0x1800,   /* 001100000000000 */
180       [21]  = 0x0054,   /* 000000001010100 */
181       [22]  = 0x5a94,   /* 101101010010100 */
182       [23]  = 0x2800,   /* 010100000000000 */
183       [24]  = 0x008f,   /* 000000010001111 */
184       [25]  = 0x3000,   /* 011000000000000 */
185       [26]  = 0x1c00,   /* 111110000000000 */
186       [27]  = 0x5000,   /* 101000000000000 */
187       [28]  = 0x000f,   /* 000000000001111 */
188       [29]  = 0x088f,   /* 000100010001111 */
189       [30]  = 0x108f,   /* 001000010001111 */
190       [31]  = 0x0c00,   /* 000110000000000 */
191    },
192    .src = {
193       [0]   = 0x000,    /* 000000000000 */
194       [1]   = 0x588,    /* 010110001000 */
195       [2]   = 0x468,    /* 010001101000 */
196       [3]   = 0x228,    /* 001000101000 */
197       [4]   = 0x690,    /* 011010010000 */
198       [5]   = 0x120,    /* 000100100000 */
199       [6]   = 0x46c,    /* 010001101100 */
200       [7]   = 0x510,    /* 010101110000 */
201       [8]   = 0x618,    /* 011001111000 */
202       [9]   = 0x328,    /* 001100101000 */
203       [10]  = 0x58c,    /* 010110001100 */
204       [11]  = 0x220,    /* 001000100000 */
205       [12]  = 0x58a,    /* 010110001010 */
206       [13]  = 0x002,    /* 000000000010 */
207       [14]  = 0x550,    /* 010101010000 */
208       [15]  = 0x568,    /* 010101101000 */
209       [16]  = 0xf4c,    /* 111101001100 */
210       [17]  = 0xf2c,    /* 111100101100 */
211       [18]  = 0x610,    /* 011001110000 */
212       [19]  = 0x589,    /* 010110001001 */
213       [20]  = 0x558,    /* 010101011000 */
214       [21]  = 0x348,    /* 001101001000 */
215       [22]  = 0x42c,    /* 010000101100 */
216       [23]  = 0x400,    /* 010000000000 */
217       [24]  = 0x310,    /* 001101110000 */
218       [25]  = 0x310,    /* 001100010000 */
219       [26]  = 0x300,    /* 001100000000 */
220       [27]  = 0x46a,    /* 010001101010 */
221       [28]  = 0x318,    /* 001101111000 */
222       [29]  = 0x010,    /* 000001110000 */
223       [30]  = 0x320,    /* 001100100000 */
224       [31]  = 0x350,    /* 001101010000 */
225    },
226 };
227 
228 /*
229  * From the Ivy Bridge PRM, volume 4 part 3, page 128:
230  *
231  *     "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
232  *      MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
233  *      Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
234  *
235  *     "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
236  *      MSB to LSB) for various fields for Src1, Src0, and Dst, including
237  *      ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
238  *      on AddrMode and AccessMode.
239  *
240  *     "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
241  *      (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
242  *      Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
243  *      Src1.RegType bit fields."
244  *
245  *     "(ControlIndex) Lookup one of 32 19-bit values. That value is used
246  *      (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
247  *      ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
248  *      and AccessMode bit fields."
249  */
250 static const struct toy_compaction_table toy_compaction_table_gen7 = {
251    .control = {
252       [0]   = 0x00002,  /* 0000000000000000010 */
253       [1]   = 0x04000,  /* 0000100000000000000 */
254       [2]   = 0x04001,  /* 0000100000000000001 */
255       [3]   = 0x04002,  /* 0000100000000000010 */
256       [4]   = 0x04003,  /* 0000100000000000011 */
257       [5]   = 0x04004,  /* 0000100000000000100 */
258       [6]   = 0x04005,  /* 0000100000000000101 */
259       [7]   = 0x04007,  /* 0000100000000000111 */
260       [8]   = 0x04008,  /* 0000100000000001000 */
261       [9]   = 0x04009,  /* 0000100000000001001 */
262       [10]  = 0x0400d,  /* 0000100000000001101 */
263       [11]  = 0x06000,  /* 0000110000000000000 */
264       [12]  = 0x06001,  /* 0000110000000000001 */
265       [13]  = 0x06002,  /* 0000110000000000010 */
266       [14]  = 0x06003,  /* 0000110000000000011 */
267       [15]  = 0x06004,  /* 0000110000000000100 */
268       [16]  = 0x06005,  /* 0000110000000000101 */
269       [17]  = 0x06007,  /* 0000110000000000111 */
270       [18]  = 0x06009,  /* 0000110000000001001 */
271       [19]  = 0x0600d,  /* 0000110000000001101 */
272       [20]  = 0x06010,  /* 0000110000000010000 */
273       [21]  = 0x06100,  /* 0000110000100000000 */
274       [22]  = 0x08000,  /* 0001000000000000000 */
275       [23]  = 0x08002,  /* 0001000000000000010 */
276       [24]  = 0x08004,  /* 0001000000000000100 */
277       [25]  = 0x08100,  /* 0001000000100000000 */
278       [26]  = 0x16000,  /* 0010110000000000000 */
279       [27]  = 0x16010,  /* 0010110000000010000 */
280       [28]  = 0x18000,  /* 0011000000000000000 */
281       [29]  = 0x18100,  /* 0011000000100000000 */
282       [30]  = 0x28000,  /* 0101000000000000000 */
283       [31]  = 0x28100,  /* 0101000000100000000 */
284    },
285    .datatype = {
286       [0]   = 0x08001,  /* 001000000000000001 */
287       [1]   = 0x08020,  /* 001000000000100000 */
288       [2]   = 0x08021,  /* 001000000000100001 */
289       [3]   = 0x08061,  /* 001000000001100001 */
290       [4]   = 0x080bd,  /* 001000000010111101 */
291       [5]   = 0x082fd,  /* 001000001011111101 */
292       [6]   = 0x083a1,  /* 001000001110100001 */
293       [7]   = 0x083a5,  /* 001000001110100101 */
294       [8]   = 0x083bd,  /* 001000001110111101 */
295       [9]   = 0x08421,  /* 001000010000100001 */
296       [10]  = 0x08c20,  /* 001000110000100000 */
297       [11]  = 0x08c21,  /* 001000110000100001 */
298       [12]  = 0x094a5,  /* 001001010010100101 */
299       [13]  = 0x09ca4,  /* 001001110010100100 */
300       [14]  = 0x09ca5,  /* 001001110010100101 */
301       [15]  = 0x0f3bd,  /* 001111001110111101 */
302       [16]  = 0x0f79d,  /* 001111011110011101 */
303       [17]  = 0x0f7bc,  /* 001111011110111100 */
304       [18]  = 0x0f7bd,  /* 001111011110111101 */
305       [19]  = 0x0ffbc,  /* 001111111110111100 */
306       [20]  = 0x0020c,  /* 000000001000001100 */
307       [21]  = 0x0803d,  /* 001000000000111101 */
308       [22]  = 0x080a5,  /* 001000000010100101 */
309       [23]  = 0x08420,  /* 001000010000100000 */
310       [24]  = 0x094a4,  /* 001001010010100100 */
311       [25]  = 0x09c84,  /* 001001110010000100 */
312       [26]  = 0x0a509,  /* 001010010100001001 */
313       [27]  = 0x0dfbd,  /* 001101111110111101 */
314       [28]  = 0x0ffbd,  /* 001111111110111101 */
315       [29]  = 0x0bdac,  /* 001011110110101100 */
316       [30]  = 0x0a528,  /* 001010010100101000 */
317       [31]  = 0x0ad28,  /* 001010110100101000 */
318    },
319    .subreg = {
320       [0]   = 0x0000,   /* 000000000000000 */
321       [1]   = 0x0001,   /* 000000000000001 */
322       [2]   = 0x0008,   /* 000000000001000 */
323       [3]   = 0x000f,   /* 000000000001111 */
324       [4]   = 0x0010,   /* 000000000010000 */
325       [5]   = 0x0080,   /* 000000010000000 */
326       [6]   = 0x0100,   /* 000000100000000 */
327       [7]   = 0x0180,   /* 000000110000000 */
328       [8]   = 0x0200,   /* 000001000000000 */
329       [9]   = 0x0210,   /* 000001000010000 */
330       [10]  = 0x0280,   /* 000001010000000 */
331       [11]  = 0x1000,   /* 001000000000000 */
332       [12]  = 0x1001,   /* 001000000000001 */
333       [13]  = 0x1081,   /* 001000010000001 */
334       [14]  = 0x1082,   /* 001000010000010 */
335       [15]  = 0x1083,   /* 001000010000011 */
336       [16]  = 0x1084,   /* 001000010000100 */
337       [17]  = 0x1087,   /* 001000010000111 */
338       [18]  = 0x1088,   /* 001000010001000 */
339       [19]  = 0x108e,   /* 001000010001110 */
340       [20]  = 0x108f,   /* 001000010001111 */
341       [21]  = 0x1180,   /* 001000110000000 */
342       [22]  = 0x11e8,   /* 001000111101000 */
343       [23]  = 0x2000,   /* 010000000000000 */
344       [24]  = 0x2180,   /* 010000110000000 */
345       [25]  = 0x3000,   /* 011000000000000 */
346       [26]  = 0x3c87,   /* 011110010000111 */
347       [27]  = 0x4000,   /* 100000000000000 */
348       [28]  = 0x5000,   /* 101000000000000 */
349       [29]  = 0x6000,   /* 110000000000000 */
350       [30]  = 0x7000,   /* 111000000000000 */
351       [31]  = 0x701c,   /* 111000000011100 */
352    },
353    .src = {
354       [0]   = 0x000,    /* 000000000000 */
355       [1]   = 0x002,    /* 000000000010 */
356       [2]   = 0x010,    /* 000000010000 */
357       [3]   = 0x012,    /* 000000010010 */
358       [4]   = 0x018,    /* 000000011000 */
359       [5]   = 0x020,    /* 000000100000 */
360       [6]   = 0x028,    /* 000000101000 */
361       [7]   = 0x048,    /* 000001001000 */
362       [8]   = 0x050,    /* 000001010000 */
363       [9]   = 0x070,    /* 000001110000 */
364       [10]  = 0x078,    /* 000001111000 */
365       [11]  = 0x300,    /* 001100000000 */
366       [12]  = 0x302,    /* 001100000010 */
367       [13]  = 0x308,    /* 001100001000 */
368       [14]  = 0x310,    /* 001100010000 */
369       [15]  = 0x312,    /* 001100010010 */
370       [16]  = 0x320,    /* 001100100000 */
371       [17]  = 0x328,    /* 001100101000 */
372       [18]  = 0x338,    /* 001100111000 */
373       [19]  = 0x340,    /* 001101000000 */
374       [20]  = 0x342,    /* 001101000010 */
375       [21]  = 0x348,    /* 001101001000 */
376       [22]  = 0x350,    /* 001101010000 */
377       [23]  = 0x360,    /* 001101100000 */
378       [24]  = 0x368,    /* 001101101000 */
379       [25]  = 0x370,    /* 001101110000 */
380       [26]  = 0x371,    /* 001101110001 */
381       [27]  = 0x378,    /* 001101111000 */
382       [28]  = 0x468,    /* 010001101000 */
383       [29]  = 0x469,    /* 010001101001 */
384       [30]  = 0x46a,    /* 010001101010 */
385       [31]  = 0x588,    /* 010110001000 */
386    },
387 };
388 
389 static const struct toy_compaction_table toy_compaction_table_gen8 = {
390    .control = {
391    },
392    .datatype = {
393    },
394    .subreg = {
395    },
396    .src = {
397    },
398    .control_3src = {
399    },
400    .source_3src = {
401    },
402 };
403 
404 const struct toy_compaction_table *
toy_compiler_get_compaction_table(const struct ilo_dev * dev)405 toy_compiler_get_compaction_table(const struct ilo_dev *dev)
406 {
407    switch (ilo_dev_gen(dev)) {
408    case ILO_GEN(8):
409       return &toy_compaction_table_gen8;
410    case ILO_GEN(7.5):
411    case ILO_GEN(7):
412       return &toy_compaction_table_gen7;
413    case ILO_GEN(6):
414       return &toy_compaction_table_gen6;
415    default:
416       assert(!"unsupported gen");
417       return NULL;
418    }
419 }
420 
421 /**
422  * Return true if the source operand is null.
423  */
424 static bool
src_is_null(const struct codegen * cg,int idx)425 src_is_null(const struct codegen *cg, int idx)
426 {
427    const struct codegen_src *src = &cg->src[idx];
428 
429    return (src->file == GEN6_FILE_ARF &&
430            src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
431 }
432 
433 /**
434  * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
435  */
436 static uint32_t
translate_src_gen6(const struct codegen * cg,int idx)437 translate_src_gen6(const struct codegen *cg, int idx)
438 {
439    const struct codegen_src *src = &cg->src[idx];
440    uint32_t dw;
441 
442    ILO_DEV_ASSERT(cg->dev, 6, 8);
443 
444    /* special treatment may be needed if any of the operand is immediate */
445    if (cg->src[0].file == GEN6_FILE_IMM) {
446       assert(!cg->src[0].absolute && !cg->src[0].negate);
447 
448       /* only the last src operand can be an immediate unless it is Gen8+ */
449       assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1));
450 
451       if (!src_is_null(cg, 1))
452          return cg->src[idx].origin;
453 
454       if (idx == 0) {
455          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
456             return cg->src[1].type << 27 |
457                    cg->src[1].file << 25;
458          } else {
459             return cg->flag_sub_reg_num << 25;
460          }
461       } else {
462          return cg->src[0].origin;
463       }
464    }
465    else if (idx && cg->src[1].file == GEN6_FILE_IMM) {
466       assert(!cg->src[1].absolute && !cg->src[1].negate);
467       return cg->src[1].origin;
468    }
469 
470    assert(src->file != GEN6_FILE_IMM);
471 
472    if (src->indirect) {
473       const int offset = (int) src->origin;
474 
475       assert(src->file == GEN6_FILE_GRF);
476       assert(offset < 512 && offset >= -512);
477 
478       if (cg->inst->access_mode == GEN6_ALIGN_16) {
479          assert(src->width == GEN6_WIDTH_4);
480          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
481 
482          /* the lower 4 bits are reserved for the swizzle_[xy] */
483          assert(!(src->origin & 0xf));
484 
485          dw = src->vert_stride << 21 |
486               src->swizzle[3] << 18 |
487               src->swizzle[2] << 16 |
488               GEN6_ADDRMODE_INDIRECT << 15 |
489               src->negate << 14 |
490               src->absolute << 13 |
491               src->swizzle[1] << 2 |
492               src->swizzle[0];
493          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
494             dw |= src->indirect_subreg << 9 |
495                   (src->origin & 0x1f0);
496          } else {
497             dw |= src->indirect_subreg << 10 |
498                   (src->origin & 0x3f0);
499          }
500       }
501       else {
502          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
503                 src->swizzle[1] == TOY_SWIZZLE_Y &&
504                 src->swizzle[2] == TOY_SWIZZLE_Z &&
505                 src->swizzle[3] == TOY_SWIZZLE_W);
506 
507          dw = src->vert_stride << 21 |
508               src->width << 18 |
509               src->horz_stride << 16 |
510               GEN6_ADDRMODE_INDIRECT << 15 |
511               src->negate << 14 |
512               src->absolute << 13;
513          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
514             dw |= src->indirect_subreg << 9 |
515                   (src->origin & 0x1ff);
516          } else {
517             dw |= src->indirect_subreg << 10 |
518                   (src->origin & 0x3ff);
519          }
520       }
521    }
522    else {
523       switch (src->file) {
524       case GEN6_FILE_ARF:
525          break;
526       case GEN6_FILE_GRF:
527          assert(CG_REG_NUM(src->origin) < 128);
528          break;
529       case GEN6_FILE_MRF:
530          assert(cg->inst->opcode == GEN6_OPCODE_SEND ||
531                 cg->inst->opcode == GEN6_OPCODE_SENDC);
532          assert(CG_REG_NUM(src->origin) < 16);
533          break;
534       case GEN6_FILE_IMM:
535       default:
536          assert(!"invalid src file");
537          break;
538       }
539 
540       if (cg->inst->access_mode == GEN6_ALIGN_16) {
541          assert(src->width == GEN6_WIDTH_4);
542          assert(src->horz_stride == GEN6_HORZSTRIDE_1);
543 
544          /* the lower 4 bits are reserved for the swizzle_[xy] */
545          assert(!(src->origin & 0xf));
546 
547          dw = src->vert_stride << 21 |
548               src->swizzle[3] << 18 |
549               src->swizzle[2] << 16 |
550               GEN6_ADDRMODE_DIRECT << 15 |
551               src->negate << 14 |
552               src->absolute << 13 |
553               src->origin |
554               src->swizzle[1] << 2 |
555               src->swizzle[0];
556       }
557       else {
558          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
559                 src->swizzle[1] == TOY_SWIZZLE_Y &&
560                 src->swizzle[2] == TOY_SWIZZLE_Z &&
561                 src->swizzle[3] == TOY_SWIZZLE_W);
562 
563          dw = src->vert_stride << 21 |
564               src->width << 18 |
565               src->horz_stride << 16 |
566               GEN6_ADDRMODE_DIRECT << 15 |
567               src->negate << 14 |
568               src->absolute << 13 |
569               src->origin;
570       }
571    }
572 
573    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
574       const bool indirect_origin_bit9 = (cg->dst.indirect) ?
575          (src->origin & 0x200) : 0;
576 
577       if (idx == 0) {
578          dw |= indirect_origin_bit9 << 31 |
579                cg->src[1].type << 27 |
580                cg->src[1].file << 25;
581       } else {
582          dw |= indirect_origin_bit9 << 25;
583       }
584    } else {
585       if (idx == 0)
586          dw |= cg->flag_sub_reg_num << 25;
587    }
588 
589    return dw;
590 }
591 
592 /**
593  * Translate the destination operand to the higher 16 bits of DW1 of the
594  * 1-src/2-src format.
595  */
596 static uint16_t
translate_dst_region_gen6(const struct codegen * cg)597 translate_dst_region_gen6(const struct codegen *cg)
598 {
599    const struct codegen_dst *dst = &cg->dst;
600    uint16_t dw1_region;
601 
602    ILO_DEV_ASSERT(cg->dev, 6, 8);
603 
604    if (dst->file == GEN6_FILE_IMM) {
605       /* dst is immediate (JIP) when the opcode is a conditional branch */
606       switch (cg->inst->opcode) {
607       case GEN6_OPCODE_IF:
608       case GEN6_OPCODE_ELSE:
609       case GEN6_OPCODE_ENDIF:
610       case GEN6_OPCODE_WHILE:
611          assert(dst->type == GEN6_TYPE_W);
612          dw1_region = (dst->origin & 0xffff);
613          break;
614       default:
615          assert(!"dst cannot be immediate");
616          dw1_region = 0;
617          break;
618       }
619 
620       return dw1_region;
621    }
622 
623    if (dst->indirect) {
624       const int offset = (int) dst->origin;
625 
626       assert(dst->file == GEN6_FILE_GRF);
627       assert(offset < 512 && offset >= -512);
628 
629       if (cg->inst->access_mode == GEN6_ALIGN_16) {
630          /*
631           * From the Sandy Bridge PRM, volume 4 part 2, page 144:
632           *
633           *     "Allthough Dst.HorzStride is a don't care for Align16, HW
634           *      needs this to be programmed as 01."
635           */
636          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
637          /* the lower 4 bits are reserved for the writemask */
638          assert(!(dst->origin & 0xf));
639 
640          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
641                       dst->horz_stride << 13 |
642                       dst->writemask;
643          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
644             dw1_region |= dst->indirect_subreg << 9 |
645                           (dst->origin & 0x1f0);
646          } else {
647             dw1_region |= dst->indirect_subreg << 10 |
648                           (dst->origin & 0x3f0);
649          }
650       }
651       else {
652          assert(dst->writemask == TOY_WRITEMASK_XYZW);
653 
654          dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
655                       dst->horz_stride << 13;
656          if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
657             dw1_region |= dst->indirect_subreg << 9 |
658                           (dst->origin & 0x1ff);
659          } else {
660             dw1_region |= dst->indirect_subreg << 10 |
661                           (dst->origin & 0x3ff);
662          }
663       }
664    }
665    else {
666       assert((dst->file == GEN6_FILE_GRF &&
667               CG_REG_NUM(dst->origin) < 128) ||
668              (dst->file == GEN6_FILE_MRF &&
669               CG_REG_NUM(dst->origin) < 16) ||
670              (dst->file == GEN6_FILE_ARF));
671 
672       if (cg->inst->access_mode == GEN6_ALIGN_16) {
673          /* similar to the indirect case */
674          assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
675          assert(!(dst->origin & 0xf));
676 
677          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
678                       dst->horz_stride << 13 |
679                       dst->origin |
680                       dst->writemask;
681       }
682       else {
683          assert(dst->writemask == TOY_WRITEMASK_XYZW);
684 
685          dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
686                       dst->horz_stride << 13 |
687                       dst->origin;
688       }
689    }
690 
691    return dw1_region;
692 }
693 
694 /**
695  * Translate the destination operand to DW1 of the 1-src/2-src format.
696  */
697 static uint32_t
translate_dst_gen6(const struct codegen * cg)698 translate_dst_gen6(const struct codegen *cg)
699 {
700    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
701 
702    return translate_dst_region_gen6(cg) << 16 |
703           cg->src[1].type << 12 |
704           cg->src[1].file << 10 |
705           cg->src[0].type << 7 |
706           cg->src[0].file << 5 |
707           cg->dst.type << 2 |
708           cg->dst.file;
709 }
710 
711 static uint32_t
translate_dst_gen8(const struct codegen * cg)712 translate_dst_gen8(const struct codegen *cg)
713 {
714    const bool indirect_origin_bit9 = (cg->dst.indirect) ?
715       (cg->dst.origin & 0x200) : 0;
716 
717    ILO_DEV_ASSERT(cg->dev, 8, 8);
718 
719    return translate_dst_region_gen6(cg) << 16 |
720           indirect_origin_bit9 << 15 |
721           cg->src[0].type << 11 |
722           cg->src[0].file << 9 |
723           cg->dst.type << 5 |
724           cg->dst.file << 3 |
725           cg->inst->mask_ctrl << 2 |
726           cg->flag_reg_num << 1 |
727           cg->flag_sub_reg_num;
728 }
729 
730 /**
731  * Translate the instruction to DW0 of the 1-src/2-src format.
732  */
733 static uint32_t
translate_inst_gen6(const struct codegen * cg)734 translate_inst_gen6(const struct codegen *cg)
735 {
736    const bool debug_ctrl = false;
737    const bool cmpt_ctrl = false;
738 
739    ILO_DEV_ASSERT(cg->dev, 6, 7.5);
740 
741    assert(cg->inst->opcode < 128);
742 
743    return cg->inst->saturate << 31 |
744           debug_ctrl << 30 |
745           cmpt_ctrl << 29 |
746           cg->inst->acc_wr_ctrl << 28 |
747           cg->inst->cond_modifier << 24 |
748           cg->inst->exec_size << 21 |
749           cg->inst->pred_inv << 20 |
750           cg->inst->pred_ctrl << 16 |
751           cg->inst->thread_ctrl << 14 |
752           cg->inst->qtr_ctrl << 12 |
753           cg->inst->dep_ctrl << 10 |
754           cg->inst->mask_ctrl << 9 |
755           cg->inst->access_mode << 8 |
756           cg->inst->opcode;
757 }
758 
759 static uint32_t
translate_inst_gen8(const struct codegen * cg)760 translate_inst_gen8(const struct codegen *cg)
761 {
762    const bool debug_ctrl = false;
763    const bool cmpt_ctrl = false;
764 
765    ILO_DEV_ASSERT(cg->dev, 8, 8);
766 
767    assert(cg->inst->opcode < 128);
768 
769    return cg->inst->saturate << 31 |
770           debug_ctrl << 30 |
771           cmpt_ctrl << 29 |
772           cg->inst->acc_wr_ctrl << 28 |
773           cg->inst->cond_modifier << 24 |
774           cg->inst->exec_size << 21 |
775           cg->inst->pred_inv << 20 |
776           cg->inst->pred_ctrl << 16 |
777           cg->inst->thread_ctrl << 14 |
778           cg->inst->qtr_ctrl << 12 |
779           cg->inst->dep_ctrl << 9 |
780           cg->inst->access_mode << 8 |
781           cg->inst->opcode;
782 }
783 
784 /**
785  * Codegen an instruction in 1-src/2-src format.
786  */
787 static void
codegen_inst_gen6(const struct codegen * cg,uint32_t * code)788 codegen_inst_gen6(const struct codegen *cg, uint32_t *code)
789 {
790    ILO_DEV_ASSERT(cg->dev, 6, 8);
791 
792    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
793       code[0] = translate_inst_gen8(cg);
794       code[1] = translate_dst_gen8(cg);
795    } else {
796       code[0] = translate_inst_gen6(cg);
797       code[1] = translate_dst_gen6(cg);
798    }
799 
800    code[2] = translate_src_gen6(cg, 0);
801    code[3] = translate_src_gen6(cg, 1);
802    assert(src_is_null(cg, 2));
803 }
804 
805 /**
806  * Codegen an instruction in 3-src format.
807  */
808 static void
codegen_inst_3src_gen6(const struct codegen * cg,uint32_t * code)809 codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code)
810 {
811    const struct codegen_dst *dst = &cg->dst;
812    uint32_t dw0, dw1, dw_src[3];
813    int i;
814 
815    ILO_DEV_ASSERT(cg->dev, 6, 8);
816 
817    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8))
818       dw0 = translate_inst_gen8(cg);
819    else
820       dw0 = translate_inst_gen6(cg);
821 
822    /*
823     * 3-src instruction restrictions
824     *
825     *  - align16 with direct addressing
826     *  - GRF or MRF dst
827     *  - GRF src
828     *  - sub_reg_num is DWORD aligned
829     *  - no regioning except replication control
830     *    (vert_stride == 0 && horz_stride == 0)
831     */
832    assert(cg->inst->access_mode == GEN6_ALIGN_16);
833 
834    assert(!dst->indirect);
835    assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) ||
836           (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16));
837    assert(!(dst->origin & 0x3));
838    assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
839 
840    if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
841       dw1 = dst->origin << 19 |
842             dst->writemask << 17 |
843             cg->src[2].negate << 10 |
844             cg->src[2].negate << 10 |
845             cg->src[2].absolute << 9 |
846             cg->src[1].negate << 8 |
847             cg->src[1].absolute << 7 |
848             cg->src[0].negate << 6 |
849             cg->src[0].absolute << 5 |
850             cg->inst->mask_ctrl << 2 |
851             cg->flag_reg_num << 1 |
852             cg->flag_sub_reg_num;
853    } else {
854       dw1 = dst->origin << 19 |
855             dst->writemask << 17 |
856             cg->src[2].negate << 9 |
857             cg->src[2].absolute << 8 |
858             cg->src[1].negate << 7 |
859             cg->src[1].absolute << 6 |
860             cg->src[0].negate << 5 |
861             cg->src[0].absolute << 4 |
862             cg->flag_sub_reg_num << 1 |
863             (dst->file == GEN6_FILE_MRF);
864    }
865 
866    for (i = 0; i < 3; i++) {
867       const struct codegen_src *src = &cg->src[i];
868 
869       assert(!src->indirect);
870       assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128);
871       assert(!(src->origin & 0x3));
872 
873       assert((src->vert_stride == GEN6_VERTSTRIDE_4 &&
874               src->horz_stride == GEN6_HORZSTRIDE_1) ||
875              (src->vert_stride == GEN6_VERTSTRIDE_0 &&
876               src->horz_stride == GEN6_HORZSTRIDE_0));
877       assert(src->width == GEN6_WIDTH_4);
878 
879       dw_src[i] = src->origin << 7 |
880                   src->swizzle[3] << 7 |
881                   src->swizzle[2] << 5 |
882                   src->swizzle[1] << 3 |
883                   src->swizzle[0] << 1 |
884                   (src->vert_stride == GEN6_VERTSTRIDE_0 &&
885                    src->horz_stride == GEN6_HORZSTRIDE_0);
886 
887       /* only the lower 20 bits are used */
888       assert((dw_src[i] & 0xfffff) == dw_src[i]);
889    }
890 
891    code[0] = dw0;
892    code[1] = dw1;
893    /* concatenate the bits of dw_src */
894    code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
895    code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
896 }
897 
898 /**
899  * Sanity check the region parameters of the operands.
900  */
901 static void
codegen_validate_region_restrictions(const struct codegen * cg)902 codegen_validate_region_restrictions(const struct codegen *cg)
903 {
904    const int exec_size_map[] = {
905       [GEN6_EXECSIZE_1] = 1,
906       [GEN6_EXECSIZE_2] = 2,
907       [GEN6_EXECSIZE_4] = 4,
908       [GEN6_EXECSIZE_8] = 8,
909       [GEN6_EXECSIZE_16] = 16,
910       [GEN6_EXECSIZE_32] = 32,
911    };
912    const int width_map[] = {
913       [GEN6_WIDTH_1] = 1,
914       [GEN6_WIDTH_2] = 2,
915       [GEN6_WIDTH_4] = 4,
916       [GEN6_WIDTH_8] = 8,
917       [GEN6_WIDTH_16] = 16,
918    };
919    const int horz_stride_map[] = {
920       [GEN6_HORZSTRIDE_0] = 0,
921       [GEN6_HORZSTRIDE_1] = 1,
922       [GEN6_HORZSTRIDE_2] = 2,
923       [GEN6_HORZSTRIDE_4] = 4,
924    };
925    const int vert_stride_map[] = {
926       [GEN6_VERTSTRIDE_0] = 0,
927       [GEN6_VERTSTRIDE_1] = 1,
928       [GEN6_VERTSTRIDE_2] = 2,
929       [GEN6_VERTSTRIDE_4] = 4,
930       [GEN6_VERTSTRIDE_8] = 8,
931       [GEN6_VERTSTRIDE_16] = 16,
932       [GEN6_VERTSTRIDE_32] = 32,
933       [7] = 64,
934       [8] = 128,
935       [9] = 256,
936       [GEN6_VERTSTRIDE_VXH] = 0,
937    };
938    const int exec_size = exec_size_map[cg->inst->exec_size];
939    int i;
940 
941    /* Sandy Bridge PRM, volume 4 part 2, page 94 */
942 
943    /* 1. (we don't do 32 anyway) */
944    assert(exec_size <= 16);
945 
946    for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
947       const int width = width_map[cg->src[i].width];
948       const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
949       const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
950 
951       if (src_is_null(cg, i))
952          break;
953 
954       /* 3. */
955       assert(exec_size >= width);
956 
957       if (exec_size == width) {
958          /* 4. & 5. */
959          if (horz_stride)
960             assert(vert_stride == width * horz_stride);
961       }
962 
963       if (width == 1) {
964          /* 6. */
965          assert(horz_stride == 0);
966 
967          /* 7. */
968          if (exec_size == 1)
969             assert(vert_stride == 0);
970       }
971 
972       /* 8. */
973       if (!vert_stride && !horz_stride)
974          assert(width == 1);
975    }
976 
977    /* derived from 10.1.2. & 10.2. */
978    assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0);
979 }
980 
981 static unsigned
translate_vfile(enum toy_file file)982 translate_vfile(enum toy_file file)
983 {
984    switch (file) {
985    case TOY_FILE_ARF:   return GEN6_FILE_ARF;
986    case TOY_FILE_GRF:   return GEN6_FILE_GRF;
987    case TOY_FILE_MRF:   return GEN6_FILE_MRF;
988    case TOY_FILE_IMM:   return GEN6_FILE_IMM;
989    default:
990       assert(!"unhandled toy file");
991       return GEN6_FILE_GRF;
992    }
993 }
994 
995 static unsigned
translate_vtype(enum toy_type type)996 translate_vtype(enum toy_type type)
997 {
998    switch (type) {
999    case TOY_TYPE_F:     return GEN6_TYPE_F;
1000    case TOY_TYPE_D:     return GEN6_TYPE_D;
1001    case TOY_TYPE_UD:    return GEN6_TYPE_UD;
1002    case TOY_TYPE_W:     return GEN6_TYPE_W;
1003    case TOY_TYPE_UW:    return GEN6_TYPE_UW;
1004    case TOY_TYPE_V:     return GEN6_TYPE_V_IMM;
1005    default:
1006       assert(!"unhandled toy type");
1007       return GEN6_TYPE_F;
1008    }
1009 }
1010 
1011 static unsigned
translate_writemask(enum toy_writemask writemask)1012 translate_writemask(enum toy_writemask writemask)
1013 {
1014    /* TOY_WRITEMASK_* are compatible with the hardware definitions */
1015    assert(writemask <= 0xf);
1016    return writemask;
1017 }
1018 
1019 static unsigned
translate_swizzle(enum toy_swizzle swizzle)1020 translate_swizzle(enum toy_swizzle swizzle)
1021 {
1022    /* TOY_SWIZZLE_* are compatible with the hardware definitions */
1023    assert(swizzle <= 3);
1024    return swizzle;
1025 }
1026 
1027 /**
1028  * Prepare for generating an instruction.
1029  */
1030 static void
codegen_prepare(struct codegen * cg,const struct ilo_dev * dev,const struct toy_inst * inst,int pc,int rect_linear_width)1031 codegen_prepare(struct codegen *cg, const struct ilo_dev *dev,
1032                 const struct toy_inst *inst, int pc, int rect_linear_width)
1033 {
1034    int i;
1035 
1036    cg->dev = dev;
1037    cg->inst = inst;
1038    cg->pc = pc;
1039 
1040    cg->flag_reg_num = 0;
1041    cg->flag_sub_reg_num = 0;
1042 
1043    cg->dst.file = translate_vfile(inst->dst.file);
1044    cg->dst.type = translate_vtype(inst->dst.type);
1045    cg->dst.indirect = inst->dst.indirect;
1046    cg->dst.indirect_subreg = inst->dst.indirect_subreg;
1047    cg->dst.origin = inst->dst.val32;
1048 
1049    /*
1050     * From the Sandy Bridge PRM, volume 4 part 2, page 81:
1051     *
1052     *     "For a word or an unsigned word immediate data, software must
1053     *      replicate the same 16-bit immediate value to both the lower word
1054     *      and the high word of the 32-bit immediate field in an instruction."
1055     */
1056    if (inst->dst.file == TOY_FILE_IMM) {
1057       switch (inst->dst.type) {
1058       case TOY_TYPE_W:
1059       case TOY_TYPE_UW:
1060          cg->dst.origin &= 0xffff;
1061          cg->dst.origin |= cg->dst.origin << 16;
1062          break;
1063       default:
1064          break;
1065       }
1066    }
1067 
1068    cg->dst.writemask = translate_writemask(inst->dst.writemask);
1069 
1070    switch (inst->dst.rect) {
1071    case TOY_RECT_LINEAR:
1072       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1073       break;
1074    default:
1075       assert(!"unsupported dst region");
1076       cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1077       break;
1078    }
1079 
1080    for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
1081       struct codegen_src *src = &cg->src[i];
1082 
1083       src->file = translate_vfile(inst->src[i].file);
1084       src->type = translate_vtype(inst->src[i].type);
1085       src->indirect = inst->src[i].indirect;
1086       src->indirect_subreg = inst->src[i].indirect_subreg;
1087       src->origin = inst->src[i].val32;
1088 
1089       /* do the same for src */
1090       if (inst->dst.file == TOY_FILE_IMM) {
1091          switch (inst->src[i].type) {
1092          case TOY_TYPE_W:
1093          case TOY_TYPE_UW:
1094             src->origin &= 0xffff;
1095             src->origin |= src->origin << 16;
1096             break;
1097          default:
1098             break;
1099          }
1100       }
1101 
1102       src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
1103       src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
1104       src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
1105       src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
1106       src->absolute = inst->src[i].absolute;
1107       src->negate = inst->src[i].negate;
1108 
1109       switch (inst->src[i].rect) {
1110       case TOY_RECT_LINEAR:
1111          switch (rect_linear_width) {
1112          case 1:
1113             src->vert_stride = GEN6_VERTSTRIDE_1;
1114             src->width = GEN6_WIDTH_1;
1115             break;
1116          case 2:
1117             src->vert_stride = GEN6_VERTSTRIDE_2;
1118             src->width = GEN6_WIDTH_2;
1119             break;
1120          case 4:
1121             src->vert_stride = GEN6_VERTSTRIDE_4;
1122             src->width = GEN6_WIDTH_4;
1123             break;
1124          case 8:
1125             src->vert_stride = GEN6_VERTSTRIDE_8;
1126             src->width = GEN6_WIDTH_8;
1127             break;
1128          case 16:
1129             src->vert_stride = GEN6_VERTSTRIDE_16;
1130             src->width = GEN6_WIDTH_16;
1131             break;
1132          default:
1133             assert(!"unsupported TOY_RECT_LINEAR width");
1134             src->vert_stride = GEN6_VERTSTRIDE_1;
1135             src->width = GEN6_WIDTH_1;
1136             break;
1137          }
1138          src->horz_stride = GEN6_HORZSTRIDE_1;
1139          break;
1140       case TOY_RECT_041:
1141          src->vert_stride = GEN6_VERTSTRIDE_0;
1142          src->width = GEN6_WIDTH_4;
1143          src->horz_stride = GEN6_HORZSTRIDE_1;
1144          break;
1145       case TOY_RECT_010:
1146          src->vert_stride = GEN6_VERTSTRIDE_0;
1147          src->width = GEN6_WIDTH_1;
1148          src->horz_stride = GEN6_HORZSTRIDE_0;
1149          break;
1150       case TOY_RECT_220:
1151          src->vert_stride = GEN6_VERTSTRIDE_2;
1152          src->width = GEN6_WIDTH_2;
1153          src->horz_stride = GEN6_HORZSTRIDE_0;
1154          break;
1155       case TOY_RECT_440:
1156          src->vert_stride = GEN6_VERTSTRIDE_4;
1157          src->width = GEN6_WIDTH_4;
1158          src->horz_stride = GEN6_HORZSTRIDE_0;
1159          break;
1160       case TOY_RECT_240:
1161          src->vert_stride = GEN6_VERTSTRIDE_2;
1162          src->width = GEN6_WIDTH_4;
1163          src->horz_stride = GEN6_HORZSTRIDE_0;
1164          break;
1165       default:
1166          assert(!"unsupported src region");
1167          src->vert_stride = GEN6_VERTSTRIDE_1;
1168          src->width = GEN6_WIDTH_1;
1169          src->horz_stride = GEN6_HORZSTRIDE_1;
1170          break;
1171       }
1172    }
1173 }
1174 
1175 /**
1176  * Generate HW shader code.  The instructions should have been legalized.
1177  */
1178 void *
toy_compiler_assemble(struct toy_compiler * tc,int * size)1179 toy_compiler_assemble(struct toy_compiler *tc, int *size)
1180 {
1181    const struct toy_inst *inst;
1182    uint32_t *code;
1183    int pc;
1184 
1185    code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
1186    if (!code)
1187       return NULL;
1188 
1189    pc = 0;
1190    tc_head(tc);
1191    while ((inst = tc_next(tc)) != NULL) {
1192       uint32_t *dw = &code[pc * 4];
1193       struct codegen cg;
1194 
1195       if (pc >= tc->num_instructions) {
1196          tc_fail(tc, "wrong instructoun count");
1197          break;
1198       }
1199 
1200       codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width);
1201       codegen_validate_region_restrictions(&cg);
1202 
1203       switch (inst->opcode) {
1204       case GEN6_OPCODE_MAD:
1205          codegen_inst_3src_gen6(&cg, dw);
1206          break;
1207       default:
1208          codegen_inst_gen6(&cg, dw);
1209          break;
1210       }
1211 
1212       pc++;
1213    }
1214 
1215    /* never return an invalid kernel */
1216    if (tc->fail) {
1217       FREE(code);
1218       return NULL;
1219    }
1220 
1221    if (size)
1222       *size = pc * 4 * sizeof(uint32_t);
1223 
1224    return code;
1225 }
1226