1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "toy_compiler.h"
29
/*
 * Operand origins are stored as (RegNum << CG_REG_SHIFT | SubRegNumInBytes).
 * CG_REG_NUM() drops the sub-register bits and yields the register number.
 */
#define CG_REG_SHIFT 5
#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
32
/* Destination operand of the instruction being encoded. */
struct codegen_dst {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   unsigned horz_stride;

   unsigned writemask;
};

/* Source operand of the instruction being encoded. */
struct codegen_src {
   unsigned file;
   unsigned type;
   bool indirect;
   unsigned indirect_subreg;
   unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */

   /* region description */
   unsigned vert_stride;
   unsigned width;
   unsigned horz_stride;

   unsigned swizzle[4];
   bool absolute;
   bool negate;
};

/*
 * Per-instruction code generation state: the device, the instruction being
 * encoded and its position, the flag register selection, and the
 * destination and (up to three) source operands.
 */
struct codegen {
   const struct ilo_dev *dev;
   const struct toy_inst *inst;
   int pc;

   unsigned flag_reg_num;
   unsigned flag_sub_reg_num;

   struct codegen_dst dst;
   struct codegen_src src[3];
};
69
70 /*
71 * From the Sandy Bridge PRM, volume 4 part 2, page 107-108:
72 *
73 * "(Src0Index) The 5-bit index for source 0. The 12-bit table-look-up
74 * result forms bits [88:77], the source 0 register region fields, of the
75 * 128-bit instruction word."
76 *
77 * "(SubRegIndex) The 5-bit index for sub-register fields. The 15-bit
78 * table-look-up result forms bits [100:96], [68,64] and [52,48] of the
79 * 128-bit instruction word."
80 *
81 * "(DataTypeIndex) The 5-bit index for data type fields. The 18-bit
82 * table-look-up result forms bits [63:61] and [46, 32] of the 128-bit
83 * instruction word."
84 *
85 * "(ControlIndex) The 5-bit index for data type fields. The 17-bit
86 * table-look-up result forms bits[31], and [23, 8] of the 128-bit
87 * instruction word."
88 */
89 static const struct toy_compaction_table toy_compaction_table_gen6 = {
90 .control = {
91 [0] = 0x00000, /* 00000000000000000 */
92 [1] = 0x08000, /* 01000000000000000 */
93 [2] = 0x06000, /* 00110000000000000 */
94 [3] = 0x00100, /* 00000000100000000 */
95 [4] = 0x02000, /* 00010000000000000 */
96 [5] = 0x01100, /* 00001000100000000 */
97 [6] = 0x00102, /* 00000000100000010 */
98 [7] = 0x00002, /* 00000000000000010 */
99 [8] = 0x08100, /* 01000000100000000 */
100 [9] = 0x0a000, /* 01010000000000000 */
101 [10] = 0x16000, /* 10110000000000000 */
102 [11] = 0x04000, /* 00100000000000000 */
103 [12] = 0x1a000, /* 11010000000000000 */
104 [13] = 0x18000, /* 11000000000000000 */
105 [14] = 0x09100, /* 01001000100000000 */
106 [15] = 0x08008, /* 01000000000001000 */
107 [16] = 0x08004, /* 01000000000000100 */
108 [17] = 0x00008, /* 00000000000001000 */
109 [18] = 0x00004, /* 00000000000000100 */
110 [19] = 0x01100, /* 00111000100000000 */
111 [20] = 0x01102, /* 00001000100000010 */
112 [21] = 0x06100, /* 00110000100000000 */
113 [22] = 0x06001, /* 00110000000000001 */
114 [23] = 0x04001, /* 00100000000000001 */
115 [24] = 0x06002, /* 00110000000000010 */
116 [25] = 0x06005, /* 00110000000000101 */
117 [26] = 0x06009, /* 00110000000001001 */
118 [27] = 0x06010, /* 00110000000010000 */
119 [28] = 0x06003, /* 00110000000000011 */
120 [29] = 0x06004, /* 00110000000000100 */
121 [30] = 0x06108, /* 00110000100001000 */
122 [31] = 0x04009, /* 00100000000001001 */
123 },
124 .datatype = {
125 [0] = 0x09c00, /* 001001110000000000 */
126 [1] = 0x08c20, /* 001000110000100000 */
127 [2] = 0x09c01, /* 001001110000000001 */
128 [3] = 0x08060, /* 001000000001100000 */
129 [4] = 0x0ad29, /* 001010110100101001 */
130 [5] = 0x081ad, /* 001000000110101101 */
131 [6] = 0x0c62c, /* 001100011000101100 */
132 [7] = 0x0bdad, /* 001011110110101101 */
133 [8] = 0x081ec, /* 001000000111101100 */
134 [9] = 0x08061, /* 001000000001100001 */
135 [10] = 0x08ca5, /* 001000110010100101 */
136 [11] = 0x08041, /* 001000000001000001 */
137 [12] = 0x08231, /* 001000001000110001 */
138 [13] = 0x08229, /* 001000001000101001 */
139 [14] = 0x08020, /* 001000000000100000 */
140 [15] = 0x08232, /* 001000001000110010 */
141 [16] = 0x0a529, /* 001010010100101001 */
142 [17] = 0x0b4a5, /* 001011010010100101 */
143 [18] = 0x081a5, /* 001000000110100101 */
144 [19] = 0x0c629, /* 001100011000101001 */
145 [20] = 0x0b62c, /* 001011011000101100 */
146 [21] = 0x0b5a5, /* 001011010110100101 */
147 [22] = 0x0bda5, /* 001011110110100101 */
148 [23] = 0x0f1bd, /* 001111011110111101 */
149 [24] = 0x0f1bc, /* 001111011110111100 */
150 [25] = 0x0f1bd, /* 001111011110111101 */
151 [26] = 0x0f19d, /* 001111011110011101 */
152 [27] = 0x0f1be, /* 001111011110111110 */
153 [28] = 0x08021, /* 001000000000100001 */
154 [29] = 0x08022, /* 001000000000100010 */
155 [30] = 0x09fdd, /* 001001111111011101 */
156 [31] = 0x083be, /* 001000001110111110 */
157 },
158 .subreg = {
159 [0] = 0x0000, /* 000000000000000 */
160 [1] = 0x0004, /* 000000000000100 */
161 [2] = 0x0180, /* 000000110000000 */
162 [3] = 0x1000, /* 111000000000000 */
163 [4] = 0x3c08, /* 011110000001000 */
164 [5] = 0x0400, /* 000010000000000 */
165 [6] = 0x0010, /* 000000000010000 */
166 [7] = 0x0c0c, /* 000110000001100 */
167 [8] = 0x1000, /* 001000000000000 */
168 [9] = 0x0200, /* 000001000000000 */
169 [10] = 0x0294, /* 000001010010100 */
170 [11] = 0x0056, /* 000000001010110 */
171 [12] = 0x2000, /* 010000000000000 */
172 [13] = 0x6000, /* 110000000000000 */
173 [14] = 0x0800, /* 000100000000000 */
174 [15] = 0x0080, /* 000000010000000 */
175 [16] = 0x0008, /* 000000000001000 */
176 [17] = 0x4000, /* 100000000000000 */
177 [18] = 0x0280, /* 000001010000000 */
178 [19] = 0x1400, /* 001010000000000 */
179 [20] = 0x1800, /* 001100000000000 */
180 [21] = 0x0054, /* 000000001010100 */
181 [22] = 0x5a94, /* 101101010010100 */
182 [23] = 0x2800, /* 010100000000000 */
183 [24] = 0x008f, /* 000000010001111 */
184 [25] = 0x3000, /* 011000000000000 */
185 [26] = 0x1c00, /* 111110000000000 */
186 [27] = 0x5000, /* 101000000000000 */
187 [28] = 0x000f, /* 000000000001111 */
188 [29] = 0x088f, /* 000100010001111 */
189 [30] = 0x108f, /* 001000010001111 */
190 [31] = 0x0c00, /* 000110000000000 */
191 },
192 .src = {
193 [0] = 0x000, /* 000000000000 */
194 [1] = 0x588, /* 010110001000 */
195 [2] = 0x468, /* 010001101000 */
196 [3] = 0x228, /* 001000101000 */
197 [4] = 0x690, /* 011010010000 */
198 [5] = 0x120, /* 000100100000 */
199 [6] = 0x46c, /* 010001101100 */
200 [7] = 0x510, /* 010101110000 */
201 [8] = 0x618, /* 011001111000 */
202 [9] = 0x328, /* 001100101000 */
203 [10] = 0x58c, /* 010110001100 */
204 [11] = 0x220, /* 001000100000 */
205 [12] = 0x58a, /* 010110001010 */
206 [13] = 0x002, /* 000000000010 */
207 [14] = 0x550, /* 010101010000 */
208 [15] = 0x568, /* 010101101000 */
209 [16] = 0xf4c, /* 111101001100 */
210 [17] = 0xf2c, /* 111100101100 */
211 [18] = 0x610, /* 011001110000 */
212 [19] = 0x589, /* 010110001001 */
213 [20] = 0x558, /* 010101011000 */
214 [21] = 0x348, /* 001101001000 */
215 [22] = 0x42c, /* 010000101100 */
216 [23] = 0x400, /* 010000000000 */
217 [24] = 0x310, /* 001101110000 */
218 [25] = 0x310, /* 001100010000 */
219 [26] = 0x300, /* 001100000000 */
220 [27] = 0x46a, /* 010001101010 */
221 [28] = 0x318, /* 001101111000 */
222 [29] = 0x010, /* 000001110000 */
223 [30] = 0x320, /* 001100100000 */
224 [31] = 0x350, /* 001101010000 */
225 },
226 };
227
228 /*
229 * From the Ivy Bridge PRM, volume 4 part 3, page 128:
230 *
231 * "(Src0Index) Lookup one of 32 12-bit values. That value is used (from
232 * MSB to LSB) for the Src0.AddrMode, Src0.ChanSel[7:4], Src0.HorzStride,
233 * Src0.SrcMod, Src0.VertStride, and Src0.Width bit fields."
234 *
235 * "(SubRegIndex) Lookup one of 32 15-bit values. That value is used (from
236 * MSB to LSB) for various fields for Src1, Src0, and Dst, including
237 * ChanEn/ChanSel, SubRegNum, and AddrImm[4] or AddrImm[4:0], depending
238 * on AddrMode and AccessMode.
239 *
240 * "(DataTypeIndex) Lookup one of 32 18-bit values. That value is used
241 * (from MSB to LSB) for the Dst.AddrMode, Dst.HorzStride, Dst.DstType,
242 * Dst.RegFile, Src0.SrcType, Src0.RegFile, Src1.SrcType, and
243 * Src1.RegType bit fields."
244 *
245 * "(ControlIndex) Lookup one of 32 19-bit values. That value is used
246 * (from MSB to LSB) for the FlagRegNum, FlagSubRegNum, Saturate,
247 * ExecSize, PredInv, PredCtrl, ThreadCtrl, QtrCtrl, DepCtrl, MaskCtrl,
248 * and AccessMode bit fields."
249 */
250 static const struct toy_compaction_table toy_compaction_table_gen7 = {
251 .control = {
252 [0] = 0x00002, /* 0000000000000000010 */
253 [1] = 0x04000, /* 0000100000000000000 */
254 [2] = 0x04001, /* 0000100000000000001 */
255 [3] = 0x04002, /* 0000100000000000010 */
256 [4] = 0x04003, /* 0000100000000000011 */
257 [5] = 0x04004, /* 0000100000000000100 */
258 [6] = 0x04005, /* 0000100000000000101 */
259 [7] = 0x04007, /* 0000100000000000111 */
260 [8] = 0x04008, /* 0000100000000001000 */
261 [9] = 0x04009, /* 0000100000000001001 */
262 [10] = 0x0400d, /* 0000100000000001101 */
263 [11] = 0x06000, /* 0000110000000000000 */
264 [12] = 0x06001, /* 0000110000000000001 */
265 [13] = 0x06002, /* 0000110000000000010 */
266 [14] = 0x06003, /* 0000110000000000011 */
267 [15] = 0x06004, /* 0000110000000000100 */
268 [16] = 0x06005, /* 0000110000000000101 */
269 [17] = 0x06007, /* 0000110000000000111 */
270 [18] = 0x06009, /* 0000110000000001001 */
271 [19] = 0x0600d, /* 0000110000000001101 */
272 [20] = 0x06010, /* 0000110000000010000 */
273 [21] = 0x06100, /* 0000110000100000000 */
274 [22] = 0x08000, /* 0001000000000000000 */
275 [23] = 0x08002, /* 0001000000000000010 */
276 [24] = 0x08004, /* 0001000000000000100 */
277 [25] = 0x08100, /* 0001000000100000000 */
278 [26] = 0x16000, /* 0010110000000000000 */
279 [27] = 0x16010, /* 0010110000000010000 */
280 [28] = 0x18000, /* 0011000000000000000 */
281 [29] = 0x18100, /* 0011000000100000000 */
282 [30] = 0x28000, /* 0101000000000000000 */
283 [31] = 0x28100, /* 0101000000100000000 */
284 },
285 .datatype = {
286 [0] = 0x08001, /* 001000000000000001 */
287 [1] = 0x08020, /* 001000000000100000 */
288 [2] = 0x08021, /* 001000000000100001 */
289 [3] = 0x08061, /* 001000000001100001 */
290 [4] = 0x080bd, /* 001000000010111101 */
291 [5] = 0x082fd, /* 001000001011111101 */
292 [6] = 0x083a1, /* 001000001110100001 */
293 [7] = 0x083a5, /* 001000001110100101 */
294 [8] = 0x083bd, /* 001000001110111101 */
295 [9] = 0x08421, /* 001000010000100001 */
296 [10] = 0x08c20, /* 001000110000100000 */
297 [11] = 0x08c21, /* 001000110000100001 */
298 [12] = 0x094a5, /* 001001010010100101 */
299 [13] = 0x09ca4, /* 001001110010100100 */
300 [14] = 0x09ca5, /* 001001110010100101 */
301 [15] = 0x0f3bd, /* 001111001110111101 */
302 [16] = 0x0f79d, /* 001111011110011101 */
303 [17] = 0x0f7bc, /* 001111011110111100 */
304 [18] = 0x0f7bd, /* 001111011110111101 */
305 [19] = 0x0ffbc, /* 001111111110111100 */
306 [20] = 0x0020c, /* 000000001000001100 */
307 [21] = 0x0803d, /* 001000000000111101 */
308 [22] = 0x080a5, /* 001000000010100101 */
309 [23] = 0x08420, /* 001000010000100000 */
310 [24] = 0x094a4, /* 001001010010100100 */
311 [25] = 0x09c84, /* 001001110010000100 */
312 [26] = 0x0a509, /* 001010010100001001 */
313 [27] = 0x0dfbd, /* 001101111110111101 */
314 [28] = 0x0ffbd, /* 001111111110111101 */
315 [29] = 0x0bdac, /* 001011110110101100 */
316 [30] = 0x0a528, /* 001010010100101000 */
317 [31] = 0x0ad28, /* 001010110100101000 */
318 },
319 .subreg = {
320 [0] = 0x0000, /* 000000000000000 */
321 [1] = 0x0001, /* 000000000000001 */
322 [2] = 0x0008, /* 000000000001000 */
323 [3] = 0x000f, /* 000000000001111 */
324 [4] = 0x0010, /* 000000000010000 */
325 [5] = 0x0080, /* 000000010000000 */
326 [6] = 0x0100, /* 000000100000000 */
327 [7] = 0x0180, /* 000000110000000 */
328 [8] = 0x0200, /* 000001000000000 */
329 [9] = 0x0210, /* 000001000010000 */
330 [10] = 0x0280, /* 000001010000000 */
331 [11] = 0x1000, /* 001000000000000 */
332 [12] = 0x1001, /* 001000000000001 */
333 [13] = 0x1081, /* 001000010000001 */
334 [14] = 0x1082, /* 001000010000010 */
335 [15] = 0x1083, /* 001000010000011 */
336 [16] = 0x1084, /* 001000010000100 */
337 [17] = 0x1087, /* 001000010000111 */
338 [18] = 0x1088, /* 001000010001000 */
339 [19] = 0x108e, /* 001000010001110 */
340 [20] = 0x108f, /* 001000010001111 */
341 [21] = 0x1180, /* 001000110000000 */
342 [22] = 0x11e8, /* 001000111101000 */
343 [23] = 0x2000, /* 010000000000000 */
344 [24] = 0x2180, /* 010000110000000 */
345 [25] = 0x3000, /* 011000000000000 */
346 [26] = 0x3c87, /* 011110010000111 */
347 [27] = 0x4000, /* 100000000000000 */
348 [28] = 0x5000, /* 101000000000000 */
349 [29] = 0x6000, /* 110000000000000 */
350 [30] = 0x7000, /* 111000000000000 */
351 [31] = 0x701c, /* 111000000011100 */
352 },
353 .src = {
354 [0] = 0x000, /* 000000000000 */
355 [1] = 0x002, /* 000000000010 */
356 [2] = 0x010, /* 000000010000 */
357 [3] = 0x012, /* 000000010010 */
358 [4] = 0x018, /* 000000011000 */
359 [5] = 0x020, /* 000000100000 */
360 [6] = 0x028, /* 000000101000 */
361 [7] = 0x048, /* 000001001000 */
362 [8] = 0x050, /* 000001010000 */
363 [9] = 0x070, /* 000001110000 */
364 [10] = 0x078, /* 000001111000 */
365 [11] = 0x300, /* 001100000000 */
366 [12] = 0x302, /* 001100000010 */
367 [13] = 0x308, /* 001100001000 */
368 [14] = 0x310, /* 001100010000 */
369 [15] = 0x312, /* 001100010010 */
370 [16] = 0x320, /* 001100100000 */
371 [17] = 0x328, /* 001100101000 */
372 [18] = 0x338, /* 001100111000 */
373 [19] = 0x340, /* 001101000000 */
374 [20] = 0x342, /* 001101000010 */
375 [21] = 0x348, /* 001101001000 */
376 [22] = 0x350, /* 001101010000 */
377 [23] = 0x360, /* 001101100000 */
378 [24] = 0x368, /* 001101101000 */
379 [25] = 0x370, /* 001101110000 */
380 [26] = 0x371, /* 001101110001 */
381 [27] = 0x378, /* 001101111000 */
382 [28] = 0x468, /* 010001101000 */
383 [29] = 0x469, /* 010001101001 */
384 [30] = 0x46a, /* 010001101010 */
385 [31] = 0x588, /* 010110001000 */
386 },
387 };
388
389 static const struct toy_compaction_table toy_compaction_table_gen8 = {
390 .control = {
391 },
392 .datatype = {
393 },
394 .subreg = {
395 },
396 .src = {
397 },
398 .control_3src = {
399 },
400 .source_3src = {
401 },
402 };
403
404 const struct toy_compaction_table *
toy_compiler_get_compaction_table(const struct ilo_dev * dev)405 toy_compiler_get_compaction_table(const struct ilo_dev *dev)
406 {
407 switch (ilo_dev_gen(dev)) {
408 case ILO_GEN(8):
409 return &toy_compaction_table_gen8;
410 case ILO_GEN(7.5):
411 case ILO_GEN(7):
412 return &toy_compaction_table_gen7;
413 case ILO_GEN(6):
414 return &toy_compaction_table_gen6;
415 default:
416 assert(!"unsupported gen");
417 return NULL;
418 }
419 }
420
421 /**
422 * Return true if the source operand is null.
423 */
424 static bool
src_is_null(const struct codegen * cg,int idx)425 src_is_null(const struct codegen *cg, int idx)
426 {
427 const struct codegen_src *src = &cg->src[idx];
428
429 return (src->file == GEN6_FILE_ARF &&
430 src->origin == GEN6_ARF_NULL << CG_REG_SHIFT);
431 }
432
433 /**
434 * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
435 */
436 static uint32_t
translate_src_gen6(const struct codegen * cg,int idx)437 translate_src_gen6(const struct codegen *cg, int idx)
438 {
439 const struct codegen_src *src = &cg->src[idx];
440 uint32_t dw;
441
442 ILO_DEV_ASSERT(cg->dev, 6, 8);
443
444 /* special treatment may be needed if any of the operand is immediate */
445 if (cg->src[0].file == GEN6_FILE_IMM) {
446 assert(!cg->src[0].absolute && !cg->src[0].negate);
447
448 /* only the last src operand can be an immediate unless it is Gen8+ */
449 assert(ilo_dev_gen(cg->dev) >= ILO_GEN(8) || src_is_null(cg, 1));
450
451 if (!src_is_null(cg, 1))
452 return cg->src[idx].origin;
453
454 if (idx == 0) {
455 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
456 return cg->src[1].type << 27 |
457 cg->src[1].file << 25;
458 } else {
459 return cg->flag_sub_reg_num << 25;
460 }
461 } else {
462 return cg->src[0].origin;
463 }
464 }
465 else if (idx && cg->src[1].file == GEN6_FILE_IMM) {
466 assert(!cg->src[1].absolute && !cg->src[1].negate);
467 return cg->src[1].origin;
468 }
469
470 assert(src->file != GEN6_FILE_IMM);
471
472 if (src->indirect) {
473 const int offset = (int) src->origin;
474
475 assert(src->file == GEN6_FILE_GRF);
476 assert(offset < 512 && offset >= -512);
477
478 if (cg->inst->access_mode == GEN6_ALIGN_16) {
479 assert(src->width == GEN6_WIDTH_4);
480 assert(src->horz_stride == GEN6_HORZSTRIDE_1);
481
482 /* the lower 4 bits are reserved for the swizzle_[xy] */
483 assert(!(src->origin & 0xf));
484
485 dw = src->vert_stride << 21 |
486 src->swizzle[3] << 18 |
487 src->swizzle[2] << 16 |
488 GEN6_ADDRMODE_INDIRECT << 15 |
489 src->negate << 14 |
490 src->absolute << 13 |
491 src->swizzle[1] << 2 |
492 src->swizzle[0];
493 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
494 dw |= src->indirect_subreg << 9 |
495 (src->origin & 0x1f0);
496 } else {
497 dw |= src->indirect_subreg << 10 |
498 (src->origin & 0x3f0);
499 }
500 }
501 else {
502 assert(src->swizzle[0] == TOY_SWIZZLE_X &&
503 src->swizzle[1] == TOY_SWIZZLE_Y &&
504 src->swizzle[2] == TOY_SWIZZLE_Z &&
505 src->swizzle[3] == TOY_SWIZZLE_W);
506
507 dw = src->vert_stride << 21 |
508 src->width << 18 |
509 src->horz_stride << 16 |
510 GEN6_ADDRMODE_INDIRECT << 15 |
511 src->negate << 14 |
512 src->absolute << 13;
513 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
514 dw |= src->indirect_subreg << 9 |
515 (src->origin & 0x1ff);
516 } else {
517 dw |= src->indirect_subreg << 10 |
518 (src->origin & 0x3ff);
519 }
520 }
521 }
522 else {
523 switch (src->file) {
524 case GEN6_FILE_ARF:
525 break;
526 case GEN6_FILE_GRF:
527 assert(CG_REG_NUM(src->origin) < 128);
528 break;
529 case GEN6_FILE_MRF:
530 assert(cg->inst->opcode == GEN6_OPCODE_SEND ||
531 cg->inst->opcode == GEN6_OPCODE_SENDC);
532 assert(CG_REG_NUM(src->origin) < 16);
533 break;
534 case GEN6_FILE_IMM:
535 default:
536 assert(!"invalid src file");
537 break;
538 }
539
540 if (cg->inst->access_mode == GEN6_ALIGN_16) {
541 assert(src->width == GEN6_WIDTH_4);
542 assert(src->horz_stride == GEN6_HORZSTRIDE_1);
543
544 /* the lower 4 bits are reserved for the swizzle_[xy] */
545 assert(!(src->origin & 0xf));
546
547 dw = src->vert_stride << 21 |
548 src->swizzle[3] << 18 |
549 src->swizzle[2] << 16 |
550 GEN6_ADDRMODE_DIRECT << 15 |
551 src->negate << 14 |
552 src->absolute << 13 |
553 src->origin |
554 src->swizzle[1] << 2 |
555 src->swizzle[0];
556 }
557 else {
558 assert(src->swizzle[0] == TOY_SWIZZLE_X &&
559 src->swizzle[1] == TOY_SWIZZLE_Y &&
560 src->swizzle[2] == TOY_SWIZZLE_Z &&
561 src->swizzle[3] == TOY_SWIZZLE_W);
562
563 dw = src->vert_stride << 21 |
564 src->width << 18 |
565 src->horz_stride << 16 |
566 GEN6_ADDRMODE_DIRECT << 15 |
567 src->negate << 14 |
568 src->absolute << 13 |
569 src->origin;
570 }
571 }
572
573 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
574 const bool indirect_origin_bit9 = (cg->dst.indirect) ?
575 (src->origin & 0x200) : 0;
576
577 if (idx == 0) {
578 dw |= indirect_origin_bit9 << 31 |
579 cg->src[1].type << 27 |
580 cg->src[1].file << 25;
581 } else {
582 dw |= indirect_origin_bit9 << 25;
583 }
584 } else {
585 if (idx == 0)
586 dw |= cg->flag_sub_reg_num << 25;
587 }
588
589 return dw;
590 }
591
592 /**
593 * Translate the destination operand to the higher 16 bits of DW1 of the
594 * 1-src/2-src format.
595 */
596 static uint16_t
translate_dst_region_gen6(const struct codegen * cg)597 translate_dst_region_gen6(const struct codegen *cg)
598 {
599 const struct codegen_dst *dst = &cg->dst;
600 uint16_t dw1_region;
601
602 ILO_DEV_ASSERT(cg->dev, 6, 8);
603
604 if (dst->file == GEN6_FILE_IMM) {
605 /* dst is immediate (JIP) when the opcode is a conditional branch */
606 switch (cg->inst->opcode) {
607 case GEN6_OPCODE_IF:
608 case GEN6_OPCODE_ELSE:
609 case GEN6_OPCODE_ENDIF:
610 case GEN6_OPCODE_WHILE:
611 assert(dst->type == GEN6_TYPE_W);
612 dw1_region = (dst->origin & 0xffff);
613 break;
614 default:
615 assert(!"dst cannot be immediate");
616 dw1_region = 0;
617 break;
618 }
619
620 return dw1_region;
621 }
622
623 if (dst->indirect) {
624 const int offset = (int) dst->origin;
625
626 assert(dst->file == GEN6_FILE_GRF);
627 assert(offset < 512 && offset >= -512);
628
629 if (cg->inst->access_mode == GEN6_ALIGN_16) {
630 /*
631 * From the Sandy Bridge PRM, volume 4 part 2, page 144:
632 *
633 * "Allthough Dst.HorzStride is a don't care for Align16, HW
634 * needs this to be programmed as 01."
635 */
636 assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
637 /* the lower 4 bits are reserved for the writemask */
638 assert(!(dst->origin & 0xf));
639
640 dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
641 dst->horz_stride << 13 |
642 dst->writemask;
643 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
644 dw1_region |= dst->indirect_subreg << 9 |
645 (dst->origin & 0x1f0);
646 } else {
647 dw1_region |= dst->indirect_subreg << 10 |
648 (dst->origin & 0x3f0);
649 }
650 }
651 else {
652 assert(dst->writemask == TOY_WRITEMASK_XYZW);
653
654 dw1_region = GEN6_ADDRMODE_INDIRECT << 15 |
655 dst->horz_stride << 13;
656 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
657 dw1_region |= dst->indirect_subreg << 9 |
658 (dst->origin & 0x1ff);
659 } else {
660 dw1_region |= dst->indirect_subreg << 10 |
661 (dst->origin & 0x3ff);
662 }
663 }
664 }
665 else {
666 assert((dst->file == GEN6_FILE_GRF &&
667 CG_REG_NUM(dst->origin) < 128) ||
668 (dst->file == GEN6_FILE_MRF &&
669 CG_REG_NUM(dst->origin) < 16) ||
670 (dst->file == GEN6_FILE_ARF));
671
672 if (cg->inst->access_mode == GEN6_ALIGN_16) {
673 /* similar to the indirect case */
674 assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
675 assert(!(dst->origin & 0xf));
676
677 dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
678 dst->horz_stride << 13 |
679 dst->origin |
680 dst->writemask;
681 }
682 else {
683 assert(dst->writemask == TOY_WRITEMASK_XYZW);
684
685 dw1_region = GEN6_ADDRMODE_DIRECT << 15 |
686 dst->horz_stride << 13 |
687 dst->origin;
688 }
689 }
690
691 return dw1_region;
692 }
693
694 /**
695 * Translate the destination operand to DW1 of the 1-src/2-src format.
696 */
697 static uint32_t
translate_dst_gen6(const struct codegen * cg)698 translate_dst_gen6(const struct codegen *cg)
699 {
700 ILO_DEV_ASSERT(cg->dev, 6, 7.5);
701
702 return translate_dst_region_gen6(cg) << 16 |
703 cg->src[1].type << 12 |
704 cg->src[1].file << 10 |
705 cg->src[0].type << 7 |
706 cg->src[0].file << 5 |
707 cg->dst.type << 2 |
708 cg->dst.file;
709 }
710
711 static uint32_t
translate_dst_gen8(const struct codegen * cg)712 translate_dst_gen8(const struct codegen *cg)
713 {
714 const bool indirect_origin_bit9 = (cg->dst.indirect) ?
715 (cg->dst.origin & 0x200) : 0;
716
717 ILO_DEV_ASSERT(cg->dev, 8, 8);
718
719 return translate_dst_region_gen6(cg) << 16 |
720 indirect_origin_bit9 << 15 |
721 cg->src[0].type << 11 |
722 cg->src[0].file << 9 |
723 cg->dst.type << 5 |
724 cg->dst.file << 3 |
725 cg->inst->mask_ctrl << 2 |
726 cg->flag_reg_num << 1 |
727 cg->flag_sub_reg_num;
728 }
729
730 /**
731 * Translate the instruction to DW0 of the 1-src/2-src format.
732 */
733 static uint32_t
translate_inst_gen6(const struct codegen * cg)734 translate_inst_gen6(const struct codegen *cg)
735 {
736 const bool debug_ctrl = false;
737 const bool cmpt_ctrl = false;
738
739 ILO_DEV_ASSERT(cg->dev, 6, 7.5);
740
741 assert(cg->inst->opcode < 128);
742
743 return cg->inst->saturate << 31 |
744 debug_ctrl << 30 |
745 cmpt_ctrl << 29 |
746 cg->inst->acc_wr_ctrl << 28 |
747 cg->inst->cond_modifier << 24 |
748 cg->inst->exec_size << 21 |
749 cg->inst->pred_inv << 20 |
750 cg->inst->pred_ctrl << 16 |
751 cg->inst->thread_ctrl << 14 |
752 cg->inst->qtr_ctrl << 12 |
753 cg->inst->dep_ctrl << 10 |
754 cg->inst->mask_ctrl << 9 |
755 cg->inst->access_mode << 8 |
756 cg->inst->opcode;
757 }
758
759 static uint32_t
translate_inst_gen8(const struct codegen * cg)760 translate_inst_gen8(const struct codegen *cg)
761 {
762 const bool debug_ctrl = false;
763 const bool cmpt_ctrl = false;
764
765 ILO_DEV_ASSERT(cg->dev, 8, 8);
766
767 assert(cg->inst->opcode < 128);
768
769 return cg->inst->saturate << 31 |
770 debug_ctrl << 30 |
771 cmpt_ctrl << 29 |
772 cg->inst->acc_wr_ctrl << 28 |
773 cg->inst->cond_modifier << 24 |
774 cg->inst->exec_size << 21 |
775 cg->inst->pred_inv << 20 |
776 cg->inst->pred_ctrl << 16 |
777 cg->inst->thread_ctrl << 14 |
778 cg->inst->qtr_ctrl << 12 |
779 cg->inst->dep_ctrl << 9 |
780 cg->inst->access_mode << 8 |
781 cg->inst->opcode;
782 }
783
784 /**
785 * Codegen an instruction in 1-src/2-src format.
786 */
787 static void
codegen_inst_gen6(const struct codegen * cg,uint32_t * code)788 codegen_inst_gen6(const struct codegen *cg, uint32_t *code)
789 {
790 ILO_DEV_ASSERT(cg->dev, 6, 8);
791
792 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
793 code[0] = translate_inst_gen8(cg);
794 code[1] = translate_dst_gen8(cg);
795 } else {
796 code[0] = translate_inst_gen6(cg);
797 code[1] = translate_dst_gen6(cg);
798 }
799
800 code[2] = translate_src_gen6(cg, 0);
801 code[3] = translate_src_gen6(cg, 1);
802 assert(src_is_null(cg, 2));
803 }
804
805 /**
806 * Codegen an instruction in 3-src format.
807 */
808 static void
codegen_inst_3src_gen6(const struct codegen * cg,uint32_t * code)809 codegen_inst_3src_gen6(const struct codegen *cg, uint32_t *code)
810 {
811 const struct codegen_dst *dst = &cg->dst;
812 uint32_t dw0, dw1, dw_src[3];
813 int i;
814
815 ILO_DEV_ASSERT(cg->dev, 6, 8);
816
817 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8))
818 dw0 = translate_inst_gen8(cg);
819 else
820 dw0 = translate_inst_gen6(cg);
821
822 /*
823 * 3-src instruction restrictions
824 *
825 * - align16 with direct addressing
826 * - GRF or MRF dst
827 * - GRF src
828 * - sub_reg_num is DWORD aligned
829 * - no regioning except replication control
830 * (vert_stride == 0 && horz_stride == 0)
831 */
832 assert(cg->inst->access_mode == GEN6_ALIGN_16);
833
834 assert(!dst->indirect);
835 assert((dst->file == GEN6_FILE_GRF && CG_REG_NUM(dst->origin) < 128) ||
836 (dst->file == GEN6_FILE_MRF && CG_REG_NUM(dst->origin) < 16));
837 assert(!(dst->origin & 0x3));
838 assert(dst->horz_stride == GEN6_HORZSTRIDE_1);
839
840 if (ilo_dev_gen(cg->dev) >= ILO_GEN(8)) {
841 dw1 = dst->origin << 19 |
842 dst->writemask << 17 |
843 cg->src[2].negate << 10 |
844 cg->src[2].negate << 10 |
845 cg->src[2].absolute << 9 |
846 cg->src[1].negate << 8 |
847 cg->src[1].absolute << 7 |
848 cg->src[0].negate << 6 |
849 cg->src[0].absolute << 5 |
850 cg->inst->mask_ctrl << 2 |
851 cg->flag_reg_num << 1 |
852 cg->flag_sub_reg_num;
853 } else {
854 dw1 = dst->origin << 19 |
855 dst->writemask << 17 |
856 cg->src[2].negate << 9 |
857 cg->src[2].absolute << 8 |
858 cg->src[1].negate << 7 |
859 cg->src[1].absolute << 6 |
860 cg->src[0].negate << 5 |
861 cg->src[0].absolute << 4 |
862 cg->flag_sub_reg_num << 1 |
863 (dst->file == GEN6_FILE_MRF);
864 }
865
866 for (i = 0; i < 3; i++) {
867 const struct codegen_src *src = &cg->src[i];
868
869 assert(!src->indirect);
870 assert(src->file == GEN6_FILE_GRF && CG_REG_NUM(src->origin) < 128);
871 assert(!(src->origin & 0x3));
872
873 assert((src->vert_stride == GEN6_VERTSTRIDE_4 &&
874 src->horz_stride == GEN6_HORZSTRIDE_1) ||
875 (src->vert_stride == GEN6_VERTSTRIDE_0 &&
876 src->horz_stride == GEN6_HORZSTRIDE_0));
877 assert(src->width == GEN6_WIDTH_4);
878
879 dw_src[i] = src->origin << 7 |
880 src->swizzle[3] << 7 |
881 src->swizzle[2] << 5 |
882 src->swizzle[1] << 3 |
883 src->swizzle[0] << 1 |
884 (src->vert_stride == GEN6_VERTSTRIDE_0 &&
885 src->horz_stride == GEN6_HORZSTRIDE_0);
886
887 /* only the lower 20 bits are used */
888 assert((dw_src[i] & 0xfffff) == dw_src[i]);
889 }
890
891 code[0] = dw0;
892 code[1] = dw1;
893 /* concatenate the bits of dw_src */
894 code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
895 code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
896 }
897
898 /**
899 * Sanity check the region parameters of the operands.
900 */
901 static void
codegen_validate_region_restrictions(const struct codegen * cg)902 codegen_validate_region_restrictions(const struct codegen *cg)
903 {
904 const int exec_size_map[] = {
905 [GEN6_EXECSIZE_1] = 1,
906 [GEN6_EXECSIZE_2] = 2,
907 [GEN6_EXECSIZE_4] = 4,
908 [GEN6_EXECSIZE_8] = 8,
909 [GEN6_EXECSIZE_16] = 16,
910 [GEN6_EXECSIZE_32] = 32,
911 };
912 const int width_map[] = {
913 [GEN6_WIDTH_1] = 1,
914 [GEN6_WIDTH_2] = 2,
915 [GEN6_WIDTH_4] = 4,
916 [GEN6_WIDTH_8] = 8,
917 [GEN6_WIDTH_16] = 16,
918 };
919 const int horz_stride_map[] = {
920 [GEN6_HORZSTRIDE_0] = 0,
921 [GEN6_HORZSTRIDE_1] = 1,
922 [GEN6_HORZSTRIDE_2] = 2,
923 [GEN6_HORZSTRIDE_4] = 4,
924 };
925 const int vert_stride_map[] = {
926 [GEN6_VERTSTRIDE_0] = 0,
927 [GEN6_VERTSTRIDE_1] = 1,
928 [GEN6_VERTSTRIDE_2] = 2,
929 [GEN6_VERTSTRIDE_4] = 4,
930 [GEN6_VERTSTRIDE_8] = 8,
931 [GEN6_VERTSTRIDE_16] = 16,
932 [GEN6_VERTSTRIDE_32] = 32,
933 [7] = 64,
934 [8] = 128,
935 [9] = 256,
936 [GEN6_VERTSTRIDE_VXH] = 0,
937 };
938 const int exec_size = exec_size_map[cg->inst->exec_size];
939 int i;
940
941 /* Sandy Bridge PRM, volume 4 part 2, page 94 */
942
943 /* 1. (we don't do 32 anyway) */
944 assert(exec_size <= 16);
945
946 for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
947 const int width = width_map[cg->src[i].width];
948 const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
949 const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
950
951 if (src_is_null(cg, i))
952 break;
953
954 /* 3. */
955 assert(exec_size >= width);
956
957 if (exec_size == width) {
958 /* 4. & 5. */
959 if (horz_stride)
960 assert(vert_stride == width * horz_stride);
961 }
962
963 if (width == 1) {
964 /* 6. */
965 assert(horz_stride == 0);
966
967 /* 7. */
968 if (exec_size == 1)
969 assert(vert_stride == 0);
970 }
971
972 /* 8. */
973 if (!vert_stride && !horz_stride)
974 assert(width == 1);
975 }
976
977 /* derived from 10.1.2. & 10.2. */
978 assert(cg->dst.horz_stride != GEN6_HORZSTRIDE_0);
979 }
980
981 static unsigned
translate_vfile(enum toy_file file)982 translate_vfile(enum toy_file file)
983 {
984 switch (file) {
985 case TOY_FILE_ARF: return GEN6_FILE_ARF;
986 case TOY_FILE_GRF: return GEN6_FILE_GRF;
987 case TOY_FILE_MRF: return GEN6_FILE_MRF;
988 case TOY_FILE_IMM: return GEN6_FILE_IMM;
989 default:
990 assert(!"unhandled toy file");
991 return GEN6_FILE_GRF;
992 }
993 }
994
995 static unsigned
translate_vtype(enum toy_type type)996 translate_vtype(enum toy_type type)
997 {
998 switch (type) {
999 case TOY_TYPE_F: return GEN6_TYPE_F;
1000 case TOY_TYPE_D: return GEN6_TYPE_D;
1001 case TOY_TYPE_UD: return GEN6_TYPE_UD;
1002 case TOY_TYPE_W: return GEN6_TYPE_W;
1003 case TOY_TYPE_UW: return GEN6_TYPE_UW;
1004 case TOY_TYPE_V: return GEN6_TYPE_V_IMM;
1005 default:
1006 assert(!"unhandled toy type");
1007 return GEN6_TYPE_F;
1008 }
1009 }
1010
1011 static unsigned
translate_writemask(enum toy_writemask writemask)1012 translate_writemask(enum toy_writemask writemask)
1013 {
1014 /* TOY_WRITEMASK_* are compatible with the hardware definitions */
1015 assert(writemask <= 0xf);
1016 return writemask;
1017 }
1018
1019 static unsigned
translate_swizzle(enum toy_swizzle swizzle)1020 translate_swizzle(enum toy_swizzle swizzle)
1021 {
1022 /* TOY_SWIZZLE_* are compatible with the hardware definitions */
1023 assert(swizzle <= 3);
1024 return swizzle;
1025 }
1026
1027 /**
1028 * Prepare for generating an instruction.
1029 */
1030 static void
codegen_prepare(struct codegen * cg,const struct ilo_dev * dev,const struct toy_inst * inst,int pc,int rect_linear_width)1031 codegen_prepare(struct codegen *cg, const struct ilo_dev *dev,
1032 const struct toy_inst *inst, int pc, int rect_linear_width)
1033 {
1034 int i;
1035
1036 cg->dev = dev;
1037 cg->inst = inst;
1038 cg->pc = pc;
1039
1040 cg->flag_reg_num = 0;
1041 cg->flag_sub_reg_num = 0;
1042
1043 cg->dst.file = translate_vfile(inst->dst.file);
1044 cg->dst.type = translate_vtype(inst->dst.type);
1045 cg->dst.indirect = inst->dst.indirect;
1046 cg->dst.indirect_subreg = inst->dst.indirect_subreg;
1047 cg->dst.origin = inst->dst.val32;
1048
1049 /*
1050 * From the Sandy Bridge PRM, volume 4 part 2, page 81:
1051 *
1052 * "For a word or an unsigned word immediate data, software must
1053 * replicate the same 16-bit immediate value to both the lower word
1054 * and the high word of the 32-bit immediate field in an instruction."
1055 */
1056 if (inst->dst.file == TOY_FILE_IMM) {
1057 switch (inst->dst.type) {
1058 case TOY_TYPE_W:
1059 case TOY_TYPE_UW:
1060 cg->dst.origin &= 0xffff;
1061 cg->dst.origin |= cg->dst.origin << 16;
1062 break;
1063 default:
1064 break;
1065 }
1066 }
1067
1068 cg->dst.writemask = translate_writemask(inst->dst.writemask);
1069
1070 switch (inst->dst.rect) {
1071 case TOY_RECT_LINEAR:
1072 cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1073 break;
1074 default:
1075 assert(!"unsupported dst region");
1076 cg->dst.horz_stride = GEN6_HORZSTRIDE_1;
1077 break;
1078 }
1079
1080 for (i = 0; i < ARRAY_SIZE(cg->src); i++) {
1081 struct codegen_src *src = &cg->src[i];
1082
1083 src->file = translate_vfile(inst->src[i].file);
1084 src->type = translate_vtype(inst->src[i].type);
1085 src->indirect = inst->src[i].indirect;
1086 src->indirect_subreg = inst->src[i].indirect_subreg;
1087 src->origin = inst->src[i].val32;
1088
1089 /* do the same for src */
1090 if (inst->dst.file == TOY_FILE_IMM) {
1091 switch (inst->src[i].type) {
1092 case TOY_TYPE_W:
1093 case TOY_TYPE_UW:
1094 src->origin &= 0xffff;
1095 src->origin |= src->origin << 16;
1096 break;
1097 default:
1098 break;
1099 }
1100 }
1101
1102 src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
1103 src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
1104 src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
1105 src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
1106 src->absolute = inst->src[i].absolute;
1107 src->negate = inst->src[i].negate;
1108
1109 switch (inst->src[i].rect) {
1110 case TOY_RECT_LINEAR:
1111 switch (rect_linear_width) {
1112 case 1:
1113 src->vert_stride = GEN6_VERTSTRIDE_1;
1114 src->width = GEN6_WIDTH_1;
1115 break;
1116 case 2:
1117 src->vert_stride = GEN6_VERTSTRIDE_2;
1118 src->width = GEN6_WIDTH_2;
1119 break;
1120 case 4:
1121 src->vert_stride = GEN6_VERTSTRIDE_4;
1122 src->width = GEN6_WIDTH_4;
1123 break;
1124 case 8:
1125 src->vert_stride = GEN6_VERTSTRIDE_8;
1126 src->width = GEN6_WIDTH_8;
1127 break;
1128 case 16:
1129 src->vert_stride = GEN6_VERTSTRIDE_16;
1130 src->width = GEN6_WIDTH_16;
1131 break;
1132 default:
1133 assert(!"unsupported TOY_RECT_LINEAR width");
1134 src->vert_stride = GEN6_VERTSTRIDE_1;
1135 src->width = GEN6_WIDTH_1;
1136 break;
1137 }
1138 src->horz_stride = GEN6_HORZSTRIDE_1;
1139 break;
1140 case TOY_RECT_041:
1141 src->vert_stride = GEN6_VERTSTRIDE_0;
1142 src->width = GEN6_WIDTH_4;
1143 src->horz_stride = GEN6_HORZSTRIDE_1;
1144 break;
1145 case TOY_RECT_010:
1146 src->vert_stride = GEN6_VERTSTRIDE_0;
1147 src->width = GEN6_WIDTH_1;
1148 src->horz_stride = GEN6_HORZSTRIDE_0;
1149 break;
1150 case TOY_RECT_220:
1151 src->vert_stride = GEN6_VERTSTRIDE_2;
1152 src->width = GEN6_WIDTH_2;
1153 src->horz_stride = GEN6_HORZSTRIDE_0;
1154 break;
1155 case TOY_RECT_440:
1156 src->vert_stride = GEN6_VERTSTRIDE_4;
1157 src->width = GEN6_WIDTH_4;
1158 src->horz_stride = GEN6_HORZSTRIDE_0;
1159 break;
1160 case TOY_RECT_240:
1161 src->vert_stride = GEN6_VERTSTRIDE_2;
1162 src->width = GEN6_WIDTH_4;
1163 src->horz_stride = GEN6_HORZSTRIDE_0;
1164 break;
1165 default:
1166 assert(!"unsupported src region");
1167 src->vert_stride = GEN6_VERTSTRIDE_1;
1168 src->width = GEN6_WIDTH_1;
1169 src->horz_stride = GEN6_HORZSTRIDE_1;
1170 break;
1171 }
1172 }
1173 }
1174
1175 /**
1176 * Generate HW shader code. The instructions should have been legalized.
1177 */
1178 void *
toy_compiler_assemble(struct toy_compiler * tc,int * size)1179 toy_compiler_assemble(struct toy_compiler *tc, int *size)
1180 {
1181 const struct toy_inst *inst;
1182 uint32_t *code;
1183 int pc;
1184
1185 code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
1186 if (!code)
1187 return NULL;
1188
1189 pc = 0;
1190 tc_head(tc);
1191 while ((inst = tc_next(tc)) != NULL) {
1192 uint32_t *dw = &code[pc * 4];
1193 struct codegen cg;
1194
1195 if (pc >= tc->num_instructions) {
1196 tc_fail(tc, "wrong instructoun count");
1197 break;
1198 }
1199
1200 codegen_prepare(&cg, tc->dev, inst, pc, tc->rect_linear_width);
1201 codegen_validate_region_restrictions(&cg);
1202
1203 switch (inst->opcode) {
1204 case GEN6_OPCODE_MAD:
1205 codegen_inst_3src_gen6(&cg, dw);
1206 break;
1207 default:
1208 codegen_inst_gen6(&cg, dw);
1209 break;
1210 }
1211
1212 pc++;
1213 }
1214
1215 /* never return an invalid kernel */
1216 if (tc->fail) {
1217 FREE(code);
1218 return NULL;
1219 }
1220
1221 if (size)
1222 *size = pc * 4 * sizeof(uint32_t);
1223
1224 return code;
1225 }
1226