1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_shader_internal.h"
36
37 struct fs_compile_context {
38 struct ilo_shader *shader;
39 const struct ilo_shader_variant *variant;
40
41 struct toy_compiler tc;
42 struct toy_tgsi tgsi;
43
44 int const_cache;
45 int dispatch_mode;
46
47 struct {
48 int interp_perspective_pixel;
49 int interp_perspective_centroid;
50 int interp_perspective_sample;
51 int interp_nonperspective_pixel;
52 int interp_nonperspective_centroid;
53 int interp_nonperspective_sample;
54 int source_depth;
55 int source_w;
56 int pos_offset;
57 } payloads[2];
58
59 int first_const_grf;
60 int first_attr_grf;
61 int first_free_grf;
62 int last_free_grf;
63
64 int num_grf_per_vrf;
65
66 int first_free_mrf;
67 int last_free_mrf;
68 };
69
70 static void
fetch_position(struct fs_compile_context * fcc,struct toy_dst dst)71 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
72 {
73 struct toy_compiler *tc = &fcc->tc;
74 const struct toy_src src_z =
75 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
76 const struct toy_src src_w =
77 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
78 const int fb_height =
79 (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
80 const bool origin_upper_left =
81 (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
82 const bool pixel_center_integer =
83 (fcc->tgsi.props.fs_coord_pixel_center ==
84 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
85 struct toy_src subspan_x, subspan_y;
86 struct toy_dst tmp, tmp_uw;
87 struct toy_dst real_dst[4];
88
89 tdst_transpose(dst, real_dst);
90
91 subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
92 subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
93
94 subspan_y = tsrc_offset(subspan_x, 0, 1);
95
96 tmp_uw = tdst_uw(tc_alloc_tmp(tc));
97 tmp = tc_alloc_tmp(tc);
98
99 /* X */
100 tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
101 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
102 if (pixel_center_integer)
103 tc_MOV(tc, real_dst[0], tsrc_from(tmp));
104 else
105 tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
106
107 /* Y */
108 tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
109 tc_MOV(tc, tmp, tsrc_from(tmp_uw));
110 if (origin_upper_left && pixel_center_integer) {
111 tc_MOV(tc, real_dst[1], tsrc_from(tmp));
112 }
113 else {
114 struct toy_src y = tsrc_from(tmp);
115 float offset = 0.0f;
116
117 if (!pixel_center_integer)
118 offset += 0.5f;
119
120 if (!origin_upper_left) {
121 offset += (float) (fb_height - 1);
122 y = tsrc_negate(y);
123 }
124
125 tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
126 }
127
128 /* Z and W */
129 tc_MOV(tc, real_dst[2], src_z);
130 tc_INV(tc, real_dst[3], src_w);
131 }
132
133 static void
fetch_face(struct fs_compile_context * fcc,struct toy_dst dst)134 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
135 {
136 struct toy_compiler *tc = &fcc->tc;
137 const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
138 struct toy_dst tmp_f, tmp;
139 struct toy_dst real_dst[4];
140
141 tdst_transpose(dst, real_dst);
142
143 tmp_f = tc_alloc_tmp(tc);
144 tmp = tdst_d(tmp_f);
145 tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
146 tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
147 tc_MOV(tc, tmp_f, tsrc_from(tmp));
148
149 /* convert to 1.0 and -1.0 */
150 tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
151 tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
152
153 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
154 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
155 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
156 }
157
158 static void
fetch_attr(struct fs_compile_context * fcc,struct toy_dst dst,int slot)159 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
160 {
161 struct toy_compiler *tc = &fcc->tc;
162 struct toy_dst real_dst[4];
163 bool is_const = false;
164 int grf, interp, ch;
165
166 tdst_transpose(dst, real_dst);
167
168 grf = fcc->first_attr_grf + slot * 2;
169
170 switch (fcc->tgsi.inputs[slot].interp) {
171 case TGSI_INTERPOLATE_CONSTANT:
172 is_const = true;
173 break;
174 case TGSI_INTERPOLATE_LINEAR:
175 if (fcc->tgsi.inputs[slot].centroid)
176 interp = fcc->payloads[0].interp_nonperspective_centroid;
177 else
178 interp = fcc->payloads[0].interp_nonperspective_pixel;
179 break;
180 case TGSI_INTERPOLATE_COLOR:
181 if (fcc->variant->u.fs.flatshade) {
182 is_const = true;
183 break;
184 }
185 /* fall through */
186 case TGSI_INTERPOLATE_PERSPECTIVE:
187 if (fcc->tgsi.inputs[slot].centroid)
188 interp = fcc->payloads[0].interp_perspective_centroid;
189 else
190 interp = fcc->payloads[0].interp_perspective_pixel;
191 break;
192 default:
193 assert(!"unexpected FS interpolation");
194 interp = fcc->payloads[0].interp_perspective_pixel;
195 break;
196 }
197
198 if (is_const) {
199 struct toy_src a0[4];
200
201 a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
202 a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
203 a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
204 a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
205
206 for (ch = 0; ch < 4; ch++)
207 tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
208 }
209 else {
210 struct toy_src attr[4], uv;
211
212 attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
213 attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
214 attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
215 attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
216
217 uv = tsrc(TOY_FILE_GRF, interp, 0);
218
219 for (ch = 0; ch < 4; ch++) {
220 tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch],
221 tsrc_rect(attr[ch], TOY_RECT_010), uv);
222 }
223 }
224
225 if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
226 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
227 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
228 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
229 }
230 }
231
232 static void
fs_lower_opcode_tgsi_in(struct fs_compile_context * fcc,struct toy_dst dst,int dim,int idx)233 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
234 struct toy_dst dst, int dim, int idx)
235 {
236 int slot;
237
238 assert(!dim);
239
240 slot = toy_tgsi_find_input(&fcc->tgsi, idx);
241 if (slot < 0)
242 return;
243
244 switch (fcc->tgsi.inputs[slot].semantic_name) {
245 case TGSI_SEMANTIC_POSITION:
246 fetch_position(fcc, dst);
247 break;
248 case TGSI_SEMANTIC_FACE:
249 fetch_face(fcc, dst);
250 break;
251 default:
252 fetch_attr(fcc, dst, slot);
253 break;
254 }
255 }
256
257 static void
fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)258 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
259 struct toy_dst dst, int dim,
260 struct toy_src idx)
261 {
262 const struct toy_dst offset =
263 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
264 struct toy_compiler *tc = &fcc->tc;
265 unsigned simd_mode, param_size;
266 struct toy_inst *inst;
267 struct toy_src desc, real_src[4];
268 struct toy_dst tmp, real_dst[4];
269 unsigned i;
270
271 tsrc_transpose(idx, real_src);
272
273 /* set offset */
274 inst = tc_MOV(tc, offset, real_src[0]);
275 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
276
277 switch (inst->exec_size) {
278 case GEN6_EXECSIZE_8:
279 simd_mode = GEN6_MSG_SAMPLER_SIMD8;
280 param_size = 1;
281 break;
282 case GEN6_EXECSIZE_16:
283 simd_mode = GEN6_MSG_SAMPLER_SIMD16;
284 param_size = 2;
285 break;
286 default:
287 assert(!"unsupported execution size");
288 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
289 return;
290 break;
291 }
292
293 desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
294 simd_mode,
295 GEN6_MSG_SAMPLER_LD,
296 0,
297 fcc->shader->bt.const_base + dim);
298
299 tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
300 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
301 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
302
303 tdst_transpose(dst, real_dst);
304 for (i = 0; i < 4; i++) {
305 const struct toy_src src =
306 tsrc_offset(tsrc_from(tmp), param_size * i, 0);
307
308 /* cast to type D to make sure these are raw moves */
309 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
310 }
311 }
312
313 static bool
fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)314 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
315 struct toy_dst dst, int dim,
316 struct toy_src idx)
317 {
318 const int grf = fcc->first_const_grf + idx.val32 / 2;
319 const int grf_subreg = (idx.val32 & 1) * 16;
320 struct toy_src src;
321 struct toy_dst real_dst[4];
322 unsigned i;
323
324 if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
325 grf >= fcc->first_attr_grf)
326 return false;
327
328 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
329
330 tdst_transpose(dst, real_dst);
331 for (i = 0; i < 4; i++) {
332 /* cast to type D to make sure these are raw moves */
333 tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
334 }
335
336 return true;
337 }
338
339 static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)340 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
341 struct toy_dst dst, int dim, struct toy_src idx)
342 {
343 const struct toy_dst header =
344 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
345 const struct toy_dst global_offset =
346 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
347 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
348 struct toy_compiler *tc = &fcc->tc;
349 unsigned msg_type, msg_ctrl, msg_len;
350 struct toy_inst *inst;
351 struct toy_src desc;
352 struct toy_dst tmp, real_dst[4];
353 unsigned i;
354
355 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
356 return;
357
358 /* set message header */
359 inst = tc_MOV(tc, header, r0);
360 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
361
362 /* set global offset */
363 inst = tc_MOV(tc, global_offset, idx);
364 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
365 inst->exec_size = GEN6_EXECSIZE_1;
366 inst->src[0].rect = TOY_RECT_010;
367
368 msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ;
369 msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO;
370 msg_len = 1;
371
372 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
373 msg_type, msg_ctrl, fcc->shader->bt.const_base + dim);
374
375 tmp = tc_alloc_tmp(tc);
376
377 tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
378
379 tdst_transpose(dst, real_dst);
380 for (i = 0; i < 4; i++) {
381 const struct toy_src src =
382 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
383
384 /* cast to type D to make sure these are raw moves */
385 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
386 }
387 }
388
389 static void
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)390 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
391 struct toy_dst dst, int dim, struct toy_src idx)
392 {
393 struct toy_compiler *tc = &fcc->tc;
394 const struct toy_dst offset =
395 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
396 struct toy_src desc;
397 struct toy_inst *inst;
398 struct toy_dst tmp, real_dst[4];
399 unsigned i;
400
401 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
402 return;
403
404 /*
405 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
406 * changed from OWord Block Read to ld to increase performance in the
407 * classic driver. Since we use the constant cache instead of the data
408 * cache, I wonder if we still want to follow the classic driver.
409 */
410
411 /* set offset */
412 inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
413 inst->exec_size = GEN6_EXECSIZE_8;
414 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
415
416 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
417 GEN6_MSG_SAMPLER_SIMD4X2,
418 GEN6_MSG_SAMPLER_LD,
419 0,
420 fcc->shader->bt.const_base + dim);
421
422 tmp = tc_alloc_tmp(tc);
423 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
424 inst->exec_size = GEN6_EXECSIZE_8;
425 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
426
427 tdst_transpose(dst, real_dst);
428 for (i = 0; i < 4; i++) {
429 const struct toy_src src =
430 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
431
432 /* cast to type D to make sure these are raw moves */
433 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
434 }
435 }
436
437 static void
fs_lower_opcode_tgsi_imm(struct fs_compile_context * fcc,struct toy_dst dst,int idx)438 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
439 struct toy_dst dst, int idx)
440 {
441 const uint32_t *imm;
442 struct toy_dst real_dst[4];
443 int ch;
444
445 imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
446
447 tdst_transpose(dst, real_dst);
448 /* raw moves */
449 for (ch = 0; ch < 4; ch++)
450 tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
451 }
452
453 static void
fs_lower_opcode_tgsi_sv(struct fs_compile_context * fcc,struct toy_dst dst,int dim,int idx)454 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
455 struct toy_dst dst, int dim, int idx)
456 {
457 struct toy_compiler *tc = &fcc->tc;
458 const struct toy_tgsi *tgsi = &fcc->tgsi;
459 int slot;
460
461 assert(!dim);
462
463 slot = toy_tgsi_find_system_value(tgsi, idx);
464 if (slot < 0)
465 return;
466
467 switch (tgsi->system_values[slot].semantic_name) {
468 case TGSI_SEMANTIC_PRIMID:
469 case TGSI_SEMANTIC_INSTANCEID:
470 case TGSI_SEMANTIC_VERTEXID:
471 default:
472 tc_fail(tc, "unhandled system value");
473 tc_MOV(tc, dst, tsrc_imm_d(0));
474 break;
475 }
476 }
477
478 static void
fs_lower_opcode_tgsi_direct(struct fs_compile_context * fcc,struct toy_inst * inst)479 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
480 struct toy_inst *inst)
481 {
482 struct toy_compiler *tc = &fcc->tc;
483 int dim, idx;
484
485 assert(inst->src[0].file == TOY_FILE_IMM);
486 dim = inst->src[0].val32;
487
488 assert(inst->src[1].file == TOY_FILE_IMM);
489 idx = inst->src[1].val32;
490
491 switch (inst->opcode) {
492 case TOY_OPCODE_TGSI_IN:
493 fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
494 break;
495 case TOY_OPCODE_TGSI_CONST:
496 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
497 fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
498 else
499 fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
500 break;
501 case TOY_OPCODE_TGSI_SV:
502 fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
503 break;
504 case TOY_OPCODE_TGSI_IMM:
505 assert(!dim);
506 fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
507 break;
508 default:
509 tc_fail(tc, "unhandled TGSI fetch");
510 break;
511 }
512
513 tc_discard_inst(tc, inst);
514 }
515
516 static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context * fcc,struct toy_inst * inst)517 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
518 struct toy_inst *inst)
519 {
520 struct toy_compiler *tc = &fcc->tc;
521 enum tgsi_file_type file;
522 int dim, idx;
523 struct toy_src indirect_dim, indirect_idx;
524
525 assert(inst->src[0].file == TOY_FILE_IMM);
526 file = inst->src[0].val32;
527
528 assert(inst->src[1].file == TOY_FILE_IMM);
529 dim = inst->src[1].val32;
530 indirect_dim = inst->src[2];
531
532 assert(inst->src[3].file == TOY_FILE_IMM);
533 idx = inst->src[3].val32;
534 indirect_idx = inst->src[4];
535
536 /* no dimension indirection */
537 assert(indirect_dim.file == TOY_FILE_IMM);
538 dim += indirect_dim.val32;
539
540 switch (inst->opcode) {
541 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
542 if (file == TGSI_FILE_CONSTANT) {
543 if (idx) {
544 struct toy_dst tmp = tc_alloc_tmp(tc);
545
546 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
547 indirect_idx = tsrc_from(tmp);
548 }
549
550 fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
551 break;
552 }
553 /* fall through */
554 case TOY_OPCODE_TGSI_INDIRECT_STORE:
555 default:
556 tc_fail(tc, "unhandled TGSI indirection");
557 break;
558 }
559
560 tc_discard_inst(tc, inst);
561 }
562
563 /**
564 * Emit instructions to move sampling parameters to the message registers.
565 */
566 static int
fs_add_sampler_params_gen6(struct toy_compiler * tc,int msg_type,int base_mrf,int param_size,struct toy_src * coords,int num_coords,struct toy_src bias_or_lod,struct toy_src ref_or_si,struct toy_src * ddx,struct toy_src * ddy,int num_derivs)567 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
568 int base_mrf, int param_size,
569 struct toy_src *coords, int num_coords,
570 struct toy_src bias_or_lod, struct toy_src ref_or_si,
571 struct toy_src *ddx, struct toy_src *ddy,
572 int num_derivs)
573 {
574 int num_params, i;
575
576 assert(num_coords <= 4);
577 assert(num_derivs <= 3 && num_derivs <= num_coords);
578
579 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
580 switch (msg_type) {
581 case GEN6_MSG_SAMPLER_SAMPLE:
582 for (i = 0; i < num_coords; i++)
583 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
584 num_params = num_coords;
585 break;
586 case GEN6_MSG_SAMPLER_SAMPLE_B:
587 case GEN6_MSG_SAMPLER_SAMPLE_L:
588 for (i = 0; i < num_coords; i++)
589 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
590 tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
591 num_params = 5;
592 break;
593 case GEN6_MSG_SAMPLER_SAMPLE_C:
594 for (i = 0; i < num_coords; i++)
595 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
596 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
597 num_params = 5;
598 break;
599 case GEN6_MSG_SAMPLER_SAMPLE_D:
600 for (i = 0; i < num_coords; i++)
601 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
602 for (i = 0; i < num_derivs; i++) {
603 tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
604 tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
605 }
606 num_params = 4 + num_derivs * 2;
607 break;
608 case GEN6_MSG_SAMPLER_SAMPLE_B_C:
609 case GEN6_MSG_SAMPLER_SAMPLE_L_C:
610 for (i = 0; i < num_coords; i++)
611 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
612 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
613 tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
614 num_params = 6;
615 break;
616 case GEN6_MSG_SAMPLER_LD:
617 assert(num_coords <= 3);
618
619 for (i = 0; i < num_coords; i++)
620 tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
621 tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
622 tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
623 num_params = 5;
624 break;
625 case GEN6_MSG_SAMPLER_RESINFO:
626 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
627 num_params = 1;
628 break;
629 default:
630 tc_fail(tc, "unknown sampler opcode");
631 num_params = 0;
632 break;
633 }
634 #undef SAMPLER_PARAM
635
636 return num_params * param_size;
637 }
638
639 static int
fs_add_sampler_params_gen7(struct toy_compiler * tc,int msg_type,int base_mrf,int param_size,struct toy_src * coords,int num_coords,struct toy_src bias_or_lod,struct toy_src ref_or_si,struct toy_src * ddx,struct toy_src * ddy,int num_derivs)640 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
641 int base_mrf, int param_size,
642 struct toy_src *coords, int num_coords,
643 struct toy_src bias_or_lod, struct toy_src ref_or_si,
644 struct toy_src *ddx, struct toy_src *ddy,
645 int num_derivs)
646 {
647 int num_params, i;
648
649 assert(num_coords <= 4);
650 assert(num_derivs <= 3 && num_derivs <= num_coords);
651
652 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
653 switch (msg_type) {
654 case GEN6_MSG_SAMPLER_SAMPLE:
655 for (i = 0; i < num_coords; i++)
656 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
657 num_params = num_coords;
658 break;
659 case GEN6_MSG_SAMPLER_SAMPLE_B:
660 case GEN6_MSG_SAMPLER_SAMPLE_L:
661 tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
662 for (i = 0; i < num_coords; i++)
663 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
664 num_params = 1 + num_coords;
665 break;
666 case GEN6_MSG_SAMPLER_SAMPLE_C:
667 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
668 for (i = 0; i < num_coords; i++)
669 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
670 num_params = 1 + num_coords;
671 break;
672 case GEN6_MSG_SAMPLER_SAMPLE_D:
673 for (i = 0; i < num_coords; i++) {
674 tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
675 if (i < num_derivs) {
676 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
677 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
678 }
679 }
680 num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
681 break;
682 case GEN6_MSG_SAMPLER_SAMPLE_B_C:
683 case GEN6_MSG_SAMPLER_SAMPLE_L_C:
684 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
685 tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
686 for (i = 0; i < num_coords; i++)
687 tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
688 num_params = 2 + num_coords;
689 break;
690 case GEN6_MSG_SAMPLER_LD:
691 assert(num_coords >= 1 && num_coords <= 3);
692
693 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
694 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
695 for (i = 1; i < num_coords; i++)
696 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
697 num_params = 1 + num_coords;
698 break;
699 case GEN6_MSG_SAMPLER_RESINFO:
700 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
701 num_params = 1;
702 break;
703 default:
704 tc_fail(tc, "unknown sampler opcode");
705 num_params = 0;
706 break;
707 }
708 #undef SAMPLER_PARAM
709
710 return num_params * param_size;
711 }
712
713 /**
714 * Set up message registers and return the message descriptor for sampling.
715 */
716 static struct toy_src
fs_prepare_tgsi_sampling(struct fs_compile_context * fcc,const struct toy_inst * inst,int base_mrf,const uint32_t * saturate_coords,unsigned * ret_sampler_index)717 fs_prepare_tgsi_sampling(struct fs_compile_context *fcc,
718 const struct toy_inst *inst,
719 int base_mrf, const uint32_t *saturate_coords,
720 unsigned *ret_sampler_index)
721 {
722 struct toy_compiler *tc = &fcc->tc;
723 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
724 struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
725 int num_coords, ref_pos, num_derivs;
726 int sampler_src, param_size, i;
727
728 switch (inst->exec_size) {
729 case GEN6_EXECSIZE_8:
730 simd_mode = GEN6_MSG_SAMPLER_SIMD8;
731 param_size = 1;
732 break;
733 case GEN6_EXECSIZE_16:
734 simd_mode = GEN6_MSG_SAMPLER_SIMD16;
735 param_size = 2;
736 break;
737 default:
738 tc_fail(tc, "unsupported execute size for sampling");
739 return tsrc_null();
740 break;
741 }
742
743 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target);
744 ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target);
745
746 tsrc_transpose(inst->src[0], coords);
747 bias_or_lod = tsrc_null();
748 ref_or_si = tsrc_null();
749 num_derivs = 0;
750 sampler_src = 1;
751
752 /*
753 * For TXD,
754 *
755 * src0 := (x, y, z, w)
756 * src1 := ddx
757 * src2 := ddy
758 * src3 := sampler
759 *
760 * For TEX2, TXB2, and TXL2,
761 *
762 * src0 := (x, y, z, w)
763 * src1 := (v or bias or lod, ...)
764 * src2 := sampler
765 *
766 * For TEX, TXB, TXL, and TXP,
767 *
768 * src0 := (x, y, z, w or bias or lod or projection)
769 * src1 := sampler
770 *
771 * For TXQ,
772 *
773 * src0 := (lod, ...)
774 * src1 := sampler
775 *
776 * For TXQ_LZ,
777 *
778 * src0 := sampler
779 *
780 * And for TXF,
781 *
782 * src0 := (x, y, z, w or lod)
783 * src1 := sampler
784 *
785 * State trackers should not generate opcode+texture combinations with
786 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
787 */
788 switch (inst->opcode) {
789 case TOY_OPCODE_TGSI_TEX:
790 if (ref_pos >= 0) {
791 assert(ref_pos < 4);
792
793 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
794 ref_or_si = coords[ref_pos];
795 }
796 else {
797 msg_type = GEN6_MSG_SAMPLER_SAMPLE;
798 }
799 break;
800 case TOY_OPCODE_TGSI_TXD:
801 if (ref_pos >= 0) {
802 assert(ref_pos < 4);
803
804 msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
805 ref_or_si = coords[ref_pos];
806
807 if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
808 tc_fail(tc, "TXD with shadow sampler not supported");
809 }
810 else {
811 msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
812 }
813
814 tsrc_transpose(inst->src[1], ddx);
815 tsrc_transpose(inst->src[2], ddy);
816 num_derivs = num_coords;
817 sampler_src = 3;
818 break;
819 case TOY_OPCODE_TGSI_TXP:
820 if (ref_pos >= 0) {
821 assert(ref_pos < 3);
822
823 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
824 ref_or_si = coords[ref_pos];
825 }
826 else {
827 msg_type = GEN6_MSG_SAMPLER_SAMPLE;
828 }
829
830 /* project the coordinates */
831 {
832 struct toy_dst tmp[4];
833
834 tc_alloc_tmp4(tc, tmp);
835
836 tc_INV(tc, tmp[3], coords[3]);
837 for (i = 0; i < num_coords && i < 3; i++) {
838 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
839 coords[i] = tsrc_from(tmp[i]);
840 }
841
842 if (ref_pos >= i) {
843 tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
844 ref_or_si = tsrc_from(tmp[ref_pos]);
845 }
846 }
847 break;
848 case TOY_OPCODE_TGSI_TXB:
849 if (ref_pos >= 0) {
850 assert(ref_pos < 3);
851
852 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
853 ref_or_si = coords[ref_pos];
854 }
855 else {
856 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
857 }
858
859 bias_or_lod = coords[3];
860 break;
861 case TOY_OPCODE_TGSI_TXL:
862 if (ref_pos >= 0) {
863 assert(ref_pos < 3);
864
865 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
866 ref_or_si = coords[ref_pos];
867 }
868 else {
869 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
870 }
871
872 bias_or_lod = coords[3];
873 break;
874 case TOY_OPCODE_TGSI_TXF:
875 msg_type = GEN6_MSG_SAMPLER_LD;
876
877 switch (inst->tex.target) {
878 case TGSI_TEXTURE_2D_MSAA:
879 case TGSI_TEXTURE_2D_ARRAY_MSAA:
880 assert(ref_pos >= 0 && ref_pos < 4);
881 /* lod is always 0 */
882 bias_or_lod = tsrc_imm_d(0);
883 ref_or_si = coords[ref_pos];
884 break;
885 default:
886 bias_or_lod = coords[3];
887 break;
888 }
889
890 /* offset the coordinates */
891 if (!tsrc_is_null(inst->tex.offsets[0])) {
892 struct toy_dst tmp[4];
893 struct toy_src offsets[4];
894
895 tc_alloc_tmp4(tc, tmp);
896 tsrc_transpose(inst->tex.offsets[0], offsets);
897
898 for (i = 0; i < num_coords; i++) {
899 tc_ADD(tc, tmp[i], coords[i], offsets[i]);
900 coords[i] = tsrc_from(tmp[i]);
901 }
902 }
903
904 sampler_src = 1;
905 break;
906 case TOY_OPCODE_TGSI_TXQ:
907 msg_type = GEN6_MSG_SAMPLER_RESINFO;
908 num_coords = 0;
909 bias_or_lod = coords[0];
910 break;
911 case TOY_OPCODE_TGSI_TXQ_LZ:
912 msg_type = GEN6_MSG_SAMPLER_RESINFO;
913 num_coords = 0;
914 sampler_src = 0;
915 break;
916 case TOY_OPCODE_TGSI_TEX2:
917 if (ref_pos >= 0) {
918 assert(ref_pos < 5);
919
920 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
921
922 if (ref_pos >= 4) {
923 struct toy_src src1[4];
924 tsrc_transpose(inst->src[1], src1);
925 ref_or_si = src1[ref_pos - 4];
926 }
927 else {
928 ref_or_si = coords[ref_pos];
929 }
930 }
931 else {
932 msg_type = GEN6_MSG_SAMPLER_SAMPLE;
933 }
934
935 sampler_src = 2;
936 break;
937 case TOY_OPCODE_TGSI_TXB2:
938 if (ref_pos >= 0) {
939 assert(ref_pos < 4);
940
941 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
942 ref_or_si = coords[ref_pos];
943 }
944 else {
945 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
946 }
947
948 {
949 struct toy_src src1[4];
950 tsrc_transpose(inst->src[1], src1);
951 bias_or_lod = src1[0];
952 }
953
954 sampler_src = 2;
955 break;
956 case TOY_OPCODE_TGSI_TXL2:
957 if (ref_pos >= 0) {
958 assert(ref_pos < 4);
959
960 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
961 ref_or_si = coords[ref_pos];
962 }
963 else {
964 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
965 }
966
967 {
968 struct toy_src src1[4];
969 tsrc_transpose(inst->src[1], src1);
970 bias_or_lod = src1[0];
971 }
972
973 sampler_src = 2;
974 break;
975 default:
976 assert(!"unhandled sampling opcode");
977 return tsrc_null();
978 break;
979 }
980
981 assert(inst->src[sampler_src].file == TOY_FILE_IMM);
982 sampler_index = inst->src[sampler_src].val32;
983 binding_table_index = fcc->shader->bt.tex_base + sampler_index;
984
985 /*
986 * From the Sandy Bridge PRM, volume 4 part 1, page 18:
987 *
988 * "Note that the (cube map) coordinates delivered to the sampling
989 * engine must already have been divided by the component with the
990 * largest absolute value."
991 */
992 switch (inst->tex.target) {
993 case TGSI_TEXTURE_CUBE:
994 case TGSI_TEXTURE_SHADOWCUBE:
995 case TGSI_TEXTURE_CUBE_ARRAY:
996 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
997 /* TXQ does not need coordinates */
998 if (num_coords >= 3) {
999 struct toy_dst tmp[4];
1000
1001 tc_alloc_tmp4(tc, tmp);
1002
1003 tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
1004 tsrc_absolute(coords[1]), GEN6_COND_GE);
1005 tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
1006 tsrc_absolute(coords[2]), GEN6_COND_GE);
1007 tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
1008
1009 for (i = 0; i < 3; i++) {
1010 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
1011 coords[i] = tsrc_from(tmp[i]);
1012 }
1013 }
1014 break;
1015 }
1016
1017 /*
1018 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate
1019 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is
1020 * so that sampling outside the border gets the correct colors.
1021 */
1022 for (i = 0; i < MIN2(num_coords, 3); i++) {
1023 bool is_rect;
1024
1025 if (!(saturate_coords[i] & (1 << sampler_index)))
1026 continue;
1027
1028 switch (inst->tex.target) {
1029 case TGSI_TEXTURE_RECT:
1030 case TGSI_TEXTURE_SHADOWRECT:
1031 is_rect = true;
1032 break;
1033 default:
1034 is_rect = false;
1035 break;
1036 }
1037
1038 if (is_rect) {
1039 struct toy_src min, max;
1040 struct toy_dst tmp;
1041
1042 tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1043 tmp = tc_alloc_tmp(tc);
1044
1045 /* saturate to [0, width] or [0, height] */
1046 /* TODO TXQ? */
1047 min = tsrc_imm_f(0.0f);
1048 max = tsrc_imm_f(2048.0f);
1049
1050 tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G);
1051 tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L);
1052
1053 coords[i] = tsrc_from(tmp);
1054 }
1055 else {
1056 struct toy_dst tmp;
1057 struct toy_inst *inst2;
1058
1059 tmp = tc_alloc_tmp(tc);
1060
1061 /* saturate to [0.0f, 1.0f] */
1062 inst2 = tc_MOV(tc, tmp, coords[i]);
1063 inst2->saturate = true;
1064
1065 coords[i] = tsrc_from(tmp);
1066 }
1067 }
1068
1069 /* set up sampler parameters */
1070 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
1071 msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1072 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1073 }
1074 else {
1075 msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1076 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1077 }
1078
1079 /*
1080 * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1081 *
1082 * "The maximum message length allowed to the sampler is 11. This would
1083 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1084 * SIMD16."
1085 */
1086 if (msg_len > 11)
1087 tc_fail(tc, "maximum length for messages to the sampler is 11");
1088
1089 if (ret_sampler_index)
1090 *ret_sampler_index = sampler_index;
1091
1092 return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1093 false, simd_mode, msg_type, sampler_index, binding_table_index);
1094 }
1095
1096 static void
fs_lower_opcode_tgsi_sampling(struct fs_compile_context * fcc,struct toy_inst * inst)1097 fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1098 struct toy_inst *inst)
1099 {
1100 struct toy_compiler *tc = &fcc->tc;
1101 struct toy_dst dst[4], tmp[4];
1102 struct toy_src desc;
1103 unsigned sampler_index;
1104 int swizzles[4], i;
1105 bool need_filter;
1106
1107 desc = fs_prepare_tgsi_sampling(fcc, inst,
1108 fcc->first_free_mrf,
1109 fcc->variant->saturate_tex_coords,
1110 &sampler_index);
1111
1112 switch (inst->opcode) {
1113 case TOY_OPCODE_TGSI_TXF:
1114 case TOY_OPCODE_TGSI_TXQ:
1115 case TOY_OPCODE_TGSI_TXQ_LZ:
1116 need_filter = false;
1117 break;
1118 default:
1119 need_filter = true;
1120 break;
1121 }
1122
1123 toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
1124 inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1125 inst->src[1] = desc;
1126 for (i = 2; i < ARRAY_SIZE(inst->src); i++)
1127 inst->src[i] = tsrc_null();
1128
1129 /* write to temps first */
1130 tc_alloc_tmp4(tc, tmp);
1131 for (i = 0; i < 4; i++)
1132 tmp[i].type = inst->dst.type;
1133 tdst_transpose(inst->dst, dst);
1134 inst->dst = tmp[0];
1135
1136 tc_move_inst(tc, inst);
1137
1138 if (need_filter) {
1139 assert(sampler_index < fcc->variant->num_sampler_views);
1140 swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1141 swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1142 swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1143 swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1144 }
1145 else {
1146 swizzles[0] = PIPE_SWIZZLE_X;
1147 swizzles[1] = PIPE_SWIZZLE_Y;
1148 swizzles[2] = PIPE_SWIZZLE_Z;
1149 swizzles[3] = PIPE_SWIZZLE_W;
1150 }
1151
1152 /* swizzle the results */
1153 for (i = 0; i < 4; i++) {
1154 switch (swizzles[i]) {
1155 case PIPE_SWIZZLE_0:
1156 tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1157 break;
1158 case PIPE_SWIZZLE_1:
1159 tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1160 break;
1161 default:
1162 tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1163 break;
1164 }
1165 }
1166 }
1167
1168 static void
fs_lower_opcode_derivative(struct toy_compiler * tc,struct toy_inst * inst)1169 fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1170 {
1171 struct toy_dst dst[4];
1172 struct toy_src src[4];
1173 unsigned i;
1174
1175 tdst_transpose(inst->dst, dst);
1176 tsrc_transpose(inst->src[0], src);
1177
1178 /*
1179 * Every four fragments are from a 2x2 subspan, with
1180 *
1181 * fragment 1 on the top-left,
1182 * fragment 2 on the top-right,
1183 * fragment 3 on the bottom-left,
1184 * fragment 4 on the bottom-right.
1185 *
1186 * DDX should thus produce
1187 *
1188 * dst = src.yyww - src.xxzz
1189 *
1190 * and DDY should produce
1191 *
1192 * dst = src.zzww - src.xxyy
1193 *
1194 * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to
1195 * play with the region parameters.
1196 */
1197 if (inst->opcode == TOY_OPCODE_DDX) {
1198 for (i = 0; i < 4; i++) {
1199 struct toy_src left, right;
1200
1201 left = tsrc_rect(src[i], TOY_RECT_220);
1202 right = tsrc_offset(left, 0, 1);
1203
1204 tc_ADD(tc, dst[i], right, tsrc_negate(left));
1205 }
1206 }
1207 else {
1208 for (i = 0; i < 4; i++) {
1209 struct toy_src top, bottom;
1210
1211 /* approximate with dst = src.zzzz - src.xxxx */
1212 top = tsrc_rect(src[i], TOY_RECT_440);
1213 bottom = tsrc_offset(top, 0, 2);
1214
1215 tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1216 }
1217 }
1218
1219 tc_discard_inst(tc, inst);
1220 }
1221
1222 static void
fs_lower_opcode_fb_write(struct toy_compiler * tc,struct toy_inst * inst)1223 fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1224 {
1225 /* fs_write_fb() has set up the message registers */
1226 toy_compiler_lower_to_send(tc, inst, true,
1227 GEN6_SFID_DP_RC);
1228 }
1229
1230 static void
fs_lower_opcode_kil(struct toy_compiler * tc,struct toy_inst * inst)1231 fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1232 {
1233 struct toy_dst pixel_mask_dst;
1234 struct toy_src f0, pixel_mask;
1235 struct toy_inst *tmp;
1236
1237 /* lower half of r1.7:ud */
1238 pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1239 pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1240
1241 f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010);
1242
1243 /* KILL or KILL_IF */
1244 if (tsrc_is_null(inst->src[0])) {
1245 struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1246 struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0));
1247
1248 /* create a mask that masks out all pixels */
1249 tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1250 tmp->exec_size = GEN6_EXECSIZE_1;
1251 tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1252
1253 tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ);
1254
1255 /* swapping the two src operands breaks glBitmap()!? */
1256 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1257 tmp->exec_size = GEN6_EXECSIZE_1;
1258 tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1259 }
1260 else {
1261 struct toy_src src[4];
1262 unsigned i;
1263
1264 tsrc_transpose(inst->src[0], src);
1265 /* mask out killed pixels */
1266 for (i = 0; i < 4; i++) {
1267 tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1268 GEN6_COND_GE);
1269
1270 /* swapping the two src operands breaks glBitmap()!? */
1271 tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1272 tmp->exec_size = GEN6_EXECSIZE_1;
1273 tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1274 }
1275 }
1276
1277 tc_discard_inst(tc, inst);
1278 }
1279
1280 static void
fs_lower_virtual_opcodes(struct fs_compile_context * fcc)1281 fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1282 {
1283 struct toy_compiler *tc = &fcc->tc;
1284 struct toy_inst *inst;
1285
1286 /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1287 tc_head(tc);
1288 while ((inst = tc_next(tc)) != NULL) {
1289 switch (inst->opcode) {
1290 case TOY_OPCODE_TGSI_IN:
1291 case TOY_OPCODE_TGSI_CONST:
1292 case TOY_OPCODE_TGSI_SV:
1293 case TOY_OPCODE_TGSI_IMM:
1294 fs_lower_opcode_tgsi_direct(fcc, inst);
1295 break;
1296 case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1297 case TOY_OPCODE_TGSI_INDIRECT_STORE:
1298 fs_lower_opcode_tgsi_indirect(fcc, inst);
1299 break;
1300 case TOY_OPCODE_TGSI_TEX:
1301 case TOY_OPCODE_TGSI_TXB:
1302 case TOY_OPCODE_TGSI_TXD:
1303 case TOY_OPCODE_TGSI_TXL:
1304 case TOY_OPCODE_TGSI_TXP:
1305 case TOY_OPCODE_TGSI_TXF:
1306 case TOY_OPCODE_TGSI_TXQ:
1307 case TOY_OPCODE_TGSI_TXQ_LZ:
1308 case TOY_OPCODE_TGSI_TEX2:
1309 case TOY_OPCODE_TGSI_TXB2:
1310 case TOY_OPCODE_TGSI_TXL2:
1311 case TOY_OPCODE_TGSI_SAMPLE:
1312 case TOY_OPCODE_TGSI_SAMPLE_I:
1313 case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1314 case TOY_OPCODE_TGSI_SAMPLE_B:
1315 case TOY_OPCODE_TGSI_SAMPLE_C:
1316 case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1317 case TOY_OPCODE_TGSI_SAMPLE_D:
1318 case TOY_OPCODE_TGSI_SAMPLE_L:
1319 case TOY_OPCODE_TGSI_GATHER4:
1320 case TOY_OPCODE_TGSI_SVIEWINFO:
1321 case TOY_OPCODE_TGSI_SAMPLE_POS:
1322 case TOY_OPCODE_TGSI_SAMPLE_INFO:
1323 fs_lower_opcode_tgsi_sampling(fcc, inst);
1324 break;
1325 }
1326 }
1327
1328 tc_head(tc);
1329 while ((inst = tc_next(tc)) != NULL) {
1330 switch (inst->opcode) {
1331 case TOY_OPCODE_INV:
1332 case TOY_OPCODE_LOG:
1333 case TOY_OPCODE_EXP:
1334 case TOY_OPCODE_SQRT:
1335 case TOY_OPCODE_RSQ:
1336 case TOY_OPCODE_SIN:
1337 case TOY_OPCODE_COS:
1338 case TOY_OPCODE_FDIV:
1339 case TOY_OPCODE_POW:
1340 case TOY_OPCODE_INT_DIV_QUOTIENT:
1341 case TOY_OPCODE_INT_DIV_REMAINDER:
1342 toy_compiler_lower_math(tc, inst);
1343 break;
1344 case TOY_OPCODE_DDX:
1345 case TOY_OPCODE_DDY:
1346 fs_lower_opcode_derivative(tc, inst);
1347 break;
1348 case TOY_OPCODE_FB_WRITE:
1349 fs_lower_opcode_fb_write(tc, inst);
1350 break;
1351 case TOY_OPCODE_KIL:
1352 fs_lower_opcode_kil(tc, inst);
1353 break;
1354 default:
1355 if (inst->opcode > 127)
1356 tc_fail(tc, "unhandled virtual opcode");
1357 break;
1358 }
1359 }
1360 }
1361
1362 /**
1363 * Compile the shader.
1364 */
1365 static bool
fs_compile(struct fs_compile_context * fcc)1366 fs_compile(struct fs_compile_context *fcc)
1367 {
1368 struct toy_compiler *tc = &fcc->tc;
1369 struct ilo_shader *sh = fcc->shader;
1370
1371 fs_lower_virtual_opcodes(fcc);
1372 toy_compiler_legalize_for_ra(tc);
1373 toy_compiler_optimize(tc);
1374 toy_compiler_allocate_registers(tc,
1375 fcc->first_free_grf,
1376 fcc->last_free_grf,
1377 fcc->num_grf_per_vrf);
1378 toy_compiler_legalize_for_asm(tc);
1379
1380 if (tc->fail) {
1381 ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1382 return false;
1383 }
1384
1385 if (ilo_debug & ILO_DEBUG_FS) {
1386 ilo_printf("legalized instructions:\n");
1387 toy_compiler_dump(tc);
1388 ilo_printf("\n");
1389 }
1390
1391 if (true) {
1392 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1393 }
1394 else {
1395 static const uint32_t microcode[] = {
1396 /* fill in the microcode here */
1397 0x0, 0x0, 0x0, 0x0,
1398 };
1399 const bool swap = true;
1400
1401 sh->kernel_size = sizeof(microcode);
1402 sh->kernel = MALLOC(sh->kernel_size);
1403
1404 if (sh->kernel) {
1405 const int num_dwords = sizeof(microcode) / 4;
1406 const uint32_t *src = microcode;
1407 uint32_t *dst = (uint32_t *) sh->kernel;
1408 int i;
1409
1410 for (i = 0; i < num_dwords; i += 4) {
1411 if (swap) {
1412 dst[i + 0] = src[i + 3];
1413 dst[i + 1] = src[i + 2];
1414 dst[i + 2] = src[i + 1];
1415 dst[i + 3] = src[i + 0];
1416 }
1417 else {
1418 memcpy(dst, src, 16);
1419 }
1420 }
1421 }
1422 }
1423
1424 if (!sh->kernel) {
1425 ilo_err("failed to compile FS: %s\n", tc->reason);
1426 return false;
1427 }
1428
1429 if (ilo_debug & ILO_DEBUG_FS) {
1430 ilo_printf("disassembly:\n");
1431 toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
1432 ilo_printf("\n");
1433 }
1434
1435 return true;
1436 }
1437
1438 /**
1439 * Emit instructions to write the color buffers (and the depth buffer).
1440 */
1441 static void
fs_write_fb(struct fs_compile_context * fcc)1442 fs_write_fb(struct fs_compile_context *fcc)
1443 {
1444 struct toy_compiler *tc = &fcc->tc;
1445 int base_mrf = fcc->first_free_mrf;
1446 const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1447 bool header_present = false;
1448 struct toy_src desc;
1449 unsigned msg_type, ctrl;
1450 int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1451 int pos_slot = -1, cbuf, i;
1452
1453 for (i = 0; i < ARRAY_SIZE(color_slots); i++)
1454 color_slots[i] = -1;
1455
1456 for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1457 if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1458 assert(fcc->tgsi.outputs[i].semantic_index < ARRAY_SIZE(color_slots));
1459 color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1460 }
1461 else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1462 pos_slot = i;
1463 }
1464 }
1465
1466 num_cbufs = fcc->variant->u.fs.num_cbufs;
1467 /* still need to send EOT (and probably depth) */
1468 if (!num_cbufs)
1469 num_cbufs = 1;
1470
1471 /* we need the header to specify the pixel mask or render target */
1472 if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1473 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1474 struct toy_inst *inst;
1475
1476 inst = tc_MOV(tc, header, r0);
1477 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1478 base_mrf += fcc->num_grf_per_vrf;
1479
1480 /* this is a two-register header */
1481 if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) {
1482 inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1483 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1484 base_mrf += fcc->num_grf_per_vrf;
1485 }
1486
1487 header_present = true;
1488 }
1489
1490 for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1491 const int slot =
1492 color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1493 int mrf = base_mrf, vrf;
1494 struct toy_src src[4];
1495
1496 if (slot >= 0) {
1497 const unsigned undefined_mask =
1498 fcc->tgsi.outputs[slot].undefined_mask;
1499 const int index = fcc->tgsi.outputs[slot].index;
1500
1501 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1502 if (vrf >= 0) {
1503 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1504 tsrc_transpose(tmp, src);
1505 }
1506 else {
1507 /* use (0, 0, 0, 0) */
1508 tsrc_transpose(tsrc_imm_f(0.0f), src);
1509 }
1510
1511 for (i = 0; i < 4; i++) {
1512 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1513
1514 if (undefined_mask & (1 << i))
1515 src[i] = tsrc_imm_f(0.0f);
1516
1517 tc_MOV(tc, dst, src[i]);
1518
1519 mrf += fcc->num_grf_per_vrf;
1520 }
1521 }
1522 else {
1523 /* use (0, 0, 0, 0) */
1524 for (i = 0; i < 4; i++) {
1525 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1526
1527 tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1528 mrf += fcc->num_grf_per_vrf;
1529 }
1530 }
1531
1532 /* select BLEND_STATE[rt] */
1533 if (cbuf > 0) {
1534 struct toy_inst *inst;
1535
1536 inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1537 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1538 inst->exec_size = GEN6_EXECSIZE_1;
1539 inst->src[0].rect = TOY_RECT_010;
1540 }
1541
1542 if (cbuf == 0 && pos_slot >= 0) {
1543 const int index = fcc->tgsi.outputs[pos_slot].index;
1544 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1545 struct toy_src src[4];
1546 int vrf;
1547
1548 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1549 if (vrf >= 0) {
1550 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1551 tsrc_transpose(tmp, src);
1552 }
1553 else {
1554 /* use (0, 0, 0, 0) */
1555 tsrc_transpose(tsrc_imm_f(0.0f), src);
1556 }
1557
1558 /* only Z */
1559 tc_MOV(tc, dst, src[2]);
1560
1561 mrf += fcc->num_grf_per_vrf;
1562 }
1563
1564 msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ?
1565 GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 :
1566 GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8;
1567
1568 ctrl = (cbuf == num_cbufs - 1) << 12 |
1569 msg_type << 8;
1570
1571 desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1572 mrf - fcc->first_free_mrf, 0,
1573 header_present, false,
1574 GEN6_MSG_DP_RT_WRITE,
1575 ctrl, fcc->shader->bt.rt_base + cbuf);
1576
1577 tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1578 tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1579 }
1580 }
1581
1582 /**
1583 * Set up shader outputs for fixed-function units.
1584 */
1585 static void
fs_setup_shader_out(struct ilo_shader * sh,const struct toy_tgsi * tgsi)1586 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1587 {
1588 unsigned i;
1589
1590 sh->out.count = tgsi->num_outputs;
1591 for (i = 0; i < tgsi->num_outputs; i++) {
1592 sh->out.register_indices[i] = tgsi->outputs[i].index;
1593 sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1594 sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1595
1596 if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1597 sh->out.has_pos = true;
1598 }
1599 }
1600
1601 /**
1602 * Set up shader inputs for fixed-function units.
1603 */
1604 static void
fs_setup_shader_in(struct ilo_shader * sh,const struct toy_tgsi * tgsi,bool flatshade)1605 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1606 bool flatshade)
1607 {
1608 unsigned i;
1609
1610 sh->in.count = tgsi->num_inputs;
1611 for (i = 0; i < tgsi->num_inputs; i++) {
1612 sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1613 sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1614 sh->in.interp[i] = tgsi->inputs[i].interp;
1615 sh->in.centroid[i] = tgsi->inputs[i].centroid;
1616
1617 if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1618 sh->in.has_pos = true;
1619 continue;
1620 }
1621 else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1622 continue;
1623 }
1624
1625 switch (tgsi->inputs[i].interp) {
1626 case TGSI_INTERPOLATE_CONSTANT:
1627 sh->in.const_interp_enable |= 1 << i;
1628 break;
1629 case TGSI_INTERPOLATE_LINEAR:
1630 sh->in.has_linear_interp = true;
1631
1632 if (tgsi->inputs[i].centroid) {
1633 sh->in.barycentric_interpolation_mode |=
1634 GEN6_INTERP_NONPERSPECTIVE_CENTROID;
1635 }
1636 else {
1637 sh->in.barycentric_interpolation_mode |=
1638 GEN6_INTERP_NONPERSPECTIVE_PIXEL;
1639 }
1640 break;
1641 case TGSI_INTERPOLATE_COLOR:
1642 if (flatshade) {
1643 sh->in.const_interp_enable |= 1 << i;
1644 break;
1645 }
1646 /* fall through */
1647 case TGSI_INTERPOLATE_PERSPECTIVE:
1648 if (tgsi->inputs[i].centroid) {
1649 sh->in.barycentric_interpolation_mode |=
1650 GEN6_INTERP_PERSPECTIVE_CENTROID;
1651 }
1652 else {
1653 sh->in.barycentric_interpolation_mode |=
1654 GEN6_INTERP_PERSPECTIVE_PIXEL;
1655 }
1656 break;
1657 default:
1658 break;
1659 }
1660 }
1661 }
1662
1663 static int
fs_setup_payloads(struct fs_compile_context * fcc)1664 fs_setup_payloads(struct fs_compile_context *fcc)
1665 {
1666 const struct ilo_shader *sh = fcc->shader;
1667 int grf, i;
1668
1669 grf = 0;
1670
1671 /* r0: header */
1672 grf++;
1673
1674 /* r1-r2: coordinates and etc. */
1675 grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 2 : 1;
1676
1677 for (i = 0; i < ARRAY_SIZE(fcc->payloads); i++) {
1678 const int reg_scale =
1679 (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2;
1680
1681 /* r3-r26 or r32-r55: barycentric interpolation parameters */
1682 if (sh->in.barycentric_interpolation_mode &
1683 (GEN6_INTERP_PERSPECTIVE_PIXEL)) {
1684 fcc->payloads[i].interp_perspective_pixel = grf;
1685 grf += 2 * reg_scale;
1686 }
1687 if (sh->in.barycentric_interpolation_mode &
1688 (GEN6_INTERP_PERSPECTIVE_CENTROID)) {
1689 fcc->payloads[i].interp_perspective_centroid = grf;
1690 grf += 2 * reg_scale;
1691 }
1692 if (sh->in.barycentric_interpolation_mode &
1693 (GEN6_INTERP_PERSPECTIVE_SAMPLE)) {
1694 fcc->payloads[i].interp_perspective_sample = grf;
1695 grf += 2 * reg_scale;
1696 }
1697 if (sh->in.barycentric_interpolation_mode &
1698 (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) {
1699 fcc->payloads[i].interp_nonperspective_pixel = grf;
1700 grf += 2 * reg_scale;
1701 }
1702 if (sh->in.barycentric_interpolation_mode &
1703 (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) {
1704 fcc->payloads[i].interp_nonperspective_centroid = grf;
1705 grf += 2 * reg_scale;
1706 }
1707 if (sh->in.barycentric_interpolation_mode &
1708 (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) {
1709 fcc->payloads[i].interp_nonperspective_sample = grf;
1710 grf += 2 * reg_scale;
1711 }
1712
1713 /* r27-r28 or r56-r57: interpoloated depth */
1714 if (sh->in.has_pos) {
1715 fcc->payloads[i].source_depth = grf;
1716 grf += 1 * reg_scale;
1717 }
1718
1719 /* r29-r30 or r58-r59: interpoloated w */
1720 if (sh->in.has_pos) {
1721 fcc->payloads[i].source_w = grf;
1722 grf += 1 * reg_scale;
1723 }
1724
1725 /* r31 or r60: position offset */
1726 if (false) {
1727 fcc->payloads[i].pos_offset = grf;
1728 grf++;
1729 }
1730
1731 if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32)
1732 break;
1733 }
1734
1735 return grf;
1736 }
1737
1738 /**
1739 * Translate the TGSI tokens.
1740 */
1741 static bool
fs_setup_tgsi(struct toy_compiler * tc,const struct tgsi_token * tokens,struct toy_tgsi * tgsi)1742 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1743 struct toy_tgsi *tgsi)
1744 {
1745 if (ilo_debug & ILO_DEBUG_FS) {
1746 ilo_printf("dumping fragment shader\n");
1747 ilo_printf("\n");
1748
1749 tgsi_dump(tokens, 0);
1750 ilo_printf("\n");
1751 }
1752
1753 toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1754 if (tc->fail) {
1755 ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1756 return false;
1757 }
1758
1759 if (ilo_debug & ILO_DEBUG_FS) {
1760 ilo_printf("TGSI translator:\n");
1761 toy_tgsi_dump(tgsi);
1762 ilo_printf("\n");
1763 toy_compiler_dump(tc);
1764 ilo_printf("\n");
1765 }
1766
1767 return true;
1768 }
1769
1770 /**
1771 * Set up FS compile context. This includes translating the TGSI tokens.
1772 */
1773 static bool
fs_setup(struct fs_compile_context * fcc,const struct ilo_shader_state * state,const struct ilo_shader_variant * variant)1774 fs_setup(struct fs_compile_context *fcc,
1775 const struct ilo_shader_state *state,
1776 const struct ilo_shader_variant *variant)
1777 {
1778 int num_consts;
1779
1780 memset(fcc, 0, sizeof(*fcc));
1781
1782 fcc->shader = CALLOC_STRUCT(ilo_shader);
1783 if (!fcc->shader)
1784 return false;
1785
1786 fcc->variant = variant;
1787
1788 toy_compiler_init(&fcc->tc, state->info.dev);
1789
1790 fcc->dispatch_mode = GEN6_PS_DISPATCH_8;
1791
1792 fcc->tc.templ.access_mode = GEN6_ALIGN_1;
1793 if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) {
1794 fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H;
1795 fcc->tc.templ.exec_size = GEN6_EXECSIZE_16;
1796 }
1797 else {
1798 fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q;
1799 fcc->tc.templ.exec_size = GEN6_EXECSIZE_8;
1800 }
1801
1802 fcc->tc.rect_linear_width = 8;
1803
1804 /*
1805 * The classic driver uses the sampler cache (gen6) or the data cache
1806 * (gen7). Why?
1807 */
1808 fcc->const_cache = GEN6_SFID_DP_CC;
1809
1810 if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1811 toy_compiler_cleanup(&fcc->tc);
1812 FREE(fcc->shader);
1813 return false;
1814 }
1815
1816 fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1817 fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1818
1819 if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1820 num_consts = (fcc->tgsi.const_count + 1) / 2;
1821
1822 /*
1823 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1824 *
1825 * "The sum of all four read length fields (each incremented to
1826 * represent the actual read length) must be less than or equal to
1827 * 64"
1828 *
1829 * Since we are usually under a high register pressure, do not allow
1830 * for more than 8.
1831 */
1832 if (num_consts > 8)
1833 num_consts = 0;
1834 }
1835 else {
1836 num_consts = 0;
1837 }
1838
1839 fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1840 fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1841
1842 fcc->first_const_grf = fs_setup_payloads(fcc);
1843 fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1844 fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1845 fcc->last_free_grf = 127;
1846
1847 /* m0 is reserved for system routines */
1848 fcc->first_free_mrf = 1;
1849 fcc->last_free_mrf = 15;
1850
1851 /* instructions are compressed with GEN6_EXECSIZE_16 */
1852 fcc->num_grf_per_vrf =
1853 (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 2 : 1;
1854
1855 if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) {
1856 fcc->last_free_grf -= 15;
1857 fcc->first_free_mrf = fcc->last_free_grf + 1;
1858 fcc->last_free_mrf = fcc->first_free_mrf + 14;
1859 }
1860
1861 fcc->shader->in.start_grf = fcc->first_const_grf;
1862 fcc->shader->has_kill = fcc->tgsi.uses_kill;
1863 fcc->shader->dispatch_16 =
1864 (fcc->dispatch_mode == GEN6_PS_DISPATCH_16);
1865
1866 fcc->shader->bt.rt_base = 0;
1867 fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs;
1868 /* to send EOT */
1869 if (!fcc->shader->bt.rt_count)
1870 fcc->shader->bt.rt_count = 1;
1871
1872 fcc->shader->bt.tex_base = fcc->shader->bt.rt_base +
1873 fcc->shader->bt.rt_count;
1874 fcc->shader->bt.tex_count = fcc->variant->num_sampler_views;
1875
1876 fcc->shader->bt.const_base = fcc->shader->bt.tex_base +
1877 fcc->shader->bt.tex_count;
1878 fcc->shader->bt.const_count = state->info.constant_buffer_count;
1879
1880 fcc->shader->bt.total_count = fcc->shader->bt.const_base +
1881 fcc->shader->bt.const_count;
1882
1883 return true;
1884 }
1885
1886 /**
1887 * Compile the fragment shader.
1888 */
1889 struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state * state,const struct ilo_shader_variant * variant)1890 ilo_shader_compile_fs(const struct ilo_shader_state *state,
1891 const struct ilo_shader_variant *variant)
1892 {
1893 struct fs_compile_context fcc;
1894
1895 if (!fs_setup(&fcc, state, variant))
1896 return NULL;
1897
1898 fs_write_fb(&fcc);
1899
1900 if (!fs_compile(&fcc)) {
1901 FREE(fcc.shader);
1902 fcc.shader = NULL;
1903 }
1904
1905 toy_tgsi_cleanup(&fcc.tgsi);
1906 toy_compiler_cleanup(&fcc.tc);
1907
1908 return fcc.shader;
1909 }
1910