• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
27 
28 #include "tgsi/tgsi_dump.h"
29 #include "tgsi/tgsi_util.h"
30 #include "toy_compiler.h"
31 #include "toy_tgsi.h"
32 #include "toy_legalize.h"
33 #include "toy_optimize.h"
34 #include "toy_helpers.h"
35 #include "ilo_shader_internal.h"
36 
37 struct fs_compile_context {
38    struct ilo_shader *shader;
39    const struct ilo_shader_variant *variant;
40 
41    struct toy_compiler tc;
42    struct toy_tgsi tgsi;
43 
44    int const_cache;
45    int dispatch_mode;
46 
47    struct {
48       int interp_perspective_pixel;
49       int interp_perspective_centroid;
50       int interp_perspective_sample;
51       int interp_nonperspective_pixel;
52       int interp_nonperspective_centroid;
53       int interp_nonperspective_sample;
54       int source_depth;
55       int source_w;
56       int pos_offset;
57    } payloads[2];
58 
59    int first_const_grf;
60    int first_attr_grf;
61    int first_free_grf;
62    int last_free_grf;
63 
64    int num_grf_per_vrf;
65 
66    int first_free_mrf;
67    int last_free_mrf;
68 };
69 
70 static void
fetch_position(struct fs_compile_context * fcc,struct toy_dst dst)71 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
72 {
73    struct toy_compiler *tc = &fcc->tc;
74    const struct toy_src src_z =
75       tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
76    const struct toy_src src_w =
77       tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
78    const int fb_height =
79       (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
80    const bool origin_upper_left =
81       (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
82    const bool pixel_center_integer =
83       (fcc->tgsi.props.fs_coord_pixel_center ==
84        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
85    struct toy_src subspan_x, subspan_y;
86    struct toy_dst tmp, tmp_uw;
87    struct toy_dst real_dst[4];
88 
89    tdst_transpose(dst, real_dst);
90 
91    subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
92    subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
93 
94    subspan_y = tsrc_offset(subspan_x, 0, 1);
95 
96    tmp_uw = tdst_uw(tc_alloc_tmp(tc));
97    tmp = tc_alloc_tmp(tc);
98 
99    /* X */
100    tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
101    tc_MOV(tc, tmp, tsrc_from(tmp_uw));
102    if (pixel_center_integer)
103       tc_MOV(tc, real_dst[0], tsrc_from(tmp));
104    else
105       tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
106 
107    /* Y */
108    tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
109    tc_MOV(tc, tmp, tsrc_from(tmp_uw));
110    if (origin_upper_left && pixel_center_integer) {
111       tc_MOV(tc, real_dst[1], tsrc_from(tmp));
112    }
113    else {
114       struct toy_src y = tsrc_from(tmp);
115       float offset = 0.0f;
116 
117       if (!pixel_center_integer)
118          offset += 0.5f;
119 
120       if (!origin_upper_left) {
121          offset += (float) (fb_height - 1);
122          y = tsrc_negate(y);
123       }
124 
125       tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
126    }
127 
128    /* Z and W */
129    tc_MOV(tc, real_dst[2], src_z);
130    tc_INV(tc, real_dst[3], src_w);
131 }
132 
133 static void
fetch_face(struct fs_compile_context * fcc,struct toy_dst dst)134 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
135 {
136    struct toy_compiler *tc = &fcc->tc;
137    const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
138    struct toy_dst tmp_f, tmp;
139    struct toy_dst real_dst[4];
140 
141    tdst_transpose(dst, real_dst);
142 
143    tmp_f = tc_alloc_tmp(tc);
144    tmp = tdst_d(tmp_f);
145    tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
146    tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
147    tc_MOV(tc, tmp_f, tsrc_from(tmp));
148 
149    /* convert to 1.0 and -1.0 */
150    tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
151    tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
152 
153    tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
154    tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
155    tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
156 }
157 
158 static void
fetch_attr(struct fs_compile_context * fcc,struct toy_dst dst,int slot)159 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
160 {
161    struct toy_compiler *tc = &fcc->tc;
162    struct toy_dst real_dst[4];
163    bool is_const = false;
164    int grf, interp, ch;
165 
166    tdst_transpose(dst, real_dst);
167 
168    grf = fcc->first_attr_grf + slot * 2;
169 
170    switch (fcc->tgsi.inputs[slot].interp) {
171    case TGSI_INTERPOLATE_CONSTANT:
172       is_const = true;
173       break;
174    case TGSI_INTERPOLATE_LINEAR:
175       if (fcc->tgsi.inputs[slot].centroid)
176          interp = fcc->payloads[0].interp_nonperspective_centroid;
177       else
178          interp = fcc->payloads[0].interp_nonperspective_pixel;
179       break;
180    case TGSI_INTERPOLATE_COLOR:
181       if (fcc->variant->u.fs.flatshade) {
182          is_const = true;
183          break;
184       }
185       /* fall through */
186    case TGSI_INTERPOLATE_PERSPECTIVE:
187       if (fcc->tgsi.inputs[slot].centroid)
188          interp = fcc->payloads[0].interp_perspective_centroid;
189       else
190          interp = fcc->payloads[0].interp_perspective_pixel;
191       break;
192    default:
193       assert(!"unexpected FS interpolation");
194       interp = fcc->payloads[0].interp_perspective_pixel;
195       break;
196    }
197 
198    if (is_const) {
199       struct toy_src a0[4];
200 
201       a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
202       a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
203       a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
204       a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
205 
206       for (ch = 0; ch < 4; ch++)
207          tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
208    }
209    else {
210       struct toy_src attr[4], uv;
211 
212       attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
213       attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
214       attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
215       attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
216 
217       uv = tsrc(TOY_FILE_GRF, interp, 0);
218 
219       for (ch = 0; ch < 4; ch++) {
220          tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch],
221                tsrc_rect(attr[ch], TOY_RECT_010), uv);
222       }
223    }
224 
225    if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
226       tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
227       tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
228       tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
229    }
230 }
231 
232 static void
fs_lower_opcode_tgsi_in(struct fs_compile_context * fcc,struct toy_dst dst,int dim,int idx)233 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
234                         struct toy_dst dst, int dim, int idx)
235 {
236    int slot;
237 
238    assert(!dim);
239 
240    slot = toy_tgsi_find_input(&fcc->tgsi, idx);
241    if (slot < 0)
242       return;
243 
244    switch (fcc->tgsi.inputs[slot].semantic_name) {
245    case TGSI_SEMANTIC_POSITION:
246       fetch_position(fcc, dst);
247       break;
248    case TGSI_SEMANTIC_FACE:
249       fetch_face(fcc, dst);
250       break;
251    default:
252       fetch_attr(fcc, dst, slot);
253       break;
254    }
255 }
256 
257 static void
fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)258 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
259                                     struct toy_dst dst, int dim,
260                                     struct toy_src idx)
261 {
262    const struct toy_dst offset =
263       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
264    struct toy_compiler *tc = &fcc->tc;
265    unsigned simd_mode, param_size;
266    struct toy_inst *inst;
267    struct toy_src desc, real_src[4];
268    struct toy_dst tmp, real_dst[4];
269    unsigned i;
270 
271    tsrc_transpose(idx, real_src);
272 
273    /* set offset */
274    inst = tc_MOV(tc, offset, real_src[0]);
275    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
276 
277    switch (inst->exec_size) {
278    case GEN6_EXECSIZE_8:
279       simd_mode = GEN6_MSG_SAMPLER_SIMD8;
280       param_size = 1;
281       break;
282    case GEN6_EXECSIZE_16:
283       simd_mode = GEN6_MSG_SAMPLER_SIMD16;
284       param_size = 2;
285       break;
286    default:
287       assert(!"unsupported execution size");
288       tc_MOV(tc, dst, tsrc_imm_f(0.0f));
289       return;
290       break;
291    }
292 
293    desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
294          simd_mode,
295          GEN6_MSG_SAMPLER_LD,
296          0,
297          fcc->shader->bt.const_base + dim);
298 
299    tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
300    inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
301    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
302 
303    tdst_transpose(dst, real_dst);
304    for (i = 0; i < 4; i++) {
305       const struct toy_src src =
306          tsrc_offset(tsrc_from(tmp), param_size * i, 0);
307 
308       /* cast to type D to make sure these are raw moves */
309       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
310    }
311 }
312 
313 static bool
fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)314 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc,
315                                struct toy_dst dst, int dim,
316                                struct toy_src idx)
317 {
318    const int grf = fcc->first_const_grf + idx.val32 / 2;
319    const int grf_subreg = (idx.val32 & 1) * 16;
320    struct toy_src src;
321    struct toy_dst real_dst[4];
322    unsigned i;
323 
324    if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM ||
325        grf >= fcc->first_attr_grf)
326       return false;
327 
328    src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010);
329 
330    tdst_transpose(dst, real_dst);
331    for (i = 0; i < 4; i++) {
332       /* cast to type D to make sure these are raw moves */
333       tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i)));
334    }
335 
336    return true;
337 }
338 
339 static void
fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)340 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
341                                 struct toy_dst dst, int dim, struct toy_src idx)
342 {
343    const struct toy_dst header =
344       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
345    const struct toy_dst global_offset =
346       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
347    const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
348    struct toy_compiler *tc = &fcc->tc;
349    unsigned msg_type, msg_ctrl, msg_len;
350    struct toy_inst *inst;
351    struct toy_src desc;
352    struct toy_dst tmp, real_dst[4];
353    unsigned i;
354 
355    if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
356       return;
357 
358    /* set message header */
359    inst = tc_MOV(tc, header, r0);
360    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
361 
362    /* set global offset */
363    inst = tc_MOV(tc, global_offset, idx);
364    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
365    inst->exec_size = GEN6_EXECSIZE_1;
366    inst->src[0].rect = TOY_RECT_010;
367 
368    msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ;
369    msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO;
370    msg_len = 1;
371 
372    desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
373          msg_type, msg_ctrl, fcc->shader->bt.const_base + dim);
374 
375    tmp = tc_alloc_tmp(tc);
376 
377    tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
378 
379    tdst_transpose(dst, real_dst);
380    for (i = 0; i < 4; i++) {
381       const struct toy_src src =
382          tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
383 
384       /* cast to type D to make sure these are raw moves */
385       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
386    }
387 }
388 
389 static void
fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context * fcc,struct toy_dst dst,int dim,struct toy_src idx)390 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
391                                 struct toy_dst dst, int dim, struct toy_src idx)
392 {
393    struct toy_compiler *tc = &fcc->tc;
394    const struct toy_dst offset =
395       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
396    struct toy_src desc;
397    struct toy_inst *inst;
398    struct toy_dst tmp, real_dst[4];
399    unsigned i;
400 
401    if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx))
402       return;
403 
404    /*
405     * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
406     * changed from OWord Block Read to ld to increase performance in the
407     * classic driver.  Since we use the constant cache instead of the data
408     * cache, I wonder if we still want to follow the classic driver.
409     */
410 
411    /* set offset */
412    inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
413    inst->exec_size = GEN6_EXECSIZE_8;
414    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
415 
416    desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
417          GEN6_MSG_SAMPLER_SIMD4X2,
418          GEN6_MSG_SAMPLER_LD,
419          0,
420          fcc->shader->bt.const_base + dim);
421 
422    tmp = tc_alloc_tmp(tc);
423    inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
424    inst->exec_size = GEN6_EXECSIZE_8;
425    inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
426 
427    tdst_transpose(dst, real_dst);
428    for (i = 0; i < 4; i++) {
429       const struct toy_src src =
430          tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
431 
432       /* cast to type D to make sure these are raw moves */
433       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
434    }
435 }
436 
437 static void
fs_lower_opcode_tgsi_imm(struct fs_compile_context * fcc,struct toy_dst dst,int idx)438 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
439                          struct toy_dst dst, int idx)
440 {
441    const uint32_t *imm;
442    struct toy_dst real_dst[4];
443    int ch;
444 
445    imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
446 
447    tdst_transpose(dst, real_dst);
448    /* raw moves */
449    for (ch = 0; ch < 4; ch++)
450       tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
451 }
452 
453 static void
fs_lower_opcode_tgsi_sv(struct fs_compile_context * fcc,struct toy_dst dst,int dim,int idx)454 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
455                         struct toy_dst dst, int dim, int idx)
456 {
457    struct toy_compiler *tc = &fcc->tc;
458    const struct toy_tgsi *tgsi = &fcc->tgsi;
459    int slot;
460 
461    assert(!dim);
462 
463    slot = toy_tgsi_find_system_value(tgsi, idx);
464    if (slot < 0)
465       return;
466 
467    switch (tgsi->system_values[slot].semantic_name) {
468    case TGSI_SEMANTIC_PRIMID:
469    case TGSI_SEMANTIC_INSTANCEID:
470    case TGSI_SEMANTIC_VERTEXID:
471    default:
472       tc_fail(tc, "unhandled system value");
473       tc_MOV(tc, dst, tsrc_imm_d(0));
474       break;
475    }
476 }
477 
478 static void
fs_lower_opcode_tgsi_direct(struct fs_compile_context * fcc,struct toy_inst * inst)479 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
480                             struct toy_inst *inst)
481 {
482    struct toy_compiler *tc = &fcc->tc;
483    int dim, idx;
484 
485    assert(inst->src[0].file == TOY_FILE_IMM);
486    dim = inst->src[0].val32;
487 
488    assert(inst->src[1].file == TOY_FILE_IMM);
489    idx = inst->src[1].val32;
490 
491    switch (inst->opcode) {
492    case TOY_OPCODE_TGSI_IN:
493       fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
494       break;
495    case TOY_OPCODE_TGSI_CONST:
496       if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
497          fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
498       else
499          fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
500       break;
501    case TOY_OPCODE_TGSI_SV:
502       fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
503       break;
504    case TOY_OPCODE_TGSI_IMM:
505       assert(!dim);
506       fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
507       break;
508    default:
509       tc_fail(tc, "unhandled TGSI fetch");
510       break;
511    }
512 
513    tc_discard_inst(tc, inst);
514 }
515 
516 static void
fs_lower_opcode_tgsi_indirect(struct fs_compile_context * fcc,struct toy_inst * inst)517 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
518                               struct toy_inst *inst)
519 {
520    struct toy_compiler *tc = &fcc->tc;
521    enum tgsi_file_type file;
522    int dim, idx;
523    struct toy_src indirect_dim, indirect_idx;
524 
525    assert(inst->src[0].file == TOY_FILE_IMM);
526    file = inst->src[0].val32;
527 
528    assert(inst->src[1].file == TOY_FILE_IMM);
529    dim = inst->src[1].val32;
530    indirect_dim = inst->src[2];
531 
532    assert(inst->src[3].file == TOY_FILE_IMM);
533    idx = inst->src[3].val32;
534    indirect_idx = inst->src[4];
535 
536    /* no dimension indirection */
537    assert(indirect_dim.file == TOY_FILE_IMM);
538    dim += indirect_dim.val32;
539 
540    switch (inst->opcode) {
541    case TOY_OPCODE_TGSI_INDIRECT_FETCH:
542       if (file == TGSI_FILE_CONSTANT) {
543          if (idx) {
544             struct toy_dst tmp = tc_alloc_tmp(tc);
545 
546             tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
547             indirect_idx = tsrc_from(tmp);
548          }
549 
550          fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
551          break;
552       }
553       /* fall through */
554    case TOY_OPCODE_TGSI_INDIRECT_STORE:
555    default:
556       tc_fail(tc, "unhandled TGSI indirection");
557       break;
558    }
559 
560    tc_discard_inst(tc, inst);
561 }
562 
563 /**
564  * Emit instructions to move sampling parameters to the message registers.
565  */
566 static int
fs_add_sampler_params_gen6(struct toy_compiler * tc,int msg_type,int base_mrf,int param_size,struct toy_src * coords,int num_coords,struct toy_src bias_or_lod,struct toy_src ref_or_si,struct toy_src * ddx,struct toy_src * ddy,int num_derivs)567 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
568                            int base_mrf, int param_size,
569                            struct toy_src *coords, int num_coords,
570                            struct toy_src bias_or_lod, struct toy_src ref_or_si,
571                            struct toy_src *ddx, struct toy_src *ddy,
572                            int num_derivs)
573 {
574    int num_params, i;
575 
576    assert(num_coords <= 4);
577    assert(num_derivs <= 3 && num_derivs <= num_coords);
578 
579 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
580    switch (msg_type) {
581    case GEN6_MSG_SAMPLER_SAMPLE:
582       for (i = 0; i < num_coords; i++)
583          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
584       num_params = num_coords;
585       break;
586    case GEN6_MSG_SAMPLER_SAMPLE_B:
587    case GEN6_MSG_SAMPLER_SAMPLE_L:
588       for (i = 0; i < num_coords; i++)
589          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
590       tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
591       num_params = 5;
592       break;
593    case GEN6_MSG_SAMPLER_SAMPLE_C:
594       for (i = 0; i < num_coords; i++)
595          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
596       tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
597       num_params = 5;
598       break;
599    case GEN6_MSG_SAMPLER_SAMPLE_D:
600       for (i = 0; i < num_coords; i++)
601          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
602       for (i = 0; i < num_derivs; i++) {
603          tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
604          tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
605       }
606       num_params = 4 + num_derivs * 2;
607       break;
608    case GEN6_MSG_SAMPLER_SAMPLE_B_C:
609    case GEN6_MSG_SAMPLER_SAMPLE_L_C:
610       for (i = 0; i < num_coords; i++)
611          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
612       tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
613       tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
614       num_params = 6;
615       break;
616    case GEN6_MSG_SAMPLER_LD:
617       assert(num_coords <= 3);
618 
619       for (i = 0; i < num_coords; i++)
620          tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
621       tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
622       tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
623       num_params = 5;
624       break;
625    case GEN6_MSG_SAMPLER_RESINFO:
626       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
627       num_params = 1;
628       break;
629    default:
630       tc_fail(tc, "unknown sampler opcode");
631       num_params = 0;
632       break;
633    }
634 #undef SAMPLER_PARAM
635 
636    return num_params * param_size;
637 }
638 
639 static int
fs_add_sampler_params_gen7(struct toy_compiler * tc,int msg_type,int base_mrf,int param_size,struct toy_src * coords,int num_coords,struct toy_src bias_or_lod,struct toy_src ref_or_si,struct toy_src * ddx,struct toy_src * ddy,int num_derivs)640 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
641                            int base_mrf, int param_size,
642                            struct toy_src *coords, int num_coords,
643                            struct toy_src bias_or_lod, struct toy_src ref_or_si,
644                            struct toy_src *ddx, struct toy_src *ddy,
645                            int num_derivs)
646 {
647    int num_params, i;
648 
649    assert(num_coords <= 4);
650    assert(num_derivs <= 3 && num_derivs <= num_coords);
651 
652 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
653    switch (msg_type) {
654    case GEN6_MSG_SAMPLER_SAMPLE:
655       for (i = 0; i < num_coords; i++)
656          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
657       num_params = num_coords;
658       break;
659    case GEN6_MSG_SAMPLER_SAMPLE_B:
660    case GEN6_MSG_SAMPLER_SAMPLE_L:
661       tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
662       for (i = 0; i < num_coords; i++)
663          tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
664       num_params = 1 + num_coords;
665       break;
666    case GEN6_MSG_SAMPLER_SAMPLE_C:
667       tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
668       for (i = 0; i < num_coords; i++)
669          tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
670       num_params = 1 + num_coords;
671       break;
672    case GEN6_MSG_SAMPLER_SAMPLE_D:
673       for (i = 0; i < num_coords; i++) {
674          tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
675          if (i < num_derivs) {
676             tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
677             tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
678          }
679       }
680       num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
681       break;
682    case GEN6_MSG_SAMPLER_SAMPLE_B_C:
683    case GEN6_MSG_SAMPLER_SAMPLE_L_C:
684       tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
685       tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
686       for (i = 0; i < num_coords; i++)
687          tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
688       num_params = 2 + num_coords;
689       break;
690    case GEN6_MSG_SAMPLER_LD:
691       assert(num_coords >= 1 && num_coords <= 3);
692 
693       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
694       tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
695       for (i = 1; i < num_coords; i++)
696          tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
697       num_params = 1 + num_coords;
698       break;
699    case GEN6_MSG_SAMPLER_RESINFO:
700       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
701       num_params = 1;
702       break;
703    default:
704       tc_fail(tc, "unknown sampler opcode");
705       num_params = 0;
706       break;
707    }
708 #undef SAMPLER_PARAM
709 
710    return num_params * param_size;
711 }
712 
713 /**
714  * Set up message registers and return the message descriptor for sampling.
715  */
716 static struct toy_src
fs_prepare_tgsi_sampling(struct fs_compile_context * fcc,const struct toy_inst * inst,int base_mrf,const uint32_t * saturate_coords,unsigned * ret_sampler_index)717 fs_prepare_tgsi_sampling(struct fs_compile_context *fcc,
718                          const struct toy_inst *inst,
719                          int base_mrf, const uint32_t *saturate_coords,
720                          unsigned *ret_sampler_index)
721 {
722    struct toy_compiler *tc = &fcc->tc;
723    unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
724    struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
725    int num_coords, ref_pos, num_derivs;
726    int sampler_src, param_size, i;
727 
728    switch (inst->exec_size) {
729    case GEN6_EXECSIZE_8:
730       simd_mode = GEN6_MSG_SAMPLER_SIMD8;
731       param_size = 1;
732       break;
733    case GEN6_EXECSIZE_16:
734       simd_mode = GEN6_MSG_SAMPLER_SIMD16;
735       param_size = 2;
736       break;
737    default:
738       tc_fail(tc, "unsupported execute size for sampling");
739       return tsrc_null();
740       break;
741    }
742 
743    num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target);
744    ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target);
745 
746    tsrc_transpose(inst->src[0], coords);
747    bias_or_lod = tsrc_null();
748    ref_or_si = tsrc_null();
749    num_derivs = 0;
750    sampler_src = 1;
751 
752    /*
753     * For TXD,
754     *
755     *   src0 := (x, y, z, w)
756     *   src1 := ddx
757     *   src2 := ddy
758     *   src3 := sampler
759     *
760     * For TEX2, TXB2, and TXL2,
761     *
762     *   src0 := (x, y, z, w)
763     *   src1 := (v or bias or lod, ...)
764     *   src2 := sampler
765     *
766     * For TEX, TXB, TXL, and TXP,
767     *
768     *   src0 := (x, y, z, w or bias or lod or projection)
769     *   src1 := sampler
770     *
771     * For TXQ,
772     *
773     *   src0 := (lod, ...)
774     *   src1 := sampler
775     *
776     * For TXQ_LZ,
777     *
778     *   src0 := sampler
779     *
780     * And for TXF,
781     *
782     *   src0 := (x, y, z, w or lod)
783     *   src1 := sampler
784     *
785     * State trackers should not generate opcode+texture combinations with
786     * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
787     */
788    switch (inst->opcode) {
789    case TOY_OPCODE_TGSI_TEX:
790       if (ref_pos >= 0) {
791          assert(ref_pos < 4);
792 
793          msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
794          ref_or_si = coords[ref_pos];
795       }
796       else {
797          msg_type = GEN6_MSG_SAMPLER_SAMPLE;
798       }
799       break;
800    case TOY_OPCODE_TGSI_TXD:
801       if (ref_pos >= 0) {
802          assert(ref_pos < 4);
803 
804          msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
805          ref_or_si = coords[ref_pos];
806 
807          if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
808             tc_fail(tc, "TXD with shadow sampler not supported");
809       }
810       else {
811          msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
812       }
813 
814       tsrc_transpose(inst->src[1], ddx);
815       tsrc_transpose(inst->src[2], ddy);
816       num_derivs = num_coords;
817       sampler_src = 3;
818       break;
819    case TOY_OPCODE_TGSI_TXP:
820       if (ref_pos >= 0) {
821          assert(ref_pos < 3);
822 
823          msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
824          ref_or_si = coords[ref_pos];
825       }
826       else {
827          msg_type = GEN6_MSG_SAMPLER_SAMPLE;
828       }
829 
830       /* project the coordinates */
831       {
832          struct toy_dst tmp[4];
833 
834          tc_alloc_tmp4(tc, tmp);
835 
836          tc_INV(tc, tmp[3], coords[3]);
837          for (i = 0; i < num_coords && i < 3; i++) {
838             tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
839             coords[i] = tsrc_from(tmp[i]);
840          }
841 
842          if (ref_pos >= i) {
843             tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
844             ref_or_si = tsrc_from(tmp[ref_pos]);
845          }
846       }
847       break;
848    case TOY_OPCODE_TGSI_TXB:
849       if (ref_pos >= 0) {
850          assert(ref_pos < 3);
851 
852          msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
853          ref_or_si = coords[ref_pos];
854       }
855       else {
856          msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
857       }
858 
859       bias_or_lod = coords[3];
860       break;
861    case TOY_OPCODE_TGSI_TXL:
862       if (ref_pos >= 0) {
863          assert(ref_pos < 3);
864 
865          msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
866          ref_or_si = coords[ref_pos];
867       }
868       else {
869          msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
870       }
871 
872       bias_or_lod = coords[3];
873       break;
874    case TOY_OPCODE_TGSI_TXF:
875       msg_type = GEN6_MSG_SAMPLER_LD;
876 
877       switch (inst->tex.target) {
878       case TGSI_TEXTURE_2D_MSAA:
879       case TGSI_TEXTURE_2D_ARRAY_MSAA:
880          assert(ref_pos >= 0 && ref_pos < 4);
881          /* lod is always 0 */
882          bias_or_lod = tsrc_imm_d(0);
883          ref_or_si = coords[ref_pos];
884          break;
885       default:
886          bias_or_lod = coords[3];
887          break;
888       }
889 
890       /* offset the coordinates */
891       if (!tsrc_is_null(inst->tex.offsets[0])) {
892          struct toy_dst tmp[4];
893          struct toy_src offsets[4];
894 
895          tc_alloc_tmp4(tc, tmp);
896          tsrc_transpose(inst->tex.offsets[0], offsets);
897 
898          for (i = 0; i < num_coords; i++) {
899             tc_ADD(tc, tmp[i], coords[i], offsets[i]);
900             coords[i] = tsrc_from(tmp[i]);
901          }
902       }
903 
904       sampler_src = 1;
905       break;
906    case TOY_OPCODE_TGSI_TXQ:
907       msg_type = GEN6_MSG_SAMPLER_RESINFO;
908       num_coords = 0;
909       bias_or_lod = coords[0];
910       break;
911    case TOY_OPCODE_TGSI_TXQ_LZ:
912       msg_type = GEN6_MSG_SAMPLER_RESINFO;
913       num_coords = 0;
914       sampler_src = 0;
915       break;
916    case TOY_OPCODE_TGSI_TEX2:
917       if (ref_pos >= 0) {
918          assert(ref_pos < 5);
919 
920          msg_type = GEN6_MSG_SAMPLER_SAMPLE_C;
921 
922          if (ref_pos >= 4) {
923             struct toy_src src1[4];
924             tsrc_transpose(inst->src[1], src1);
925             ref_or_si = src1[ref_pos - 4];
926          }
927          else {
928             ref_or_si = coords[ref_pos];
929          }
930       }
931       else {
932          msg_type = GEN6_MSG_SAMPLER_SAMPLE;
933       }
934 
935       sampler_src = 2;
936       break;
937    case TOY_OPCODE_TGSI_TXB2:
938       if (ref_pos >= 0) {
939          assert(ref_pos < 4);
940 
941          msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C;
942          ref_or_si = coords[ref_pos];
943       }
944       else {
945          msg_type = GEN6_MSG_SAMPLER_SAMPLE_B;
946       }
947 
948       {
949          struct toy_src src1[4];
950          tsrc_transpose(inst->src[1], src1);
951          bias_or_lod = src1[0];
952       }
953 
954       sampler_src = 2;
955       break;
956    case TOY_OPCODE_TGSI_TXL2:
957       if (ref_pos >= 0) {
958          assert(ref_pos < 4);
959 
960          msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
961          ref_or_si = coords[ref_pos];
962       }
963       else {
964          msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
965       }
966 
967       {
968          struct toy_src src1[4];
969          tsrc_transpose(inst->src[1], src1);
970          bias_or_lod = src1[0];
971       }
972 
973       sampler_src = 2;
974       break;
975    default:
976       assert(!"unhandled sampling opcode");
977       return tsrc_null();
978       break;
979    }
980 
981    assert(inst->src[sampler_src].file == TOY_FILE_IMM);
982    sampler_index = inst->src[sampler_src].val32;
983    binding_table_index = fcc->shader->bt.tex_base + sampler_index;
984 
985    /*
986     * From the Sandy Bridge PRM, volume 4 part 1, page 18:
987     *
988     *     "Note that the (cube map) coordinates delivered to the sampling
989     *      engine must already have been divided by the component with the
990     *      largest absolute value."
991     */
992    switch (inst->tex.target) {
993    case TGSI_TEXTURE_CUBE:
994    case TGSI_TEXTURE_SHADOWCUBE:
995    case TGSI_TEXTURE_CUBE_ARRAY:
996    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
997       /* TXQ does not need coordinates */
998       if (num_coords >= 3) {
999          struct toy_dst tmp[4];
1000 
1001          tc_alloc_tmp4(tc, tmp);
1002 
1003          tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
1004                tsrc_absolute(coords[1]), GEN6_COND_GE);
1005          tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
1006                tsrc_absolute(coords[2]), GEN6_COND_GE);
1007          tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
1008 
1009          for (i = 0; i < 3; i++) {
1010             tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
1011             coords[i] = tsrc_from(tmp[i]);
1012          }
1013       }
1014       break;
1015    }
1016 
1017    /*
1018     * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
1019     * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
1020     * so that sampling outside the border gets the correct colors.
1021     */
1022    for (i = 0; i < MIN2(num_coords, 3); i++) {
1023       bool is_rect;
1024 
1025       if (!(saturate_coords[i] & (1 << sampler_index)))
1026          continue;
1027 
1028       switch (inst->tex.target) {
1029       case TGSI_TEXTURE_RECT:
1030       case TGSI_TEXTURE_SHADOWRECT:
1031          is_rect = true;
1032          break;
1033       default:
1034          is_rect = false;
1035          break;
1036       }
1037 
1038       if (is_rect) {
1039          struct toy_src min, max;
1040          struct toy_dst tmp;
1041 
1042          tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
1043          tmp = tc_alloc_tmp(tc);
1044 
1045          /* saturate to [0, width] or [0, height] */
1046          /* TODO TXQ? */
1047          min = tsrc_imm_f(0.0f);
1048          max = tsrc_imm_f(2048.0f);
1049 
1050          tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G);
1051          tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L);
1052 
1053          coords[i] = tsrc_from(tmp);
1054       }
1055       else {
1056          struct toy_dst tmp;
1057          struct toy_inst *inst2;
1058 
1059          tmp = tc_alloc_tmp(tc);
1060 
1061          /* saturate to [0.0f, 1.0f] */
1062          inst2 = tc_MOV(tc, tmp, coords[i]);
1063          inst2->saturate = true;
1064 
1065          coords[i] = tsrc_from(tmp);
1066       }
1067    }
1068 
1069    /* set up sampler parameters */
1070    if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
1071       msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
1072             coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1073    }
1074    else {
1075       msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
1076             coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
1077    }
1078 
1079    /*
1080     * From the Sandy Bridge PRM, volume 4 part 1, page 136:
1081     *
1082     *     "The maximum message length allowed to the sampler is 11. This would
1083     *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
1084     *      SIMD16."
1085     */
1086    if (msg_len > 11)
1087       tc_fail(tc, "maximum length for messages to the sampler is 11");
1088 
1089    if (ret_sampler_index)
1090       *ret_sampler_index = sampler_index;
1091 
1092    return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
1093          false, simd_mode, msg_type, sampler_index, binding_table_index);
1094 }
1095 
1096 static void
fs_lower_opcode_tgsi_sampling(struct fs_compile_context * fcc,struct toy_inst * inst)1097 fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
1098                               struct toy_inst *inst)
1099 {
1100    struct toy_compiler *tc = &fcc->tc;
1101    struct toy_dst dst[4], tmp[4];
1102    struct toy_src desc;
1103    unsigned sampler_index;
1104    int swizzles[4], i;
1105    bool need_filter;
1106 
1107    desc = fs_prepare_tgsi_sampling(fcc, inst,
1108          fcc->first_free_mrf,
1109          fcc->variant->saturate_tex_coords,
1110          &sampler_index);
1111 
1112    switch (inst->opcode) {
1113    case TOY_OPCODE_TGSI_TXF:
1114    case TOY_OPCODE_TGSI_TXQ:
1115    case TOY_OPCODE_TGSI_TXQ_LZ:
1116       need_filter = false;
1117       break;
1118    default:
1119       need_filter = true;
1120       break;
1121    }
1122 
1123    toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
1124    inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
1125    inst->src[1] = desc;
1126    for (i = 2; i < ARRAY_SIZE(inst->src); i++)
1127       inst->src[i] = tsrc_null();
1128 
1129    /* write to temps first */
1130    tc_alloc_tmp4(tc, tmp);
1131    for (i = 0; i < 4; i++)
1132       tmp[i].type = inst->dst.type;
1133    tdst_transpose(inst->dst, dst);
1134    inst->dst = tmp[0];
1135 
1136    tc_move_inst(tc, inst);
1137 
1138    if (need_filter) {
1139       assert(sampler_index < fcc->variant->num_sampler_views);
1140       swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
1141       swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
1142       swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
1143       swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
1144    }
1145    else {
1146       swizzles[0] = PIPE_SWIZZLE_X;
1147       swizzles[1] = PIPE_SWIZZLE_Y;
1148       swizzles[2] = PIPE_SWIZZLE_Z;
1149       swizzles[3] = PIPE_SWIZZLE_W;
1150    }
1151 
1152    /* swizzle the results */
1153    for (i = 0; i < 4; i++) {
1154       switch (swizzles[i]) {
1155       case PIPE_SWIZZLE_0:
1156          tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
1157          break;
1158       case PIPE_SWIZZLE_1:
1159          tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
1160          break;
1161       default:
1162          tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
1163          break;
1164       }
1165    }
1166 }
1167 
1168 static void
fs_lower_opcode_derivative(struct toy_compiler * tc,struct toy_inst * inst)1169 fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
1170 {
1171    struct toy_dst dst[4];
1172    struct toy_src src[4];
1173    unsigned i;
1174 
1175    tdst_transpose(inst->dst, dst);
1176    tsrc_transpose(inst->src[0], src);
1177 
1178    /*
1179     * Every four fragments are from a 2x2 subspan, with
1180     *
1181     *   fragment 1 on the top-left,
1182     *   fragment 2 on the top-right,
1183     *   fragment 3 on the bottom-left,
1184     *   fragment 4 on the bottom-right.
1185     *
1186     * DDX should thus produce
1187     *
1188     *   dst = src.yyww - src.xxzz
1189     *
1190     * and DDY should produce
1191     *
1192     *   dst = src.zzww - src.xxyy
1193     *
1194     * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to
1195     * play with the region parameters.
1196     */
1197    if (inst->opcode == TOY_OPCODE_DDX) {
1198       for (i = 0; i < 4; i++) {
1199          struct toy_src left, right;
1200 
1201          left = tsrc_rect(src[i], TOY_RECT_220);
1202          right = tsrc_offset(left, 0, 1);
1203 
1204          tc_ADD(tc, dst[i], right, tsrc_negate(left));
1205       }
1206    }
1207    else {
1208       for (i = 0; i < 4; i++) {
1209          struct toy_src top, bottom;
1210 
1211          /* approximate with dst = src.zzzz - src.xxxx */
1212          top = tsrc_rect(src[i], TOY_RECT_440);
1213          bottom = tsrc_offset(top, 0, 2);
1214 
1215          tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
1216       }
1217    }
1218 
1219    tc_discard_inst(tc, inst);
1220 }
1221 
1222 static void
fs_lower_opcode_fb_write(struct toy_compiler * tc,struct toy_inst * inst)1223 fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
1224 {
1225    /* fs_write_fb() has set up the message registers */
1226    toy_compiler_lower_to_send(tc, inst, true,
1227          GEN6_SFID_DP_RC);
1228 }
1229 
1230 static void
fs_lower_opcode_kil(struct toy_compiler * tc,struct toy_inst * inst)1231 fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
1232 {
1233    struct toy_dst pixel_mask_dst;
1234    struct toy_src f0, pixel_mask;
1235    struct toy_inst *tmp;
1236 
1237    /* lower half of r1.7:ud */
1238    pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
1239    pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
1240 
1241    f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010);
1242 
1243    /* KILL or KILL_IF */
1244    if (tsrc_is_null(inst->src[0])) {
1245       struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
1246       struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0));
1247 
1248       /* create a mask that masks out all pixels */
1249       tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
1250       tmp->exec_size = GEN6_EXECSIZE_1;
1251       tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1252 
1253       tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ);
1254 
1255       /* swapping the two src operands breaks glBitmap()!? */
1256       tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1257       tmp->exec_size = GEN6_EXECSIZE_1;
1258       tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1259    }
1260    else {
1261       struct toy_src src[4];
1262       unsigned i;
1263 
1264       tsrc_transpose(inst->src[0], src);
1265       /* mask out killed pixels */
1266       for (i = 0; i < 4; i++) {
1267          tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
1268                GEN6_COND_GE);
1269 
1270          /* swapping the two src operands breaks glBitmap()!? */
1271          tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
1272          tmp->exec_size = GEN6_EXECSIZE_1;
1273          tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1274       }
1275    }
1276 
1277    tc_discard_inst(tc, inst);
1278 }
1279 
1280 static void
fs_lower_virtual_opcodes(struct fs_compile_context * fcc)1281 fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
1282 {
1283    struct toy_compiler *tc = &fcc->tc;
1284    struct toy_inst *inst;
1285 
1286    /* lower TGSI's first, as they might be lowered to other virtual opcodes */
1287    tc_head(tc);
1288    while ((inst = tc_next(tc)) != NULL) {
1289       switch (inst->opcode) {
1290       case TOY_OPCODE_TGSI_IN:
1291       case TOY_OPCODE_TGSI_CONST:
1292       case TOY_OPCODE_TGSI_SV:
1293       case TOY_OPCODE_TGSI_IMM:
1294          fs_lower_opcode_tgsi_direct(fcc, inst);
1295          break;
1296       case TOY_OPCODE_TGSI_INDIRECT_FETCH:
1297       case TOY_OPCODE_TGSI_INDIRECT_STORE:
1298          fs_lower_opcode_tgsi_indirect(fcc, inst);
1299          break;
1300       case TOY_OPCODE_TGSI_TEX:
1301       case TOY_OPCODE_TGSI_TXB:
1302       case TOY_OPCODE_TGSI_TXD:
1303       case TOY_OPCODE_TGSI_TXL:
1304       case TOY_OPCODE_TGSI_TXP:
1305       case TOY_OPCODE_TGSI_TXF:
1306       case TOY_OPCODE_TGSI_TXQ:
1307       case TOY_OPCODE_TGSI_TXQ_LZ:
1308       case TOY_OPCODE_TGSI_TEX2:
1309       case TOY_OPCODE_TGSI_TXB2:
1310       case TOY_OPCODE_TGSI_TXL2:
1311       case TOY_OPCODE_TGSI_SAMPLE:
1312       case TOY_OPCODE_TGSI_SAMPLE_I:
1313       case TOY_OPCODE_TGSI_SAMPLE_I_MS:
1314       case TOY_OPCODE_TGSI_SAMPLE_B:
1315       case TOY_OPCODE_TGSI_SAMPLE_C:
1316       case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
1317       case TOY_OPCODE_TGSI_SAMPLE_D:
1318       case TOY_OPCODE_TGSI_SAMPLE_L:
1319       case TOY_OPCODE_TGSI_GATHER4:
1320       case TOY_OPCODE_TGSI_SVIEWINFO:
1321       case TOY_OPCODE_TGSI_SAMPLE_POS:
1322       case TOY_OPCODE_TGSI_SAMPLE_INFO:
1323          fs_lower_opcode_tgsi_sampling(fcc, inst);
1324          break;
1325       }
1326    }
1327 
1328    tc_head(tc);
1329    while ((inst = tc_next(tc)) != NULL) {
1330       switch (inst->opcode) {
1331       case TOY_OPCODE_INV:
1332       case TOY_OPCODE_LOG:
1333       case TOY_OPCODE_EXP:
1334       case TOY_OPCODE_SQRT:
1335       case TOY_OPCODE_RSQ:
1336       case TOY_OPCODE_SIN:
1337       case TOY_OPCODE_COS:
1338       case TOY_OPCODE_FDIV:
1339       case TOY_OPCODE_POW:
1340       case TOY_OPCODE_INT_DIV_QUOTIENT:
1341       case TOY_OPCODE_INT_DIV_REMAINDER:
1342          toy_compiler_lower_math(tc, inst);
1343          break;
1344       case TOY_OPCODE_DDX:
1345       case TOY_OPCODE_DDY:
1346          fs_lower_opcode_derivative(tc, inst);
1347          break;
1348       case TOY_OPCODE_FB_WRITE:
1349          fs_lower_opcode_fb_write(tc, inst);
1350          break;
1351       case TOY_OPCODE_KIL:
1352          fs_lower_opcode_kil(tc, inst);
1353          break;
1354       default:
1355          if (inst->opcode > 127)
1356             tc_fail(tc, "unhandled virtual opcode");
1357          break;
1358       }
1359    }
1360 }
1361 
1362 /**
1363  * Compile the shader.
1364  */
1365 static bool
fs_compile(struct fs_compile_context * fcc)1366 fs_compile(struct fs_compile_context *fcc)
1367 {
1368    struct toy_compiler *tc = &fcc->tc;
1369    struct ilo_shader *sh = fcc->shader;
1370 
1371    fs_lower_virtual_opcodes(fcc);
1372    toy_compiler_legalize_for_ra(tc);
1373    toy_compiler_optimize(tc);
1374    toy_compiler_allocate_registers(tc,
1375          fcc->first_free_grf,
1376          fcc->last_free_grf,
1377          fcc->num_grf_per_vrf);
1378    toy_compiler_legalize_for_asm(tc);
1379 
1380    if (tc->fail) {
1381       ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
1382       return false;
1383    }
1384 
1385    if (ilo_debug & ILO_DEBUG_FS) {
1386       ilo_printf("legalized instructions:\n");
1387       toy_compiler_dump(tc);
1388       ilo_printf("\n");
1389    }
1390 
1391    if (true) {
1392       sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
1393    }
1394    else {
1395       static const uint32_t microcode[] = {
1396          /* fill in the microcode here */
1397          0x0, 0x0, 0x0, 0x0,
1398       };
1399       const bool swap = true;
1400 
1401       sh->kernel_size = sizeof(microcode);
1402       sh->kernel = MALLOC(sh->kernel_size);
1403 
1404       if (sh->kernel) {
1405          const int num_dwords = sizeof(microcode) / 4;
1406          const uint32_t *src = microcode;
1407          uint32_t *dst = (uint32_t *) sh->kernel;
1408          int i;
1409 
1410          for (i = 0; i < num_dwords; i += 4) {
1411             if (swap) {
1412                dst[i + 0] = src[i + 3];
1413                dst[i + 1] = src[i + 2];
1414                dst[i + 2] = src[i + 1];
1415                dst[i + 3] = src[i + 0];
1416             }
1417             else {
1418                memcpy(dst, src, 16);
1419             }
1420          }
1421       }
1422    }
1423 
1424    if (!sh->kernel) {
1425       ilo_err("failed to compile FS: %s\n", tc->reason);
1426       return false;
1427    }
1428 
1429    if (ilo_debug & ILO_DEBUG_FS) {
1430       ilo_printf("disassembly:\n");
1431       toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
1432       ilo_printf("\n");
1433    }
1434 
1435    return true;
1436 }
1437 
1438 /**
1439  * Emit instructions to write the color buffers (and the depth buffer).
1440  */
1441 static void
fs_write_fb(struct fs_compile_context * fcc)1442 fs_write_fb(struct fs_compile_context *fcc)
1443 {
1444    struct toy_compiler *tc = &fcc->tc;
1445    int base_mrf = fcc->first_free_mrf;
1446    const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
1447    bool header_present = false;
1448    struct toy_src desc;
1449    unsigned msg_type, ctrl;
1450    int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
1451    int pos_slot = -1, cbuf, i;
1452 
1453    for (i = 0; i < ARRAY_SIZE(color_slots); i++)
1454       color_slots[i] = -1;
1455 
1456    for (i = 0; i < fcc->tgsi.num_outputs; i++) {
1457       if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
1458          assert(fcc->tgsi.outputs[i].semantic_index < ARRAY_SIZE(color_slots));
1459          color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
1460       }
1461       else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1462          pos_slot = i;
1463       }
1464    }
1465 
1466    num_cbufs = fcc->variant->u.fs.num_cbufs;
1467    /* still need to send EOT (and probably depth) */
1468    if (!num_cbufs)
1469       num_cbufs = 1;
1470 
1471    /* we need the header to specify the pixel mask or render target */
1472    if (fcc->tgsi.uses_kill || num_cbufs > 1) {
1473       const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
1474       struct toy_inst *inst;
1475 
1476       inst = tc_MOV(tc, header, r0);
1477       inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1478       base_mrf += fcc->num_grf_per_vrf;
1479 
1480       /* this is a two-register header */
1481       if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) {
1482          inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
1483          inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1484          base_mrf += fcc->num_grf_per_vrf;
1485       }
1486 
1487       header_present = true;
1488    }
1489 
1490    for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
1491       const int slot =
1492          color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
1493       int mrf = base_mrf, vrf;
1494       struct toy_src src[4];
1495 
1496       if (slot >= 0) {
1497          const unsigned undefined_mask =
1498             fcc->tgsi.outputs[slot].undefined_mask;
1499          const int index = fcc->tgsi.outputs[slot].index;
1500 
1501          vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1502          if (vrf >= 0) {
1503             const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1504             tsrc_transpose(tmp, src);
1505          }
1506          else {
1507             /* use (0, 0, 0, 0) */
1508             tsrc_transpose(tsrc_imm_f(0.0f), src);
1509          }
1510 
1511          for (i = 0; i < 4; i++) {
1512             const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1513 
1514             if (undefined_mask & (1 << i))
1515                src[i] = tsrc_imm_f(0.0f);
1516 
1517             tc_MOV(tc, dst, src[i]);
1518 
1519             mrf += fcc->num_grf_per_vrf;
1520          }
1521       }
1522       else {
1523          /* use (0, 0, 0, 0) */
1524          for (i = 0; i < 4; i++) {
1525             const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1526 
1527             tc_MOV(tc, dst, tsrc_imm_f(0.0f));
1528             mrf += fcc->num_grf_per_vrf;
1529          }
1530       }
1531 
1532       /* select BLEND_STATE[rt] */
1533       if (cbuf > 0) {
1534          struct toy_inst *inst;
1535 
1536          inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
1537          inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
1538          inst->exec_size = GEN6_EXECSIZE_1;
1539          inst->src[0].rect = TOY_RECT_010;
1540       }
1541 
1542       if (cbuf == 0 && pos_slot >= 0) {
1543          const int index = fcc->tgsi.outputs[pos_slot].index;
1544          const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
1545          struct toy_src src[4];
1546          int vrf;
1547 
1548          vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
1549          if (vrf >= 0) {
1550             const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
1551             tsrc_transpose(tmp, src);
1552          }
1553          else {
1554             /* use (0, 0, 0, 0) */
1555             tsrc_transpose(tsrc_imm_f(0.0f), src);
1556          }
1557 
1558          /* only Z */
1559          tc_MOV(tc, dst, src[2]);
1560 
1561          mrf += fcc->num_grf_per_vrf;
1562       }
1563 
1564       msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ?
1565          GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 :
1566          GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8;
1567 
1568       ctrl = (cbuf == num_cbufs - 1) << 12 |
1569              msg_type << 8;
1570 
1571       desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
1572             mrf - fcc->first_free_mrf, 0,
1573             header_present, false,
1574             GEN6_MSG_DP_RT_WRITE,
1575             ctrl, fcc->shader->bt.rt_base + cbuf);
1576 
1577       tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
1578             tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
1579    }
1580 }
1581 
1582 /**
1583  * Set up shader outputs for fixed-function units.
1584  */
1585 static void
fs_setup_shader_out(struct ilo_shader * sh,const struct toy_tgsi * tgsi)1586 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
1587 {
1588    unsigned i;
1589 
1590    sh->out.count = tgsi->num_outputs;
1591    for (i = 0; i < tgsi->num_outputs; i++) {
1592       sh->out.register_indices[i] = tgsi->outputs[i].index;
1593       sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
1594       sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
1595 
1596       if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
1597          sh->out.has_pos = true;
1598    }
1599 }
1600 
1601 /**
1602  * Set up shader inputs for fixed-function units.
1603  */
1604 static void
fs_setup_shader_in(struct ilo_shader * sh,const struct toy_tgsi * tgsi,bool flatshade)1605 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
1606                    bool flatshade)
1607 {
1608    unsigned i;
1609 
1610    sh->in.count = tgsi->num_inputs;
1611    for (i = 0; i < tgsi->num_inputs; i++) {
1612       sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
1613       sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
1614       sh->in.interp[i] = tgsi->inputs[i].interp;
1615       sh->in.centroid[i] = tgsi->inputs[i].centroid;
1616 
1617       if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
1618          sh->in.has_pos = true;
1619          continue;
1620       }
1621       else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
1622          continue;
1623       }
1624 
1625       switch (tgsi->inputs[i].interp) {
1626       case TGSI_INTERPOLATE_CONSTANT:
1627          sh->in.const_interp_enable |= 1 << i;
1628          break;
1629       case TGSI_INTERPOLATE_LINEAR:
1630          sh->in.has_linear_interp = true;
1631 
1632          if (tgsi->inputs[i].centroid) {
1633             sh->in.barycentric_interpolation_mode |=
1634                GEN6_INTERP_NONPERSPECTIVE_CENTROID;
1635          }
1636          else {
1637             sh->in.barycentric_interpolation_mode |=
1638                GEN6_INTERP_NONPERSPECTIVE_PIXEL;
1639          }
1640          break;
1641       case TGSI_INTERPOLATE_COLOR:
1642          if (flatshade) {
1643             sh->in.const_interp_enable |= 1 << i;
1644             break;
1645          }
1646          /* fall through */
1647       case TGSI_INTERPOLATE_PERSPECTIVE:
1648          if (tgsi->inputs[i].centroid) {
1649             sh->in.barycentric_interpolation_mode |=
1650                GEN6_INTERP_PERSPECTIVE_CENTROID;
1651          }
1652          else {
1653             sh->in.barycentric_interpolation_mode |=
1654                GEN6_INTERP_PERSPECTIVE_PIXEL;
1655          }
1656          break;
1657       default:
1658          break;
1659       }
1660    }
1661 }
1662 
1663 static int
fs_setup_payloads(struct fs_compile_context * fcc)1664 fs_setup_payloads(struct fs_compile_context *fcc)
1665 {
1666    const struct ilo_shader *sh = fcc->shader;
1667    int grf, i;
1668 
1669    grf = 0;
1670 
1671    /* r0: header */
1672    grf++;
1673 
1674    /* r1-r2: coordinates and etc. */
1675    grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 2 : 1;
1676 
1677    for (i = 0; i < ARRAY_SIZE(fcc->payloads); i++) {
1678       const int reg_scale =
1679          (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2;
1680 
1681       /* r3-r26 or r32-r55: barycentric interpolation parameters */
1682       if (sh->in.barycentric_interpolation_mode &
1683             (GEN6_INTERP_PERSPECTIVE_PIXEL)) {
1684          fcc->payloads[i].interp_perspective_pixel = grf;
1685          grf += 2 * reg_scale;
1686       }
1687       if (sh->in.barycentric_interpolation_mode &
1688             (GEN6_INTERP_PERSPECTIVE_CENTROID)) {
1689          fcc->payloads[i].interp_perspective_centroid = grf;
1690          grf += 2 * reg_scale;
1691       }
1692       if (sh->in.barycentric_interpolation_mode &
1693             (GEN6_INTERP_PERSPECTIVE_SAMPLE)) {
1694          fcc->payloads[i].interp_perspective_sample = grf;
1695          grf += 2 * reg_scale;
1696       }
1697       if (sh->in.barycentric_interpolation_mode &
1698             (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) {
1699          fcc->payloads[i].interp_nonperspective_pixel = grf;
1700          grf += 2 * reg_scale;
1701       }
1702       if (sh->in.barycentric_interpolation_mode &
1703             (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) {
1704          fcc->payloads[i].interp_nonperspective_centroid = grf;
1705          grf += 2 * reg_scale;
1706       }
1707       if (sh->in.barycentric_interpolation_mode &
1708             (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) {
1709          fcc->payloads[i].interp_nonperspective_sample = grf;
1710          grf += 2 * reg_scale;
1711       }
1712 
1713       /* r27-r28 or r56-r57: interpoloated depth */
1714       if (sh->in.has_pos) {
1715          fcc->payloads[i].source_depth = grf;
1716          grf += 1 * reg_scale;
1717       }
1718 
1719       /* r29-r30 or r58-r59: interpoloated w */
1720       if (sh->in.has_pos) {
1721          fcc->payloads[i].source_w = grf;
1722          grf += 1 * reg_scale;
1723       }
1724 
1725       /* r31 or r60: position offset */
1726       if (false) {
1727          fcc->payloads[i].pos_offset = grf;
1728          grf++;
1729       }
1730 
1731       if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32)
1732          break;
1733    }
1734 
1735    return grf;
1736 }
1737 
1738 /**
1739  * Translate the TGSI tokens.
1740  */
1741 static bool
fs_setup_tgsi(struct toy_compiler * tc,const struct tgsi_token * tokens,struct toy_tgsi * tgsi)1742 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
1743               struct toy_tgsi *tgsi)
1744 {
1745    if (ilo_debug & ILO_DEBUG_FS) {
1746       ilo_printf("dumping fragment shader\n");
1747       ilo_printf("\n");
1748 
1749       tgsi_dump(tokens, 0);
1750       ilo_printf("\n");
1751    }
1752 
1753    toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
1754    if (tc->fail) {
1755       ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
1756       return false;
1757    }
1758 
1759    if (ilo_debug & ILO_DEBUG_FS) {
1760       ilo_printf("TGSI translator:\n");
1761       toy_tgsi_dump(tgsi);
1762       ilo_printf("\n");
1763       toy_compiler_dump(tc);
1764       ilo_printf("\n");
1765    }
1766 
1767    return true;
1768 }
1769 
1770 /**
1771  * Set up FS compile context.  This includes translating the TGSI tokens.
1772  */
1773 static bool
fs_setup(struct fs_compile_context * fcc,const struct ilo_shader_state * state,const struct ilo_shader_variant * variant)1774 fs_setup(struct fs_compile_context *fcc,
1775          const struct ilo_shader_state *state,
1776          const struct ilo_shader_variant *variant)
1777 {
1778    int num_consts;
1779 
1780    memset(fcc, 0, sizeof(*fcc));
1781 
1782    fcc->shader = CALLOC_STRUCT(ilo_shader);
1783    if (!fcc->shader)
1784       return false;
1785 
1786    fcc->variant = variant;
1787 
1788    toy_compiler_init(&fcc->tc, state->info.dev);
1789 
1790    fcc->dispatch_mode = GEN6_PS_DISPATCH_8;
1791 
1792    fcc->tc.templ.access_mode = GEN6_ALIGN_1;
1793    if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) {
1794       fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H;
1795       fcc->tc.templ.exec_size = GEN6_EXECSIZE_16;
1796    }
1797    else {
1798       fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q;
1799       fcc->tc.templ.exec_size = GEN6_EXECSIZE_8;
1800    }
1801 
1802    fcc->tc.rect_linear_width = 8;
1803 
1804    /*
1805     * The classic driver uses the sampler cache (gen6) or the data cache
1806     * (gen7).  Why?
1807     */
1808    fcc->const_cache = GEN6_SFID_DP_CC;
1809 
1810    if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
1811       toy_compiler_cleanup(&fcc->tc);
1812       FREE(fcc->shader);
1813       return false;
1814    }
1815 
1816    fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
1817    fs_setup_shader_out(fcc->shader, &fcc->tgsi);
1818 
1819    if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) {
1820       num_consts = (fcc->tgsi.const_count + 1) / 2;
1821 
1822       /*
1823        * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1824        *
1825        *     "The sum of all four read length fields (each incremented to
1826        *      represent the actual read length) must be less than or equal to
1827        *      64"
1828        *
1829        * Since we are usually under a high register pressure, do not allow
1830        * for more than 8.
1831        */
1832       if (num_consts > 8)
1833          num_consts = 0;
1834    }
1835    else {
1836       num_consts = 0;
1837    }
1838 
1839    fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts);
1840    fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
1841 
1842    fcc->first_const_grf = fs_setup_payloads(fcc);
1843    fcc->first_attr_grf = fcc->first_const_grf + num_consts;
1844    fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
1845    fcc->last_free_grf = 127;
1846 
1847    /* m0 is reserved for system routines */
1848    fcc->first_free_mrf = 1;
1849    fcc->last_free_mrf = 15;
1850 
1851    /* instructions are compressed with GEN6_EXECSIZE_16 */
1852    fcc->num_grf_per_vrf =
1853       (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 2 : 1;
1854 
1855    if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) {
1856       fcc->last_free_grf -= 15;
1857       fcc->first_free_mrf = fcc->last_free_grf + 1;
1858       fcc->last_free_mrf = fcc->first_free_mrf + 14;
1859    }
1860 
1861    fcc->shader->in.start_grf = fcc->first_const_grf;
1862    fcc->shader->has_kill = fcc->tgsi.uses_kill;
1863    fcc->shader->dispatch_16 =
1864       (fcc->dispatch_mode == GEN6_PS_DISPATCH_16);
1865 
1866    fcc->shader->bt.rt_base = 0;
1867    fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs;
1868    /* to send EOT */
1869    if (!fcc->shader->bt.rt_count)
1870       fcc->shader->bt.rt_count = 1;
1871 
1872    fcc->shader->bt.tex_base = fcc->shader->bt.rt_base +
1873                               fcc->shader->bt.rt_count;
1874    fcc->shader->bt.tex_count = fcc->variant->num_sampler_views;
1875 
1876    fcc->shader->bt.const_base = fcc->shader->bt.tex_base +
1877                                 fcc->shader->bt.tex_count;
1878    fcc->shader->bt.const_count = state->info.constant_buffer_count;
1879 
1880    fcc->shader->bt.total_count = fcc->shader->bt.const_base +
1881                                  fcc->shader->bt.const_count;
1882 
1883    return true;
1884 }
1885 
1886 /**
1887  * Compile the fragment shader.
1888  */
1889 struct ilo_shader *
ilo_shader_compile_fs(const struct ilo_shader_state * state,const struct ilo_shader_variant * variant)1890 ilo_shader_compile_fs(const struct ilo_shader_state *state,
1891                       const struct ilo_shader_variant *variant)
1892 {
1893    struct fs_compile_context fcc;
1894 
1895    if (!fs_setup(&fcc, state, variant))
1896       return NULL;
1897 
1898    fs_write_fb(&fcc);
1899 
1900    if (!fs_compile(&fcc)) {
1901       FREE(fcc.shader);
1902       fcc.shader = NULL;
1903    }
1904 
1905    toy_tgsi_cleanup(&fcc.tgsi);
1906    toy_compiler_cleanup(&fcc.tc);
1907 
1908    return fcc.shader;
1909 }
1910