• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  * Copyright © 2014-2015 Broadcom
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 
30 #include "main/mtypes.h"
31 #include "util/ralloc.h"
32 
33 #include "prog_to_nir.h"
34 #include "prog_instruction.h"
35 #include "prog_parameter.h"
36 #include "prog_print.h"
37 #include "program.h"
38 
39 /**
40  * \file prog_to_nir.c
41  *
42  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
43  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
44  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
45  */
46 
47 struct ptn_compile {
48    const struct gl_program *prog;
49    nir_builder build;
50    bool error;
51 
52    nir_variable *parameters;
53    nir_variable *input_vars[VARYING_SLOT_MAX];
54    nir_variable *output_vars[VARYING_SLOT_MAX];
55    nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
56    nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
57    nir_register **output_regs;
58    nir_register **temp_regs;
59 
60    nir_register *addr_reg;
61 };
62 
/* Builds an anonymous unsigned[4] swizzle from four SWIZZLE_* suffixes. */
#define SWIZ(c0, c1, c2, c3) \
   (unsigned[4]){ SWIZZLE_##c0, SWIZZLE_##c1, SWIZZLE_##c2, SWIZZLE_##c3 }

/* Extracts the channel named by the SWIZZLE_* suffix `chan` from `def`. */
#define ptn_channel(bld, def, chan) nir_channel(bld, def, SWIZZLE_##chan)
66 
67 static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile * c,nir_alu_dest * dest)68 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
69 {
70    nir_builder *b = &c->build;
71 
72    nir_alu_src src;
73    memset(&src, 0, sizeof(src));
74 
75    if (dest->dest.is_ssa)
76       src.src = nir_src_for_ssa(&dest->dest.ssa);
77    else {
78       assert(!dest->dest.reg.indirect);
79       src.src = nir_src_for_reg(dest->dest.reg.reg);
80       src.src.reg.base_offset = dest->dest.reg.base_offset;
81    }
82 
83    for (int i = 0; i < 4; i++)
84       src.swizzle[i] = i;
85 
86    return nir_mov_alu(b, src, 4);
87 }
88 
89 static nir_alu_dest
ptn_get_dest(struct ptn_compile * c,const struct prog_dst_register * prog_dst)90 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
91 {
92    nir_alu_dest dest;
93 
94    memset(&dest, 0, sizeof(dest));
95 
96    switch (prog_dst->File) {
97    case PROGRAM_TEMPORARY:
98       dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
99       break;
100    case PROGRAM_OUTPUT:
101       dest.dest.reg.reg = c->output_regs[prog_dst->Index];
102       break;
103    case PROGRAM_ADDRESS:
104       assert(prog_dst->Index == 0);
105       dest.dest.reg.reg = c->addr_reg;
106       break;
107    case PROGRAM_UNDEFINED:
108       break;
109    }
110 
111    dest.write_mask = prog_dst->WriteMask;
112    dest.saturate = false;
113 
114    assert(!prog_dst->RelAddr);
115 
116    return dest;
117 }
118 
119 static nir_ssa_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)120 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
121 {
122    nir_builder *b = &c->build;
123    nir_alu_src src;
124 
125    memset(&src, 0, sizeof(src));
126 
127    switch (prog_src->File) {
128    case PROGRAM_UNDEFINED:
129       return nir_imm_float(b, 0.0);
130    case PROGRAM_TEMPORARY:
131       assert(!prog_src->RelAddr && prog_src->Index >= 0);
132       src.src.reg.reg = c->temp_regs[prog_src->Index];
133       break;
134    case PROGRAM_INPUT: {
135       /* ARB_vertex_program doesn't allow relative addressing on vertex
136        * attributes; ARB_fragment_program has no relative addressing at all.
137        */
138       assert(!prog_src->RelAddr);
139 
140       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
141 
142       nir_variable *var = c->input_vars[prog_src->Index];
143       src.src = nir_src_for_ssa(nir_load_var(b, var));
144       break;
145    }
146    case PROGRAM_SYSTEM_VALUE: {
147       assert(!prog_src->RelAddr);
148 
149       assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
150 
151       nir_variable *var = c->sysval_vars[prog_src->Index];
152       src.src = nir_src_for_ssa(nir_load_var(b, var));
153       break;
154    }
155    case PROGRAM_STATE_VAR:
156    case PROGRAM_CONSTANT: {
157       /* We actually want to look at the type in the Parameters list for this,
158        * because it lets us upload constant builtin uniforms as actual
159        * constants.
160        */
161       struct gl_program_parameter_list *plist = c->prog->Parameters;
162       gl_register_file file = prog_src->RelAddr ? prog_src->File :
163          plist->Parameters[prog_src->Index].Type;
164 
165       switch (file) {
166       case PROGRAM_CONSTANT:
167          if ((c->prog->arb.IndirectRegisterFiles &
168               (1 << PROGRAM_CONSTANT)) == 0) {
169             unsigned pvo = plist->ParameterValueOffset[prog_src->Index];
170             float *v = (float *) plist->ParameterValues + pvo;
171             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
172             break;
173          }
174          /* FALLTHROUGH */
175       case PROGRAM_STATE_VAR: {
176          assert(c->parameters != NULL);
177 
178          nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
179 
180          nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
181          if (prog_src->RelAddr)
182             index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
183          deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
184 
185          src.src = nir_src_for_ssa(nir_load_deref(b, deref));
186          break;
187       }
188       default:
189          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
190                  _mesa_register_file_name(file), file);
191          abort();
192       }
193       break;
194    }
195    default:
196       fprintf(stderr, "unknown src register file: %s (%d)\n",
197               _mesa_register_file_name(prog_src->File), prog_src->File);
198       abort();
199    }
200 
201    nir_ssa_def *def;
202    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
203        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
204       /* The simple non-SWZ case. */
205       for (int i = 0; i < 4; i++)
206          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
207 
208       def = nir_mov_alu(b, src, 4);
209 
210       if (prog_src->Negate)
211          def = nir_fneg(b, def);
212    } else {
213       /* The SWZ instruction allows per-component zero/one swizzles, and also
214        * per-component negation.
215        */
216       nir_ssa_def *chans[4];
217       for (int i = 0; i < 4; i++) {
218          int swizzle = GET_SWZ(prog_src->Swizzle, i);
219          if (swizzle == SWIZZLE_ZERO) {
220             chans[i] = nir_imm_float(b, 0.0);
221          } else if (swizzle == SWIZZLE_ONE) {
222             chans[i] = nir_imm_float(b, 1.0);
223          } else {
224             assert(swizzle != SWIZZLE_NIL);
225             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
226             nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
227             mov->dest.write_mask = 0x1;
228             mov->src[0] = src;
229             mov->src[0].swizzle[0] = swizzle;
230             nir_builder_instr_insert(b, &mov->instr);
231 
232             chans[i] = &mov->dest.dest.ssa;
233          }
234 
235          if (prog_src->Negate & (1 << i))
236             chans[i] = nir_fneg(b, chans[i]);
237       }
238       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
239    }
240 
241    return def;
242 }
243 
244 static void
ptn_alu(nir_builder * b,nir_op op,nir_alu_dest dest,nir_ssa_def ** src)245 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
246 {
247    unsigned num_srcs = nir_op_infos[op].num_inputs;
248    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
249    unsigned i;
250 
251    for (i = 0; i < num_srcs; i++)
252       instr->src[i].src = nir_src_for_ssa(src[i]);
253 
254    instr->dest = dest;
255    nir_builder_instr_insert(b, &instr->instr);
256 }
257 
258 static void
ptn_move_dest_masked(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def,unsigned write_mask)259 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
260                      nir_ssa_def *def, unsigned write_mask)
261 {
262    if (!(dest.write_mask & write_mask))
263       return;
264 
265    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
266    if (!mov)
267       return;
268 
269    mov->dest = dest;
270    mov->dest.write_mask &= write_mask;
271    mov->src[0].src = nir_src_for_ssa(def);
272    for (unsigned i = def->num_components; i < 4; i++)
273       mov->src[0].swizzle[i] = def->num_components - 1;
274    nir_builder_instr_insert(b, &mov->instr);
275 }
276 
277 static void
ptn_move_dest(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def)278 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
279 {
280    ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
281 }
282 
283 static void
ptn_arl(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)284 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
285 {
286    ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
287 }
288 
289 /* EXP - Approximate Exponential Base 2
290  *  dst.x = 2^{\lfloor src.x\rfloor}
291  *  dst.y = src.x - \lfloor src.x\rfloor
292  *  dst.z = 2^{src.x}
293  *  dst.w = 1.0
294  */
295 static void
ptn_exp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)296 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
297 {
298    nir_ssa_def *srcx = ptn_channel(b, src[0], X);
299 
300    ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
301    ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
302    ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
303    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
304 }
305 
306 /* LOG - Approximate Logarithm Base 2
307  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
308  *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
309  *  dst.z = \log_2{|src.x|}
310  *  dst.w = 1.0
311  */
312 static void
ptn_log(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)313 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
314 {
315    nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
316    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
317    nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
318 
319    ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
320    ptn_move_dest_masked(b, dest,
321                         nir_fmul(b, abs_srcx,
322                                  nir_fexp2(b, nir_fneg(b, floor_log2))),
323                         WRITEMASK_Y);
324    ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
325    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
326 }
327 
328 /* DST - Distance Vector
329  *   dst.x = 1.0
330  *   dst.y = src0.y \times src1.y
331  *   dst.z = src0.z
332  *   dst.w = src1.w
333  */
334 static void
ptn_dst(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)335 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
336 {
337    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
338    ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
339    ptn_move_dest_masked(b, dest, nir_mov(b, src[0]), WRITEMASK_Z);
340    ptn_move_dest_masked(b, dest, nir_mov(b, src[1]), WRITEMASK_W);
341 }
342 
343 /* LIT - Light Coefficients
344  *  dst.x = 1.0
345  *  dst.y = max(src.x, 0.0)
346  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
347  *  dst.w = 1.0
348  */
349 static void
ptn_lit(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)350 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
351 {
352    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
353 
354    ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
355                                           nir_imm_float(b, 0.0)), WRITEMASK_Y);
356 
357    if (dest.write_mask & WRITEMASK_Z) {
358       nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
359       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
360                                                  nir_imm_float(b, 128.0)),
361                                      nir_imm_float(b, -128.0));
362       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
363                                   wclamp);
364 
365       nir_ssa_def *z = nir_bcsel(b,
366                                  nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
367                                  nir_imm_float(b, 0.0),
368                                  pow);
369 
370       ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
371    }
372 }
373 
374 /* SCS - Sine Cosine
375  *   dst.x = \cos{src.x}
376  *   dst.y = \sin{src.x}
377  *   dst.z = 0.0
378  *   dst.w = 1.0
379  */
380 static void
ptn_scs(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)381 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
382 {
383    ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
384                         WRITEMASK_X);
385    ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
386                         WRITEMASK_Y);
387    ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
388    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
389 }
390 
391 static void
ptn_slt(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)392 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
393 {
394    ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
395 }
396 
397 static void
ptn_sge(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)398 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
399 {
400    ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
401 }
402 
403 static void
ptn_xpd(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)404 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
405 {
406    ptn_move_dest_masked(b, dest,
407                         nir_fsub(b,
408                                  nir_fmul(b,
409                                           nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
410                                           nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
411                                  nir_fmul(b,
412                                           nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
413                                           nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3))),
414                         WRITEMASK_XYZ);
415    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
416 }
417 
418 static void
ptn_dp2(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)419 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
420 {
421    ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
422 }
423 
424 static void
ptn_dp3(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)425 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
426 {
427    ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
428 }
429 
430 static void
ptn_dp4(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)431 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
432 {
433    ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
434 }
435 
436 static void
ptn_dph(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)437 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
438 {
439    ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
440 }
441 
442 static void
ptn_cmp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)443 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
444 {
445    ptn_move_dest(b, dest, nir_bcsel(b,
446                                     nir_flt(b, src[0], nir_imm_float(b, 0.0)),
447                                     src[1], src[2]));
448 }
449 
450 static void
ptn_lrp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)451 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
452 {
453    ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
454 }
455 
456 static void
ptn_kil(nir_builder * b,nir_ssa_def ** src)457 ptn_kil(nir_builder *b, nir_ssa_def **src)
458 {
459    /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
460    b->exact = true;
461    nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
462    b->exact = false;
463 
464    nir_intrinsic_instr *discard =
465       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
466    discard->src[0] = nir_src_for_ssa(cmp);
467    nir_builder_instr_insert(b, &discard->instr);
468 }
469 
470 static void
ptn_tex(struct ptn_compile * c,nir_alu_dest dest,nir_ssa_def ** src,struct prog_instruction * prog_inst)471 ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
472         struct prog_instruction *prog_inst)
473 {
474    nir_builder *b = &c->build;
475    nir_tex_instr *instr;
476    nir_texop op;
477    unsigned num_srcs;
478 
479    switch (prog_inst->Opcode) {
480    case OPCODE_TEX:
481       op = nir_texop_tex;
482       num_srcs = 1;
483       break;
484    case OPCODE_TXB:
485       op = nir_texop_txb;
486       num_srcs = 2;
487       break;
488    case OPCODE_TXD:
489       op = nir_texop_txd;
490       num_srcs = 3;
491       break;
492    case OPCODE_TXL:
493       op = nir_texop_txl;
494       num_srcs = 2;
495       break;
496    case OPCODE_TXP:
497       op = nir_texop_tex;
498       num_srcs = 2;
499       break;
500    default:
501       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
502       abort();
503    }
504 
505    /* Deref sources */
506    num_srcs += 2;
507 
508    if (prog_inst->TexShadow)
509       num_srcs++;
510 
511    instr = nir_tex_instr_create(b->shader, num_srcs);
512    instr->op = op;
513    instr->dest_type = nir_type_float;
514    instr->is_shadow = prog_inst->TexShadow;
515 
516    switch (prog_inst->TexSrcTarget) {
517    case TEXTURE_1D_INDEX:
518       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
519       break;
520    case TEXTURE_2D_INDEX:
521       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
522       break;
523    case TEXTURE_3D_INDEX:
524       instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
525       break;
526    case TEXTURE_CUBE_INDEX:
527       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
528       break;
529    case TEXTURE_RECT_INDEX:
530       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
531       break;
532    default:
533       fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
534       abort();
535    }
536 
537    instr->coord_components =
538       glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
539 
540    nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
541    if (!var) {
542       const struct glsl_type *type =
543          glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
544       char samplerName[20];
545       snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
546       var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
547       var->data.binding = prog_inst->TexSrcUnit;
548       var->data.explicit_binding = true;
549       c->sampler_vars[prog_inst->TexSrcUnit] = var;
550    }
551 
552    nir_deref_instr *deref = nir_build_deref_var(b, var);
553 
554    unsigned src_number = 0;
555 
556    instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
557    instr->src[src_number].src_type = nir_tex_src_texture_deref;
558    src_number++;
559    instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
560    instr->src[src_number].src_type = nir_tex_src_sampler_deref;
561    src_number++;
562 
563    instr->src[src_number].src =
564       nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
565                                   instr->coord_components));
566    instr->src[src_number].src_type = nir_tex_src_coord;
567    src_number++;
568 
569    if (prog_inst->Opcode == OPCODE_TXP) {
570       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
571       instr->src[src_number].src_type = nir_tex_src_projector;
572       src_number++;
573    }
574 
575    if (prog_inst->Opcode == OPCODE_TXB) {
576       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
577       instr->src[src_number].src_type = nir_tex_src_bias;
578       src_number++;
579    }
580 
581    if (prog_inst->Opcode == OPCODE_TXL) {
582       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
583       instr->src[src_number].src_type = nir_tex_src_lod;
584       src_number++;
585    }
586 
587    if (instr->is_shadow) {
588       if (instr->coord_components < 3)
589          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
590       else
591          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
592 
593       instr->src[src_number].src_type = nir_tex_src_comparator;
594       src_number++;
595    }
596 
597    assert(src_number == num_srcs);
598 
599    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
600    nir_builder_instr_insert(b, &instr->instr);
601 
602    /* Resolve the writemask on the texture op. */
603    ptn_move_dest(b, dest, &instr->dest.ssa);
604 }
605 
606 static const nir_op op_trans[MAX_OPCODE] = {
607    [OPCODE_NOP] = 0,
608    [OPCODE_ABS] = nir_op_fabs,
609    [OPCODE_ADD] = nir_op_fadd,
610    [OPCODE_ARL] = 0,
611    [OPCODE_CMP] = 0,
612    [OPCODE_COS] = 0,
613    [OPCODE_DDX] = nir_op_fddx,
614    [OPCODE_DDY] = nir_op_fddy,
615    [OPCODE_DP2] = 0,
616    [OPCODE_DP3] = 0,
617    [OPCODE_DP4] = 0,
618    [OPCODE_DPH] = 0,
619    [OPCODE_DST] = 0,
620    [OPCODE_END] = 0,
621    [OPCODE_EX2] = 0,
622    [OPCODE_EXP] = 0,
623    [OPCODE_FLR] = nir_op_ffloor,
624    [OPCODE_FRC] = nir_op_ffract,
625    [OPCODE_LG2] = 0,
626    [OPCODE_LIT] = 0,
627    [OPCODE_LOG] = 0,
628    [OPCODE_LRP] = 0,
629    [OPCODE_MAD] = 0,
630    [OPCODE_MAX] = nir_op_fmax,
631    [OPCODE_MIN] = nir_op_fmin,
632    [OPCODE_MOV] = nir_op_mov,
633    [OPCODE_MUL] = nir_op_fmul,
634    [OPCODE_POW] = 0,
635    [OPCODE_RCP] = 0,
636 
637    [OPCODE_RSQ] = 0,
638    [OPCODE_SCS] = 0,
639    [OPCODE_SGE] = 0,
640    [OPCODE_SIN] = 0,
641    [OPCODE_SLT] = 0,
642    [OPCODE_SSG] = nir_op_fsign,
643    [OPCODE_SUB] = nir_op_fsub,
644    [OPCODE_SWZ] = 0,
645    [OPCODE_TEX] = 0,
646    [OPCODE_TRUNC] = nir_op_ftrunc,
647    [OPCODE_TXB] = 0,
648    [OPCODE_TXD] = 0,
649    [OPCODE_TXL] = 0,
650    [OPCODE_TXP] = 0,
651    [OPCODE_XPD] = 0,
652 };
653 
654 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)655 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
656 {
657    nir_builder *b = &c->build;
658    unsigned i;
659    const unsigned op = prog_inst->Opcode;
660 
661    if (op == OPCODE_END)
662       return;
663 
664    nir_ssa_def *src[3];
665    for (i = 0; i < 3; i++) {
666       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
667    }
668    nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
669    if (c->error)
670       return;
671 
672    switch (op) {
673    case OPCODE_RSQ:
674       ptn_move_dest(b, dest,
675                     nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
676       break;
677 
678    case OPCODE_RCP:
679       ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
680       break;
681 
682    case OPCODE_EX2:
683       ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
684       break;
685 
686    case OPCODE_LG2:
687       ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
688       break;
689 
690    case OPCODE_POW:
691       ptn_move_dest(b, dest, nir_fpow(b,
692                                       ptn_channel(b, src[0], X),
693                                       ptn_channel(b, src[1], X)));
694       break;
695 
696    case OPCODE_COS:
697       ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
698       break;
699 
700    case OPCODE_SIN:
701       ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
702       break;
703 
704    case OPCODE_ARL:
705       ptn_arl(b, dest, src);
706       break;
707 
708    case OPCODE_EXP:
709       ptn_exp(b, dest, src);
710       break;
711 
712    case OPCODE_LOG:
713       ptn_log(b, dest, src);
714       break;
715 
716    case OPCODE_LRP:
717       ptn_lrp(b, dest, src);
718       break;
719 
720    case OPCODE_MAD:
721       ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
722       break;
723 
724    case OPCODE_DST:
725       ptn_dst(b, dest, src);
726       break;
727 
728    case OPCODE_LIT:
729       ptn_lit(b, dest, src);
730       break;
731 
732    case OPCODE_XPD:
733       ptn_xpd(b, dest, src);
734       break;
735 
736    case OPCODE_DP2:
737       ptn_dp2(b, dest, src);
738       break;
739 
740    case OPCODE_DP3:
741       ptn_dp3(b, dest, src);
742       break;
743 
744    case OPCODE_DP4:
745       ptn_dp4(b, dest, src);
746       break;
747 
748    case OPCODE_DPH:
749       ptn_dph(b, dest, src);
750       break;
751 
752    case OPCODE_KIL:
753       ptn_kil(b, src);
754       break;
755 
756    case OPCODE_CMP:
757       ptn_cmp(b, dest, src);
758       break;
759 
760    case OPCODE_SCS:
761       ptn_scs(b, dest, src);
762       break;
763 
764    case OPCODE_SLT:
765       ptn_slt(b, dest, src);
766       break;
767 
768    case OPCODE_SGE:
769       ptn_sge(b, dest, src);
770       break;
771 
772    case OPCODE_TEX:
773    case OPCODE_TXB:
774    case OPCODE_TXD:
775    case OPCODE_TXL:
776    case OPCODE_TXP:
777       ptn_tex(c, dest, src, prog_inst);
778       break;
779 
780    case OPCODE_SWZ:
781       /* Extended swizzles were already handled in ptn_get_src(). */
782       ptn_alu(b, nir_op_mov, dest, src);
783       break;
784 
785    case OPCODE_NOP:
786       break;
787 
788    default:
789       if (op_trans[op] != 0) {
790          ptn_alu(b, op_trans[op], dest, src);
791       } else {
792          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
793          abort();
794       }
795       break;
796    }
797 
798    if (prog_inst->Saturate) {
799       assert(prog_inst->Saturate);
800       assert(!dest.dest.is_ssa);
801       ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
802    }
803 }
804 
805 /**
806  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
807  * variables at the end of the shader.
808  *
809  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
810  * written, because there's no output load intrinsic, which means we couldn't
811  * handle writemasks.
812  */
813 static void
ptn_add_output_stores(struct ptn_compile * c)814 ptn_add_output_stores(struct ptn_compile *c)
815 {
816    nir_builder *b = &c->build;
817 
818    nir_foreach_shader_out_variable(var, b->shader) {
819       nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
820       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
821           var->data.location == FRAG_RESULT_DEPTH) {
822          /* result.depth has this strange convention of being the .z component of
823           * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
824           * match GLSL's gl_FragDepth and the expectations of most backends.
825           */
826          src = nir_channel(b, src, 2);
827       }
828       if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
829           (var->data.location == VARYING_SLOT_FOGC ||
830            var->data.location == VARYING_SLOT_PSIZ)) {
831          /* result.{fogcoord,psiz} is a single component value */
832          src = nir_channel(b, src, 0);
833       }
834       unsigned num_components = glsl_get_vector_elements(var->type);
835       nir_store_var(b, var, src, (1 << num_components) - 1);
836    }
837 }
838 
839 static void
setup_registers_and_variables(struct ptn_compile * c)840 setup_registers_and_variables(struct ptn_compile *c)
841 {
842    nir_builder *b = &c->build;
843    struct nir_shader *shader = b->shader;
844 
845    /* Create input variables. */
846    uint64_t inputs_read = c->prog->info.inputs_read;
847    while (inputs_read) {
848       const int i = u_bit_scan64(&inputs_read);
849 
850       nir_variable *var =
851          nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
852                              ralloc_asprintf(shader, "in_%d", i));
853       var->data.location = i;
854       var->data.index = 0;
855 
856       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
857          if (i == VARYING_SLOT_FOGC) {
858             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
859              * input variable a float, and create a local containing the
860              * full vec4 value.
861              */
862             var->type = glsl_float_type();
863 
864             nir_variable *fullvar =
865                nir_local_variable_create(b->impl, glsl_vec4_type(),
866                                          "fogcoord_tmp");
867 
868             nir_store_var(b, fullvar,
869                           nir_vec4(b, nir_load_var(b, var),
870                                    nir_imm_float(b, 0.0),
871                                    nir_imm_float(b, 0.0),
872                                    nir_imm_float(b, 1.0)),
873                           WRITEMASK_XYZW);
874 
875             /* We inserted the real input into the list so the driver has real
876              * inputs, but we set c->input_vars[i] to the temporary so we use
877              * the splatted value.
878              */
879             c->input_vars[i] = fullvar;
880             continue;
881          }
882       }
883 
884       c->input_vars[i] = var;
885    }
886 
887    /* Create system value variables */
888    int i;
889    BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
890       nir_variable *var =
891          nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
892                              ralloc_asprintf(shader, "sv_%d", i));
893       var->data.location = i;
894       var->data.index = 0;
895 
896       c->sysval_vars[i] = var;
897    }
898 
899    /* Create output registers and variables. */
900    int max_outputs = util_last_bit(c->prog->info.outputs_written);
901    c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
902 
903    uint64_t outputs_written = c->prog->info.outputs_written;
904    while (outputs_written) {
905       const int i = u_bit_scan64(&outputs_written);
906 
907       /* Since we can't load from outputs in the IR, we make temporaries
908        * for the outputs and emit stores to the real outputs at the end of
909        * the shader.
910        */
911       nir_register *reg = nir_local_reg_create(b->impl);
912       reg->num_components = 4;
913 
914       const struct glsl_type *type;
915       if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
916           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
917           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
918          type = glsl_float_type();
919       else
920          type = glsl_vec4_type();
921 
922       nir_variable *var =
923          nir_variable_create(shader, nir_var_shader_out, type,
924                              ralloc_asprintf(shader, "out_%d", i));
925       var->data.location = i;
926       var->data.index = 0;
927 
928       c->output_regs[i] = reg;
929       c->output_vars[i] = var;
930    }
931 
932    /* Create temporary registers. */
933    c->temp_regs = rzalloc_array(c, nir_register *,
934                                 c->prog->arb.NumTemporaries);
935 
936    nir_register *reg;
937    for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
938       reg = nir_local_reg_create(b->impl);
939       if (!reg) {
940          c->error = true;
941          return;
942       }
943       reg->num_components = 4;
944       c->temp_regs[i] = reg;
945    }
946 
947    /* Create the address register (for ARB_vertex_program). */
948    reg = nir_local_reg_create(b->impl);
949    if (!reg) {
950       c->error = true;
951       return;
952    }
953    reg->num_components = 1;
954    c->addr_reg = reg;
955 }
956 
957 struct nir_shader *
prog_to_nir(const struct gl_program * prog,const nir_shader_compiler_options * options)958 prog_to_nir(const struct gl_program *prog,
959             const nir_shader_compiler_options *options)
960 {
961    struct ptn_compile *c;
962    struct nir_shader *s;
963    gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
964 
965    c = rzalloc(NULL, struct ptn_compile);
966    if (!c)
967       return NULL;
968    c->prog = prog;
969 
970    nir_builder_init_simple_shader(&c->build, NULL, stage, options);
971 
972    /* Copy the shader_info from the gl_program */
973    c->build.shader->info = prog->info;
974 
975    s = c->build.shader;
976 
977    if (prog->Parameters->NumParameters > 0) {
978       const struct glsl_type *type =
979          glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
980       c->parameters =
981          nir_variable_create(s, nir_var_uniform, type,
982                              prog->Parameters->Parameters[0].Name);
983    }
984 
985    setup_registers_and_variables(c);
986    if (unlikely(c->error))
987       goto fail;
988 
989    for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
990       ptn_emit_instruction(c, &prog->arb.Instructions[i]);
991 
992       if (unlikely(c->error))
993          break;
994    }
995 
996    ptn_add_output_stores(c);
997 
998    s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
999    s->info.num_textures = util_last_bit(prog->SamplersUsed);
1000    s->info.num_ubos = 0;
1001    s->info.num_abos = 0;
1002    s->info.num_ssbos = 0;
1003    s->info.num_images = 0;
1004    s->info.uses_texture_gather = false;
1005    s->info.clip_distance_array_size = 0;
1006    s->info.cull_distance_array_size = 0;
1007    s->info.separate_shader = false;
1008 
1009 fail:
1010    if (c->error) {
1011       ralloc_free(s);
1012       s = NULL;
1013    }
1014    ralloc_free(c);
1015    return s;
1016 }
1017