• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  * Copyright © 2014-2015 Broadcom
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33 
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39 
40 /**
41  * \file prog_to_nir.c
42  *
43  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
44  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
45  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
46  */
47 
/**
 * State carried through the translation of one Mesa IR program to NIR.
 * A pointer to this struct is handed to most helpers in this file.
 */
struct ptn_compile {
   const struct gl_context *ctx;   /* GL context; read for driver constants */
   const struct gl_program *prog;  /* the Mesa IR program being translated */
   nir_builder build;              /* builder used to emit every NIR instruction */
   bool error;                     /* when set, instruction emission bails out early */

   /* Variable dereferenced as an array for PROGRAM_STATE_VAR and
    * (indirectly addressed) PROGRAM_CONSTANT reads.
    */
   nir_variable *parameters;
   /* Variables loaded for PROGRAM_INPUT reads, indexed by the source Index. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Variables loaded for PROGRAM_SYSTEM_VALUE reads, indexed by the source Index. */
   nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
   /* Sampler variables created on first use, indexed by TexSrcUnit. */
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   /* Registers standing in for PROGRAM_OUTPUT writes, indexed by output Index. */
   nir_register **output_regs;
   /* Registers backing PROGRAM_TEMPORARY, indexed by temporary Index. */
   nir_register **temp_regs;

   /* Register backing PROGRAM_ADDRESS; added to the index of relative reads. */
   nir_register *addr_reg;
};
64 
/* Builds an unnamed unsigned[4] swizzle from four component names, e.g.
 * SWIZ(Y, Z, X, W), by pasting each name onto the SWIZZLE_ prefix.
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Selects the named component (X, Y, Z or W) of src via nir_channel(). */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
68 
69 static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile * c,nir_alu_dest * dest)70 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
71 {
72    nir_builder *b = &c->build;
73 
74    nir_alu_src src;
75    memset(&src, 0, sizeof(src));
76 
77    if (dest->dest.is_ssa)
78       src.src = nir_src_for_ssa(&dest->dest.ssa);
79    else {
80       assert(!dest->dest.reg.indirect);
81       src.src = nir_src_for_reg(dest->dest.reg.reg);
82       src.src.reg.base_offset = dest->dest.reg.base_offset;
83    }
84 
85    for (int i = 0; i < 4; i++)
86       src.swizzle[i] = i;
87 
88    return nir_mov_alu(b, src, 4);
89 }
90 
91 static nir_alu_dest
ptn_get_dest(struct ptn_compile * c,const struct prog_dst_register * prog_dst)92 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
93 {
94    nir_alu_dest dest;
95 
96    memset(&dest, 0, sizeof(dest));
97 
98    switch (prog_dst->File) {
99    case PROGRAM_TEMPORARY:
100       dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
101       break;
102    case PROGRAM_OUTPUT:
103       dest.dest.reg.reg = c->output_regs[prog_dst->Index];
104       break;
105    case PROGRAM_ADDRESS:
106       assert(prog_dst->Index == 0);
107       dest.dest.reg.reg = c->addr_reg;
108       break;
109    case PROGRAM_UNDEFINED:
110       break;
111    }
112 
113    dest.write_mask = prog_dst->WriteMask;
114    dest.saturate = false;
115 
116    assert(!prog_dst->RelAddr);
117 
118    return dest;
119 }
120 
121 static nir_ssa_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)122 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
123 {
124    nir_builder *b = &c->build;
125    nir_alu_src src;
126 
127    memset(&src, 0, sizeof(src));
128 
129    switch (prog_src->File) {
130    case PROGRAM_UNDEFINED:
131       return nir_imm_float(b, 0.0);
132    case PROGRAM_TEMPORARY:
133       assert(!prog_src->RelAddr && prog_src->Index >= 0);
134       src.src.reg.reg = c->temp_regs[prog_src->Index];
135       break;
136    case PROGRAM_INPUT: {
137       /* ARB_vertex_program doesn't allow relative addressing on vertex
138        * attributes; ARB_fragment_program has no relative addressing at all.
139        */
140       assert(!prog_src->RelAddr);
141 
142       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
143 
144       nir_variable *var = c->input_vars[prog_src->Index];
145       src.src = nir_src_for_ssa(nir_load_var(b, var));
146       break;
147    }
148    case PROGRAM_SYSTEM_VALUE: {
149       assert(!prog_src->RelAddr);
150 
151       assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
152 
153       nir_variable *var = c->sysval_vars[prog_src->Index];
154       src.src = nir_src_for_ssa(nir_load_var(b, var));
155       break;
156    }
157    case PROGRAM_STATE_VAR:
158    case PROGRAM_CONSTANT: {
159       /* We actually want to look at the type in the Parameters list for this,
160        * because it lets us upload constant builtin uniforms as actual
161        * constants.
162        */
163       struct gl_program_parameter_list *plist = c->prog->Parameters;
164       gl_register_file file = prog_src->RelAddr ? prog_src->File :
165          plist->Parameters[prog_src->Index].Type;
166 
167       switch (file) {
168       case PROGRAM_CONSTANT:
169          if ((c->prog->arb.IndirectRegisterFiles &
170               (1 << PROGRAM_CONSTANT)) == 0) {
171             unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
172             float *v = (float *) plist->ParameterValues + pvo;
173             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
174             break;
175          }
176          FALLTHROUGH;
177       case PROGRAM_STATE_VAR: {
178          assert(c->parameters != NULL);
179 
180          nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
181 
182          nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
183          if (prog_src->RelAddr)
184             index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
185          deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
186 
187          src.src = nir_src_for_ssa(nir_load_deref(b, deref));
188          break;
189       }
190       default:
191          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
192                  _mesa_register_file_name(file), file);
193          abort();
194       }
195       break;
196    }
197    default:
198       fprintf(stderr, "unknown src register file: %s (%d)\n",
199               _mesa_register_file_name(prog_src->File), prog_src->File);
200       abort();
201    }
202 
203    nir_ssa_def *def;
204    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
205        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
206       /* The simple non-SWZ case. */
207       for (int i = 0; i < 4; i++)
208          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
209 
210       def = nir_mov_alu(b, src, 4);
211 
212       if (prog_src->Negate)
213          def = nir_fneg(b, def);
214    } else {
215       /* The SWZ instruction allows per-component zero/one swizzles, and also
216        * per-component negation.
217        */
218       nir_ssa_def *chans[4];
219       for (int i = 0; i < 4; i++) {
220          int swizzle = GET_SWZ(prog_src->Swizzle, i);
221          if (swizzle == SWIZZLE_ZERO) {
222             chans[i] = nir_imm_float(b, 0.0);
223          } else if (swizzle == SWIZZLE_ONE) {
224             chans[i] = nir_imm_float(b, 1.0);
225          } else {
226             assert(swizzle != SWIZZLE_NIL);
227             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
228             nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
229             mov->dest.write_mask = 0x1;
230             mov->src[0] = src;
231             mov->src[0].swizzle[0] = swizzle;
232             nir_builder_instr_insert(b, &mov->instr);
233 
234             chans[i] = &mov->dest.dest.ssa;
235          }
236 
237          if (prog_src->Negate & (1 << i))
238             chans[i] = nir_fneg(b, chans[i]);
239       }
240       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
241    }
242 
243    return def;
244 }
245 
246 static void
ptn_alu(nir_builder * b,nir_op op,nir_alu_dest dest,nir_ssa_def ** src)247 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
248 {
249    unsigned num_srcs = nir_op_infos[op].num_inputs;
250    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
251    unsigned i;
252 
253    for (i = 0; i < num_srcs; i++)
254       instr->src[i].src = nir_src_for_ssa(src[i]);
255 
256    instr->dest = dest;
257    nir_builder_instr_insert(b, &instr->instr);
258 }
259 
260 static void
ptn_move_dest_masked(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def,unsigned write_mask)261 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
262                      nir_ssa_def *def, unsigned write_mask)
263 {
264    if (!(dest.write_mask & write_mask))
265       return;
266 
267    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
268    if (!mov)
269       return;
270 
271    mov->dest = dest;
272    mov->dest.write_mask &= write_mask;
273    mov->src[0].src = nir_src_for_ssa(def);
274    for (unsigned i = def->num_components; i < 4; i++)
275       mov->src[0].swizzle[i] = def->num_components - 1;
276    nir_builder_instr_insert(b, &mov->instr);
277 }
278 
279 static void
ptn_move_dest(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def)280 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
281 {
282    ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
283 }
284 
285 static void
ptn_arl(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)286 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
287 {
288    ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
289 }
290 
291 /* EXP - Approximate Exponential Base 2
292  *  dst.x = 2^{\lfloor src.x\rfloor}
293  *  dst.y = src.x - \lfloor src.x\rfloor
294  *  dst.z = 2^{src.x}
295  *  dst.w = 1.0
296  */
297 static void
ptn_exp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)298 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
299 {
300    nir_ssa_def *srcx = ptn_channel(b, src[0], X);
301 
302    ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
303    ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
304    ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
305    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
306 }
307 
308 /* LOG - Approximate Logarithm Base 2
309  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
310  *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
311  *  dst.z = \log_2{|src.x|}
312  *  dst.w = 1.0
313  */
314 static void
ptn_log(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)315 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
316 {
317    nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
318    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
319    nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
320 
321    ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
322    ptn_move_dest_masked(b, dest,
323                         nir_fmul(b, abs_srcx,
324                                  nir_fexp2(b, nir_fneg(b, floor_log2))),
325                         WRITEMASK_Y);
326    ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
327    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
328 }
329 
330 /* DST - Distance Vector
331  *   dst.x = 1.0
332  *   dst.y = src0.y \times src1.y
333  *   dst.z = src0.z
334  *   dst.w = src1.w
335  */
336 static void
ptn_dst(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)337 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
338 {
339    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
340    ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
341    ptn_move_dest_masked(b, dest, nir_mov(b, src[0]), WRITEMASK_Z);
342    ptn_move_dest_masked(b, dest, nir_mov(b, src[1]), WRITEMASK_W);
343 }
344 
345 /* LIT - Light Coefficients
346  *  dst.x = 1.0
347  *  dst.y = max(src.x, 0.0)
348  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
349  *  dst.w = 1.0
350  */
351 static void
ptn_lit(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)352 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
353 {
354    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
355 
356    ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
357                                           nir_imm_float(b, 0.0)), WRITEMASK_Y);
358 
359    if (dest.write_mask & WRITEMASK_Z) {
360       nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
361       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
362                                                  nir_imm_float(b, 128.0)),
363                                      nir_imm_float(b, -128.0));
364       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
365                                   wclamp);
366 
367       nir_ssa_def *z = nir_bcsel(b,
368                                  nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
369                                  nir_imm_float(b, 0.0),
370                                  pow);
371 
372       ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
373    }
374 }
375 
376 /* SCS - Sine Cosine
377  *   dst.x = \cos{src.x}
378  *   dst.y = \sin{src.x}
379  *   dst.z = 0.0
380  *   dst.w = 1.0
381  */
382 static void
ptn_scs(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)383 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
384 {
385    ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
386                         WRITEMASK_X);
387    ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
388                         WRITEMASK_Y);
389    ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
390    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
391 }
392 
393 static void
ptn_slt(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)394 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
395 {
396    ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
397 }
398 
399 static void
ptn_sge(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)400 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
401 {
402    ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
403 }
404 
405 static void
ptn_xpd(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)406 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
407 {
408    ptn_move_dest_masked(b, dest,
409                         nir_fsub(b,
410                                  nir_fmul(b,
411                                           nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
412                                           nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
413                                  nir_fmul(b,
414                                           nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
415                                           nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3))),
416                         WRITEMASK_XYZ);
417    ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
418 }
419 
420 static void
ptn_dp2(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)421 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
422 {
423    ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
424 }
425 
426 static void
ptn_dp3(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)427 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
428 {
429    ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
430 }
431 
432 static void
ptn_dp4(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)433 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
434 {
435    ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
436 }
437 
438 static void
ptn_dph(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)439 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
440 {
441    ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
442 }
443 
444 static void
ptn_cmp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)445 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447    ptn_move_dest(b, dest, nir_bcsel(b,
448                                     nir_flt(b, src[0], nir_imm_float(b, 0.0)),
449                                     src[1], src[2]));
450 }
451 
452 static void
ptn_lrp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)453 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
454 {
455    ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
456 }
457 
458 static void
ptn_kil(nir_builder * b,nir_ssa_def ** src)459 ptn_kil(nir_builder *b, nir_ssa_def **src)
460 {
461    /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
462    b->exact = true;
463    nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
464    b->exact = false;
465 
466    nir_discard_if(b, cmp);
467 }
468 
469 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)470 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
471 {
472    *is_array = false;
473 
474    switch (index) {
475    case TEXTURE_2D_MULTISAMPLE_INDEX:
476       return GLSL_SAMPLER_DIM_MS;
477    case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
478       *is_array = true;
479       return GLSL_SAMPLER_DIM_MS;
480    case TEXTURE_BUFFER_INDEX:
481       return GLSL_SAMPLER_DIM_BUF;
482    case TEXTURE_1D_INDEX:
483       return GLSL_SAMPLER_DIM_1D;
484    case TEXTURE_2D_INDEX:
485       return GLSL_SAMPLER_DIM_2D;
486    case TEXTURE_3D_INDEX:
487       return GLSL_SAMPLER_DIM_3D;
488    case TEXTURE_CUBE_INDEX:
489       return GLSL_SAMPLER_DIM_CUBE;
490    case TEXTURE_CUBE_ARRAY_INDEX:
491       *is_array = true;
492       return GLSL_SAMPLER_DIM_CUBE;
493    case TEXTURE_RECT_INDEX:
494       return GLSL_SAMPLER_DIM_RECT;
495    case TEXTURE_1D_ARRAY_INDEX:
496       *is_array = true;
497       return GLSL_SAMPLER_DIM_1D;
498    case TEXTURE_2D_ARRAY_INDEX:
499       *is_array = true;
500       return GLSL_SAMPLER_DIM_2D;
501    case TEXTURE_EXTERNAL_INDEX:
502       return GLSL_SAMPLER_DIM_EXTERNAL;
503    case NUM_TEXTURE_TARGETS:
504       break;
505    }
506    unreachable("unknown texture target");
507 }
508 
509 static void
ptn_tex(struct ptn_compile * c,nir_alu_dest dest,nir_ssa_def ** src,struct prog_instruction * prog_inst)510 ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
511         struct prog_instruction *prog_inst)
512 {
513    nir_builder *b = &c->build;
514    nir_tex_instr *instr;
515    nir_texop op;
516    unsigned num_srcs;
517 
518    switch (prog_inst->Opcode) {
519    case OPCODE_TEX:
520       op = nir_texop_tex;
521       num_srcs = 1;
522       break;
523    case OPCODE_TXB:
524       op = nir_texop_txb;
525       num_srcs = 2;
526       break;
527    case OPCODE_TXD:
528       op = nir_texop_txd;
529       num_srcs = 3;
530       break;
531    case OPCODE_TXL:
532       op = nir_texop_txl;
533       num_srcs = 2;
534       break;
535    case OPCODE_TXP:
536       op = nir_texop_tex;
537       num_srcs = 2;
538       break;
539    default:
540       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
541       abort();
542    }
543 
544    /* Deref sources */
545    num_srcs += 2;
546 
547    if (prog_inst->TexShadow)
548       num_srcs++;
549 
550    instr = nir_tex_instr_create(b->shader, num_srcs);
551    instr->op = op;
552    instr->dest_type = nir_type_float32;
553    instr->is_shadow = prog_inst->TexShadow;
554 
555    bool is_array;
556    instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
557 
558    instr->coord_components =
559       glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
560 
561    nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
562    if (!var) {
563       const struct glsl_type *type =
564          glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
565       char samplerName[20];
566       snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
567       var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
568       var->data.binding = prog_inst->TexSrcUnit;
569       var->data.explicit_binding = true;
570       c->sampler_vars[prog_inst->TexSrcUnit] = var;
571    }
572 
573    nir_deref_instr *deref = nir_build_deref_var(b, var);
574 
575    unsigned src_number = 0;
576 
577    instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
578    instr->src[src_number].src_type = nir_tex_src_texture_deref;
579    src_number++;
580    instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
581    instr->src[src_number].src_type = nir_tex_src_sampler_deref;
582    src_number++;
583 
584    instr->src[src_number].src =
585       nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
586                                   instr->coord_components));
587    instr->src[src_number].src_type = nir_tex_src_coord;
588    src_number++;
589 
590    if (prog_inst->Opcode == OPCODE_TXP) {
591       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
592       instr->src[src_number].src_type = nir_tex_src_projector;
593       src_number++;
594    }
595 
596    if (prog_inst->Opcode == OPCODE_TXB) {
597       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
598       instr->src[src_number].src_type = nir_tex_src_bias;
599       src_number++;
600    }
601 
602    if (prog_inst->Opcode == OPCODE_TXL) {
603       instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
604       instr->src[src_number].src_type = nir_tex_src_lod;
605       src_number++;
606    }
607 
608    if (instr->is_shadow) {
609       if (instr->coord_components < 3)
610          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
611       else
612          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613 
614       instr->src[src_number].src_type = nir_tex_src_comparator;
615       src_number++;
616    }
617 
618    assert(src_number == num_srcs);
619 
620    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
621    nir_builder_instr_insert(b, &instr->instr);
622 
623    /* Resolve the writemask on the texture op. */
624    ptn_move_dest(b, dest, &instr->dest.ssa);
625 }
626 
/* Opcodes that translate one-to-one into a single NIR ALU op.
 *
 * ptn_emit_instruction() consults this table only for opcodes it has no
 * dedicated case for.  A zero entry means there is no direct translation:
 * the opcode is either handled by its own case there or is reported as
 * unknown.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_mov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};
674 
675 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)676 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
677 {
678    nir_builder *b = &c->build;
679    unsigned i;
680    const unsigned op = prog_inst->Opcode;
681 
682    if (op == OPCODE_END)
683       return;
684 
685    nir_ssa_def *src[3];
686    for (i = 0; i < 3; i++) {
687       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
688    }
689    nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
690    if (c->error)
691       return;
692 
693    switch (op) {
694    case OPCODE_RSQ:
695       ptn_move_dest(b, dest,
696                     nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
697       break;
698 
699    case OPCODE_RCP:
700       ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
701       break;
702 
703    case OPCODE_EX2:
704       ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
705       break;
706 
707    case OPCODE_LG2:
708       ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
709       break;
710 
711    case OPCODE_POW:
712       ptn_move_dest(b, dest, nir_fpow(b,
713                                       ptn_channel(b, src[0], X),
714                                       ptn_channel(b, src[1], X)));
715       break;
716 
717    case OPCODE_COS:
718       ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
719       break;
720 
721    case OPCODE_SIN:
722       ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
723       break;
724 
725    case OPCODE_ARL:
726       ptn_arl(b, dest, src);
727       break;
728 
729    case OPCODE_EXP:
730       ptn_exp(b, dest, src);
731       break;
732 
733    case OPCODE_LOG:
734       ptn_log(b, dest, src);
735       break;
736 
737    case OPCODE_LRP:
738       ptn_lrp(b, dest, src);
739       break;
740 
741    case OPCODE_MAD:
742       ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
743       break;
744 
745    case OPCODE_DST:
746       ptn_dst(b, dest, src);
747       break;
748 
749    case OPCODE_LIT:
750       ptn_lit(b, dest, src);
751       break;
752 
753    case OPCODE_XPD:
754       ptn_xpd(b, dest, src);
755       break;
756 
757    case OPCODE_DP2:
758       ptn_dp2(b, dest, src);
759       break;
760 
761    case OPCODE_DP3:
762       ptn_dp3(b, dest, src);
763       break;
764 
765    case OPCODE_DP4:
766       ptn_dp4(b, dest, src);
767       break;
768 
769    case OPCODE_DPH:
770       ptn_dph(b, dest, src);
771       break;
772 
773    case OPCODE_KIL:
774       ptn_kil(b, src);
775       break;
776 
777    case OPCODE_CMP:
778       ptn_cmp(b, dest, src);
779       break;
780 
781    case OPCODE_SCS:
782       ptn_scs(b, dest, src);
783       break;
784 
785    case OPCODE_SLT:
786       ptn_slt(b, dest, src);
787       break;
788 
789    case OPCODE_SGE:
790       ptn_sge(b, dest, src);
791       break;
792 
793    case OPCODE_TEX:
794    case OPCODE_TXB:
795    case OPCODE_TXD:
796    case OPCODE_TXL:
797    case OPCODE_TXP:
798       ptn_tex(c, dest, src, prog_inst);
799       break;
800 
801    case OPCODE_SWZ:
802       /* Extended swizzles were already handled in ptn_get_src(). */
803       ptn_alu(b, nir_op_mov, dest, src);
804       break;
805 
806    case OPCODE_NOP:
807       break;
808 
809    default:
810       if (op_trans[op] != 0) {
811          ptn_alu(b, op_trans[op], dest, src);
812       } else {
813          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
814          abort();
815       }
816       break;
817    }
818 
819    if (prog_inst->Saturate) {
820       assert(prog_inst->Saturate);
821       assert(!dest.dest.is_ssa);
822       ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
823    }
824 }
825 
826 /**
827  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
828  * variables at the end of the shader.
829  *
830  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
831  * written, because there's no output load intrinsic, which means we couldn't
832  * handle writemasks.
833  */
834 static void
ptn_add_output_stores(struct ptn_compile * c)835 ptn_add_output_stores(struct ptn_compile *c)
836 {
837    nir_builder *b = &c->build;
838 
839    nir_foreach_shader_out_variable(var, b->shader) {
840       nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
841       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
842           var->data.location == FRAG_RESULT_DEPTH) {
843          /* result.depth has this strange convention of being the .z component of
844           * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
845           * match GLSL's gl_FragDepth and the expectations of most backends.
846           */
847          src = nir_channel(b, src, 2);
848       }
849       if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
850           (var->data.location == VARYING_SLOT_FOGC ||
851            var->data.location == VARYING_SLOT_PSIZ)) {
852          /* result.{fogcoord,psiz} is a single component value */
853          src = nir_channel(b, src, 0);
854       }
855       unsigned num_components = glsl_get_vector_elements(var->type);
856       nir_store_var(b, var, src, (1 << num_components) - 1);
857    }
858 }
859 
860 static void
setup_registers_and_variables(struct ptn_compile * c)861 setup_registers_and_variables(struct ptn_compile *c)
862 {
863    nir_builder *b = &c->build;
864    struct nir_shader *shader = b->shader;
865 
866    /* Create input variables. */
867    uint64_t inputs_read = c->prog->info.inputs_read;
868    while (inputs_read) {
869       const int i = u_bit_scan64(&inputs_read);
870 
871       if (c->ctx->Const.GLSLFragCoordIsSysVal &&
872           shader->info.stage == MESA_SHADER_FRAGMENT &&
873           i == VARYING_SLOT_POS) {
874          nir_variable *var = nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
875                                                  "frag_coord");
876          var->data.location = SYSTEM_VALUE_FRAG_COORD;
877          c->input_vars[i] = var;
878          continue;
879       }
880 
881       nir_variable *var =
882          nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
883                              ralloc_asprintf(shader, "in_%d", i));
884       var->data.location = i;
885       var->data.index = 0;
886 
887       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
888          if (i == VARYING_SLOT_FOGC) {
889             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
890              * input variable a float, and create a local containing the
891              * full vec4 value.
892              */
893             var->type = glsl_float_type();
894 
895             nir_variable *fullvar =
896                nir_local_variable_create(b->impl, glsl_vec4_type(),
897                                          "fogcoord_tmp");
898 
899             nir_store_var(b, fullvar,
900                           nir_vec4(b, nir_load_var(b, var),
901                                    nir_imm_float(b, 0.0),
902                                    nir_imm_float(b, 0.0),
903                                    nir_imm_float(b, 1.0)),
904                           WRITEMASK_XYZW);
905 
906             /* We inserted the real input into the list so the driver has real
907              * inputs, but we set c->input_vars[i] to the temporary so we use
908              * the splatted value.
909              */
910             c->input_vars[i] = fullvar;
911             continue;
912          }
913       }
914 
915       c->input_vars[i] = var;
916    }
917 
918    /* Create system value variables */
919    int i;
920    BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
921       nir_variable *var =
922          nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
923                              ralloc_asprintf(shader, "sv_%d", i));
924       var->data.location = i;
925       var->data.index = 0;
926 
927       c->sysval_vars[i] = var;
928    }
929 
930    /* Create output registers and variables. */
931    int max_outputs = util_last_bit64(c->prog->info.outputs_written);
932    c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
933 
934    uint64_t outputs_written = c->prog->info.outputs_written;
935    while (outputs_written) {
936       const int i = u_bit_scan64(&outputs_written);
937 
938       /* Since we can't load from outputs in the IR, we make temporaries
939        * for the outputs and emit stores to the real outputs at the end of
940        * the shader.
941        */
942       nir_register *reg = nir_local_reg_create(b->impl);
943       reg->num_components = 4;
944 
945       const struct glsl_type *type;
946       if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
947           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
948           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
949          type = glsl_float_type();
950       else
951          type = glsl_vec4_type();
952 
953       nir_variable *var =
954          nir_variable_create(shader, nir_var_shader_out, type,
955                              ralloc_asprintf(shader, "out_%d", i));
956       var->data.location = i;
957       var->data.index = 0;
958 
959       c->output_regs[i] = reg;
960       c->output_vars[i] = var;
961    }
962 
963    /* Create temporary registers. */
964    c->temp_regs = rzalloc_array(c, nir_register *,
965                                 c->prog->arb.NumTemporaries);
966 
967    nir_register *reg;
968    for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
969       reg = nir_local_reg_create(b->impl);
970       if (!reg) {
971          c->error = true;
972          return;
973       }
974       reg->num_components = 4;
975       c->temp_regs[i] = reg;
976    }
977 
978    /* Create the address register (for ARB_vertex_program). */
979    reg = nir_local_reg_create(b->impl);
980    if (!reg) {
981       c->error = true;
982       return;
983    }
984    reg->num_components = 1;
985    c->addr_reg = reg;
986 }
987 
988 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog,const nir_shader_compiler_options * options)989 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
990             const nir_shader_compiler_options *options)
991 {
992    struct ptn_compile *c;
993    struct nir_shader *s;
994    gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
995 
996    c = rzalloc(NULL, struct ptn_compile);
997    if (!c)
998       return NULL;
999    c->prog = prog;
1000    c->ctx = ctx;
1001 
1002    c->build = nir_builder_init_simple_shader(stage, options, NULL);
1003 
1004    /* Copy the shader_info from the gl_program */
1005    c->build.shader->info = prog->info;
1006 
1007    s = c->build.shader;
1008 
1009    if (prog->Parameters->NumParameters > 0) {
1010       const struct glsl_type *type =
1011          glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
1012       c->parameters =
1013          nir_variable_create(s, nir_var_uniform, type,
1014                              prog->Parameters->Parameters[0].Name);
1015    }
1016 
1017    setup_registers_and_variables(c);
1018    if (unlikely(c->error))
1019       goto fail;
1020 
1021    for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1022       ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1023 
1024       if (unlikely(c->error))
1025          break;
1026    }
1027 
1028    ptn_add_output_stores(c);
1029 
1030    s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1031    s->info.num_textures = util_last_bit(prog->SamplersUsed);
1032    s->info.num_ubos = 0;
1033    s->info.num_abos = 0;
1034    s->info.num_ssbos = 0;
1035    s->info.num_images = 0;
1036    s->info.uses_texture_gather = false;
1037    s->info.clip_distance_array_size = 0;
1038    s->info.cull_distance_array_size = 0;
1039    s->info.separate_shader = false;
1040    s->info.io_lowered = false;
1041    s->info.internal = false;
1042 
1043 fail:
1044    if (c->error) {
1045       ralloc_free(s);
1046       s = NULL;
1047    }
1048    ralloc_free(c);
1049    return s;
1050 }
1051