• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  * Copyright © 2014-2015 Broadcom
4  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33 
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39 #include "state_tracker/st_nir.h"
40 
41 /**
42  * \file prog_to_nir.c
43  *
44  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
45  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
46  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
47  */
48 
/* Per-translation state shared by the ptn_* helpers in this file. */
struct ptn_compile {
   const struct gl_context *ctx;   /* GL context the program belongs to */
   const struct gl_program *prog;  /* Mesa IR program being translated */
   nir_builder build;              /* builder for the shader being emitted */
   bool error;                     /* checked by the translator to stop early */

   /* Uniform array of vec4s backing the program's parameter list; only
    * created when the program has at least one parameter.
    */
   nir_variable *parameters;
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   nir_def **output_regs;          /* one vec4 register per written output slot */
   nir_def **temp_regs;            /* one vec4 register per program temporary */

   nir_def *addr_reg;              /* scalar address register */
};
62 
/* Builds an unsigned[4] compound literal from four channel names, each of
 * which is pasted onto the SWIZZLE_ prefix, e.g. SWIZ(Y, Z, X, W).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Extracts the channel SWIZZLE_<ch> of src as a scalar via nir_channel(). */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
66 
67 static nir_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)68 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
69 {
70    nir_builder *b = &c->build;
71    nir_alu_src src;
72 
73    memset(&src, 0, sizeof(src));
74 
75    switch (prog_src->File) {
76    case PROGRAM_UNDEFINED:
77       return nir_imm_float(b, 0.0);
78    case PROGRAM_TEMPORARY:
79       assert(!prog_src->RelAddr && prog_src->Index >= 0);
80       src.src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[prog_src->Index]));
81       break;
82    case PROGRAM_INPUT: {
83       /* ARB_vertex_program doesn't allow relative addressing on vertex
84        * attributes; ARB_fragment_program has no relative addressing at all.
85        */
86       assert(!prog_src->RelAddr);
87       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
88 
89       unsigned slot = prog_src->Index;
90       nir_def *input;
91 
92       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
93          if (slot == VARYING_SLOT_POS && c->ctx->Const.GLSLFragCoordIsSysVal) {
94             nir_variable *pos =
95                nir_get_variable_with_location(b->shader, nir_var_system_value,
96                                               SYSTEM_VALUE_FRAG_COORD,
97                                               glsl_vec4_type());
98             src.src = nir_src_for_ssa(nir_load_var(b, pos));
99             break;
100          }
101 
102          nir_def *baryc = nir_load_barycentric_pixel(b, 32);
103 
104          if (slot != VARYING_SLOT_COL0 && slot != VARYING_SLOT_COL1) {
105             nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
106                                           INTERP_MODE_SMOOTH);
107          }
108 
109          input = nir_load_interpolated_input(b, 4, 32, baryc, nir_imm_int(b, 0),
110                                              .io_semantics.location = slot);
111 
112          /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
113           * input variable a float, and create a local containing the
114           * full vec4 value.
115           */
116          if (slot == VARYING_SLOT_FOGC) {
117             input = nir_vec4(b, nir_channel(b, input, 0),
118                              nir_imm_float(b, 0),
119                              nir_imm_float(b, 0),
120                              nir_imm_float(b, 1));
121          }
122       } else {
123          input = nir_load_input(b, 4, 32, nir_imm_int(b, 0),
124                                 .io_semantics.location = slot);
125       }
126 
127       src.src = nir_src_for_ssa(input);
128       break;
129    }
130    case PROGRAM_STATE_VAR:
131    case PROGRAM_CONSTANT: {
132       /* We actually want to look at the type in the Parameters list for this,
133        * because it lets us upload constant builtin uniforms as actual
134        * constants.
135        */
136       struct gl_program_parameter_list *plist = c->prog->Parameters;
137       gl_register_file file = prog_src->RelAddr ? prog_src->File :
138          plist->Parameters[prog_src->Index].Type;
139 
140       switch (file) {
141       case PROGRAM_CONSTANT:
142          if ((c->prog->arb.IndirectRegisterFiles &
143               (1 << PROGRAM_CONSTANT)) == 0) {
144             unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
145             float *v = (float *) plist->ParameterValues + pvo;
146             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
147             break;
148          }
149          FALLTHROUGH;
150       case PROGRAM_STATE_VAR: {
151          assert(c->parameters != NULL);
152 
153          nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
154 
155          nir_def *index = nir_imm_int(b, prog_src->Index);
156 
157          /* Add the address register. Note this is (uniquely) a scalar, so the
158           * component sizes match.
159           */
160          if (prog_src->RelAddr)
161             index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
162 
163          deref = nir_build_deref_array(b, deref, index);
164          src.src = nir_src_for_ssa(nir_load_deref(b, deref));
165          break;
166       }
167       default:
168          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
169                  _mesa_register_file_name(file), file);
170          abort();
171       }
172       break;
173    }
174    default:
175       fprintf(stderr, "unknown src register file: %s (%d)\n",
176               _mesa_register_file_name(prog_src->File), prog_src->File);
177       abort();
178    }
179 
180    nir_def *def;
181    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
182        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
183       /* The simple non-SWZ case. */
184       for (int i = 0; i < 4; i++)
185          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
186 
187       def = nir_mov_alu(b, src, 4);
188 
189       if (prog_src->Negate)
190          def = nir_fneg(b, def);
191    } else {
192       /* The SWZ instruction allows per-component zero/one swizzles, and also
193        * per-component negation.
194        */
195       nir_def *chans[4];
196       for (int i = 0; i < 4; i++) {
197          int swizzle = GET_SWZ(prog_src->Swizzle, i);
198          if (swizzle == SWIZZLE_ZERO) {
199             chans[i] = nir_imm_float(b, 0.0);
200          } else if (swizzle == SWIZZLE_ONE) {
201             chans[i] = nir_imm_float(b, 1.0);
202          } else {
203             assert(swizzle != SWIZZLE_NIL);
204             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
205             nir_def_init(&mov->instr, &mov->def, 1, 32);
206             mov->src[0] = src;
207             mov->src[0].swizzle[0] = swizzle;
208             nir_builder_instr_insert(b, &mov->instr);
209 
210             chans[i] = &mov->def;
211          }
212 
213          if (prog_src->Negate & (1 << i))
214             chans[i] = nir_fneg(b, chans[i]);
215       }
216       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
217    }
218 
219    return def;
220 }
221 
222 /* EXP - Approximate Exponential Base 2
223  *  dst.x = 2^{\lfloor src.x\rfloor}
224  *  dst.y = src.x - \lfloor src.x\rfloor
225  *  dst.z = 2^{src.x}
226  *  dst.w = 1.0
227  */
228 static nir_def *
ptn_exp(nir_builder * b,nir_def ** src)229 ptn_exp(nir_builder *b, nir_def **src)
230 {
231    nir_def *srcx = ptn_channel(b, src[0], X);
232 
233    return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
234                       nir_fsub(b, srcx, nir_ffloor(b, srcx)),
235                       nir_fexp2(b, srcx),
236                       nir_imm_float(b, 1.0));
237 }
238 
239 /* LOG - Approximate Logarithm Base 2
240  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
241  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
242  *  dst.z = \log_2{|src.x|}
243  *  dst.w = 1.0
244  */
245 static nir_def *
ptn_log(nir_builder * b,nir_def ** src)246 ptn_log(nir_builder *b, nir_def **src)
247 {
248    nir_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
249    nir_def *log2 = nir_flog2(b, abs_srcx);
250 
251    return nir_vec4(b, nir_ffloor(b, log2),
252                       nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
253                       nir_flog2(b, abs_srcx),
254                       nir_imm_float(b, 1.0));
255 }
256 
257 /* DST - Distance Vector
258  *   dst.x = 1.0
259  *   dst.y = src0.y \times src1.y
260  *   dst.z = src0.z
261  *   dst.w = src1.w
262  */
263 static nir_def *
ptn_dst(nir_builder * b,nir_def ** src)264 ptn_dst(nir_builder *b, nir_def **src)
265 {
266    return nir_vec4(b, nir_imm_float(b, 1.0),
267                       nir_fmul(b, ptn_channel(b, src[0], Y),
268                                   ptn_channel(b, src[1], Y)),
269                       ptn_channel(b, src[0], Z),
270                       ptn_channel(b, src[1], W));
271 }
272 
273 /* LIT - Light Coefficients
274  *  dst.x = 1.0
275  *  dst.y = max(src.x, 0.0)
276  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
277  *  dst.w = 1.0
278  */
279 static nir_def *
ptn_lit(nir_builder * b,nir_def ** src)280 ptn_lit(nir_builder *b, nir_def **src)
281 {
282    nir_def *src0_y = ptn_channel(b, src[0], Y);
283    nir_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
284                                               nir_imm_float(b, 128.0)),
285                                   nir_imm_float(b, -128.0));
286    nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
287                                wclamp);
288 
289    nir_def *z = nir_bcsel(b, nir_fle_imm(b, ptn_channel(b, src[0], X), 0.0),
290                               nir_imm_float(b, 0.0), pow);
291 
292    return nir_vec4(b, nir_imm_float(b, 1.0),
293                       nir_fmax(b, ptn_channel(b, src[0], X),
294                                   nir_imm_float(b, 0.0)),
295                       z,
296                       nir_imm_float(b, 1.0));
297 }
298 
299 /* SCS - Sine Cosine
300  *   dst.x = \cos{src.x}
301  *   dst.y = \sin{src.x}
302  *   dst.z = 0.0
303  *   dst.w = 1.0
304  */
305 static nir_def *
ptn_scs(nir_builder * b,nir_def ** src)306 ptn_scs(nir_builder *b, nir_def **src)
307 {
308    return nir_vec4(b, nir_fcos(b, ptn_channel(b, src[0], X)),
309                       nir_fsin(b, ptn_channel(b, src[0], X)),
310                       nir_imm_float(b, 0.0),
311                       nir_imm_float(b, 1.0));
312 }
313 
314 static nir_def *
ptn_xpd(nir_builder * b,nir_def ** src)315 ptn_xpd(nir_builder *b, nir_def **src)
316 {
317    nir_def *vec =
318       nir_fsub(b, nir_fmul(b, nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
319                               nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
320                   nir_fmul(b, nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
321                               nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3)));
322 
323    return nir_vec4(b, nir_channel(b, vec, 0),
324                       nir_channel(b, vec, 1),
325                       nir_channel(b, vec, 2),
326                       nir_imm_float(b, 1.0));
327 }
328 
329 static void
ptn_kil(nir_builder * b,nir_def ** src)330 ptn_kil(nir_builder *b, nir_def **src)
331 {
332    /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
333    b->exact = true;
334    nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
335    b->exact = false;
336 
337    nir_discard_if(b, cmp);
338 }
339 
340 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)341 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
342 {
343    *is_array = false;
344 
345    switch (index) {
346    case TEXTURE_2D_MULTISAMPLE_INDEX:
347       return GLSL_SAMPLER_DIM_MS;
348    case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
349       *is_array = true;
350       return GLSL_SAMPLER_DIM_MS;
351    case TEXTURE_BUFFER_INDEX:
352       return GLSL_SAMPLER_DIM_BUF;
353    case TEXTURE_1D_INDEX:
354       return GLSL_SAMPLER_DIM_1D;
355    case TEXTURE_2D_INDEX:
356       return GLSL_SAMPLER_DIM_2D;
357    case TEXTURE_3D_INDEX:
358       return GLSL_SAMPLER_DIM_3D;
359    case TEXTURE_CUBE_INDEX:
360       return GLSL_SAMPLER_DIM_CUBE;
361    case TEXTURE_CUBE_ARRAY_INDEX:
362       *is_array = true;
363       return GLSL_SAMPLER_DIM_CUBE;
364    case TEXTURE_RECT_INDEX:
365       return GLSL_SAMPLER_DIM_RECT;
366    case TEXTURE_1D_ARRAY_INDEX:
367       *is_array = true;
368       return GLSL_SAMPLER_DIM_1D;
369    case TEXTURE_2D_ARRAY_INDEX:
370       *is_array = true;
371       return GLSL_SAMPLER_DIM_2D;
372    case TEXTURE_EXTERNAL_INDEX:
373       return GLSL_SAMPLER_DIM_EXTERNAL;
374    case NUM_TEXTURE_TARGETS:
375       break;
376    }
377    unreachable("unknown texture target");
378 }
379 
380 static nir_def *
ptn_tex(struct ptn_compile * c,nir_def ** src,struct prog_instruction * prog_inst)381 ptn_tex(struct ptn_compile *c, nir_def **src,
382         struct prog_instruction *prog_inst)
383 {
384    nir_builder *b = &c->build;
385    nir_tex_instr *instr;
386    nir_texop op;
387    unsigned num_srcs;
388 
389    switch (prog_inst->Opcode) {
390    case OPCODE_TEX:
391       op = nir_texop_tex;
392       num_srcs = 1;
393       break;
394    case OPCODE_TXB:
395       op = nir_texop_txb;
396       num_srcs = 2;
397       break;
398    case OPCODE_TXD:
399       op = nir_texop_txd;
400       num_srcs = 3;
401       break;
402    case OPCODE_TXL:
403       op = nir_texop_txl;
404       num_srcs = 2;
405       break;
406    case OPCODE_TXP:
407       op = nir_texop_tex;
408       num_srcs = 2;
409       break;
410    default:
411       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
412       abort();
413    }
414 
415    /* Deref sources */
416    num_srcs += 2;
417 
418    if (prog_inst->TexShadow)
419       num_srcs++;
420 
421    instr = nir_tex_instr_create(b->shader, num_srcs);
422    instr->op = op;
423    instr->dest_type = nir_type_float32;
424    instr->is_shadow = prog_inst->TexShadow;
425 
426    bool is_array;
427    instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
428 
429    instr->coord_components =
430       glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
431 
432    nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
433    if (!var) {
434       const struct glsl_type *type =
435          glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
436       char samplerName[20];
437       snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
438       var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
439       var->data.binding = prog_inst->TexSrcUnit;
440       var->data.explicit_binding = true;
441       c->sampler_vars[prog_inst->TexSrcUnit] = var;
442    }
443 
444    nir_deref_instr *deref = nir_build_deref_var(b, var);
445 
446    unsigned src_number = 0;
447 
448    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
449                                                 &deref->def);
450    src_number++;
451    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
452                                                 &deref->def);
453    src_number++;
454 
455    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_coord,
456                                                 nir_trim_vector(b, src[0],
457                                                                 instr->coord_components));
458    src_number++;
459 
460    if (prog_inst->Opcode == OPCODE_TXP) {
461       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
462                                                    ptn_channel(b, src[0], W));
463       src_number++;
464    }
465 
466    if (prog_inst->Opcode == OPCODE_TXB) {
467       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
468                                                    ptn_channel(b, src[0], W));
469       src_number++;
470    }
471 
472    if (prog_inst->Opcode == OPCODE_TXL) {
473       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
474                                                    ptn_channel(b, src[0], W));
475       src_number++;
476    }
477 
478    if (instr->is_shadow) {
479       if (instr->coord_components < 3)
480          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
481       else
482          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
483 
484       instr->src[src_number].src_type = nir_tex_src_comparator;
485       src_number++;
486    }
487 
488    assert(src_number == num_srcs);
489 
490    nir_def_init(&instr->instr, &instr->def, 4, 32);
491    nir_builder_instr_insert(b, &instr->instr);
492 
493    return &instr->def;
494 }
495 
496 static const nir_op op_trans[MAX_OPCODE] = {
497    [OPCODE_NOP] = 0,
498    [OPCODE_ABS] = nir_op_fabs,
499    [OPCODE_ADD] = nir_op_fadd,
500    [OPCODE_ARL] = 0,
501    [OPCODE_CMP] = 0,
502    [OPCODE_COS] = 0,
503    [OPCODE_DDX] = 0,
504    [OPCODE_DDY] = 0,
505    [OPCODE_DP2] = 0,
506    [OPCODE_DP3] = 0,
507    [OPCODE_DP4] = 0,
508    [OPCODE_DPH] = 0,
509    [OPCODE_DST] = 0,
510    [OPCODE_END] = 0,
511    [OPCODE_EX2] = 0,
512    [OPCODE_EXP] = 0,
513    [OPCODE_FLR] = nir_op_ffloor,
514    [OPCODE_FRC] = nir_op_ffract,
515    [OPCODE_LG2] = 0,
516    [OPCODE_LIT] = 0,
517    [OPCODE_LOG] = 0,
518    [OPCODE_LRP] = 0,
519    [OPCODE_MAD] = 0,
520    [OPCODE_MAX] = nir_op_fmax,
521    [OPCODE_MIN] = nir_op_fmin,
522    [OPCODE_MOV] = nir_op_mov,
523    [OPCODE_MUL] = nir_op_fmul,
524    [OPCODE_POW] = 0,
525    [OPCODE_RCP] = 0,
526 
527    [OPCODE_RSQ] = 0,
528    [OPCODE_SCS] = 0,
529    [OPCODE_SGE] = 0,
530    [OPCODE_SIN] = 0,
531    [OPCODE_SLT] = 0,
532    [OPCODE_SSG] = nir_op_fsign,
533    [OPCODE_SUB] = nir_op_fsub,
534    [OPCODE_SWZ] = 0,
535    [OPCODE_TEX] = 0,
536    [OPCODE_TXB] = 0,
537    [OPCODE_TXD] = 0,
538    [OPCODE_TXL] = 0,
539    [OPCODE_TXP] = 0,
540    [OPCODE_XPD] = 0,
541 };
542 
543 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)544 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
545 {
546    nir_builder *b = &c->build;
547    unsigned i;
548    const unsigned op = prog_inst->Opcode;
549 
550    if (op == OPCODE_END)
551       return;
552 
553    nir_def *src[3];
554    for (i = 0; i < 3; i++) {
555       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
556    }
557 
558    nir_def *dst = NULL;
559    if (c->error)
560       return;
561 
562    switch (op) {
563    case OPCODE_DDX:
564       dst = nir_ddx(b, src[0]);
565       break;
566 
567    case OPCODE_DDY:
568       dst = nir_ddy(b, src[0]);
569       break;
570 
571    case OPCODE_RSQ:
572       dst = nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)));
573       break;
574 
575    case OPCODE_RCP:
576       dst = nir_frcp(b, ptn_channel(b, src[0], X));
577       break;
578 
579    case OPCODE_EX2:
580       dst = nir_fexp2(b, ptn_channel(b, src[0], X));
581       break;
582 
583    case OPCODE_LG2:
584       dst = nir_flog2(b, ptn_channel(b, src[0], X));
585       break;
586 
587    case OPCODE_POW:
588       dst = nir_fpow(b, ptn_channel(b, src[0], X), ptn_channel(b, src[1], X));
589       break;
590 
591    case OPCODE_COS:
592       dst = nir_fcos(b, ptn_channel(b, src[0], X));
593       break;
594 
595    case OPCODE_SIN:
596       dst = nir_fsin(b, ptn_channel(b, src[0], X));
597       break;
598 
599    case OPCODE_ARL:
600       dst = nir_f2i32(b, nir_ffloor(b, src[0]));
601       break;
602 
603    case OPCODE_EXP:
604       dst = ptn_exp(b, src);
605       break;
606 
607    case OPCODE_LOG:
608       dst = ptn_log(b, src);
609       break;
610 
611    case OPCODE_LRP:
612       dst = nir_flrp(b, src[2], src[1], src[0]);
613       break;
614 
615    case OPCODE_MAD:
616       dst = nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]);
617       break;
618 
619    case OPCODE_DST:
620       dst = ptn_dst(b, src);
621       break;
622 
623    case OPCODE_LIT:
624       dst = ptn_lit(b, src);
625       break;
626 
627    case OPCODE_XPD:
628       dst = ptn_xpd(b, src);
629       break;
630 
631    case OPCODE_DP2:
632       dst = nir_fdot2(b, src[0], src[1]);
633       break;
634 
635    case OPCODE_DP3:
636       dst = nir_fdot3(b, src[0], src[1]);
637       break;
638 
639    case OPCODE_DP4:
640       dst = nir_fdot4(b, src[0], src[1]);
641       break;
642 
643    case OPCODE_DPH:
644       dst = nir_fdph(b, src[0], src[1]);
645       break;
646 
647    case OPCODE_KIL:
648       ptn_kil(b, src);
649       break;
650 
651    case OPCODE_CMP:
652       dst = nir_bcsel(b, nir_flt_imm(b, src[0], 0.0), src[1], src[2]);
653       break;
654 
655    case OPCODE_SCS:
656       dst = ptn_scs(b, src);
657       break;
658 
659    case OPCODE_SLT:
660       dst = nir_slt(b, src[0], src[1]);
661       break;
662 
663    case OPCODE_SGE:
664       dst = nir_sge(b, src[0], src[1]);
665       break;
666 
667    case OPCODE_TEX:
668    case OPCODE_TXB:
669    case OPCODE_TXD:
670    case OPCODE_TXL:
671    case OPCODE_TXP:
672       dst = ptn_tex(c, src, prog_inst);
673       break;
674 
675    case OPCODE_SWZ:
676       /* Extended swizzles were already handled in ptn_get_src(). */
677       dst = nir_build_alu_src_arr(b, nir_op_mov, src);
678       break;
679 
680    case OPCODE_NOP:
681       break;
682 
683    default:
684       if (op_trans[op] != 0) {
685          dst = nir_build_alu_src_arr(b, op_trans[op], src);
686       } else {
687          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
688          abort();
689       }
690       break;
691    }
692 
693    if (dst == NULL)
694       return;
695 
696    if (dst->num_components == 1)
697       dst = nir_replicate(b, dst, 4);
698 
699    assert(dst->num_components == 4);
700 
701    if (prog_inst->Saturate)
702       dst = nir_fsat(b, dst);
703 
704    const struct prog_dst_register *prog_dst = &prog_inst->DstReg;
705    assert(!prog_dst->RelAddr);
706 
707    nir_def *reg = NULL;
708    unsigned write_mask = prog_dst->WriteMask;
709 
710    switch (prog_dst->File) {
711    case PROGRAM_TEMPORARY:
712       reg = c->temp_regs[prog_dst->Index];
713       break;
714    case PROGRAM_OUTPUT:
715       reg = c->output_regs[prog_dst->Index];
716       break;
717    case PROGRAM_ADDRESS:
718       assert(prog_dst->Index == 0);
719       reg = c->addr_reg;
720 
721       /* The address register (uniquely) is scalar. */
722       dst = nir_channel(b, dst, 0);
723       write_mask &= 1;
724       break;
725    case PROGRAM_UNDEFINED:
726       return;
727    }
728 
729    /* In case there was some silly .y write to the scalar address reg */
730    if (write_mask == 0)
731       return;
732 
733    assert(reg != NULL);
734    nir_build_store_reg(b, dst, reg, .write_mask = write_mask);
735 }
736 
737 /**
738  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
739  * variables at the end of the shader.
740  *
741  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
742  * written, because there's no output load intrinsic, which means we couldn't
743  * handle writemasks.
744  */
745 static void
ptn_add_output_stores(struct ptn_compile * c)746 ptn_add_output_stores(struct ptn_compile *c)
747 {
748    nir_builder *b = &c->build;
749 
750    u_foreach_bit64(slot, b->shader->info.outputs_written) {
751       nir_def *src = nir_load_reg(b, c->output_regs[slot]);
752       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
753           slot == FRAG_RESULT_DEPTH) {
754          /* result.depth has this strange convention of being the .z component of
755           * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
756           * match GLSL's gl_FragDepth and the expectations of most backends.
757           */
758          src = nir_channel(b, src, 2);
759       }
760       if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
761           (slot == VARYING_SLOT_FOGC || slot == VARYING_SLOT_PSIZ)) {
762          /* result.{fogcoord,psiz} is a single component value */
763          src = nir_channel(b, src, 0);
764       }
765 
766       nir_store_output(b, src, nir_imm_int(b, 0),
767                        .io_semantics.location = slot);
768    }
769 }
770 
771 static void
setup_registers_and_variables(struct ptn_compile * c)772 setup_registers_and_variables(struct ptn_compile *c)
773 {
774    nir_builder *b = &c->build;
775 
776    /* Create output registers. */
777    int max_outputs = util_last_bit64(c->prog->info.outputs_written);
778    c->output_regs = rzalloc_array(c, nir_def *, max_outputs);
779 
780    u_foreach_bit64(i, c->prog->info.outputs_written) {
781       /* Since we can't load from outputs in the IR, we make temporaries
782        * for the outputs and emit stores to the real outputs at the end of
783        * the shader.
784        */
785       c->output_regs[i] = nir_decl_reg(b, 4, 32, 0);
786    }
787 
788    /* Create temporary registers. */
789    c->temp_regs = rzalloc_array(c, nir_def *,
790                                 c->prog->arb.NumTemporaries);
791 
792    for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
793       c->temp_regs[i] = nir_decl_reg(b, 4, 32, 0);
794    }
795 
796    /* Create the address register (for ARB_vertex_program). This is uniquely a
797     * scalar, requiring special handling for stores.
798     */
799    c->addr_reg = nir_decl_reg(b, 1, 32, 0);
800 }
801 
802 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog)803 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog)
804 {
805    const struct nir_shader_compiler_options *options =
806       st_get_nir_compiler_options(ctx->st, prog->info.stage);
807    struct ptn_compile *c;
808    struct nir_shader *s;
809    gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
810 
811    c = rzalloc(NULL, struct ptn_compile);
812    if (!c)
813       return NULL;
814    c->prog = prog;
815    c->ctx = ctx;
816 
817    c->build = nir_builder_init_simple_shader(stage, options, NULL);
818 
819    /* Copy the shader_info from the gl_program */
820    c->build.shader->info = prog->info;
821 
822    s = c->build.shader;
823 
824    if (prog->Parameters->NumParameters > 0) {
825       const struct glsl_type *type =
826          glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
827       c->parameters =
828          nir_variable_create(s, nir_var_uniform, type,
829                              prog->Parameters->Parameters[0].Name);
830    }
831 
832    setup_registers_and_variables(c);
833    if (unlikely(c->error))
834       goto fail;
835 
836    for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
837       ptn_emit_instruction(c, &prog->arb.Instructions[i]);
838 
839       if (unlikely(c->error))
840          break;
841    }
842 
843    ptn_add_output_stores(c);
844 
845    s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
846    s->info.num_textures = util_last_bit(prog->SamplersUsed);
847    s->info.num_ubos = 0;
848    s->info.num_abos = 0;
849    s->info.num_ssbos = 0;
850    s->info.num_images = 0;
851    s->info.uses_texture_gather = false;
852    s->info.clip_distance_array_size = 0;
853    s->info.cull_distance_array_size = 0;
854    s->info.separate_shader = true;
855    s->info.io_lowered = true;
856    s->info.internal = false;
857 
858    /* ARB_vp: */
859    if (prog->arb.IsPositionInvariant) {
860       NIR_PASS(_, s, st_nir_lower_position_invariant,
861                  ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
862                  prog->Parameters);
863    }
864 
865    /* Add OPTION ARB_fog_exp code */
866    if (prog->arb.Fog)
867       NIR_PASS(_, s, st_nir_lower_fog, prog->arb.Fog, prog->Parameters);
868 
869 fail:
870    if (c->error) {
871       ralloc_free(s);
872       s = NULL;
873    }
874    ralloc_free(c);
875    return s;
876 }
877