1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39 #include "state_tracker/st_nir.h"
40
41 /**
42 * \file prog_to_nir.c
43 *
44 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
45 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
46 * vertex processing. Full GLSL support should use glsl_to_nir instead.
47 */
48
49 struct ptn_compile {
50 const struct gl_context *ctx;
51 const struct gl_program *prog;
52 nir_builder build;
53 bool error;
54
55 nir_variable *parameters;
56 nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
57 nir_def **output_regs;
58 nir_def **temp_regs;
59
60 nir_def *addr_reg;
61 };
62
/* Builds an unsigned[4] compound literal of SWIZZLE_* values from four
 * channel names, e.g. SWIZ(Y, Z, X, W).
 */
#define SWIZ(c0, c1, c2, c3) \
   (unsigned[4]){ SWIZZLE_##c0, SWIZZLE_##c1, SWIZZLE_##c2, SWIZZLE_##c3 }

/* Selects the channel named by \p ch (X, Y, Z or W) of \p src. */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
66
67 static nir_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)68 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
69 {
70 nir_builder *b = &c->build;
71 nir_alu_src src;
72
73 memset(&src, 0, sizeof(src));
74
75 switch (prog_src->File) {
76 case PROGRAM_UNDEFINED:
77 return nir_imm_float(b, 0.0);
78 case PROGRAM_TEMPORARY:
79 assert(!prog_src->RelAddr && prog_src->Index >= 0);
80 src.src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[prog_src->Index]));
81 break;
82 case PROGRAM_INPUT: {
83 /* ARB_vertex_program doesn't allow relative addressing on vertex
84 * attributes; ARB_fragment_program has no relative addressing at all.
85 */
86 assert(!prog_src->RelAddr);
87 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
88
89 unsigned slot = prog_src->Index;
90 nir_def *input;
91
92 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
93 if (slot == VARYING_SLOT_POS && c->ctx->Const.GLSLFragCoordIsSysVal) {
94 nir_variable *pos =
95 nir_get_variable_with_location(b->shader, nir_var_system_value,
96 SYSTEM_VALUE_FRAG_COORD,
97 glsl_vec4_type());
98 src.src = nir_src_for_ssa(nir_load_var(b, pos));
99 break;
100 }
101
102 nir_def *baryc = nir_load_barycentric_pixel(b, 32);
103
104 if (slot != VARYING_SLOT_COL0 && slot != VARYING_SLOT_COL1) {
105 nir_intrinsic_set_interp_mode(nir_instr_as_intrinsic(baryc->parent_instr),
106 INTERP_MODE_SMOOTH);
107 }
108
109 input = nir_load_interpolated_input(b, 4, 32, baryc, nir_imm_int(b, 0),
110 .io_semantics.location = slot);
111
112 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
113 * input variable a float, and create a local containing the
114 * full vec4 value.
115 */
116 if (slot == VARYING_SLOT_FOGC) {
117 input = nir_vec4(b, nir_channel(b, input, 0),
118 nir_imm_float(b, 0),
119 nir_imm_float(b, 0),
120 nir_imm_float(b, 1));
121 }
122 } else {
123 input = nir_load_input(b, 4, 32, nir_imm_int(b, 0),
124 .io_semantics.location = slot);
125 }
126
127 src.src = nir_src_for_ssa(input);
128 break;
129 }
130 case PROGRAM_STATE_VAR:
131 case PROGRAM_CONSTANT: {
132 /* We actually want to look at the type in the Parameters list for this,
133 * because it lets us upload constant builtin uniforms as actual
134 * constants.
135 */
136 struct gl_program_parameter_list *plist = c->prog->Parameters;
137 gl_register_file file = prog_src->RelAddr ? prog_src->File :
138 plist->Parameters[prog_src->Index].Type;
139
140 switch (file) {
141 case PROGRAM_CONSTANT:
142 if ((c->prog->arb.IndirectRegisterFiles &
143 (1 << PROGRAM_CONSTANT)) == 0) {
144 unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
145 float *v = (float *) plist->ParameterValues + pvo;
146 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
147 break;
148 }
149 FALLTHROUGH;
150 case PROGRAM_STATE_VAR: {
151 assert(c->parameters != NULL);
152
153 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
154
155 nir_def *index = nir_imm_int(b, prog_src->Index);
156
157 /* Add the address register. Note this is (uniquely) a scalar, so the
158 * component sizes match.
159 */
160 if (prog_src->RelAddr)
161 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
162
163 deref = nir_build_deref_array(b, deref, index);
164 src.src = nir_src_for_ssa(nir_load_deref(b, deref));
165 break;
166 }
167 default:
168 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
169 _mesa_register_file_name(file), file);
170 abort();
171 }
172 break;
173 }
174 default:
175 fprintf(stderr, "unknown src register file: %s (%d)\n",
176 _mesa_register_file_name(prog_src->File), prog_src->File);
177 abort();
178 }
179
180 nir_def *def;
181 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
182 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
183 /* The simple non-SWZ case. */
184 for (int i = 0; i < 4; i++)
185 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
186
187 def = nir_mov_alu(b, src, 4);
188
189 if (prog_src->Negate)
190 def = nir_fneg(b, def);
191 } else {
192 /* The SWZ instruction allows per-component zero/one swizzles, and also
193 * per-component negation.
194 */
195 nir_def *chans[4];
196 for (int i = 0; i < 4; i++) {
197 int swizzle = GET_SWZ(prog_src->Swizzle, i);
198 if (swizzle == SWIZZLE_ZERO) {
199 chans[i] = nir_imm_float(b, 0.0);
200 } else if (swizzle == SWIZZLE_ONE) {
201 chans[i] = nir_imm_float(b, 1.0);
202 } else {
203 assert(swizzle != SWIZZLE_NIL);
204 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
205 nir_def_init(&mov->instr, &mov->def, 1, 32);
206 mov->src[0] = src;
207 mov->src[0].swizzle[0] = swizzle;
208 nir_builder_instr_insert(b, &mov->instr);
209
210 chans[i] = &mov->def;
211 }
212
213 if (prog_src->Negate & (1 << i))
214 chans[i] = nir_fneg(b, chans[i]);
215 }
216 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
217 }
218
219 return def;
220 }
221
222 /* EXP - Approximate Exponential Base 2
223 * dst.x = 2^{\lfloor src.x\rfloor}
224 * dst.y = src.x - \lfloor src.x\rfloor
225 * dst.z = 2^{src.x}
226 * dst.w = 1.0
227 */
228 static nir_def *
ptn_exp(nir_builder * b,nir_def ** src)229 ptn_exp(nir_builder *b, nir_def **src)
230 {
231 nir_def *srcx = ptn_channel(b, src[0], X);
232
233 return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
234 nir_fsub(b, srcx, nir_ffloor(b, srcx)),
235 nir_fexp2(b, srcx),
236 nir_imm_float(b, 1.0));
237 }
238
239 /* LOG - Approximate Logarithm Base 2
240 * dst.x = \lfloor\log_2{|src.x|}\rfloor
241 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
242 * dst.z = \log_2{|src.x|}
243 * dst.w = 1.0
244 */
245 static nir_def *
ptn_log(nir_builder * b,nir_def ** src)246 ptn_log(nir_builder *b, nir_def **src)
247 {
248 nir_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
249 nir_def *log2 = nir_flog2(b, abs_srcx);
250
251 return nir_vec4(b, nir_ffloor(b, log2),
252 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
253 nir_flog2(b, abs_srcx),
254 nir_imm_float(b, 1.0));
255 }
256
257 /* DST - Distance Vector
258 * dst.x = 1.0
259 * dst.y = src0.y \times src1.y
260 * dst.z = src0.z
261 * dst.w = src1.w
262 */
263 static nir_def *
ptn_dst(nir_builder * b,nir_def ** src)264 ptn_dst(nir_builder *b, nir_def **src)
265 {
266 return nir_vec4(b, nir_imm_float(b, 1.0),
267 nir_fmul(b, ptn_channel(b, src[0], Y),
268 ptn_channel(b, src[1], Y)),
269 ptn_channel(b, src[0], Z),
270 ptn_channel(b, src[1], W));
271 }
272
273 /* LIT - Light Coefficients
274 * dst.x = 1.0
275 * dst.y = max(src.x, 0.0)
276 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
277 * dst.w = 1.0
278 */
279 static nir_def *
ptn_lit(nir_builder * b,nir_def ** src)280 ptn_lit(nir_builder *b, nir_def **src)
281 {
282 nir_def *src0_y = ptn_channel(b, src[0], Y);
283 nir_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
284 nir_imm_float(b, 128.0)),
285 nir_imm_float(b, -128.0));
286 nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
287 wclamp);
288
289 nir_def *z = nir_bcsel(b, nir_fle_imm(b, ptn_channel(b, src[0], X), 0.0),
290 nir_imm_float(b, 0.0), pow);
291
292 return nir_vec4(b, nir_imm_float(b, 1.0),
293 nir_fmax(b, ptn_channel(b, src[0], X),
294 nir_imm_float(b, 0.0)),
295 z,
296 nir_imm_float(b, 1.0));
297 }
298
299 /* SCS - Sine Cosine
300 * dst.x = \cos{src.x}
301 * dst.y = \sin{src.x}
302 * dst.z = 0.0
303 * dst.w = 1.0
304 */
305 static nir_def *
ptn_scs(nir_builder * b,nir_def ** src)306 ptn_scs(nir_builder *b, nir_def **src)
307 {
308 return nir_vec4(b, nir_fcos(b, ptn_channel(b, src[0], X)),
309 nir_fsin(b, ptn_channel(b, src[0], X)),
310 nir_imm_float(b, 0.0),
311 nir_imm_float(b, 1.0));
312 }
313
314 static nir_def *
ptn_xpd(nir_builder * b,nir_def ** src)315 ptn_xpd(nir_builder *b, nir_def **src)
316 {
317 nir_def *vec =
318 nir_fsub(b, nir_fmul(b, nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
319 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
320 nir_fmul(b, nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
321 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3)));
322
323 return nir_vec4(b, nir_channel(b, vec, 0),
324 nir_channel(b, vec, 1),
325 nir_channel(b, vec, 2),
326 nir_imm_float(b, 1.0));
327 }
328
329 static void
ptn_kil(nir_builder * b,nir_def ** src)330 ptn_kil(nir_builder *b, nir_def **src)
331 {
332 /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
333 b->exact = true;
334 nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
335 b->exact = false;
336
337 nir_discard_if(b, cmp);
338 }
339
340 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)341 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
342 {
343 *is_array = false;
344
345 switch (index) {
346 case TEXTURE_2D_MULTISAMPLE_INDEX:
347 return GLSL_SAMPLER_DIM_MS;
348 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
349 *is_array = true;
350 return GLSL_SAMPLER_DIM_MS;
351 case TEXTURE_BUFFER_INDEX:
352 return GLSL_SAMPLER_DIM_BUF;
353 case TEXTURE_1D_INDEX:
354 return GLSL_SAMPLER_DIM_1D;
355 case TEXTURE_2D_INDEX:
356 return GLSL_SAMPLER_DIM_2D;
357 case TEXTURE_3D_INDEX:
358 return GLSL_SAMPLER_DIM_3D;
359 case TEXTURE_CUBE_INDEX:
360 return GLSL_SAMPLER_DIM_CUBE;
361 case TEXTURE_CUBE_ARRAY_INDEX:
362 *is_array = true;
363 return GLSL_SAMPLER_DIM_CUBE;
364 case TEXTURE_RECT_INDEX:
365 return GLSL_SAMPLER_DIM_RECT;
366 case TEXTURE_1D_ARRAY_INDEX:
367 *is_array = true;
368 return GLSL_SAMPLER_DIM_1D;
369 case TEXTURE_2D_ARRAY_INDEX:
370 *is_array = true;
371 return GLSL_SAMPLER_DIM_2D;
372 case TEXTURE_EXTERNAL_INDEX:
373 return GLSL_SAMPLER_DIM_EXTERNAL;
374 case NUM_TEXTURE_TARGETS:
375 break;
376 }
377 unreachable("unknown texture target");
378 }
379
380 static nir_def *
ptn_tex(struct ptn_compile * c,nir_def ** src,struct prog_instruction * prog_inst)381 ptn_tex(struct ptn_compile *c, nir_def **src,
382 struct prog_instruction *prog_inst)
383 {
384 nir_builder *b = &c->build;
385 nir_tex_instr *instr;
386 nir_texop op;
387 unsigned num_srcs;
388
389 switch (prog_inst->Opcode) {
390 case OPCODE_TEX:
391 op = nir_texop_tex;
392 num_srcs = 1;
393 break;
394 case OPCODE_TXB:
395 op = nir_texop_txb;
396 num_srcs = 2;
397 break;
398 case OPCODE_TXD:
399 op = nir_texop_txd;
400 num_srcs = 3;
401 break;
402 case OPCODE_TXL:
403 op = nir_texop_txl;
404 num_srcs = 2;
405 break;
406 case OPCODE_TXP:
407 op = nir_texop_tex;
408 num_srcs = 2;
409 break;
410 default:
411 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
412 abort();
413 }
414
415 /* Deref sources */
416 num_srcs += 2;
417
418 if (prog_inst->TexShadow)
419 num_srcs++;
420
421 instr = nir_tex_instr_create(b->shader, num_srcs);
422 instr->op = op;
423 instr->dest_type = nir_type_float32;
424 instr->is_shadow = prog_inst->TexShadow;
425
426 bool is_array;
427 instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
428
429 instr->coord_components =
430 glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
431
432 nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
433 if (!var) {
434 const struct glsl_type *type =
435 glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
436 char samplerName[20];
437 snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
438 var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
439 var->data.binding = prog_inst->TexSrcUnit;
440 var->data.explicit_binding = true;
441 c->sampler_vars[prog_inst->TexSrcUnit] = var;
442 }
443
444 nir_deref_instr *deref = nir_build_deref_var(b, var);
445
446 unsigned src_number = 0;
447
448 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
449 &deref->def);
450 src_number++;
451 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
452 &deref->def);
453 src_number++;
454
455 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_coord,
456 nir_trim_vector(b, src[0],
457 instr->coord_components));
458 src_number++;
459
460 if (prog_inst->Opcode == OPCODE_TXP) {
461 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
462 ptn_channel(b, src[0], W));
463 src_number++;
464 }
465
466 if (prog_inst->Opcode == OPCODE_TXB) {
467 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
468 ptn_channel(b, src[0], W));
469 src_number++;
470 }
471
472 if (prog_inst->Opcode == OPCODE_TXL) {
473 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
474 ptn_channel(b, src[0], W));
475 src_number++;
476 }
477
478 if (instr->is_shadow) {
479 if (instr->coord_components < 3)
480 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
481 else
482 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
483
484 instr->src[src_number].src_type = nir_tex_src_comparator;
485 src_number++;
486 }
487
488 assert(src_number == num_srcs);
489
490 nir_def_init(&instr->instr, &instr->def, 4, 32);
491 nir_builder_instr_insert(b, &instr->instr);
492
493 return &instr->def;
494 }
495
496 static const nir_op op_trans[MAX_OPCODE] = {
497 [OPCODE_NOP] = 0,
498 [OPCODE_ABS] = nir_op_fabs,
499 [OPCODE_ADD] = nir_op_fadd,
500 [OPCODE_ARL] = 0,
501 [OPCODE_CMP] = 0,
502 [OPCODE_COS] = 0,
503 [OPCODE_DDX] = 0,
504 [OPCODE_DDY] = 0,
505 [OPCODE_DP2] = 0,
506 [OPCODE_DP3] = 0,
507 [OPCODE_DP4] = 0,
508 [OPCODE_DPH] = 0,
509 [OPCODE_DST] = 0,
510 [OPCODE_END] = 0,
511 [OPCODE_EX2] = 0,
512 [OPCODE_EXP] = 0,
513 [OPCODE_FLR] = nir_op_ffloor,
514 [OPCODE_FRC] = nir_op_ffract,
515 [OPCODE_LG2] = 0,
516 [OPCODE_LIT] = 0,
517 [OPCODE_LOG] = 0,
518 [OPCODE_LRP] = 0,
519 [OPCODE_MAD] = 0,
520 [OPCODE_MAX] = nir_op_fmax,
521 [OPCODE_MIN] = nir_op_fmin,
522 [OPCODE_MOV] = nir_op_mov,
523 [OPCODE_MUL] = nir_op_fmul,
524 [OPCODE_POW] = 0,
525 [OPCODE_RCP] = 0,
526
527 [OPCODE_RSQ] = 0,
528 [OPCODE_SCS] = 0,
529 [OPCODE_SGE] = 0,
530 [OPCODE_SIN] = 0,
531 [OPCODE_SLT] = 0,
532 [OPCODE_SSG] = nir_op_fsign,
533 [OPCODE_SUB] = nir_op_fsub,
534 [OPCODE_SWZ] = 0,
535 [OPCODE_TEX] = 0,
536 [OPCODE_TXB] = 0,
537 [OPCODE_TXD] = 0,
538 [OPCODE_TXL] = 0,
539 [OPCODE_TXP] = 0,
540 [OPCODE_XPD] = 0,
541 };
542
543 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)544 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
545 {
546 nir_builder *b = &c->build;
547 unsigned i;
548 const unsigned op = prog_inst->Opcode;
549
550 if (op == OPCODE_END)
551 return;
552
553 nir_def *src[3];
554 for (i = 0; i < 3; i++) {
555 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
556 }
557
558 nir_def *dst = NULL;
559 if (c->error)
560 return;
561
562 switch (op) {
563 case OPCODE_DDX:
564 dst = nir_ddx(b, src[0]);
565 break;
566
567 case OPCODE_DDY:
568 dst = nir_ddy(b, src[0]);
569 break;
570
571 case OPCODE_RSQ:
572 dst = nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)));
573 break;
574
575 case OPCODE_RCP:
576 dst = nir_frcp(b, ptn_channel(b, src[0], X));
577 break;
578
579 case OPCODE_EX2:
580 dst = nir_fexp2(b, ptn_channel(b, src[0], X));
581 break;
582
583 case OPCODE_LG2:
584 dst = nir_flog2(b, ptn_channel(b, src[0], X));
585 break;
586
587 case OPCODE_POW:
588 dst = nir_fpow(b, ptn_channel(b, src[0], X), ptn_channel(b, src[1], X));
589 break;
590
591 case OPCODE_COS:
592 dst = nir_fcos(b, ptn_channel(b, src[0], X));
593 break;
594
595 case OPCODE_SIN:
596 dst = nir_fsin(b, ptn_channel(b, src[0], X));
597 break;
598
599 case OPCODE_ARL:
600 dst = nir_f2i32(b, nir_ffloor(b, src[0]));
601 break;
602
603 case OPCODE_EXP:
604 dst = ptn_exp(b, src);
605 break;
606
607 case OPCODE_LOG:
608 dst = ptn_log(b, src);
609 break;
610
611 case OPCODE_LRP:
612 dst = nir_flrp(b, src[2], src[1], src[0]);
613 break;
614
615 case OPCODE_MAD:
616 dst = nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]);
617 break;
618
619 case OPCODE_DST:
620 dst = ptn_dst(b, src);
621 break;
622
623 case OPCODE_LIT:
624 dst = ptn_lit(b, src);
625 break;
626
627 case OPCODE_XPD:
628 dst = ptn_xpd(b, src);
629 break;
630
631 case OPCODE_DP2:
632 dst = nir_fdot2(b, src[0], src[1]);
633 break;
634
635 case OPCODE_DP3:
636 dst = nir_fdot3(b, src[0], src[1]);
637 break;
638
639 case OPCODE_DP4:
640 dst = nir_fdot4(b, src[0], src[1]);
641 break;
642
643 case OPCODE_DPH:
644 dst = nir_fdph(b, src[0], src[1]);
645 break;
646
647 case OPCODE_KIL:
648 ptn_kil(b, src);
649 break;
650
651 case OPCODE_CMP:
652 dst = nir_bcsel(b, nir_flt_imm(b, src[0], 0.0), src[1], src[2]);
653 break;
654
655 case OPCODE_SCS:
656 dst = ptn_scs(b, src);
657 break;
658
659 case OPCODE_SLT:
660 dst = nir_slt(b, src[0], src[1]);
661 break;
662
663 case OPCODE_SGE:
664 dst = nir_sge(b, src[0], src[1]);
665 break;
666
667 case OPCODE_TEX:
668 case OPCODE_TXB:
669 case OPCODE_TXD:
670 case OPCODE_TXL:
671 case OPCODE_TXP:
672 dst = ptn_tex(c, src, prog_inst);
673 break;
674
675 case OPCODE_SWZ:
676 /* Extended swizzles were already handled in ptn_get_src(). */
677 dst = nir_build_alu_src_arr(b, nir_op_mov, src);
678 break;
679
680 case OPCODE_NOP:
681 break;
682
683 default:
684 if (op_trans[op] != 0) {
685 dst = nir_build_alu_src_arr(b, op_trans[op], src);
686 } else {
687 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
688 abort();
689 }
690 break;
691 }
692
693 if (dst == NULL)
694 return;
695
696 if (dst->num_components == 1)
697 dst = nir_replicate(b, dst, 4);
698
699 assert(dst->num_components == 4);
700
701 if (prog_inst->Saturate)
702 dst = nir_fsat(b, dst);
703
704 const struct prog_dst_register *prog_dst = &prog_inst->DstReg;
705 assert(!prog_dst->RelAddr);
706
707 nir_def *reg = NULL;
708 unsigned write_mask = prog_dst->WriteMask;
709
710 switch (prog_dst->File) {
711 case PROGRAM_TEMPORARY:
712 reg = c->temp_regs[prog_dst->Index];
713 break;
714 case PROGRAM_OUTPUT:
715 reg = c->output_regs[prog_dst->Index];
716 break;
717 case PROGRAM_ADDRESS:
718 assert(prog_dst->Index == 0);
719 reg = c->addr_reg;
720
721 /* The address register (uniquely) is scalar. */
722 dst = nir_channel(b, dst, 0);
723 write_mask &= 1;
724 break;
725 case PROGRAM_UNDEFINED:
726 return;
727 }
728
729 /* In case there was some silly .y write to the scalar address reg */
730 if (write_mask == 0)
731 return;
732
733 assert(reg != NULL);
734 nir_build_store_reg(b, dst, reg, .write_mask = write_mask);
735 }
736
737 /**
738 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
739 * variables at the end of the shader.
740 *
741 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
742 * written, because there's no output load intrinsic, which means we couldn't
743 * handle writemasks.
744 */
745 static void
ptn_add_output_stores(struct ptn_compile * c)746 ptn_add_output_stores(struct ptn_compile *c)
747 {
748 nir_builder *b = &c->build;
749
750 u_foreach_bit64(slot, b->shader->info.outputs_written) {
751 nir_def *src = nir_load_reg(b, c->output_regs[slot]);
752 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
753 slot == FRAG_RESULT_DEPTH) {
754 /* result.depth has this strange convention of being the .z component of
755 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
756 * match GLSL's gl_FragDepth and the expectations of most backends.
757 */
758 src = nir_channel(b, src, 2);
759 }
760 if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
761 (slot == VARYING_SLOT_FOGC || slot == VARYING_SLOT_PSIZ)) {
762 /* result.{fogcoord,psiz} is a single component value */
763 src = nir_channel(b, src, 0);
764 }
765
766 nir_store_output(b, src, nir_imm_int(b, 0),
767 .io_semantics.location = slot);
768 }
769 }
770
771 static void
setup_registers_and_variables(struct ptn_compile * c)772 setup_registers_and_variables(struct ptn_compile *c)
773 {
774 nir_builder *b = &c->build;
775
776 /* Create output registers. */
777 int max_outputs = util_last_bit64(c->prog->info.outputs_written);
778 c->output_regs = rzalloc_array(c, nir_def *, max_outputs);
779
780 u_foreach_bit64(i, c->prog->info.outputs_written) {
781 /* Since we can't load from outputs in the IR, we make temporaries
782 * for the outputs and emit stores to the real outputs at the end of
783 * the shader.
784 */
785 c->output_regs[i] = nir_decl_reg(b, 4, 32, 0);
786 }
787
788 /* Create temporary registers. */
789 c->temp_regs = rzalloc_array(c, nir_def *,
790 c->prog->arb.NumTemporaries);
791
792 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
793 c->temp_regs[i] = nir_decl_reg(b, 4, 32, 0);
794 }
795
796 /* Create the address register (for ARB_vertex_program). This is uniquely a
797 * scalar, requiring special handling for stores.
798 */
799 c->addr_reg = nir_decl_reg(b, 1, 32, 0);
800 }
801
802 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog)803 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog)
804 {
805 const struct nir_shader_compiler_options *options =
806 st_get_nir_compiler_options(ctx->st, prog->info.stage);
807 struct ptn_compile *c;
808 struct nir_shader *s;
809 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
810
811 c = rzalloc(NULL, struct ptn_compile);
812 if (!c)
813 return NULL;
814 c->prog = prog;
815 c->ctx = ctx;
816
817 c->build = nir_builder_init_simple_shader(stage, options, NULL);
818
819 /* Copy the shader_info from the gl_program */
820 c->build.shader->info = prog->info;
821
822 s = c->build.shader;
823
824 if (prog->Parameters->NumParameters > 0) {
825 const struct glsl_type *type =
826 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
827 c->parameters =
828 nir_variable_create(s, nir_var_uniform, type,
829 prog->Parameters->Parameters[0].Name);
830 }
831
832 setup_registers_and_variables(c);
833 if (unlikely(c->error))
834 goto fail;
835
836 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
837 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
838
839 if (unlikely(c->error))
840 break;
841 }
842
843 ptn_add_output_stores(c);
844
845 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
846 s->info.num_textures = util_last_bit(prog->SamplersUsed);
847 s->info.num_ubos = 0;
848 s->info.num_abos = 0;
849 s->info.num_ssbos = 0;
850 s->info.num_images = 0;
851 s->info.uses_texture_gather = false;
852 s->info.clip_distance_array_size = 0;
853 s->info.cull_distance_array_size = 0;
854 s->info.separate_shader = true;
855 s->info.io_lowered = true;
856 s->info.internal = false;
857
858 /* ARB_vp: */
859 if (prog->arb.IsPositionInvariant) {
860 NIR_PASS(_, s, st_nir_lower_position_invariant,
861 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
862 prog->Parameters);
863 }
864
865 /* Add OPTION ARB_fog_exp code */
866 if (prog->arb.Fog)
867 NIR_PASS(_, s, st_nir_lower_fog, prog->arb.Fog, prog->Parameters);
868
869 fail:
870 if (c->error) {
871 ralloc_free(s);
872 s = NULL;
873 }
874 ralloc_free(c);
875 return s;
876 }
877