1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39
40 /**
41 * \file prog_to_nir.c
42 *
43 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
44 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
45 * vertex processing. Full GLSL support should use glsl_to_nir instead.
46 */
47
48 struct ptn_compile {
49 const struct gl_context *ctx;
50 const struct gl_program *prog;
51 nir_builder build;
52 bool error;
53
54 nir_variable *parameters;
55 nir_variable *input_vars[VARYING_SLOT_MAX];
56 nir_variable *output_vars[VARYING_SLOT_MAX];
57 nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
58 nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
59 nir_register **output_regs;
60 nir_register **temp_regs;
61
62 nir_register *addr_reg;
63 };
64
/* Expands four component names (X, Y, Z, W, ...) into an anonymous
 * unsigned[4] array of the matching SWIZZLE_* values.
 */
#define SWIZ(X, Y, Z, W)                                                \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Selects the single channel named by ch from src via nir_channel(). */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
68
69 static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile * c,nir_alu_dest * dest)70 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
71 {
72 nir_builder *b = &c->build;
73
74 nir_alu_src src;
75 memset(&src, 0, sizeof(src));
76
77 if (dest->dest.is_ssa)
78 src.src = nir_src_for_ssa(&dest->dest.ssa);
79 else {
80 assert(!dest->dest.reg.indirect);
81 src.src = nir_src_for_reg(dest->dest.reg.reg);
82 src.src.reg.base_offset = dest->dest.reg.base_offset;
83 }
84
85 for (int i = 0; i < 4; i++)
86 src.swizzle[i] = i;
87
88 return nir_mov_alu(b, src, 4);
89 }
90
91 static nir_alu_dest
ptn_get_dest(struct ptn_compile * c,const struct prog_dst_register * prog_dst)92 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
93 {
94 nir_alu_dest dest;
95
96 memset(&dest, 0, sizeof(dest));
97
98 switch (prog_dst->File) {
99 case PROGRAM_TEMPORARY:
100 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
101 break;
102 case PROGRAM_OUTPUT:
103 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
104 break;
105 case PROGRAM_ADDRESS:
106 assert(prog_dst->Index == 0);
107 dest.dest.reg.reg = c->addr_reg;
108 break;
109 case PROGRAM_UNDEFINED:
110 break;
111 }
112
113 dest.write_mask = prog_dst->WriteMask;
114 dest.saturate = false;
115
116 assert(!prog_dst->RelAddr);
117
118 return dest;
119 }
120
121 static nir_ssa_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)122 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
123 {
124 nir_builder *b = &c->build;
125 nir_alu_src src;
126
127 memset(&src, 0, sizeof(src));
128
129 switch (prog_src->File) {
130 case PROGRAM_UNDEFINED:
131 return nir_imm_float(b, 0.0);
132 case PROGRAM_TEMPORARY:
133 assert(!prog_src->RelAddr && prog_src->Index >= 0);
134 src.src.reg.reg = c->temp_regs[prog_src->Index];
135 break;
136 case PROGRAM_INPUT: {
137 /* ARB_vertex_program doesn't allow relative addressing on vertex
138 * attributes; ARB_fragment_program has no relative addressing at all.
139 */
140 assert(!prog_src->RelAddr);
141
142 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
143
144 nir_variable *var = c->input_vars[prog_src->Index];
145 src.src = nir_src_for_ssa(nir_load_var(b, var));
146 break;
147 }
148 case PROGRAM_SYSTEM_VALUE: {
149 assert(!prog_src->RelAddr);
150
151 assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
152
153 nir_variable *var = c->sysval_vars[prog_src->Index];
154 src.src = nir_src_for_ssa(nir_load_var(b, var));
155 break;
156 }
157 case PROGRAM_STATE_VAR:
158 case PROGRAM_CONSTANT: {
159 /* We actually want to look at the type in the Parameters list for this,
160 * because it lets us upload constant builtin uniforms as actual
161 * constants.
162 */
163 struct gl_program_parameter_list *plist = c->prog->Parameters;
164 gl_register_file file = prog_src->RelAddr ? prog_src->File :
165 plist->Parameters[prog_src->Index].Type;
166
167 switch (file) {
168 case PROGRAM_CONSTANT:
169 if ((c->prog->arb.IndirectRegisterFiles &
170 (1 << PROGRAM_CONSTANT)) == 0) {
171 unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
172 float *v = (float *) plist->ParameterValues + pvo;
173 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
174 break;
175 }
176 FALLTHROUGH;
177 case PROGRAM_STATE_VAR: {
178 assert(c->parameters != NULL);
179
180 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
181
182 nir_ssa_def *index = nir_imm_int(b, prog_src->Index);
183 if (prog_src->RelAddr)
184 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
185 deref = nir_build_deref_array(b, deref, nir_channel(b, index, 0));
186
187 src.src = nir_src_for_ssa(nir_load_deref(b, deref));
188 break;
189 }
190 default:
191 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
192 _mesa_register_file_name(file), file);
193 abort();
194 }
195 break;
196 }
197 default:
198 fprintf(stderr, "unknown src register file: %s (%d)\n",
199 _mesa_register_file_name(prog_src->File), prog_src->File);
200 abort();
201 }
202
203 nir_ssa_def *def;
204 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
205 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
206 /* The simple non-SWZ case. */
207 for (int i = 0; i < 4; i++)
208 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
209
210 def = nir_mov_alu(b, src, 4);
211
212 if (prog_src->Negate)
213 def = nir_fneg(b, def);
214 } else {
215 /* The SWZ instruction allows per-component zero/one swizzles, and also
216 * per-component negation.
217 */
218 nir_ssa_def *chans[4];
219 for (int i = 0; i < 4; i++) {
220 int swizzle = GET_SWZ(prog_src->Swizzle, i);
221 if (swizzle == SWIZZLE_ZERO) {
222 chans[i] = nir_imm_float(b, 0.0);
223 } else if (swizzle == SWIZZLE_ONE) {
224 chans[i] = nir_imm_float(b, 1.0);
225 } else {
226 assert(swizzle != SWIZZLE_NIL);
227 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
228 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
229 mov->dest.write_mask = 0x1;
230 mov->src[0] = src;
231 mov->src[0].swizzle[0] = swizzle;
232 nir_builder_instr_insert(b, &mov->instr);
233
234 chans[i] = &mov->dest.dest.ssa;
235 }
236
237 if (prog_src->Negate & (1 << i))
238 chans[i] = nir_fneg(b, chans[i]);
239 }
240 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
241 }
242
243 return def;
244 }
245
246 static void
ptn_alu(nir_builder * b,nir_op op,nir_alu_dest dest,nir_ssa_def ** src)247 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
248 {
249 unsigned num_srcs = nir_op_infos[op].num_inputs;
250 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
251 unsigned i;
252
253 for (i = 0; i < num_srcs; i++)
254 instr->src[i].src = nir_src_for_ssa(src[i]);
255
256 instr->dest = dest;
257 nir_builder_instr_insert(b, &instr->instr);
258 }
259
260 static void
ptn_move_dest_masked(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def,unsigned write_mask)261 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
262 nir_ssa_def *def, unsigned write_mask)
263 {
264 if (!(dest.write_mask & write_mask))
265 return;
266
267 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
268 if (!mov)
269 return;
270
271 mov->dest = dest;
272 mov->dest.write_mask &= write_mask;
273 mov->src[0].src = nir_src_for_ssa(def);
274 for (unsigned i = def->num_components; i < 4; i++)
275 mov->src[0].swizzle[i] = def->num_components - 1;
276 nir_builder_instr_insert(b, &mov->instr);
277 }
278
279 static void
ptn_move_dest(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def)280 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
281 {
282 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
283 }
284
285 static void
ptn_arl(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)286 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
287 {
288 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
289 }
290
291 /* EXP - Approximate Exponential Base 2
292 * dst.x = 2^{\lfloor src.x\rfloor}
293 * dst.y = src.x - \lfloor src.x\rfloor
294 * dst.z = 2^{src.x}
295 * dst.w = 1.0
296 */
297 static void
ptn_exp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)298 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
299 {
300 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
301
302 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
303 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
304 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
305 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
306 }
307
308 /* LOG - Approximate Logarithm Base 2
309 * dst.x = \lfloor\log_2{|src.x|}\rfloor
310 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
311 * dst.z = \log_2{|src.x|}
312 * dst.w = 1.0
313 */
314 static void
ptn_log(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)315 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
316 {
317 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
318 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
319 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
320
321 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
322 ptn_move_dest_masked(b, dest,
323 nir_fmul(b, abs_srcx,
324 nir_fexp2(b, nir_fneg(b, floor_log2))),
325 WRITEMASK_Y);
326 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
327 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
328 }
329
330 /* DST - Distance Vector
331 * dst.x = 1.0
332 * dst.y = src0.y \times src1.y
333 * dst.z = src0.z
334 * dst.w = src1.w
335 */
336 static void
ptn_dst(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)337 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
338 {
339 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
340 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
341 ptn_move_dest_masked(b, dest, nir_mov(b, src[0]), WRITEMASK_Z);
342 ptn_move_dest_masked(b, dest, nir_mov(b, src[1]), WRITEMASK_W);
343 }
344
345 /* LIT - Light Coefficients
346 * dst.x = 1.0
347 * dst.y = max(src.x, 0.0)
348 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
349 * dst.w = 1.0
350 */
351 static void
ptn_lit(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)352 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
353 {
354 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
355
356 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
357 nir_imm_float(b, 0.0)), WRITEMASK_Y);
358
359 if (dest.write_mask & WRITEMASK_Z) {
360 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
361 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
362 nir_imm_float(b, 128.0)),
363 nir_imm_float(b, -128.0));
364 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
365 wclamp);
366
367 nir_ssa_def *z = nir_bcsel(b,
368 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
369 nir_imm_float(b, 0.0),
370 pow);
371
372 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
373 }
374 }
375
376 /* SCS - Sine Cosine
377 * dst.x = \cos{src.x}
378 * dst.y = \sin{src.x}
379 * dst.z = 0.0
380 * dst.w = 1.0
381 */
382 static void
ptn_scs(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)383 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
384 {
385 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
386 WRITEMASK_X);
387 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
388 WRITEMASK_Y);
389 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
390 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
391 }
392
393 static void
ptn_slt(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)394 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
395 {
396 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
397 }
398
399 static void
ptn_sge(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)400 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
401 {
402 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
403 }
404
405 static void
ptn_xpd(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)406 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
407 {
408 ptn_move_dest_masked(b, dest,
409 nir_fsub(b,
410 nir_fmul(b,
411 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
412 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
413 nir_fmul(b,
414 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
415 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3))),
416 WRITEMASK_XYZ);
417 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
418 }
419
420 static void
ptn_dp2(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)421 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
422 {
423 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
424 }
425
426 static void
ptn_dp3(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)427 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
428 {
429 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
430 }
431
432 static void
ptn_dp4(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)433 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
434 {
435 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
436 }
437
438 static void
ptn_dph(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)439 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
440 {
441 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
442 }
443
444 static void
ptn_cmp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)445 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
446 {
447 ptn_move_dest(b, dest, nir_bcsel(b,
448 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
449 src[1], src[2]));
450 }
451
452 static void
ptn_lrp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)453 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
454 {
455 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
456 }
457
458 static void
ptn_kil(nir_builder * b,nir_ssa_def ** src)459 ptn_kil(nir_builder *b, nir_ssa_def **src)
460 {
461 /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
462 b->exact = true;
463 nir_ssa_def *cmp = nir_bany(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
464 b->exact = false;
465
466 nir_discard_if(b, cmp);
467 }
468
469 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)470 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
471 {
472 *is_array = false;
473
474 switch (index) {
475 case TEXTURE_2D_MULTISAMPLE_INDEX:
476 return GLSL_SAMPLER_DIM_MS;
477 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
478 *is_array = true;
479 return GLSL_SAMPLER_DIM_MS;
480 case TEXTURE_BUFFER_INDEX:
481 return GLSL_SAMPLER_DIM_BUF;
482 case TEXTURE_1D_INDEX:
483 return GLSL_SAMPLER_DIM_1D;
484 case TEXTURE_2D_INDEX:
485 return GLSL_SAMPLER_DIM_2D;
486 case TEXTURE_3D_INDEX:
487 return GLSL_SAMPLER_DIM_3D;
488 case TEXTURE_CUBE_INDEX:
489 return GLSL_SAMPLER_DIM_CUBE;
490 case TEXTURE_CUBE_ARRAY_INDEX:
491 *is_array = true;
492 return GLSL_SAMPLER_DIM_CUBE;
493 case TEXTURE_RECT_INDEX:
494 return GLSL_SAMPLER_DIM_RECT;
495 case TEXTURE_1D_ARRAY_INDEX:
496 *is_array = true;
497 return GLSL_SAMPLER_DIM_1D;
498 case TEXTURE_2D_ARRAY_INDEX:
499 *is_array = true;
500 return GLSL_SAMPLER_DIM_2D;
501 case TEXTURE_EXTERNAL_INDEX:
502 return GLSL_SAMPLER_DIM_EXTERNAL;
503 case NUM_TEXTURE_TARGETS:
504 break;
505 }
506 unreachable("unknown texture target");
507 }
508
509 static void
ptn_tex(struct ptn_compile * c,nir_alu_dest dest,nir_ssa_def ** src,struct prog_instruction * prog_inst)510 ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
511 struct prog_instruction *prog_inst)
512 {
513 nir_builder *b = &c->build;
514 nir_tex_instr *instr;
515 nir_texop op;
516 unsigned num_srcs;
517
518 switch (prog_inst->Opcode) {
519 case OPCODE_TEX:
520 op = nir_texop_tex;
521 num_srcs = 1;
522 break;
523 case OPCODE_TXB:
524 op = nir_texop_txb;
525 num_srcs = 2;
526 break;
527 case OPCODE_TXD:
528 op = nir_texop_txd;
529 num_srcs = 3;
530 break;
531 case OPCODE_TXL:
532 op = nir_texop_txl;
533 num_srcs = 2;
534 break;
535 case OPCODE_TXP:
536 op = nir_texop_tex;
537 num_srcs = 2;
538 break;
539 default:
540 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
541 abort();
542 }
543
544 /* Deref sources */
545 num_srcs += 2;
546
547 if (prog_inst->TexShadow)
548 num_srcs++;
549
550 instr = nir_tex_instr_create(b->shader, num_srcs);
551 instr->op = op;
552 instr->dest_type = nir_type_float32;
553 instr->is_shadow = prog_inst->TexShadow;
554
555 bool is_array;
556 instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
557
558 instr->coord_components =
559 glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
560
561 nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
562 if (!var) {
563 const struct glsl_type *type =
564 glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
565 char samplerName[20];
566 snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
567 var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
568 var->data.binding = prog_inst->TexSrcUnit;
569 var->data.explicit_binding = true;
570 c->sampler_vars[prog_inst->TexSrcUnit] = var;
571 }
572
573 nir_deref_instr *deref = nir_build_deref_var(b, var);
574
575 unsigned src_number = 0;
576
577 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
578 instr->src[src_number].src_type = nir_tex_src_texture_deref;
579 src_number++;
580 instr->src[src_number].src = nir_src_for_ssa(&deref->dest.ssa);
581 instr->src[src_number].src_type = nir_tex_src_sampler_deref;
582 src_number++;
583
584 instr->src[src_number].src =
585 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
586 instr->coord_components));
587 instr->src[src_number].src_type = nir_tex_src_coord;
588 src_number++;
589
590 if (prog_inst->Opcode == OPCODE_TXP) {
591 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
592 instr->src[src_number].src_type = nir_tex_src_projector;
593 src_number++;
594 }
595
596 if (prog_inst->Opcode == OPCODE_TXB) {
597 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
598 instr->src[src_number].src_type = nir_tex_src_bias;
599 src_number++;
600 }
601
602 if (prog_inst->Opcode == OPCODE_TXL) {
603 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
604 instr->src[src_number].src_type = nir_tex_src_lod;
605 src_number++;
606 }
607
608 if (instr->is_shadow) {
609 if (instr->coord_components < 3)
610 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
611 else
612 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
613
614 instr->src[src_number].src_type = nir_tex_src_comparator;
615 src_number++;
616 }
617
618 assert(src_number == num_srcs);
619
620 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
621 nir_builder_instr_insert(b, &instr->instr);
622
623 /* Resolve the writemask on the texture op. */
624 ptn_move_dest(b, dest, &instr->dest.ssa);
625 }
626
627 static const nir_op op_trans[MAX_OPCODE] = {
628 [OPCODE_NOP] = 0,
629 [OPCODE_ABS] = nir_op_fabs,
630 [OPCODE_ADD] = nir_op_fadd,
631 [OPCODE_ARL] = 0,
632 [OPCODE_CMP] = 0,
633 [OPCODE_COS] = 0,
634 [OPCODE_DDX] = nir_op_fddx,
635 [OPCODE_DDY] = nir_op_fddy,
636 [OPCODE_DP2] = 0,
637 [OPCODE_DP3] = 0,
638 [OPCODE_DP4] = 0,
639 [OPCODE_DPH] = 0,
640 [OPCODE_DST] = 0,
641 [OPCODE_END] = 0,
642 [OPCODE_EX2] = 0,
643 [OPCODE_EXP] = 0,
644 [OPCODE_FLR] = nir_op_ffloor,
645 [OPCODE_FRC] = nir_op_ffract,
646 [OPCODE_LG2] = 0,
647 [OPCODE_LIT] = 0,
648 [OPCODE_LOG] = 0,
649 [OPCODE_LRP] = 0,
650 [OPCODE_MAD] = 0,
651 [OPCODE_MAX] = nir_op_fmax,
652 [OPCODE_MIN] = nir_op_fmin,
653 [OPCODE_MOV] = nir_op_mov,
654 [OPCODE_MUL] = nir_op_fmul,
655 [OPCODE_POW] = 0,
656 [OPCODE_RCP] = 0,
657
658 [OPCODE_RSQ] = 0,
659 [OPCODE_SCS] = 0,
660 [OPCODE_SGE] = 0,
661 [OPCODE_SIN] = 0,
662 [OPCODE_SLT] = 0,
663 [OPCODE_SSG] = nir_op_fsign,
664 [OPCODE_SUB] = nir_op_fsub,
665 [OPCODE_SWZ] = 0,
666 [OPCODE_TEX] = 0,
667 [OPCODE_TRUNC] = nir_op_ftrunc,
668 [OPCODE_TXB] = 0,
669 [OPCODE_TXD] = 0,
670 [OPCODE_TXL] = 0,
671 [OPCODE_TXP] = 0,
672 [OPCODE_XPD] = 0,
673 };
674
675 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)676 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
677 {
678 nir_builder *b = &c->build;
679 unsigned i;
680 const unsigned op = prog_inst->Opcode;
681
682 if (op == OPCODE_END)
683 return;
684
685 nir_ssa_def *src[3];
686 for (i = 0; i < 3; i++) {
687 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
688 }
689 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
690 if (c->error)
691 return;
692
693 switch (op) {
694 case OPCODE_RSQ:
695 ptn_move_dest(b, dest,
696 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
697 break;
698
699 case OPCODE_RCP:
700 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
701 break;
702
703 case OPCODE_EX2:
704 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
705 break;
706
707 case OPCODE_LG2:
708 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
709 break;
710
711 case OPCODE_POW:
712 ptn_move_dest(b, dest, nir_fpow(b,
713 ptn_channel(b, src[0], X),
714 ptn_channel(b, src[1], X)));
715 break;
716
717 case OPCODE_COS:
718 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
719 break;
720
721 case OPCODE_SIN:
722 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
723 break;
724
725 case OPCODE_ARL:
726 ptn_arl(b, dest, src);
727 break;
728
729 case OPCODE_EXP:
730 ptn_exp(b, dest, src);
731 break;
732
733 case OPCODE_LOG:
734 ptn_log(b, dest, src);
735 break;
736
737 case OPCODE_LRP:
738 ptn_lrp(b, dest, src);
739 break;
740
741 case OPCODE_MAD:
742 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
743 break;
744
745 case OPCODE_DST:
746 ptn_dst(b, dest, src);
747 break;
748
749 case OPCODE_LIT:
750 ptn_lit(b, dest, src);
751 break;
752
753 case OPCODE_XPD:
754 ptn_xpd(b, dest, src);
755 break;
756
757 case OPCODE_DP2:
758 ptn_dp2(b, dest, src);
759 break;
760
761 case OPCODE_DP3:
762 ptn_dp3(b, dest, src);
763 break;
764
765 case OPCODE_DP4:
766 ptn_dp4(b, dest, src);
767 break;
768
769 case OPCODE_DPH:
770 ptn_dph(b, dest, src);
771 break;
772
773 case OPCODE_KIL:
774 ptn_kil(b, src);
775 break;
776
777 case OPCODE_CMP:
778 ptn_cmp(b, dest, src);
779 break;
780
781 case OPCODE_SCS:
782 ptn_scs(b, dest, src);
783 break;
784
785 case OPCODE_SLT:
786 ptn_slt(b, dest, src);
787 break;
788
789 case OPCODE_SGE:
790 ptn_sge(b, dest, src);
791 break;
792
793 case OPCODE_TEX:
794 case OPCODE_TXB:
795 case OPCODE_TXD:
796 case OPCODE_TXL:
797 case OPCODE_TXP:
798 ptn_tex(c, dest, src, prog_inst);
799 break;
800
801 case OPCODE_SWZ:
802 /* Extended swizzles were already handled in ptn_get_src(). */
803 ptn_alu(b, nir_op_mov, dest, src);
804 break;
805
806 case OPCODE_NOP:
807 break;
808
809 default:
810 if (op_trans[op] != 0) {
811 ptn_alu(b, op_trans[op], dest, src);
812 } else {
813 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
814 abort();
815 }
816 break;
817 }
818
819 if (prog_inst->Saturate) {
820 assert(prog_inst->Saturate);
821 assert(!dest.dest.is_ssa);
822 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
823 }
824 }
825
826 /**
827 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
828 * variables at the end of the shader.
829 *
830 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
831 * written, because there's no output load intrinsic, which means we couldn't
832 * handle writemasks.
833 */
834 static void
ptn_add_output_stores(struct ptn_compile * c)835 ptn_add_output_stores(struct ptn_compile *c)
836 {
837 nir_builder *b = &c->build;
838
839 nir_foreach_shader_out_variable(var, b->shader) {
840 nir_ssa_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
841 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
842 var->data.location == FRAG_RESULT_DEPTH) {
843 /* result.depth has this strange convention of being the .z component of
844 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
845 * match GLSL's gl_FragDepth and the expectations of most backends.
846 */
847 src = nir_channel(b, src, 2);
848 }
849 if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
850 (var->data.location == VARYING_SLOT_FOGC ||
851 var->data.location == VARYING_SLOT_PSIZ)) {
852 /* result.{fogcoord,psiz} is a single component value */
853 src = nir_channel(b, src, 0);
854 }
855 unsigned num_components = glsl_get_vector_elements(var->type);
856 nir_store_var(b, var, src, (1 << num_components) - 1);
857 }
858 }
859
860 static void
setup_registers_and_variables(struct ptn_compile * c)861 setup_registers_and_variables(struct ptn_compile *c)
862 {
863 nir_builder *b = &c->build;
864 struct nir_shader *shader = b->shader;
865
866 /* Create input variables. */
867 uint64_t inputs_read = c->prog->info.inputs_read;
868 while (inputs_read) {
869 const int i = u_bit_scan64(&inputs_read);
870
871 if (c->ctx->Const.GLSLFragCoordIsSysVal &&
872 shader->info.stage == MESA_SHADER_FRAGMENT &&
873 i == VARYING_SLOT_POS) {
874 nir_variable *var = nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
875 "frag_coord");
876 var->data.location = SYSTEM_VALUE_FRAG_COORD;
877 c->input_vars[i] = var;
878 continue;
879 }
880
881 nir_variable *var =
882 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
883 ralloc_asprintf(shader, "in_%d", i));
884 var->data.location = i;
885 var->data.index = 0;
886
887 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
888 if (i == VARYING_SLOT_FOGC) {
889 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
890 * input variable a float, and create a local containing the
891 * full vec4 value.
892 */
893 var->type = glsl_float_type();
894
895 nir_variable *fullvar =
896 nir_local_variable_create(b->impl, glsl_vec4_type(),
897 "fogcoord_tmp");
898
899 nir_store_var(b, fullvar,
900 nir_vec4(b, nir_load_var(b, var),
901 nir_imm_float(b, 0.0),
902 nir_imm_float(b, 0.0),
903 nir_imm_float(b, 1.0)),
904 WRITEMASK_XYZW);
905
906 /* We inserted the real input into the list so the driver has real
907 * inputs, but we set c->input_vars[i] to the temporary so we use
908 * the splatted value.
909 */
910 c->input_vars[i] = fullvar;
911 continue;
912 }
913 }
914
915 c->input_vars[i] = var;
916 }
917
918 /* Create system value variables */
919 int i;
920 BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
921 nir_variable *var =
922 nir_variable_create(shader, nir_var_system_value, glsl_vec4_type(),
923 ralloc_asprintf(shader, "sv_%d", i));
924 var->data.location = i;
925 var->data.index = 0;
926
927 c->sysval_vars[i] = var;
928 }
929
930 /* Create output registers and variables. */
931 int max_outputs = util_last_bit64(c->prog->info.outputs_written);
932 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
933
934 uint64_t outputs_written = c->prog->info.outputs_written;
935 while (outputs_written) {
936 const int i = u_bit_scan64(&outputs_written);
937
938 /* Since we can't load from outputs in the IR, we make temporaries
939 * for the outputs and emit stores to the real outputs at the end of
940 * the shader.
941 */
942 nir_register *reg = nir_local_reg_create(b->impl);
943 reg->num_components = 4;
944
945 const struct glsl_type *type;
946 if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
947 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
948 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
949 type = glsl_float_type();
950 else
951 type = glsl_vec4_type();
952
953 nir_variable *var =
954 nir_variable_create(shader, nir_var_shader_out, type,
955 ralloc_asprintf(shader, "out_%d", i));
956 var->data.location = i;
957 var->data.index = 0;
958
959 c->output_regs[i] = reg;
960 c->output_vars[i] = var;
961 }
962
963 /* Create temporary registers. */
964 c->temp_regs = rzalloc_array(c, nir_register *,
965 c->prog->arb.NumTemporaries);
966
967 nir_register *reg;
968 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
969 reg = nir_local_reg_create(b->impl);
970 if (!reg) {
971 c->error = true;
972 return;
973 }
974 reg->num_components = 4;
975 c->temp_regs[i] = reg;
976 }
977
978 /* Create the address register (for ARB_vertex_program). */
979 reg = nir_local_reg_create(b->impl);
980 if (!reg) {
981 c->error = true;
982 return;
983 }
984 reg->num_components = 1;
985 c->addr_reg = reg;
986 }
987
988 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog,const nir_shader_compiler_options * options)989 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
990 const nir_shader_compiler_options *options)
991 {
992 struct ptn_compile *c;
993 struct nir_shader *s;
994 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
995
996 c = rzalloc(NULL, struct ptn_compile);
997 if (!c)
998 return NULL;
999 c->prog = prog;
1000 c->ctx = ctx;
1001
1002 c->build = nir_builder_init_simple_shader(stage, options, NULL);
1003
1004 /* Copy the shader_info from the gl_program */
1005 c->build.shader->info = prog->info;
1006
1007 s = c->build.shader;
1008
1009 if (prog->Parameters->NumParameters > 0) {
1010 const struct glsl_type *type =
1011 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
1012 c->parameters =
1013 nir_variable_create(s, nir_var_uniform, type,
1014 prog->Parameters->Parameters[0].Name);
1015 }
1016
1017 setup_registers_and_variables(c);
1018 if (unlikely(c->error))
1019 goto fail;
1020
1021 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1022 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1023
1024 if (unlikely(c->error))
1025 break;
1026 }
1027
1028 ptn_add_output_stores(c);
1029
1030 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1031 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1032 s->info.num_ubos = 0;
1033 s->info.num_abos = 0;
1034 s->info.num_ssbos = 0;
1035 s->info.num_images = 0;
1036 s->info.uses_texture_gather = false;
1037 s->info.clip_distance_array_size = 0;
1038 s->info.cull_distance_array_size = 0;
1039 s->info.separate_shader = false;
1040 s->info.io_lowered = false;
1041 s->info.internal = false;
1042
1043 fail:
1044 if (c->error) {
1045 ralloc_free(s);
1046 s = NULL;
1047 }
1048 ralloc_free(c);
1049 return s;
1050 }
1051