1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 #include "main/imports.h"
30 #include "util/ralloc.h"
31
32 #include "prog_to_nir.h"
33 #include "prog_instruction.h"
34 #include "prog_parameter.h"
35 #include "prog_print.h"
36 #include "program.h"
37
38 /**
39 * \file prog_to_nir.c
40 *
41 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
42 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
43 * vertex processing. Full GLSL support should use glsl_to_nir instead.
44 */
45
/**
 * State shared by the ptn_* helpers while a single gl_program is being
 * translated to NIR.
 */
struct ptn_compile {
   /* Program being translated. */
   const struct gl_program *prog;
   /* Builder through which every NIR instruction is emitted. */
   nir_builder build;
   /* Set when register setup fails; prog_to_nir() then frees the shader and
    * returns NULL.
    */
   bool error;

   /* Uniform vec4 array covering prog->Parameters; left NULL when the
    * program has no parameters.
    */
   nir_variable *parameters;
   /* Variable loaded for each PROGRAM_INPUT index. */
   nir_variable *input_vars[VARYING_SLOT_MAX];
   /* Shader output variable for each PROGRAM_OUTPUT index. */
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Registers that stand in for the outputs until the final stores. */
   nir_register **output_regs;
   /* One register per PROGRAM_TEMPORARY. */
   nir_register **temp_regs;

   /* Single-component register used for PROGRAM_ADDRESS index 0. */
   nir_register *addr_reg;
};
59
/* Builds a four-entry unsigned swizzle array (as a compound literal) from
 * component names, e.g. SWIZ(X, Y, Z, W).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Selects the single channel `ch` of `src` as a one-component value. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)
63
64 static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile * c,nir_alu_dest * dest)65 ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
66 {
67 nir_builder *b = &c->build;
68
69 nir_alu_src src;
70 memset(&src, 0, sizeof(src));
71
72 if (dest->dest.is_ssa)
73 src.src = nir_src_for_ssa(&dest->dest.ssa);
74 else {
75 assert(!dest->dest.reg.indirect);
76 src.src = nir_src_for_reg(dest->dest.reg.reg);
77 src.src.reg.base_offset = dest->dest.reg.base_offset;
78 }
79
80 for (int i = 0; i < 4; i++)
81 src.swizzle[i] = i;
82
83 return nir_fmov_alu(b, src, 4);
84 }
85
86 static nir_alu_dest
ptn_get_dest(struct ptn_compile * c,const struct prog_dst_register * prog_dst)87 ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
88 {
89 nir_alu_dest dest;
90
91 memset(&dest, 0, sizeof(dest));
92
93 switch (prog_dst->File) {
94 case PROGRAM_TEMPORARY:
95 dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
96 break;
97 case PROGRAM_OUTPUT:
98 dest.dest.reg.reg = c->output_regs[prog_dst->Index];
99 break;
100 case PROGRAM_ADDRESS:
101 assert(prog_dst->Index == 0);
102 dest.dest.reg.reg = c->addr_reg;
103 break;
104 case PROGRAM_UNDEFINED:
105 break;
106 }
107
108 dest.write_mask = prog_dst->WriteMask;
109 dest.saturate = false;
110
111 assert(!prog_dst->RelAddr);
112
113 return dest;
114 }
115
/**
 * Emits the NIR needed to read a Mesa IR source operand and returns the
 * resulting SSA def with the operand's swizzle and negation applied.
 *
 * - PROGRAM_UNDEFINED returns nir_imm_float(b, 0.0) directly, skipping the
 *   swizzle/negate handling below.
 * - PROGRAM_TEMPORARY reads the matching temporary register.
 * - PROGRAM_INPUT emits a load_var of the input variable.
 * - PROGRAM_STATE_VAR / PROGRAM_CONSTANT either become an immediate vec4 or a
 *   load_var from the "parameters" array (optionally indirect through the
 *   address register).
 *
 * Any other register file prints a message to stderr and aborts.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      /* With relative addressing the parameter index is not known here, so
       * the operand's own register file is used instead.
       */
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Constants become immediates only when the program never indexes
          * the constant file indirectly; otherwise fall through and load
          * them from the parameters array.
          */
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         load->num_components = 4;

         /* Deref chain: parameters variable -> one vec4 array element. */
         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            /* Read the address register as an integer. */
            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      /* Negate is either none or all four channels on this path. */
      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            /* Scalar move that picks the selected source channel. */
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}
266
267 static void
ptn_alu(nir_builder * b,nir_op op,nir_alu_dest dest,nir_ssa_def ** src)268 ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
269 {
270 unsigned num_srcs = nir_op_infos[op].num_inputs;
271 nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
272 unsigned i;
273
274 for (i = 0; i < num_srcs; i++)
275 instr->src[i].src = nir_src_for_ssa(src[i]);
276
277 instr->dest = dest;
278 nir_builder_instr_insert(b, &instr->instr);
279 }
280
281 static void
ptn_move_dest_masked(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def,unsigned write_mask)282 ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
283 nir_ssa_def *def, unsigned write_mask)
284 {
285 if (!(dest.write_mask & write_mask))
286 return;
287
288 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
289 if (!mov)
290 return;
291
292 mov->dest = dest;
293 mov->dest.write_mask &= write_mask;
294 mov->src[0].src = nir_src_for_ssa(def);
295 for (unsigned i = def->num_components; i < 4; i++)
296 mov->src[0].swizzle[i] = def->num_components - 1;
297 nir_builder_instr_insert(b, &mov->instr);
298 }
299
300 static void
ptn_move_dest(nir_builder * b,nir_alu_dest dest,nir_ssa_def * def)301 ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
302 {
303 ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
304 }
305
306 static void
ptn_arl(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)307 ptn_arl(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
308 {
309 ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
310 }
311
312 /* EXP - Approximate Exponential Base 2
313 * dst.x = 2^{\lfloor src.x\rfloor}
314 * dst.y = src.x - \lfloor src.x\rfloor
315 * dst.z = 2^{src.x}
316 * dst.w = 1.0
317 */
318 static void
ptn_exp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)319 ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
320 {
321 nir_ssa_def *srcx = ptn_channel(b, src[0], X);
322
323 ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
324 ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
325 ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
326 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
327 }
328
329 /* LOG - Approximate Logarithm Base 2
330 * dst.x = \lfloor\log_2{|src.x|}\rfloor
331 * dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
332 * dst.z = \log_2{|src.x|}
333 * dst.w = 1.0
334 */
335 static void
ptn_log(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)336 ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
337 {
338 nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
339 nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
340 nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
341
342 ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
343 ptn_move_dest_masked(b, dest,
344 nir_fmul(b, abs_srcx,
345 nir_fexp2(b, nir_fneg(b, floor_log2))),
346 WRITEMASK_Y);
347 ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
348 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
349 }
350
351 /* DST - Distance Vector
352 * dst.x = 1.0
353 * dst.y = src0.y \times src1.y
354 * dst.z = src0.z
355 * dst.w = src1.w
356 */
357 static void
ptn_dst(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)358 ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
359 {
360 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
361 ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
362 ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
363 ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
364 }
365
366 /* LIT - Light Coefficients
367 * dst.x = 1.0
368 * dst.y = max(src.x, 0.0)
369 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
370 * dst.w = 1.0
371 */
372 static void
ptn_lit(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)373 ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
374 {
375 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);
376
377 ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
378 nir_imm_float(b, 0.0)), WRITEMASK_Y);
379
380 if (dest.write_mask & WRITEMASK_Z) {
381 nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
382 nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
383 nir_imm_float(b, 128.0)),
384 nir_imm_float(b, -128.0));
385 nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
386 wclamp);
387
388 nir_ssa_def *z;
389 if (b->shader->options->native_integers) {
390 z = nir_bcsel(b,
391 nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
392 nir_imm_float(b, 0.0),
393 pow);
394 } else {
395 z = nir_fcsel(b,
396 nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
397 nir_imm_float(b, 0.0),
398 pow);
399 }
400
401 ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
402 }
403 }
404
405 /* SCS - Sine Cosine
406 * dst.x = \cos{src.x}
407 * dst.y = \sin{src.x}
408 * dst.z = 0.0
409 * dst.w = 1.0
410 */
411 static void
ptn_scs(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)412 ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
413 {
414 ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
415 WRITEMASK_X);
416 ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
417 WRITEMASK_Y);
418 ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
419 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
420 }
421
422 /**
423 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
424 */
425 static void
ptn_slt(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)426 ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
427 {
428 if (b->shader->options->native_integers) {
429 ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
430 } else {
431 ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
432 }
433 }
434
435 /**
436 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
437 */
438 static void
ptn_sge(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)439 ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
440 {
441 if (b->shader->options->native_integers) {
442 ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
443 } else {
444 ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
445 }
446 }
447
448 static void
ptn_xpd(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)449 ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
450 {
451 ptn_move_dest_masked(b, dest,
452 nir_fsub(b,
453 nir_fmul(b,
454 nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
455 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
456 nir_fmul(b,
457 nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
458 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
459 WRITEMASK_XYZ);
460 ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
461 }
462
463 static void
ptn_dp2(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)464 ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
465 {
466 ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
467 }
468
469 static void
ptn_dp3(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)470 ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
471 {
472 ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
473 }
474
475 static void
ptn_dp4(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)476 ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
477 {
478 ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
479 }
480
481 static void
ptn_dph(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)482 ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
483 {
484 ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
485 }
486
487 static void
ptn_cmp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)488 ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
489 {
490 if (b->shader->options->native_integers) {
491 ptn_move_dest(b, dest, nir_bcsel(b,
492 nir_flt(b, src[0], nir_imm_float(b, 0.0)),
493 src[1], src[2]));
494 } else {
495 ptn_move_dest(b, dest, nir_fcsel(b,
496 nir_slt(b, src[0], nir_imm_float(b, 0.0)),
497 src[1], src[2]));
498 }
499 }
500
501 static void
ptn_lrp(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src)502 ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
503 {
504 ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
505 }
506
507 static void
ptn_kil(nir_builder * b,nir_ssa_def ** src)508 ptn_kil(nir_builder *b, nir_ssa_def **src)
509 {
510 nir_ssa_def *cmp = b->shader->options->native_integers ?
511 nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
512 nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));
513
514 nir_intrinsic_instr *discard =
515 nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
516 discard->src[0] = nir_src_for_ssa(cmp);
517 nir_builder_instr_insert(b, &discard->instr);
518 }
519
520 static void
ptn_tex(nir_builder * b,nir_alu_dest dest,nir_ssa_def ** src,struct prog_instruction * prog_inst)521 ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
522 struct prog_instruction *prog_inst)
523 {
524 nir_tex_instr *instr;
525 nir_texop op;
526 unsigned num_srcs;
527
528 switch (prog_inst->Opcode) {
529 case OPCODE_TEX:
530 op = nir_texop_tex;
531 num_srcs = 1;
532 break;
533 case OPCODE_TXB:
534 op = nir_texop_txb;
535 num_srcs = 2;
536 break;
537 case OPCODE_TXD:
538 op = nir_texop_txd;
539 num_srcs = 3;
540 break;
541 case OPCODE_TXL:
542 op = nir_texop_txl;
543 num_srcs = 2;
544 break;
545 case OPCODE_TXP:
546 op = nir_texop_tex;
547 num_srcs = 2;
548 break;
549 default:
550 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
551 abort();
552 }
553
554 if (prog_inst->TexShadow)
555 num_srcs++;
556
557 instr = nir_tex_instr_create(b->shader, num_srcs);
558 instr->op = op;
559 instr->dest_type = nir_type_float;
560 instr->is_shadow = prog_inst->TexShadow;
561 instr->texture_index = prog_inst->TexSrcUnit;
562 instr->sampler_index = prog_inst->TexSrcUnit;
563
564 switch (prog_inst->TexSrcTarget) {
565 case TEXTURE_1D_INDEX:
566 instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
567 break;
568 case TEXTURE_2D_INDEX:
569 instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
570 break;
571 case TEXTURE_3D_INDEX:
572 instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
573 break;
574 case TEXTURE_CUBE_INDEX:
575 instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
576 break;
577 case TEXTURE_RECT_INDEX:
578 instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
579 break;
580 default:
581 fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
582 abort();
583 }
584
585 switch (instr->sampler_dim) {
586 case GLSL_SAMPLER_DIM_1D:
587 case GLSL_SAMPLER_DIM_BUF:
588 instr->coord_components = 1;
589 break;
590 case GLSL_SAMPLER_DIM_2D:
591 case GLSL_SAMPLER_DIM_RECT:
592 case GLSL_SAMPLER_DIM_EXTERNAL:
593 case GLSL_SAMPLER_DIM_MS:
594 instr->coord_components = 2;
595 break;
596 case GLSL_SAMPLER_DIM_3D:
597 case GLSL_SAMPLER_DIM_CUBE:
598 instr->coord_components = 3;
599 break;
600 case GLSL_SAMPLER_DIM_SUBPASS:
601 case GLSL_SAMPLER_DIM_SUBPASS_MS:
602 unreachable("can't reach");
603 }
604
605 unsigned src_number = 0;
606
607 instr->src[src_number].src =
608 nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
609 instr->coord_components, true));
610 instr->src[src_number].src_type = nir_tex_src_coord;
611 src_number++;
612
613 if (prog_inst->Opcode == OPCODE_TXP) {
614 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
615 instr->src[src_number].src_type = nir_tex_src_projector;
616 src_number++;
617 }
618
619 if (prog_inst->Opcode == OPCODE_TXB) {
620 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
621 instr->src[src_number].src_type = nir_tex_src_bias;
622 src_number++;
623 }
624
625 if (prog_inst->Opcode == OPCODE_TXL) {
626 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
627 instr->src[src_number].src_type = nir_tex_src_lod;
628 src_number++;
629 }
630
631 if (instr->is_shadow) {
632 if (instr->coord_components < 3)
633 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
634 else
635 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
636
637 instr->src[src_number].src_type = nir_tex_src_comparator;
638 src_number++;
639 }
640
641 assert(src_number == num_srcs);
642
643 nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
644 nir_builder_instr_insert(b, &instr->instr);
645
646 /* Resolve the writemask on the texture op. */
647 ptn_move_dest(b, dest, &instr->dest.ssa);
648 }
649
/* Mesa IR opcode -> NIR ALU opcode for instructions that translate one-to-one
 * through ptn_alu().  An entry of 0 means there is no direct mapping: the
 * opcode is either handled by a dedicated case in ptn_emit_instruction() or
 * reported there as unknown.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};
697
698 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)699 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
700 {
701 nir_builder *b = &c->build;
702 unsigned i;
703 const unsigned op = prog_inst->Opcode;
704
705 if (op == OPCODE_END)
706 return;
707
708 nir_ssa_def *src[3];
709 for (i = 0; i < 3; i++) {
710 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
711 }
712 nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
713 if (c->error)
714 return;
715
716 switch (op) {
717 case OPCODE_RSQ:
718 ptn_move_dest(b, dest,
719 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X))));
720 break;
721
722 case OPCODE_RCP:
723 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X)));
724 break;
725
726 case OPCODE_EX2:
727 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X)));
728 break;
729
730 case OPCODE_LG2:
731 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X)));
732 break;
733
734 case OPCODE_POW:
735 ptn_move_dest(b, dest, nir_fpow(b,
736 ptn_channel(b, src[0], X),
737 ptn_channel(b, src[1], X)));
738 break;
739
740 case OPCODE_COS:
741 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)));
742 break;
743
744 case OPCODE_SIN:
745 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)));
746 break;
747
748 case OPCODE_ARL:
749 ptn_arl(b, dest, src);
750 break;
751
752 case OPCODE_EXP:
753 ptn_exp(b, dest, src);
754 break;
755
756 case OPCODE_LOG:
757 ptn_log(b, dest, src);
758 break;
759
760 case OPCODE_LRP:
761 ptn_lrp(b, dest, src);
762 break;
763
764 case OPCODE_MAD:
765 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]));
766 break;
767
768 case OPCODE_DST:
769 ptn_dst(b, dest, src);
770 break;
771
772 case OPCODE_LIT:
773 ptn_lit(b, dest, src);
774 break;
775
776 case OPCODE_XPD:
777 ptn_xpd(b, dest, src);
778 break;
779
780 case OPCODE_DP2:
781 ptn_dp2(b, dest, src);
782 break;
783
784 case OPCODE_DP3:
785 ptn_dp3(b, dest, src);
786 break;
787
788 case OPCODE_DP4:
789 ptn_dp4(b, dest, src);
790 break;
791
792 case OPCODE_DPH:
793 ptn_dph(b, dest, src);
794 break;
795
796 case OPCODE_KIL:
797 ptn_kil(b, src);
798 break;
799
800 case OPCODE_CMP:
801 ptn_cmp(b, dest, src);
802 break;
803
804 case OPCODE_SCS:
805 ptn_scs(b, dest, src);
806 break;
807
808 case OPCODE_SLT:
809 ptn_slt(b, dest, src);
810 break;
811
812 case OPCODE_SGE:
813 ptn_sge(b, dest, src);
814 break;
815
816 case OPCODE_TEX:
817 case OPCODE_TXB:
818 case OPCODE_TXD:
819 case OPCODE_TXL:
820 case OPCODE_TXP:
821 ptn_tex(b, dest, src, prog_inst);
822 break;
823
824 case OPCODE_SWZ:
825 /* Extended swizzles were already handled in ptn_get_src(). */
826 ptn_alu(b, nir_op_fmov, dest, src);
827 break;
828
829 case OPCODE_NOP:
830 break;
831
832 default:
833 if (op_trans[op] != 0) {
834 ptn_alu(b, op_trans[op], dest, src);
835 } else {
836 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
837 abort();
838 }
839 break;
840 }
841
842 if (prog_inst->Saturate) {
843 assert(prog_inst->Saturate);
844 assert(!dest.dest.is_ssa);
845 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
846 }
847 }
848
849 /**
850 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
851 * variables at the end of the shader.
852 *
853 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
854 * written, because there's no output load intrinsic, which means we couldn't
855 * handle writemasks.
856 */
857 static void
ptn_add_output_stores(struct ptn_compile * c)858 ptn_add_output_stores(struct ptn_compile *c)
859 {
860 nir_builder *b = &c->build;
861
862 nir_foreach_variable(var, &b->shader->outputs) {
863 nir_intrinsic_instr *store =
864 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
865 store->num_components = glsl_get_vector_elements(var->type);
866 nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1);
867 store->variables[0] =
868 nir_deref_var_create(store, c->output_vars[var->data.location]);
869
870 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
871 var->data.location == FRAG_RESULT_DEPTH) {
872 /* result.depth has this strange convention of being the .z component of
873 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
874 * match GLSL's gl_FragDepth and the expectations of most backends.
875 */
876 nir_alu_src alu_src = { NIR_SRC_INIT };
877 alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
878 alu_src.swizzle[0] = SWIZZLE_Z;
879 store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
880 } else {
881 store->src[0].reg.reg = c->output_regs[var->data.location];
882 }
883 nir_builder_instr_insert(b, &store->instr);
884 }
885 }
886
887 static void
setup_registers_and_variables(struct ptn_compile * c)888 setup_registers_and_variables(struct ptn_compile *c)
889 {
890 nir_builder *b = &c->build;
891 struct nir_shader *shader = b->shader;
892
893 /* Create input variables. */
894 const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
895 for (int i = 0; i < num_inputs; i++) {
896 if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
897 continue;
898
899 nir_variable *var =
900 nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
901 ralloc_asprintf(shader, "in_%d", i));
902 var->data.location = i;
903 var->data.index = 0;
904
905 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
906 if (i == VARYING_SLOT_POS) {
907 var->data.origin_upper_left = c->prog->OriginUpperLeft;
908 var->data.pixel_center_integer = c->prog->PixelCenterInteger;
909 } else if (i == VARYING_SLOT_FOGC) {
910 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
911 * input variable a float, and create a local containing the
912 * full vec4 value.
913 */
914 var->type = glsl_float_type();
915
916 nir_intrinsic_instr *load_x =
917 nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
918 load_x->num_components = 1;
919 load_x->variables[0] = nir_deref_var_create(load_x, var);
920 nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
921 nir_builder_instr_insert(b, &load_x->instr);
922
923 nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
924 nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));
925
926 nir_variable *fullvar =
927 nir_local_variable_create(b->impl, glsl_vec4_type(),
928 "fogcoord_tmp");
929 nir_intrinsic_instr *store =
930 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
931 store->num_components = 4;
932 nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
933 store->variables[0] = nir_deref_var_create(store, fullvar);
934 store->src[0] = nir_src_for_ssa(f001);
935 nir_builder_instr_insert(b, &store->instr);
936
937 /* We inserted the real input into the list so the driver has real
938 * inputs, but we set c->input_vars[i] to the temporary so we use
939 * the splatted value.
940 */
941 c->input_vars[i] = fullvar;
942 continue;
943 }
944 }
945
946 c->input_vars[i] = var;
947 }
948
949 /* Create output registers and variables. */
950 int max_outputs = util_last_bit(c->prog->info.outputs_written);
951 c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
952
953 for (int i = 0; i < max_outputs; i++) {
954 if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
955 continue;
956
957 /* Since we can't load from outputs in the IR, we make temporaries
958 * for the outputs and emit stores to the real outputs at the end of
959 * the shader.
960 */
961 nir_register *reg = nir_local_reg_create(b->impl);
962 reg->num_components = 4;
963
964 nir_variable *var = rzalloc(shader, nir_variable);
965 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
966 var->type = glsl_float_type();
967 else
968 var->type = glsl_vec4_type();
969 var->data.mode = nir_var_shader_out;
970 var->name = ralloc_asprintf(var, "out_%d", i);
971
972 var->data.location = i;
973 var->data.index = 0;
974
975 c->output_regs[i] = reg;
976
977 exec_list_push_tail(&shader->outputs, &var->node);
978 c->output_vars[i] = var;
979 }
980
981 /* Create temporary registers. */
982 c->temp_regs = rzalloc_array(c, nir_register *,
983 c->prog->arb.NumTemporaries);
984
985 nir_register *reg;
986 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
987 reg = nir_local_reg_create(b->impl);
988 if (!reg) {
989 c->error = true;
990 return;
991 }
992 reg->num_components = 4;
993 c->temp_regs[i] = reg;
994 }
995
996 /* Create the address register (for ARB_vertex_program). */
997 reg = nir_local_reg_create(b->impl);
998 if (!reg) {
999 c->error = true;
1000 return;
1001 }
1002 reg->num_components = 1;
1003 c->addr_reg = reg;
1004 }
1005
1006 struct nir_shader *
prog_to_nir(const struct gl_program * prog,const nir_shader_compiler_options * options)1007 prog_to_nir(const struct gl_program *prog,
1008 const nir_shader_compiler_options *options)
1009 {
1010 struct ptn_compile *c;
1011 struct nir_shader *s;
1012 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
1013
1014 c = rzalloc(NULL, struct ptn_compile);
1015 if (!c)
1016 return NULL;
1017 c->prog = prog;
1018
1019 nir_builder_init_simple_shader(&c->build, NULL, stage, options);
1020
1021 /* Copy the shader_info from the gl_program */
1022 c->build.shader->info = prog->info;
1023
1024 s = c->build.shader;
1025
1026 if (prog->Parameters->NumParameters > 0) {
1027 c->parameters = rzalloc(s, nir_variable);
1028 c->parameters->type =
1029 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
1030 c->parameters->name = "parameters";
1031 c->parameters->data.read_only = true;
1032 c->parameters->data.mode = nir_var_uniform;
1033 exec_list_push_tail(&s->uniforms, &c->parameters->node);
1034 }
1035
1036 setup_registers_and_variables(c);
1037 if (unlikely(c->error))
1038 goto fail;
1039
1040 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
1041 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
1042
1043 if (unlikely(c->error))
1044 break;
1045 }
1046
1047 ptn_add_output_stores(c);
1048
1049 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
1050 s->info.num_textures = util_last_bit(prog->SamplersUsed);
1051 s->info.num_ubos = 0;
1052 s->info.num_abos = 0;
1053 s->info.num_ssbos = 0;
1054 s->info.num_images = 0;
1055 s->info.uses_texture_gather = false;
1056 s->info.clip_distance_array_size = 0;
1057 s->info.cull_distance_array_size = 0;
1058 s->info.separate_shader = false;
1059
1060 fail:
1061 if (c->error) {
1062 ralloc_free(s);
1063 s = NULL;
1064 }
1065 ralloc_free(c);
1066 return s;
1067 }
1068