1 /*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #ifndef NIR_BUILDER_H
25 #define NIR_BUILDER_H
26
27 #include "nir_control_flow.h"
28 #include "util/bitscan.h"
29 #include "util/half_float.h"
30
31 struct exec_list;
32
/**
 * Builder state used by the helpers in this header while emitting NIR.
 */
typedef struct nir_builder {
   /* Where the next instruction or control-flow node is inserted. */
   nir_cursor cursor;

   /* Whether new ALU instructions will be marked "exact" */
   bool exact;

   /* Whether to run divergence analysis on inserted instructions (loop merge
    * and header phis are not updated). */
   bool update_divergence;

   /* Shader handed to the instruction / control-flow constructors. */
   nir_shader *shader;
   /* Function implementation being built; undefs are placed at the start of
    * its body. */
   nir_function_impl *impl;
} nir_builder;
46
47 static inline void
nir_builder_init(nir_builder * build,nir_function_impl * impl)48 nir_builder_init(nir_builder *build, nir_function_impl *impl)
49 {
50 memset(build, 0, sizeof(*build));
51 build->exact = false;
52 build->impl = impl;
53 build->shader = impl->function->shader;
54 }
55
56 static inline nir_builder MUST_CHECK PRINTFLIKE(3, 4)
nir_builder_init_simple_shader(gl_shader_stage stage,const nir_shader_compiler_options * options,const char * name,...)57 nir_builder_init_simple_shader(gl_shader_stage stage,
58 const nir_shader_compiler_options *options,
59 const char *name, ...)
60 {
61 nir_builder b;
62
63 memset(&b, 0, sizeof(b));
64 b.shader = nir_shader_create(NULL, stage, options, NULL);
65
66 if (name) {
67 va_list args;
68 va_start(args, name);
69 b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
70 va_end(args);
71 }
72
73 nir_function *func = nir_function_create(b.shader, "main");
74 func->is_entrypoint = true;
75 b.exact = false;
76 b.impl = nir_function_impl_create(func);
77 b.cursor = nir_after_cf_list(&b.impl->body);
78
79 return b;
80 }
81
/* Per-instruction callback used by nir_shader_instructions_pass(): receives
 * the builder, the current instruction and the caller's cb_data pointer; the
 * returned bool is accumulated as "progress".
 */
typedef bool (*nir_instr_pass_cb)(struct nir_builder *, nir_instr *, void *);
83
84 /**
85 * Iterates over all the instructions in a NIR shader and calls the given pass
86 * on them.
87 *
88 * The pass should return true if it modified the shader. In that case, only
89 * the preserved metadata flags will be preserved in the function impl.
90 *
91 * The builder will be initialized to point at the function impl, but its
92 * cursor is unset.
93 */
94 static inline bool
nir_shader_instructions_pass(nir_shader * shader,nir_instr_pass_cb pass,nir_metadata preserved,void * cb_data)95 nir_shader_instructions_pass(nir_shader *shader,
96 nir_instr_pass_cb pass,
97 nir_metadata preserved,
98 void *cb_data)
99 {
100 bool progress = false;
101
102 nir_foreach_function(function, shader) {
103 if (!function->impl)
104 continue;
105
106 bool func_progress = false;
107 nir_builder b;
108 nir_builder_init(&b, function->impl);
109
110 nir_foreach_block_safe(block, function->impl) {
111 nir_foreach_instr_safe(instr, block) {
112 func_progress |= pass(&b, instr, cb_data);
113 }
114 }
115
116 if (func_progress) {
117 nir_metadata_preserve(function->impl, preserved);
118 progress = true;
119 } else {
120 nir_metadata_preserve(function->impl, nir_metadata_all);
121 }
122 }
123
124 return progress;
125 }
126
127 static inline void
nir_builder_instr_insert(nir_builder * build,nir_instr * instr)128 nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
129 {
130 nir_instr_insert(build->cursor, instr);
131
132 if (build->update_divergence)
133 nir_update_instr_divergence(build->shader, instr);
134
135 /* Move the cursor forward. */
136 build->cursor = nir_after_instr(instr);
137 }
138
139 static inline nir_instr *
nir_builder_last_instr(nir_builder * build)140 nir_builder_last_instr(nir_builder *build)
141 {
142 assert(build->cursor.option == nir_cursor_after_instr);
143 return build->cursor.instr;
144 }
145
146 static inline void
nir_builder_cf_insert(nir_builder * build,nir_cf_node * cf)147 nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
148 {
149 nir_cf_node_insert(build->cursor, cf);
150 }
151
152 static inline bool
nir_builder_is_inside_cf(nir_builder * build,nir_cf_node * cf_node)153 nir_builder_is_inside_cf(nir_builder *build, nir_cf_node *cf_node)
154 {
155 nir_block *block = nir_cursor_current_block(build->cursor);
156 for (nir_cf_node *n = &block->cf_node; n; n = n->parent) {
157 if (n == cf_node)
158 return true;
159 }
160 return false;
161 }
162
163 static inline nir_if *
nir_push_if_src(nir_builder * build,nir_src condition)164 nir_push_if_src(nir_builder *build, nir_src condition)
165 {
166 nir_if *nif = nir_if_create(build->shader);
167 nif->condition = condition;
168 nir_builder_cf_insert(build, &nif->cf_node);
169 build->cursor = nir_before_cf_list(&nif->then_list);
170 return nif;
171 }
172
173 static inline nir_if *
nir_push_if(nir_builder * build,nir_ssa_def * condition)174 nir_push_if(nir_builder *build, nir_ssa_def *condition)
175 {
176 return nir_push_if_src(build, nir_src_for_ssa(condition));
177 }
178
179 static inline nir_if *
nir_push_else(nir_builder * build,nir_if * nif)180 nir_push_else(nir_builder *build, nir_if *nif)
181 {
182 if (nif) {
183 assert(nir_builder_is_inside_cf(build, &nif->cf_node));
184 } else {
185 nir_block *block = nir_cursor_current_block(build->cursor);
186 nif = nir_cf_node_as_if(block->cf_node.parent);
187 }
188 build->cursor = nir_before_cf_list(&nif->else_list);
189 return nif;
190 }
191
192 static inline void
nir_pop_if(nir_builder * build,nir_if * nif)193 nir_pop_if(nir_builder *build, nir_if *nif)
194 {
195 if (nif) {
196 assert(nir_builder_is_inside_cf(build, &nif->cf_node));
197 } else {
198 nir_block *block = nir_cursor_current_block(build->cursor);
199 nif = nir_cf_node_as_if(block->cf_node.parent);
200 }
201 build->cursor = nir_after_cf_node(&nif->cf_node);
202 }
203
204 static inline nir_ssa_def *
nir_if_phi(nir_builder * build,nir_ssa_def * then_def,nir_ssa_def * else_def)205 nir_if_phi(nir_builder *build, nir_ssa_def *then_def, nir_ssa_def *else_def)
206 {
207 nir_block *block = nir_cursor_current_block(build->cursor);
208 nir_if *nif = nir_cf_node_as_if(nir_cf_node_prev(&block->cf_node));
209
210 nir_phi_instr *phi = nir_phi_instr_create(build->shader);
211 nir_phi_instr_add_src(phi, nir_if_last_then_block(nif), nir_src_for_ssa(then_def));
212 nir_phi_instr_add_src(phi, nir_if_last_else_block(nif), nir_src_for_ssa(else_def));
213
214 assert(then_def->num_components == else_def->num_components);
215 assert(then_def->bit_size == else_def->bit_size);
216 nir_ssa_dest_init(&phi->instr, &phi->dest,
217 then_def->num_components, then_def->bit_size, NULL);
218
219 nir_builder_instr_insert(build, &phi->instr);
220
221 return &phi->dest.ssa;
222 }
223
224 static inline nir_loop *
nir_push_loop(nir_builder * build)225 nir_push_loop(nir_builder *build)
226 {
227 nir_loop *loop = nir_loop_create(build->shader);
228 nir_builder_cf_insert(build, &loop->cf_node);
229 build->cursor = nir_before_cf_list(&loop->body);
230 return loop;
231 }
232
233 static inline void
nir_pop_loop(nir_builder * build,nir_loop * loop)234 nir_pop_loop(nir_builder *build, nir_loop *loop)
235 {
236 if (loop) {
237 assert(nir_builder_is_inside_cf(build, &loop->cf_node));
238 } else {
239 nir_block *block = nir_cursor_current_block(build->cursor);
240 loop = nir_cf_node_as_loop(block->cf_node.parent);
241 }
242 build->cursor = nir_after_cf_node(&loop->cf_node);
243 }
244
245 static inline nir_ssa_def *
nir_ssa_undef(nir_builder * build,unsigned num_components,unsigned bit_size)246 nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
247 {
248 nir_ssa_undef_instr *undef =
249 nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
250 if (!undef)
251 return NULL;
252
253 nir_instr_insert(nir_before_cf_list(&build->impl->body), &undef->instr);
254 if (build->update_divergence)
255 nir_update_instr_divergence(build->shader, &undef->instr);
256
257 return &undef->def;
258 }
259
260 static inline nir_ssa_def *
nir_build_imm(nir_builder * build,unsigned num_components,unsigned bit_size,const nir_const_value * value)261 nir_build_imm(nir_builder *build, unsigned num_components,
262 unsigned bit_size, const nir_const_value *value)
263 {
264 nir_load_const_instr *load_const =
265 nir_load_const_instr_create(build->shader, num_components, bit_size);
266 if (!load_const)
267 return NULL;
268
269 memcpy(load_const->value, value, sizeof(nir_const_value) * num_components);
270
271 nir_builder_instr_insert(build, &load_const->instr);
272
273 return &load_const->def;
274 }
275
276 static inline nir_ssa_def *
nir_imm_zero(nir_builder * build,unsigned num_components,unsigned bit_size)277 nir_imm_zero(nir_builder *build, unsigned num_components, unsigned bit_size)
278 {
279 nir_load_const_instr *load_const =
280 nir_load_const_instr_create(build->shader, num_components, bit_size);
281
282 /* nir_load_const_instr_create uses rzalloc so it's already zero */
283
284 nir_builder_instr_insert(build, &load_const->instr);
285
286 return &load_const->def;
287 }
288
289 static inline nir_ssa_def *
nir_imm_boolN_t(nir_builder * build,bool x,unsigned bit_size)290 nir_imm_boolN_t(nir_builder *build, bool x, unsigned bit_size)
291 {
292 nir_const_value v = nir_const_value_for_bool(x, bit_size);
293 return nir_build_imm(build, 1, bit_size, &v);
294 }
295
296 static inline nir_ssa_def *
nir_imm_bool(nir_builder * build,bool x)297 nir_imm_bool(nir_builder *build, bool x)
298 {
299 return nir_imm_boolN_t(build, x, 1);
300 }
301
302 static inline nir_ssa_def *
nir_imm_true(nir_builder * build)303 nir_imm_true(nir_builder *build)
304 {
305 return nir_imm_bool(build, true);
306 }
307
308 static inline nir_ssa_def *
nir_imm_false(nir_builder * build)309 nir_imm_false(nir_builder *build)
310 {
311 return nir_imm_bool(build, false);
312 }
313
314 static inline nir_ssa_def *
nir_imm_floatN_t(nir_builder * build,double x,unsigned bit_size)315 nir_imm_floatN_t(nir_builder *build, double x, unsigned bit_size)
316 {
317 nir_const_value v = nir_const_value_for_float(x, bit_size);
318 return nir_build_imm(build, 1, bit_size, &v);
319 }
320
321 static inline nir_ssa_def *
nir_imm_float16(nir_builder * build,float x)322 nir_imm_float16(nir_builder *build, float x)
323 {
324 return nir_imm_floatN_t(build, x, 16);
325 }
326
327 static inline nir_ssa_def *
nir_imm_float(nir_builder * build,float x)328 nir_imm_float(nir_builder *build, float x)
329 {
330 return nir_imm_floatN_t(build, x, 32);
331 }
332
333 static inline nir_ssa_def *
nir_imm_double(nir_builder * build,double x)334 nir_imm_double(nir_builder *build, double x)
335 {
336 return nir_imm_floatN_t(build, x, 64);
337 }
338
339 static inline nir_ssa_def *
nir_imm_vec2(nir_builder * build,float x,float y)340 nir_imm_vec2(nir_builder *build, float x, float y)
341 {
342 nir_const_value v[2] = {
343 nir_const_value_for_float(x, 32),
344 nir_const_value_for_float(y, 32),
345 };
346 return nir_build_imm(build, 2, 32, v);
347 }
348
349 static inline nir_ssa_def *
nir_imm_vec3(nir_builder * build,float x,float y,float z)350 nir_imm_vec3(nir_builder *build, float x, float y, float z)
351 {
352 nir_const_value v[3] = {
353 nir_const_value_for_float(x, 32),
354 nir_const_value_for_float(y, 32),
355 nir_const_value_for_float(z, 32),
356 };
357 return nir_build_imm(build, 3, 32, v);
358 }
359
360 static inline nir_ssa_def *
nir_imm_vec4(nir_builder * build,float x,float y,float z,float w)361 nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
362 {
363 nir_const_value v[4] = {
364 nir_const_value_for_float(x, 32),
365 nir_const_value_for_float(y, 32),
366 nir_const_value_for_float(z, 32),
367 nir_const_value_for_float(w, 32),
368 };
369
370 return nir_build_imm(build, 4, 32, v);
371 }
372
373 static inline nir_ssa_def *
nir_imm_vec4_16(nir_builder * build,float x,float y,float z,float w)374 nir_imm_vec4_16(nir_builder *build, float x, float y, float z, float w)
375 {
376 nir_const_value v[4] = {
377 nir_const_value_for_float(x, 16),
378 nir_const_value_for_float(y, 16),
379 nir_const_value_for_float(z, 16),
380 nir_const_value_for_float(w, 16),
381 };
382
383 return nir_build_imm(build, 4, 16, v);
384 }
385
386 static inline nir_ssa_def *
nir_imm_intN_t(nir_builder * build,uint64_t x,unsigned bit_size)387 nir_imm_intN_t(nir_builder *build, uint64_t x, unsigned bit_size)
388 {
389 nir_const_value v = nir_const_value_for_raw_uint(x, bit_size);
390 return nir_build_imm(build, 1, bit_size, &v);
391 }
392
393 static inline nir_ssa_def *
nir_imm_int(nir_builder * build,int x)394 nir_imm_int(nir_builder *build, int x)
395 {
396 return nir_imm_intN_t(build, x, 32);
397 }
398
399 static inline nir_ssa_def *
nir_imm_int64(nir_builder * build,int64_t x)400 nir_imm_int64(nir_builder *build, int64_t x)
401 {
402 return nir_imm_intN_t(build, x, 64);
403 }
404
405 static inline nir_ssa_def *
nir_imm_ivec2(nir_builder * build,int x,int y)406 nir_imm_ivec2(nir_builder *build, int x, int y)
407 {
408 nir_const_value v[2] = {
409 nir_const_value_for_int(x, 32),
410 nir_const_value_for_int(y, 32),
411 };
412
413 return nir_build_imm(build, 2, 32, v);
414 }
415
416 static inline nir_ssa_def *
nir_imm_ivec3(nir_builder * build,int x,int y,int z)417 nir_imm_ivec3(nir_builder *build, int x, int y, int z)
418 {
419 nir_const_value v[3] = {
420 nir_const_value_for_int(x, 32),
421 nir_const_value_for_int(y, 32),
422 nir_const_value_for_int(z, 32),
423 };
424
425 return nir_build_imm(build, 3, 32, v);
426 }
427
428 static inline nir_ssa_def *
nir_imm_ivec4(nir_builder * build,int x,int y,int z,int w)429 nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
430 {
431 nir_const_value v[4] = {
432 nir_const_value_for_int(x, 32),
433 nir_const_value_for_int(y, 32),
434 nir_const_value_for_int(z, 32),
435 nir_const_value_for_int(w, 32),
436 };
437
438 return nir_build_imm(build, 4, 32, v);
439 }
440
441 static inline nir_ssa_def *
nir_builder_alu_instr_finish_and_insert(nir_builder * build,nir_alu_instr * instr)442 nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
443 {
444 const nir_op_info *op_info = &nir_op_infos[instr->op];
445
446 instr->exact = build->exact;
447
448 /* Guess the number of components the destination temporary should have
449 * based on our input sizes, if it's not fixed for the op.
450 */
451 unsigned num_components = op_info->output_size;
452 if (num_components == 0) {
453 for (unsigned i = 0; i < op_info->num_inputs; i++) {
454 if (op_info->input_sizes[i] == 0)
455 num_components = MAX2(num_components,
456 instr->src[i].src.ssa->num_components);
457 }
458 }
459 assert(num_components != 0);
460
461 /* Figure out the bitwidth based on the source bitwidth if the instruction
462 * is variable-width.
463 */
464 unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
465 if (bit_size == 0) {
466 for (unsigned i = 0; i < op_info->num_inputs; i++) {
467 unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
468 if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
469 if (bit_size)
470 assert(src_bit_size == bit_size);
471 else
472 bit_size = src_bit_size;
473 } else {
474 assert(src_bit_size ==
475 nir_alu_type_get_type_size(op_info->input_types[i]));
476 }
477 }
478 }
479
480 /* When in doubt, assume 32. */
481 if (bit_size == 0)
482 bit_size = 32;
483
484 /* Make sure we don't swizzle from outside of our source vector (like if a
485 * scalar value was passed into a multiply with a vector).
486 */
487 for (unsigned i = 0; i < op_info->num_inputs; i++) {
488 for (unsigned j = instr->src[i].src.ssa->num_components;
489 j < NIR_MAX_VEC_COMPONENTS; j++) {
490 instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
491 }
492 }
493
494 nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
495 bit_size, NULL);
496 instr->dest.write_mask = (1 << num_components) - 1;
497
498 nir_builder_instr_insert(build, &instr->instr);
499
500 return &instr->dest.dest.ssa;
501 }
502
503 static inline nir_ssa_def *
nir_build_alu(nir_builder * build,nir_op op,nir_ssa_def * src0,nir_ssa_def * src1,nir_ssa_def * src2,nir_ssa_def * src3)504 nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
505 nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
506 {
507 nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
508 if (!instr)
509 return NULL;
510
511 instr->src[0].src = nir_src_for_ssa(src0);
512 if (src1)
513 instr->src[1].src = nir_src_for_ssa(src1);
514 if (src2)
515 instr->src[2].src = nir_src_for_ssa(src2);
516 if (src3)
517 instr->src[3].src = nir_src_for_ssa(src3);
518
519 return nir_builder_alu_instr_finish_and_insert(build, instr);
520 }
521
522 /* for the couple special cases with more than 4 src args: */
523 static inline nir_ssa_def *
nir_build_alu_src_arr(nir_builder * build,nir_op op,nir_ssa_def ** srcs)524 nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)
525 {
526 const nir_op_info *op_info = &nir_op_infos[op];
527 nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
528 if (!instr)
529 return NULL;
530
531 for (unsigned i = 0; i < op_info->num_inputs; i++)
532 instr->src[i].src = nir_src_for_ssa(srcs[i]);
533
534 return nir_builder_alu_instr_finish_and_insert(build, instr);
535 }
536
537 /* Generic builder for system values. */
538 static inline nir_ssa_def *
nir_load_system_value(nir_builder * build,nir_intrinsic_op op,int index,unsigned num_components,unsigned bit_size)539 nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
540 unsigned num_components, unsigned bit_size)
541 {
542 nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
543 if (nir_intrinsic_infos[op].dest_components > 0)
544 assert(num_components == nir_intrinsic_infos[op].dest_components);
545 else
546 load->num_components = num_components;
547 load->const_index[0] = index;
548
549 nir_ssa_dest_init(&load->instr, &load->dest,
550 num_components, bit_size, NULL);
551 nir_builder_instr_insert(build, &load->instr);
552 return &load->dest.ssa;
553 }
554
555 #include "nir_builder_opcodes.h"
556 #undef nir_deref_mode_is
557
558 static inline nir_ssa_def *
nir_vec(nir_builder * build,nir_ssa_def ** comp,unsigned num_components)559 nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
560 {
561 return nir_build_alu_src_arr(build, nir_op_vec(num_components), comp);
562 }
563
564 static inline nir_ssa_def *
nir_mov_alu(nir_builder * build,nir_alu_src src,unsigned num_components)565 nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
566 {
567 assert(!src.abs && !src.negate);
568 if (src.src.is_ssa && src.src.ssa->num_components == num_components) {
569 bool any_swizzles = false;
570 for (unsigned i = 0; i < num_components; i++) {
571 if (src.swizzle[i] != i)
572 any_swizzles = true;
573 }
574 if (!any_swizzles)
575 return src.src.ssa;
576 }
577
578 nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
579 nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
580 nir_src_bit_size(src.src), NULL);
581 mov->exact = build->exact;
582 mov->dest.write_mask = (1 << num_components) - 1;
583 mov->src[0] = src;
584 nir_builder_instr_insert(build, &mov->instr);
585
586 return &mov->dest.dest.ssa;
587 }
588
589 /**
590 * Construct a mov that reswizzles the source's components.
591 */
592 static inline nir_ssa_def *
nir_swizzle(nir_builder * build,nir_ssa_def * src,const unsigned * swiz,unsigned num_components)593 nir_swizzle(nir_builder *build, nir_ssa_def *src, const unsigned *swiz,
594 unsigned num_components)
595 {
596 assert(num_components <= NIR_MAX_VEC_COMPONENTS);
597 nir_alu_src alu_src = { NIR_SRC_INIT };
598 alu_src.src = nir_src_for_ssa(src);
599
600 bool is_identity_swizzle = true;
601 for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; i++) {
602 if (swiz[i] != i)
603 is_identity_swizzle = false;
604 alu_src.swizzle[i] = swiz[i];
605 }
606
607 if (num_components == src->num_components && is_identity_swizzle)
608 return src;
609
610 return nir_mov_alu(build, alu_src, num_components);
611 }
612
613 /* Selects the right fdot given the number of components in each source. */
614 static inline nir_ssa_def *
nir_fdot(nir_builder * build,nir_ssa_def * src0,nir_ssa_def * src1)615 nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
616 {
617 assert(src0->num_components == src1->num_components);
618 switch (src0->num_components) {
619 case 1: return nir_fmul(build, src0, src1);
620 case 2: return nir_fdot2(build, src0, src1);
621 case 3: return nir_fdot3(build, src0, src1);
622 case 4: return nir_fdot4(build, src0, src1);
623 case 5: return nir_fdot5(build, src0, src1);
624 case 8: return nir_fdot8(build, src0, src1);
625 case 16: return nir_fdot16(build, src0, src1);
626 default:
627 unreachable("bad component size");
628 }
629
630 return NULL;
631 }
632
633 static inline nir_ssa_def *
nir_ball_iequal(nir_builder * b,nir_ssa_def * src0,nir_ssa_def * src1)634 nir_ball_iequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
635 {
636 switch (src0->num_components) {
637 case 1: return nir_ieq(b, src0, src1);
638 case 2: return nir_ball_iequal2(b, src0, src1);
639 case 3: return nir_ball_iequal3(b, src0, src1);
640 case 4: return nir_ball_iequal4(b, src0, src1);
641 case 5: return nir_ball_iequal5(b, src0, src1);
642 case 8: return nir_ball_iequal8(b, src0, src1);
643 case 16: return nir_ball_iequal16(b, src0, src1);
644 default:
645 unreachable("bad component size");
646 }
647 }
648
649 static inline nir_ssa_def *
nir_ball(nir_builder * b,nir_ssa_def * src)650 nir_ball(nir_builder *b, nir_ssa_def *src)
651 {
652 return nir_ball_iequal(b, src, nir_imm_true(b));
653 }
654
655 static inline nir_ssa_def *
nir_bany_inequal(nir_builder * b,nir_ssa_def * src0,nir_ssa_def * src1)656 nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
657 {
658 switch (src0->num_components) {
659 case 1: return nir_ine(b, src0, src1);
660 case 2: return nir_bany_inequal2(b, src0, src1);
661 case 3: return nir_bany_inequal3(b, src0, src1);
662 case 4: return nir_bany_inequal4(b, src0, src1);
663 case 5: return nir_bany_inequal5(b, src0, src1);
664 case 8: return nir_bany_inequal8(b, src0, src1);
665 case 16: return nir_bany_inequal16(b, src0, src1);
666 default:
667 unreachable("bad component size");
668 }
669 }
670
671 static inline nir_ssa_def *
nir_bany(nir_builder * b,nir_ssa_def * src)672 nir_bany(nir_builder *b, nir_ssa_def *src)
673 {
674 return nir_bany_inequal(b, src, nir_imm_false(b));
675 }
676
677 static inline nir_ssa_def *
nir_channel(nir_builder * b,nir_ssa_def * def,unsigned c)678 nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
679 {
680 return nir_swizzle(b, def, &c, 1);
681 }
682
683 static inline nir_ssa_def *
nir_channels(nir_builder * b,nir_ssa_def * def,nir_component_mask_t mask)684 nir_channels(nir_builder *b, nir_ssa_def *def, nir_component_mask_t mask)
685 {
686 unsigned num_channels = 0, swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };
687
688 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
689 if ((mask & (1 << i)) == 0)
690 continue;
691 swizzle[num_channels++] = i;
692 }
693
694 return nir_swizzle(b, def, swizzle, num_channels);
695 }
696
697 static inline nir_ssa_def *
_nir_select_from_array_helper(nir_builder * b,nir_ssa_def ** arr,nir_ssa_def * idx,unsigned start,unsigned end)698 _nir_select_from_array_helper(nir_builder *b, nir_ssa_def **arr,
699 nir_ssa_def *idx,
700 unsigned start, unsigned end)
701 {
702 if (start == end - 1) {
703 return arr[start];
704 } else {
705 unsigned mid = start + (end - start) / 2;
706 return nir_bcsel(b, nir_ilt(b, idx, nir_imm_intN_t(b, mid, idx->bit_size)),
707 _nir_select_from_array_helper(b, arr, idx, start, mid),
708 _nir_select_from_array_helper(b, arr, idx, mid, end));
709 }
710 }
711
712 static inline nir_ssa_def *
nir_select_from_ssa_def_array(nir_builder * b,nir_ssa_def ** arr,unsigned arr_len,nir_ssa_def * idx)713 nir_select_from_ssa_def_array(nir_builder *b, nir_ssa_def **arr,
714 unsigned arr_len, nir_ssa_def *idx)
715 {
716 return _nir_select_from_array_helper(b, arr, idx, 0, arr_len);
717 }
718
719 static inline nir_ssa_def *
nir_vector_extract(nir_builder * b,nir_ssa_def * vec,nir_ssa_def * c)720 nir_vector_extract(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c)
721 {
722 nir_src c_src = nir_src_for_ssa(c);
723 if (nir_src_is_const(c_src)) {
724 uint64_t c_const = nir_src_as_uint(c_src);
725 if (c_const < vec->num_components)
726 return nir_channel(b, vec, c_const);
727 else
728 return nir_ssa_undef(b, 1, vec->bit_size);
729 } else {
730 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
731 for (unsigned i = 0; i < vec->num_components; i++)
732 comps[i] = nir_channel(b, vec, i);
733 return nir_select_from_ssa_def_array(b, comps, vec->num_components, c);
734 }
735 }
736
737 /** Replaces the component of `vec` specified by `c` with `scalar` */
738 static inline nir_ssa_def *
nir_vector_insert_imm(nir_builder * b,nir_ssa_def * vec,nir_ssa_def * scalar,unsigned c)739 nir_vector_insert_imm(nir_builder *b, nir_ssa_def *vec,
740 nir_ssa_def *scalar, unsigned c)
741 {
742 assert(scalar->num_components == 1);
743 assert(c < vec->num_components);
744
745 nir_op vec_op = nir_op_vec(vec->num_components);
746 nir_alu_instr *vec_instr = nir_alu_instr_create(b->shader, vec_op);
747
748 for (unsigned i = 0; i < vec->num_components; i++) {
749 if (i == c) {
750 vec_instr->src[i].src = nir_src_for_ssa(scalar);
751 vec_instr->src[i].swizzle[0] = 0;
752 } else {
753 vec_instr->src[i].src = nir_src_for_ssa(vec);
754 vec_instr->src[i].swizzle[0] = i;
755 }
756 }
757
758 return nir_builder_alu_instr_finish_and_insert(b, vec_instr);
759 }
760
761 /** Replaces the component of `vec` specified by `c` with `scalar` */
762 static inline nir_ssa_def *
nir_vector_insert(nir_builder * b,nir_ssa_def * vec,nir_ssa_def * scalar,nir_ssa_def * c)763 nir_vector_insert(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *scalar,
764 nir_ssa_def *c)
765 {
766 assert(scalar->num_components == 1);
767 assert(c->num_components == 1);
768
769 nir_src c_src = nir_src_for_ssa(c);
770 if (nir_src_is_const(c_src)) {
771 uint64_t c_const = nir_src_as_uint(c_src);
772 if (c_const < vec->num_components)
773 return nir_vector_insert_imm(b, vec, scalar, c_const);
774 else
775 return vec;
776 } else {
777 nir_const_value per_comp_idx_const[NIR_MAX_VEC_COMPONENTS];
778 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
779 per_comp_idx_const[i] = nir_const_value_for_int(i, c->bit_size);
780 nir_ssa_def *per_comp_idx =
781 nir_build_imm(b, vec->num_components,
782 c->bit_size, per_comp_idx_const);
783
784 /* nir_builder will automatically splat out scalars to vectors so an
785 * insert is as simple as "if I'm the channel, replace me with the
786 * scalar."
787 */
788 return nir_bcsel(b, nir_ieq(b, c, per_comp_idx), scalar, vec);
789 }
790 }
791
792 static inline nir_ssa_def *
nir_i2i(nir_builder * build,nir_ssa_def * x,unsigned dest_bit_size)793 nir_i2i(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
794 {
795 if (x->bit_size == dest_bit_size)
796 return x;
797
798 switch (dest_bit_size) {
799 case 64: return nir_i2i64(build, x);
800 case 32: return nir_i2i32(build, x);
801 case 16: return nir_i2i16(build, x);
802 case 8: return nir_i2i8(build, x);
803 default: unreachable("Invalid bit size");
804 }
805 }
806
807 static inline nir_ssa_def *
nir_u2u(nir_builder * build,nir_ssa_def * x,unsigned dest_bit_size)808 nir_u2u(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
809 {
810 if (x->bit_size == dest_bit_size)
811 return x;
812
813 switch (dest_bit_size) {
814 case 64: return nir_u2u64(build, x);
815 case 32: return nir_u2u32(build, x);
816 case 16: return nir_u2u16(build, x);
817 case 8: return nir_u2u8(build, x);
818 default: unreachable("Invalid bit size");
819 }
820 }
821
822 static inline nir_ssa_def *
nir_iadd_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)823 nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
824 {
825 assert(x->bit_size <= 64);
826 y &= BITFIELD64_MASK(x->bit_size);
827
828 if (y == 0) {
829 return x;
830 } else {
831 return nir_iadd(build, x, nir_imm_intN_t(build, y, x->bit_size));
832 }
833 }
834
835 static inline nir_ssa_def *
nir_iadd_imm_nuw(nir_builder * b,nir_ssa_def * x,uint64_t y)836 nir_iadd_imm_nuw(nir_builder *b, nir_ssa_def *x, uint64_t y)
837 {
838 nir_ssa_def *d = nir_iadd_imm(b, x, y);
839 if (d != x && d->parent_instr->type == nir_instr_type_alu)
840 nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
841 return d;
842 }
843
844 static inline nir_ssa_def *
nir_iadd_nuw(nir_builder * b,nir_ssa_def * x,nir_ssa_def * y)845 nir_iadd_nuw(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
846 {
847 nir_ssa_def *d = nir_iadd(b, x, y);
848 nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
849 return d;
850 }
851
852 static inline nir_ssa_def *
nir_ieq_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)853 nir_ieq_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
854 {
855 return nir_ieq(build, x, nir_imm_intN_t(build, y, x->bit_size));
856 }
857
858 /* Use nir_iadd(x, -y) for reversing parameter ordering */
859 static inline nir_ssa_def *
nir_isub_imm(nir_builder * build,uint64_t y,nir_ssa_def * x)860 nir_isub_imm(nir_builder *build, uint64_t y, nir_ssa_def *x)
861 {
862 return nir_isub(build, nir_imm_intN_t(build, y, x->bit_size), x);
863 }
864
865 static inline nir_ssa_def *
_nir_mul_imm(nir_builder * build,nir_ssa_def * x,uint64_t y,bool amul)866 _nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
867 {
868 assert(x->bit_size <= 64);
869 y &= BITFIELD64_MASK(x->bit_size);
870
871 if (y == 0) {
872 return nir_imm_intN_t(build, 0, x->bit_size);
873 } else if (y == 1) {
874 return x;
875 } else if (!build->shader->options->lower_bitops &&
876 util_is_power_of_two_or_zero64(y)) {
877 return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
878 } else if (amul) {
879 return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
880 } else {
881 return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
882 }
883 }
884
885 static inline nir_ssa_def *
nir_imul_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)886 nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
887 {
888 return _nir_mul_imm(build, x, y, false);
889 }
890
891 static inline nir_ssa_def *
nir_amul_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)892 nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
893 {
894 return _nir_mul_imm(build, x, y, true);
895 }
896
897 static inline nir_ssa_def *
nir_fadd_imm(nir_builder * build,nir_ssa_def * x,double y)898 nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
899 {
900 return nir_fadd(build, x, nir_imm_floatN_t(build, y, x->bit_size));
901 }
902
903 static inline nir_ssa_def *
nir_fmul_imm(nir_builder * build,nir_ssa_def * x,double y)904 nir_fmul_imm(nir_builder *build, nir_ssa_def *x, double y)
905 {
906 return nir_fmul(build, x, nir_imm_floatN_t(build, y, x->bit_size));
907 }
908
909 static inline nir_ssa_def *
nir_iand_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)910 nir_iand_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
911 {
912 assert(x->bit_size <= 64);
913 y &= BITFIELD64_MASK(x->bit_size);
914
915 if (y == 0) {
916 return nir_imm_intN_t(build, 0, x->bit_size);
917 } else if (y == BITFIELD64_MASK(x->bit_size)) {
918 return x;
919 } else {
920 return nir_iand(build, x, nir_imm_intN_t(build, y, x->bit_size));
921 }
922 }
923
924 static inline nir_ssa_def *
nir_ishr_imm(nir_builder * build,nir_ssa_def * x,uint32_t y)925 nir_ishr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
926 {
927 if (y == 0) {
928 return x;
929 } else {
930 return nir_ishr(build, x, nir_imm_int(build, y));
931 }
932 }
933
934 static inline nir_ssa_def *
nir_ushr_imm(nir_builder * build,nir_ssa_def * x,uint32_t y)935 nir_ushr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
936 {
937 if (y == 0) {
938 return x;
939 } else {
940 return nir_ushr(build, x, nir_imm_int(build, y));
941 }
942 }
943
944 static inline nir_ssa_def *
nir_udiv_imm(nir_builder * build,nir_ssa_def * x,uint64_t y)945 nir_udiv_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
946 {
947 assert(x->bit_size <= 64);
948 y &= BITFIELD64_MASK(x->bit_size);
949
950 if (y == 1) {
951 return x;
952 } else if (util_is_power_of_two_nonzero(y)) {
953 return nir_ushr_imm(build, x, ffsll(y) - 1);
954 } else {
955 return nir_udiv(build, x, nir_imm_intN_t(build, y, x->bit_size));
956 }
957 }
958
959 static inline nir_ssa_def *
nir_fclamp(nir_builder * b,nir_ssa_def * x,nir_ssa_def * min_val,nir_ssa_def * max_val)960 nir_fclamp(nir_builder *b,
961 nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
962 {
963 return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
964 }
965
966 static inline nir_ssa_def *
nir_iclamp(nir_builder * b,nir_ssa_def * x,nir_ssa_def * min_val,nir_ssa_def * max_val)967 nir_iclamp(nir_builder *b,
968 nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
969 {
970 return nir_imin(b, nir_imax(b, x, min_val), max_val);
971 }
972
973 static inline nir_ssa_def *
nir_uclamp(nir_builder * b,nir_ssa_def * x,nir_ssa_def * min_val,nir_ssa_def * max_val)974 nir_uclamp(nir_builder *b,
975 nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
976 {
977 return nir_umin(b, nir_umax(b, x, min_val), max_val);
978 }
979
980 static inline nir_ssa_def *
nir_ffma_imm12(nir_builder * build,nir_ssa_def * src0,double src1,double src2)981 nir_ffma_imm12(nir_builder *build, nir_ssa_def *src0, double src1, double src2)
982 {
983 if (build->shader->options->avoid_ternary_with_two_constants)
984 return nir_fadd_imm(build, nir_fmul_imm(build, src0, src1), src2);
985 else
986 return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size),
987 nir_imm_floatN_t(build, src2, src0->bit_size));
988 }
989
990 static inline nir_ssa_def *
nir_ffma_imm1(nir_builder * build,nir_ssa_def * src0,double src1,nir_ssa_def * src2)991 nir_ffma_imm1(nir_builder *build, nir_ssa_def *src0, double src1, nir_ssa_def *src2)
992 {
993 return nir_ffma(build, src0, nir_imm_floatN_t(build, src1, src0->bit_size), src2);
994 }
995
996 static inline nir_ssa_def *
nir_ffma_imm2(nir_builder * build,nir_ssa_def * src0,nir_ssa_def * src1,double src2)997 nir_ffma_imm2(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1, double src2)
998 {
999 return nir_ffma(build, src0, src1, nir_imm_floatN_t(build, src2, src0->bit_size));
1000 }
1001
1002 static inline nir_ssa_def *
nir_a_minus_bc(nir_builder * build,nir_ssa_def * src0,nir_ssa_def * src1,nir_ssa_def * src2)1003 nir_a_minus_bc(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1,
1004 nir_ssa_def *src2)
1005 {
1006 return nir_ffma(build, nir_fneg(build, src1), src2, src0);
1007 }
1008
1009 static inline nir_ssa_def *
nir_pack_bits(nir_builder * b,nir_ssa_def * src,unsigned dest_bit_size)1010 nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1011 {
1012 assert(src->num_components * src->bit_size == dest_bit_size);
1013
1014 switch (dest_bit_size) {
1015 case 64:
1016 switch (src->bit_size) {
1017 case 32: return nir_pack_64_2x32(b, src);
1018 case 16: return nir_pack_64_4x16(b, src);
1019 default: break;
1020 }
1021 break;
1022
1023 case 32:
1024 if (src->bit_size == 16)
1025 return nir_pack_32_2x16(b, src);
1026 break;
1027
1028 default:
1029 break;
1030 }
1031
1032 /* If we got here, we have no dedicated unpack opcode. */
1033 nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
1034 for (unsigned i = 0; i < src->num_components; i++) {
1035 nir_ssa_def *val = nir_u2u(b, nir_channel(b, src, i), dest_bit_size);
1036 val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
1037 dest = nir_ior(b, dest, val);
1038 }
1039 return dest;
1040 }
1041
1042 static inline nir_ssa_def *
nir_unpack_bits(nir_builder * b,nir_ssa_def * src,unsigned dest_bit_size)1043 nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1044 {
1045 assert(src->num_components == 1);
1046 assert(src->bit_size > dest_bit_size);
1047 const unsigned dest_num_components = src->bit_size / dest_bit_size;
1048 assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
1049
1050 switch (src->bit_size) {
1051 case 64:
1052 switch (dest_bit_size) {
1053 case 32: return nir_unpack_64_2x32(b, src);
1054 case 16: return nir_unpack_64_4x16(b, src);
1055 default: break;
1056 }
1057 break;
1058
1059 case 32:
1060 if (dest_bit_size == 16)
1061 return nir_unpack_32_2x16(b, src);
1062 break;
1063
1064 default:
1065 break;
1066 }
1067
1068 /* If we got here, we have no dedicated unpack opcode. */
1069 nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
1070 for (unsigned i = 0; i < dest_num_components; i++) {
1071 nir_ssa_def *val = nir_ushr_imm(b, src, i * dest_bit_size);
1072 dest_comps[i] = nir_u2u(b, val, dest_bit_size);
1073 }
1074 return nir_vec(b, dest_comps, dest_num_components);
1075 }
1076
1077 /**
1078 * Treats srcs as if it's one big blob of bits and extracts the range of bits
1079 * given by
1080 *
1081 * [first_bit, first_bit + dest_num_components * dest_bit_size)
1082 *
1083 * The range can have any alignment or size as long as it's an integer number
1084 * of destination components and fits inside the concatenated sources.
1085 *
1086 * TODO: The one caveat here is that we can't handle byte alignment if 64-bit
1087 * values are involved because that would require pack/unpack to/from a vec8
1088 * which NIR currently does not support.
1089 */
1090 static inline nir_ssa_def *
nir_extract_bits(nir_builder * b,nir_ssa_def ** srcs,unsigned num_srcs,unsigned first_bit,unsigned dest_num_components,unsigned dest_bit_size)1091 nir_extract_bits(nir_builder *b, nir_ssa_def **srcs, unsigned num_srcs,
1092 unsigned first_bit,
1093 unsigned dest_num_components, unsigned dest_bit_size)
1094 {
1095 const unsigned num_bits = dest_num_components * dest_bit_size;
1096
1097 /* Figure out the common bit size */
1098 unsigned common_bit_size = dest_bit_size;
1099 for (unsigned i = 0; i < num_srcs; i++)
1100 common_bit_size = MIN2(common_bit_size, srcs[i]->bit_size);
1101 if (first_bit > 0)
1102 common_bit_size = MIN2(common_bit_size, (1u << (ffs(first_bit) - 1)));
1103
1104 /* We don't want to have to deal with 1-bit values */
1105 assert(common_bit_size >= 8);
1106
1107 nir_ssa_def *common_comps[NIR_MAX_VEC_COMPONENTS * sizeof(uint64_t)];
1108 assert(num_bits / common_bit_size <= ARRAY_SIZE(common_comps));
1109
1110 /* First, unpack to the common bit size and select the components from the
1111 * source.
1112 */
1113 int src_idx = -1;
1114 unsigned src_start_bit = 0;
1115 unsigned src_end_bit = 0;
1116 for (unsigned i = 0; i < num_bits / common_bit_size; i++) {
1117 const unsigned bit = first_bit + (i * common_bit_size);
1118 while (bit >= src_end_bit) {
1119 src_idx++;
1120 assert(src_idx < (int) num_srcs);
1121 src_start_bit = src_end_bit;
1122 src_end_bit += srcs[src_idx]->bit_size *
1123 srcs[src_idx]->num_components;
1124 }
1125 assert(bit >= src_start_bit);
1126 assert(bit + common_bit_size <= src_end_bit);
1127 const unsigned rel_bit = bit - src_start_bit;
1128 const unsigned src_bit_size = srcs[src_idx]->bit_size;
1129
1130 nir_ssa_def *comp = nir_channel(b, srcs[src_idx],
1131 rel_bit / src_bit_size);
1132 if (srcs[src_idx]->bit_size > common_bit_size) {
1133 nir_ssa_def *unpacked = nir_unpack_bits(b, comp, common_bit_size);
1134 comp = nir_channel(b, unpacked, (rel_bit % src_bit_size) /
1135 common_bit_size);
1136 }
1137 common_comps[i] = comp;
1138 }
1139
1140 /* Now, re-pack the destination if we have to */
1141 if (dest_bit_size > common_bit_size) {
1142 unsigned common_per_dest = dest_bit_size / common_bit_size;
1143 nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
1144 for (unsigned i = 0; i < dest_num_components; i++) {
1145 nir_ssa_def *unpacked = nir_vec(b, common_comps + i * common_per_dest,
1146 common_per_dest);
1147 dest_comps[i] = nir_pack_bits(b, unpacked, dest_bit_size);
1148 }
1149 return nir_vec(b, dest_comps, dest_num_components);
1150 } else {
1151 assert(dest_bit_size == common_bit_size);
1152 return nir_vec(b, common_comps, dest_num_components);
1153 }
1154 }
1155
1156 static inline nir_ssa_def *
nir_bitcast_vector(nir_builder * b,nir_ssa_def * src,unsigned dest_bit_size)1157 nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
1158 {
1159 assert((src->bit_size * src->num_components) % dest_bit_size == 0);
1160 const unsigned dest_num_components =
1161 (src->bit_size * src->num_components) / dest_bit_size;
1162 assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);
1163
1164 return nir_extract_bits(b, &src, 1, 0, dest_num_components, dest_bit_size);
1165 }
1166
1167 /**
1168 * Pad a value to N components with undefs of matching bit size.
1169 * If the value already contains >= num_components, it is returned without change.
1170 */
1171 static inline nir_ssa_def *
nir_pad_vector(nir_builder * b,nir_ssa_def * src,unsigned num_components)1172 nir_pad_vector(nir_builder *b, nir_ssa_def *src, unsigned num_components)
1173 {
1174 assert(src->num_components <= num_components);
1175 if (src->num_components == num_components)
1176 return src;
1177
1178 nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1179 nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);
1180 unsigned i = 0;
1181 for (; i < src->num_components; i++)
1182 components[i] = nir_channel(b, src, i);
1183 for (; i < num_components; i++)
1184 components[i] = undef;
1185
1186 return nir_vec(b, components, num_components);
1187 }
1188
1189 /**
1190 * Pad a value to N components with copies of the given immediate of matching
1191 * bit size. If the value already contains >= num_components, it is returned
1192 * without change.
1193 */
1194 static inline nir_ssa_def *
nir_pad_vector_imm_int(nir_builder * b,nir_ssa_def * src,uint64_t imm_val,unsigned num_components)1195 nir_pad_vector_imm_int(nir_builder *b, nir_ssa_def *src, uint64_t imm_val,
1196 unsigned num_components)
1197 {
1198 assert(src->num_components <= num_components);
1199 if (src->num_components == num_components)
1200 return src;
1201
1202 nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
1203 nir_ssa_def *imm = nir_imm_intN_t(b, imm_val, src->bit_size);
1204 unsigned i = 0;
1205 for (; i < src->num_components; i++)
1206 components[i] = nir_channel(b, src, i);
1207 for (; i < num_components; i++)
1208 components[i] = imm;
1209
1210 return nir_vec(b, components, num_components);
1211 }
1212
1213 /**
1214 * Pad a value to 4 components with undefs of matching bit size.
1215 * If the value already contains >= 4 components, it is returned without change.
1216 */
1217 static inline nir_ssa_def *
nir_pad_vec4(nir_builder * b,nir_ssa_def * src)1218 nir_pad_vec4(nir_builder *b, nir_ssa_def *src)
1219 {
1220 return nir_pad_vector(b, src, 4);
1221 }
1222
1223 /**
1224 * Turns a nir_src into a nir_ssa_def * so it can be passed to
1225 * nir_build_alu()-based builder calls.
1226 *
1227 * See nir_ssa_for_alu_src() for alu instructions.
1228 */
1229 static inline nir_ssa_def *
nir_ssa_for_src(nir_builder * build,nir_src src,int num_components)1230 nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)
1231 {
1232 if (src.is_ssa && src.ssa->num_components == num_components)
1233 return src.ssa;
1234
1235 assert((unsigned)num_components <= nir_src_num_components(src));
1236
1237 nir_alu_src alu = { NIR_SRC_INIT };
1238 alu.src = src;
1239 for (int j = 0; j < NIR_MAX_VEC_COMPONENTS; j++)
1240 alu.swizzle[j] = j;
1241
1242 return nir_mov_alu(build, alu, num_components);
1243 }
1244
1245 /**
1246 * Similar to nir_ssa_for_src(), but for alu srcs, respecting the
1247 * nir_alu_src's swizzle.
1248 */
1249 static inline nir_ssa_def *
nir_ssa_for_alu_src(nir_builder * build,nir_alu_instr * instr,unsigned srcn)1250 nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)
1251 {
1252 if (nir_alu_src_is_trivial_ssa(instr, srcn))
1253 return instr->src[srcn].src.ssa;
1254
1255 nir_alu_src *src = &instr->src[srcn];
1256 unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);
1257 return nir_mov_alu(build, *src, num_components);
1258 }
1259
1260 static inline unsigned
nir_get_ptr_bitsize(nir_shader * shader)1261 nir_get_ptr_bitsize(nir_shader *shader)
1262 {
1263 if (shader->info.stage == MESA_SHADER_KERNEL)
1264 return shader->info.cs.ptr_size;
1265 return 32;
1266 }
1267
1268 static inline nir_deref_instr *
nir_build_deref_var(nir_builder * build,nir_variable * var)1269 nir_build_deref_var(nir_builder *build, nir_variable *var)
1270 {
1271 nir_deref_instr *deref =
1272 nir_deref_instr_create(build->shader, nir_deref_type_var);
1273
1274 deref->modes = (nir_variable_mode)var->data.mode;
1275 deref->type = var->type;
1276 deref->var = var;
1277
1278 nir_ssa_dest_init(&deref->instr, &deref->dest, 1,
1279 nir_get_ptr_bitsize(build->shader), NULL);
1280
1281 nir_builder_instr_insert(build, &deref->instr);
1282
1283 return deref;
1284 }
1285
1286 static inline nir_deref_instr *
nir_build_deref_array(nir_builder * build,nir_deref_instr * parent,nir_ssa_def * index)1287 nir_build_deref_array(nir_builder *build, nir_deref_instr *parent,
1288 nir_ssa_def *index)
1289 {
1290 assert(glsl_type_is_array(parent->type) ||
1291 glsl_type_is_matrix(parent->type) ||
1292 glsl_type_is_vector(parent->type));
1293
1294 assert(index->bit_size == parent->dest.ssa.bit_size);
1295
1296 nir_deref_instr *deref =
1297 nir_deref_instr_create(build->shader, nir_deref_type_array);
1298
1299 deref->modes = parent->modes;
1300 deref->type = glsl_get_array_element(parent->type);
1301 deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1302 deref->arr.index = nir_src_for_ssa(index);
1303
1304 nir_ssa_dest_init(&deref->instr, &deref->dest,
1305 parent->dest.ssa.num_components,
1306 parent->dest.ssa.bit_size, NULL);
1307
1308 nir_builder_instr_insert(build, &deref->instr);
1309
1310 return deref;
1311 }
1312
1313 static inline nir_deref_instr *
nir_build_deref_array_imm(nir_builder * build,nir_deref_instr * parent,int64_t index)1314 nir_build_deref_array_imm(nir_builder *build, nir_deref_instr *parent,
1315 int64_t index)
1316 {
1317 assert(parent->dest.is_ssa);
1318 nir_ssa_def *idx_ssa = nir_imm_intN_t(build, index,
1319 parent->dest.ssa.bit_size);
1320
1321 return nir_build_deref_array(build, parent, idx_ssa);
1322 }
1323
1324 static inline nir_deref_instr *
nir_build_deref_ptr_as_array(nir_builder * build,nir_deref_instr * parent,nir_ssa_def * index)1325 nir_build_deref_ptr_as_array(nir_builder *build, nir_deref_instr *parent,
1326 nir_ssa_def *index)
1327 {
1328 assert(parent->deref_type == nir_deref_type_array ||
1329 parent->deref_type == nir_deref_type_ptr_as_array ||
1330 parent->deref_type == nir_deref_type_cast);
1331
1332 assert(index->bit_size == parent->dest.ssa.bit_size);
1333
1334 nir_deref_instr *deref =
1335 nir_deref_instr_create(build->shader, nir_deref_type_ptr_as_array);
1336
1337 deref->modes = parent->modes;
1338 deref->type = parent->type;
1339 deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1340 deref->arr.index = nir_src_for_ssa(index);
1341
1342 nir_ssa_dest_init(&deref->instr, &deref->dest,
1343 parent->dest.ssa.num_components,
1344 parent->dest.ssa.bit_size, NULL);
1345
1346 nir_builder_instr_insert(build, &deref->instr);
1347
1348 return deref;
1349 }
1350
1351 static inline nir_deref_instr *
nir_build_deref_array_wildcard(nir_builder * build,nir_deref_instr * parent)1352 nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent)
1353 {
1354 assert(glsl_type_is_array(parent->type) ||
1355 glsl_type_is_matrix(parent->type));
1356
1357 nir_deref_instr *deref =
1358 nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard);
1359
1360 deref->modes = parent->modes;
1361 deref->type = glsl_get_array_element(parent->type);
1362 deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1363
1364 nir_ssa_dest_init(&deref->instr, &deref->dest,
1365 parent->dest.ssa.num_components,
1366 parent->dest.ssa.bit_size, NULL);
1367
1368 nir_builder_instr_insert(build, &deref->instr);
1369
1370 return deref;
1371 }
1372
1373 static inline nir_deref_instr *
nir_build_deref_struct(nir_builder * build,nir_deref_instr * parent,unsigned index)1374 nir_build_deref_struct(nir_builder *build, nir_deref_instr *parent,
1375 unsigned index)
1376 {
1377 assert(glsl_type_is_struct_or_ifc(parent->type));
1378
1379 nir_deref_instr *deref =
1380 nir_deref_instr_create(build->shader, nir_deref_type_struct);
1381
1382 deref->modes = parent->modes;
1383 deref->type = glsl_get_struct_field(parent->type, index);
1384 deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1385 deref->strct.index = index;
1386
1387 nir_ssa_dest_init(&deref->instr, &deref->dest,
1388 parent->dest.ssa.num_components,
1389 parent->dest.ssa.bit_size, NULL);
1390
1391 nir_builder_instr_insert(build, &deref->instr);
1392
1393 return deref;
1394 }
1395
1396 static inline nir_deref_instr *
nir_build_deref_cast(nir_builder * build,nir_ssa_def * parent,nir_variable_mode modes,const struct glsl_type * type,unsigned ptr_stride)1397 nir_build_deref_cast(nir_builder *build, nir_ssa_def *parent,
1398 nir_variable_mode modes, const struct glsl_type *type,
1399 unsigned ptr_stride)
1400 {
1401 nir_deref_instr *deref =
1402 nir_deref_instr_create(build->shader, nir_deref_type_cast);
1403
1404 deref->modes = modes;
1405 deref->type = type;
1406 deref->parent = nir_src_for_ssa(parent);
1407 deref->cast.ptr_stride = ptr_stride;
1408
1409 nir_ssa_dest_init(&deref->instr, &deref->dest,
1410 parent->num_components, parent->bit_size, NULL);
1411
1412 nir_builder_instr_insert(build, &deref->instr);
1413
1414 return deref;
1415 }
1416
1417 static inline nir_deref_instr *
nir_alignment_deref_cast(nir_builder * build,nir_deref_instr * parent,uint32_t align_mul,uint32_t align_offset)1418 nir_alignment_deref_cast(nir_builder *build, nir_deref_instr *parent,
1419 uint32_t align_mul, uint32_t align_offset)
1420 {
1421 nir_deref_instr *deref =
1422 nir_deref_instr_create(build->shader, nir_deref_type_cast);
1423
1424 deref->modes = parent->modes;
1425 deref->type = parent->type;
1426 deref->parent = nir_src_for_ssa(&parent->dest.ssa);
1427 deref->cast.ptr_stride = nir_deref_instr_array_stride(deref);
1428 deref->cast.align_mul = align_mul;
1429 deref->cast.align_offset = align_offset;
1430
1431 nir_ssa_dest_init(&deref->instr, &deref->dest,
1432 parent->dest.ssa.num_components,
1433 parent->dest.ssa.bit_size, NULL);
1434
1435 nir_builder_instr_insert(build, &deref->instr);
1436
1437 return deref;
1438 }
1439
1440 /** Returns a deref that follows another but starting from the given parent
1441 *
1442 * The new deref will be the same type and take the same array or struct index
1443 * as the leader deref but it may have a different parent. This is very
1444 * useful for walking deref paths.
1445 */
1446 static inline nir_deref_instr *
nir_build_deref_follower(nir_builder * b,nir_deref_instr * parent,nir_deref_instr * leader)1447 nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent,
1448 nir_deref_instr *leader)
1449 {
1450 /* If the derefs would have the same parent, don't make a new one */
1451 assert(leader->parent.is_ssa);
1452 if (leader->parent.ssa == &parent->dest.ssa)
1453 return leader;
1454
1455 UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent);
1456
1457 switch (leader->deref_type) {
1458 case nir_deref_type_var:
1459 unreachable("A var dereference cannot have a parent");
1460 break;
1461
1462 case nir_deref_type_array:
1463 case nir_deref_type_array_wildcard:
1464 assert(glsl_type_is_matrix(parent->type) ||
1465 glsl_type_is_array(parent->type) ||
1466 (leader->deref_type == nir_deref_type_array &&
1467 glsl_type_is_vector(parent->type)));
1468 assert(glsl_get_length(parent->type) ==
1469 glsl_get_length(leader_parent->type));
1470
1471 if (leader->deref_type == nir_deref_type_array) {
1472 assert(leader->arr.index.is_ssa);
1473 nir_ssa_def *index = nir_i2i(b, leader->arr.index.ssa,
1474 parent->dest.ssa.bit_size);
1475 return nir_build_deref_array(b, parent, index);
1476 } else {
1477 return nir_build_deref_array_wildcard(b, parent);
1478 }
1479
1480 case nir_deref_type_struct:
1481 assert(glsl_type_is_struct_or_ifc(parent->type));
1482 assert(glsl_get_length(parent->type) ==
1483 glsl_get_length(leader_parent->type));
1484
1485 return nir_build_deref_struct(b, parent, leader->strct.index);
1486
1487 default:
1488 unreachable("Invalid deref instruction type");
1489 }
1490 }
1491
1492 static inline nir_ssa_def *
nir_load_reg(nir_builder * build,nir_register * reg)1493 nir_load_reg(nir_builder *build, nir_register *reg)
1494 {
1495 return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components);
1496 }
1497
1498 static inline void
nir_store_reg(nir_builder * build,nir_register * reg,nir_ssa_def * def,nir_component_mask_t write_mask)1499 nir_store_reg(nir_builder *build, nir_register *reg,
1500 nir_ssa_def *def, nir_component_mask_t write_mask)
1501 {
1502 assert(reg->num_components == def->num_components);
1503 assert(reg->bit_size == def->bit_size);
1504
1505 nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
1506 mov->src[0].src = nir_src_for_ssa(def);
1507 mov->dest.dest = nir_dest_for_reg(reg);
1508 mov->dest.write_mask = write_mask & BITFIELD_MASK(reg->num_components);
1509 nir_builder_instr_insert(build, &mov->instr);
1510 }
1511
1512 static inline nir_ssa_def *
nir_load_deref_with_access(nir_builder * build,nir_deref_instr * deref,enum gl_access_qualifier access)1513 nir_load_deref_with_access(nir_builder *build, nir_deref_instr *deref,
1514 enum gl_access_qualifier access)
1515 {
1516 return nir_build_load_deref(build, glsl_get_vector_elements(deref->type),
1517 glsl_get_bit_size(deref->type), &deref->dest.ssa,
1518 access);
1519 }
1520
1521 #undef nir_load_deref
1522 static inline nir_ssa_def *
nir_load_deref(nir_builder * build,nir_deref_instr * deref)1523 nir_load_deref(nir_builder *build, nir_deref_instr *deref)
1524 {
1525 return nir_load_deref_with_access(build, deref, (enum gl_access_qualifier)0);
1526 }
1527
1528 static inline void
nir_store_deref_with_access(nir_builder * build,nir_deref_instr * deref,nir_ssa_def * value,unsigned writemask,enum gl_access_qualifier access)1529 nir_store_deref_with_access(nir_builder *build, nir_deref_instr *deref,
1530 nir_ssa_def *value, unsigned writemask,
1531 enum gl_access_qualifier access)
1532 {
1533 writemask &= (1u << value->num_components) - 1u;
1534 nir_build_store_deref(build, &deref->dest.ssa, value, writemask, access);
1535 }
1536
1537 #undef nir_store_deref
1538 static inline void
nir_store_deref(nir_builder * build,nir_deref_instr * deref,nir_ssa_def * value,unsigned writemask)1539 nir_store_deref(nir_builder *build, nir_deref_instr *deref,
1540 nir_ssa_def *value, unsigned writemask)
1541 {
1542 nir_store_deref_with_access(build, deref, value, writemask,
1543 (enum gl_access_qualifier)0);
1544 }
1545
1546 static inline void
nir_copy_deref_with_access(nir_builder * build,nir_deref_instr * dest,nir_deref_instr * src,enum gl_access_qualifier dest_access,enum gl_access_qualifier src_access)1547 nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest,
1548 nir_deref_instr *src,
1549 enum gl_access_qualifier dest_access,
1550 enum gl_access_qualifier src_access)
1551 {
1552 nir_build_copy_deref(build, &dest->dest.ssa, &src->dest.ssa, dest_access, src_access);
1553 }
1554
1555 #undef nir_copy_deref
1556 static inline void
nir_copy_deref(nir_builder * build,nir_deref_instr * dest,nir_deref_instr * src)1557 nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr *src)
1558 {
1559 nir_copy_deref_with_access(build, dest, src,
1560 (enum gl_access_qualifier) 0,
1561 (enum gl_access_qualifier) 0);
1562 }
1563
1564 static inline void
nir_memcpy_deref_with_access(nir_builder * build,nir_deref_instr * dest,nir_deref_instr * src,nir_ssa_def * size,enum gl_access_qualifier dest_access,enum gl_access_qualifier src_access)1565 nir_memcpy_deref_with_access(nir_builder *build, nir_deref_instr *dest,
1566 nir_deref_instr *src, nir_ssa_def *size,
1567 enum gl_access_qualifier dest_access,
1568 enum gl_access_qualifier src_access)
1569 {
1570 nir_build_memcpy_deref(build, &dest->dest.ssa, &src->dest.ssa,
1571 size, dest_access, src_access);
1572 }
1573
1574 #undef nir_memcpy_deref
1575 static inline void
nir_memcpy_deref(nir_builder * build,nir_deref_instr * dest,nir_deref_instr * src,nir_ssa_def * size)1576 nir_memcpy_deref(nir_builder *build, nir_deref_instr *dest,
1577 nir_deref_instr *src, nir_ssa_def *size)
1578 {
1579 nir_memcpy_deref_with_access(build, dest, src, size,
1580 (enum gl_access_qualifier)0,
1581 (enum gl_access_qualifier)0);
1582 }
1583
1584 static inline nir_ssa_def *
nir_load_var(nir_builder * build,nir_variable * var)1585 nir_load_var(nir_builder *build, nir_variable *var)
1586 {
1587 return nir_load_deref(build, nir_build_deref_var(build, var));
1588 }
1589
1590 static inline void
nir_store_var(nir_builder * build,nir_variable * var,nir_ssa_def * value,unsigned writemask)1591 nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,
1592 unsigned writemask)
1593 {
1594 nir_store_deref(build, nir_build_deref_var(build, var), value, writemask);
1595 }
1596
1597 static inline void
nir_copy_var(nir_builder * build,nir_variable * dest,nir_variable * src)1598 nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
1599 {
1600 nir_copy_deref(build, nir_build_deref_var(build, dest),
1601 nir_build_deref_var(build, src));
1602 }
1603
1604 #undef nir_load_global
1605 static inline nir_ssa_def *
nir_load_global(nir_builder * build,nir_ssa_def * addr,unsigned align,unsigned num_components,unsigned bit_size)1606 nir_load_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
1607 unsigned num_components, unsigned bit_size)
1608 {
1609 nir_intrinsic_instr *load =
1610 nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global);
1611 load->num_components = num_components;
1612 load->src[0] = nir_src_for_ssa(addr);
1613 nir_intrinsic_set_align(load, align, 0);
1614 nir_ssa_dest_init(&load->instr, &load->dest,
1615 num_components, bit_size, NULL);
1616 nir_builder_instr_insert(build, &load->instr);
1617 return &load->dest.ssa;
1618 }
1619
1620 #undef nir_store_global
1621 static inline void
nir_store_global(nir_builder * build,nir_ssa_def * addr,unsigned align,nir_ssa_def * value,nir_component_mask_t write_mask)1622 nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,
1623 nir_ssa_def *value, nir_component_mask_t write_mask)
1624 {
1625 nir_intrinsic_instr *store =
1626 nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_global);
1627 store->num_components = value->num_components;
1628 store->src[0] = nir_src_for_ssa(value);
1629 store->src[1] = nir_src_for_ssa(addr);
1630 nir_intrinsic_set_write_mask(store,
1631 write_mask & BITFIELD_MASK(value->num_components));
1632 nir_intrinsic_set_align(store, align, 0);
1633 nir_builder_instr_insert(build, &store->instr);
1634 }
1635
1636 #undef nir_load_global_constant
1637 static inline nir_ssa_def *
nir_load_global_constant(nir_builder * build,nir_ssa_def * addr,unsigned align,unsigned num_components,unsigned bit_size)1638 nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,
1639 unsigned num_components, unsigned bit_size)
1640 {
1641 nir_intrinsic_instr *load =
1642 nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);
1643 load->num_components = num_components;
1644 load->src[0] = nir_src_for_ssa(addr);
1645 nir_intrinsic_set_align(load, align, 0);
1646 nir_ssa_dest_init(&load->instr, &load->dest,
1647 num_components, bit_size, NULL);
1648 nir_builder_instr_insert(build, &load->instr);
1649 return &load->dest.ssa;
1650 }
1651
1652 #undef nir_load_param
1653 static inline nir_ssa_def *
nir_load_param(nir_builder * build,uint32_t param_idx)1654 nir_load_param(nir_builder *build, uint32_t param_idx)
1655 {
1656 assert(param_idx < build->impl->function->num_params);
1657 nir_parameter *param = &build->impl->function->params[param_idx];
1658 return nir_build_load_param(build, param->num_components, param->bit_size, param_idx);
1659 }
1660
1661 /**
1662 * This function takes an I/O intrinsic like load/store_input,
1663 * and emits a sequence that calculates the full offset of that instruction,
1664 * including a stride to the base and component offsets.
1665 */
1666 static inline nir_ssa_def *
nir_build_calc_io_offset(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * base_stride,unsigned component_stride)1667 nir_build_calc_io_offset(nir_builder *b,
1668 nir_intrinsic_instr *intrin,
1669 nir_ssa_def *base_stride,
1670 unsigned component_stride)
1671 {
1672 /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
1673 nir_ssa_def *base_op = nir_imul_imm(b, base_stride, nir_intrinsic_base(intrin));
1674
1675 /* offset should be interpreted in relation to the base,
1676 * so the instruction effectively reads/writes another input/output
1677 * when it has an offset
1678 */
1679 nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
1680
1681 /* component is in bytes */
1682 unsigned const_op = nir_intrinsic_component(intrin) * component_stride;
1683
1684 return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op);
1685 }
1686
1687 /* calculate a `(1 << value) - 1` in ssa without overflows */
1688 static inline nir_ssa_def *
nir_mask(nir_builder * b,nir_ssa_def * bits,unsigned dst_bit_size)1689 nir_mask(nir_builder *b, nir_ssa_def *bits, unsigned dst_bit_size)
1690 {
1691 return nir_ushr(b, nir_imm_intN_t(b, -1, dst_bit_size),
1692 nir_isub_imm(b, dst_bit_size, nir_u2u32(b, bits)));
1693 }
1694
1695 static inline nir_ssa_def *
nir_f2b(nir_builder * build,nir_ssa_def * f)1696 nir_f2b(nir_builder *build, nir_ssa_def *f)
1697 {
1698 return nir_f2b1(build, f);
1699 }
1700
1701 static inline nir_ssa_def *
nir_i2b(nir_builder * build,nir_ssa_def * i)1702 nir_i2b(nir_builder *build, nir_ssa_def *i)
1703 {
1704 return nir_i2b1(build, i);
1705 }
1706
1707 static inline nir_ssa_def *
nir_b2f(nir_builder * build,nir_ssa_def * b,uint32_t bit_size)1708 nir_b2f(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)
1709 {
1710 switch (bit_size) {
1711 case 64: return nir_b2f64(build, b);
1712 case 32: return nir_b2f32(build, b);
1713 case 16: return nir_b2f16(build, b);
1714 default:
1715 unreachable("Invalid bit-size");
1716 };
1717 }
1718
1719 static inline nir_ssa_def *
nir_b2i(nir_builder * build,nir_ssa_def * b,uint32_t bit_size)1720 nir_b2i(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)
1721 {
1722 switch (bit_size) {
1723 case 64: return nir_b2i64(build, b);
1724 case 32: return nir_b2i32(build, b);
1725 case 16: return nir_b2i16(build, b);
1726 case 8: return nir_b2i8(build, b);
1727 default:
1728 unreachable("Invalid bit-size");
1729 };
1730 }
1731 static inline nir_ssa_def *
nir_load_barycentric(nir_builder * build,nir_intrinsic_op op,unsigned interp_mode)1732 nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,
1733 unsigned interp_mode)
1734 {
1735 unsigned num_components = op == nir_intrinsic_load_barycentric_model ? 3 : 2;
1736 nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op);
1737 nir_ssa_dest_init(&bary->instr, &bary->dest, num_components, 32, NULL);
1738 nir_intrinsic_set_interp_mode(bary, interp_mode);
1739 nir_builder_instr_insert(build, &bary->instr);
1740 return &bary->dest.ssa;
1741 }
1742
1743 static inline void
nir_jump(nir_builder * build,nir_jump_type jump_type)1744 nir_jump(nir_builder *build, nir_jump_type jump_type)
1745 {
1746 assert(jump_type != nir_jump_goto && jump_type != nir_jump_goto_if);
1747 nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type);
1748 nir_builder_instr_insert(build, &jump->instr);
1749 }
1750
1751 static inline void
nir_goto(nir_builder * build,struct nir_block * target)1752 nir_goto(nir_builder *build, struct nir_block *target)
1753 {
1754 assert(!build->impl->structured);
1755 nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto);
1756 jump->target = target;
1757 nir_builder_instr_insert(build, &jump->instr);
1758 }
1759
1760 static inline void
nir_goto_if(nir_builder * build,struct nir_block * target,nir_src cond,struct nir_block * else_target)1761 nir_goto_if(nir_builder *build, struct nir_block *target, nir_src cond,
1762 struct nir_block *else_target)
1763 {
1764 assert(!build->impl->structured);
1765 nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto_if);
1766 jump->condition = cond;
1767 jump->target = target;
1768 jump->else_target = else_target;
1769 nir_builder_instr_insert(build, &jump->instr);
1770 }
1771
1772 static inline nir_ssa_def *
nir_compare_func(nir_builder * b,enum compare_func func,nir_ssa_def * src0,nir_ssa_def * src1)1773 nir_compare_func(nir_builder *b, enum compare_func func,
1774 nir_ssa_def *src0, nir_ssa_def *src1)
1775 {
1776 switch (func) {
1777 case COMPARE_FUNC_NEVER:
1778 return nir_imm_int(b, 0);
1779 case COMPARE_FUNC_ALWAYS:
1780 return nir_imm_int(b, ~0);
1781 case COMPARE_FUNC_EQUAL:
1782 return nir_feq(b, src0, src1);
1783 case COMPARE_FUNC_NOTEQUAL:
1784 return nir_fneu(b, src0, src1);
1785 case COMPARE_FUNC_GREATER:
1786 return nir_flt(b, src1, src0);
1787 case COMPARE_FUNC_GEQUAL:
1788 return nir_fge(b, src0, src1);
1789 case COMPARE_FUNC_LESS:
1790 return nir_flt(b, src0, src1);
1791 case COMPARE_FUNC_LEQUAL:
1792 return nir_fge(b, src1, src0);
1793 }
1794 unreachable("bad compare func");
1795 }
1796
1797 static inline void
nir_scoped_memory_barrier(nir_builder * b,nir_scope scope,nir_memory_semantics semantics,nir_variable_mode modes)1798 nir_scoped_memory_barrier(nir_builder *b,
1799 nir_scope scope,
1800 nir_memory_semantics semantics,
1801 nir_variable_mode modes)
1802 {
1803 nir_scoped_barrier(b, NIR_SCOPE_NONE, scope, semantics, modes);
1804 }
1805
1806 static inline nir_ssa_def *
nir_type_convert(nir_builder * b,nir_ssa_def * src,nir_alu_type src_type,nir_alu_type dest_type)1807 nir_type_convert(nir_builder *b,
1808 nir_ssa_def *src,
1809 nir_alu_type src_type,
1810 nir_alu_type dest_type)
1811 {
1812 assert(nir_alu_type_get_type_size(src_type) == 0 ||
1813 nir_alu_type_get_type_size(src_type) == src->bit_size);
1814
1815 src_type = (nir_alu_type) (src_type | src->bit_size);
1816
1817 nir_op opcode =
1818 nir_type_conversion_op(src_type, dest_type, nir_rounding_mode_undef);
1819
1820 return nir_build_alu(b, opcode, src, NULL, NULL, NULL);
1821 }
1822
1823 static inline nir_ssa_def *
nir_convert_to_bit_size(nir_builder * b,nir_ssa_def * src,nir_alu_type type,unsigned bit_size)1824 nir_convert_to_bit_size(nir_builder *b,
1825 nir_ssa_def *src,
1826 nir_alu_type type,
1827 unsigned bit_size)
1828 {
1829 return nir_type_convert(b, src, type, (nir_alu_type) (type | bit_size));
1830 }
1831
1832 static inline nir_ssa_def *
nir_i2iN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1833 nir_i2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1834 {
1835 return nir_convert_to_bit_size(b, src, nir_type_int, bit_size);
1836 }
1837
1838 static inline nir_ssa_def *
nir_u2uN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1839 nir_u2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1840 {
1841 return nir_convert_to_bit_size(b, src, nir_type_uint, bit_size);
1842 }
1843
1844 static inline nir_ssa_def *
nir_b2bN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1845 nir_b2bN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1846 {
1847 return nir_convert_to_bit_size(b, src, nir_type_bool, bit_size);
1848 }
1849
1850 static inline nir_ssa_def *
nir_f2fN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1851 nir_f2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1852 {
1853 return nir_convert_to_bit_size(b, src, nir_type_float, bit_size);
1854 }
1855
1856 static inline nir_ssa_def *
nir_i2fN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1857 nir_i2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1858 {
1859 return nir_type_convert(b, src, nir_type_int,
1860 (nir_alu_type) (nir_type_float | bit_size));
1861 }
1862
1863 static inline nir_ssa_def *
nir_u2fN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1864 nir_u2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1865 {
1866 return nir_type_convert(b, src, nir_type_uint,
1867 (nir_alu_type) (nir_type_float | bit_size));
1868 }
1869
1870 static inline nir_ssa_def *
nir_f2uN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1871 nir_f2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1872 {
1873 return nir_type_convert(b, src, nir_type_float,
1874 (nir_alu_type) (nir_type_uint | bit_size));
1875 }
1876
1877 static inline nir_ssa_def *
nir_f2iN(nir_builder * b,nir_ssa_def * src,unsigned bit_size)1878 nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)
1879 {
1880 return nir_type_convert(b, src, nir_type_float,
1881 (nir_alu_type) (nir_type_int | bit_size));
1882 }
1883
1884 #endif /* NIR_BUILDER_H */
1885