/*
 * Copyright © 2017-2018 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#define GPU 600

#include "ir3_context.h"
#include "ir3_image.h"

/*
 * Handlers for instructions changed/added in a6xx:
 *
 * Starting with a6xx, isam and stib are used for SSBOs as well; stib and the
 * atomic instructions (used for both SSBO and image) use a new instruction
 * encoding compared to a4xx/a5xx.
 */

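/* Split an SSBO offset into a register offset plus an immediate offset on
 * compilers that support immediate SSBO offsets; otherwise the whole offset
 * stays in the register, the immediate part is zero, and the intrinsic's
 * base (which would carry an immediate offset) must also be zero.
 */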
static void
lower_ssbo_offset(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                  nir_src *offset_src,
                  struct ir3_instruction **offset, unsigned *imm_offset)
{
   if (ctx->compiler->has_ssbo_imm_offsets) {
      ir3_lower_imm_offset(ctx, intr, offset_src, 7, offset, imm_offset);
   } else {
      assert(nir_intrinsic_base(intr) == 0);
      *offset = ir3_get_src(ctx, offset_src)[0];
      *imm_offset = 0;
   }
}

/* src[] = { buffer_index, offset }. No const_index */
static void
emit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                         struct ir3_instruction **dst)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *offset;
   struct ir3_instruction *ldib;
   unsigned imm_offset_val;

   lower_ssbo_offset(ctx, intr, &intr->src[2], &offset, &imm_offset_val);
   struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);

   ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0,
                   imm_offset, 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = 1;
   switch (intr->def.bit_size) {
   case 8:
      /* This encodes the 8-bit SSBO load and matches the blob's encoding of
       * imageBuffer access using VK_FORMAT_R8 and the dedicated 8-bit
       * descriptor. No vectorization is possible.
       */
      assert(intr->num_components == 1);

      ldib->cat6.type = TYPE_U16;
      ldib->cat6.typed = true;
      break;
   case 16:
      ldib->cat6.type = TYPE_U16;
      break;
   default:
      ldib->cat6.type = TYPE_U32;
      break;
   }
   ldib->barrier_class = IR3_BARRIER_BUFFER_R;
   ldib->barrier_conflict = IR3_BARRIER_BUFFER_W;

   if (imm_offset_val) {
      assert(ctx->compiler->has_ssbo_imm_offsets);
      ldib->flags |= IR3_INSTR_IMM_OFFSET;
   }

   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}

/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
static void
emit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *stib, *val, *offset;
   unsigned wrmask = nir_intrinsic_write_mask(intr);
   unsigned ncomp = ffs(~wrmask) - 1;
   unsigned imm_offset_val;

   assert(wrmask == BITFIELD_MASK(intr->num_components));

   /* src0 is offset, src1 is immediate offset, src2 is value:
    */
   val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);

   /* Any 8-bit store is done on a single-component value that additionally
    * has to be masked to clear the higher bits, or it will malfunction.
    */
   if (intr->src[0].ssa->bit_size == 8) {
      assert(ncomp == 1);

      struct ir3_instruction *mask = create_immed_typed(b, 0xff, TYPE_U8);
      val = ir3_AND_B(b, val, 0, mask, 0);
      val->dsts[0]->flags |= IR3_REG_HALF;
   }

   lower_ssbo_offset(ctx, intr, &intr->src[3], &offset, &imm_offset_val);
   struct ir3_instruction *imm_offset = create_immed(b, imm_offset_val);

   stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0,
                   imm_offset, 0, val, 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = 1;
   switch (intr->src[0].ssa->bit_size) {
   case 8:
      /* As with ldib, this encodes the 8-bit SSBO store and matches the
       * blob's encoding of imageBuffer access using VK_FORMAT_R8 and the
       * extra 8-bit descriptor. No vectorization is possible and we have to
       * override the relevant field anyway.
       */
      stib->cat6.type = TYPE_U16;
      stib->cat6.iim_val = 4;
      stib->cat6.typed = true;
      break;
   case 16:
      stib->cat6.type = TYPE_U16;
      break;
   default:
      stib->cat6.type = TYPE_U32;
      break;
   }
   stib->barrier_class = IR3_BARRIER_BUFFER_W;
   stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;

   if (imm_offset_val) {
      assert(ctx->compiler->has_ssbo_imm_offsets);
      stib->flags |= IR3_INSTR_IMM_OFFSET;
   }

   ir3_handle_bindless_cat6(stib, intr->src[1]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(ctx->block, ctx->block->keeps, stib);
}

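/* Map a nir_atomic_op to the corresponding ATOMIC.B instruction.  Signed and
 * unsigned min/max share an opcode here; the signedness is carried by
 * cat6.type, which the callers set.
 */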
static struct ir3_instruction *
emit_atomic(struct ir3_builder *b, nir_atomic_op op,
            struct ir3_instruction *ibo, struct ir3_instruction *src0,
            struct ir3_instruction *src1)
{
   switch (op) {
   case nir_atomic_op_iadd:
      return ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_imin:
      return ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_umin:
      return ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_imax:
      return ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_umax:
      return ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_iand:
      return ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_ior:
      return ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_ixor:
      return ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_xchg:
      return ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0);
   case nir_atomic_op_cmpxchg:
      return ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0);
   default:
      unreachable("unsupported atomic op");
   }
}

/*
 * SSBO atomic intrinsics
 *
 * All of the SSBO atomic memory operations read a value from memory,
 * compute a new value using one of the operations below, write the new
 * value to memory, and return the original value read.
 *
 * All operations take 3 sources except CompSwap that takes 4. These
 * sources represent:
 *
 * 0: The SSBO buffer index.
 * 1: The offset into the SSBO buffer of the variable that the atomic
 *    operation will operate on.
 * 2: The data parameter to the atomic function (i.e. the value to add
 *    in, etc).
 * 3: For CompSwap only: the second data parameter.
 */
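/* Note: the ir3-specific variants handled below carry the offset in a later
 * source slot than described above: src[3] (or src[4] for CompSwap), with
 * src[2] holding the data.
 */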
static struct ir3_instruction *
emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy;
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);
   type_t type = nir_atomic_op_type(op) == nir_type_int ? TYPE_S32 : TYPE_U32;
   if (intr->def.bit_size == 64) {
      type = TYPE_ATOMIC_U64;
   }

   ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]);

   data = ir3_get_src(ctx, &intr->src[2])[0];

   /* So this gets a bit creative:
    *
    *    src0    - vecN offset/coords
    *    src1.x  - is actually the destination register
    *    src1.y  - is 'data' except for cmpxchg where src1.y is 'compare'
    *    src1.z  - is 'data' for cmpxchg
    *
    * Combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    *
    * Note that nir already multiplies the offset by four.
    */
   dummy = create_immed(b, 0);

   if (op == nir_atomic_op_cmpxchg) {
      src0 = ir3_get_src(ctx, &intr->src[4])[0];
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[3])[0];
      if (intr->def.bit_size == 64) {
         struct ir3_instruction *dummy2 = create_immed(b, 0);
         struct ir3_instruction *compare2 = ir3_get_src(ctx, &intr->src[3])[1];
         struct ir3_instruction *data2 = ir3_get_src(ctx, &intr->src[2])[1];
         src1 = ir3_collect(b, dummy, dummy2, compare, compare2, data, data2);
      } else {
         src1 = ir3_collect(b, dummy, compare, data);
      }
   } else {
      src0 = ir3_get_src(ctx, &intr->src[3])[0];
      if (intr->def.bit_size == 64) {
         struct ir3_instruction *dummy2 = create_immed(b, 0);
         struct ir3_instruction *data2 = ir3_get_src(ctx, &intr->src[2])[1];
         src1 = ir3_collect(b, dummy, dummy2, data, data2);
      } else {
         src1 = ir3_collect(b, dummy, data);
      }
   }

   atomic = emit_atomic(b, op, ibo, src0, src1);
   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 1;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(ctx->block, ctx->block->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   ir3_handle_nonuniform(atomic, intr);

   size_t num_results = intr->def.bit_size == 64 ? 2 : 1;
   struct ir3_instruction *defs[num_results];
   ir3_split_dest(b, defs, atomic, 0, num_results);
   return ir3_create_collect(b, defs, num_results);
}

/* src[] = { deref, coord, sample_index }. const_index[] = {} */
static void
emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *ldib;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);

   ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
                   ir3_create_collect(b, coords, ncoords), 0,
                   create_immed(b, 0), 0);
   ldib->dsts[0]->wrmask = MASK(intr->num_components);
   ldib->cat6.iim_val = intr->num_components;
   ldib->cat6.d = ncoords;
   ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   ldib->cat6.typed = true;
   ldib->barrier_class = IR3_BARRIER_IMAGE_R;
   ldib->barrier_conflict = IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(ldib, intr->src[0]);
   ir3_handle_nonuniform(ldib, intr);

   ir3_split_dest(b, dst, ldib, 0, intr->num_components);
}

/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
static void
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *stib;
   struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]);
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   enum pipe_format format = nir_intrinsic_format(intr);
   unsigned ncomp = ir3_get_num_components_for_image_format(format);

   /* src0 is offset, src1 is value:
    */
   stib =
      ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0,
               ir3_create_collect(b, coords, ncoords), 0, create_immed(b, 0), 0,
               ir3_create_collect(b, value, ncomp), 0);
   stib->cat6.iim_val = ncomp;
   stib->cat6.d = ncoords;
   stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   stib->cat6.typed = true;
   stib->barrier_class = IR3_BARRIER_IMAGE_W;
   stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(stib, intr->src[0]);
   ir3_handle_nonuniform(stib, intr);

   array_insert(ctx->block, ctx->block->keeps, stib);
}

/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */
static struct ir3_instruction *
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy;
   struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]);
   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0];
   unsigned ncoords = ir3_get_image_coords(intr, NULL);
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);

   ibo = ir3_image_to_ibo(ctx, intr->src[0]);

   /* So this gets a bit creative:
    *
    *    src0    - vecN offset/coords
    *    src1.x  - is actually the destination register
    *    src1.y  - is 'value' except for cmpxchg where src1.y is 'compare'
    *    src1.z  - is 'value' for cmpxchg
    *
    * Combining src and dest kinda doesn't work out so well with how
    * scheduling and RA work. So we create a dummy src2 which is tied to the
    * destination in RA (i.e. must be allocated to the same vec2/vec3
    * register) and then immediately extract the first component.
    */
   dummy = create_immed(b, 0);
   src0 = ir3_create_collect(b, coords, ncoords);

   if (op == nir_atomic_op_cmpxchg) {
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0];
      src1 = ir3_collect(b, dummy, compare, value);
   } else {
      src1 = ir3_collect(b, dummy, value);
   }

   atomic = emit_atomic(b, op, ibo, src0, src1);
   atomic->cat6.iim_val = 1;
   atomic->cat6.d = ncoords;
   atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
   atomic->cat6.typed = true;
   atomic->barrier_class = IR3_BARRIER_IMAGE_W;
   atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
   ir3_handle_bindless_cat6(atomic, intr->src[0]);

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(ctx->block, ctx->block->keeps, atomic);

   atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask;
   ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]);
   ir3_handle_nonuniform(atomic, intr);
   struct ir3_instruction *split;
   ir3_split_dest(b, &split, atomic, 0, 1);
   return split;
}

static void
emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
                          struct ir3_instruction **dst)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]);
   struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0);
   resinfo->cat6.iim_val = 1;
   resinfo->cat6.d = intr->num_components;
   resinfo->cat6.type = TYPE_U32;
   resinfo->cat6.typed = false;
   /* resinfo has no writemask and always writes out 3 components: */
   compile_assert(ctx, intr->num_components <= 3);
   resinfo->dsts[0]->wrmask = MASK(3);
   ir3_handle_bindless_cat6(resinfo, intr->src[0]);
   ir3_handle_nonuniform(resinfo, intr);

   ir3_split_dest(b, dst, resinfo, 0, intr->num_components);
}

static void
emit_intrinsic_load_global_ir3(struct ir3_context *ctx,
                               nir_intrinsic_instr *intr,
                               struct ir3_instruction **dst)
{
   struct ir3_builder *b = &ctx->build;
   unsigned dest_components = nir_intrinsic_dest_components(intr);
   struct ir3_instruction *addr, *offset;

   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
                      ir3_get_src(ctx, &intr->src[0])[1]);

   struct ir3_instruction *load;

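   /* LDG can encode a small constant offset directly; the (1 << 8) bound
    * below presumably reflects the width of that immediate field.  Anything
    * larger (or non-constant) goes through the LDG.A register-offset path.
    */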
   bool const_offset_in_bounds =
      nir_src_is_const(intr->src[1]) &&
      nir_src_as_int(intr->src[1]) < (1 << 8) &&
      nir_src_as_int(intr->src[1]) > -(1 << 8);

   if (const_offset_in_bounds) {
      load = ir3_LDG(b, addr, 0,
                     create_immed(b, nir_src_as_int(intr->src[1]) * 4),
                     0, create_immed(b, dest_components), 0);
   } else {
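      /* The register offset appears to be in dwords on a6xx but in bytes on
       * a7xx (an assumption based on the *4 applied on the constant path
       * above), hence the shift below.
       */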
      unsigned shift = ctx->compiler->gen >= 7 ? 2 : 0;
      offset = ir3_get_src(ctx, &intr->src[1])[0];
      if (shift) {
         /* A7XX TODO: Move to NIR for it to be properly optimized? */
         offset = ir3_SHL_B(b, offset, 0, create_immed(b, shift), 0);
      }
      load =
         ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                   create_immed(b, 0), 0, create_immed(b, dest_components), 0);
   }

   load->cat6.type = type_uint_size(intr->def.bit_size);
   load->dsts[0]->wrmask = MASK(dest_components);

   load->barrier_class = IR3_BARRIER_BUFFER_R;
   load->barrier_conflict = IR3_BARRIER_BUFFER_W;

   ir3_split_dest(b, dst, load, 0, dest_components);
}

static void
emit_intrinsic_store_global_ir3(struct ir3_context *ctx,
                                nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *value, *addr, *offset;
   unsigned ncomp = nir_intrinsic_src_components(intr, 0);

   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0],
                      ir3_get_src(ctx, &intr->src[1])[1]);

   value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp);

   struct ir3_instruction *stg;

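   /* Same idea as the load path: small constant offsets are encoded directly
    * in STG, anything else goes through STG.A with a register offset.  The
    * (1 << 10) bound differs from the load path's (1 << 8), presumably
    * matching a wider immediate field in the STG encoding.
    */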
   bool const_offset_in_bounds = nir_src_is_const(intr->src[2]) &&
                                 nir_src_as_int(intr->src[2]) < (1 << 10) &&
                                 nir_src_as_int(intr->src[2]) > -(1 << 10);

   if (const_offset_in_bounds) {
      stg = ir3_STG(b, addr, 0,
                    create_immed(b, nir_src_as_int(intr->src[2]) * 4), 0,
                    value, 0,
                    create_immed(b, ncomp), 0);
   } else {
      offset = ir3_get_src(ctx, &intr->src[2])[0];
      if (ctx->compiler->gen >= 7) {
         /* A7XX TODO: Move to NIR for it to be properly optimized? */
         offset = ir3_SHL_B(b, offset, 0, create_immed(b, 2), 0);
      }
      stg =
         ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0,
                   create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0);
   }

   stg->cat6.type = type_uint_size(intr->src[0].ssa->bit_size);
   stg->cat6.iim_val = 1;

   array_insert(ctx->block, ctx->block->keeps, stg);

   stg->barrier_class = IR3_BARRIER_BUFFER_W;
   stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
}

static struct ir3_instruction *
emit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir3_builder *b = &ctx->build;
   struct ir3_instruction *addr, *atomic, *src1;
   struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[1])[0];
   nir_atomic_op op = nir_intrinsic_atomic_op(intr);
   type_t type = nir_atomic_op_type(op) == nir_type_int ? TYPE_S32 : TYPE_U32;
   if (intr->def.bit_size == 64) {
      type = TYPE_ATOMIC_U64;
   }

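   /* The 64-bit address arrives as a vec2 and becomes src0 of the ATOMIC.G
    * instruction; src1 carries the data (and the compare value for cmpxchg).
    */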
   addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0],
                      ir3_get_src(ctx, &intr->src[0])[1]);

   if (op == nir_atomic_op_cmpxchg) {
      struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0];
      if (intr->def.bit_size == 64) {
         struct ir3_instruction *compare2 = ir3_get_src(ctx, &intr->src[2])[1];
         struct ir3_instruction *value2 = ir3_get_src(ctx, &intr->src[1])[1];
         src1 = ir3_collect(b, compare, compare2, value, value2);
      } else {
         src1 = ir3_collect(b, compare, value);
      }
   } else {
      if (intr->def.bit_size == 64) {
         struct ir3_instruction *value2 = ir3_get_src(ctx, &intr->src[1])[1];
         src1 = ir3_collect(b, value, value2);
      } else {
         src1 = value;
      }
   }

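   /* Signed min/max force cat6.type to TYPE_S32 so the comparison is signed;
    * the other ops keep the type chosen above.
    */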
   switch (op) {
   case nir_atomic_op_iadd:
      atomic = ir3_ATOMIC_G_ADD(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_imin:
      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_atomic_op_umin:
      atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_imax:
      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
      type = TYPE_S32;
      break;
   case nir_atomic_op_umax:
      atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_iand:
      atomic = ir3_ATOMIC_G_AND(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_ior:
      atomic = ir3_ATOMIC_G_OR(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_ixor:
      atomic = ir3_ATOMIC_G_XOR(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_xchg:
      atomic = ir3_ATOMIC_G_XCHG(b, addr, 0, src1, 0);
      break;
   case nir_atomic_op_cmpxchg:
      atomic = ir3_ATOMIC_G_CMPXCHG(b, addr, 0, src1, 0);
      break;
   default:
      unreachable("Unknown global atomic op");
   }

   atomic->cat6.iim_val = 1;
   atomic->cat6.d = 1;
   atomic->cat6.type = type;
   atomic->barrier_class = IR3_BARRIER_BUFFER_W;
   atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
   atomic->dsts[0]->wrmask = MASK(intr->def.bit_size == 64 ? 2 : 1);

   /* even if nothing consumes the result, we can't DCE the instruction: */
   array_insert(ctx->block, ctx->block->keeps, atomic);

   return atomic;
}

const struct ir3_context_funcs ir3_a6xx_funcs = {
   .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo,
   .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo,
   .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo,
   .emit_intrinsic_load_image = emit_intrinsic_load_image,
   .emit_intrinsic_store_image = emit_intrinsic_store_image,
   .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image,
   .emit_intrinsic_image_size = emit_intrinsic_image_size,
   .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3,
   .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3,
   .emit_intrinsic_atomic_global = emit_intrinsic_atomic_global,
};