/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl/isl.h"

#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

static nir_ssa_def *
_load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_image_deref_load_param_intel);
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_intrinsic_set_base(load, offset / 4);

   switch (offset) {
   case BRW_IMAGE_PARAM_OFFSET_OFFSET:
   case BRW_IMAGE_PARAM_SWIZZLING_OFFSET:
      load->num_components = 2;
      break;
   case BRW_IMAGE_PARAM_TILING_OFFSET:
   case BRW_IMAGE_PARAM_SIZE_OFFSET:
      load->num_components = 3;
      break;
   case BRW_IMAGE_PARAM_STRIDE_OFFSET:
      load->num_components = 4;
      break;
   default:
      unreachable("Invalid param offset");
   }
   nir_ssa_dest_init(&load->instr, &load->dest,
                     load->num_components, 32, NULL);

   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

#define load_image_param(b, d, o) \
   _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
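
/* For example, load_image_param(b, deref, SIZE) expands to
 * _load_image_param(b, deref, BRW_IMAGE_PARAM_SIZE_OFFSET) and yields the
 * three-component image size vector.
 */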

static nir_ssa_def *
image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
                         nir_ssa_def *coord)
{
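   /* nir_ilt() compares per component, so cmp.i holds coord.i < size.i.
    * AND the used coordinate components together; for a 2-D image this
    * computes (x < size.x) && (y < size.y).
    */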
   nir_ssa_def *size = load_image_param(b, deref, SIZE);
   nir_ssa_def *cmp = nir_ilt(b, coord, size);

   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   nir_ssa_def *in_bounds = nir_imm_true(b);
   for (unsigned i = 0; i < coord_comps; i++)
      in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));

   return in_bounds;
}

/** Calculate the offset in memory of the texel given by \p coord.
 *
 * This is meant to be used with untyped surface messages to access a tiled
 * surface, which involves manually taking into account the tiling and
 * swizzling modes of the surface, so it will hopefully not happen very
 * often.
 *
 * The tiling algorithm implemented here matches either the X or Y tiling
 * layouts supported by the hardware depending on the tiling coefficients
 * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 * the hardware tiling format.
 */
static nir_ssa_def *
image_address(nir_builder *b, const struct intel_device_info *devinfo,
              nir_deref_instr *deref, nir_ssa_def *coord)
{
   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
       glsl_sampler_type_is_array(deref->type)) {
      /* It's easier if 1D arrays are treated like 2D arrays */
      coord = nir_vec3(b, nir_channel(b, coord, 0),
                          nir_imm_int(b, 0),
                          nir_channel(b, coord, 1));
   } else {
      unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
      coord = nir_channels(b, coord, (1 << dims) - 1);
   }

   nir_ssa_def *offset = load_image_param(b, deref, OFFSET);
   nir_ssa_def *tiling = load_image_param(b, deref, TILING);
   nir_ssa_def *stride = load_image_param(b, deref, STRIDE);

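   /* As used below: offset holds the (x, y) surface offset, tiling holds
    * the log2 tiling coefficients, and stride packs the Bpp, the row pitch
    * and the horizontal/vertical slice strides.
    */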
   /* Shift the coordinates by the fixed surface offset.  It may be non-zero
    * if the image is a single slice of a higher-dimensional surface, or if a
    * non-zero mipmap level of the surface is bound to the pipeline.  The
    * offset needs to be applied here rather than at surface state set-up time
    * because the desired slice-level may start mid-tile, so simply shifting
    * the surface base address wouldn't give a well-formed tiled surface in
    * the general case.
    */
   nir_ssa_def *xypos = (coord->num_components == 1) ?
                        nir_vec2(b, coord, nir_imm_int(b, 0)) :
                        nir_channels(b, coord, 0x3);
   xypos = nir_iadd(b, xypos, offset);

   /* The layout of 3-D textures in memory is sort-of like a tiling
    * format.  At each miplevel, the slices are arranged in rows of
    * 2^level slices per row.  The slice row is stored in tmp.y and
    * the slice within the row is stored in tmp.x.
    *
    * The layout of 2-D array textures and cubemaps is much simpler:
    * Depending on whether the ARYSPC_LOD0 layout is in use it will be
    * stored in memory as an array of slices, each one being a 2-D
    * arrangement of miplevels, or as a 2-D arrangement of miplevels,
    * each one being an array of slices.  In either case the separation
    * between slices of the same LOD is equal to the qpitch value
    * provided as stride.w.
    *
    * This code can be made to handle both 2-D array textures and 3-D
    * textures by passing in the miplevel as tile.z for 3-D textures
    * and 0 in tile.z for 2-D array textures.
    *
    * See Volume 1 Part 1 of the Gfx7 PRM, sections 6.18.4.7 "Surface
    * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
    * of the hardware 3D texture and 2D array layouts.
    */
   if (coord->num_components > 2) {
      /* Decompose z into a major (tmp.y) and a minor (tmp.x)
       * index.
       */
      nir_ssa_def *z = nir_channel(b, coord, 2);
      nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
                                  nir_channel(b, tiling, 2));
      nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));

      /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
       * slice offset.
       */
      xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
                                             nir_channels(b, stride, 0xc)));
   }

   nir_ssa_def *addr;
   if (coord->num_components > 1) {
      /* Calculate the major/minor x and y indices.  In order to
       * accommodate both X and Y tiling, the Y-major tiling format is
       * treated as being a bunch of narrow X-tiles placed next to each
       * other.  This means that the tile width for Y-tiling is actually
       * the width of one sub-column of the Y-major tile where each 4K
       * tile has 8 512B sub-columns.
       *
       * The major Y value is the row of tiles in which the pixel lives.
       * The major X value is the tile sub-column in which the pixel
       * lives; for X tiling, this is the same as the tile column, for Y
       * tiling, each tile has 8 sub-columns.  The minor X and Y indices
       * are the position within the sub-column.
       */

      /* Calculate the minor x and y indices. */
      nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
                                    nir_channels(b, tiling, 0x3));
      nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3));

      /* Calculate the texel index from the start of the tile row and the
       * vertical coordinate of the row.
       * Equivalent to:
       *   tmp.x = (major.x << tile.y << tile.x) +
       *           (minor.y << tile.x) + minor.x
       *   tmp.y = major.y << tile.y
       */
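      /* For illustration, with hypothetical tiling coefficients
       * tile = (2, 3, 0), i.e. tiles 4 texels wide and 8 rows tall, this
       * reduces to:
       *    tmp.x = (((major.x << 3) + minor.y) << 2) + minor.x
       *    tmp.y = major.y << 3
       */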
      nir_ssa_def *idx_x, *idx_y;
      idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
      idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
      idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));

      /* Add it to the start of the tile row. */
      nir_ssa_def *idx;
      idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
      idx = nir_iadd(b, idx, idx_x);

      /* Multiply by the Bpp value. */
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));

      if (devinfo->ver < 8 && !devinfo->is_baytrail) {
         /* Take into account the two dynamically specified shifts.  Both are
          * used to implement swizzling of X-tiled surfaces.  For Y-tiled
          * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
          * address, so a swz value of 0xff (actually interpreted as 31 by the
          * hardware) will be provided to cause the relevant bit of tmp.y to
          * be zero and turn the first XOR into the identity.  For linear
          * surfaces or platforms lacking address swizzling both shifts will
          * be 0xff, causing the relevant bits of both tmp.x and tmp.y to be
          * zero, which effectively disables swizzling.
          */
         nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING);
         nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
         nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));

         /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
         nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
                                     nir_imm_int(b, 1 << 6));
         addr = nir_ixor(b, addr, bit);
      }
   } else {
      /* Multiply by the Bpp/stride value.  Note that the addr.y may be
       * non-zero even if the image is one-dimensional because a vertical
       * offset may have been applied above to select a non-zero slice or
       * level of a higher-dimensional texture.
       */
      nir_ssa_def *idx;
      idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
      idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));
   }

   return addr;
}

struct format_info {
   const struct isl_format_layout *fmtl;
   unsigned chans;
   unsigned bits[4];
};

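/** Gather the channel count and per-channel bit widths of \p fmt.  For
 * example, ISL_FORMAT_R8G8B8A8_UNORM yields chans = 4 and
 * bits = { 8, 8, 8, 8 }.
 */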
static struct format_info
get_format_info(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return (struct format_info) {
      .fmtl = fmtl,
      .chans = isl_format_get_num_channels(fmt),
      .bits = {
         fmtl->channels.r.bits,
         fmtl->channels.g.bits,
         fmtl->channels.b.bits,
         fmtl->channels.a.bits
      },
   };
}

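/** Convert a color value loaded as \p lower_fmt into the value a load from
 * an image of format \p image_fmt would have produced, expanded to
 * \p dest_components components.  For example, an RGBA8 load lowered to
 * R32_UINT is unpacked into four 8-bit channels and then converted from
 * UNORM to float.
 */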
static nir_ssa_def *
convert_color_for_load(nir_builder *b, const struct intel_device_info *devinfo,
                       nir_ssa_def *color,
                       enum isl_format image_fmt, enum isl_format lower_fmt,
                       unsigned dest_components)
{
   if (image_fmt == lower_fmt)
      goto expand_vec;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      color = nir_format_unpack_11f11f10f(b, color);
      goto expand_vec;
   }

   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   const bool needs_sign_extension =
      isl_format_has_snorm_channel(image_fmt) ||
      isl_format_has_sint_channel(image_fmt);

   /* We only check the red channel to detect if we need to pack/unpack */
   assert(image.bits[0] != lower.bits[0] ||
          memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      if (needs_sign_extension)
         color = nir_format_unpack_sint(b, color, image.bits, image.chans);
      else
         color = nir_format_unpack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* On IVB, we rely on the undocumented behavior that typed reads from
       * surfaces of the unsupported R8 and R16 formats return useful data in
       * their least significant bits.  However, the data in the high bits is
       * garbage so we have to discard it.
       */
      if (devinfo->verx10 == 70 &&
          (lower_fmt == ISL_FORMAT_R16_UINT ||
           lower_fmt == ISL_FORMAT_R8_UINT))
         color = nir_format_mask_uvec(b, color, lower.bits);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
                                                  image.bits[0]);
      }

      if (needs_sign_extension)
         color = nir_format_sign_extend_ivec(b, color, image.bits);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_unorm_to_float(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_snorm_to_float(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_unpack_half_2x16_split_x(b, color);
      break;

   case ISL_UINT:
   case ISL_SINT:
      break;

   default:
      unreachable("Invalid image channel type");
   }

expand_vec:
   assert(dest_components == 1 || dest_components == 4);
   assert(color->num_components <= dest_components);
   if (color->num_components == dest_components)
      return color;

   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < color->num_components; i++)
      comps[i] = nir_channel(b, color, i);

   for (unsigned i = color->num_components; i < 3; i++)
      comps[i] = nir_imm_int(b, 0);

   if (color->num_components < 4) {
      if (isl_format_has_int_channel(image_fmt))
         comps[3] = nir_imm_int(b, 1);
      else
         comps[3] = nir_imm_float(b, 1);
   }

   return nir_vec(b, comps, dest_components);
}

static bool
lower_image_load_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);
      const unsigned dest_components = intrin->num_components;

      /* Use an undef to hold the uses of the load while we do the color
       * conversion.
       */
      nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);

      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      intrin->dest.ssa.num_components = intrin->num_components;

      b->cursor = nir_after_instr(&intrin->instr);

      nir_ssa_def *color = convert_color_for_load(b, devinfo,
                                                  &intrin->dest.ssa,
                                                  image_fmt, lower_fmt,
                                                  dest_components);

      nir_ssa_def_rewrite_uses(placeholder, color);
      nir_instr_remove(placeholder->parent_instr);
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;
      const unsigned dest_components = intrin->num_components;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_load = nir_iand(b, do_load, is_raw);
      }
      nir_push_if(b, do_load);

      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_ssa_def *load =
         nir_image_deref_load_raw_intel(b, image_fmtl->bpb / 32, 32,
                                        &deref->dest.ssa, addr);

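      /* Out-of-bounds (or non-RAW-surface) loads skip the raw load and
       * return zero through the phi below.
       */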
      nir_push_else(b, NULL);

      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);

      nir_pop_if(b, NULL);

      nir_ssa_def *value = nir_if_phi(b, load, zero);

      nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
                                                  image_fmt, raw_fmt,
                                                  dest_components);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, color);
   }

   return true;
}

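/** Convert \p color from the value the shader intended to store in an image
 * of format \p image_fmt into the raw value to write in \p lower_fmt.  This
 * is the inverse of convert_color_for_load(); e.g. an RGBA8 store lowered to
 * R32_UINT converts from float to UNORM and packs the four 8-bit channels
 * into a single 32-bit word.
 */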
static nir_ssa_def *
convert_color_for_store(nir_builder *b, const struct intel_device_info *devinfo,
                        nir_ssa_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt)
{
   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   color = nir_channels(b, color, (1 << image.chans) - 1);

   if (image_fmt == lower_fmt)
      return color;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      return nir_format_pack_11f11f10f(b, color);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_unorm(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_snorm(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_format_float_to_half(b, color);
      break;

   case ISL_UINT:
      color = nir_format_clamp_uint(b, color, image.bits);
      break;

   case ISL_SINT:
      color = nir_format_clamp_sint(b, color, image.bits);
      break;

   default:
      unreachable("Invalid image channel type");
   }

   if (image.bits[0] < 32 &&
       (isl_format_has_snorm_channel(image_fmt) ||
        isl_format_has_sint_channel(image_fmt)))
      color = nir_format_mask_uvec(b, color, image.bits);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      color = nir_format_pack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
                                                  lower.bits[0]);
      }
   }

   return color;
}

static bool
lower_image_store_instr(nir_builder *b,
                        const struct intel_device_info *devinfo,
                        nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only surfaces, we trust that the hardware can just do the
    * conversion for us.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);

      /* Color conversion goes before the store */
      b->cursor = nir_before_instr(&intrin->instr);

      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, lower_fmt);
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[3],
                            nir_src_for_ssa(color));
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->verx10 == 70) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gfx7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_store = nir_iand(b, do_store, is_raw);
      }
      nir_push_if(b, do_store);

      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, raw_fmt);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_store_raw_intel);
      store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      store->src[1] = nir_src_for_ssa(addr);
      store->src[2] = nir_src_for_ssa(color);
      store->num_components = image_fmtl->bpb / 32;
      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   }

   return true;
}

static bool
lower_image_atomic_instr(nir_builder *b,
                         const struct intel_device_info *devinfo,
                         nir_intrinsic_instr *intrin)
{
   if (devinfo->verx10 >= 75)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* Use an undef to hold the uses of the load conversion. */
   nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, placeholder);

   /* Check the first component of the size field to find out if the
    * image is bound.  Necessary on IVB for typed atomics because
    * they don't seem to respect null surfaces and will happily
    * corrupt or read random memory when no image is bound.
    */
   nir_ssa_def *size = load_image_param(b, deref, SIZE);
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));

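   /* Re-emit the original atomic, now predicated on the image being
    * bound; unbound images produce zero through the phi below.
    */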
   nir_builder_instr_insert(b, &intrin->instr);

   nir_pop_if(b, NULL);

   nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
   nir_ssa_def_rewrite_uses(placeholder, result);

   return true;
}

static bool
lower_image_size_instr(nir_builder *b,
                       const struct intel_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only images, we have an actual image surface so we fall back
    * and let the back-end emit a TXS for this.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   if (var->data.image.format == PIPE_FORMAT_NONE)
      return false;

   /* If we have a matching typed format, then we have an actual image surface
    * so we fall back and let the back-end emit a TXS for this.
    */
   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);
   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
      return false;

   assert(nir_src_as_uint(intrin->src[1]) == 0);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_ssa_def *size = load_image_param(b, deref, SIZE);

   nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL };

   assert(nir_intrinsic_image_dim(intrin) != GLSL_SAMPLER_DIM_CUBE);
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   for (unsigned c = 0; c < coord_comps; c++)
      comps[c] = nir_channel(b, size, c);

   for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c)
      comps[c] = nir_imm_int(b, 1);

   nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, vec);

   return true;
}

static bool
brw_nir_lower_storage_image_instr(nir_builder *b,
                                  nir_instr *instr,
                                  void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   const struct intel_device_info *devinfo = cb_data;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_image_deref_load:
      return lower_image_load_instr(b, devinfo, intrin);

   case nir_intrinsic_image_deref_store:
      return lower_image_store_instr(b, devinfo, intrin);

   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      return lower_image_atomic_instr(b, devinfo, intrin);

   case nir_intrinsic_image_deref_size:
      return lower_image_size_instr(b, devinfo, intrin);

   default:
      /* Nothing to do */
      return false;
   }
}

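/** Lower storage image loads, stores, atomics and size queries to
 * operations the hardware supports, converting colors between the image
 * format and the lowered surface format and falling back to raw untyped
 * access with manual bounds checking and address tiling when no typed
 * format matches.  A minimal usage sketch from a driver pass loop (the
 * NIR_PASS invocation below is illustrative, not part of this file):
 *
 *    NIR_PASS(progress, nir, brw_nir_lower_storage_image, devinfo);
 */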
bool
brw_nir_lower_storage_image(nir_shader *shader,
                            const struct intel_device_info *devinfo)
{
   bool progress = false;

   const nir_lower_image_options image_options = {
      .lower_cube_size = true,
   };

   progress |= nir_lower_image(shader, &image_options);

   progress |= nir_shader_instructions_pass(shader,
                                            brw_nir_lower_storage_image_instr,
                                            nir_metadata_none,
                                            (void *)devinfo);

   return progress;
}