/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "isl/isl.h"
25 
26 #include "brw_nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/nir/nir_format_convert.h"
29 
30 static nir_ssa_def *
_load_image_param(nir_builder * b,nir_deref_instr * deref,unsigned offset)31 _load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
32 {
33    nir_intrinsic_instr *load =
34       nir_intrinsic_instr_create(b->shader,
35                                  nir_intrinsic_image_deref_load_param_intel);
36    load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
37    nir_intrinsic_set_base(load, offset / 4);
38 
39    switch (offset) {
40    case BRW_IMAGE_PARAM_OFFSET_OFFSET:
41    case BRW_IMAGE_PARAM_SWIZZLING_OFFSET:
42       load->num_components = 2;
43       break;
44    case BRW_IMAGE_PARAM_TILING_OFFSET:
45    case BRW_IMAGE_PARAM_SIZE_OFFSET:
46       load->num_components = 3;
47       break;
48    case BRW_IMAGE_PARAM_STRIDE_OFFSET:
49       load->num_components = 4;
50       break;
51    default:
52       unreachable("Invalid param offset");
53    }
54    nir_ssa_dest_init(&load->instr, &load->dest,
55                      load->num_components, 32, NULL);
56 
57    nir_builder_instr_insert(b, &load->instr);
58    return &load->dest.ssa;
59 }
60 
/* Convenience wrapper so callers can write load_image_param(b, deref, SIZE)
 * instead of spelling out BRW_IMAGE_PARAM_SIZE_OFFSET by hand.
 */
#define load_image_param(b, d, o) \
   _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
63 
64 static nir_ssa_def *
image_coord_is_in_bounds(nir_builder * b,nir_deref_instr * deref,nir_ssa_def * coord)65 image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
66                          nir_ssa_def *coord)
67 {
68    nir_ssa_def *size = load_image_param(b, deref, SIZE);
69    nir_ssa_def *cmp = nir_ilt(b, coord, size);
70 
71    unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
72    nir_ssa_def *in_bounds = nir_imm_true(b);
73    for (unsigned i = 0; i < coord_comps; i++)
74       in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));
75 
76    return in_bounds;
77 }
78 
/** Calculate the offset in memory of the texel given by \p coord.
 *
 * This is meant to be used with untyped surface messages to access a tiled
 * surface, what involves taking into account the tiling and swizzling modes
 * of the surface manually so it will hopefully not happen very often.
 *
 * The tiling algorithm implemented here matches either the X or Y tiling
 * layouts supported by the hardware depending on the tiling coefficients
 * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 * the hardware tiling format.
 *
 * \param b        builder positioned where the address math is emitted
 * \param devinfo  device info; on pre-Gen8 non-Baytrail parts an extra
 *                 address-swizzling step is applied at the end
 * \param deref    deref of the image variable being accessed
 * \param coord    integer texel coordinate vector
 * \return         offset of the texel relative to the surface base
 */
static nir_ssa_def *
image_address(nir_builder *b, const struct gen_device_info *devinfo,
              nir_deref_instr *deref, nir_ssa_def *coord)
{
   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
       glsl_sampler_type_is_array(deref->type)) {
      /* It's easier if 1D arrays are treated like 2D arrays */
      coord = nir_vec3(b, nir_channel(b, coord, 0),
                          nir_imm_int(b, 0),
                          nir_channel(b, coord, 1));
   } else {
      /* Drop any unused trailing components of the coordinate vector. */
      unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
      coord = nir_channels(b, coord, (1 << dims) - 1);
   }

   /* Image parameters pushed as uniforms: fixed XY offset, tiling shift
    * coefficients, and per-axis strides (see struct brw_image_param).
    */
   nir_ssa_def *offset = load_image_param(b, deref, OFFSET);
   nir_ssa_def *tiling = load_image_param(b, deref, TILING);
   nir_ssa_def *stride = load_image_param(b, deref, STRIDE);

   /* Shift the coordinates by the fixed surface offset.  It may be non-zero
    * if the image is a single slice of a higher-dimensional surface, or if a
    * non-zero mipmap level of the surface is bound to the pipeline.  The
    * offset needs to be applied here rather than at surface state set-up time
    * because the desired slice-level may start mid-tile, so simply shifting
    * the surface base address wouldn't give a well-formed tiled surface in
    * the general case.
    */
   nir_ssa_def *xypos = (coord->num_components == 1) ?
                        nir_vec2(b, coord, nir_imm_int(b, 0)) :
                        nir_channels(b, coord, 0x3);
   xypos = nir_iadd(b, xypos, offset);

   /* The layout of 3-D textures in memory is sort-of like a tiling
    * format.  At each miplevel, the slices are arranged in rows of
    * 2^level slices per row.  The slice row is stored in tmp.y and
    * the slice within the row is stored in tmp.x.
    *
    * The layout of 2-D array textures and cubemaps is much simpler:
    * Depending on whether the ARYSPC_LOD0 layout is in use it will be
    * stored in memory as an array of slices, each one being a 2-D
    * arrangement of miplevels, or as a 2D arrangement of miplevels,
    * each one being an array of slices.  In either case the separation
    * between slices of the same LOD is equal to the qpitch value
    * provided as stride.w.
    *
    * This code can be made to handle either 2D arrays and 3D textures
    * by passing in the miplevel as tile.z for 3-D textures and 0 in
    * tile.z for 2-D array textures.
    *
    * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
    * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
    * of the hardware 3D texture and 2D array layouts.
    */
   if (coord->num_components > 2) {
      /* Decompose z into a major (tmp.y) and a minor (tmp.x)
       * index.
       */
      nir_ssa_def *z = nir_channel(b, coord, 2);
      nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
                                  nir_channel(b, tiling, 2));
      nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));

      /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
       * slice offset.
       */
      xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
                                             nir_channels(b, stride, 0xc)));
   }

   nir_ssa_def *addr;
   if (coord->num_components > 1) {
      /* Calculate the major/minor x and y indices.  In order to
       * accommodate both X and Y tiling, the Y-major tiling format is
       * treated as being a bunch of narrow X-tiles placed next to each
       * other.  This means that the tile width for Y-tiling is actually
       * the width of one sub-column of the Y-major tile where each 4K
       * tile has 8 512B sub-columns.
       *
       * The major Y value is the row of tiles in which the pixel lives.
       * The major X value is the tile sub-column in which the pixel
       * lives; for X tiling, this is the same as the tile column, for Y
       * tiling, each tile has 8 sub-columns.  The minor X and Y indices
       * are the position within the sub-column.
       */

      /* Calculate the minor x and y indices. */
      nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
                                       nir_channels(b, tiling, 0x3));
      nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3));

      /* Calculate the texel index from the start of the tile row and the
       * vertical coordinate of the row.
       * Equivalent to:
       *   tmp.x = (major.x << tile.y << tile.x) +
       *           (minor.y << tile.x) + minor.x
       *   tmp.y = major.y << tile.y
       */
      nir_ssa_def *idx_x, *idx_y;
      idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
      idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
      idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));

      /* Add it to the start of the tile row. */
      nir_ssa_def *idx;
      idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
      idx = nir_iadd(b, idx, idx_x);

      /* Multiply by the Bpp value. */
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));

      if (devinfo->gen < 8 && !devinfo->is_baytrail) {
         /* Take into account the two dynamically specified shifts.  Both are
          * used to implement swizzling of X-tiled surfaces.  For Y-tiled
          * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
          * address, so a swz value of 0xff (actually interpreted as 31 by the
          * hardware) will be provided to cause the relevant bit of tmp.y to
          * be zero and turn the first XOR into the identity.  For linear
          * surfaces or platforms lacking address swizzling both shifts will
          * be 0xff causing the relevant bits of both tmp.x and .y to be zero,
          * what effectively disables swizzling.
          */
         nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING);
         nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
         nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));

         /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
         nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
                                        nir_imm_int(b, 1 << 6));
         addr = nir_ixor(b, addr, bit);
      }
   } else {
      /* Multiply by the Bpp/stride value.  Note that the addr.y may be
       * non-zero even if the image is one-dimensional because a vertical
       * offset may have been applied above to select a non-zero slice or
       * level of a higher-dimensional texture.
       */
      nir_ssa_def *idx;
      idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
      idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));
   }

   return addr;
}
237 
/* Cached description of an isl format used by the color conversion code. */
struct format_info {
   const struct isl_format_layout *fmtl; /* full isl layout of the format */
   unsigned chans;                       /* number of channels in the format */
   unsigned bits[4];                     /* bit widths of the R/G/B/A channels */
};
243 
244 static struct format_info
get_format_info(enum isl_format fmt)245 get_format_info(enum isl_format fmt)
246 {
247    const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
248 
249    return (struct format_info) {
250       .fmtl = fmtl,
251       .chans = isl_format_get_num_channels(fmt),
252       .bits = {
253          fmtl->channels.r.bits,
254          fmtl->channels.g.bits,
255          fmtl->channels.b.bits,
256          fmtl->channels.a.bits
257       },
258    };
259 }
260 
/* Convert a color value read back in \p lower_fmt (the format the lowered
 * load actually used) into the value the shader would have seen had the
 * image been read directly in \p image_fmt, then widen it to
 * \p dest_components components (padding with 0 and an appropriate 1 in
 * the alpha slot).
 */
static nir_ssa_def *
convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
                       nir_ssa_def *color,
                       enum isl_format image_fmt, enum isl_format lower_fmt,
                       unsigned dest_components)
{
   /* Same format: only the vector-expansion at the end is needed. */
   if (image_fmt == lower_fmt)
      goto expand_vec;

   /* R11G11B10 is the one non-homogeneous packed case; handle it up front. */
   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      color = nir_format_unpack_11f11f10f(b, color);
      goto expand_vec;
   }

   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   const bool needs_sign_extension =
      isl_format_has_snorm_channel(image_fmt) ||
      isl_format_has_sint_channel(image_fmt);

   /* We only check the red channel to detect if we need to pack/unpack */
   assert(image.bits[0] != lower.bits[0] ||
          memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      /* Whole texel was packed into a single R32; unpack per-channel bits,
       * sign-extending if the image format is signed.
       */
      if (needs_sign_extension)
         color = nir_format_unpack_sint(b, color, image.bits, image.chans);
      else
         color = nir_format_unpack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* On IVB, we rely on the undocumented behavior that typed reads from
       * surfaces of the unsupported R8 and R16 formats return useful data in
       * their least significant bits.  However, the data in the high bits is
       * garbage so we have to discard it.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          (lower_fmt == ISL_FORMAT_R16_UINT ||
           lower_fmt == ISL_FORMAT_R8_UINT))
         color = nir_format_mask_uvec(b, color, lower.bits);

      /* Rebucket the bits from the lowered channel width to the image's. */
      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
                                                  image.bits[0]);
      }

      if (needs_sign_extension)
         color = nir_format_sign_extend_ivec(b, color, image.bits);
   }

   /* Convert from the raw integer representation to the shader-visible
    * value according to the image format's channel type.
    */
   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_unorm_to_float(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_snorm_to_float(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_unpack_half_2x16_split_x(b, color);
      break;

   case ISL_UINT:
   case ISL_SINT:
      /* Already in the right representation. */
      break;

   default:
      unreachable("Invalid image channel type");
   }

expand_vec:
   /* Pad the result out to the destination component count: zeros for the
    * missing color channels, 1 (int or float as appropriate) for alpha.
    */
   assert(dest_components == 1 || dest_components == 4);
   assert(color->num_components <= dest_components);
   if (color->num_components == dest_components)
      return color;

   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < color->num_components; i++)
      comps[i] = nir_channel(b, color, i);

   for (unsigned i = color->num_components; i < 3; i++)
      comps[i] = nir_imm_int(b, 0);

   if (color->num_components < 4) {
      if (isl_format_has_int_channel(image_fmt))
         comps[3] = nir_imm_int(b, 1);
      else
         comps[3] = nir_imm_float(b, 1);
   }

   return nir_vec(b, comps, dest_components);
}
362 
/* Lower an image_deref_load intrinsic.
 *
 * If the device has a typed format the image format can be lowered to, the
 * load is retyped to that format and the result converted back in the
 * shader.  Otherwise (64/128 bpp formats) the load is replaced with a
 * bounds-checked raw untyped read plus manual address calculation and
 * format conversion.
 *
 * Always returns true (the intrinsic is always rewritten).
 */
static bool
lower_image_load_instr(nir_builder *b,
                       const struct gen_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);
      const unsigned dest_components = intrin->num_components;

      /* Use an undef to hold the uses of the load while we do the color
       * conversion.
       */
      nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));

      /* Retype the existing load to the lowered format's channel count. */
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      intrin->dest.ssa.num_components = intrin->num_components;

      b->cursor = nir_after_instr(&intrin->instr);

      nir_ssa_def *color = convert_color_for_load(b, devinfo,
                                                  &intrin->dest.ssa,
                                                  image_fmt, lower_fmt,
                                                  dest_components);

      /* Point the original users at the converted color and drop the undef. */
      nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
      nir_instr_remove(placeholder->parent_instr);
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;
      const unsigned dest_components = intrin->num_components;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->gen == 7 && !devinfo->is_haswell) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, what on Gen7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_load = nir_iand(b, do_load, is_raw);
      }
      nir_push_if(b, do_load);

      /* In-bounds path: raw untyped read at the manually computed address. */
      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_load_raw_intel);
      load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      load->src[1] = nir_src_for_ssa(addr);
      load->num_components = image_fmtl->bpb / 32;
      nir_ssa_dest_init(&load->instr, &load->dest,
                        load->num_components, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      nir_push_else(b, NULL);

      /* Out-of-bounds path: return zero. */
      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);

      nir_pop_if(b, NULL);

      nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);

      nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
                                                  image_fmt, raw_fmt,
                                                  dest_components);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
   }

   return true;
}
453 
/* Convert a shader-provided color from \p image_fmt into the raw integer
 * representation expected by a store in \p lower_fmt.  Inverse of
 * convert_color_for_load (minus the vector expansion).
 */
static nir_ssa_def *
convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
                        nir_ssa_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt)
{
   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   /* Drop any components the image format does not store. */
   color = nir_channels(b, color, (1 << image.chans) - 1);

   if (image_fmt == lower_fmt)
      return color;

   /* R11G11B10 is the one non-homogeneous packed case; handle it up front. */
   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      return nir_format_pack_11f11f10f(b, color);
   }

   /* Convert the shader value to the image format's raw representation. */
   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_unorm(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_snorm(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_format_float_to_half(b, color);
      break;

   case ISL_UINT:
      color = nir_format_clamp_uint(b, color, image.bits);
      break;

   case ISL_SINT:
      color = nir_format_clamp_sint(b, color, image.bits);
      break;

   default:
      unreachable("Invalid image channel type");
   }

   /* Mask off the sign-extension bits of signed values so they pack
    * cleanly into the narrower channels.
    */
   if (image.bits[0] < 32 &&
       (isl_format_has_snorm_channel(image_fmt) ||
        isl_format_has_sint_channel(image_fmt)))
      color = nir_format_mask_uvec(b, color, image.bits);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      /* Pack the whole texel into a single R32. */
      color = nir_format_pack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* Rebucket the bits from the image channel width to the lowered one. */
      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
                                                  lower.bits[0]);
      }
   }

   return color;
}
520 
/* Lower an image_deref_store intrinsic.
 *
 * Write-only images are left untouched (the hardware converts for us).
 * Otherwise, if a matching typed format exists the stored color is
 * converted in the shader and the store retyped; for 64/128 bpp formats
 * the store becomes a bounds-checked raw untyped write with a manually
 * computed address.
 *
 * Returns true if the intrinsic was rewritten.
 */
static bool
lower_image_store_instr(nir_builder *b,
                        const struct gen_device_info *devinfo,
                        nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only surfaces, we trust that the hardware can just do the
    * conversion for us.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);

      /* Color conversion goes before the store */
      b->cursor = nir_before_instr(&intrin->instr);

      /* src[3] holds the color value being stored. */
      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, lower_fmt);
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[3],
                            nir_src_for_ssa(color));
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->gen == 7 && !devinfo->is_haswell) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, what on Gen7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_store = nir_iand(b, do_store, is_raw);
      }
      nir_push_if(b, do_store);

      /* In-bounds path: raw untyped write of the converted color. */
      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, raw_fmt);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_store_raw_intel);
      store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      store->src[1] = nir_src_for_ssa(addr);
      store->src[2] = nir_src_for_ssa(color);
      store->num_components = image_fmtl->bpb / 32;
      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   }

   return true;
}
598 
/* Guard a typed image atomic with an "is the image bound?" check on IVB.
 *
 * Returns false (no change) on Haswell and Gen8+.  On IVB the atomic is
 * re-inserted inside a conditional on the first component of the image
 * size parameter, and its result is merged with zero through an if-phi so
 * unbound images read back as zero.
 */
static bool
lower_image_atomic_instr(nir_builder *b,
                         const struct gen_device_info *devinfo,
                         nir_intrinsic_instr *intrin)
{
   if (devinfo->is_haswell || devinfo->gen >= 8)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   /* Detach the atomic; it is re-inserted inside the if below. */
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Use an undef to hold the uses of the load conversion. */
   nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));

   /* Check the first component of the size field to find out if the
    * image is bound.  Necessary on IVB for typed atomics because
    * they don't seem to respect null surfaces and will happily
    * corrupt or read random memory when no image is bound.
    */
   nir_ssa_def *size = load_image_param(b, deref, SIZE);
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));

   nir_builder_instr_insert(b, &intrin->instr);

   nir_pop_if(b, NULL);

   /* Bound image: the atomic's result; unbound: zero. */
   nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
   nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));

   return true;
}
633 
/* Lower an image_deref_size intrinsic to reads of the pushed size param.
 *
 * Only applies to readable images with no matching typed format (the cases
 * where no real surface exists and the back-end cannot emit a TXS);
 * otherwise returns false and the back-end handles it.
 */
static bool
lower_image_size_instr(nir_builder *b,
                       const struct gen_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only images, we have an actual image surface so we fall back
    * and let the back-end emit a TXS for this.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   /* If we have a matching typed format, then we have an actual image surface
    * so we fall back and let the back-end emit a TXS for this.
    */
   const enum isl_format image_fmt =
      isl_format_for_pipe_format(var->data.image.format);
   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
      return false;

   /* LOD queries other than 0 are not supported on this path. */
   assert(nir_src_as_uint(intrin->src[1]) == 0);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_ssa_def *size = load_image_param(b, deref, SIZE);

   nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL };

   enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   for (unsigned c = 0; c < coord_comps; c++) {
      if (c == 2 && dim == GLSL_SAMPLER_DIM_CUBE) {
         /* The size param stores faces*layers; cube size queries want
          * layers, so divide the third component by the 6 cube faces.
          */
         comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6));
      } else {
         comps[c] = nir_channel(b, size, c);
      }
   }

   /* Pad unused destination components with 1. */
   for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c)
      comps[c] = nir_imm_int(b, 1);

   nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));

   return true;
}
682 
683 bool
brw_nir_lower_image_load_store(nir_shader * shader,const struct gen_device_info * devinfo,bool * uses_atomic_load_store)684 brw_nir_lower_image_load_store(nir_shader *shader,
685                                const struct gen_device_info *devinfo,
686                                bool *uses_atomic_load_store)
687 {
688    bool progress = false;
689 
690    nir_foreach_function(function, shader) {
691       if (function->impl == NULL)
692          continue;
693 
694       bool impl_progress = false;
695       nir_foreach_block_safe(block, function->impl) {
696          nir_builder b;
697          nir_builder_init(&b, function->impl);
698 
699          nir_foreach_instr_safe(instr, block) {
700             if (instr->type != nir_instr_type_intrinsic)
701                continue;
702 
703             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
704             switch (intrin->intrinsic) {
705             case nir_intrinsic_image_deref_load:
706                if (lower_image_load_instr(&b, devinfo, intrin))
707                   impl_progress = true;
708                break;
709 
710             case nir_intrinsic_image_deref_store:
711                if (lower_image_store_instr(&b, devinfo, intrin))
712                   impl_progress = true;
713                break;
714 
715             case nir_intrinsic_image_deref_atomic_add:
716             case nir_intrinsic_image_deref_atomic_imin:
717             case nir_intrinsic_image_deref_atomic_umin:
718             case nir_intrinsic_image_deref_atomic_imax:
719             case nir_intrinsic_image_deref_atomic_umax:
720             case nir_intrinsic_image_deref_atomic_and:
721             case nir_intrinsic_image_deref_atomic_or:
722             case nir_intrinsic_image_deref_atomic_xor:
723             case nir_intrinsic_image_deref_atomic_exchange:
724             case nir_intrinsic_image_deref_atomic_comp_swap:
725                if (uses_atomic_load_store)
726                   *uses_atomic_load_store = true;
727                if (lower_image_atomic_instr(&b, devinfo, intrin))
728                   impl_progress = true;
729                break;
730 
731             case nir_intrinsic_image_deref_size:
732                if (lower_image_size_instr(&b, devinfo, intrin))
733                   impl_progress = true;
734                break;
735 
736             default:
737                /* Nothing to do */
738                break;
739             }
740          }
741       }
742 
743       if (impl_progress) {
744          progress = true;
745          nir_metadata_preserve(function->impl, nir_metadata_none);
746       } else {
747          nir_metadata_preserve(function->impl, nir_metadata_all);
748       }
749    }
750 
751    return progress;
752 }
753