/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
#include "compiler/libcl/libcl.h"
#include "libagx_intrinsics.h"
#include <agx_pack.h>

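/*
 * Query the texture size (txs) for a given LOD. For arrays, the layer count
 * is returned (unminified) in the last used component. The dimensionality
 * flags are expected to be compile-time constants, so the branches below
 * should fold away.
 */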
uint3
libagx_txs(constant struct agx_texture_packed *ptr, uint16_t lod,
           unsigned nr_comps, bool is_buffer, bool is_1d, bool is_2d,
           bool is_cube, bool is_array)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* From the Vulkan spec:
    *
    *    OpImageQuery*...  return 0 if the bound descriptor is a null descriptor
    */
   if (d.null)
      return 0;

   /* Buffer textures are lowered to 2D, so the original size is irrecoverable.
    * Instead, we stash it in the software-defined section.
    */
   if (is_buffer)
      return d.buffer_size_sw;

   /* Load standard dimensions */
   uint3 size = (uint3)(d.width, d.height, d.depth);
   lod += d.first_level;

   /* Linear 2D arrays are special.
    *
    * TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
    * this just gets used internally for blits.
    */
   if (is_2d && is_array && d.layout == AGX_LAYOUT_LINEAR)
      size.z = d.depth_linear;

   /* 1D arrays have their second component as the layer count */
   if (is_1d && is_array)
      size.y = size.z;

   /* Adjust for LOD. The array size (the last used component when is_array)
    * must not be minified, so skip it by subtracting is_array from the
    * component count.
    */
   size.x = max(size.x >> lod, 1u);

   if (nr_comps - (uint)is_array >= 2)
      size.y = max(size.y >> lod, 1u);

   if (nr_comps - (uint)is_array >= 3)
      size.z = max(size.z >> lod, 1u);

   /* Cube maps have equal width and height, so we save some instructions by
    * only reading one. Dead code elimination will remove the redundant
    * instructions.
    */
   if (is_cube)
      size.y = size.x;

   return size;
}

uint
libagx_texture_samples(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;

   /* We may assume the input is multisampled, so just check the samples */
   return (d.samples == AGX_SAMPLE_COUNT_2) ? 2 : 4;
}

uint
libagx_texture_levels(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;
   else
      return (d.last_level - d.first_level) + 1;
}

/*
 * Fix robustness behaviour of txf with an out-of-bounds LOD. The hardware
 * returns the correct out-of-bounds colour for out-of-bounds coordinates, but
 * not for out-of-bounds LODs, so translate an out-of-bounds LOD into an
 * out-of-bounds coordinate to get correct behaviour in a single instruction.
 *
 * Returns the fixed X-coordinate.
 *
 * TODO: This looks like it might be an erratum workaround on G13 (Apple does
 * it), maybe check if G15 is affected.
 */
uint
libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
                            bool check_lod, ushort lod, bool check_layer,
                            uint layer, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   bool valid = true;

   if (check_lod)
      valid &= lod <= (d.last_level - d.first_level);

   if (check_layer) {
      bool linear = (d.layout == AGX_LAYOUT_LINEAR);
      valid &= layer < (linear ? d.depth_linear : d.depth);
   }

   /* The maximum tail offset is 0xF, so by returning 0xFFF0 for out-of-bounds
    * we stay at or below 0xFFFF (0xFFF0 + 0xF = 0xFFFF) and keep robustness
    * after offsetting.
    */
   return valid ? x : 0xFFF0;
}

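/*
 * Convert an (x, y) pixel coordinate into a pixel offset within a twiddled
 * layout: pixels inside a power-of-two tile are Morton-interleaved, and tiles
 * are laid out in row-major order across the aligned width.
 *
 * Worked example (hypothetical values): with 16x16 px tiles,
 * aligned_width_px = 128 and coord = (35, 40), the in-tile offset is (3, 8),
 * the tile corner is (32, 32), tile_row_start_px = 32 * 128 = 4096 and
 * tile_col_start_px = 32 * 16 = 512, with the interleaved in-tile offset
 * added on top.
 */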
static uint32_t
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
                               uint16_t tile_h_px, uint32_t aligned_width_px)
{
   /* Modulo by the tile width/height to get the offsets within the tile */
   ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
   uint32_t tile_mask = upsample(tile_mask_vec.y, tile_mask_vec.x);
   uint32_t coord_xy = upsample(coord.y, coord.x);
   ushort2 offs_px = as_ushort2(coord_xy & tile_mask);
   uint32_t offset_within_tile_px = nir_interleave_agx(offs_px.x, offs_px.y);

   /* Get the coordinates of the corner of the tile */
   ushort2 tile_px = as_ushort2(coord_xy & ~tile_mask);

   /* tile row start (px) =
    *   (y // tile height) * (# of tiles/row) * (# of pix/tile) =
    *   align_down(y, tile height) / tile height * width_tl * tile width *
    *        tile height =
    *   align_down(y, tile height) * width_tl * tile width
    */
   uint32_t tile_row_start_px = tile_px.y * aligned_width_px;

   /* tile column start (px) =
    *   (x // tile width) * (# of pix/tile) =
    *   align_down(x, tile width) / tile width * tile width * tile height =
    *   align_down(x, tile width) * tile height
    */
   uint32_t tile_col_start_px = tile_px.x * tile_h_px;

   /* Get the total offset */
   return tile_row_start_px + tile_col_start_px + offset_within_tile_px;
}

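/*
 * Calculate the address of a texel (or sample) of a writable image, e.g. for
 * lowered image atomics. If return_index is set, the sample index within the
 * image is returned instead of the 64-bit address.
 */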
uint64_t
libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
                           uint4 coord, uint sample_idx,
                           uint bytes_per_sample_B, bool is_1d, bool is_msaa,
                           bool is_layered, bool return_index)
{
   agx_unpack(NULL, ptr, PBE, d);

   /* We do not allow atomics on linear 2D or linear 2D array images, as there
    * are no known use cases. So in this path the image is twiddled, unless it
    * is 1D: a 1D image is always effectively linear, since even a nominally
    * twiddled layout degrades to linear-equivalent 1x1 tiles. (1D images use
    * this path, rather than the buffer path, so that 1D arrays are handled.)
    */
   uint total_px;
   if (is_1d) {
      total_px = coord.x;
   } else {
      uint aligned_width_px;
      if (is_msaa) {
         aligned_width_px = d.aligned_width_msaa_sw;
      } else {
         uint width_px = max(d.width >> d.level, 1u);
         aligned_width_px = align(width_px, d.tile_width_sw);
      }

      total_px = calculate_twiddled_coordinates(
         convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
         aligned_width_px);
   }

   uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;

   if (is_layered) {
      total_px += coord[is_1d ? 1 : 2] *
                  ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
   }

   uint total_sa = (total_px << samples_log2) + sample_idx;

   if (return_index)
      return total_sa;
   else
      return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
             (uint64_t)(total_sa * bytes_per_sample_B);
}

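/*
 * Calculate the address of a texel of a buffer image: the x-coordinate plus
 * the software-defined element offset, scaled by the bytes-per-pixel and
 * added to the base address.
 */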
uint64_t
libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
                            uint4 coord, uint bytes_per_pixel_B)
{
   agx_unpack(NULL, ptr, PBE, d);

   uint32_t x_el = d.buffer_offset_sw + coord.x;
   return d.buffer + (uint64_t)(x_el * bytes_per_pixel_B);
}

/* Buffer texture lowerings */
bool
libagx_texture_is_rgb32(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   return d.channels == AGX_CHANNELS_R32G32B32_EMULATED;
}

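/*
 * Load a texel from a buffer texture with the emulated RGB32 format,
 * synthesizing the fourth component (1.0f for float formats, 1 for integer
 * formats).
 */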
uint4
libagx_texture_load_rgb32(constant struct agx_texture_packed *ptr, uint coord,
                          bool is_float)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* This is carefully written to let us do the * 3 with a 32-bit operation but
    * still use the free 64-bit add-extend-shift for the rest.
    */
   uint64_t addr = d.address + ((uint64_t)(coord * 3)) * 4;
   constant uint3 *data = (constant uint3 *)addr;

   return (uint4)(*data, is_float ? as_uint(1.0f) : 1);
}

uint
libagx_buffer_texture_offset(constant struct agx_texture_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   return x + d.buffer_offset_sw;
}

uint
libagx_buffer_image_offset(constant struct agx_pbe_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, PBE, d);

   return x + d.buffer_offset_sw;
}