/*
 * Copyright 2023 Alyssa Rosenzweig
 * Copyright 2023 Valve Corporation
 * SPDX-License-Identifier: MIT
 */
#include "compiler/libcl/libcl.h"
#include "libagx_intrinsics.h"
#include <agx_pack.h>

uint3
libagx_txs(constant struct agx_texture_packed *ptr, uint16_t lod,
           unsigned nr_comps, bool is_buffer, bool is_1d, bool is_2d,
           bool is_cube, bool is_array)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* From the Vulkan spec:
    *
    *    OpImageQuery*... return 0 if the bound descriptor is a null descriptor
    */
   if (d.null)
      return 0;

   /* Buffer textures are lowered to 2D so the original size is irrecoverable.
    * Instead, we stash it in the software-defined section.
    */
   if (is_buffer)
      return d.buffer_size_sw;

   /* Load standard dimensions */
   uint3 size = (uint3)(d.width, d.height, d.depth);
   lod += d.first_level;

   /* Linear 2D arrays are special.
    *
    * TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
    * this just gets used internally for blits.
    */
   if (is_2d && is_array && d.layout == AGX_LAYOUT_LINEAR)
      size.z = d.depth_linear;

   /* 1D arrays have their second component as the layer count */
   if (is_1d && is_array)
      size.y = size.z;

   /* Adjust for LOD, do not adjust array size */
   size.x = max(size.x >> lod, 1u);

   if (nr_comps - (uint)is_array >= 2)
      size.y = max(size.y >> lod, 1u);

   if (nr_comps - (uint)is_array >= 3)
      size.z = max(size.z >> lod, 1u);

   /* Cube maps have equal width and height, so we save some instructions by
    * only reading one. Dead code elimination will remove the redundant
    * instructions.
    */
   if (is_cube)
      size.y = size.x;

   return size;
}

uint
libagx_texture_samples(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;

   /* We may assume the input is multisampled, so just check the samples */
   return (d.samples == AGX_SAMPLE_COUNT_2) ? 2 : 4;
}

uint
libagx_texture_levels(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* As above */
   if (d.null)
      return 0;
   else
      return (d.last_level - d.first_level) + 1;
}

/*
 * Fix robustness behaviour of txf with an out-of-bounds LOD. The hardware
 * returns the correct out-of-bounds colour for out-of-bounds coordinates,
 * just not for LODs. So translate an out-of-bounds LOD into an out-of-bounds
 * coordinate to get correct behaviour in 1 instruction.
 *
 * Returns the fixed X-coordinate.
 *
 * TODO: This looks like it might be an erratum workaround on G13 (Apple does
 * it), maybe check if G15 is affected.
 */
uint
libagx_lower_txf_robustness(constant struct agx_texture_packed *ptr,
                            bool check_lod, ushort lod, bool check_layer,
                            uint layer, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   bool valid = true;

   if (check_lod)
      valid &= lod <= (d.last_level - d.first_level);

   if (check_layer) {
      bool linear = (d.layout == AGX_LAYOUT_LINEAR);
      valid &= layer < (linear ? d.depth_linear : d.depth);
   }

   /* The maximum tail offset is 0xF, so by returning 0xFFF0 for out-of-bounds
    * we stay under 0xFFFF and keep robustness after offsetting.
    */
   return valid ? x : 0xFFF0;
}

static uint32_t
calculate_twiddled_coordinates(ushort2 coord, uint16_t tile_w_px,
                               uint16_t tile_h_px, uint32_t aligned_width_px)
{
   /* Modulo by the tile width/height to get the offsets within the tile */
   ushort2 tile_mask_vec = (ushort2)(tile_w_px - 1, tile_h_px - 1);
   uint32_t tile_mask = upsample(tile_mask_vec.y, tile_mask_vec.x);
   uint32_t coord_xy = upsample(coord.y, coord.x);
   ushort2 offs_px = as_ushort2(coord_xy & tile_mask);
   uint32_t offset_within_tile_px = nir_interleave_agx(offs_px.x, offs_px.y);

   /* Get the coordinates of the corner of the tile */
   ushort2 tile_px = as_ushort2(coord_xy & ~tile_mask);

   /* tile row start (px) =
    *    (y // tile height) * (# of tiles/row) * (# of pix/tile) =
    *    align_down(y, tile height) / tile height * width_tl * tile width *
    *       tile height =
    *    align_down(y, tile height) * width_tl * tile width
    */
   uint32_t tile_row_start_px = tile_px.y * aligned_width_px;

   /* tile column start (px) =
    *    (x // tile width) * (# of pix/tile) =
    *    align_down(x, tile width) / tile width * tile width * tile height =
    *    align_down(x, tile width) * tile height
    */
   uint32_t tile_col_start_px = tile_px.x * tile_h_px;

   /* Get the total offset */
   return tile_row_start_px + tile_col_start_px + offset_within_tile_px;
}

uint64_t
libagx_image_texel_address(constant const struct agx_pbe_packed *ptr,
                           uint4 coord, uint sample_idx,
                           uint bytes_per_sample_B, bool is_1d, bool is_msaa,
                           bool is_layered, bool return_index)
{
   agx_unpack(NULL, ptr, PBE, d);

   /* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
    * known use cases. So we're twiddled in this path, unless we're handling a
    * 1D image, which is always linear, even if it nominally uses a twiddled
    * layout that degrades to linear-equivalent 1x1 tiles. (1D uses this path,
    * not the buffer path, for 1D arrays.)
    */
   uint total_px;
   if (is_1d) {
      total_px = coord.x;
   } else {
      uint aligned_width_px;
      if (is_msaa) {
         aligned_width_px = d.aligned_width_msaa_sw;
      } else {
         uint width_px = max(d.width >> d.level, 1u);
         aligned_width_px = align(width_px, d.tile_width_sw);
      }

      total_px = calculate_twiddled_coordinates(
         convert_ushort2(coord.xy), d.tile_width_sw, d.tile_height_sw,
         aligned_width_px);
   }

   uint samples_log2 = is_msaa ? d.sample_count_log2_sw : 0;

   if (is_layered) {
      total_px += coord[is_1d ? 1 : 2] *
                  ((d.layer_stride_sw / bytes_per_sample_B) >> samples_log2);
   }

   uint total_sa = (total_px << samples_log2) + sample_idx;

   if (return_index)
      return total_sa;
   else
      return (d.buffer + (is_msaa ? 0 : d.level_offset_sw)) +
             (uint64_t)(total_sa * bytes_per_sample_B);
}

uint64_t
libagx_buffer_texel_address(constant const struct agx_pbe_packed *ptr,
                            uint4 coord, uint bytes_per_pixel_B)
{
   agx_unpack(NULL, ptr, PBE, d);

   uint32_t x_el = d.buffer_offset_sw + coord.x;
   return d.buffer + (uint64_t)(x_el * bytes_per_pixel_B);
}

/* Buffer texture lowerings */
bool
libagx_texture_is_rgb32(constant struct agx_texture_packed *ptr)
{
   agx_unpack(NULL, ptr, TEXTURE, d);
   return d.channels == AGX_CHANNELS_R32G32B32_EMULATED;
}

uint4
libagx_texture_load_rgb32(constant struct agx_texture_packed *ptr, uint coord,
                          bool is_float)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   /* This is carefully written to let us do the * 3 with a 32-bit operation
    * but still use the free 64-bit add-extend-shift for the rest.
    */
   uint64_t addr = d.address + ((uint64_t)(coord * 3)) * 4;
   constant uint3 *data = (constant uint3 *)addr;

   return (uint4)(*data, is_float ? as_uint(1.0f) : 1);
}

uint
libagx_buffer_texture_offset(constant struct agx_texture_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, TEXTURE, d);

   return x + d.buffer_offset_sw;
}

uint
libagx_buffer_image_offset(constant struct agx_pbe_packed *ptr, uint x)
{
   agx_unpack(NULL, ptr, PBE, d);

   return x + d.buffer_offset_sw;
}
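
/*
 * Illustrative sketch only, not used by the driver: a scalar model of the
 * twiddled addressing computed by calculate_twiddled_coordinates() above,
 * following the derivation in its comments. The helper names
 * (libagx_model_interleave, libagx_model_twiddled_offset_px) are hypothetical
 * and exist purely for illustration; the real path uses the
 * upsample/nir_interleave_agx intrinsics, and this model assumes the
 * interleave places the x bits in the even positions of the Morton code.
 */
static uint32_t
libagx_model_interleave(uint16_t x, uint16_t y)
{
   /* Plain 2D Morton (Z-order) code: bits of x land in even positions,
    * bits of y in odd positions.
    */
   uint32_t out = 0;
   for (unsigned i = 0; i < 16; ++i) {
      out |= (uint32_t)((x >> i) & 1) << (2 * i);
      out |= (uint32_t)((y >> i) & 1) << (2 * i + 1);
   }
   return out;
}

static uint32_t
libagx_model_twiddled_offset_px(uint16_t x, uint16_t y, uint16_t tile_w_px,
                                uint16_t tile_h_px, uint32_t aligned_width_px)
{
   /* Z-order within a power-of-two tile: mask off the tile-relative bits of
    * each coordinate and interleave them.
    */
   uint32_t within_tile_px =
      libagx_model_interleave(x & (tile_w_px - 1), y & (tile_h_px - 1));

   /* Tiles themselves are laid out row-major across the aligned width:
    *    tile row start    = align_down(y, tile height) * aligned width
    *    tile column start = align_down(x, tile width) * tile height
    * exactly as derived in calculate_twiddled_coordinates().
    */
   uint32_t tile_row_start_px =
      (uint32_t)(x & 0 | (y & ~(tile_h_px - 1))) * aligned_width_px;
   uint32_t tile_col_start_px = (uint32_t)(x & ~(tile_w_px - 1)) * tile_h_px;

   return tile_row_start_px + tile_col_start_px + within_tile_px;
}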