//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// ConvertVertex.comp: vertex buffer conversion.  Implements functionality in copyvertex.inc.
//
// Each thread of the dispatch call fills in one 4-byte element, no matter how many components
// fit in it.  The src data is laid out in the most general form as follows.  Note that component
// size is assumed to divide buffer stride.
//
//    Ns components, each Bs bytes
//     ____^_____
//    /          |
//    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//    |C1|C2|..|CN|..|..|..|..|C1|C2|..|CN|..|..|..|..|C1|C2|..|CN| ... Repeated V times
//    +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
//    \__________ __________/
//               V
//       Ss bytes of stride
//
// The output is the array of components converted to the destination format (each Bd bytes) with
// stride Sd = Nd*Bd (i.e. packed).  The output size is therefore V*Nd*Bd bytes.  The dispatch size
// is accordingly ceil(V*Nd*Bd / 4).
//
// The input is received in 4-byte elements, therefore each element has Es=4/Bs components.
//
// To output exactly one 4-byte element, each thread is responsible for Ed=4/Bd components.
// Therefore, thread t is responsible for component indices [Ed*t, Ed*(t + 1)).
//
// We don't use Bs and Es for A2B10G10R10 and R10G10B10A2 formats since they take 10 or 2 bits per
// component.  Variables that are computed using Bs or Es are hardcoded instead.
//
// Component index c is at source offset:
//
//     floor(c / Ns) * Ss + mod(c, Ns) * Bs
//
// - Flags:
//     * IsBigEndian
// - Conversion:
//     * SintToSint: covers byte, short and int types (distinguished by Bs and Bd).
//     * UintToUint: covers ubyte, ushort, uint and half float types (distinguished by Bs and Bd).
//     * SintToFloat: Same types as SintToSint for source (including scaled).  Converts to float.
//     * UintToFloat: Same types as UintToUint for source (including uscaled).  Converts to float.
//     * SnormToFloat: Similar to SintToFloat, but normalized.
//     * UnormToFloat: Similar to UintToFloat, but normalized.
//     * FixedToFloat: 16.16 signed fixed-point to floating point.
//     * FloatToFloat: float.
//     * A2BGR10SintToSint: covers the signed int type of component when format is only A2BGR10.
//     * A2BGR10UintToUint: covers the unsigned int type of component when format is only A2BGR10.
//     * A2BGR10SintToFloat: Same types as A2BGR10SintToSint for source (including scaled).
//                           Converts to float.
//     * A2BGR10UintToFloat: Same types as A2BGR10UintToUint for source (including uscaled).
//                           Converts to float.
//     * A2BGR10SnormToFloat: Similar to SintToFloat, but normalized and only for A2BGR10.
//     * RGB10A2SintToFloat: Same types as RGB10A2SintToSint for source (including scaled).
//                           Converts to float.
//     * RGB10A2UintToFloat: Same types as RGB10A2UintToUint for source (including uscaled).
//                           Converts to float.
//     * RGB10A2SnormToFloat: Similar to SintToFloat, but normalized and only for RGB10A2.
//     * RGB10A2UnormToFloat: Similar to UintToFloat, but normalized and only for RGB10A2.
//
// SintToSint, UintToUint and FloatToFloat correspond to CopyNativeVertexData() and
// Copy8SintTo16SintVertexData() in renderer/copyvertex.inc, FixedToFloat corresponds to
// Copy32FixedTo32FVertexData, SintToFloat and UintToFloat correspond to CopyTo32FVertexData with
// normalized=false and SnormToFloat and UnormToFloat correspond to CopyTo32FVertexData with
// normalized=true.  A2BGR10SintToSint, A2BGR10UintToUint, A2BGR10SintToFloat, A2BGR10UintToFloat
// and A2BGR10SnormToFloat correspond to CopyXYZ10W2ToXYZW32FVertexData with the proper options.
// RGB10A2SintToFloat, RGB10A2UintToFloat and RGB10A2SnormToFloat correspond to
// CopyW2XYZ10ToXYZW32FVertexData.
// RGB10UintToFloat corresponds to CopyXYZ10ToXYZW32FVertexData with the proper options.

#version 450 core

// Source type: the type a loaded component is represented in before the final (implicit) cast to
// the destination type.
#if SintToSint || SintToFloat || A2BGR10SintToSint || A2BGR10SintToFloat || RGB10A2SintToFloat
#define SrcType int
#elif UintToUint || UintToFloat || A2BGR10UintToUint || A2BGR10UintToFloat || \
    RGB10A2UintToFloat
#define SrcType uint
#elif SnormToFloat || UnormToFloat || FixedToFloat || FloatToFloat || A2BGR10SnormToFloat || \
    RGB10A2SnormToFloat || RGB10A2UnormToFloat
#define SrcType float
#else
#error "Not all conversions are accounted for"
#endif

// Destination type
#if SintToSint || A2BGR10SintToSint
#define DestType int
#define IsDestFloat 0
#elif UintToUint || A2BGR10UintToUint
#define DestType uint
#define IsDestFloat 0
#elif SintToFloat || UintToFloat || SnormToFloat || UnormToFloat || FixedToFloat || FloatToFloat || \
    A2BGR10SintToFloat || A2BGR10UintToFloat || A2BGR10SnormToFloat || \
    RGB10A2SintToFloat || RGB10A2UintToFloat || RGB10A2SnormToFloat || RGB10A2UnormToFloat
#define DestType float
#define IsDestFloat 1
#else
#error "Not all conversions are accounted for"
#endif

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

layout (set = 0, binding = 0) buffer dest
{
    uint destData[];
};

layout (set = 0, binding = 1) buffer src
{
    uint srcData[];
};

layout (push_constant) uniform PushConstants
{
    // outputs to write (= total number of components / Ed): used for range checking
    uint outputCount;
    // total number of output components: used for range checking
    uint componentCount;
    // source and destination offsets are handled in the shader (instead of binding the buffer with
    // these offsets), as the binding offset requires alignment with
    // minStorageBufferOffsetAlignment, which is impossible to enforce on source, and therefore
    // would limit the usability of the shader.  Note that source is a storage buffer, instead of a
    // uniform buffer, so it wouldn't be affected by the possibly smaller max size of uniform
    // buffers.
    uint srcOffset;
    uint destOffset;

    // Parameters from the above explanation
    uint Ns;  // Number of source components in one vertex attribute
    uint Bs;  // Source component byte size
    uint Ss;  // Source vertex attribute byte stride
    uint Es;  // Precalculated 4/Bs

    uint Nd;  // Number of destination components in one vertex attribute
    uint Bd;  // Destination component byte size
    uint Sd;  // Precalculated Nd*Bd
    uint Ed;  // Precalculated 4/Bd
} params;

// Define shorthands for more readable formulas:
#define Ns params.Ns
#define Ss params.Ss
#define Nd params.Nd
#define Sd params.Sd

// With fixed-point and float types, Bs and Bd can only be 4, so they are hardcoded for more
// efficiency.
#if FixedToFloat || FloatToFloat
#define Bs 4
#define Es 1
#else
#define Bs params.Bs
#define Es params.Es
#endif

#if IsDestFloat
#define Bd 4
#define Ed 1
#else
#define Bd params.Bd
#define Ed params.Ed
#endif

// Byte offset, within the source buffer, of the given component of the given vertex.
uint getSourceComponentOffset(uint vertex, uint component)
{
    return vertex * Ss + component * Bs + params.srcOffset;
}

// Byte offset, within the destination buffer, of the given component of the given vertex.
uint getDestinationComponentOffset(uint vertex, uint component)
{
    return vertex * Sd + component * Bd + params.destOffset;
}

uint getShiftBits(uint offset, uint B)
{
    // Given a byte offset, calculates the bit shift required to extract/store a component of B
    // bytes inside the 4-byte element that contains it.
    //
    // On little endian, it implements the following function:
    //
    // B == 1: 0->0, 1->8, 2->16, 3->24
    // B == 2: 0->0, 2->16  (1 and 3 are impossible values as Bx is assumed to divide Sx)
    // B == 4: 0->0         (similarly, 1, 2, and 3 are impossible values)
    //
    // This is simply given by (offset % 4) * 8.
    //
    // On big endian, it implements the following function:
    //
    // B == 1: 0->24, 1->16, 2->8, 3->0
    // B == 2: 0->16, 2->0
    // B == 4: 0->0
    //
    // This is given by (4 - B - offset % 4) * 8

    uint shift = (offset % 4) * 8;

    // If big-endian, the most-significant bits contain the first components, so we reverse the
    // shift count.
#if IsBigEndian
    shift = (4 - B) * 8 - shift;
#endif

    return shift;
}

// Sign-extends the low `bits` bits of `value` to a full 32-bit two's complement pattern.
uint signExtend(uint value, uint bits)
{
    // A full-width value needs no extension.  Bailing out also avoids 0xFFFFFFFF << 32, whose
    // result is undefined and would corrupt the value.
    if (bits >= 32)
    {
        return value;
    }

    bool isNegative = (value & (1u << (bits - 1))) != 0u;
    return isNegative ? (value | (0xFFFFFFFFu << bits)) : value;
}

// Produces the value of destination component index cd, expressed as SrcType.  Components that
// are missing from the source are synthesized (0 for the first three, 1 for alpha); all others
// are extracted from srcData and decoded according to the selected conversion.
SrcType loadSourceComponent(uint cd)
{
    // cd is component index in the destination buffer
    uint vertex    = cd / Nd;
    uint component = cd % Nd;

    // If the source format doesn't have this component, the first three components are filled
    // with 0.  A missing alpha channel is filled with 1 further below.
    if (component >= Ns && component < 3)
    {
        return 0;
    }

    // Width and mask of the component inside its 4-byte element.
    // The 10/10/10/2 packed formats are not byte-aligned, hardcoding values for efficiency.
#if A2BGR10SintToSint || A2BGR10UintToUint || A2BGR10SnormToFloat || A2BGR10SintToFloat || \
    A2BGR10UintToFloat || RGB10A2SintToFloat || RGB10A2UintToFloat || RGB10A2SnormToFloat || \
    RGB10A2UnormToFloat
    uint valueBits = component == 3 ? 2u : 10u;
    uint valueMask = component == 3 ? 0x03u : 0x3FFu;
#else
    uint valueBits = Bs * 8;
    uint valueMask = valueBits == 32 ? 0xFFFFFFFFu : (1u << valueBits) - 1u;
#endif

    uint valueAsUint;

    if (component >= Ns && component == 3)
    {
        // The source has no alpha channel, so nothing is read from srcData here.
        // See GLES3.0 section 2.9.1 Transferring Array Elements
#if SintToSint || SintToFloat || A2BGR10SintToSint || A2BGR10SintToFloat || RGB10A2SintToFloat || \
    UintToUint || UintToFloat || A2BGR10UintToUint || A2BGR10UintToFloat || RGB10A2UintToFloat
        // For integers, alpha should take a value of 1.
        valueAsUint = 1u;
#elif SnormToFloat || A2BGR10SnormToFloat || RGB10A2SnormToFloat
        // The largest signed number is 0b011...1 which is valueMask >> 1
        valueAsUint = valueMask >> 1;
#elif UnormToFloat || RGB10A2UnormToFloat
        // The largest unsigned number is 0b11...1 which is valueMask
        valueAsUint = valueMask;
#elif FixedToFloat
        // 1.0 in fixed point is 0x10000
        valueAsUint = 0x10000u;
#elif FloatToFloat
        valueAsUint = floatBitsToUint(1.0);
#else
#error "Not all conversions are accounted for"
#endif
    }
    else
    {
        // Load the 4-byte element holding the source component.  The fetch is done only on this
        // path so that a synthesized component never addresses data the source doesn't contain.
        uint offset = getSourceComponentOffset(vertex, component);
        uint block  = srcData[offset / 4];

#if A2BGR10SintToSint || A2BGR10UintToUint || A2BGR10SnormToFloat || A2BGR10SintToFloat || \
    A2BGR10UintToFloat
        uint shiftBits = 10u * component;
#elif RGB10A2SintToFloat || RGB10A2UintToFloat || RGB10A2SnormToFloat || RGB10A2UnormToFloat
        // channel order is reversed
        uint shiftBits = component == 3 ? 0u : (valueBits * (2u - component) + 2u);
#else
        uint shiftBits = getShiftBits(offset, Bs);
#endif

        valueAsUint = (block >> shiftBits) & valueMask;
    }

    // Convert to SrcType
#if SintToSint || SintToFloat || A2BGR10SintToSint || A2BGR10SintToFloat || RGB10A2SintToFloat
    SrcType value = SrcType(signExtend(valueAsUint, valueBits));
#elif UintToUint || UintToFloat || A2BGR10UintToUint || A2BGR10UintToFloat || RGB10A2UintToFloat
    SrcType value = valueAsUint;
#elif SnormToFloat || A2BGR10SnormToFloat || RGB10A2SnormToFloat
    int valueAsInt = int(signExtend(valueAsUint, valueBits));
    // Scale by the largest positive value; the most negative value would land slightly below -1,
    // so it is clamped.
    SrcType value = float(valueAsInt) / float(valueMask >> 1);
    value         = max(value, -1.0);
#elif UnormToFloat || RGB10A2UnormToFloat
    // Scale [0, P] to [0, 1], where P is the largest unsigned value (valueMask).
    SrcType value = float(valueAsUint) / float(valueMask);
#elif FixedToFloat
    // 16.16 fixed point: reinterpret as signed and divide by 2^16.
    float divisor = 1.0 / 65536.0;
    SrcType value = float(int(valueAsUint)) * divisor;
#elif FloatToFloat
    SrcType value = uintBitsToFloat(valueAsUint);
#else
#error "Not all conversions are accounted for"
#endif

    return value;
}

DestType convertComponent(SrcType srcValue)
{
    // In all cases, srcValue already contains the final value, except it may need a cast, which
    // happens implicitly here.
    return srcValue;
}

uint makeDestinationComponent(uint cd, DestType value)
{
    // Return valueAsUint, shifted to the right spot.  Multiple calls to this function should be |ed
    // and eventually written to the destination.

#if SintToSint || UintToUint || A2BGR10SintToSint || A2BGR10UintToUint
    uint vertex    = cd / Nd;
    uint component = cd % Nd;

    uint offset    = getDestinationComponentOffset(vertex, component);
    uint shiftBits = getShiftBits(offset, Bd);

    uint valueBits   = Bd * 8;
    uint valueMask   = valueBits == 32 ? 0xFFFFFFFFu : (1u << valueBits) - 1u;
    uint valueAsUint = (uint(value) & valueMask) << shiftBits;

#elif IsDestFloat
    // If the destination is float, it will occupy the whole result.
    uint valueAsUint = floatBitsToUint(value);

#else
#error "Not all conversions are accounted for"
#endif

    return valueAsUint;
}

// Writes the assembled 4-byte element to this invocation's slot in the destination buffer.
void storeDestinationComponents(uint valueAsUint)
{
    // Note that the destination allocations are always aligned to kMaxVertexFormatAlignment.
    destData[gl_GlobalInvocationID.x + params.destOffset / 4] = valueAsUint;
}

void main()
{
    // Each invocation produces one 4-byte output element; surplus invocations do nothing.
    if (gl_GlobalInvocationID.x >= params.outputCount)
        return;

    uint valueOut = 0;
    for (uint i = 0; i < Ed; ++i)
    {
        uint cd = gl_GlobalInvocationID.x * Ed + i;
        // The last element may be only partially filled.
        if (cd >= params.componentCount)
        {
            break;
        }

        SrcType srcValue   = loadSourceComponent(cd);
        DestType destValue = convertComponent(srcValue);
        valueOut |= makeDestinationComponent(cd, destValue);
    }

    storeDestinationComponents(valueOut);
}