• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1//
2// Copyright 2018 The ANGLE Project Authors. All rights reserved.
3// Use of this source code is governed by a BSD-style license that can be
4// found in the LICENSE file.
5//
6// ConvertVertex.comp: vertex buffer conversion.  Implements functionality in copyvertex.inc.
7//
8// Each thread of the dispatch call fills in one 4-byte element, no matter how many components
9// fit in it.  The src data is laid out in the most general form as follows.  Note that component
10// size is assumed to divide buffer stride.
11//
12//    Ns components, each Bs bytes
13//         ____^_____
14//        /          |
15//       +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
16//       |C1|C2|..|CN|..|..|..|..|C1|C2|..|CN|..|..|..|..|C1|C2|..|CN| ... Repeated V times
17//       +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
18//        \__________ __________/
19//                   V
20//           Ss bytes of stride
21//
22// The output is the array of components converted to the destination format (each Bd bytes) with
23// stride Sd = Nd*Bd (i.e. packed).  The output size is therefore V*Nd*Bd bytes.  The dispatch size
24// is accordingly ceil(V*Nd*Bd / 4).
25//
26// The input is received in 4-byte elements, therefore each element has Es=4/Bs components.
27//
28// To output exactly one 4-byte element, each thread is responsible for Ed=4/Bd components.
29// Therefore, thread t is responsible for component indices [Ed*t, Ed*(t + 1)).
30//
31// We don't use Bs and Es for A2B10G10R10 and R10G10B10A2 formats since they take 10 or 2 bits per
32// component. Variables that are computed using Bs or Es are hardcoded instead.
33//
34// Component index c is at source offset:
35//
36//     floor(c / Ns) * Ss + mod(c, Ns) * Bs
37//
38//   - Flags:
39//     * IsBigEndian
40//   - Conversion:
41//     * SintToSint: covers byte, short and int types (distinguished by Bs and Bd).
42//     * UintToUint: covers ubyte, ushort, uint and half float types (distinguished by Bs and Bd).
43//     * SintToFloat: Same types as SintToSint for source (including scaled).  Converts to float.
44//     * UintToFloat: Same types as UintToUint for source (including uscaled).  Converts to float.
45//     * SnormToFloat: Similar to SintToFloat, but normalized.
46//     * UnormToFloat: Similar to UintToFloat, but normalized.
47//     * FixedToFloat: 16.16 signed fixed-point to floating point.
48//     * FloatToFloat: float.
49//     * A2BGR10SintToSint: covers the signed int type of component when format is only A2BGR10.
50//     * A2BGR10UintToUint: covers the unsigned int type of component when format is only A2BGR10.
51//     * A2BGR10SintToFloat: Same types as A2BGR10SintToSint for source (including scaled).
52//                           Converts to float.
53//     * A2BGR10UintToFloat: Same types as A2BGR10UintToUint for source (including uscaled).
54//                           Converts to float.
55//     * A2BGR10SnormToFloat: Similar to SintToFloat, but normalized and only for A2BGR10.
56//     * RGB10A2SintToFloat: Same types as RGB10A2SintToSint for source (including scaled).
57//                           Converts to float.
58//     * RGB10A2UintToFloat: Same types as RGB10A2UintToUint for source (including uscaled).
59//                           Converts to float.
60//     * RGB10A2SnormToFloat: Similar to SintToFloat, but normalized and only for RGB10A2.
61//     * RGB10A2UnormToFloat: Similar to UintToFloat, but normalized and only for RGB10A2.
62//
63// SintToSint, UintToUint and FloatToFloat correspond to CopyNativeVertexData() and
64// Copy8SintTo16SintVertexData() in renderer/copyvertex.inc, FixedToFloat corresponds to
65// Copy32FixedTo32FVertexData, SintToFloat and UintToFloat correspond to CopyTo32FVertexData with
66// normalized=false and SnormToFloat and UnormToFloat correspond to CopyTo32FVertexData with
67// normalized=true. A2BGR10SintToSint, A2BGR10UintToUint, A2BGR10SintToFloat, A2BGR10UintToFloat
68// and A2BGR10SnormToFloat correspond to CopyXYZ10W2ToXYZW32FVertexData with the proper options.
69// RGB10A2SintToFloat, RGB10A2UintToFloat and RGB10A2SnormToFloat correspond to
70// CopyW2XYZ10ToXYZW32FVertexData. RGB10UintToFloat corresponds to CopyXYZ10ToXYZW32FVertexData
71// with the proper options.
72
73#version 450 core
74
// SrcType: the type loadSourceComponent() returns for the selected conversion.
//   - signed integer sources (plain and packed 10/10/10/2) are returned as int,
//   - unsigned integer sources are returned as uint,
//   - normalized, fixed-point and float sources are returned as float.
#if SintToSint || A2BGR10SintToSint || \
    SintToFloat || A2BGR10SintToFloat || RGB10A2SintToFloat
#define SrcType int
#elif UintToUint || A2BGR10UintToUint || \
    UintToFloat || A2BGR10UintToFloat || RGB10A2UintToFloat
#define SrcType uint
#elif FloatToFloat || FixedToFloat || \
    SnormToFloat || A2BGR10SnormToFloat || RGB10A2SnormToFloat || \
    UnormToFloat || RGB10A2UnormToFloat
#define SrcType float
#else
#error "Not all conversions are accounted for"
#endif
87
// DestType: the type of a converted component before it is packed into the output.
// IsDestFloat is 1 exactly when DestType is float, and 0 for the integer destinations.
#if SintToSint || A2BGR10SintToSint
#define IsDestFloat 0
#define DestType int
#elif UintToUint || A2BGR10UintToUint
#define IsDestFloat 0
#define DestType uint
#elif FloatToFloat || FixedToFloat || \
    SintToFloat || UintToFloat || SnormToFloat || UnormToFloat || \
    A2BGR10SintToFloat || A2BGR10UintToFloat || A2BGR10SnormToFloat || \
    RGB10A2SintToFloat || RGB10A2UintToFloat || RGB10A2SnormToFloat || RGB10A2UnormToFloat
#define IsDestFloat 1
#define DestType float
#else
#error "Not all conversions are accounted for"
#endif
103
// 64 invocations per work group along x; the y and z sizes are left at their default of 1.
layout(local_size_x = 64) in;

// Destination buffer, addressed as an array of 4-byte elements.
layout(set = 0, binding = 0) buffer dest
{
    uint destData[];
};

// Source buffer, likewise addressed as an array of 4-byte elements.
layout(set = 0, binding = 1) buffer src
{
    uint srcData[];
};
115
// Parameters of the conversion, passed as push constants.  The member order and types are part
// of the interface with the host and must not change.
layout(push_constant) uniform PushConstants
{
    // Range checking: number of outputs to write (= total number of components / Ed).
    uint outputCount;
    // Range checking: total number of output components.
    uint componentCount;

    // Source and destination offsets are applied in the shader instead of binding the buffers
    // at these offsets.  A binding offset must be aligned to minStorageBufferOffsetAlignment,
    // which is impossible to enforce on the source and would therefore limit the usability of
    // the shader.  Note that the source is a storage buffer rather than a uniform buffer, so it
    // is not affected by the possibly smaller maximum size of uniform buffers.
    uint srcOffset;
    uint destOffset;

    // Source parameters from the explanation at the top of the file:
    // Number of source components in one vertex attribute
    uint Ns;
    // Source component byte size
    uint Bs;
    // Source vertex attribute byte stride
    uint Ss;
    // Precalculated 4/Bs
    uint Es;

    // Destination parameters from the explanation at the top of the file:
    // Number of destination components in one vertex attribute
    uint Nd;
    // Destination component byte size
    uint Bd;
    // Precalculated Nd*Bd
    uint Sd;
    // Precalculated 4/Bd
    uint Ed;
} params;
142
// Shorthands that keep the formulas below readable.  Component counts and strides are always
// read from the push constants:
#define Nd params.Nd
#define Sd params.Sd
#define Ns params.Ns
#define Ss params.Ss

// Source component size (Bs) and components per 4-byte element (Es).  They normally come from
// the push constants; with fixed-point and float sources Bs can only be 4, so both are
// hardcoded for more efficiency.
#if !(FixedToFloat || FloatToFloat)
#define Bs params.Bs
#define Es params.Es
#else
#define Bs 4
#define Es 1
#endif

// Destination component size (Bd) and components per 4-byte element (Ed).  A float destination
// is always 4 bytes wide, so both are hardcoded in that case as well.
#if !IsDestFloat
#define Bd params.Bd
#define Ed params.Ed
#else
#define Bd 4
#define Ed 1
#endif
166
// Returns the byte offset, within the source buffer, of the given component of the given vertex.
uint getSourceComponentOffset(uint vertex, uint component)
{
    // Start of the vertex: the buffer-wide offset plus one stride per preceding vertex.
    uint vertexStart = params.srcOffset + vertex * Ss;
    // Position of the component inside the vertex.
    uint withinVertex = component * Bs;
    return vertexStart + withinVertex;
}
171
// Returns the byte offset, within the destination buffer, of the given component of the given
// vertex.
uint getDestinationComponentOffset(uint vertex, uint component)
{
    // Start of the vertex: the buffer-wide offset plus one stride per preceding vertex.
    uint vertexStart = params.destOffset + vertex * Sd;
    // Position of the component inside the vertex.
    uint withinVertex = component * Bd;
    return vertexStart + withinVertex;
}
176
// Given a byte offset and a component size B in bytes, returns the bit shift needed to extract
// the component from (or place it into) the 4-byte element that contains it.
uint getShiftBits(uint offset, uint B)
{
    // Position of the component's first byte inside its 4-byte element (offset % 4).
    uint byteInElement = offset & 3u;

#if IsBigEndian
    // Big endian: the most-significant bits hold the first components, so the shift count is
    // reversed, i.e. (4 - B - offset % 4) * 8:
    //
    // B == 1: 0->24, 1->16, 2->8, 3->0
    // B == 2: 0->16, 2->0
    // B == 4: 0->0
    return (4u - B - byteInElement) * 8u;
#else
    // Little endian: simply (offset % 4) * 8:
    //
    // B == 1: 0->0, 1->8, 2->16, 3->24
    // B == 2: 0->0, 2->16   (1 and 3 are impossible values as B is assumed to divide the stride)
    // B == 4: 0->0          (similarly, 1, 2, and 3 are impossible values)
    return byteInElement * 8u;
#endif
}
207
// Sign-extends the low `bits` bits of `value` to a full 32-bit two's complement pattern.
uint signExtendBits(uint value, uint bits)
{
    // A 32-bit value is already complete, and shifting by 32 would be undefined.
    if (bits >= 32u)
    {
        return value;
    }

    bool isNegative = (value & (1u << (bits - 1u))) != 0u;
    return isNegative ? (value | (0xFFFFFFFFu << bits)) : value;
}

// Produces the value of destination component index `cd` as SrcType.
//
// Components present in the source are read from srcData and decoded according to the selected
// conversion.  Components the source does not provide are synthesized without touching the
// source buffer: indices 0..2 become 0, and index 3 (alpha) becomes the source format's
// representation of 1.
SrcType loadSourceComponent(uint cd)
{
    // cd is component index in the destination buffer
    uint vertex = cd / Nd;
    uint component = cd % Nd;

    // Missing non-alpha components are filled with 0.
    if (component >= Ns && component < 3u)
    {
        return SrcType(0);
    }

    uint offset = getSourceComponentOffset(vertex, component);

    // A2B10G10R10's components are not byte-aligned, hardcoding values for efficiency.
#if A2BGR10SintToSint || A2BGR10UintToUint || A2BGR10SnormToFloat || A2BGR10SintToFloat || \
    A2BGR10UintToFloat
    uint valueBits = component == 3u ? 2u : 10u;
    uint shiftBits = 10u * component;
    uint valueMask = component == 3u ? 0x03u : 0x3FFu;
#elif RGB10A2SintToFloat || RGB10A2UintToFloat || RGB10A2SnormToFloat || RGB10A2UnormToFloat
    uint valueBits = component == 3u ? 2u : 10u;
    // channel order is reversed
    uint shiftBits = component == 3u ? 0u : (valueBits * (2u - component) + 2u);
    uint valueMask = component == 3u ? 0x03u : 0x3FFu;
#else
    uint shiftBits = getShiftBits(offset, Bs);
    uint valueBits = uint(Bs) * 8u;
    // 1u << 32 is undefined, so the full-width mask is spelled out.
    uint valueMask = valueBits == 32u ? 0xFFFFFFFFu : (1u << valueBits) - 1u;
#endif

    uint valueAsUint;

    if (component >= Ns && component == 3u)
    {
        // Missing alpha is filled with 1.
        // See GLES3.0 section 2.9.1 Transferring Array Elements
#if SintToSint || SintToFloat || A2BGR10SintToSint || A2BGR10SintToFloat || RGB10A2SintToFloat || \
    UintToUint || UintToFloat || A2BGR10UintToUint || A2BGR10UintToFloat || RGB10A2UintToFloat
        // For integers, alpha should take a value of 1.
        valueAsUint = 1u;
#elif SnormToFloat || A2BGR10SnormToFloat || RGB10A2SnormToFloat
        // The largest signed number is 0b011...1 which is valueMask >> 1
        valueAsUint = valueMask >> 1;
#elif UnormToFloat || RGB10A2UnormToFloat
        // The largest unsigned number is 0b11...1 which is valueMask
        valueAsUint = valueMask;
#elif FixedToFloat
        // 1.0 in fixed point is 0x10000
        valueAsUint = 0x10000u;
#elif FloatToFloat
        valueAsUint = floatBitsToUint(1.0);
#else
#error "Not all conversions are accounted for"
#endif
    }
    else
    {
        // The source is read only for components it actually contains.  A synthesized alpha
        // lies past the end of the vertex's data, so loading its element could read beyond the
        // source buffer for the last vertex.
        uint block = srcData[offset / 4u];
        valueAsUint = (block >> shiftBits) & valueMask;
    }

    // Convert to SrcType
#if SintToSint || SintToFloat || A2BGR10SintToSint || A2BGR10SintToFloat || RGB10A2SintToFloat
    SrcType value = SrcType(signExtendBits(valueAsUint, valueBits));
#elif UintToUint || UintToFloat || A2BGR10UintToUint || A2BGR10UintToFloat || RGB10A2UintToFloat
    SrcType value = valueAsUint;
#elif SnormToFloat || A2BGR10SnormToFloat || RGB10A2SnormToFloat
    int valueAsInt = int(signExtendBits(valueAsUint, valueBits));
    // Scale by the largest positive value (valueMask >> 1); the most negative input would land
    // just below -1 and is clamped.
    SrcType value = float(valueAsInt) / float(valueMask >> 1);
    value = max(value, -1.0);
#elif UnormToFloat || RGB10A2UnormToFloat
    // Scale [0, P] to [0, 1]
    SrcType value = float(valueAsUint) / float(valueMask);
#elif FixedToFloat
    // 16.16 signed fixed point: reinterpret as signed and divide by 2^16.
    SrcType value = float(int(valueAsUint)) * (1.0 / 65536.0);
#elif FloatToFloat
    SrcType value = uintBitsToFloat(valueAsUint);
#else
#error "Not all conversions are accounted for"
#endif

    return value;
}
309
// Converts a loaded source value to the destination type.
DestType convertComponent(SrcType srcValue)
{
    // In all cases, srcValue already contains the final value; at most it needs a change of
    // type, which is spelled out here with the destination type's constructor.
    return DestType(srcValue);
}
316
// Returns the bits of `value`, the converted destination component with index `cd`, shifted to
// its position inside the 4-byte output element.  The results of multiple calls for the same
// element should be |ed together and eventually written to the destination.
uint makeDestinationComponent(uint cd, DestType value)
{
#if SintToSint || UintToUint || A2BGR10SintToSint || A2BGR10UintToUint
    uint vertex = cd / Nd;
    uint component = cd % Nd;

    uint offset = getDestinationComponentOffset(vertex, component);
    uint shiftBits = getShiftBits(offset, Bd);

    uint valueBits = Bd * 8u;
    // 1u << 32 is undefined, so the full-width mask is spelled out.
    uint valueMask = valueBits == 32u ? 0xFFFFFFFFu : (1u << valueBits) - 1u;
    // Truncate to the component width, then move into place.
    uint valueAsUint = (uint(value) & valueMask) << shiftBits;

#elif IsDestFloat
    // If the destination is float, it will occupy the whole result.  floatBitsToUint yields the
    // unsigned bit pattern directly, with no implicit int-to-uint conversion.
    uint valueAsUint = floatBitsToUint(value);

#else
#error "Not all conversions are accounted for"
#endif

    return valueAsUint;
}
343
// Writes this invocation's assembled 4-byte element to the destination buffer.
void storeDestinationComponents(uint valueAsUint)
{
    // Note that the destination allocations are always aligned to kMaxVertexFormatAlignment.
    // NOTE(review): the division assumes destOffset is a multiple of 4 - confirm with the caller.
    uint firstSlot = params.destOffset / 4;
    uint slot = firstSlot + gl_GlobalInvocationID.x;
    destData[slot] = valueAsUint;
}
349
// Entry point: each invocation assembles and stores one 4-byte destination element, which holds
// up to Ed converted components.
void main()
{
    uint element = gl_GlobalInvocationID.x;
    if (element >= params.outputCount)
    {
        return;
    }

    // Components [Ed * element, Ed * (element + 1)) belong to this element; stop early if the
    // total component count is reached first.
    uint elementBits = 0u;
    uint cd = element * Ed;
    uint i = 0u;
    while (i < Ed && cd < params.componentCount)
    {
        SrcType srcValue = loadSourceComponent(cd);
        DestType destValue = convertComponent(srcValue);
        elementBits |= makeDestinationComponent(cd, destValue);

        ++i;
        ++cd;
    }

    storeDestinationComponents(elementBits);
}
371