• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "experimental/graphite/src/UniformManager.h"
9 #include "include/core/SkMatrix.h"
10 #include "include/private/SkHalf.h"
11 #include "include/private/SkTemplates.h"
12 
13 // ensure that these types are the sizes the uniform data is expecting
14 static_assert(sizeof(int32_t) == 4);
15 static_assert(sizeof(float) == 4);
16 static_assert(sizeof(int16_t) == 2);
17 static_assert(sizeof(SkHalf) == 2);
18 
19 namespace skgpu {
20 
21 //////////////////////////////////////////////////////////////////////////////
22 
UniformManager(Layout layout)23 UniformManager::UniformManager(Layout layout) : fLayout(layout) {}
24 
// Bytes occupied by a tightly-packed vector of `vecLength` components of
// BaseType (no padding or alignment applied).
template<typename BaseType>
static constexpr size_t tight_vec_size(int vecLength) {
    return vecLength * sizeof(BaseType);
}
29 
30 /**
31  * From Section 7.6.2.2 "Standard Uniform Block Layout":
32  *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
33  *  2. If the member is a two- or four-component vector with components consuming N basic machine
34  *     units, the base alignment is 2N or 4N, respectively.
35  *  3. If the member is a three-component vector with components consuming N
36  *     basic machine units, the base alignment is 4N.
37  *  4. If the member is an array of scalars or vectors, the base alignment and array
38  *     stride are set to match the base alignment of a single array element, according
39  *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
40  *     array may have padding at the end; the base offset of the member following
41  *     the array is rounded up to the next multiple of the base alignment.
42  *  5. If the member is a column-major matrix with C columns and R rows, the
43  *     matrix is stored identically to an array of C column vectors with R components each,
44  *     according to rule (4).
45  *  6. If the member is an array of S column-major matrices with C columns and
46  *     R rows, the matrix is stored identically to a row of S × C column vectors
47  *     with R components each, according to rule (4).
48  *  7. If the member is a row-major matrix with C columns and R rows, the matrix
49  *     is stored identically to an array of R row vectors with C components each,
50  *     according to rule (4).
51  *  8. If the member is an array of S row-major matrices with C columns and R
52  *     rows, the matrix is stored identically to a row of S × R row vectors with C
53  *    components each, according to rule (4).
54  *  9. If the member is a structure, the base alignment of the structure is N, where
55  *     N is the largest base alignment value of any of its members, and rounded
56  *     up to the base alignment of a vec4. The individual members of this substructure are then
57  *     assigned offsets by applying this set of rules recursively,
58  *     where the base offset of the first member of the sub-structure is equal to the
59  *     aligned offset of the structure. The structure may have padding at the end;
60  *     the base offset of the member following the sub-structure is rounded up to
61  *     the next multiple of the base alignment of the structure.
62  * 10. If the member is an array of S structures, the S elements of the array are laid
63  *     out in order, according to rule (9).
64  */
65 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
66 struct Rules140 {
67     /**
68      * For an array of scalars or vectors this returns the stride between array elements. For
69      * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
70      * that for single (non-array) scalars or vectors we don't require a stride.
71      */
Strideskgpu::Rules14072     static constexpr size_t Stride(int count) {
73         SkASSERT(count >= 1 || count == Uniform::kNonArray);
74         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
75         static_assert(Cols >= 1 && Cols <= 4);
76         if (Cols != 1) {
77             // This is a matrix or array of matrices. We return the stride between columns.
78             SkASSERT(RowsOrVecLength > 1);
79             return Rules140<BaseType, RowsOrVecLength>::Stride(1);
80         }
81         if (count == 0) {
82             // Stride doesn't matter for a non-array.
83             return RowsOrVecLength * sizeof(BaseType);
84         }
85 
86         // Rule 4.
87 
88         // Alignment of vec4 by Rule 2.
89         constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
90         // Get alignment of a single vector of BaseType by Rule 1, 2, or 3
91         int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength;
92         size_t kElementAlignment = tight_vec_size<BaseType>(n);
93         // Round kElementAlignment up to multiple of kVec4Alignment.
94         size_t m = (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
95         return m * kVec4Alignment;
96     }
97 };
98 
99 /**
100  * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
101  * identically to uniform and shader storage blocks using the std140 layout, except that the base
102  * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
103  * not rounded up a multiple of the base alignment of a vec4.
104  */
105 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
106 struct Rules430 {
Strideskgpu::Rules430107     static constexpr size_t Stride(int count) {
108         SkASSERT(count >= 1 || count == Uniform::kNonArray);
109         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
110         static_assert(Cols >= 1 && Cols <= 4);
111 
112         if (Cols != 1) {
113             // This is a matrix or array of matrices. We return the stride between columns.
114             SkASSERT(RowsOrVecLength > 1);
115             return Rules430<BaseType, RowsOrVecLength>::Stride(1);
116         }
117         if (count == 0) {
118             // Stride doesn't matter for a non-array.
119             return RowsOrVecLength * sizeof(BaseType);
120         }
121         // Rule 4 without the round up to a multiple of align-of vec4.
122         return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
123     }
124 };
125 
126 // The strides used here were derived from the rules we've imposed on ourselves in
127 // GrMtlPipelineStateDataManger. Everything is tight except 3-component which have the stride of
128 // their 4-component equivalents.
129 template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
130 struct RulesMetal {
Strideskgpu::RulesMetal131     static constexpr size_t Stride(int count) {
132         SkASSERT(count >= 1 || count == Uniform::kNonArray);
133         static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
134         static_assert(Cols >= 1 && Cols <= 4);
135         if (Cols != 1) {
136             // This is a matrix or array of matrices. We return the stride between columns.
137             SkASSERT(RowsOrVecLength > 1);
138             return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
139         }
140         if (count == 0) {
141             // Stride doesn't matter for a non-array.
142             return RowsOrVecLength * sizeof(BaseType);
143         }
144         return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
145     }
146 };
147 
148 template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
149 class Writer {
150 private:
151     template <typename MemType, typename UniformType>
CopyUniforms(void * dst,const void * src,int numUniforms)152     static void CopyUniforms(void* dst, const void* src, int numUniforms) {
153         if constexpr (std::is_same<MemType, UniformType>::value) {
154             // Matching types--use memcpy.
155             std::memcpy(dst, src, numUniforms * sizeof(MemType));
156             return;
157         }
158 
159         if constexpr (std::is_same<MemType, float>::value &&
160                       std::is_same<UniformType, SkHalf>::value) {
161             // Convert floats to half.
162             const float* floatBits = static_cast<const float*>(src);
163             SkHalf* halfBits = static_cast<SkHalf*>(dst);
164             while (numUniforms-- > 0) {
165                 *halfBits++ = SkFloatToHalf(*floatBits++);
166             }
167             return;
168         }
169 
170         SK_ABORT("implement conversion from MemType to UniformType");
171     }
172 
173     template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
Write(void * dst,int n,const MemType src[])174     static uint32_t Write(void *dst, int n, const MemType src[]) {
175         size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
176         n = (n == Uniform::kNonArray) ? 1 : n;
177         n *= Cols;
178 
179         if (dst) {
180             if (stride == RowsOrVecLength * sizeof(UniformType)) {
181                 CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
182             } else {
183                 for (int i = 0; i < n; ++i) {
184                     CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
185                     src += RowsOrVecLength;
186                     dst = SkTAddOffset<void>(dst, stride);
187                 }
188             }
189         }
190 
191         return n * stride;
192     }
193 
194     template <typename UniformType>
WriteSkMatrices(void * dst,int n,const SkMatrix m[])195     static uint32_t WriteSkMatrices(void *dst, int n, const SkMatrix m[]) {
196         // Stride() will give us the stride of each column, so mul by 3 to get matrix stride.
197         size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
198         n = std::max(n, 1);
199 
200         if (dst) {
201             size_t offset = 0;
202             for (int i = 0; i < n; ++i) {
203                 float mt[] = {
204                         m[i].get(SkMatrix::kMScaleX),
205                         m[i].get(SkMatrix::kMSkewY),
206                         m[i].get(SkMatrix::kMPersp0),
207                         m[i].get(SkMatrix::kMSkewX),
208                         m[i].get(SkMatrix::kMScaleY),
209                         m[i].get(SkMatrix::kMPersp1),
210                         m[i].get(SkMatrix::kMTransX),
211                         m[i].get(SkMatrix::kMTransY),
212                         m[i].get(SkMatrix::kMPersp2),
213                 };
214                 Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
215                 offset += stride;
216             }
217         }
218         return n * stride;
219     }
220 
221 public:
WriteUniform(SLType type,CType ctype,void * dest,int n,const void * src)222     static uint32_t WriteUniform(SLType type,
223                                  CType ctype,
224                                  void *dest,
225                                  int n,
226                                  const void *src) {
227         SkASSERT(n >= 1 || n == Uniform::kNonArray);
228         switch (type) {
229             case SLType::kInt:
230                 return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t *>(src));
231 
232             case SLType::kInt2:
233                 return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t *>(src));
234 
235             case SLType::kInt3:
236                 return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t *>(src));
237 
238             case SLType::kInt4:
239                 return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t *>(src));
240 
241             case SLType::kHalf:
242                 return Write<float, SkHalf>(dest, n, static_cast<const float *>(src));
243 
244             case SLType::kFloat:
245                 return Write<float, float>(dest, n, static_cast<const float *>(src));
246 
247             case SLType::kHalf2:
248                 return Write<float, SkHalf, 2>(dest, n, static_cast<const float *>(src));
249 
250             case SLType::kFloat2:
251                 return Write<float, float, 2>(dest, n, static_cast<const float *>(src));
252 
253             case SLType::kHalf3:
254                 return Write<float, SkHalf, 3>(dest, n, static_cast<const float *>(src));
255 
256             case SLType::kFloat3:
257                 return Write<float, float, 3>(dest, n, static_cast<const float *>(src));
258 
259             case SLType::kHalf4:
260                 return Write<float, SkHalf, 4>(dest, n, static_cast<const float *>(src));
261 
262             case SLType::kFloat4:
263                 return Write<float, float, 4>(dest, n, static_cast<const float *>(src));
264 
265             case SLType::kHalf2x2:
266                 return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float *>(src));
267 
268             case SLType::kFloat2x2:
269                 return Write<float, float, 2, 2>(dest, n, static_cast<const float *>(src));
270 
271             case SLType::kHalf3x3:
272                 switch (ctype) {
273                     case CType::kDefault:
274                         return Write<float, SkHalf, 3, 3>(dest, n, static_cast<const float *>(src));
275                     case CType::kSkMatrix:
276                         return WriteSkMatrices<SkHalf>(dest, n, static_cast<const SkMatrix *>(src));
277                 }
278                 SkUNREACHABLE;
279 
280             case SLType::kFloat3x3:
281                 switch (ctype) {
282                     case CType::kDefault:
283                         return Write<float, float, 3, 3>(dest, n, static_cast<const float *>(src));
284                     case CType::kSkMatrix:
285                         return WriteSkMatrices<float>(dest, n, static_cast<const SkMatrix *>(src));
286                 }
287                 SkUNREACHABLE;
288 
289             case SLType::kHalf4x4:
290                 return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float *>(src));
291 
292             case SLType::kFloat4x4:
293                 return Write<float, float, 4, 4>(dest, n, static_cast<const float *>(src));
294 
295             default:
296                 SK_ABORT("Unexpected uniform type");
297         }
298     }
299 };
300 
301 #ifdef SK_DEBUG
302 // To determine whether a current offset is aligned, we can just 'and' the lowest bits with the
303 // alignment mask. A value of 0 means aligned, any other value is how many bytes past alignment we
304 // are. This works since all alignments are powers of 2. The mask is always (alignment - 1).
sltype_to_alignment_mask(SLType type)305 static uint32_t sltype_to_alignment_mask(SLType type) {
306     switch (type) {
307         case SLType::kInt:
308         case SLType::kUInt:
309         case SLType::kFloat:
310             return 0x3;
311         case SLType::kInt2:
312         case SLType::kUInt2:
313         case SLType::kFloat2:
314             return 0x7;
315         case SLType::kInt3:
316         case SLType::kUInt3:
317         case SLType::kFloat3:
318         case SLType::kInt4:
319         case SLType::kUInt4:
320         case SLType::kFloat4:
321             return 0xF;
322 
323         case SLType::kFloat2x2:
324             return 0x7;
325         case SLType::kFloat3x3:
326             return 0xF;
327         case SLType::kFloat4x4:
328             return 0xF;
329 
330         case SLType::kShort:
331         case SLType::kUShort:
332         case SLType::kHalf:
333             return 0x1;
334         case SLType::kShort2:
335         case SLType::kUShort2:
336         case SLType::kHalf2:
337             return 0x3;
338         case SLType::kShort3:
339         case SLType::kShort4:
340         case SLType::kUShort3:
341         case SLType::kUShort4:
342         case SLType::kHalf3:
343         case SLType::kHalf4:
344             return 0x7;
345 
346         case SLType::kHalf2x2:
347             return 0x3;
348         case SLType::kHalf3x3:
349             return 0x7;
350         case SLType::kHalf4x4:
351             return 0x7;
352 
353         // This query is only valid for certain types.
354         case SLType::kVoid:
355         case SLType::kBool:
356         case SLType::kBool2:
357         case SLType::kBool3:
358         case SLType::kBool4:
359         case SLType::kTexture2DSampler:
360         case SLType::kTextureExternalSampler:
361         case SLType::kTexture2DRectSampler:
362         case SLType::kSampler:
363         case SLType::kTexture2D:
364         case SLType::kInput:
365             break;
366     }
367     SK_ABORT("Unexpected type");
368 }
369 
370 /** Returns the size in bytes taken up in Metal buffers for GrSLTypes. */
sltype_to_mtl_size(SLType type)371 inline uint32_t sltype_to_mtl_size(SLType type) {
372     switch (type) {
373         case SLType::kInt:
374         case SLType::kUInt:
375         case SLType::kFloat:
376             return 4;
377         case SLType::kInt2:
378         case SLType::kUInt2:
379         case SLType::kFloat2:
380             return 8;
381         case SLType::kInt3:
382         case SLType::kUInt3:
383         case SLType::kFloat3:
384         case SLType::kInt4:
385         case SLType::kUInt4:
386         case SLType::kFloat4:
387             return 16;
388 
389         case SLType::kFloat2x2:
390             return 16;
391         case SLType::kFloat3x3:
392             return 48;
393         case SLType::kFloat4x4:
394             return 64;
395 
396         case SLType::kShort:
397         case SLType::kUShort:
398         case SLType::kHalf:
399             return 2;
400         case SLType::kShort2:
401         case SLType::kUShort2:
402         case SLType::kHalf2:
403             return 4;
404         case SLType::kShort3:
405         case SLType::kShort4:
406         case SLType::kUShort3:
407         case SLType::kUShort4:
408         case SLType::kHalf3:
409         case SLType::kHalf4:
410             return 8;
411 
412         case SLType::kHalf2x2:
413             return 8;
414         case SLType::kHalf3x3:
415             return 24;
416         case SLType::kHalf4x4:
417             return 32;
418 
419         // This query is only valid for certain types.
420         case SLType::kVoid:
421         case SLType::kBool:
422         case SLType::kBool2:
423         case SLType::kBool3:
424         case SLType::kBool4:
425         case SLType::kTexture2DSampler:
426         case SLType::kTextureExternalSampler:
427         case SLType::kTexture2DRectSampler:
428         case SLType::kSampler:
429         case SLType::kTexture2D:
430         case SLType::kInput:
431             break;
432     }
433     SK_ABORT("Unexpected type");
434 }
435 
436 // Given the current offset into the ubo, calculate the offset for the uniform we're trying to add
437 // taking into consideration all alignment requirements. The uniformOffset is set to the offset for
438 // the new uniform, and currentOffset is updated to be the offset to the end of the new uniform.
get_ubo_aligned_offset(uint32_t * currentOffset,uint32_t * maxAlignment,SLType type,int arrayCount)439 static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
440                                        uint32_t* maxAlignment,
441                                        SLType type,
442                                        int arrayCount) {
443     uint32_t alignmentMask = sltype_to_alignment_mask(type);
444     if (alignmentMask > *maxAlignment) {
445         *maxAlignment = alignmentMask;
446     }
447     uint32_t offsetDiff = *currentOffset & alignmentMask;
448     if (offsetDiff != 0) {
449         offsetDiff = alignmentMask - offsetDiff + 1;
450     }
451     uint32_t uniformOffset = *currentOffset + offsetDiff;
452     SkASSERT(sizeof(float) == 4);
453     if (arrayCount) {
454         *currentOffset = uniformOffset + sltype_to_mtl_size(type) * arrayCount;
455     } else {
456         *currentOffset = uniformOffset + sltype_to_mtl_size(type);
457     }
458     return uniformOffset;
459 }
460 #endif // SK_DEBUG
461 
getUniformTypeForLayout(SLType type)462 SLType UniformManager::getUniformTypeForLayout(SLType type) {
463     if (fLayout != Layout::kMetal) {
464         // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
465         switch (type) {
466             case SLType::kShort:            return SLType::kInt;
467             case SLType::kUShort:           return SLType::kUInt;
468             case SLType::kHalf:             return SLType::kFloat;
469 
470             case SLType::kShort2:           return SLType::kInt2;
471             case SLType::kUShort2:          return SLType::kUInt2;
472             case SLType::kHalf2:            return SLType::kFloat2;
473 
474             case SLType::kShort3:           return SLType::kInt3;
475             case SLType::kUShort3:          return SLType::kUInt3;
476             case SLType::kHalf3:            return SLType::kFloat3;
477 
478             case SLType::kShort4:           return SLType::kInt4;
479             case SLType::kUShort4:          return SLType::kUInt4;
480             case SLType::kHalf4:            return SLType::kFloat4;
481 
482             case SLType::kHalf2x2:          return SLType::kFloat2x2;
483             case SLType::kHalf3x3:          return SLType::kFloat3x3;
484             case SLType::kHalf4x4:          return SLType::kFloat4x4;
485 
486             default:                        break;
487         }
488     }
489 
490     return type;
491 }
492 
writeUniforms(SkSpan<const Uniform> uniforms,void ** srcs,uint32_t * offsets,void * dst)493 uint32_t UniformManager::writeUniforms(SkSpan<const Uniform> uniforms,
494                                        void** srcs,
495                                        uint32_t* offsets,
496                                        void *dst) {
497     decltype(&Writer<Rules140>::WriteUniform) write;
498     switch (fLayout) {
499         case Layout::kStd140:
500             write = Writer<Rules140>::WriteUniform;
501             break;
502         case Layout::kStd430:
503             write = Writer<Rules430>::WriteUniform;
504             break;
505         case Layout::kMetal:
506             write = Writer<RulesMetal>::WriteUniform;
507             break;
508     }
509 
510 #ifdef SK_DEBUG
511     uint32_t curUBOOffset = 0;
512     uint32_t curUBOMaxAlignment = 0;
513 #endif // SK_DEBUG
514 
515     uint32_t offset = 0;
516 
517     for (int i = 0; i < (int) uniforms.size(); ++i) {
518         const Uniform& u = uniforms[i];
519         SLType uniformType = this->getUniformTypeForLayout(u.type());
520 
521 #ifdef SK_DEBUG
522         uint32_t debugOffset = get_ubo_aligned_offset(&curUBOOffset,
523                                                       &curUBOMaxAlignment,
524                                                       uniformType,
525                                                       u.count());
526 #endif // SK_DEBUG
527 
528         uint32_t bytesWritten = write(uniformType,
529                                       CType::kDefault,
530                                       dst,
531                                       u.count(),
532                                       srcs ? srcs[i] : nullptr);
533         SkASSERT(debugOffset == offset);
534 
535         if (offsets) {
536             offsets[i] = offset;
537         }
538         offset += bytesWritten;
539     }
540 
541     return offset;
542 }
543 
544 } // namespace skgpu
545