/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "experimental/graphite/src/UniformManager.h"

#include "experimental/graphite/src/DrawTypes.h"
#include "include/core/SkMatrix.h"
#include "include/private/SkHalf.h"
#include "include/private/SkTemplates.h"
#include "src/core/SkUniform.h"

// ensure that these types are the sizes the uniform data is expecting
static_assert(sizeof(int32_t) == 4);
static_assert(sizeof(float) == 4);
static_assert(sizeof(int16_t) == 2);
static_assert(sizeof(SkHalf) == 2);

namespace skgpu {

//////////////////////////////////////////////////////////////////////////////

UniformManager::UniformManager(Layout layout) : fLayout(layout) {}

// Returns the size in bytes of a tightly packed vector of vecLength components.
template<typename BaseType>
static constexpr size_t tight_vec_size(int vecLength) {
    return sizeof(BaseType) * vecLength;
}

/**
 * From Section 7.6.2.2 "Standard Uniform Block Layout":
 *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
 *  2. If the member is a two- or four-component vector with components consuming N basic machine
 *     units, the base alignment is 2N or 4N, respectively.
 *  3. If the member is a three-component vector with components consuming N
 *     basic machine units, the base alignment is 4N.
 *  4. If the member is an array of scalars or vectors, the base alignment and array
 *     stride are set to match the base alignment of a single array element, according
 *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
 *     array may have padding at the end; the base offset of the member following
 *     the array is rounded up to the next multiple of the base alignment.
 *  5. If the member is a column-major matrix with C columns and R rows, the
 *     matrix is stored identically to an array of C column vectors with R components each,
 *     according to rule (4).
 *  6. If the member is an array of S column-major matrices with C columns and
 *     R rows, the matrix is stored identically to a row of S × C column vectors
 *     with R components each, according to rule (4).
 *  7. If the member is a row-major matrix with C columns and R rows, the matrix
 *     is stored identically to an array of R row vectors with C components each,
 *     according to rule (4).
 *  8. If the member is an array of S row-major matrices with C columns and R
 *     rows, the matrix is stored identically to a row of S × R row vectors with C
 *     components each, according to rule (4).
 *  9. If the member is a structure, the base alignment of the structure is N, where
 *     N is the largest base alignment value of any of its members, and rounded
 *     up to the base alignment of a vec4. The individual members of this substructure are then
 *     assigned offsets by applying this set of rules recursively,
 *     where the base offset of the first member of the sub-structure is equal to the
 *     aligned offset of the structure. The structure may have padding at the end;
 *     the base offset of the member following the sub-structure is rounded up to
 *     the next multiple of the base alignment of the structure.
 * 10. If the member is an array of S structures, the S elements of the array are laid
 *     out in order, according to rule (9).
 */
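// Worked example (added for illustration; not part of the spec text): under rule 4,
// a std140 array of 4 floats has its 4-byte stride rounded up to the vec4 alignment
// of 16 bytes, so its elements land at offsets 0, 16, 32, and 48 and the array
// occupies 64 bytes instead of the 16 a tightly packed layout would use.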
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules140 {
    /**
     * For an array of scalars or vectors this returns the stride between array elements. For
     * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
     * that for single (non-array) scalars or vectors we don't require a stride.
     */
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules140<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }

        // Rule 4.

        // Alignment of vec4 by Rule 2.
        constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
        // Get alignment of a single vector of BaseType by Rule 1, 2, or 3.
        int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength;
        size_t kElementAlignment = tight_vec_size<BaseType>(n);
        // Round kElementAlignment up to a multiple of kVec4Alignment.
        size_t m = (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
        return m * kVec4Alignment;
    }
};
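
// Illustrative sanity checks (added for exposition; not in the original source,
// and assuming SkUniform::kNonArray == 0, as implied by the count == 0 branch above):
// std140 rounds array strides up to vec4 alignment, while a lone (non-array) member
// keeps its tight size.
static_assert(Rules140<float>::Stride(1) == 16);                        // float[] stride
static_assert(Rules140<float, 3>::Stride(1) == 16);                     // float3[] stride
static_assert(Rules140<float, 3>::Stride(SkUniform::kNonArray) == 12);  // lone float3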

/**
 * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
 * identically to uniform and shader storage blocks using the std140 layout, except that the base
 * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
 * not rounded up to a multiple of the base alignment of a vec4.
 */
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules430 {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);

        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules430<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        // Rule 4 without the round up to a multiple of align-of vec4.
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
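
// Illustrative contrast with std140 (added for exposition; not in the original
// source): std430 keeps scalar and two-component array strides tight, and only
// three-component vectors pad out to their four-component size.
static_assert(Rules430<float>::Stride(1) == 4);       // tight, vs. 16 under std140
static_assert(Rules430<float, 3>::Stride(1) == 16);   // vec3 still pads to vec4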

// The strides used here were derived from the rules we've imposed on ourselves in
// GrMtlPipelineStateDataManager. Everything is tight except for 3-component types, which
// have the stride of their 4-component equivalents.
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct RulesMetal {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
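
// Illustrative checks for the Metal rules (added for exposition; not in the
// original source): half types stay half-sized, and 3-component vectors take
// the stride of their 4-component equivalents.
static_assert(RulesMetal<SkHalf, 2>::Stride(1) == 4);   // tight: 2 * sizeof(SkHalf)
static_assert(RulesMetal<SkHalf, 3>::Stride(1) == 8);   // padded to half4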

template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
class Writer {
private:
    template <typename MemType, typename UniformType>
    static void CopyUniforms(void* dst, const void* src, int numUniforms) {
        if constexpr (std::is_same<MemType, UniformType>::value) {
            // Matching types--use memcpy.
            std::memcpy(dst, src, numUniforms * sizeof(MemType));
            return;
        }

        if constexpr (std::is_same<MemType, float>::value &&
                      std::is_same<UniformType, SkHalf>::value) {
            // Convert floats to half.
            const float* floatBits = static_cast<const float*>(src);
            SkHalf* halfBits = static_cast<SkHalf*>(dst);
            while (numUniforms-- > 0) {
                *halfBits++ = SkFloatToHalf(*floatBits++);
            }
            return;
        }

        SK_ABORT("implement conversion from MemType to UniformType");
    }

    // Writes a uniform of type UniformType from MemType data, returning the number of
    // bytes it occupies under the active Rules (including any inter-element padding).
    template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
    static uint32_t Write(void *dst, int n, const MemType src[]) {
        size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
        n = (n == SkUniform::kNonArray) ? 1 : n;
        n *= Cols;

        if (dst) {
            if (stride == RowsOrVecLength * sizeof(UniformType)) {
                // The data is tightly packed; one bulk copy suffices.
                CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
            } else {
                // Copy one element (or matrix column) at a time, stepping by the stride.
                for (int i = 0; i < n; ++i) {
                    CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
                    src += RowsOrVecLength;
                    dst = SkTAddOffset<void>(dst, stride);
                }
            }
        }

        return n * stride;
    }

    template <typename UniformType>
    static uint32_t WriteSkMatrices(void *dst, int n, const SkMatrix m[]) {
        // Stride() will give us the stride of each column, so mul by 3 to get matrix stride.
        size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
        n = std::max(n, 1);

        if (dst) {
            size_t offset = 0;
            for (int i = 0; i < n; ++i) {
                // Transpose the SkMatrix, which stores its values row-major, into the
                // column-major order the uniform layouts expect.
                float mt[] = {
                        m[i].get(SkMatrix::kMScaleX),
                        m[i].get(SkMatrix::kMSkewY),
                        m[i].get(SkMatrix::kMPersp0),
                        m[i].get(SkMatrix::kMSkewX),
                        m[i].get(SkMatrix::kMScaleY),
                        m[i].get(SkMatrix::kMPersp1),
                        m[i].get(SkMatrix::kMTransX),
                        m[i].get(SkMatrix::kMTransY),
                        m[i].get(SkMatrix::kMPersp2),
                };
                Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
                offset += stride;
            }
        }
        return n * stride;
    }

public:
    static uint32_t WriteUniform(SkSLType type,
                                 CType ctype,
                                 void *dest,
                                 int n,
                                 const void *src) {
        SkASSERT(n >= 1 || n == SkUniform::kNonArray);
        switch (type) {
            case SkSLType::kInt:
                return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t *>(src));

            case SkSLType::kInt2:
                return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t *>(src));

            case SkSLType::kInt3:
                return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t *>(src));

            case SkSLType::kInt4:
                return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t *>(src));

            case SkSLType::kHalf:
                return Write<float, SkHalf>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat:
                return Write<float, float>(dest, n, static_cast<const float *>(src));

            case SkSLType::kHalf2:
                return Write<float, SkHalf, 2>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat2:
                return Write<float, float, 2>(dest, n, static_cast<const float *>(src));

            case SkSLType::kHalf3:
                return Write<float, SkHalf, 3>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat3:
                return Write<float, float, 3>(dest, n, static_cast<const float *>(src));

            case SkSLType::kHalf4:
                return Write<float, SkHalf, 4>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat4:
                return Write<float, float, 4>(dest, n, static_cast<const float *>(src));

            case SkSLType::kHalf2x2:
                return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat2x2:
                return Write<float, float, 2, 2>(dest, n, static_cast<const float *>(src));

            case SkSLType::kHalf3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, SkHalf, 3, 3>(dest, n, static_cast<const float *>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<SkHalf>(dest, n, static_cast<const SkMatrix *>(src));
                }
                SkUNREACHABLE;

            case SkSLType::kFloat3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, float, 3, 3>(dest, n, static_cast<const float *>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<float>(dest, n, static_cast<const SkMatrix *>(src));
                }
                SkUNREACHABLE;

            case SkSLType::kHalf4x4:
                return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float *>(src));

            case SkSLType::kFloat4x4:
                return Write<float, float, 4, 4>(dest, n, static_cast<const float *>(src));

            default:
                SK_ABORT("Unexpected uniform type");
        }
    }
};

#ifdef SK_DEBUG
// To determine whether a current offset is aligned, we can just 'and' the lowest bits with the
// alignment mask. A value of 0 means aligned; any other value is how many bytes past alignment we
// are. This works since all alignments are powers of 2. The mask is always (alignment - 1).
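// For example (illustrative, added): a 4-byte-aligned type has mask 0x3, so an offset
// of 6 gives 6 & 0x3 = 2, i.e. two bytes past the previous 4-byte boundary.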
static uint32_t sksltype_to_alignment_mask(SkSLType type) {
    switch (type) {
        case SkSLType::kInt:
        case SkSLType::kUInt:
        case SkSLType::kFloat:
            return 0x3;
        case SkSLType::kInt2:
        case SkSLType::kUInt2:
        case SkSLType::kFloat2:
            return 0x7;
        case SkSLType::kInt3:
        case SkSLType::kUInt3:
        case SkSLType::kFloat3:
        case SkSLType::kInt4:
        case SkSLType::kUInt4:
        case SkSLType::kFloat4:
            return 0xF;

        case SkSLType::kFloat2x2:
            return 0x7;
        case SkSLType::kFloat3x3:
            return 0xF;
        case SkSLType::kFloat4x4:
            return 0xF;

        case SkSLType::kShort:
        case SkSLType::kUShort:
        case SkSLType::kHalf:
            return 0x1;
        case SkSLType::kShort2:
        case SkSLType::kUShort2:
        case SkSLType::kHalf2:
            return 0x3;
        case SkSLType::kShort3:
        case SkSLType::kShort4:
        case SkSLType::kUShort3:
        case SkSLType::kUShort4:
        case SkSLType::kHalf3:
        case SkSLType::kHalf4:
            return 0x7;

        case SkSLType::kHalf2x2:
            return 0x3;
        case SkSLType::kHalf3x3:
            return 0x7;
        case SkSLType::kHalf4x4:
            return 0x7;

        // This query is only valid for certain types.
        case SkSLType::kVoid:
        case SkSLType::kBool:
        case SkSLType::kBool2:
        case SkSLType::kBool3:
        case SkSLType::kBool4:
        case SkSLType::kTexture2DSampler:
        case SkSLType::kTextureExternalSampler:
        case SkSLType::kTexture2DRectSampler:
        case SkSLType::kSampler:
        case SkSLType::kTexture2D:
        case SkSLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}

/** Returns the size in bytes taken up in Metal buffers for SkSLTypes. */
inline uint32_t sksltype_to_mtl_size(SkSLType type) {
    switch (type) {
        case SkSLType::kInt:
        case SkSLType::kUInt:
        case SkSLType::kFloat:
            return 4;
        case SkSLType::kInt2:
        case SkSLType::kUInt2:
        case SkSLType::kFloat2:
            return 8;
        case SkSLType::kInt3:
        case SkSLType::kUInt3:
        case SkSLType::kFloat3:
        case SkSLType::kInt4:
        case SkSLType::kUInt4:
        case SkSLType::kFloat4:
            return 16;

        case SkSLType::kFloat2x2:
            return 16;
        case SkSLType::kFloat3x3:
            return 48;
        case SkSLType::kFloat4x4:
            return 64;

        case SkSLType::kShort:
        case SkSLType::kUShort:
        case SkSLType::kHalf:
            return 2;
        case SkSLType::kShort2:
        case SkSLType::kUShort2:
        case SkSLType::kHalf2:
            return 4;
        case SkSLType::kShort3:
        case SkSLType::kShort4:
        case SkSLType::kUShort3:
        case SkSLType::kUShort4:
        case SkSLType::kHalf3:
        case SkSLType::kHalf4:
            return 8;

        case SkSLType::kHalf2x2:
            return 8;
        case SkSLType::kHalf3x3:
            return 24;
        case SkSLType::kHalf4x4:
            return 32;

        // This query is only valid for certain types.
        case SkSLType::kVoid:
        case SkSLType::kBool:
        case SkSLType::kBool2:
        case SkSLType::kBool3:
        case SkSLType::kBool4:
        case SkSLType::kTexture2DSampler:
        case SkSLType::kTextureExternalSampler:
        case SkSLType::kTexture2DRectSampler:
        case SkSLType::kSampler:
        case SkSLType::kTexture2D:
        case SkSLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}


// Given the current offset into the ubo, calculate the offset for the uniform we're trying to add,
// taking into consideration all alignment requirements. Returns the aligned offset for the new
// uniform and updates currentOffset to point just past the end of the new uniform.
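// Worked example (illustrative, added): with currentOffset = 4, a non-array kFloat3
// (mask 0xF, Metal size 16) is pushed to offset 16, and currentOffset advances to 32.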
static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
                                       uint32_t* maxAlignment,
                                       SkSLType type,
                                       int arrayCount) {
    uint32_t alignmentMask = sksltype_to_alignment_mask(type);
    if (alignmentMask > *maxAlignment) {
        *maxAlignment = alignmentMask;
    }
    uint32_t offsetDiff = *currentOffset & alignmentMask;
    if (offsetDiff != 0) {
        offsetDiff = alignmentMask - offsetDiff + 1;
    }
    uint32_t uniformOffset = *currentOffset + offsetDiff;
    SkASSERT(sizeof(float) == 4);
    if (arrayCount) {
        *currentOffset = uniformOffset + sksltype_to_mtl_size(type) * arrayCount;
    } else {
        *currentOffset = uniformOffset + sksltype_to_mtl_size(type);
    }
    return uniformOffset;
}
#endif // SK_DEBUG

SkSLType UniformManager::getUniformTypeForLayout(SkSLType type) {
    if (fLayout != Layout::kMetal) {
        // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
        switch (type) {
            case SkSLType::kShort:            return SkSLType::kInt;
            case SkSLType::kUShort:           return SkSLType::kUInt;
            case SkSLType::kHalf:             return SkSLType::kFloat;

            case SkSLType::kShort2:           return SkSLType::kInt2;
            case SkSLType::kUShort2:          return SkSLType::kUInt2;
            case SkSLType::kHalf2:            return SkSLType::kFloat2;

            case SkSLType::kShort3:           return SkSLType::kInt3;
            case SkSLType::kUShort3:          return SkSLType::kUInt3;
            case SkSLType::kHalf3:            return SkSLType::kFloat3;

            case SkSLType::kShort4:           return SkSLType::kInt4;
            case SkSLType::kUShort4:          return SkSLType::kUInt4;
            case SkSLType::kHalf4:            return SkSLType::kFloat4;

            case SkSLType::kHalf2x2:          return SkSLType::kFloat2x2;
            case SkSLType::kHalf3x3:          return SkSLType::kFloat3x3;
            case SkSLType::kHalf4x4:          return SkSLType::kFloat4x4;

            default:                          break;
        }
    }

    return type;
}

uint32_t UniformManager::writeUniforms(SkSpan<const SkUniform> uniforms,
                                       const void** srcs,
                                       uint32_t* offsets,
                                       char *dst) {
    decltype(&Writer<Rules140>::WriteUniform) write;
    switch (fLayout) {
        case Layout::kStd140:
            write = Writer<Rules140>::WriteUniform;
            break;
        case Layout::kStd430:
            write = Writer<Rules430>::WriteUniform;
            break;
        case Layout::kMetal:
            write = Writer<RulesMetal>::WriteUniform;
            break;
    }

#ifdef SK_DEBUG
    uint32_t curUBOOffset = 0;
    uint32_t curUBOMaxAlignment = 0;
#endif // SK_DEBUG

    uint32_t offset = 0;

    for (int i = 0; i < (int) uniforms.size(); ++i) {
        const SkUniform& u = uniforms[i];
        SkSLType uniformType = this->getUniformTypeForLayout(u.type());

#ifdef SK_DEBUG
        uint32_t debugOffset = get_ubo_aligned_offset(&curUBOOffset,
                                                      &curUBOMaxAlignment,
                                                      uniformType,
                                                      u.count());
#endif // SK_DEBUG

        uint32_t bytesWritten = write(uniformType,
                                      CType::kDefault,
                                      dst ? &dst[offset] : nullptr,
                                      u.count(),
                                      srcs ? srcs[i] : nullptr);
        SkASSERT(debugOffset == offset);

        if (offsets) {
            offsets[i] = offset;
        }
        offset += bytesWritten;
    }

    return offset;
}
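
// Usage sketch (illustrative, added for exposition; not part of the original source).
// Passing null pointers sizes the uniform block without writing anything; a second
// call then fills the allocated storage:
//
//     UniformManager mgr(Layout::kStd140);
//     uint32_t requiredSize = mgr.writeUniforms(uniforms, nullptr, nullptr, nullptr);
//     SkAutoTMalloc<char> block(requiredSize);   // any suitably sized allocation works
//     mgr.writeUniforms(uniforms, srcs, offsets, block.get());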

} // namespace skgpu