/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "experimental/graphite/src/UniformManager.h"
#include "include/core/SkMatrix.h"
#include "include/private/SkHalf.h"
#include "include/private/SkTemplates.h"

// Ensure that these types are the sizes the uniform data is expecting.
static_assert(sizeof(int32_t) == 4);
static_assert(sizeof(float) == 4);
static_assert(sizeof(int16_t) == 2);
static_assert(sizeof(SkHalf) == 2);

namespace skgpu {

//////////////////////////////////////////////////////////////////////////////

UniformManager::UniformManager(Layout layout) : fLayout(layout) {}

template<typename BaseType>
static constexpr size_t tight_vec_size(int vecLength) {
    return sizeof(BaseType) * vecLength;
}

/**
 * From Section 7.6.2.2 "Standard Uniform Block Layout":
 *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
 *  2. If the member is a two- or four-component vector with components consuming N basic machine
 *     units, the base alignment is 2N or 4N, respectively.
 *  3. If the member is a three-component vector with components consuming N
 *     basic machine units, the base alignment is 4N.
 *  4. If the member is an array of scalars or vectors, the base alignment and array
 *     stride are set to match the base alignment of a single array element, according
 *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
 *     array may have padding at the end; the base offset of the member following
 *     the array is rounded up to the next multiple of the base alignment.
 *  5. If the member is a column-major matrix with C columns and R rows, the
 *     matrix is stored identically to an array of C column vectors with R components each,
 *     according to rule (4).
 *  6. If the member is an array of S column-major matrices with C columns and
 *     R rows, the matrix is stored identically to a row of S × C column vectors
 *     with R components each, according to rule (4).
 *  7. If the member is a row-major matrix with C columns and R rows, the matrix
 *     is stored identically to an array of R row vectors with C components each,
 *     according to rule (4).
 *  8. If the member is an array of S row-major matrices with C columns and R
 *     rows, the matrix is stored identically to a row of S × R row vectors with C
 *     components each, according to rule (4).
 *  9. If the member is a structure, the base alignment of the structure is N, where
 *     N is the largest base alignment value of any of its members, and rounded
 *     up to the base alignment of a vec4. The individual members of this substructure are then
 *     assigned offsets by applying this set of rules recursively,
 *     where the base offset of the first member of the sub-structure is equal to the
 *     aligned offset of the structure. The structure may have padding at the end;
 *     the base offset of the member following the sub-structure is rounded up to
 *     the next multiple of the base alignment of the structure.
 * 10. If the member is an array of S structures, the S elements of the array are laid
 *     out in order, according to rule (9).
 */
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules140 {
    /**
     * For an array of scalars or vectors this returns the stride between array elements. For
     * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
     * that for single (non-array) scalars or vectors we don't require a stride.
     */
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == Uniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules140<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }

        // Rule 4.

        // Alignment of vec4 by Rule 2.
        constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
        // Get alignment of a single vector of BaseType by Rule 1, 2, or 3
        int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength;
        size_t kElementAlignment = tight_vec_size<BaseType>(n);
        // Round kElementAlignment up to multiple of kVec4Alignment.
        size_t m = (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
        return m * kVec4Alignment;
    }
};
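
// A few illustrative sanity checks on Rule 4 as implemented above (added as a
// sketch; they assume SkASSERT stays benign during constant evaluation, which
// holds when the asserted conditions are true). A float scalar (4 bytes) and a
// float3 (12 bytes tight) both round up to the 16-byte base alignment of a
// vec4 when used as array elements, while a non-array float3 stays tight:
static_assert(Rules140<float, 1>::Stride(1) == 16);
static_assert(Rules140<float, 3>::Stride(1) == 16);
static_assert(Rules140<float, 3>::Stride(Uniform::kNonArray) == 12);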

/**
 * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
 * identically to uniform and shader storage blocks using the std140 layout, except that the base
 * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
 * not rounded up to a multiple of the base alignment of a vec4.
 */
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules430 {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == Uniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);

        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules430<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        // Rule 4 without the round up to a multiple of align-of vec4.
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
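
// The practical std430 difference (same caveat as the checks above): scalar
// and two-component array strides stay tight instead of rounding up to a
// vec4, while three-component vectors still pad out to the size of their
// four-component equivalents:
static_assert(Rules430<float, 1>::Stride(1) == 4);
static_assert(Rules430<float, 2>::Stride(1) == 8);
static_assert(Rules430<float, 3>::Stride(1) == 16);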

// The strides used here were derived from the rules we've imposed on ourselves in
// GrMtlPipelineStateDataManager. Everything is tight except 3-component vectors, which have the
// stride of their 4-component equivalents.
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct RulesMetal {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == Uniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
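
// Illustrative checks (same caveat as above): a half2 array element stays
// tight at 4 bytes, while half3 takes the 8-byte stride of half4.
static_assert(RulesMetal<SkHalf, 2>::Stride(1) == 4);
static_assert(RulesMetal<SkHalf, 3>::Stride(1) == 8);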

template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
class Writer {
private:
    template <typename MemType, typename UniformType>
    static void CopyUniforms(void* dst, const void* src, int numUniforms) {
        if constexpr (std::is_same<MemType, UniformType>::value) {
            // Matching types--use memcpy.
            std::memcpy(dst, src, numUniforms * sizeof(MemType));
            return;
        }

        if constexpr (std::is_same<MemType, float>::value &&
                      std::is_same<UniformType, SkHalf>::value) {
            // Convert floats to half.
            const float* floatBits = static_cast<const float*>(src);
            SkHalf* halfBits = static_cast<SkHalf*>(dst);
            while (numUniforms-- > 0) {
                *halfBits++ = SkFloatToHalf(*floatBits++);
            }
            return;
        }

        SK_ABORT("implement conversion from MemType to UniformType");
    }

    // Copies n uniforms of the given shape into dst at the layout's stride, or, when dst is
    // null, just computes the number of bytes the uniforms occupy.
    template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
    static uint32_t Write(void *dst, int n, const MemType src[]) {
        size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
        n = (n == Uniform::kNonArray) ? 1 : n;
        n *= Cols;

        if (dst) {
            if (stride == RowsOrVecLength * sizeof(UniformType)) {
                CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
            } else {
                for (int i = 0; i < n; ++i) {
                    CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
                    src += RowsOrVecLength;
                    dst = SkTAddOffset<void>(dst, stride);
                }
            }
        }

        return n * stride;
    }

    template <typename UniformType>
    static uint32_t WriteSkMatrices(void *dst, int n, const SkMatrix m[]) {
        // Stride() will give us the stride of each column, so mul by 3 to get matrix stride.
        size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
        n = std::max(n, 1);

        if (dst) {
            size_t offset = 0;
            for (int i = 0; i < n; ++i) {
                // SkMatrix stores its values in row-major order; reorder them into the
                // column-major order the uniform layout expects.
                float mt[] = {
                    m[i].get(SkMatrix::kMScaleX),
                    m[i].get(SkMatrix::kMSkewY),
                    m[i].get(SkMatrix::kMPersp0),
                    m[i].get(SkMatrix::kMSkewX),
                    m[i].get(SkMatrix::kMScaleY),
                    m[i].get(SkMatrix::kMPersp1),
                    m[i].get(SkMatrix::kMTransX),
                    m[i].get(SkMatrix::kMTransY),
                    m[i].get(SkMatrix::kMPersp2),
                };
                Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
                offset += stride;
            }
        }
        return n * stride;
    }

public:
    static uint32_t WriteUniform(SLType type,
                                 CType ctype,
                                 void *dest,
                                 int n,
                                 const void *src) {
        SkASSERT(n >= 1 || n == Uniform::kNonArray);
        switch (type) {
            case SLType::kInt:
                return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t *>(src));

            case SLType::kInt2:
                return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t *>(src));

            case SLType::kInt3:
                return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t *>(src));

            case SLType::kInt4:
                return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t *>(src));

            case SLType::kHalf:
                return Write<float, SkHalf>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat:
                return Write<float, float>(dest, n, static_cast<const float *>(src));

            case SLType::kHalf2:
                return Write<float, SkHalf, 2>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat2:
                return Write<float, float, 2>(dest, n, static_cast<const float *>(src));

            case SLType::kHalf3:
                return Write<float, SkHalf, 3>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat3:
                return Write<float, float, 3>(dest, n, static_cast<const float *>(src));

            case SLType::kHalf4:
                return Write<float, SkHalf, 4>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat4:
                return Write<float, float, 4>(dest, n, static_cast<const float *>(src));

            case SLType::kHalf2x2:
                return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat2x2:
                return Write<float, float, 2, 2>(dest, n, static_cast<const float *>(src));

            case SLType::kHalf3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, SkHalf, 3, 3>(dest, n, static_cast<const float *>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<SkHalf>(dest, n, static_cast<const SkMatrix *>(src));
                }
                SkUNREACHABLE;

            case SLType::kFloat3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, float, 3, 3>(dest, n, static_cast<const float *>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<float>(dest, n, static_cast<const SkMatrix *>(src));
                }
                SkUNREACHABLE;

            case SLType::kHalf4x4:
                return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float *>(src));

            case SLType::kFloat4x4:
                return Write<float, float, 4, 4>(dest, n, static_cast<const float *>(src));

            default:
                SK_ABORT("Unexpected uniform type");
        }
    }
};
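
// A sketch of how the size accounting plays out (values read off the code
// above, shown for illustration only; nothing here exercises it). Passing a
// null dest turns WriteUniform into a pure size query:
//
//     float rgb[3] = {1, 0, 0};
//     // Non-array float3: tight, 12 bytes (stride is irrelevant for non-arrays).
//     Writer<Rules140>::WriteUniform(SLType::kFloat3, CType::kDefault,
//                                    /*dest=*/nullptr, Uniform::kNonArray, rgb);
//     // Array of one float3: Rule 4 rounds the stride up to a vec4, so 16 bytes.
//     Writer<Rules140>::WriteUniform(SLType::kFloat3, CType::kDefault,
//                                    /*dest=*/nullptr, /*n=*/1, rgb);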

#ifdef SK_DEBUG
// To determine whether a current offset is aligned, we can just 'and' the lowest bits with the
// alignment mask. A value of 0 means aligned, any other value is how many bytes past alignment we
// are. This works since all alignments are powers of 2. The mask is always (alignment - 1).
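// For example, at offset 20 a float4 (mask 0xF) gives 20 & 0xF = 4: we are 4
// bytes past a 16-byte boundary and must pad by 16 - 4 = 12 to reach the next
// aligned offset, 32.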
static uint32_t sltype_to_alignment_mask(SLType type) {
    switch (type) {
        case SLType::kInt:
        case SLType::kUInt:
        case SLType::kFloat:
            return 0x3;
        case SLType::kInt2:
        case SLType::kUInt2:
        case SLType::kFloat2:
            return 0x7;
        case SLType::kInt3:
        case SLType::kUInt3:
        case SLType::kFloat3:
        case SLType::kInt4:
        case SLType::kUInt4:
        case SLType::kFloat4:
            return 0xF;

        case SLType::kFloat2x2:
            return 0x7;
        case SLType::kFloat3x3:
            return 0xF;
        case SLType::kFloat4x4:
            return 0xF;

        case SLType::kShort:
        case SLType::kUShort:
        case SLType::kHalf:
            return 0x1;
        case SLType::kShort2:
        case SLType::kUShort2:
        case SLType::kHalf2:
            return 0x3;
        case SLType::kShort3:
        case SLType::kShort4:
        case SLType::kUShort3:
        case SLType::kUShort4:
        case SLType::kHalf3:
        case SLType::kHalf4:
            return 0x7;

        case SLType::kHalf2x2:
            return 0x3;
        case SLType::kHalf3x3:
            return 0x7;
        case SLType::kHalf4x4:
            return 0x7;

        // This query is only valid for certain types.
        case SLType::kVoid:
        case SLType::kBool:
        case SLType::kBool2:
        case SLType::kBool3:
        case SLType::kBool4:
        case SLType::kTexture2DSampler:
        case SLType::kTextureExternalSampler:
        case SLType::kTexture2DRectSampler:
        case SLType::kSampler:
        case SLType::kTexture2D:
        case SLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}

/** Returns the size in bytes taken up in Metal buffers for SLTypes. */
inline uint32_t sltype_to_mtl_size(SLType type) {
    switch (type) {
        case SLType::kInt:
        case SLType::kUInt:
        case SLType::kFloat:
            return 4;
        case SLType::kInt2:
        case SLType::kUInt2:
        case SLType::kFloat2:
            return 8;
        case SLType::kInt3:
        case SLType::kUInt3:
        case SLType::kFloat3:
        case SLType::kInt4:
        case SLType::kUInt4:
        case SLType::kFloat4:
            return 16;

        case SLType::kFloat2x2:
            return 16;
        case SLType::kFloat3x3:
            return 48;
        case SLType::kFloat4x4:
            return 64;

        case SLType::kShort:
        case SLType::kUShort:
        case SLType::kHalf:
            return 2;
        case SLType::kShort2:
        case SLType::kUShort2:
        case SLType::kHalf2:
            return 4;
        case SLType::kShort3:
        case SLType::kShort4:
        case SLType::kUShort3:
        case SLType::kUShort4:
        case SLType::kHalf3:
        case SLType::kHalf4:
            return 8;

        case SLType::kHalf2x2:
            return 8;
        case SLType::kHalf3x3:
            return 24;
        case SLType::kHalf4x4:
            return 32;

        // This query is only valid for certain types.
        case SLType::kVoid:
        case SLType::kBool:
        case SLType::kBool2:
        case SLType::kBool3:
        case SLType::kBool4:
        case SLType::kTexture2DSampler:
        case SLType::kTextureExternalSampler:
        case SLType::kTexture2DRectSampler:
        case SLType::kSampler:
        case SLType::kTexture2D:
        case SLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}
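
// Note how the matrix sizes above follow from the vector sizes: a matrix takes
// C columns at the padded column size, e.g. kFloat3x3 is 3 x 16 = 48 bytes and
// kHalf3x3 is 3 x 8 = 24 bytes, because 3-component columns occupy the size of
// their 4-component equivalents.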

// Given the current offset into the ubo, calculate the offset for the uniform we're trying to add,
// taking into consideration all alignment requirements. Returns the aligned offset for the new
// uniform; currentOffset is updated to point just past the end of the new uniform.
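// Worked example: a float at currentOffset 0 is given offset 0 and advances
// currentOffset to 4; a following float3 (mask 0xF, Metal size 16) pads 4 up
// to the returned offset 16 and advances currentOffset to 32.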
static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
                                       uint32_t* maxAlignment,
                                       SLType type,
                                       int arrayCount) {
    uint32_t alignmentMask = sltype_to_alignment_mask(type);
    if (alignmentMask > *maxAlignment) {
        *maxAlignment = alignmentMask;
    }
    uint32_t offsetDiff = *currentOffset & alignmentMask;
    if (offsetDiff != 0) {
        offsetDiff = alignmentMask - offsetDiff + 1;
    }
    uint32_t uniformOffset = *currentOffset + offsetDiff;
    SkASSERT(sizeof(float) == 4);
    if (arrayCount) {
        *currentOffset = uniformOffset + sltype_to_mtl_size(type) * arrayCount;
    } else {
        *currentOffset = uniformOffset + sltype_to_mtl_size(type);
    }
    return uniformOffset;
}
#endif // SK_DEBUG

SLType UniformManager::getUniformTypeForLayout(SLType type) {
    if (fLayout != Layout::kMetal) {
        // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
        switch (type) {
            case SLType::kShort:   return SLType::kInt;
            case SLType::kUShort:  return SLType::kUInt;
            case SLType::kHalf:    return SLType::kFloat;

            case SLType::kShort2:  return SLType::kInt2;
            case SLType::kUShort2: return SLType::kUInt2;
            case SLType::kHalf2:   return SLType::kFloat2;

            case SLType::kShort3:  return SLType::kInt3;
            case SLType::kUShort3: return SLType::kUInt3;
            case SLType::kHalf3:   return SLType::kFloat3;

            case SLType::kShort4:  return SLType::kInt4;
            case SLType::kUShort4: return SLType::kUInt4;
            case SLType::kHalf4:   return SLType::kFloat4;

            case SLType::kHalf2x2: return SLType::kFloat2x2;
            case SLType::kHalf3x3: return SLType::kFloat3x3;
            case SLType::kHalf4x4: return SLType::kFloat4x4;

            default: break;
        }
    }

    return type;
}

uint32_t UniformManager::writeUniforms(SkSpan<const Uniform> uniforms,
                                       void** srcs,
                                       uint32_t* offsets,
                                       void *dst) {
    decltype(&Writer<Rules140>::WriteUniform) write;
    switch (fLayout) {
        case Layout::kStd140:
            write = Writer<Rules140>::WriteUniform;
            break;
        case Layout::kStd430:
            write = Writer<Rules430>::WriteUniform;
            break;
        case Layout::kMetal:
            write = Writer<RulesMetal>::WriteUniform;
            break;
    }

#ifdef SK_DEBUG
    uint32_t curUBOOffset = 0;
    uint32_t curUBOMaxAlignment = 0;
#endif // SK_DEBUG

    uint32_t offset = 0;

    for (int i = 0; i < (int) uniforms.size(); ++i) {
        const Uniform& u = uniforms[i];
        SLType uniformType = this->getUniformTypeForLayout(u.type());

#ifdef SK_DEBUG
        uint32_t debugOffset = get_ubo_aligned_offset(&curUBOOffset,
                                                      &curUBOMaxAlignment,
                                                      uniformType,
                                                      u.count());
#endif // SK_DEBUG

        // Write this uniform at its running offset within dst (dst may be null when we are
        // only computing sizes and offsets).
        uint32_t bytesWritten = write(uniformType,
                                      CType::kDefault,
                                      dst ? SkTAddOffset<void>(dst, offset) : nullptr,
                                      u.count(),
                                      srcs ? srcs[i] : nullptr);
        SkASSERT(debugOffset == offset);

        if (offsets) {
            offsets[i] = offset;
        }
        offset += bytesWritten;
    }

    return offset;
}
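
// A sketch of the intended calling pattern (hypothetical caller; these names
// are illustrative only, not an API defined elsewhere in this file):
//
//     UniformManager mgr(Layout::kStd140);
//     // First pass with a null dst computes the total size and the offsets.
//     uint32_t size = mgr.writeUniforms(uniforms, nullptr, offsets, nullptr);
//     SkAutoTMalloc<char> storage(size);
//     // Second pass fills the buffer from srcs.
//     mgr.writeUniforms(uniforms, srcs, nullptr, storage.get());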

} // namespace skgpu