/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "experimental/graphite/src/UniformManager.h"

#include "experimental/graphite/src/DrawTypes.h"
#include "include/core/SkMatrix.h"
#include "include/private/SkHalf.h"
#include "include/private/SkTemplates.h"
#include "src/core/SkUniform.h"

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <type_traits>

// Ensure that these types are the sizes the uniform data is expecting.
static_assert(sizeof(int32_t) == 4);
static_assert(sizeof(float) == 4);
static_assert(sizeof(int16_t) == 2);
static_assert(sizeof(SkHalf) == 2);

namespace skgpu {

//////////////////////////////////////////////////////////////////////////////

UniformManager::UniformManager(Layout layout) : fLayout(layout) {}

template<typename BaseType>
static constexpr size_t tight_vec_size(int vecLength) {
    return sizeof(BaseType) * vecLength;
}

/**
 * From Section 7.6.2.2 "Standard Uniform Block Layout":
 *  1. If the member is a scalar consuming N basic machine units, the base alignment is N.
 *  2. If the member is a two- or four-component vector with components consuming N basic machine
 *     units, the base alignment is 2N or 4N, respectively.
 *  3. If the member is a three-component vector with components consuming N
 *     basic machine units, the base alignment is 4N.
 *  4. If the member is an array of scalars or vectors, the base alignment and array
 *     stride are set to match the base alignment of a single array element, according
 *     to rules (1), (2), and (3), and rounded up to the base alignment of a vec4. The
 *     array may have padding at the end; the base offset of the member following
 *     the array is rounded up to the next multiple of the base alignment.
 *  5. If the member is a column-major matrix with C columns and R rows, the
 *     matrix is stored identically to an array of C column vectors with R components each,
 *     according to rule (4).
 *  6. If the member is an array of S column-major matrices with C columns and
 *     R rows, the matrix is stored identically to a row of S × C column vectors
 *     with R components each, according to rule (4).
 *  7. If the member is a row-major matrix with C columns and R rows, the matrix
 *     is stored identically to an array of R row vectors with C components each,
 *     according to rule (4).
 *  8. If the member is an array of S row-major matrices with C columns and R
 *     rows, the matrix is stored identically to a row of S × R row vectors with C
 *     components each, according to rule (4).
 *  9. If the member is a structure, the base alignment of the structure is N, where
 *     N is the largest base alignment value of any of its members, and rounded
 *     up to the base alignment of a vec4. The individual members of this substructure are then
 *     assigned offsets by applying this set of rules recursively,
 *     where the base offset of the first member of the sub-structure is equal to the
 *     aligned offset of the structure. The structure may have padding at the end;
 *     the base offset of the member following the sub-structure is rounded up to
 *     the next multiple of the base alignment of the structure.
 * 10. If the member is an array of S structures, the S elements of the array are laid
 *     out in order, according to rule (9).
 */
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules140 {
    /**
     * For an array of scalars or vectors this returns the stride between array elements. For
     * matrices or arrays of matrices this returns the stride between columns of the matrix. Note
     * that for single (non-array) scalars or vectors we don't require a stride.
     */
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules140<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }

        // Rule 4.

        // Alignment of vec4 by Rule 2.
        constexpr size_t kVec4Alignment = tight_vec_size<float>(4);
        // Get alignment of a single vector of BaseType by Rule 1, 2, or 3.
        int n = RowsOrVecLength == 3 ? 4 : RowsOrVecLength;
        size_t kElementAlignment = tight_vec_size<BaseType>(n);
        // Round kElementAlignment up to a multiple of kVec4Alignment.
        size_t m = (kElementAlignment + kVec4Alignment - 1) / kVec4Alignment;
        return m * kVec4Alignment;
    }
};
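
// A minimal compile-time sanity sketch of Rule 4 (this assumes SkASSERT remains
// usable in constant evaluation): under std140 both a scalar float array and a
// float3 array round their element stride up to the 16-byte alignment of a vec4.
static_assert(Rules140<float, 1>::Stride(/*count=*/4) == 16);
static_assert(Rules140<float, 3>::Stride(/*count=*/1) == 16);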

/**
 * When using the std430 storage layout, shader storage blocks will be laid out in buffer storage
 * identically to uniform and shader storage blocks using the std140 layout, except that the base
 * alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are
 * not rounded up to a multiple of the base alignment of a vec4.
 */
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct Rules430 {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);

        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return Rules430<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        // Rule 4 without the round up to a multiple of align-of vec4.
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
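
// Sanity sketch under the same SkASSERT assumption as above: std430 drops the
// vec4 round-up, so a float array packs tightly, while a float3 still pads out
// to its 4N base alignment of 16 bytes.
static_assert(Rules430<float, 1>::Stride(/*count=*/4) == 4);
static_assert(Rules430<float, 3>::Stride(/*count=*/1) == 16);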

// The strides used here were derived from the rules we've imposed on ourselves in
// GrMtlPipelineStateDataManager. Everything is tight except 3-component vectors, which have the
// stride of their 4-component equivalents.
template<typename BaseType, int RowsOrVecLength = 1, int Cols = 1>
struct RulesMetal {
    static constexpr size_t Stride(int count) {
        SkASSERT(count >= 1 || count == SkUniform::kNonArray);
        static_assert(RowsOrVecLength >= 1 && RowsOrVecLength <= 4);
        static_assert(Cols >= 1 && Cols <= 4);
        if (Cols != 1) {
            // This is a matrix or array of matrices. We return the stride between columns.
            SkASSERT(RowsOrVecLength > 1);
            return RulesMetal<BaseType, RowsOrVecLength>::Stride(1);
        }
        if (count == 0) {
            // Stride doesn't matter for a non-array.
            return RowsOrVecLength * sizeof(BaseType);
        }
        return tight_vec_size<BaseType>(RowsOrVecLength == 3 ? 4 : RowsOrVecLength);
    }
};
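
// Sanity sketch: the Metal strides match sksltype_to_mtl_size() below, e.g. a
// half3 occupies the 8 bytes of its half4 equivalent while a float2 stays tight
// at 8 bytes.
static_assert(RulesMetal<SkHalf, 3>::Stride(/*count=*/1) == 8);
static_assert(RulesMetal<float, 2>::Stride(/*count=*/1) == 8);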

template<template<typename BaseType, int RowsOrVecLength, int Cols> class Rules>
class Writer {
private:
    template <typename MemType, typename UniformType>
    static void CopyUniforms(void* dst, const void* src, int numUniforms) {
        if constexpr (std::is_same<MemType, UniformType>::value) {
            // Matching types--use memcpy.
            std::memcpy(dst, src, numUniforms * sizeof(MemType));
            return;
        }

        if constexpr (std::is_same<MemType, float>::value &&
                      std::is_same<UniformType, SkHalf>::value) {
            // Convert floats to half.
            const float* floatBits = static_cast<const float*>(src);
            SkHalf* halfBits = static_cast<SkHalf*>(dst);
            while (numUniforms-- > 0) {
                *halfBits++ = SkFloatToHalf(*floatBits++);
            }
            return;
        }

        SK_ABORT("implement conversion from MemType to UniformType");
    }

    template <typename MemType, typename UniformType, int RowsOrVecLength = 1, int Cols = 1>
    static uint32_t Write(void* dst, int n, const MemType src[]) {
        size_t stride = Rules<UniformType, RowsOrVecLength, Cols>::Stride(n);
        n = (n == SkUniform::kNonArray) ? 1 : n;
        n *= Cols;

        if (dst) {
            if (stride == RowsOrVecLength * sizeof(UniformType)) {
                // The elements are tightly packed; copy them all at once.
                CopyUniforms<MemType, UniformType>(dst, src, n * RowsOrVecLength);
            } else {
                // Copy one element at a time, advancing dst by the padded stride.
                for (int i = 0; i < n; ++i) {
                    CopyUniforms<MemType, UniformType>(dst, src, RowsOrVecLength);
                    src += RowsOrVecLength;
                    dst = SkTAddOffset<void>(dst, stride);
                }
            }
        }

        return n * stride;
    }

    template <typename UniformType>
    static uint32_t WriteSkMatrices(void* dst, int n, const SkMatrix m[]) {
        // Stride() returns the stride of each column, so multiply by 3 to get the matrix stride.
        size_t stride = 3 * Rules<UniformType, 3, 3>::Stride(1);
        n = std::max(n, 1);

        if (dst) {
            size_t offset = 0;
            for (int i = 0; i < n; ++i) {
                // Transpose SkMatrix's row-major storage into the column-major order that
                // uniforms expect.
                float mt[] = {
                        m[i].get(SkMatrix::kMScaleX),
                        m[i].get(SkMatrix::kMSkewY),
                        m[i].get(SkMatrix::kMPersp0),

                        m[i].get(SkMatrix::kMSkewX),
                        m[i].get(SkMatrix::kMScaleY),
                        m[i].get(SkMatrix::kMPersp1),

                        m[i].get(SkMatrix::kMTransX),
                        m[i].get(SkMatrix::kMTransY),
                        m[i].get(SkMatrix::kMPersp2),
                };
                Write<float, UniformType, 3, 3>(SkTAddOffset<void>(dst, offset), 1, mt);
                offset += stride;
            }
        }
        return n * stride;
    }

public:
    static uint32_t WriteUniform(SkSLType type,
                                 CType ctype,
                                 void* dest,
                                 int n,
                                 const void* src) {
        SkASSERT(n >= 1 || n == SkUniform::kNonArray);
        switch (type) {
            case SkSLType::kInt:
                return Write<int32_t, int32_t>(dest, n, static_cast<const int32_t*>(src));

            case SkSLType::kInt2:
                return Write<int32_t, int32_t, 2>(dest, n, static_cast<const int32_t*>(src));

            case SkSLType::kInt3:
                return Write<int32_t, int32_t, 3>(dest, n, static_cast<const int32_t*>(src));

            case SkSLType::kInt4:
                return Write<int32_t, int32_t, 4>(dest, n, static_cast<const int32_t*>(src));

            case SkSLType::kHalf:
                return Write<float, SkHalf>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat:
                return Write<float, float>(dest, n, static_cast<const float*>(src));

            case SkSLType::kHalf2:
                return Write<float, SkHalf, 2>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat2:
                return Write<float, float, 2>(dest, n, static_cast<const float*>(src));

            case SkSLType::kHalf3:
                return Write<float, SkHalf, 3>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat3:
                return Write<float, float, 3>(dest, n, static_cast<const float*>(src));

            case SkSLType::kHalf4:
                return Write<float, SkHalf, 4>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat4:
                return Write<float, float, 4>(dest, n, static_cast<const float*>(src));

            case SkSLType::kHalf2x2:
                return Write<float, SkHalf, 2, 2>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat2x2:
                return Write<float, float, 2, 2>(dest, n, static_cast<const float*>(src));

            case SkSLType::kHalf3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, SkHalf, 3, 3>(dest, n, static_cast<const float*>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<SkHalf>(dest, n, static_cast<const SkMatrix*>(src));
                }
                SkUNREACHABLE;

            case SkSLType::kFloat3x3:
                switch (ctype) {
                    case CType::kDefault:
                        return Write<float, float, 3, 3>(dest, n, static_cast<const float*>(src));
                    case CType::kSkMatrix:
                        return WriteSkMatrices<float>(dest, n, static_cast<const SkMatrix*>(src));
                }
                SkUNREACHABLE;

            case SkSLType::kHalf4x4:
                return Write<float, SkHalf, 4, 4>(dest, n, static_cast<const float*>(src));

            case SkSLType::kFloat4x4:
                return Write<float, float, 4, 4>(dest, n, static_cast<const float*>(src));

            default:
                SK_ABORT("Unexpected uniform type");
        }
    }
};
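
// For illustration, a hypothetical sizing call: a null dest makes WriteUniform()
// compute the byte size without copying anything, e.g.
//     Writer<Rules140>::WriteUniform(SkSLType::kFloat4, CType::kDefault,
//                                    /*dest=*/nullptr, SkUniform::kNonArray,
//                                    /*src=*/nullptr)
// returns 16.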

#ifdef SK_DEBUG
// To determine whether a current offset is aligned, we can just AND it with the alignment mask.
// A value of 0 means aligned; any other value is how many bytes past alignment we are. This works
// since all alignments are powers of 2. The mask is always (alignment - 1).
static uint32_t sksltype_to_alignment_mask(SkSLType type) {
    switch (type) {
        case SkSLType::kInt:
        case SkSLType::kUInt:
        case SkSLType::kFloat:
            return 0x3;
        case SkSLType::kInt2:
        case SkSLType::kUInt2:
        case SkSLType::kFloat2:
            return 0x7;
        case SkSLType::kInt3:
        case SkSLType::kUInt3:
        case SkSLType::kFloat3:
        case SkSLType::kInt4:
        case SkSLType::kUInt4:
        case SkSLType::kFloat4:
            return 0xF;

        case SkSLType::kFloat2x2:
            return 0x7;
        case SkSLType::kFloat3x3:
            return 0xF;
        case SkSLType::kFloat4x4:
            return 0xF;

        case SkSLType::kShort:
        case SkSLType::kUShort:
        case SkSLType::kHalf:
            return 0x1;
        case SkSLType::kShort2:
        case SkSLType::kUShort2:
        case SkSLType::kHalf2:
            return 0x3;
        case SkSLType::kShort3:
        case SkSLType::kShort4:
        case SkSLType::kUShort3:
        case SkSLType::kUShort4:
        case SkSLType::kHalf3:
        case SkSLType::kHalf4:
            return 0x7;

        case SkSLType::kHalf2x2:
            return 0x3;
        case SkSLType::kHalf3x3:
            return 0x7;
        case SkSLType::kHalf4x4:
            return 0x7;

        // This query is only valid for certain types.
        case SkSLType::kVoid:
        case SkSLType::kBool:
        case SkSLType::kBool2:
        case SkSLType::kBool3:
        case SkSLType::kBool4:
        case SkSLType::kTexture2DSampler:
        case SkSLType::kTextureExternalSampler:
        case SkSLType::kTexture2DRectSampler:
        case SkSLType::kSampler:
        case SkSLType::kTexture2D:
        case SkSLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}

/** Returns the size in bytes taken up in Metal buffers for SkSLTypes. */
inline uint32_t sksltype_to_mtl_size(SkSLType type) {
    switch (type) {
        case SkSLType::kInt:
        case SkSLType::kUInt:
        case SkSLType::kFloat:
            return 4;
        case SkSLType::kInt2:
        case SkSLType::kUInt2:
        case SkSLType::kFloat2:
            return 8;
        case SkSLType::kInt3:
        case SkSLType::kUInt3:
        case SkSLType::kFloat3:
        case SkSLType::kInt4:
        case SkSLType::kUInt4:
        case SkSLType::kFloat4:
            return 16;

        case SkSLType::kFloat2x2:
            return 16;
        case SkSLType::kFloat3x3:
            return 48;
        case SkSLType::kFloat4x4:
            return 64;

        case SkSLType::kShort:
        case SkSLType::kUShort:
        case SkSLType::kHalf:
            return 2;
        case SkSLType::kShort2:
        case SkSLType::kUShort2:
        case SkSLType::kHalf2:
            return 4;
        case SkSLType::kShort3:
        case SkSLType::kShort4:
        case SkSLType::kUShort3:
        case SkSLType::kUShort4:
        case SkSLType::kHalf3:
        case SkSLType::kHalf4:
            return 8;

        case SkSLType::kHalf2x2:
            return 8;
        case SkSLType::kHalf3x3:
            return 24;
        case SkSLType::kHalf4x4:
            return 32;

        // This query is only valid for certain types.
        case SkSLType::kVoid:
        case SkSLType::kBool:
        case SkSLType::kBool2:
        case SkSLType::kBool3:
        case SkSLType::kBool4:
        case SkSLType::kTexture2DSampler:
        case SkSLType::kTextureExternalSampler:
        case SkSLType::kTexture2DRectSampler:
        case SkSLType::kSampler:
        case SkSLType::kTexture2D:
        case SkSLType::kInput:
            break;
    }
    SK_ABORT("Unexpected type");
}

// Given the current offset into the ubo, calculate the offset for the uniform we're trying to add,
// taking into consideration all alignment requirements. Returns the aligned offset for the new
// uniform and updates currentOffset to point just past the end of the new uniform.
static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
                                       uint32_t* maxAlignment,
                                       SkSLType type,
                                       int arrayCount) {
    uint32_t alignmentMask = sksltype_to_alignment_mask(type);
    if (alignmentMask > *maxAlignment) {
        *maxAlignment = alignmentMask;
    }
    uint32_t offsetDiff = *currentOffset & alignmentMask;
    if (offsetDiff != 0) {
        // Bump up to the next aligned boundary.
        offsetDiff = alignmentMask - offsetDiff + 1;
    }
    uint32_t uniformOffset = *currentOffset + offsetDiff;
    SkASSERT(sizeof(float) == 4);
    if (arrayCount) {
        *currentOffset = uniformOffset + sksltype_to_mtl_size(type) * arrayCount;
    } else {
        *currentOffset = uniformOffset + sksltype_to_mtl_size(type);
    }
    return uniformOffset;
}
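
// Worked example for get_ubo_aligned_offset() above: appending a non-array float4
// (mask 0xF, size 16) when *currentOffset is 20 gives
// offsetDiff = 0xF - (20 & 0xF) + 1 = 12, so the uniform lands at offset 32 and
// *currentOffset advances to 48.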
#endif // SK_DEBUG

SkSLType UniformManager::getUniformTypeForLayout(SkSLType type) {
    if (fLayout != Layout::kMetal) {
        // GL/Vk expect uniforms in 32-bit precision. Convert lower-precision types to 32-bit.
        switch (type) {
            case SkSLType::kShort:   return SkSLType::kInt;
            case SkSLType::kUShort:  return SkSLType::kUInt;
            case SkSLType::kHalf:    return SkSLType::kFloat;

            case SkSLType::kShort2:  return SkSLType::kInt2;
            case SkSLType::kUShort2: return SkSLType::kUInt2;
            case SkSLType::kHalf2:   return SkSLType::kFloat2;

            case SkSLType::kShort3:  return SkSLType::kInt3;
            case SkSLType::kUShort3: return SkSLType::kUInt3;
            case SkSLType::kHalf3:   return SkSLType::kFloat3;

            case SkSLType::kShort4:  return SkSLType::kInt4;
            case SkSLType::kUShort4: return SkSLType::kUInt4;
            case SkSLType::kHalf4:   return SkSLType::kFloat4;

            case SkSLType::kHalf2x2: return SkSLType::kFloat2x2;
            case SkSLType::kHalf3x3: return SkSLType::kFloat3x3;
            case SkSLType::kHalf4x4: return SkSLType::kFloat4x4;

            default: break;
        }
    }

    return type;
}

uint32_t UniformManager::writeUniforms(SkSpan<const SkUniform> uniforms,
                                       const void** srcs,
                                       uint32_t* offsets,
                                       char* dst) {
    decltype(&Writer<Rules140>::WriteUniform) write;
    switch (fLayout) {
        case Layout::kStd140:
            write = Writer<Rules140>::WriteUniform;
            break;
        case Layout::kStd430:
            write = Writer<Rules430>::WriteUniform;
            break;
        case Layout::kMetal:
            write = Writer<RulesMetal>::WriteUniform;
            break;
    }

#ifdef SK_DEBUG
    uint32_t curUBOOffset = 0;
    uint32_t curUBOMaxAlignment = 0;
#endif // SK_DEBUG

    uint32_t offset = 0;

    for (int i = 0; i < (int)uniforms.size(); ++i) {
        const SkUniform& u = uniforms[i];
        SkSLType uniformType = this->getUniformTypeForLayout(u.type());

#ifdef SK_DEBUG
        uint32_t debugOffset = get_ubo_aligned_offset(&curUBOOffset,
                                                      &curUBOMaxAlignment,
                                                      uniformType,
                                                      u.count());
#endif // SK_DEBUG

        uint32_t bytesWritten = write(uniformType,
                                      CType::kDefault,
                                      dst ? &dst[offset] : nullptr,
                                      u.count(),
                                      srcs ? srcs[i] : nullptr);
        SkASSERT(debugOffset == offset);

        if (offsets) {
            offsets[i] = offset;
        }
        offset += bytesWritten;
    }

    return offset;
}
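
// A hypothetical two-pass usage sketch for writeUniforms() above (the allocation
// strategy is illustrative, not prescribed): size the block first with null
// outputs, then allocate and fill it.
//
//     UniformManager mgr(Layout::kStd140);
//     uint32_t size = mgr.writeUniforms(uniforms, nullptr, nullptr, nullptr);
//     SkAutoTMalloc<char> block(size);
//     mgr.writeUniforms(uniforms, srcs, offsets, block.get());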

} // namespace skgpu