//
// Copyright 2014 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//

// copyvertex.inc.h: Implementation of vertex buffer copying and conversion functions
8
9 namespace rx
10 {
11
12 // Returns an aligned buffer to read the input from
13 template <typename T, size_t inputComponentCount>
GetAlignedOffsetInput(const T * offsetInput,T * alignedElement)14 inline const T *GetAlignedOffsetInput(const T *offsetInput, T *alignedElement)
15 {
16 if (reinterpret_cast<uintptr_t>(offsetInput) % sizeof(T) != 0)
17 {
18 // Applications may pass in arbitrarily aligned buffers as input.
19 // Certain architectures have restrictions regarding unaligned reads. Specifically, we crash
20 // on armeabi-v7a devices with a SIGBUS error when performing such operations. arm64 and
21 // x86-64 devices do not appear to have such issues.
22 //
23 // The workaround is to detect if the input buffer is unaligned and if so, perform a
24 // byte-wise copy of the unaligned portion and a memcpy of the rest of the buffer.
25 uint8_t *alignedBuffer = reinterpret_cast<uint8_t *>(&alignedElement[0]);
26 uintptr_t unalignedInputStartAddress = reinterpret_cast<uintptr_t>(offsetInput);
27 constexpr size_t kAlignmentMinusOne = sizeof(T) - 1;
28 uintptr_t alignedInputStartAddress =
29 (reinterpret_cast<uintptr_t>(offsetInput) + kAlignmentMinusOne) & ~(kAlignmentMinusOne);
30 ASSERT(alignedInputStartAddress >= unalignedInputStartAddress);
31
32 const size_t totalBytesToCopy = sizeof(T) * inputComponentCount;
33 const size_t unalignedBytesToCopy = alignedInputStartAddress - unalignedInputStartAddress;
34 ASSERT(totalBytesToCopy >= unalignedBytesToCopy);
35
36 // byte-wise copy of unaligned portion
37 for (size_t i = 0; i < unalignedBytesToCopy; i++)
38 {
39 alignedBuffer[i] = reinterpret_cast<const uint8_t *>(&offsetInput[0])[i];
40 }
41
42 // memcpy remaining buffer
43 memcpy(&alignedBuffer[unalignedBytesToCopy],
44 &reinterpret_cast<const uint8_t *>(&offsetInput[0])[unalignedBytesToCopy],
45 totalBytesToCopy - unalignedBytesToCopy);
46
47 return alignedElement;
48 }
49 else
50 {
51 return offsetInput;
52 }
53 }
54
55 template <typename T,
56 size_t inputComponentCount,
57 size_t outputComponentCount,
58 uint32_t alphaDefaultValueBits>
CopyNativeVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)59 inline void CopyNativeVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
60 {
61 const size_t attribSize = sizeof(T) * inputComponentCount;
62
63 if (attribSize == stride && inputComponentCount == outputComponentCount)
64 {
65 memcpy(output, input, count * attribSize);
66 return;
67 }
68
69 if (inputComponentCount == outputComponentCount)
70 {
71 for (size_t i = 0; i < count; i++)
72 {
73 const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride));
74 T offsetInputAligned[inputComponentCount];
75 offsetInput =
76 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]);
77
78 T *offsetOutput = reinterpret_cast<T *>(output) + i * outputComponentCount;
79
80 memcpy(offsetOutput, offsetInput, attribSize);
81 }
82 return;
83 }
84
85 const T defaultAlphaValue = gl::bitCast<T>(alphaDefaultValueBits);
86 const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
87
88 for (size_t i = 0; i < count; i++)
89 {
90 const T *offsetInput = reinterpret_cast<const T *>(input + (i * stride));
91 T offsetInputAligned[inputComponentCount];
92 ASSERT(sizeof(offsetInputAligned) == attribSize);
93 offsetInput =
94 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]);
95
96 T *offsetOutput = reinterpret_cast<T *>(output) + i * outputComponentCount;
97
98 memcpy(offsetOutput, offsetInput, attribSize);
99
100 if (inputComponentCount < lastNonAlphaOutputComponent)
101 {
102 // Set the remaining G/B channels to 0.
103 size_t numComponents = (lastNonAlphaOutputComponent - inputComponentCount);
104 memset(&offsetOutput[inputComponentCount], 0, numComponents * sizeof(T));
105 }
106
107 if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
108 {
109 // Set the remaining alpha channel to the defaultAlphaValue.
110 offsetOutput[3] = defaultAlphaValue;
111 }
112 }
113 }
114
115 template <size_t inputComponentCount, size_t outputComponentCount>
Copy8SintTo16SintVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)116 inline void Copy8SintTo16SintVertexData(const uint8_t *input,
117 size_t stride,
118 size_t count,
119 uint8_t *output)
120 {
121 const size_t lastNonAlphaOutputComponent = std::min<size_t>(outputComponentCount, 3);
122
123 for (size_t i = 0; i < count; i++)
124 {
125 const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride);
126 GLshort *offsetOutput = reinterpret_cast<GLshort *>(output) + i * outputComponentCount;
127
128 for (size_t j = 0; j < inputComponentCount; j++)
129 {
130 offsetOutput[j] = static_cast<GLshort>(offsetInput[j]);
131 }
132
133 for (size_t j = inputComponentCount; j < lastNonAlphaOutputComponent; j++)
134 {
135 // Set remaining G/B channels to 0.
136 offsetOutput[j] = 0;
137 }
138
139 if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
140 {
141 // On integer formats, we must set the Alpha channel to 1 if it's unused.
142 offsetOutput[3] = 1;
143 }
144 }
145 }
146
147 template <size_t inputComponentCount, size_t outputComponentCount>
Copy8SnormTo16SnormVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)148 inline void Copy8SnormTo16SnormVertexData(const uint8_t *input,
149 size_t stride,
150 size_t count,
151 uint8_t *output)
152 {
153 for (size_t i = 0; i < count; i++)
154 {
155 const GLbyte *offsetInput = reinterpret_cast<const GLbyte *>(input + i * stride);
156 GLshort *offsetOutput = reinterpret_cast<GLshort *>(output) + i * outputComponentCount;
157
158 for (size_t j = 0; j < inputComponentCount; j++)
159 {
160 // The original GLbyte value ranges from -128 to +127 (INT8_MAX).
161 // When converted to GLshort, the value must be scaled to between -32768 and +32767
162 // (INT16_MAX).
163 if (offsetInput[j] > 0)
164 {
165 offsetOutput[j] =
166 offsetInput[j] << 8 | offsetInput[j] << 1 | ((offsetInput[j] & 0x40) >> 6);
167 }
168 else
169 {
170 offsetOutput[j] = offsetInput[j] << 8;
171 }
172 }
173
174 for (size_t j = inputComponentCount; j < std::min<size_t>(outputComponentCount, 3); j++)
175 {
176 // Set remaining G/B channels to 0.
177 offsetOutput[j] = 0;
178 }
179
180 if (inputComponentCount < outputComponentCount && outputComponentCount == 4)
181 {
182 // On normalized formats, we must set the Alpha channel to the max value if it's unused.
183 offsetOutput[3] = INT16_MAX;
184 }
185 }
186 }
187
188 template <size_t inputComponentCount, size_t outputComponentCount>
Copy32FixedTo32FVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)189 inline void Copy32FixedTo32FVertexData(const uint8_t *input,
190 size_t stride,
191 size_t count,
192 uint8_t *output)
193 {
194 static const float divisor = 1.0f / (1 << 16);
195
196 for (size_t i = 0; i < count; i++)
197 {
198 const uint8_t *offsetInput = input + i * stride;
199 float *offsetOutput = reinterpret_cast<float *>(output) + i * outputComponentCount;
200
201 // GLfixed access must be 4-byte aligned on arm32, input and stride sometimes are not
202 if (reinterpret_cast<uintptr_t>(offsetInput) % sizeof(GLfixed) == 0)
203 {
204 for (size_t j = 0; j < inputComponentCount; j++)
205 {
206 offsetOutput[j] =
207 static_cast<float>(reinterpret_cast<const GLfixed *>(offsetInput)[j]) * divisor;
208 }
209 }
210 else
211 {
212 for (size_t j = 0; j < inputComponentCount; j++)
213 {
214 GLfixed alignedInput;
215 memcpy(&alignedInput, offsetInput + j * sizeof(GLfixed), sizeof(GLfixed));
216 offsetOutput[j] = static_cast<float>(alignedInput) * divisor;
217 }
218 }
219
220 // 4-component output formats would need special padding in the alpha channel.
221 static_assert(!(inputComponentCount < 4 && outputComponentCount == 4),
222 "An inputComponentCount less than 4 and an outputComponentCount equal to 4 "
223 "is not supported.");
224
225 for (size_t j = inputComponentCount; j < outputComponentCount; j++)
226 {
227 offsetOutput[j] = 0.0f;
228 }
229 }
230 }
231
232 template <typename T,
233 size_t inputComponentCount,
234 size_t outputComponentCount,
235 bool normalized,
236 bool toHalf>
CopyToFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)237 inline void CopyToFloatVertexData(const uint8_t *input,
238 size_t stride,
239 size_t count,
240 uint8_t *output)
241 {
242 typedef std::numeric_limits<T> NL;
243 typedef typename std::conditional<toHalf, GLhalf, float>::type outputType;
244
245 for (size_t i = 0; i < count; i++)
246 {
247 const T *offsetInput = reinterpret_cast<const T *>(input + (stride * i));
248 outputType *offsetOutput =
249 reinterpret_cast<outputType *>(output) + i * outputComponentCount;
250
251 T offsetInputAligned[inputComponentCount];
252 offsetInput =
253 GetAlignedOffsetInput<T, inputComponentCount>(offsetInput, &offsetInputAligned[0]);
254
255 for (size_t j = 0; j < inputComponentCount; j++)
256 {
257 float result = 0;
258
259 if (normalized)
260 {
261 if (NL::is_signed)
262 {
263 result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max());
264 result = result >= -1.0f ? result : -1.0f;
265 }
266 else
267 {
268 result = static_cast<float>(offsetInput[j]) / static_cast<float>(NL::max());
269 }
270 }
271 else
272 {
273 result = static_cast<float>(offsetInput[j]);
274 }
275
276 if (toHalf)
277 {
278 offsetOutput[j] = gl::float32ToFloat16(result);
279 }
280 else
281 {
282 offsetOutput[j] = static_cast<outputType>(result);
283 }
284 }
285
286 for (size_t j = inputComponentCount; j < outputComponentCount; j++)
287 {
288 offsetOutput[j] = 0;
289 }
290
291 if (inputComponentCount < 4 && outputComponentCount == 4)
292 {
293 if (toHalf)
294 {
295 offsetOutput[3] = gl::Float16One;
296 }
297 else
298 {
299 offsetOutput[3] = static_cast<outputType>(gl::Float32One);
300 }
301 }
302 }
303 }
304
305 template <size_t inputComponentCount, size_t outputComponentCount>
Copy32FTo16FVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)306 void Copy32FTo16FVertexData(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
307 {
308 const unsigned short kZero = gl::float32ToFloat16(0.0f);
309 const unsigned short kOne = gl::float32ToFloat16(1.0f);
310
311 for (size_t i = 0; i < count; i++)
312 {
313 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i));
314 unsigned short *offsetOutput =
315 reinterpret_cast<unsigned short *>(output) + i * outputComponentCount;
316
317 for (size_t j = 0; j < inputComponentCount; j++)
318 {
319 offsetOutput[j] = gl::float32ToFloat16(offsetInput[j]);
320 }
321
322 for (size_t j = inputComponentCount; j < outputComponentCount; j++)
323 {
324 offsetOutput[j] = (j == 3) ? kOne : kZero;
325 }
326 }
327 }
328
CopyXYZ32FToXYZ9E5(const uint8_t * input,size_t stride,size_t count,uint8_t * output)329 inline void CopyXYZ32FToXYZ9E5(const uint8_t *input, size_t stride, size_t count, uint8_t *output)
330 {
331 for (size_t i = 0; i < count; i++)
332 {
333 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i));
334 unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i;
335
336 *offsetOutput = gl::convertRGBFloatsTo999E5(offsetInput[0], offsetInput[1], offsetInput[2]);
337 }
338 }
339
CopyXYZ32FToX11Y11B10F(const uint8_t * input,size_t stride,size_t count,uint8_t * output)340 inline void CopyXYZ32FToX11Y11B10F(const uint8_t *input,
341 size_t stride,
342 size_t count,
343 uint8_t *output)
344 {
345 for (size_t i = 0; i < count; i++)
346 {
347 const float *offsetInput = reinterpret_cast<const float *>(input + (stride * i));
348 unsigned int *offsetOutput = reinterpret_cast<unsigned int *>(output) + i;
349
350 *offsetOutput = gl::float32ToFloat11(offsetInput[0]) << 0 |
351 gl::float32ToFloat11(offsetInput[1]) << 11 |
352 gl::float32ToFloat10(offsetInput[2]) << 22;
353 }
354 }
355
356 namespace priv
357 {
358
359 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyPackedRGB(uint32_t data,uint8_t * output)360 static inline void CopyPackedRGB(uint32_t data, uint8_t *output)
361 {
362 const uint32_t rgbSignMask = 0x200; // 1 set at the 9 bit
363 const uint32_t negativeMask = 0xFFFFFC00; // All bits from 10 to 31 set to 1
364
365 if (toFloat || toHalf)
366 {
367 GLfloat finalValue = static_cast<GLfloat>(data);
368 if (isSigned)
369 {
370 if (data & rgbSignMask)
371 {
372 int negativeNumber = data | negativeMask;
373 finalValue = static_cast<GLfloat>(negativeNumber);
374 }
375
376 if (normalized)
377 {
378 const int32_t maxValue = 0x1FF; // 1 set in bits 0 through 8
379 const int32_t minValue = 0xFFFFFE01; // Inverse of maxValue
380
381 // A 10-bit two's complement number has the possibility of being minValue - 1 but
382 // OpenGL's normalization rules dictate that it should be clamped to minValue in
383 // this case.
384 if (finalValue < minValue)
385 {
386 finalValue = minValue;
387 }
388
389 const int32_t halfRange = (maxValue - minValue) >> 1;
390 finalValue = ((finalValue - minValue) / halfRange) - 1.0f;
391 }
392 }
393 else
394 {
395 if (normalized)
396 {
397 const uint32_t maxValue = 0x3FF; // 1 set in bits 0 through 9
398 finalValue /= static_cast<GLfloat>(maxValue);
399 }
400 }
401
402 if (toHalf)
403 {
404 *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue);
405 }
406 else
407 {
408 *reinterpret_cast<GLfloat *>(output) = finalValue;
409 }
410 }
411 else
412 {
413 if (isSigned)
414 {
415 GLshort *intOutput = reinterpret_cast<GLshort *>(output);
416
417 if (data & rgbSignMask)
418 {
419 *intOutput = static_cast<GLshort>(data | negativeMask);
420 }
421 else
422 {
423 *intOutput = static_cast<GLshort>(data);
424 }
425 }
426 else
427 {
428 GLushort *uintOutput = reinterpret_cast<GLushort *>(output);
429 *uintOutput = static_cast<GLushort>(data);
430 }
431 }
432 }
433
434 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyPackedAlpha(uint32_t data,uint8_t * output)435 inline void CopyPackedAlpha(uint32_t data, uint8_t *output)
436 {
437 ASSERT(data >= 0 && data <= 3);
438
439 if (toFloat || toHalf)
440 {
441 GLfloat finalValue = 0;
442 if (isSigned)
443 {
444 if (normalized)
445 {
446 switch (data)
447 {
448 case 0x0:
449 finalValue = 0.0f;
450 break;
451 case 0x1:
452 finalValue = 1.0f;
453 break;
454 case 0x2:
455 finalValue = -1.0f;
456 break;
457 case 0x3:
458 finalValue = -1.0f;
459 break;
460 default:
461 UNREACHABLE();
462 }
463 }
464 else
465 {
466 switch (data)
467 {
468 case 0x0:
469 finalValue = 0.0f;
470 break;
471 case 0x1:
472 finalValue = 1.0f;
473 break;
474 case 0x2:
475 finalValue = -2.0f;
476 break;
477 case 0x3:
478 finalValue = -1.0f;
479 break;
480 default:
481 UNREACHABLE();
482 }
483 }
484 }
485 else
486 {
487 if (normalized)
488 {
489 finalValue = data / 3.0f;
490 }
491 else
492 {
493 finalValue = static_cast<float>(data);
494 }
495 }
496
497 if (toHalf)
498 {
499 *reinterpret_cast<GLhalf *>(output) = gl::float32ToFloat16(finalValue);
500 }
501 else
502 {
503 *reinterpret_cast<GLfloat *>(output) = finalValue;
504 }
505 }
506 else
507 {
508 if (isSigned)
509 {
510 GLshort *intOutput = reinterpret_cast<GLshort *>(output);
511 switch (data)
512 {
513 case 0x0:
514 *intOutput = 0;
515 break;
516 case 0x1:
517 *intOutput = 1;
518 break;
519 case 0x2:
520 *intOutput = -2;
521 break;
522 case 0x3:
523 *intOutput = -1;
524 break;
525 default:
526 UNREACHABLE();
527 }
528 }
529 else
530 {
531 *reinterpret_cast<GLushort *>(output) = static_cast<GLushort>(data);
532 }
533 }
534 }
535
536 } // namespace priv
537
538 template <bool isSigned, bool normalized, bool toFloat, bool toHalf>
CopyXYZ10W2ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)539 inline void CopyXYZ10W2ToXYZWFloatVertexData(const uint8_t *input,
540 size_t stride,
541 size_t count,
542 uint8_t *output)
543 {
544 const size_t outputComponentSize = toFloat && !toHalf ? 4 : 2;
545 const size_t componentCount = 4;
546
547 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9
548 const size_t redShift = 0; // red is bits 0 through 9
549 const size_t greenShift = 10; // green is bits 10 through 19
550 const size_t blueShift = 20; // blue is bits 20 through 29
551
552 const uint32_t alphaMask = 0x3; // 1 set in bits 0 and 1
553 const size_t alphaShift = 30; // Alpha is the 30 and 31 bits
554
555 for (size_t i = 0; i < count; i++)
556 {
557 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride));
558 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
559
560 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
561 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
562 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
563 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
564 priv::CopyPackedRGB<isSigned, normalized, toFloat, toHalf>(
565 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
566 priv::CopyPackedAlpha<isSigned, normalized, toFloat, toHalf>(
567 (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize));
568 }
569 }
570
571 template <bool isSigned, bool normalized, bool toHalf>
CopyXYZ10ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)572 inline void CopyXYZ10ToXYZWFloatVertexData(const uint8_t *input,
573 size_t stride,
574 size_t count,
575 uint8_t *output)
576 {
577 const size_t outputComponentSize = toHalf ? 2 : 4;
578 const size_t componentCount = 4;
579
580 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9
581 const size_t redShift = 22; // red is bits 22 through 31
582 const size_t greenShift = 12; // green is bits 12 through 21
583 const size_t blueShift = 2; // blue is bits 2 through 11
584
585 const uint32_t alphaDefaultValueBits = normalized ? (isSigned ? 0x1 : 0x3) : 0x1;
586
587 for (size_t i = 0; i < count; i++)
588 {
589 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride));
590 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
591
592 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
593 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
594 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
595 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
596 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
597 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
598 priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>(
599 alphaDefaultValueBits, offsetOutput + (3 * outputComponentSize));
600 }
601 }
602
603 template <bool isSigned, bool normalized, bool toHalf>
CopyW2XYZ10ToXYZWFloatVertexData(const uint8_t * input,size_t stride,size_t count,uint8_t * output)604 inline void CopyW2XYZ10ToXYZWFloatVertexData(const uint8_t *input,
605 size_t stride,
606 size_t count,
607 uint8_t *output)
608 {
609 const size_t outputComponentSize = toHalf ? 2 : 4;
610 const size_t componentCount = 4;
611
612 const uint32_t rgbMask = 0x3FF; // 1 set in bits 0 through 9
613 const size_t redShift = 22; // red is bits 22 through 31
614 const size_t greenShift = 12; // green is bits 12 through 21
615 const size_t blueShift = 2; // blue is bits 2 through 11
616
617 const uint32_t alphaMask = 0x3; // 1 set in bits 0 and 1
618 const size_t alphaShift = 0; // Alpha is the 30 and 31 bits
619
620 for (size_t i = 0; i < count; i++)
621 {
622 GLuint packedValue = *reinterpret_cast<const GLuint *>(input + (i * stride));
623 uint8_t *offsetOutput = output + (i * outputComponentSize * componentCount);
624
625 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
626 (packedValue >> redShift) & rgbMask, offsetOutput + (0 * outputComponentSize));
627 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
628 (packedValue >> greenShift) & rgbMask, offsetOutput + (1 * outputComponentSize));
629 priv::CopyPackedRGB<isSigned, normalized, true, toHalf>(
630 (packedValue >> blueShift) & rgbMask, offsetOutput + (2 * outputComponentSize));
631 priv::CopyPackedAlpha<isSigned, normalized, true, toHalf>(
632 (packedValue >> alphaShift) & alphaMask, offsetOutput + (3 * outputComponentSize));
633 }
634 }
635 } // namespace rx
636