1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file format_conversion.h
24 *
25 * @brief API implementation
26 *
27 ******************************************************************************/
28 #include "format_types.h"
29 #include "format_traits.h"
30
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Load SIMD packed pixels in SOA format and converts to
33 /// SOA RGBA32_FLOAT format.
34 /// @param pSrc - source data in SOA form
35 /// @param dst - output data in SOA form
36 template<SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simdvector & dst)37 INLINE void LoadSOA(const uint8_t *pSrc, simdvector &dst)
38 {
39 // fast path for float32
40 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
41 {
42 auto lambda = [&](int comp)
43 {
44 simdscalar vComp = _simd_load_ps((const float*)(pSrc + comp*sizeof(simdscalar)));
45
46 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
47 };
48
49 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
50 return;
51 }
52
53 auto lambda = [&](int comp)
54 {
55 // load SIMD components
56 simdscalar vComp = FormatTraits<SrcFormat>::loadSOA(comp, pSrc);
57
58 // unpack
59 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
60
61 // convert
62 if (FormatTraits<SrcFormat>::isNormalized(comp))
63 {
64 vComp = _simd_cvtepi32_ps(_simd_castps_si(vComp));
65 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
66 }
67
68 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
69
70 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
71 };
72
73 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
74 }
75
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Clamps the given component based on the requirements on the
78 /// Format template arg
79 /// @param vComp - SIMD vector of floats
80 /// @param Component - component
81 template<SWR_FORMAT Format>
Clamp(simdscalar const & vC,uint32_t Component)82 INLINE simdscalar Clamp(simdscalar const &vC, uint32_t Component)
83 {
84 simdscalar vComp = vC;
85 if (FormatTraits<Format>::isNormalized(Component))
86 {
87 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
88 {
89 vComp = _simd_max_ps(vComp, _simd_setzero_ps());
90 }
91
92 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
93 {
94 vComp = _simd_max_ps(vComp, _simd_set1_ps(-1.0f));
95 }
96 vComp = _simd_min_ps(vComp, _simd_set1_ps(1.0f));
97 }
98 else if (FormatTraits<Format>::GetBPC(Component) < 32)
99 {
100 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
101 {
102 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
103 int iMin = 0;
104 simdscalari vCompi = _simd_castps_si(vComp);
105 vCompi = _simd_max_epu32(vCompi, _simd_set1_epi32(iMin));
106 vCompi = _simd_min_epu32(vCompi, _simd_set1_epi32(iMax));
107 vComp = _simd_castsi_ps(vCompi);
108 }
109 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
110 {
111 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
112 int iMin = -1 - iMax;
113 simdscalari vCompi = _simd_castps_si(vComp);
114 vCompi = _simd_max_epi32(vCompi, _simd_set1_epi32(iMin));
115 vCompi = _simd_min_epi32(vCompi, _simd_set1_epi32(iMax));
116 vComp = _simd_castsi_ps(vCompi);
117 }
118 }
119
120 return vComp;
121 }
122
123 //////////////////////////////////////////////////////////////////////////
124 /// @brief Normalize the given component based on the requirements on the
125 /// Format template arg
126 /// @param vComp - SIMD vector of floats
127 /// @param Component - component
128 template<SWR_FORMAT Format>
Normalize(simdscalar const & vC,uint32_t Component)129 INLINE simdscalar Normalize(simdscalar const &vC, uint32_t Component)
130 {
131 simdscalar vComp = vC;
132 if (FormatTraits<Format>::isNormalized(Component))
133 {
134 vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<Format>::fromFloat(Component)));
135 vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
136 }
137 return vComp;
138 }
139
140 //////////////////////////////////////////////////////////////////////////
141 /// @brief Convert and store simdvector of pixels in SOA
142 /// RGBA32_FLOAT to SOA format
143 /// @param src - source data in SOA form
144 /// @param dst - output data in SOA form
145 template<SWR_FORMAT DstFormat>
StoreSOA(const simdvector & src,uint8_t * pDst)146 INLINE void StoreSOA(const simdvector &src, uint8_t *pDst)
147 {
148 // fast path for float32
149 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
150 {
151 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
152 {
153 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
154
155 // Gamma-correct
156 if (FormatTraits<DstFormat>::isSRGB)
157 {
158 if (comp < 3) // Input format is always RGBA32_FLOAT.
159 {
160 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
161 }
162 }
163
164 _simd_store_ps((float*)(pDst + comp*sizeof(simdscalar)), vComp);
165 }
166 return;
167 }
168
169 auto lambda = [&](int comp)
170 {
171 simdscalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
172
173 // Gamma-correct
174 if (FormatTraits<DstFormat>::isSRGB)
175 {
176 if (comp < 3) // Input format is always RGBA32_FLOAT.
177 {
178 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
179 }
180 }
181
182 // clamp
183 vComp = Clamp<DstFormat>(vComp, comp);
184
185 // normalize
186 vComp = Normalize<DstFormat>(vComp, comp);
187
188 // pack
189 vComp = FormatTraits<DstFormat>::pack(comp, vComp);
190
191 // store
192 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
193
194 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD_WIDTH) / 8;
195 };
196
197 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
198 }
199
200 #if ENABLE_AVX512_SIMD16
201 //////////////////////////////////////////////////////////////////////////
202 /// @brief Load SIMD packed pixels in SOA format and converts to
203 /// SOA RGBA32_FLOAT format.
204 /// @param pSrc - source data in SOA form
205 /// @param dst - output data in SOA form
206 template<SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simd16vector & dst)207 INLINE void SIMDCALL LoadSOA(const uint8_t *pSrc, simd16vector &dst)
208 {
209 // fast path for float32
210 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<SrcFormat>::GetBPC(0) == 32))
211 {
212 auto lambda = [&](int comp)
213 {
214 simd16scalar vComp = _simd16_load_ps(reinterpret_cast<const float *>(pSrc + comp * sizeof(simd16scalar)));
215
216 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
217 };
218
219 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
220 return;
221 }
222
223 auto lambda = [&](int comp)
224 {
225 // load SIMD components
226 simd16scalar vComp = FormatTraits<SrcFormat>::loadSOA_16(comp, pSrc);
227
228 // unpack
229 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
230
231 // convert
232 if (FormatTraits<SrcFormat>::isNormalized(comp))
233 {
234 vComp = _simd16_cvtepi32_ps(_simd16_castps_si(vComp));
235 vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
236 }
237
238 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
239
240 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8;
241 };
242
243 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
244 }
245
246 //////////////////////////////////////////////////////////////////////////
247 /// @brief Clamps the given component based on the requirements on the
248 /// Format template arg
249 /// @param vComp - SIMD vector of floats
250 /// @param Component - component
251 template<SWR_FORMAT Format>
Clamp(simd16scalar const & v,uint32_t Component)252 INLINE simd16scalar SIMDCALL Clamp(simd16scalar const &v, uint32_t Component)
253 {
254 simd16scalar vComp = v;
255 if (FormatTraits<Format>::isNormalized(Component))
256 {
257 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
258 {
259 vComp = _simd16_max_ps(vComp, _simd16_setzero_ps());
260 }
261
262 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
263 {
264 vComp = _simd16_max_ps(vComp, _simd16_set1_ps(-1.0f));
265 }
266 vComp = _simd16_min_ps(vComp, _simd16_set1_ps(1.0f));
267 }
268 else if (FormatTraits<Format>::GetBPC(Component) < 32)
269 {
270 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
271 {
272 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
273 int iMin = 0;
274 simd16scalari vCompi = _simd16_castps_si(vComp);
275 vCompi = _simd16_max_epu32(vCompi, _simd16_set1_epi32(iMin));
276 vCompi = _simd16_min_epu32(vCompi, _simd16_set1_epi32(iMax));
277 vComp = _simd16_castsi_ps(vCompi);
278 }
279 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
280 {
281 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
282 int iMin = -1 - iMax;
283 simd16scalari vCompi = _simd16_castps_si(vComp);
284 vCompi = _simd16_max_epi32(vCompi, _simd16_set1_epi32(iMin));
285 vCompi = _simd16_min_epi32(vCompi, _simd16_set1_epi32(iMax));
286 vComp = _simd16_castsi_ps(vCompi);
287 }
288 }
289
290 return vComp;
291 }
292
293 //////////////////////////////////////////////////////////////////////////
294 /// @brief Normalize the given component based on the requirements on the
295 /// Format template arg
296 /// @param vComp - SIMD vector of floats
297 /// @param Component - component
298 template<SWR_FORMAT Format>
Normalize(simd16scalar const & vComp,uint32_t Component)299 INLINE simd16scalar SIMDCALL Normalize(simd16scalar const &vComp, uint32_t Component)
300 {
301 simd16scalar r = vComp;
302 if (FormatTraits<Format>::isNormalized(Component))
303 {
304 r = _simd16_mul_ps(r, _simd16_set1_ps(FormatTraits<Format>::fromFloat(Component)));
305 r = _simd16_castsi_ps(_simd16_cvtps_epi32(r));
306 }
307 return r;
308 }
309
310 //////////////////////////////////////////////////////////////////////////
311 /// @brief Convert and store simdvector of pixels in SOA
312 /// RGBA32_FLOAT to SOA format
313 /// @param src - source data in SOA form
314 /// @param dst - output data in SOA form
315 template<SWR_FORMAT DstFormat>
StoreSOA(const simd16vector & src,uint8_t * pDst)316 INLINE void SIMDCALL StoreSOA(const simd16vector &src, uint8_t *pDst)
317 {
318 // fast path for float32
319 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) && (FormatTraits<DstFormat>::GetBPC(0) == 32))
320 {
321 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
322 {
323 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
324
325 // Gamma-correct
326 if (FormatTraits<DstFormat>::isSRGB)
327 {
328 if (comp < 3) // Input format is always RGBA32_FLOAT.
329 {
330 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
331 }
332 }
333
334 _simd16_store_ps(reinterpret_cast<float *>(pDst + comp * sizeof(simd16scalar)), vComp);
335 }
336 return;
337 }
338
339 auto lambda = [&](int comp)
340 {
341 simd16scalar vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
342
343 // Gamma-correct
344 if (FormatTraits<DstFormat>::isSRGB)
345 {
346 if (comp < 3) // Input format is always RGBA32_FLOAT.
347 {
348 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
349 }
350 }
351
352 // clamp
353 vComp = Clamp<DstFormat>(vComp, comp);
354
355 // normalize
356 vComp = Normalize<DstFormat>(vComp, comp);
357
358 // pack
359 vComp = FormatTraits<DstFormat>::pack(comp, vComp);
360
361 // store
362 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
363
364 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * KNOB_SIMD16_WIDTH) / 8;
365 };
366
367 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
368 }
369
370 #endif
371