1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file format_conversion.h
24 *
25 * @brief API implementation
26 *
27 ******************************************************************************/
28 #include "format_types.h"
29 #include "format_traits.h"
30
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Load SIMD packed pixels in SOA format and converts to
33 /// SOA RGBA32_FLOAT format.
34 /// @param pSrc - source data in SOA form
35 /// @param dst - output data in SOA form
36 template <typename SIMD_T, SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,Vec4<SIMD_T> & dst)37 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, Vec4<SIMD_T>& dst)
38 {
39 // fast path for float32
40 if ((FormatTraits<SrcFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
41 (FormatTraits<SrcFormat>::GetBPC(0) == 32))
42 {
43 auto lambda = [&](int comp)
44 {
45 Float<SIMD_T> vComp =
46 SIMD_T::load_ps(reinterpret_cast<const float*>(pSrc + comp * sizeof(Float<SIMD_T>)));
47
48 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
49 };
50
51 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
52 return;
53 }
54
55 auto lambda = [&](int comp)
56 {
57 // load SIMD components
58 Float<SIMD_T> vComp;
59 FormatTraits<SrcFormat>::loadSOA(comp, pSrc, vComp);
60
61 // unpack
62 vComp = FormatTraits<SrcFormat>::unpack(comp, vComp);
63
64 // convert
65 if (FormatTraits<SrcFormat>::isNormalized(comp))
66 {
67 vComp = SIMD_T::cvtepi32_ps(SIMD_T::castps_si(vComp));
68 vComp = SIMD_T::mul_ps(vComp, SIMD_T::set1_ps(FormatTraits<SrcFormat>::toFloat(comp)));
69 }
70
71 dst.v[FormatTraits<SrcFormat>::swizzle(comp)] = vComp;
72
73 // is there a better way to get this from the SIMD traits?
74 const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
75
76 pSrc += (FormatTraits<SrcFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
77 };
78
79 UnrollerL<0, FormatTraits<SrcFormat>::numComps, 1>::step(lambda);
80 }
81
82 template <SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simdvector & dst)83 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simdvector& dst)
84 {
85 LoadSOA<SIMD256, SrcFormat>(pSrc, dst);
86 }
87
88 template <SWR_FORMAT SrcFormat>
LoadSOA(const uint8_t * pSrc,simd16vector & dst)89 INLINE void SIMDCALL LoadSOA(const uint8_t* pSrc, simd16vector& dst)
90 {
91 LoadSOA<SIMD512, SrcFormat>(pSrc, dst);
92 }
93
94 //////////////////////////////////////////////////////////////////////////
95 /// @brief Clamps the given component based on the requirements on the
96 /// Format template arg
97 /// @param vComp - SIMD vector of floats
98 /// @param Component - component
99 template <typename SIMD_T, SWR_FORMAT Format>
Clamp(Float<SIMD_T> const & v,uint32_t Component)100 INLINE Float<SIMD_T> SIMDCALL Clamp(Float<SIMD_T> const& v, uint32_t Component)
101 {
102 Float<SIMD_T> vComp = v;
103 if (Component >= 4 || Component < 0)
104 {
105 // Component shouldn't out of <0;3> range
106 assert(false);
107 return vComp;
108 }
109 if (FormatTraits<Format>::isNormalized(Component))
110 {
111 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UNORM)
112 {
113 vComp = SIMD_T::max_ps(vComp, SIMD_T::setzero_ps());
114 }
115
116 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SNORM)
117 {
118 vComp = SIMD_T::max_ps(vComp, SIMD_T::set1_ps(-1.0f));
119 }
120 vComp = SIMD_T::min_ps(vComp, SIMD_T::set1_ps(1.0f));
121 }
122 else if (FormatTraits<Format>::GetBPC(Component) < 32)
123 {
124 if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_UINT)
125 {
126 int iMax = (1 << FormatTraits<Format>::GetBPC(Component)) - 1;
127 int iMin = 0;
128 Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
129 vCompi = SIMD_T::max_epu32(vCompi, SIMD_T::set1_epi32(iMin));
130 vCompi = SIMD_T::min_epu32(vCompi, SIMD_T::set1_epi32(iMax));
131 vComp = SIMD_T::castsi_ps(vCompi);
132 }
133 else if (FormatTraits<Format>::GetType(Component) == SWR_TYPE_SINT)
134 {
135 int iMax = (1 << (FormatTraits<Format>::GetBPC(Component) - 1)) - 1;
136 int iMin = -1 - iMax;
137 Integer<SIMD_T> vCompi = SIMD_T::castps_si(vComp);
138 vCompi = SIMD_T::max_epi32(vCompi, SIMD_T::set1_epi32(iMin));
139 vCompi = SIMD_T::min_epi32(vCompi, SIMD_T::set1_epi32(iMax));
140 vComp = SIMD_T::castsi_ps(vCompi);
141 }
142 }
143
144 return vComp;
145 }
146
147 template <SWR_FORMAT Format>
Clamp(simdscalar const & v,uint32_t Component)148 INLINE simdscalar SIMDCALL Clamp(simdscalar const& v, uint32_t Component)
149 {
150 return Clamp<SIMD256, Format>(v, Component);
151 }
152
153 template <SWR_FORMAT Format>
Clamp(simd16scalar const & v,uint32_t Component)154 INLINE simd16scalar SIMDCALL Clamp(simd16scalar const& v, uint32_t Component)
155 {
156 return Clamp<SIMD512, Format>(v, Component);
157 }
158
159 //////////////////////////////////////////////////////////////////////////
160 /// @brief Normalize the given component based on the requirements on the
161 /// Format template arg
162 /// @param vComp - SIMD vector of floats
163 /// @param Component - component
164 template <typename SIMD_T, SWR_FORMAT Format>
Normalize(Float<SIMD_T> const & vComp,uint32_t Component)165 INLINE Float<SIMD_T> SIMDCALL Normalize(Float<SIMD_T> const& vComp, uint32_t Component)
166 {
167 Float<SIMD_T> r = vComp;
168 if (FormatTraits<Format>::isNormalized(Component))
169 {
170 r = SIMD_T::mul_ps(r, SIMD_T::set1_ps(FormatTraits<Format>::fromFloat(Component)));
171 r = SIMD_T::castsi_ps(SIMD_T::cvtps_epi32(r));
172 }
173 return r;
174 }
175
176 template <SWR_FORMAT Format>
Normalize(simdscalar const & vComp,uint32_t Component)177 INLINE simdscalar SIMDCALL Normalize(simdscalar const& vComp, uint32_t Component)
178 {
179 return Normalize<SIMD256, Format>(vComp, Component);
180 }
181
182 template <SWR_FORMAT Format>
Normalize(simd16scalar const & vComp,uint32_t Component)183 INLINE simd16scalar SIMDCALL Normalize(simd16scalar const& vComp, uint32_t Component)
184 {
185 return Normalize<SIMD512, Format>(vComp, Component);
186 }
187
188 //////////////////////////////////////////////////////////////////////////
189 /// @brief Convert and store simdvector of pixels in SOA
190 /// RGBA32_FLOAT to SOA format
191 /// @param src - source data in SOA form
192 /// @param dst - output data in SOA form
193 template <typename SIMD_T, SWR_FORMAT DstFormat>
StoreSOA(const Vec4<SIMD_T> & src,uint8_t * pDst)194 INLINE void SIMDCALL StoreSOA(const Vec4<SIMD_T>& src, uint8_t* pDst)
195 {
196 // fast path for float32
197 if ((FormatTraits<DstFormat>::GetType(0) == SWR_TYPE_FLOAT) &&
198 (FormatTraits<DstFormat>::GetBPC(0) == 32))
199 {
200 for (uint32_t comp = 0; comp < FormatTraits<DstFormat>::numComps; ++comp)
201 {
202 Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
203
204 // Gamma-correct
205 if (FormatTraits<DstFormat>::isSRGB)
206 {
207 if (comp < 3) // Input format is always RGBA32_FLOAT.
208 {
209 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
210 }
211 }
212
213 SIMD_T::store_ps(reinterpret_cast<float*>(pDst + comp * sizeof(simd16scalar)), vComp);
214 }
215 return;
216 }
217
218 auto lambda = [&](int comp) {
219 Float<SIMD_T> vComp = src.v[FormatTraits<DstFormat>::swizzle(comp)];
220
221 // Gamma-correct
222 if (FormatTraits<DstFormat>::isSRGB)
223 {
224 if (comp < 3) // Input format is always RGBA32_FLOAT.
225 {
226 vComp = FormatTraits<R32G32B32A32_FLOAT>::convertSrgb(comp, vComp);
227 }
228 }
229
230 // clamp
231 vComp = Clamp<SIMD_T, DstFormat>(vComp, comp);
232
233 // normalize
234 vComp = Normalize<SIMD_T, DstFormat>(vComp, comp);
235
236 // pack
237 vComp = FormatTraits<DstFormat>::pack(comp, vComp);
238
239 // store
240 FormatTraits<DstFormat>::storeSOA(comp, pDst, vComp);
241
242 // is there a better way to get this from the SIMD traits?
243 const uint32_t SIMD_WIDTH = sizeof(typename SIMD_T::Float) / sizeof(float);
244
245 pDst += (FormatTraits<DstFormat>::GetBPC(comp) * SIMD_WIDTH) / 8;
246 };
247
248 UnrollerL<0, FormatTraits<DstFormat>::numComps, 1>::step(lambda);
249 }
250
251 template <SWR_FORMAT DstFormat>
StoreSOA(const simdvector & src,uint8_t * pDst)252 INLINE void SIMDCALL StoreSOA(const simdvector& src, uint8_t* pDst)
253 {
254 StoreSOA<SIMD256, DstFormat>(src, pDst);
255 }
256
257 template <SWR_FORMAT DstFormat>
StoreSOA(const simd16vector & src,uint8_t * pDst)258 INLINE void SIMDCALL StoreSOA(const simd16vector& src, uint8_t* pDst)
259 {
260 StoreSOA<SIMD512, DstFormat>(src, pDst);
261 }
262
263