/// @ref core
/// @file glm/detail/func_common_simd.inl
///
/// SSE specializations of the glm::detail "compute_*" functors for 4-component
/// vectors. Every specialization below follows the same shape: create an
/// uninitialized result vector, fill its raw SIMD register (`data`) with a
/// single intrinsic or glm_* helper call, and return it.

#if GLM_ARCH & GLM_ARCH_SSE2_BIT

#include "../simd/common.h"

#include <immintrin.h>

namespace glm{
namespace detail
{
	// abs() for float vec4: delegates to glm_vec4_abs on the raw register.
	template <precision P>
	struct compute_abs_vector<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_abs(v.data);
			return result;
		}
	};

	// abs() for int vec4: delegates to glm_ivec4_abs on the raw register.
	template <precision P>
	struct compute_abs_vector<int, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<int, P> call(tvec4<int, P> const & v)
		{
			tvec4<int, P> result(uninitialize);
			result.data = glm_ivec4_abs(v.data);
			return result;
		}
	};

	// floor() for float vec4: delegates to glm_vec4_floor.
	template <precision P>
	struct compute_floor<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_floor(v.data);
			return result;
		}
	};

	// ceil() for float vec4: delegates to glm_vec4_ceil.
	template <precision P>
	struct compute_ceil<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_ceil(v.data);
			return result;
		}
	};

	// fract() for float vec4: delegates to glm_vec4_fract.
	template <precision P>
	struct compute_fract<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_fract(v.data);
			return result;
		}
	};

	// round() for float vec4: delegates to glm_vec4_round.
	template <precision P>
	struct compute_round<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_round(v.data);
			return result;
		}
	};

	// mod(x, y) for float vec4: delegates to glm_vec4_mod.
	template <precision P>
	struct compute_mod<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_mod(x.data, y.data);
			return result;
		}
	};

	// min(v1, v2) for float vec4 via _mm_min_ps.
	template <precision P>
	struct compute_min_vector<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
		{
			tvec4<float, P> result(uninitialize);
			result.data = _mm_min_ps(v1.data, v2.data);
			return result;
		}
	};

	// min(v1, v2) for signed 32-bit integer vec4 via _mm_min_epi32.
	// NOTE(review): _mm_min_epi32 is an SSE4.1 intrinsic, but this file is only
	// guarded by GLM_ARCH_SSE2_BIT -- confirm SSE4.1 is guaranteed wherever this
	// specialization gets instantiated.
	template <precision P>
	struct compute_min_vector<int32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
		{
			tvec4<int32, P> result(uninitialize);
			result.data = _mm_min_epi32(v1.data, v2.data);
			return result;
		}
	};

	// min(v1, v2) for unsigned 32-bit integer vec4 via _mm_min_epu32.
	// The return type is tvec4<uint32, P>, matching both the operands and the
	// local result; it was previously declared as tvec4<int32, P>, which forced
	// a signed conversion of the unsigned result on return.
	// NOTE(review): _mm_min_epu32 is an SSE4.1 intrinsic -- see the note on the
	// int32 specialization about the SSE2-only guard.
	template <precision P>
	struct compute_min_vector<uint32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
		{
			tvec4<uint32, P> result(uninitialize);
			result.data = _mm_min_epu32(v1.data, v2.data);
			return result;
		}
	};

	// max(v1, v2) for float vec4 via _mm_max_ps.
	template <precision P>
	struct compute_max_vector<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & v1, tvec4<float, P> const & v2)
		{
			tvec4<float, P> result(uninitialize);
			result.data = _mm_max_ps(v1.data, v2.data);
			return result;
		}
	};

	// max(v1, v2) for signed 32-bit integer vec4 via _mm_max_epi32.
	// NOTE(review): SSE4.1 intrinsic under an SSE2-only guard -- confirm.
	template <precision P>
	struct compute_max_vector<int32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & v1, tvec4<int32, P> const & v2)
		{
			tvec4<int32, P> result(uninitialize);
			result.data = _mm_max_epi32(v1.data, v2.data);
			return result;
		}
	};

	// max(v1, v2) for unsigned 32-bit integer vec4 via _mm_max_epu32.
	// NOTE(review): SSE4.1 intrinsic under an SSE2-only guard -- confirm.
	template <precision P>
	struct compute_max_vector<uint32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & v1, tvec4<uint32, P> const & v2)
		{
			tvec4<uint32, P> result(uninitialize);
			result.data = _mm_max_epu32(v1.data, v2.data);
			return result;
		}
	};

	// clamp(x, minVal, maxVal) for float vec4, computed as
	// min(max(x, minVal), maxVal).
	template <precision P>
	struct compute_clamp_vector<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & minVal, tvec4<float, P> const & maxVal)
		{
			tvec4<float, P> result(uninitialize);
			result.data = _mm_min_ps(_mm_max_ps(x.data, minVal.data), maxVal.data);
			return result;
		}
	};

	// clamp(x, minVal, maxVal) for signed 32-bit integer vec4, computed as
	// min(max(x, minVal), maxVal).
	// NOTE(review): uses SSE4.1 intrinsics under an SSE2-only guard -- confirm.
	template <precision P>
	struct compute_clamp_vector<int32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<int32, P> call(tvec4<int32, P> const & x, tvec4<int32, P> const & minVal, tvec4<int32, P> const & maxVal)
		{
			tvec4<int32, P> result(uninitialize);
			result.data = _mm_min_epi32(_mm_max_epi32(x.data, minVal.data), maxVal.data);
			return result;
		}
	};

	// clamp(x, minVal, maxVal) for unsigned 32-bit integer vec4, computed as
	// min(max(x, minVal), maxVal).
	// NOTE(review): uses SSE4.1 intrinsics under an SSE2-only guard -- confirm.
	template <precision P>
	struct compute_clamp_vector<uint32, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<uint32, P> call(tvec4<uint32, P> const & x, tvec4<uint32, P> const & minVal, tvec4<uint32, P> const & maxVal)
		{
			tvec4<uint32, P> result(uninitialize);
			result.data = _mm_min_epu32(_mm_max_epu32(x.data, minVal.data), maxVal.data);
			return result;
		}
	};

	// mix(x, y, a) with a boolean selector: each component of the result is
	// taken from y where the matching component of a is true, and from x
	// otherwise.
	template <precision P>
	struct compute_mix_vector<float, bool, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const & x, tvec4<float, P> const & y, tvec4<bool, P> const & a)
		{
			// -(int)true == -1 (all bits set), -(int)false == 0, so every lane
			// of the mask is either all ones or all zeros.
			__m128i const Load = _mm_set_epi32(-(int)a.w, -(int)a.z, -(int)a.y, -(int)a.x);
			__m128 const Mask = _mm_castsi128_ps(Load);

			tvec4<float, P> result(uninitialize);
			// The blendv path is switched off by the leading `0 &&`; the
			// and/andnot/or bit-select below is what actually gets compiled.
#			if 0 && GLM_ARCH & GLM_ARCH_AVX
				result.data = _mm_blendv_ps(x.data, y.data, Mask);
#			else
				result.data = _mm_or_ps(_mm_and_ps(Mask, y.data), _mm_andnot_ps(Mask, x.data));
#			endif
			return result;
		}
	};
/* FIXME
	template <precision P>
	struct compute_step_vector<float, P, tvec4>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge, tvec4<float, P> const& x)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_step(edge.data, x.data);
			return result;
		}
	};
*/
	// smoothstep(edge0, edge1, x) for float vec4: delegates to
	// glm_vec4_smoothstep.
	template <precision P>
	struct compute_smoothstep_vector<float, P, tvec4, true>
	{
		GLM_FUNC_QUALIFIER static tvec4<float, P> call(tvec4<float, P> const& edge0, tvec4<float, P> const& edge1, tvec4<float, P> const& x)
		{
			tvec4<float, P> result(uninitialize);
			result.data = glm_vec4_smoothstep(edge0.data, edge1.data, x.data);
			return result;
		}
	};
}//namespace detail
}//namespace glm

#endif//GLM_ARCH & GLM_ARCH_SSE2_BIT