1 /*
2 * Copyright 2014 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #ifndef SkMath_opts_SSE2_DEFINED
9 #define SkMath_opts_SSE2_DEFINED
10
11 #include <emmintrin.h>
12
// SSE2 has no packed 32-bit integer divide (there is no _mm_div_epi32()), so
// this shim emulates one with single-precision float division.
// Each of the four signed 32-bit lanes of a is divided by the matching lane
// of b, and the quotient is truncated toward zero when converted back.
// Callers must keep the lanes of a and b within the range that float can
// represent exactly, otherwise the result may be off.
static inline __m128i shim_mm_div_epi32(const __m128i& a, const __m128i& b) {
    // Widen both operands to float and divide lane by lane.
    const __m128 quotient = _mm_div_ps(_mm_cvtepi32_ps(a), _mm_cvtepi32_ps(b));

    // The "tt" conversion truncates instead of using the current rounding mode.
    return _mm_cvttps_epi32(quotient);
}
20
// Portable version of SkSqrtBits is in SkMath.cpp.
//
// Bit-by-bit square root computed independently on the four 32-bit lanes of x.
// The loop runs count + 1 times (always at least once). Each pass shifts the
// next two most-significant bits of x into the running remainder and produces
// one more bit of the root. With count == 15 all 32 input bits are consumed
// and every lane holds floor(sqrt(x)).
//
// The remainder and the trial divisor are unsigned quantities. SSE2 only
// offers a signed 32-bit compare, so the sign bit of both operands is flipped
// before comparing. A plain signed compare gives the wrong answer once the
// trial divisor reaches 2^31 (e.g. x == 0x40000000 with count == 30).
// NOTE(review): presumably callers keep count <= 30 like the portable
// version does -- confirm against SkMath.cpp.
static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
    const __m128i one128  = _mm_set1_epi32(1);
    // 0x80000000 in every lane; XOR-ing with it maps unsigned order onto
    // signed order.
    const __m128i signBit = _mm_slli_epi32(one128, 31);

    __m128i root  = _mm_setzero_si128();
    __m128i remHi = _mm_setzero_si128();
    __m128i remLo = x;

    do {
        root = _mm_slli_epi32(root, 1);

        // Move the top two bits of remLo into the bottom of remHi.
        remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
                             _mm_srli_epi32(remLo, 30));
        remLo = _mm_slli_epi32(remLo, 2);

        // testDiv = 2 * root + 1
        const __m128i testDiv = _mm_add_epi32(_mm_slli_epi32(root, 1), one128);

        // All-ones in lanes where remHi < testDiv (unsigned), zero elsewhere.
        const __m128i tooSmall =
                _mm_cmplt_epi32(_mm_xor_si128(remHi, signBit),
                                _mm_xor_si128(testDiv, signBit));

        // In lanes where remHi >= testDiv: remHi -= testDiv and root += 1.
        // Other lanes subtract/add zero and so stay unchanged.
        remHi = _mm_sub_epi32(remHi, _mm_andnot_si128(tooSmall, testDiv));
        root  = _mm_add_epi32(root, _mm_andnot_si128(tooSmall, one128));
    } while (--count >= 0);

    return root;
}
50
51 #endif // SkMath_opts_SSE2_DEFINED
52