/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24 #ifndef __TMMINTRIN_H
25 #define __TMMINTRIN_H
26 
27 #ifndef __SSSE3__
28 #error "SSSE3 instruction set not enabled"
29 #else
30 
31 #include <pmmintrin.h>
32 
33 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi8(__m64 a)34 _mm_abs_pi8(__m64 a)
35 {
36     return (__m64)__builtin_ia32_pabsb((__v8qi)a);
37 }
38 
39 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi8(__m128i a)40 _mm_abs_epi8(__m128i a)
41 {
42     return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
43 }
44 
45 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi16(__m64 a)46 _mm_abs_pi16(__m64 a)
47 {
48     return (__m64)__builtin_ia32_pabsw((__v4hi)a);
49 }
50 
51 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi16(__m128i a)52 _mm_abs_epi16(__m128i a)
53 {
54     return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
55 }
56 
57 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_abs_pi32(__m64 a)58 _mm_abs_pi32(__m64 a)
59 {
60     return (__m64)__builtin_ia32_pabsd((__v2si)a);
61 }
62 
63 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_abs_epi32(__m128i a)64 _mm_abs_epi32(__m128i a)
65 {
66     return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
67 }
68 
/*
 * PALIGNR: concatenate a (upper half) with b (lower half), shift the
 * combined value right, and keep the low-order half as the result.
 * n is forwarded to the builtin unchanged; the Intel intrinsic definition
 * specifies it as a byte count and requires a compile-time constant, which
 * is presumably why these are macros rather than inline functions.
 * _mm_alignr_epi8 is the 128-bit form, _mm_alignr_pi8 the 64-bit form.
 */
#define _mm_alignr_epi8(a, b, n) (__builtin_ia32_palignr128((a), (b), (n)))
#define _mm_alignr_pi8(a, b, n) (__builtin_ia32_palignr((a), (b), (n)))

72 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi16(__m128i a,__m128i b)73 _mm_hadd_epi16(__m128i a, __m128i b)
74 {
75     return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
76 }
77 
78 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadd_epi32(__m128i a,__m128i b)79 _mm_hadd_epi32(__m128i a, __m128i b)
80 {
81     return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
82 }
83 
84 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi16(__m64 a,__m64 b)85 _mm_hadd_pi16(__m64 a, __m64 b)
86 {
87     return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
88 }
89 
90 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadd_pi32(__m64 a,__m64 b)91 _mm_hadd_pi32(__m64 a, __m64 b)
92 {
93     return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
94 }
95 
96 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hadds_epi16(__m128i a,__m128i b)97 _mm_hadds_epi16(__m128i a, __m128i b)
98 {
99     return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
100 }
101 
102 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hadds_pi16(__m64 a,__m64 b)103 _mm_hadds_pi16(__m64 a, __m64 b)
104 {
105     return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
106 }
107 
108 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi16(__m128i a,__m128i b)109 _mm_hsub_epi16(__m128i a, __m128i b)
110 {
111     return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
112 }
113 
114 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsub_epi32(__m128i a,__m128i b)115 _mm_hsub_epi32(__m128i a, __m128i b)
116 {
117     return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
118 }
119 
120 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi16(__m64 a,__m64 b)121 _mm_hsub_pi16(__m64 a, __m64 b)
122 {
123     return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
124 }
125 
126 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsub_pi32(__m64 a,__m64 b)127 _mm_hsub_pi32(__m64 a, __m64 b)
128 {
129     return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
130 }
131 
132 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_epi16(__m128i a,__m128i b)133 _mm_hsubs_epi16(__m128i a, __m128i b)
134 {
135     return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
136 }
137 
138 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_hsubs_pi16(__m64 a,__m64 b)139 _mm_hsubs_pi16(__m64 a, __m64 b)
140 {
141     return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
142 }
143 
144 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_epi16(__m128i a,__m128i b)145 _mm_maddubs_epi16(__m128i a, __m128i b)
146 {
147     return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
148 }
149 
150 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_maddubs_pi16(__m64 a,__m64 b)151 _mm_maddubs_pi16(__m64 a, __m64 b)
152 {
153     return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
154 }
155 
156 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_epi16(__m128i a,__m128i b)157 _mm_mulhrs_epi16(__m128i a, __m128i b)
158 {
159     return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
160 }
161 
162 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhrs_pi16(__m64 a,__m64 b)163 _mm_mulhrs_pi16(__m64 a, __m64 b)
164 {
165     return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
166 }
167 
168 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_epi8(__m128i a,__m128i b)169 _mm_shuffle_epi8(__m128i a, __m128i b)
170 {
171     return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
172 }
173 
174 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_shuffle_pi8(__m64 a,__m64 b)175 _mm_shuffle_pi8(__m64 a, __m64 b)
176 {
177     return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
178 }
179 
180 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi8(__m128i a,__m128i b)181 _mm_sign_epi8(__m128i a, __m128i b)
182 {
183     return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
184 }
185 
186 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi16(__m128i a,__m128i b)187 _mm_sign_epi16(__m128i a, __m128i b)
188 {
189     return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
190 }
191 
192 static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sign_epi32(__m128i a,__m128i b)193 _mm_sign_epi32(__m128i a, __m128i b)
194 {
195     return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
196 }
197 
198 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi8(__m64 a,__m64 b)199 _mm_sign_pi8(__m64 a, __m64 b)
200 {
201     return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
202 }
203 
204 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi16(__m64 a,__m64 b)205 _mm_sign_pi16(__m64 a, __m64 b)
206 {
207     return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
208 }
209 
210 static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sign_pi32(__m64 a,__m64 b)211 _mm_sign_pi32(__m64 a, __m64 b)
212 {
213     return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
214 }
215 
216 #endif /* __SSSE3__ */
217 
218 #endif /* __TMMINTRIN_H */
219