; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL  --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL  --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c

; Intrinsic exercised below; it takes and returns sixteen i8 lanes.
declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone

; Views the <2 x i64> argument as <16 x i8>, passes it through the pabs.b
; intrinsic, and views the result as <2 x i64> again. The 32-bit and 64-bit
; runs are each expected to select a lone pabsb on xmm0 ahead of the return.
define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi8:
; X32:       # BB#0:
; X32-NEXT:    pabsb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi8:
; X64:       # BB#0:
; X64-NEXT:    pabsb %xmm0, %xmm0
; X64-NEXT:    retq
  %bytes = bitcast <2 x i64> %a0 to <16 x i8>
  %abs = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %bytes)
  %ret = bitcast <16 x i8> %abs to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; it takes and returns eight i16 lanes.
declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone

; Views the <2 x i64> argument as <8 x i16>, passes it through the pabs.w
; intrinsic, and views the result as <2 x i64> again. The 32-bit and 64-bit
; runs are each expected to select a lone pabsw on xmm0 ahead of the return.
define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pabsw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pabsw %xmm0, %xmm0
; X64-NEXT:    retq
  %words = bitcast <2 x i64> %a0 to <8 x i16>
  %abs = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %words)
  %ret = bitcast <8 x i16> %abs to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; it takes and returns four i32 lanes.
declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone

; Views the <2 x i64> argument as <4 x i32>, passes it through the pabs.d
; intrinsic, and views the result as <2 x i64> again. The 32-bit and 64-bit
; runs are each expected to select a lone pabsd on xmm0 ahead of the return.
define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_abs_epi32:
; X32:       # BB#0:
; X32-NEXT:    pabsd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_abs_epi32:
; X64:       # BB#0:
; X64-NEXT:    pabsd %xmm0, %xmm0
; X64-NEXT:    retq
  %dwords = bitcast <2 x i64> %a0 to <4 x i32>
  %abs = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %dwords)
  %ret = bitcast <4 x i32> %abs to <2 x i64>
  ret <2 x i64> %ret
}

; No intrinsic call here: the byte alignment is written as a plain
; shufflevector that selects elements 2..15 of the first operand followed by
; elements 0..1 of the second (mask indices 2 through 17). Both runs are
; expected to select palignr into xmm1 and then copy xmm1 to xmm0 with movdqa
; before returning.
define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_alignr_epi8:
; X32:       # BB#0:
; X32-NEXT:    palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X32-NEXT:    movdqa %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_alignr_epi8:
; X64:       # BB#0:
; X64-NEXT:    palignr {{.*#}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1]
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %lo = bitcast <2 x i64> %a0 to <16 x i8>
  %hi = bitcast <2 x i64> %a1 to <16 x i8>
  %aligned = shufflevector <16 x i8> %lo, <16 x i8> %hi, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
  %ret = bitcast <16 x i8> %aligned to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the phadd.w intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phaddw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi16:
; X32:       # BB#0:
; X32-NEXT:    phaddw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_epi16:
; X64:       # BB#0:
; X64-NEXT:    phaddw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %hadd = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %hadd to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <4 x i32> operands in, one <4 x i32> out.
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Views both <2 x i64> arguments as <4 x i32>, calls the phadd.d intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phaddd of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadd_epi32:
; X32:       # BB#0:
; X32-NEXT:    phaddd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_epi32:
; X64:       # BB#0:
; X64-NEXT:    phaddd %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %hadd = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %ret = bitcast <4 x i32> %hadd to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the phadd.sw intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phaddsw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hadds_epi16:
; X32:       # BB#0:
; X32-NEXT:    phaddsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadds_epi16:
; X64:       # BB#0:
; X64-NEXT:    phaddsw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %hadds = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %hadds to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the phsub.w intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phsubw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi16:
; X32:       # BB#0:
; X32-NEXT:    phsubw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_epi16:
; X64:       # BB#0:
; X64-NEXT:    phsubw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %hsub = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %hsub to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <4 x i32> operands in, one <4 x i32> out.
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Views both <2 x i64> arguments as <4 x i32>, calls the phsub.d intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phsubd of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsub_epi32:
; X32:       # BB#0:
; X32-NEXT:    phsubd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_epi32:
; X64:       # BB#0:
; X64-NEXT:    phsubd %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %hsub = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %ret = bitcast <4 x i32> %hsub to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the phsub.sw intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single phsubsw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_hsubs_epi16:
; X32:       # BB#0:
; X32-NEXT:    phsubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubs_epi16:
; X64:       # BB#0:
; X64-NEXT:    phsubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %hsubs = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %hsubs to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <16 x i8> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; Views both <2 x i64> arguments as <16 x i8>, calls the pmadd.ub.sw intrinsic
; on them in argument order, and views its <8 x i16> result as <2 x i64>. Both
; runs are expected to select a single pmaddubsw of xmm1 into xmm0 before
; returning.
define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maddubs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmaddubsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddubs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmaddubsw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %madd = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %lhs, <16 x i8> %rhs)
  %ret = bitcast <8 x i16> %madd to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the pmul.hr.sw intrinsic
; on them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single pmulhrsw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_mulhrs_epi16:
; X32:       # BB#0:
; X32-NEXT:    pmulhrsw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_mulhrs_epi16:
; X64:       # BB#0:
; X64-NEXT:    pmulhrsw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %mulhrs = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %mulhrs to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <16 x i8> operands in, one <16 x i8> out.
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; Views both <2 x i64> arguments as <16 x i8>, calls the pshuf.b intrinsic with
; %a0 as the first operand and %a1 as the second, and views the result as
; <2 x i64>. Both runs are expected to select a single pshufb of xmm1 into xmm0
; before returning.
define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shuffle_epi8:
; X32:       # BB#0:
; X32-NEXT:    pshufb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shuffle_epi8:
; X64:       # BB#0:
; X64-NEXT:    pshufb %xmm1, %xmm0
; X64-NEXT:    retq
  %src = bitcast <2 x i64> %a0 to <16 x i8>
  %ctrl = bitcast <2 x i64> %a1 to <16 x i8>
  %shuffled = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %src, <16 x i8> %ctrl)
  %ret = bitcast <16 x i8> %shuffled to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <16 x i8> operands in, one <16 x i8> out.
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; Views both <2 x i64> arguments as <16 x i8>, calls the psign.b intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single psignb of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi8:
; X32:       # BB#0:
; X32-NEXT:    psignb %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi8:
; X64:       # BB#0:
; X64-NEXT:    psignb %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <16 x i8>
  %rhs = bitcast <2 x i64> %a1 to <16 x i8>
  %signed = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %lhs, <16 x i8> %rhs)
  %ret = bitcast <16 x i8> %signed to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <8 x i16> operands in, one <8 x i16> out.
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Views both <2 x i64> arguments as <8 x i16>, calls the psign.w intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single psignw of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi16:
; X32:       # BB#0:
; X32-NEXT:    psignw %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi16:
; X64:       # BB#0:
; X64-NEXT:    psignw %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <8 x i16>
  %rhs = bitcast <2 x i64> %a1 to <8 x i16>
  %signed = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %lhs, <8 x i16> %rhs)
  %ret = bitcast <8 x i16> %signed to <2 x i64>
  ret <2 x i64> %ret
}

; Intrinsic exercised below; two <4 x i32> operands in, one <4 x i32> out.
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Views both <2 x i64> arguments as <4 x i32>, calls the psign.d intrinsic on
; them in argument order, and views the result as <2 x i64>. Both runs are
; expected to select a single psignd of xmm1 into xmm0 before returning.
define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sign_epi32:
; X32:       # BB#0:
; X32-NEXT:    psignd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sign_epi32:
; X64:       # BB#0:
; X64-NEXT:    psignd %xmm1, %xmm0
; X64-NEXT:    retq
  %lhs = bitcast <2 x i64> %a0 to <4 x i32>
  %rhs = bitcast <2 x i64> %a1 to <4 x i32>
  %signed = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %lhs, <4 x i32> %rhs)
  %ret = bitcast <4 x i32> %signed to <2 x i64>
  ret <2 x i64> %ret
}
