• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse3-builtins.c

; Calls @llvm.x86.sse3.addsub.pd on two <2 x double> arguments and returns
; the result. The assertions below expect a single addsubpd followed by the
; return on both the 32-bit and 64-bit targets.
define <2 x double> @test_mm_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_addsub_pd:
; X32:       # BB#0:
; X32-NEXT:    addsubpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_addsub_pd:
; X64:       # BB#0:
; X64-NEXT:    addsubpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

; Calls @llvm.x86.sse3.addsub.ps on two <4 x float> arguments and returns
; the result. The assertions below expect a single addsubps followed by the
; return on both the 32-bit and 64-bit targets.
define <4 x float> @test_mm_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_addsub_ps:
; X32:       # BB#0:
; X32-NEXT:    addsubps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_addsub_ps:
; X64:       # BB#0:
; X64-NEXT:    addsubps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

; Calls @llvm.x86.sse3.hadd.pd on two <2 x double> arguments and returns
; the result. The assertions below expect a single haddpd followed by the
; return on both the 32-bit and 64-bit targets.
define <2 x double> @test_mm_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_hadd_pd:
; X32:       # BB#0:
; X32-NEXT:    haddpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_pd:
; X64:       # BB#0:
; X64-NEXT:    haddpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

; Calls @llvm.x86.sse3.hadd.ps on two <4 x float> arguments and returns
; the result. The assertions below expect a single haddps followed by the
; return on both the 32-bit and 64-bit targets.
define <4 x float> @test_mm_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_hadd_ps:
; X32:       # BB#0:
; X32-NEXT:    haddps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_ps:
; X64:       # BB#0:
; X64-NEXT:    haddps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

; Calls @llvm.x86.sse3.hsub.pd on two <2 x double> arguments and returns
; the result. The assertions below expect a single hsubpd followed by the
; return on both the 32-bit and 64-bit targets.
define <2 x double> @test_mm_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_hsub_pd:
; X32:       # BB#0:
; X32-NEXT:    hsubpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_pd:
; X64:       # BB#0:
; X64-NEXT:    hsubpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

; Calls @llvm.x86.sse3.hsub.ps on two <4 x float> arguments and returns
; the result. The assertions below expect a single hsubps followed by the
; return on both the 32-bit and 64-bit targets.
define <4 x float> @test_mm_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_hsub_ps:
; X32:       # BB#0:
; X32-NEXT:    hsubps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_ps:
; X64:       # BB#0:
; X64-NEXT:    hsubps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; Bitcasts the <2 x i64>* argument to i8*, passes it to
; @llvm.x86.sse3.ldu.dq, and bitcasts the <16 x i8> result back to
; <2 x i64>. The assertions below expect a single lddqu from the pointer;
; on the 32-bit target the pointer is first loaded from the stack into %eax.
define <2 x i64> @test_mm_lddqu_si128(<2 x i64>* %a0) {
; X32-LABEL: test_mm_lddqu_si128:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    lddqu (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_lddqu_si128:
; X64:       # BB#0:
; X64-NEXT:    lddqu (%rdi), %xmm0
; X64-NEXT:    retq
  %bc = bitcast <2 x i64>* %a0 to i8*
  %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %bc)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; Loads one double through the pointer argument and inserts it into both
; lanes of a <2 x double> with two insertelement instructions (no intrinsic
; call). The assertions below expect a movddup with a memory operand; on the
; 32-bit target the pointer is first loaded from the stack into %eax.
define <2 x double> @test_mm_loaddup_pd(double* %a0) {
; X32-LABEL: test_mm_loaddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_loaddup_pd:
; X64:       # BB#0:
; X64-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; X64-NEXT:    retq
  %ld = load double, double* %a0
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}

; Shuffles %a0 with itself using an all-zero mask, i.e. element 0 is
; replicated into both lanes (no intrinsic call). The assertions below
; expect a register-to-register movddup producing xmm0[0,0].
define <2 x double> @test_mm_movedup_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_movedup_pd:
; X32:       # BB#0:
; X32-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movedup_pd:
; X64:       # BB#0:
; X64-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> zeroinitializer
  ret <2 x double> %res
}

; Shuffles %a0 with itself using mask <1, 1, 3, 3>, duplicating the
; odd-indexed floats (no intrinsic call). The assertions below expect a
; movshdup producing xmm0[1,1,3,3].
define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %res
}

; Shuffles %a0 with itself using mask <0, 0, 2, 2>, duplicating the
; even-indexed floats (no intrinsic call). The assertions below expect a
; movsldup producing xmm0[0,0,2,2].
define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %res
}
