; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL  --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL  --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse3-builtins.c

; A call to @llvm.x86.sse3.addsub.pd on the two incoming <2 x double>
; arguments is expected to be selected as a single addsubpd %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <2 x double> @test_mm_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_addsub_pd:
; X32:       # BB#0:
; X32-NEXT:    addsubpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_addsub_pd:
; X64:       # BB#0:
; X64-NEXT:    addsubpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

; A call to @llvm.x86.sse3.addsub.ps on the two incoming <4 x float>
; arguments is expected to be selected as a single addsubps %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <4 x float> @test_mm_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_addsub_ps:
; X32:       # BB#0:
; X32-NEXT:    addsubps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_addsub_ps:
; X64:       # BB#0:
; X64-NEXT:    addsubps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone

; A call to @llvm.x86.sse3.hadd.pd on the two incoming <2 x double>
; arguments is expected to be selected as a single haddpd %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <2 x double> @test_mm_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_hadd_pd:
; X32:       # BB#0:
; X32-NEXT:    haddpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_pd:
; X64:       # BB#0:
; X64-NEXT:    haddpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone

; A call to @llvm.x86.sse3.hadd.ps on the two incoming <4 x float>
; arguments is expected to be selected as a single haddps %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <4 x float> @test_mm_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_hadd_ps:
; X32:       # BB#0:
; X32-NEXT:    haddps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hadd_ps:
; X64:       # BB#0:
; X64-NEXT:    haddps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

; A call to @llvm.x86.sse3.hsub.pd on the two incoming <2 x double>
; arguments is expected to be selected as a single hsubpd %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <2 x double> @test_mm_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_hsub_pd:
; X32:       # BB#0:
; X32-NEXT:    hsubpd %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_pd:
; X64:       # BB#0:
; X64-NEXT:    hsubpd %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone

; A call to @llvm.x86.sse3.hsub.ps on the two incoming <4 x float>
; arguments is expected to be selected as a single hsubps %xmm1, %xmm0
; followed by the return, on both the i386 and x86_64 runs.
define <4 x float> @test_mm_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_hsub_ps:
; X32:       # BB#0:
; X32-NEXT:    hsubps %xmm1, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsub_ps:
; X64:       # BB#0:
; X64-NEXT:    hsubps %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone

; A call to @llvm.x86.sse3.ldu.dq on the i8* argument is expected to be
; selected as a single lddqu into %xmm0; the <16 x i8> result is bitcast to
; <2 x i64> before being returned. On the i386 run the pointer is first
; loaded from the stack into %eax; on the x86_64 run it is used directly
; from %rdi.
define <2 x i64> @test_mm_lddqu_si128(i8* %a0) {
; X32-LABEL: test_mm_lddqu_si128:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    lddqu (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_lddqu_si128:
; X64:       # BB#0:
; X64-NEXT:    lddqu (%rdi), %xmm0
; X64-NEXT:    retq
  %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
  %res = bitcast <16 x i8> %call to <2 x i64>
  ret <2 x i64> %res
}
declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly

; Loads one double through %a0 and inserts it into both lanes of a
; <2 x double>. The load plus the two insertelement instructions are
; expected to be emitted as a single movddup with a memory operand. On the
; i386 run the pointer is first loaded from the stack into %eax.
define <2 x double> @test_mm_loaddup_pd(double* %a0) {
; X32-LABEL: test_mm_loaddup_pd:
; X32:       # BB#0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movddup (%eax), %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_loaddup_pd:
; X64:       # BB#0:
; X64-NEXT:    movddup (%rdi), %xmm0
; X64-NEXT:    retq
  %ld = load double, double* %a0
  %res0 = insertelement <2 x double> undef, double %ld, i32 0
  %res1 = insertelement <2 x double> %res0, double %ld, i32 1
  ret <2 x double> %res1
}

; Broadcasts element 0 of %a0 to both lanes using a shufflevector with an
; all-zero mask. Expected to be emitted as a register-to-register movddup
; producing xmm0[0,0] on both the i386 and x86_64 runs.
define <2 x double> @test_mm_movedup_pd(<2 x double> %a0) {
; X32-LABEL: test_mm_movedup_pd:
; X32:       # BB#0:
; X32-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movedup_pd:
; X64:       # BB#0:
; X64-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    retq
  %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> zeroinitializer
  ret <2 x double> %res
}

; Duplicates the odd-indexed elements of %a0 using a shufflevector with
; mask <1, 1, 3, 3>. Expected to be emitted as a single movshdup producing
; xmm0[1,1,3,3] on both the i386 and x86_64 runs.
define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_movehdup_ps:
; X32:       # BB#0:
; X32-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_movehdup_ps:
; X64:       # BB#0:
; X64-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %res
}

; Duplicates the even-indexed elements of %a0 using a shufflevector with
; mask <0, 0, 2, 2>. Expected to be emitted as a single movsldup producing
; xmm0[0,0,2,2] on both the i386 and x86_64 runs.
define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_moveldup_ps:
; X32:       # BB#0:
; X32-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_moveldup_ps:
; X64:       # BB#0:
; X64-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
; X64-NEXT:    retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %res
}
