• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7   | FileCheck %s --check-prefix=X86
3; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=X64
4
5; Make sure that we don't crash when legalizing vselect and vsetcc and that
6; we are able to generate vector blend instructions.
7
; simple_widen: selects between the two <2 x float> arguments %a and %b using
; an undef <2 x i1> mask and stores the result through an undef pointer.
; The autogenerated assertions below expect, on both targets, just a movlps
; store of %xmm1 followed by the return (no blend instruction is expected).
8define void @simple_widen(<2 x float> %a, <2 x float> %b) {
9; X86-LABEL: simple_widen:
10; X86:       # %bb.0: # %entry
11; X86-NEXT:    movlps %xmm1, (%eax)
12; X86-NEXT:    retl
13;
14; X64-LABEL: simple_widen:
15; X64:       # %bb.0: # %entry
16; X64-NEXT:    movlps %xmm1, (%rax)
17; X64-NEXT:    retq
18entry:
; The mask is undef, so either operand is an acceptable result of the select.
19  %0 = select <2 x i1> undef, <2 x float> %a, <2 x float> %b
; The destination pointer is undef as well; only the store shape is checked.
20  store <2 x float> %0, <2 x float>* undef
21  ret void
22}
23
; complex_inreg_work: builds a <2 x i1> mask by comparing %c with itself
; (fcmp oeq), uses it to select between %a and %b, and stores the result
; through an undef pointer.
; The assertions below expect, on both targets, a cmpordps of %xmm2 with
; itself, the compare result copied into %xmm0, a blendvps, and a movlps store.
24define void @complex_inreg_work(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
25; X86-LABEL: complex_inreg_work:
26; X86:       # %bb.0: # %entry
27; X86-NEXT:    movaps %xmm0, %xmm3
28; X86-NEXT:    cmpordps %xmm2, %xmm2
29; X86-NEXT:    movaps %xmm2, %xmm0
30; X86-NEXT:    blendvps %xmm0, %xmm3, %xmm1
31; X86-NEXT:    movlps %xmm1, (%eax)
32; X86-NEXT:    retl
33;
34; X64-LABEL: complex_inreg_work:
35; X64:       # %bb.0: # %entry
36; X64-NEXT:    movaps %xmm0, %xmm3
37; X64-NEXT:    cmpordps %xmm2, %xmm2
38; X64-NEXT:    movaps %xmm2, %xmm0
39; X64-NEXT:    blendvps %xmm0, %xmm3, %xmm1
40; X64-NEXT:    movlps %xmm1, (%rax)
41; X64-NEXT:    retq
42entry:
; Ordered-equal compare of %c against itself; the expected assembly shows
; this as a single cmpordps.
43  %0 = fcmp oeq <2 x float> %c, %c
; Per-lane select driven by the compare result (the blendvps in the output).
44  %1 = select <2 x i1> %0, <2 x float> %a, <2 x float> %b
45  store <2 x float> %1, <2 x float>* undef
46  ret void
47}
48
; zero_test: takes no arguments; selects between an undef vector and
; zeroinitializer under an undef mask and stores the result through an undef
; pointer.
; The assertions below expect, on both targets, a register zeroed with xorps
; and then stored with movlps.
49define void @zero_test() {
50; X86-LABEL: zero_test:
51; X86:       # %bb.0: # %entry
52; X86-NEXT:    xorps %xmm0, %xmm0
53; X86-NEXT:    movlps %xmm0, (%eax)
54; X86-NEXT:    retl
55;
56; X64-LABEL: zero_test:
57; X64:       # %bb.0: # %entry
58; X64-NEXT:    xorps %xmm0, %xmm0
59; X64-NEXT:    movlps %xmm0, (%rax)
60; X64-NEXT:    retq
61entry:
; Mask and true-operand are both undef; the false-operand is all zeros, which
; is the value the expected output stores.
62  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
63  store <2 x float> %0, <2 x float>* undef
64  ret void
65}
66
; full_test: a longer <2 x float> sequence built on stack slots. It loads a
; vector, converts it to integer and back, then chains two compare+select
; pairs and stores the result to two further stack slots.
; The assertions below expect, on both targets, a cvttps2dq/cvtdq2ps pair,
; cmpltps and cmpeqps compares, two blendvps instructions and two movlps
; stores. The 32-bit output additionally adjusts %esp by 60 bytes around the
; body, while the 64-bit output addresses its slots at negative offsets from
; %rsp without adjusting it.
67define void @full_test() {
68; X86-LABEL: full_test:
69; X86:       # %bb.0: # %entry
70; X86-NEXT:    subl $60, %esp
71; X86-NEXT:    .cfi_def_cfa_offset 64
72; X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
73; X86-NEXT:    cvttps2dq %xmm2, %xmm0
74; X86-NEXT:    cvtdq2ps %xmm0, %xmm1
75; X86-NEXT:    xorps %xmm0, %xmm0
76; X86-NEXT:    cmpltps %xmm2, %xmm0
77; X86-NEXT:    movaps {{.*#+}} xmm3 = <1.0E+0,1.0E+0,u,u>
78; X86-NEXT:    addps %xmm1, %xmm3
79; X86-NEXT:    movaps %xmm1, %xmm4
80; X86-NEXT:    blendvps %xmm0, %xmm3, %xmm4
81; X86-NEXT:    cmpeqps %xmm2, %xmm1
82; X86-NEXT:    movaps %xmm1, %xmm0
83; X86-NEXT:    blendvps %xmm0, %xmm2, %xmm4
84; X86-NEXT:    movlps %xmm4, {{[0-9]+}}(%esp)
85; X86-NEXT:    movlps %xmm4, {{[0-9]+}}(%esp)
86; X86-NEXT:    addl $60, %esp
87; X86-NEXT:    .cfi_def_cfa_offset 4
88; X86-NEXT:    retl
89;
90; X64-LABEL: full_test:
91; X64:       # %bb.0: # %entry
92; X64-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
93; X64-NEXT:    cvttps2dq %xmm2, %xmm0
94; X64-NEXT:    cvtdq2ps %xmm0, %xmm1
95; X64-NEXT:    xorps %xmm0, %xmm0
96; X64-NEXT:    cmpltps %xmm2, %xmm0
97; X64-NEXT:    movaps {{.*#+}} xmm3 = <1.0E+0,1.0E+0,u,u>
98; X64-NEXT:    addps %xmm1, %xmm3
99; X64-NEXT:    movaps %xmm1, %xmm4
100; X64-NEXT:    blendvps %xmm0, %xmm3, %xmm4
101; X64-NEXT:    cmpeqps %xmm2, %xmm1
102; X64-NEXT:    movaps %xmm1, %xmm0
103; X64-NEXT:    blendvps %xmm0, %xmm2, %xmm4
104; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
105; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
106; X64-NEXT:    retq
107 entry:
; Four stack slots. %Cy300 is never referenced again in this function, and
; %Cy119 is read below without any store to it appearing in this function.
108   %Cy300 = alloca <4 x float>
109   %Cy11a = alloca <2 x float>
110   %Cy118 = alloca <2 x float>
111   %Cy119 = alloca <2 x float>
112   br label %B1
113
114 B1:                                               ; preds = %entry
115   %0 = load <2 x float>, <2 x float>* %Cy119
; Round-trip through i32: %2 is %0 converted to signed integer and back.
116   %1 = fptosi <2 x float> %0 to <2 x i32>
117   %2 = sitofp <2 x i32> %1 to <2 x float>
; First select: lanes where %0 > 0.0 take %2 + 1.0, other lanes keep %2.
118   %3 = fcmp ogt <2 x float> %0, zeroinitializer
119   %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
120   %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
; Second select: lanes where the round-tripped value equals the original
; take %0, other lanes keep the first select's result.
121   %6 = fcmp oeq <2 x float> %2, %0
122   %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
; Store, reload, and store again to a second slot; the expected output shows
; two movlps stores of the same register.
123   store <2 x float> %7, <2 x float>* %Cy118
124   %8 = load <2 x float>, <2 x float>* %Cy118
125   store <2 x float> %8, <2 x float>* %Cy11a
126   ret void
127}
128