; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

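; VectorCombine matches two extractelements from one source vector whose
; scalar compares feed a single bitwise logic op. When the target's cost model
; says the vector form is cheaper, it rewrites the pattern as one vector
; compare, a shuffle that lines up the second lane, a vector logic op, and a
; single extract. Whether the fold fires depends on the cost model, which is
; why some tests below have separate SSE and AVX expectations.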
define i1 @fcmp_and_v2f64(<2 x double> %a) {
; SSE-LABEL: @fcmp_and_v2f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[A]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_and_v2f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], <double 4.200000e+01, double -8.000000e+00>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> undef, <2 x i32> <i32 1, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <2 x double> %a, i32 0
  %e2 = extractelement <2 x double> %a, i32 1
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

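; The same fold handles 'or' and wider vectors. Only lanes 0 and 2 are read,
; so the remaining lanes of the expected compare constant are undef.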
define i1 @fcmp_or_v4f64(<4 x double> %a) {
; SSE-LABEL: @fcmp_or_v4f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <4 x double> [[A]], i64 2
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = or i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_or_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x double> [[A:%.*]], <double 4.200000e+01, double undef, double -8.000000e+00, double undef>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x double> %a, i32 0
  %e2 = extractelement <4 x double> %a, i64 2
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = or i1 %cmp1, %cmp2
  ret i1 %r
}

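; For this 128-bit integer compare the fold fires on both runs, so SSE and
; AVX share one set of CHECK lines.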
define i1 @icmp_xor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @icmp_xor_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 undef, i32 -8, i32 undef, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}

; An 'add' of i1 values is not the canonical form (instcombine would fold it
; to 'xor'), but the transform still applies.

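; For this 256-bit integer vector, only the AVX run forms the vector compare;
; the SSE run keeps the scalar sequence.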
define i1 @icmp_add_v8i32(<8 x i32> %a) {
; SSE-LABEL: @icmp_add_v8i32(
; SSE-NEXT:    [[E1:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 7
; SSE-NEXT:    [[E2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[E1]], 42
; SSE-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[E2]], -8
; SSE-NEXT:    [[R:%.*]] = add i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @icmp_add_v8i32(
; AVX-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 undef, i32 undef, i32 -8, i32 undef, i32 undef, i32 undef, i32 undef, i32 42>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-NEXT:    [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <8 x i32> %a, i32 7
  %e2 = extractelement <8 x i32> %a, i32 2
  %cmp1 = icmp eq i32 %e1, 42
  %cmp2 = icmp eq i32 %e2, -8
  %r = add i1 %cmp1, %cmp2
  ret i1 %r
}

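; The remaining tests check patterns that must not be transformed.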
; Negative test - both extracts use the same index, so this pattern is left
; for CSE/InstSimplify instead.

define i1 @same_extract_index(<4 x i32> %a) {
; CHECK-LABEL: @same_extract_index(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 2
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp ugt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - need identical predicates.

define i1 @different_preds(<4 x i32> %a) {
; CHECK-LABEL: @different_preds(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - both extracts must come from a single source vector.

define i1 @different_source_vec(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @different_source_vec(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %b, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - don't try this with scalable vectors; the fold only applies
; to fixed-length vector types.

define i1 @scalable(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @scalable(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <vscale x 4 x i32> [[A:%.*]], i32 3
; CHECK-NEXT:    [[E2:%.*]] = extractelement <vscale x 4 x i32> [[A]], i32 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = xor i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <vscale x 4 x i32> %a, i32 3
  %e2 = extractelement <vscale x 4 x i32> %a, i32 1
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}
