; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; Masked memory intrinsics exercised by the tests in this file. In every
; signature the i32 operand is the alignment and the vector-of-i1 operand
; selects which lanes are accessed.
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)

; All-false mask: no lane is read, so the masked load is expected to fold
; away and the function returns the passthru operand directly.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru)  {
; CHECK-LABEL: @load_zeromask(
; CHECK-NEXT:    ret <2 x double> [[PASSTHRU:%.*]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}

; All-true constant mask: the masked load is expected to become an ordinary
; vector load that carries the call's alignment operand (2).
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru)  {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <2 x double> [[UNMASKEDLOAD]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Mask is <true, undef>: the expected output is the same ordinary vector load
; as for an all-true mask, i.e. the undef lane is handled like an active lane.
define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru)  {
; CHECK-LABEL: @load_undefmask(
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <2 x double> [[UNMASKEDLOAD]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
  ret <2 x double> %res
}

; External global used only to build a constant-expression mask lane below.
@G = external global i8

; Lane 1 of the mask is a constant expression (ptrtoint of @G) rather than a
; literal true/false, so the masked load is expected to be left unchanged.
define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru)  {
; CHECK-LABEL: @load_cemask(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Only lane 0 is loaded, so only lane 1 of the passthru is observable. The
; expected output drops the insertelement into passthru lane 0 and keeps the
; one into lane 1.
define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt)  {
; CHECK-LABEL: @load_lane0(
; CHECK-NEXT:    [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Despite the name, this exercises masked.gather: lanes 0, 2 and 3 are active
; and only element 2 of the result is extracted. In the expected output the
; GEP index for the masked-off lane (lane 1) becomes undef, while the gather
; and the extractelement are kept.
define double @load_all(double* %base, double %pt)  {
; CHECK-LABEL: @load_all(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
; CHECK-NEXT:    ret double [[ELT]]
;
  %ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
  %elt = extractelement <4 x double> %res, i64 2
  ret double %elt
}

; Variable mask and a pointer with no dereferenceable attribute: the masked
; load is expected to stay. Only the passthru splat is rewritten, from two
; insertelements into insertelement + shufflevector.
define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @load_generic(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; The pointer is dereferenceable(16) and align 4, covering the whole
; <2 x double>. The expected output replaces the masked load with an
; unconditional load followed by a select between the loaded value and the
; passthru, keyed on the mask.
define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Same as load_speculative but the pointer parameter has no align attribute.
; The expected output is still an unconditional load plus select, and the
; load's "align 4" matches the alignment operand of the masked.load call.
define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Can't speculate since only half of the required size is known
; dereferenceable: the pointer is dereferenceable(8) but the access is a
; <2 x double>. The masked load is expected to stay; the expected output only
; adds nonnull to the pointer argument.
define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Can only speculate one lane (but it's the only one active): the pointer is
; dereferenceable(8) and mask lane 1 is forced to false by the insertelement.
; The expected output recorded below still keeps the masked load.
define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
  ret <2 x double> %res
}

; All-false mask: no lane is written, so the masked store is expected to be
; removed entirely.
define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val)  {
; CHECK-LABEL: @store_zeromask(
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
  ret void
}

; All-true constant mask: the masked store is expected to become an ordinary
; vector store carrying the call's alignment operand (4).
define void @store_onemask(<2 x double>* %ptr, <2 x double> %val)  {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT:    store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
  ret void
}

; Only lane 0 is stored, so lane 1 of the value vector is never written to
; memory. The expected output drops the insertelement into lane 1 and stores
; the vector that has only lane 0 populated.
define void @store_demandedelts(<2 x double>* %ptr, double %val)  {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT:    [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
  %valvec1 = insertelement <2 x double> undef, double %val, i32 0
  %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
  ret void
}

; Baseline: variable mask, pointers and passthru. The gather is expected to
; pass through unchanged.
define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru)  {
; CHECK-LABEL: @gather_generic(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
  ret <2 x double> %res
}


; All-false mask: no lane is gathered, so the call is expected to fold to the
; passthru operand.
define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru)  {
; CHECK-LABEL: @gather_zeromask(
; CHECK-NEXT:    ret <2 x double> [[PASSTHRU:%.*]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}


; All-true mask: every lane comes from memory, so the passthru is never used.
; The expected output keeps the gather but replaces its passthru operand with
; undef.
define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru)  {
; CHECK-LABEL: @gather_onemask(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Only lane 2 is gathered. In the expected output the GEP indices of the
; masked-off lanes (0, 1, 3) become undef, and lane 2 of the passthru shuffle
; mask becomes undef because that lane always comes from memory.
define <4 x double> @gather_lane2(double* %base, double %pt)  {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <4 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
; CHECK-NEXT:    ret <4 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %pt_v1 = insertelement <4 x double> undef, double %pt, i64 0
  %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2)
  ret <4 x double> %res
}

; Mask lane 1 is forced to false via insertelement while lane 0 stays
; variable. The expected output leaves the GEP, the mask and the gather as
; written; only the passthru splat is rewritten as insertelement +
; shufflevector.
define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @gather_lane0_maybe(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
  %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
  %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
  ret <2 x double> %res
}

; The input IR is identical to gather_lane0_maybe (no dereferenceable
; attribute on %base), and so is the expected output: the gather is kept.
; NOTE(review): the "_spec" suffix suggests a speculation variant was
; intended; confirm whether %base should carry a dereferenceable attribute.
define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask)  {
; CHECK-LABEL: @gather_lane0_maybe_spec(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
  %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
  %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
  ret <2 x double> %res
}


; All-false mask: no lane is written, so the scatter is expected to be removed
; entirely.
define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val)  {
; CHECK-LABEL: @scatter_zeromask(
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer)
  ret void
}

; Only lane 0 is scattered. In the expected output the GEP index for lane 1
; becomes undef and the insertelement into lane 1 of the value vector is
; dropped, since neither is used by the single active lane.
define void @scatter_demandedelts(double* %ptr, double %val)  {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef>
; CHECK-NEXT:    [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
  %ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
  %valvec1 = insertelement <2 x double> undef, double %val, i32 0
  %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
  ret void
}
