; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; InstCombine tests for the masked load / store / gather / scatter intrinsics.
; Each function feeds one mask shape (all-false, all-true, partially constant,
; or fully variable) to an intrinsic; the assertions record the IR expected
; after the pass has run.

declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32, <4 x i1> %mask, <4 x double> %passthru)
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)

; All-false mask: the expected output returns the passthru value directly.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_zeromask(
; CHECK-NEXT:    ret <2 x double> [[PASSTHRU:%.*]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}

; All-true mask: the expected output is a plain vector load that keeps the
; alignment operand (2) of the intrinsic call.
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <2 x double> [[UNMASKEDLOAD]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Mask <true, undef>: the expected output is the same plain load as for the
; all-true mask.
define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_undefmask(
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 2
; CHECK-NEXT:    ret <2 x double> [[UNMASKEDLOAD]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
  ret <2 x double> %res
}

@G = external global i8

; One mask lane is a constant expression (ptrtoint of @G) rather than a literal:
; the expected output keeps the masked load call as written.
define <2 x double> @load_cemask(<2 x double>* %ptr, <2 x double> %passthru) {
; CHECK-LABEL: @load_cemask(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 ptrtoint (i8* @G to i1)>, <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 ptrtoint (i8* @G to i1)>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Only lane 0 is loaded, so only lane 1 of the passthru is used: the expected
; output drops the insertelement that fills passthru lane 0.
define <2 x double> @load_lane0(<2 x double>* %ptr, double %pt) {
; CHECK-LABEL: @load_lane0(
; CHECK-NEXT:    [[PTV2:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 false>, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Gather with lane 1 masked off: the expected output replaces the lane-1 index
; of the address computation with undef and leaves the gather call in place.
define double @load_all(double* %base, double %pt) {
; CHECK-LABEL: @load_all(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 0, i64 undef, i64 2, i64 3>
; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT:    [[ELT:%.*]] = extractelement <4 x double> [[RES]], i64 2
; CHECK-NEXT:    ret double [[ELT]]
;
  %ptrs = getelementptr double, double* %base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x double> undef)
  %elt = extractelement <4 x double> %res, i64 2
  ret double %elt
}

; Fully variable mask and no attributes on the pointer: the expected output
; keeps the masked load; only the passthru splat is rewritten as a shufflevector.
define <2 x double> @load_generic(<2 x double>* %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_generic(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Variable mask, but the pointer is dereferenceable(16) and align 4: the
; expected output is an unmasked load followed by a select on the mask.
define <2 x double> @load_speculative(<2 x double>* dereferenceable(16) align 4 %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Same as @load_speculative but without an align attribute on the pointer: the
; expected output is still an unmasked load (align 4) plus a select.
define <2 x double> @load_speculative_less_aligned(<2 x double>* dereferenceable(16) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_speculative_less_aligned(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <2 x double>, <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[MASK:%.*]], <2 x double> [[UNMASKEDLOAD]], <2 x double> [[PTV2]]
; CHECK-NEXT:    ret <2 x double> [[TMP1]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Can't speculate: only half of the required size (8 of 16 bytes) is known to
; be dereferenceable, so the masked load is expected to remain.

define <2 x double> @load_spec_neg_size(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_neg_size(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask, <2 x double> %ptv2)
  ret <2 x double> %res
}

; Can only speculate one lane (but it's the only one that may be active, since
; mask lane 1 is forced to false). The recorded output still keeps the masked
; load call.
define <2 x double> @load_spec_lan0(<2 x double>* dereferenceable(8) %ptr, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @load_spec_lan0(
; CHECK-NEXT:    [[PTV1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PTV2:%.*]] = shufflevector <2 x double> [[PTV1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* nonnull [[PTR:%.*]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PTV2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptv1 = insertelement <2 x double> undef, double %pt, i64 0
  %ptv2 = insertelement <2 x double> %ptv1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 4, <2 x i1> %mask2, <2 x double> %ptv2)
  ret <2 x double> %res
}

; All-false mask: the masked store is expected to disappear entirely.
define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_zeromask(
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> zeroinitializer)
  ret void
}

; All-true mask: the expected output is a plain vector store with align 4.
define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
; CHECK-LABEL: @store_onemask(
; CHECK-NEXT:    store <2 x double> [[VAL:%.*]], <2 x double>* [[PTR:%.*]], align 4
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
  ret void
}

; Only lane 0 is stored: the expected output drops the insertelement that
; fills lane 1 of the stored value.
define void @store_demandedelts(<2 x double>* %ptr, double %val) {
; CHECK-LABEL: @store_demandedelts(
; CHECK-NEXT:    [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
; CHECK-NEXT:    call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[VALVEC1]], <2 x double>* [[PTR:%.*]], i32 4, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
  %valvec1 = insertelement <2 x double> undef, double %val, i32 0
  %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
  call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %valvec2, <2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>)
  ret void
}

; Fully variable mask: the gather call is expected to be left untouched.
define <2 x double> @gather_generic(<2 x double*> %ptrs, <2 x i1> %mask, <2 x double> %passthru) {
; CHECK-LABEL: @gather_generic(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> [[MASK:%.*]], <2 x double> [[PASSTHRU:%.*]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %passthru)
  ret <2 x double> %res
}

; All-false mask: the expected output returns the passthru value directly.
define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_zeromask(
; CHECK-NEXT:    ret <2 x double> [[PASSTHRU:%.*]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}

; All-true mask: the gather call is expected to stay, with its now-unused
; passthru operand replaced by undef.
define <2 x double> @gather_onemask(<2 x double*> %ptrs, <2 x double> %passthru) {
; CHECK-LABEL: @gather_onemask(
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS:%.*]], i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> undef)
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
  ret <2 x double> %res
}

; Only lane 2 is gathered: the expected output sets the address indices of the
; other lanes to undef and makes lane 2 of the passthru shuffle mask undef.
define <4 x double> @gather_lane2(double* %base, double %pt) {
; CHECK-LABEL: @gather_lane2(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <4 x i64> <i64 undef, i64 undef, i64 2, i64 undef>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <4 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <4 x double> [[PT_V1]], <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 0>
; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> [[PTRS]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> [[PT_V2]])
; CHECK-NEXT:    ret <4 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
  %pt_v1 = insertelement <4 x double> undef, double %pt, i64 0
  %pt_v2 = shufflevector <4 x double> %pt_v1, <4 x double> undef, <4 x i32> zeroinitializer
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %ptrs, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %pt_v2)
  ret <4 x double> %res
}

; Mask lane 1 is forced to false while lane 0 stays variable: the expected
; output keeps the full address vector and the gather call; only the passthru
; splat is rewritten as a shufflevector.
define <2 x double> @gather_lane0_maybe(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
  %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
  %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
  ret <2 x double> %res
}

; NOTE(review): the input here is identical to @gather_lane0_maybe (no
; dereferenceable attribute on %base), so this currently records the same
; output. Presumably a speculatable variant was intended - confirm before
; relying on it as separate coverage.
define <2 x double> @gather_lane0_maybe_spec(double* %base, double %pt, <2 x i1> %mask) {
; CHECK-LABEL: @gather_lane0_maybe_spec(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PT_V1:%.*]] = insertelement <2 x double> undef, double [[PT:%.*]], i64 0
; CHECK-NEXT:    [[PT_V2:%.*]] = shufflevector <2 x double> [[PT_V1]], <2 x double> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[MASK2:%.*]] = insertelement <2 x i1> [[MASK:%.*]], i1 false, i64 1
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> [[PTRS]], i32 4, <2 x i1> [[MASK2]], <2 x double> [[PT_V2]])
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %ptrs = getelementptr double, double *%base, <2 x i64> <i64 0, i64 1>
  %pt_v1 = insertelement <2 x double> undef, double %pt, i64 0
  %pt_v2 = insertelement <2 x double> %pt_v1, double %pt, i64 1
  %mask2 = insertelement <2 x i1> %mask, i1 false, i64 1
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask2, <2 x double> %pt_v2)
  ret <2 x double> %res
}

; All-false mask: the masked scatter is expected to disappear entirely.
define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
; CHECK-LABEL: @scatter_zeromask(
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 8, <2 x i1> zeroinitializer)
  ret void
}

; Only lane 0 is scattered: the expected output sets the lane-1 address index
; to undef and drops the insertelement that fills lane 1 of the stored value.
define void @scatter_demandedelts(double* %ptr, double %val) {
; CHECK-LABEL: @scatter_demandedelts(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr double, double* [[PTR:%.*]], <2 x i64> <i64 0, i64 undef>
; CHECK-NEXT:    [[VALVEC1:%.*]] = insertelement <2 x double> undef, double [[VAL:%.*]], i32 0
; CHECK-NEXT:    call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> [[VALVEC1]], <2 x double*> [[PTRS]], i32 8, <2 x i1> <i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
  %ptrs = getelementptr double, double* %ptr, <2 x i64> <i64 0, i64 1>
  %valvec1 = insertelement <2 x double> undef, double %val, i32 0
  %valvec2 = insertelement <2 x double> %valvec1, double %val, i32 1
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %valvec2, <2 x double*> %ptrs, i32 8, <2 x i1> <i1 true, i1 false>)
  ret void
}