; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32

define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_move_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <4 x float> %__B, i32 0
  %__W.elt.i = extractelement <4 x float> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, float %__B.elt.i, float %__W.elt.i
  %vecins.i = insertelement <4 x float> %__A, float %vecext1.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_move_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <4 x float> %__B, i32 0
  %cond.i = select i1 %tobool.i, float %vecext.i, float 0.000000e+00
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_move_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <2 x double> %__B, i32 0
  %__W.elt.i = extractelement <2 x double> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, double %__B.elt.i, double %__W.elt.i
  %vecins.i = insertelement <2 x double> %__A, double %vecext1.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_move_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
; CHECK32-NEXT:    kmovw %eax, %k1
; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <2 x double> %__B, i32 0
  %cond.i = select i1 %tobool.i, double %vecext.i, double 0.000000e+00
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define void @test_mm_mask_store_ss(float* %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast float* %__W to <16 x float>*
  %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %1 = and i8 %__U, 1
  %conv2.i = zext i8 %1 to i16
  %2 = bitcast i16 %conv2.i to <16 x i1>
  tail call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> %shuffle.i.i, <16 x float>* %0, i32 16, <16 x i1> %2) #5
  ret void
}

define void @test_mm_mask_store_sd(double* %__W, i8 zeroext %__U, <2 x double> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast double* %__W to <8 x double>*
  %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  tail call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> %shuffle.i.i, <8 x double>* %0, i32 16, <8 x i1> %2) #5
  ret void
}

define <4 x float> @test_mm_mask_load_ss(<4 x float> %__A, i8 zeroext %__U, float* %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %0 = bitcast float* %__W to <16 x float>*
  %shuffle.i.i = shufflevector <4 x float> %shuffle.i, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %1 = and i8 %__U, 1
  %conv2.i = zext i8 %1 to i16
  %2 = bitcast i16 %conv2.i to <16 x i1>
  %3 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 16, <16 x i1> %2, <16 x float> %shuffle.i.i) #5
  %shuffle4.i = shufflevector <16 x float> %3, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle4.i
}

define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, double* %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle5.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %0 = bitcast double* %__W to <8 x double>*
  %shuffle.i.i = shufflevector <2 x double> %shuffle5.i, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %3 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 16, <8 x i1> %2, <8 x double> %shuffle.i.i) #5
  %shuffle3.i = shufflevector <8 x double> %3, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle3.i
}

define <4 x float> @test_mm_maskz_load_ss(i8 zeroext %__U, float* %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_ss:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast float* %__W to <16 x float>*
  %1 = and i8 %__U, 1
  %conv2.i = zext i8 %1 to i16
  %2 = bitcast i16 %conv2.i to <16 x i1>
  %3 = tail call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %0, i32 16, <16 x i1> %2, <16 x float> zeroinitializer) #5
  %shuffle.i = shufflevector <16 x float> %3, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i
}

define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, double* %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_sd:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast double* %__W to <8 x double>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %3 = tail call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* %0, i32 16, <8 x i1> %2, <8 x double> zeroinitializer) #5
  %shuffle.i = shufflevector <8 x double> %3, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i
}

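; Note: the tests above use the wide 512-bit masked load/store intrinsics
; (v16f32/v8f64), matching clang's older codegen, which widened the 128-bit
; scalar masked operations to 512 bits.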
; The tests below match clang's newer codegen that uses 128-bit masked load/stores.

define void @test_mm_mask_store_ss_2(float* %__P, i8 zeroext %__U, <4 x float> %__A) {
; CHECK64-LABEL: test_mm_mask_store_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast float* %__P to <4 x float>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %__A, <4 x float>* %0, i32 1, <4 x i1> %extract.i)
  ret void
}

define void @test_mm_mask_store_sd_2(double* %__P, i8 zeroext %__U, <2 x double> %__A) {
; CHECK64-LABEL: test_mm_mask_store_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %esi, %k1
; CHECK64-NEXT:    vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_store_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast double* %__P to <2 x double>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  tail call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %__A, <2 x double>* %0, i32 1, <2 x i1> %extract.i)
  ret void
}

define <4 x float> @test_mm_mask_load_ss_2(<4 x float> %__A, i8 zeroext %__U, float* readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %0 = bitcast float* %__W to <4 x float>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> %shuffle.i)
  ret <4 x float> %3
}

define <4 x float> @test_mm_maskz_load_ss_2(i8 zeroext %__U, float* readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_ss_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast float* %__W to <4 x float>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer)
  ret <4 x float> %3
}

define <2 x double> @test_mm_mask_load_sd_2(<2 x double> %__A, i8 zeroext %__U, double* readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_mask_load_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT:    retl
entry:
  %shuffle3.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %0 = bitcast double* %__W to <2 x double>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> %shuffle3.i)
  ret <2 x double> %3
}

define <2 x double> @test_mm_maskz_load_sd_2(i8 zeroext %__U, double* readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_sd_2:
; CHECK64:       # %bb.0: # %entry
; CHECK64-NEXT:    kmovw %edi, %k1
; CHECK64-NEXT:    vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT:    retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd_2:
; CHECK32:       # %bb.0: # %entry
; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; CHECK32-NEXT:    kmovw %ecx, %k1
; CHECK32-NEXT:    vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT:    retl
entry:
  %0 = bitcast double* %__W to <2 x double>*
  %1 = and i8 %__U, 1
  %2 = bitcast i8 %1 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %3 = tail call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %0, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer)
  ret <2 x double> %3
}

declare void @llvm.masked.store.v16f32.p0v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>) #3

declare void @llvm.masked.store.v8f64.p0v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>) #3

declare <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>) #4

declare <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>) #4

declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>)

declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)

declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)

declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)