1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64 3; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32 4; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL 5; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE 6; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32 7; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR 8; RUN: opt -mtriple=x86_64-apple-darwin -passes=scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR 9; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null 10 11@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16 12 13; SCALAR-LABEL: test1 14; SCALAR: extractelement <16 x float*> 15; SCALAR-NEXT: load float 16; SCALAR-NEXT: insertelement <16 x float> 17; SCALAR-NEXT: extractelement <16 x float*> 18; SCALAR-NEXT: load float 19 20define <16 x float> @test1(float* %base, <16 x i32> %ind) { 21; KNL_64-LABEL: test1: 22; KNL_64: # %bb.0: 23; KNL_64-NEXT: kxnorw %k0, %k0, %k1 24; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 25; KNL_64-NEXT: vmovaps %zmm1, %zmm0 26; KNL_64-NEXT: retq 27; 28; KNL_32-LABEL: test1: 29; KNL_32: # %bb.0: 30; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 31; KNL_32-NEXT: kxnorw %k0, %k0, %k1 32; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 33; KNL_32-NEXT: vmovaps %zmm1, %zmm0 34; KNL_32-NEXT: retl 35; 36; SKX-LABEL: test1: 37; SKX: # %bb.0: 38; SKX-NEXT: kxnorw %k0, %k0, %k1 39; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 40; SKX-NEXT: vmovaps %zmm1, %zmm0 41; SKX-NEXT: retq 42; 43; SKX_32-LABEL: test1: 44; SKX_32: # %bb.0: 45; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 46; SKX_32-NEXT: kxnorw %k0, %k0, %k1 47; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 48; SKX_32-NEXT: vmovaps %zmm1, %zmm0 49; SKX_32-NEXT: retl 50 51 %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 52 %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer 53 54 %sext_ind = sext <16 x i32> %ind to <16 x i64> 55 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind 56 57 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 58 ret <16 x float>%res 59} 60 61declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>) 62declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>) 63declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> ) 64 65 66; SCALAR-LABEL: test2 67; SCALAR: extractelement <16 x float*> 68; 
SCALAR-NEXT: load float 69; SCALAR-NEXT: insertelement <16 x float> 70; SCALAR-NEXT: br label %else 71; SCALAR: else: 72; SCALAR-NEXT: %res.phi.else = phi 73; SCALAR-NEXT: and i16 %{{.*}}, 2 74; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0 75; SCALAR-NEXT: br i1 %{{.*}}, label %cond.load1, label %else2 76 77define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) { 78; KNL_64-LABEL: test2: 79; KNL_64: # %bb.0: 80; KNL_64-NEXT: kmovw %esi, %k1 81; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 82; KNL_64-NEXT: vmovaps %zmm1, %zmm0 83; KNL_64-NEXT: retq 84; 85; KNL_32-LABEL: test2: 86; KNL_32: # %bb.0: 87; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 88; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 89; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 90; KNL_32-NEXT: vmovaps %zmm1, %zmm0 91; KNL_32-NEXT: retl 92; 93; SKX-LABEL: test2: 94; SKX: # %bb.0: 95; SKX-NEXT: kmovw %esi, %k1 96; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 97; SKX-NEXT: vmovaps %zmm1, %zmm0 98; SKX-NEXT: retq 99; 100; SKX_32-LABEL: test2: 101; SKX_32: # %bb.0: 102; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 103; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 104; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 105; SKX_32-NEXT: vmovaps %zmm1, %zmm0 106; SKX_32-NEXT: retl 107 108 %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 109 %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer 110 111 %sext_ind = sext <16 x i32> %ind to <16 x i64> 112 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind 113 %imask = bitcast i16 %mask to <16 x i1> 114 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef) 115 ret <16 x float> %res 116} 117 118define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) { 119; KNL_64-LABEL: test3: 120; KNL_64: # %bb.0: 121; KNL_64-NEXT: kmovw %esi, %k1 122; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} 123; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0 124; KNL_64-NEXT: retq 125; 126; KNL_32-LABEL: test3: 127; KNL_32: # %bb.0: 128; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 129; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 130; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} 131; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0 132; KNL_32-NEXT: retl 133; 134; SKX-LABEL: test3: 135; SKX: # %bb.0: 136; SKX-NEXT: kmovw %esi, %k1 137; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} 138; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 139; SKX-NEXT: retq 140; 141; SKX_32-LABEL: test3: 142; SKX_32: # %bb.0: 143; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 144; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 145; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} 146; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0 147; SKX_32-NEXT: retl 148 149 %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 150 %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer 151 152 %sext_ind = sext <16 x i32> %ind to <16 x i64> 153 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind 154 %imask = bitcast i16 %mask to <16 x i1> 155 %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) 156 ret <16 x i32> %res 157} 158 159 160define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) { 161; KNL_64-LABEL: test4: 162; KNL_64: # %bb.0: 163; KNL_64-NEXT: kmovw %esi, %k1 164; 
KNL_64-NEXT: kmovw %k1, %k2 165; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2} 166; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2 167; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} 168; KNL_64-NEXT: vpaddd %zmm2, %zmm1, %zmm0 169; KNL_64-NEXT: retq 170; 171; KNL_32-LABEL: test4: 172; KNL_32: # %bb.0: 173; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 174; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 175; KNL_32-NEXT: kmovw %k1, %k2 176; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2} 177; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2 178; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} 179; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 180; KNL_32-NEXT: retl 181; 182; SKX-LABEL: test4: 183; SKX: # %bb.0: 184; SKX-NEXT: kmovw %esi, %k1 185; SKX-NEXT: kmovw %k1, %k2 186; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2} 187; SKX-NEXT: vmovdqa64 %zmm1, %zmm2 188; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} 189; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0 190; SKX-NEXT: retq 191; 192; SKX_32-LABEL: test4: 193; SKX_32: # %bb.0: 194; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 195; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 196; SKX_32-NEXT: kmovw %k1, %k2 197; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2} 198; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2 199; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} 200; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 201; SKX_32-NEXT: retl 202 203 %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 204 %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer 205 206 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind 207 %imask = bitcast i16 %mask to <16 x i1> 208 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef) 209 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) 210 %res = add <16 x i32> %gt1, %gt2 211 ret <16 x i32> %res 212} 213 214 215; SCALAR-LABEL: test5 216; SCALAR: and i16 %scalar_mask, 1 217; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0 218; SCALAR-NEXT: br i1 %{{.*}}, label %cond.store, label %else 219; SCALAR: cond.store: 220; SCALAR-NEXT: %Elt0 = extractelement <16 x i32> %val, i64 0 221; SCALAR-NEXT: %Ptr0 = extractelement <16 x i32*> %gep.random, i64 0 222; SCALAR-NEXT: store i32 %Elt0, i32* %Ptr0, align 4 223; SCALAR-NEXT: br label %else 224; SCALAR: else: 225; SCALAR-NEXT: and i16 %scalar_mask, 2 226; SCALAR-NEXT: icmp ne i16 %{{.*}}, 0 227; SCALAR-NEXT: br i1 %{{.*}}, label %cond.store1, label %else2 228 229define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { 230; KNL_64-LABEL: test5: 231; KNL_64: # %bb.0: 232; KNL_64-NEXT: kmovw %esi, %k1 233; KNL_64-NEXT: kmovw %k1, %k2 234; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2} 235; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} 236; KNL_64-NEXT: vzeroupper 237; KNL_64-NEXT: retq 238; 239; KNL_32-LABEL: test5: 240; KNL_32: # %bb.0: 241; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 242; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 243; KNL_32-NEXT: kmovw %k1, %k2 244; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2} 245; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} 246; KNL_32-NEXT: vzeroupper 247; KNL_32-NEXT: retl 248; 249; SKX-LABEL: test5: 250; SKX: # %bb.0: 251; SKX-NEXT: kmovw %esi, %k1 252; SKX-NEXT: kmovw %k1, %k2 253; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2} 254; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1} 
255; SKX-NEXT: vzeroupper 256; SKX-NEXT: retq 257; 258; SKX_32-LABEL: test5: 259; SKX_32: # %bb.0: 260; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 261; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 262; SKX_32-NEXT: kmovw %k1, %k2 263; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2} 264; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1} 265; SKX_32-NEXT: vzeroupper 266; SKX_32-NEXT: retl 267 268 %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0 269 %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer 270 271 %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind 272 %imask = bitcast i16 %mask to <16 x i1> 273 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) 274 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) 275 ret void 276} 277 278declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> ) 279declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> ) 280 281 282; SCALAR-LABEL: test6 283; SCALAR: store i32 %Elt0, i32* %Ptr01, align 4 284; SCALAR-NEXT: %Elt1 = extractelement <8 x i32> %a1, i64 1 285; SCALAR-NEXT: %Ptr12 = extractelement <8 x i32*> %ptr, i64 1 286; SCALAR-NEXT: store i32 %Elt1, i32* %Ptr12, align 4 287; SCALAR-NEXT: %Elt2 = extractelement <8 x i32> %a1, i64 2 288; SCALAR-NEXT: %Ptr23 = extractelement <8 x i32*> %ptr, i64 2 289; SCALAR-NEXT: store i32 %Elt2, i32* %Ptr23, align 4 290 291define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) { 292; KNL_64-LABEL: test6: 293; KNL_64: # %bb.0: 294; KNL_64-NEXT: kxnorw %k0, %k0, %k1 295; KNL_64-NEXT: kxnorw %k0, %k0, %k2 296; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} 297; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} 298; KNL_64-NEXT: vmovdqa %ymm2, %ymm0 299; KNL_64-NEXT: retq 300; 301; KNL_32-LABEL: test6: 302; KNL_32: # %bb.0: 303; KNL_32-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 304; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 305; KNL_32-NEXT: movw $255, %ax 306; KNL_32-NEXT: kmovw %eax, %k1 307; KNL_32-NEXT: kmovw %k1, %k2 308; KNL_32-NEXT: vpgatherdd (,%zmm1), %zmm2 {%k2} 309; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1} 310; KNL_32-NEXT: vmovdqa %ymm2, %ymm0 311; KNL_32-NEXT: retl 312; 313; SKX-LABEL: test6: 314; SKX: # %bb.0: 315; SKX-NEXT: kxnorw %k0, %k0, %k1 316; SKX-NEXT: kxnorw %k0, %k0, %k2 317; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} 318; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} 319; SKX-NEXT: vmovdqa %ymm2, %ymm0 320; SKX-NEXT: retq 321; 322; SKX_32-LABEL: test6: 323; SKX_32: # %bb.0: 324; SKX_32-NEXT: kxnorw %k0, %k0, %k1 325; SKX_32-NEXT: kxnorw %k0, %k0, %k2 326; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2} 327; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1} 328; SKX_32-NEXT: vmovdqa %ymm2, %ymm0 329; SKX_32-NEXT: retl 330 331 %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) 332 333 call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) 334 ret <8 x i32>%a 335} 336 337define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) { 338; 339; KNL_64-LABEL: test7: 340; KNL_64: # %bb.0: 341; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 
342; KNL_64-NEXT: kmovw %esi, %k0 343; KNL_64-NEXT: kshiftlw $8, %k0, %k0 344; KNL_64-NEXT: kshiftrw $8, %k0, %k1 345; KNL_64-NEXT: kmovw %k1, %k2 346; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2} 347; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm2 348; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} 349; KNL_64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 350; KNL_64-NEXT: retq 351; 352; KNL_32-LABEL: test7: 353; KNL_32: # %bb.0: 354; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 355; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 356; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 357; KNL_32-NEXT: kmovw %ecx, %k0 358; KNL_32-NEXT: kshiftlw $8, %k0, %k0 359; KNL_32-NEXT: kshiftrw $8, %k0, %k1 360; KNL_32-NEXT: kmovw %k1, %k2 361; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2} 362; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2 363; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} 364; KNL_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0 365; KNL_32-NEXT: retl 366; 367; SKX-LABEL: test7: 368; SKX: # %bb.0: 369; SKX-NEXT: kmovw %esi, %k1 370; SKX-NEXT: kmovw %k1, %k2 371; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2} 372; SKX-NEXT: vmovdqa %ymm1, %ymm2 373; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1} 374; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0 375; SKX-NEXT: retq 376; 377; SKX_32-LABEL: test7: 378; SKX_32: # %bb.0: 379; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 380; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1 381; SKX_32-NEXT: kmovw %k1, %k2 382; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2} 383; SKX_32-NEXT: vmovdqa %ymm1, %ymm2 384; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1} 385; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0 386; SKX_32-NEXT: retl 387 388 %broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0 389 %broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer 390 391 %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind 392 %imask = bitcast i8 %mask to <8 x i1> 393 %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef) 394 %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1) 395 %res = add <8 x i32> %gt1, %gt2 396 ret <8 x i32> %res 397} 398 399; No uniform base in this case, index <8 x i64> contains addresses, 400; each gather call will be split into two 401define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) { 402; KNL_64-LABEL: test8: 403; KNL_64: # %bb.0: 404; KNL_64-NEXT: kmovw %edi, %k1 405; KNL_64-NEXT: kshiftrw $8, %k1, %k2 406; KNL_64-NEXT: kmovw %k2, %k3 407; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3} 408; KNL_64-NEXT: kmovw %k1, %k3 409; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3} 410; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4 411; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} 412; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} 413; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0 414; KNL_64-NEXT: vpaddd %zmm0, %zmm4, %zmm0 415; KNL_64-NEXT: retq 416; 417; KNL_32-LABEL: test8: 418; KNL_32: # %bb.0: 419; KNL_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 420; KNL_32-NEXT: kmovw %k1, %k2 421; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2} 422; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm2 423; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} 424; KNL_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 425; KNL_32-NEXT: retl 426; 427; SKX-LABEL: test8: 428; SKX: # %bb.0: 429; SKX-NEXT: kmovw %edi, %k1 430; SKX-NEXT: kshiftrw $8, %k1, %k2 431; SKX-NEXT: kmovw %k2, 
%k3 432; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3} 433; SKX-NEXT: kmovw %k1, %k3 434; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3} 435; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4 436; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} 437; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1} 438; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0 439; SKX-NEXT: vpaddd %zmm0, %zmm4, %zmm0 440; SKX-NEXT: retq 441; 442; SKX_32-LABEL: test8: 443; SKX_32: # %bb.0: 444; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 445; SKX_32-NEXT: kmovw %k1, %k2 446; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k2} 447; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2 448; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} 449; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0 450; SKX_32-NEXT: retl 451 452 %imask = bitcast i16 %mask to <16 x i1> 453 %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef) 454 %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1) 455 %res = add <16 x i32> %gt1, %gt2 456 ret <16 x i32> %res 457} 458 459%struct.RT = type { i8, [10 x [20 x i32]], i8 } 460%struct.ST = type { i32, double, %struct.RT } 461 462; Masked gather for aggregate types 463; Test9 and Test10 should give the same result (scalar and vector indices in GEP) 464 465 466define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) { 467; KNL_64-LABEL: test9: 468; KNL_64: # %bb.0: # %entry 469; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2 470; KNL_64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824] 471; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4 472; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 473; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 474; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 475; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 476; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 477; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 478; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 479; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0 480; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 481; KNL_64-NEXT: kxnorw %k0, %k0, %k1 482; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 483; KNL_64-NEXT: retq 484; 485; KNL_32-LABEL: test9: 486; KNL_32: # %bb.0: # %entry 487; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2 488; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80] 489; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1 490; KNL_32-NEXT: vpmovqd %zmm0, %ymm0 491; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820] 492; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0 493; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 494; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68] 495; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 496; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm1 497; KNL_32-NEXT: movw $255, %ax 498; KNL_32-NEXT: kmovw %eax, %k1 499; KNL_32-NEXT: vpgatherdd (,%zmm1), %zmm0 {%k1} 500; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 501; KNL_32-NEXT: retl 502; 503; SKX_SMALL-LABEL: test9: 504; SKX_SMALL: # %bb.0: # %entry 505; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 506; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 507; SKX_SMALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 508; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 509; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 
510; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 511; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 512; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1 513; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 514; SKX_SMALL-NEXT: retq 515; 516; SKX_LARGE-LABEL: test9: 517; SKX_LARGE: # %bb.0: # %entry 518; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 519; SKX_LARGE-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 520; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 521; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 522; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 523; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 524; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 525; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0 526; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 527; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1 528; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 529; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 530; SKX_LARGE-NEXT: retq 531; 532; SKX_32-LABEL: test9: 533; SKX_32: # %bb.0: # %entry 534; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1 535; SKX_32-NEXT: vpmovqd %zmm0, %ymm0 536; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 537; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 538; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 539; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1 540; SKX_32-NEXT: kxnorw %k0, %k0, %k1 541; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1} 542; SKX_32-NEXT: retl 543entry: 544 %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 545 %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer 546 547 %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13> 548 %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) 549 ret <8 x i32> %res 550} 551 552define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) { 553; KNL_64-LABEL: test10: 554; KNL_64: # %bb.0: # %entry 555; KNL_64-NEXT: vpbroadcastq %rdi, %zmm2 556; KNL_64-NEXT: vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824] 557; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm4 558; KNL_64-NEXT: vpsrlq $32, %zmm0, %zmm0 559; KNL_64-NEXT: vpmuludq %zmm3, %zmm0, %zmm0 560; KNL_64-NEXT: vpsllq $32, %zmm0, %zmm0 561; KNL_64-NEXT: vpaddq %zmm2, %zmm0, %zmm0 562; KNL_64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 563; KNL_64-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 564; KNL_64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 565; KNL_64-NEXT: vpaddq %zmm0, %zmm4, %zmm0 566; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 567; KNL_64-NEXT: kxnorw %k0, %k0, %k1 568; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 569; KNL_64-NEXT: retq 570; 571; KNL_32-LABEL: test10: 572; KNL_32: # %bb.0: # %entry 573; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2 574; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80] 575; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1 576; KNL_32-NEXT: vpmovqd %zmm0, %ymm0 577; KNL_32-NEXT: 
vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820] 578; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0 579; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 580; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68] 581; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 582; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm1 583; KNL_32-NEXT: movw $255, %ax 584; KNL_32-NEXT: kmovw %eax, %k1 585; KNL_32-NEXT: vpgatherdd (,%zmm1), %zmm0 {%k1} 586; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 587; KNL_32-NEXT: retl 588; 589; SKX_SMALL-LABEL: test10: 590; SKX_SMALL: # %bb.0: # %entry 591; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2 592; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0 593; SKX_SMALL-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 594; SKX_SMALL-NEXT: vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1 595; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0 596; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0 597; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1 598; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1 599; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 600; SKX_SMALL-NEXT: retq 601; 602; SKX_LARGE-LABEL: test10: 603; SKX_LARGE: # %bb.0: # %entry 604; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2 605; SKX_LARGE-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero 606; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 607; SKX_LARGE-NEXT: vpmuldq (%rax){1to8}, %zmm1, %zmm1 608; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 609; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0 610; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0 611; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0 612; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax 613; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1 614; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 615; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1} 616; SKX_LARGE-NEXT: retq 617; 618; SKX_32-LABEL: test10: 619; SKX_32: # %bb.0: # %entry 620; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1 621; SKX_32-NEXT: vpmovqd %zmm0, %ymm0 622; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 623; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0 624; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0 625; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1 626; SKX_32-NEXT: kxnorw %k0, %k0, %k1 627; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1} 628; SKX_32-NEXT: retl 629entry: 630 %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0 631 %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer 632 633 %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13 634 %res = call <8 x i32 > @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) 635 ret <8 x i32> %res 636} 637 638; Splat index in GEP, requires broadcast 639define <16 x float> @test11(float* %base, i32 %ind) { 640; KNL_64-LABEL: test11: 641; KNL_64: # %bb.0: 642; KNL_64-NEXT: movslq %esi, %rax 643; KNL_64-NEXT: leaq (%rdi,%rax,4), %rax 644; KNL_64-NEXT: vxorps %xmm1, %xmm1, %xmm1 645; KNL_64-NEXT: kxnorw %k0, %k0, %k1 646; KNL_64-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1} 647; KNL_64-NEXT: retq 648; 649; KNL_32-LABEL: test11: 650; KNL_32: # %bb.0: 651; KNL_32-NEXT: movl 
{{[0-9]+}}(%esp), %eax 652; KNL_32-NEXT: shll $2, %eax 653; KNL_32-NEXT: addl {{[0-9]+}}(%esp), %eax 654; KNL_32-NEXT: vxorps %xmm1, %xmm1, %xmm1 655; KNL_32-NEXT: kxnorw %k0, %k0, %k1 656; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 657; KNL_32-NEXT: retl 658; 659; SKX-LABEL: test11: 660; SKX: # %bb.0: 661; SKX-NEXT: movslq %esi, %rax 662; SKX-NEXT: leaq (%rdi,%rax,4), %rax 663; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 664; SKX-NEXT: kxnorw %k0, %k0, %k1 665; SKX-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1} 666; SKX-NEXT: retq 667; 668; SKX_32-LABEL: test11: 669; SKX_32: # %bb.0: 670; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 671; SKX_32-NEXT: shll $2, %eax 672; SKX_32-NEXT: addl {{[0-9]+}}(%esp), %eax 673; SKX_32-NEXT: vxorps %xmm1, %xmm1, %xmm1 674; SKX_32-NEXT: kxnorw %k0, %k0, %k1 675; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 676; SKX_32-NEXT: retl 677 678 %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0 679 %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer 680 681 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind 682 683 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 684 ret <16 x float>%res 685} 686 687; We are checking the uniform base here. It is taken directly from input to vgatherdps 688define <16 x float> @test12(float* %base, <16 x i32> %ind) { 689; KNL_64-LABEL: test12: 690; KNL_64: # %bb.0: 691; KNL_64-NEXT: kxnorw %k0, %k0, %k1 692; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 693; KNL_64-NEXT: vmovaps %zmm1, %zmm0 694; KNL_64-NEXT: retq 695; 696; KNL_32-LABEL: test12: 697; KNL_32: # %bb.0: 698; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 699; KNL_32-NEXT: kxnorw %k0, %k0, %k1 700; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 701; KNL_32-NEXT: vmovaps %zmm1, %zmm0 702; KNL_32-NEXT: retl 703; 704; SKX-LABEL: test12: 705; SKX: # %bb.0: 706; SKX-NEXT: kxnorw %k0, %k0, %k1 707; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 708; SKX-NEXT: vmovaps %zmm1, %zmm0 709; SKX-NEXT: retq 710; 711; SKX_32-LABEL: test12: 712; SKX_32: # %bb.0: 713; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 714; SKX_32-NEXT: kxnorw %k0, %k0, %k1 715; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 716; SKX_32-NEXT: vmovaps %zmm1, %zmm0 717; SKX_32-NEXT: retl 718 719 %sext_ind = sext <16 x i32> %ind to <16 x i64> 720 %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind 721 722 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 723 ret <16 x float>%res 724} 725 726; The same as the previous, but the mask is undefined 727define <16 x float> @test13(float* %base, <16 x i32> %ind) { 728; KNL_64-LABEL: test13: 729; KNL_64: # %bb.0: 730; KNL_64-NEXT: kxnorw %k0, %k0, %k1 731; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 732; KNL_64-NEXT: vmovaps %zmm1, %zmm0 733; KNL_64-NEXT: retq 734; 735; KNL_32-LABEL: test13: 736; KNL_32: # %bb.0: 737; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 738; KNL_32-NEXT: kxnorw %k0, %k0, %k1 739; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 740; KNL_32-NEXT: vmovaps 
%zmm1, %zmm0 741; KNL_32-NEXT: retl 742; 743; SKX-LABEL: test13: 744; SKX: # %bb.0: 745; SKX-NEXT: kxnorw %k0, %k0, %k1 746; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 747; SKX-NEXT: vmovaps %zmm1, %zmm0 748; SKX-NEXT: retq 749; 750; SKX_32-LABEL: test13: 751; SKX_32: # %bb.0: 752; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 753; SKX_32-NEXT: kxnorw %k0, %k0, %k1 754; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 755; SKX_32-NEXT: vmovaps %zmm1, %zmm0 756; SKX_32-NEXT: retl 757 758 %sext_ind = sext <16 x i32> %ind to <16 x i64> 759 %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind 760 761 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 762 ret <16 x float>%res 763} 764 765; The base pointer is not splat, can't find unform base 766define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) { 767; KNL_64-LABEL: test14: 768; KNL_64: # %bb.0: 769; KNL_64-NEXT: vmovq %xmm0, %rax 770; KNL_64-NEXT: vmovd %esi, %xmm0 771; KNL_64-NEXT: vpbroadcastd %xmm0, %ymm0 772; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm0 773; KNL_64-NEXT: vpsllq $2, %zmm0, %zmm0 774; KNL_64-NEXT: kxnorw %k0, %k0, %k1 775; KNL_64-NEXT: vgatherqps (%rax,%zmm0), %ymm1 {%k1} 776; KNL_64-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0 777; KNL_64-NEXT: retq 778; 779; KNL_32-LABEL: test14: 780; KNL_32: # %bb.0: 781; KNL_32-NEXT: vmovd %xmm0, %eax 782; KNL_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 783; KNL_32-NEXT: kxnorw %k0, %k0, %k1 784; KNL_32-NEXT: vgatherdps (%eax,%zmm1), %zmm0 {%k1} 785; KNL_32-NEXT: retl 786; 787; SKX-LABEL: test14: 788; SKX: # %bb.0: 789; SKX-NEXT: vmovq %xmm0, %rax 790; SKX-NEXT: vpbroadcastd %esi, %ymm0 791; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 792; SKX-NEXT: vpsllq $2, %zmm0, %zmm0 793; SKX-NEXT: kxnorw %k0, %k0, %k1 794; SKX-NEXT: vgatherqps (%rax,%zmm0), %ymm1 {%k1} 795; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm1, %zmm0 796; SKX-NEXT: retq 797; 798; SKX_32-LABEL: test14: 799; SKX_32: # %bb.0: 800; SKX_32-NEXT: vmovd %xmm0, %eax 801; SKX_32-NEXT: vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1 802; SKX_32-NEXT: kxnorw %k0, %k0, %k1 803; SKX_32-NEXT: vgatherdps (%eax,%zmm1), %zmm0 {%k1} 804; SKX_32-NEXT: retl 805 806 %broadcast.splatinsert = insertelement <16 x float*> %vec, float* %base, i32 1 807 %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer 808 809 %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind 810 811 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 812 ret <16 x float>%res 813} 814 815declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>) 816declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>) 817declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>) 818 819; Gather smaller than existing instruction 820define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { 821; KNL_64-LABEL: test15: 822; KNL_64: # %bb.0: 823; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 824; KNL_64-NEXT: vpslld $31, 
%xmm1, %xmm1 825; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 826; KNL_64-NEXT: kshiftlw $12, %k0, %k0 827; KNL_64-NEXT: kshiftrw $12, %k0, %k1 828; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1} 829; KNL_64-NEXT: vmovaps %xmm1, %xmm0 830; KNL_64-NEXT: vzeroupper 831; KNL_64-NEXT: retq 832; 833; KNL_32-LABEL: test15: 834; KNL_32: # %bb.0: 835; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 836; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 837; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 838; KNL_32-NEXT: kshiftlw $12, %k0, %k0 839; KNL_32-NEXT: kshiftrw $12, %k0, %k1 840; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 841; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1} 842; KNL_32-NEXT: vmovaps %xmm1, %xmm0 843; KNL_32-NEXT: vzeroupper 844; KNL_32-NEXT: retl 845; 846; SKX-LABEL: test15: 847; SKX: # %bb.0: 848; SKX-NEXT: vpslld $31, %xmm1, %xmm1 849; SKX-NEXT: vpmovd2m %xmm1, %k1 850; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1} 851; SKX-NEXT: vmovaps %xmm1, %xmm0 852; SKX-NEXT: retq 853; 854; SKX_32-LABEL: test15: 855; SKX_32: # %bb.0: 856; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 857; SKX_32-NEXT: vpmovd2m %xmm1, %k1 858; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 859; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1} 860; SKX_32-NEXT: vmovaps %xmm1, %xmm0 861; SKX_32-NEXT: retl 862 863 %sext_ind = sext <4 x i32> %ind to <4 x i64> 864 %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind 865 %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef) 866 ret <4 x float>%res 867} 868 869; Gather smaller than existing instruction 870define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x double> %src0) { 871; KNL_64-LABEL: test16: 872; KNL_64: # %bb.0: 873; KNL_64-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 874; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 875; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 876; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 877; KNL_64-NEXT: kshiftlw $12, %k0, %k0 878; KNL_64-NEXT: kshiftrw $12, %k0, %k1 879; KNL_64-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1} 880; KNL_64-NEXT: vmovapd %ymm2, %ymm0 881; KNL_64-NEXT: retq 882; 883; KNL_32-LABEL: test16: 884; KNL_32: # %bb.0: 885; KNL_32-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 886; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 887; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 888; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 889; KNL_32-NEXT: kshiftlw $12, %k0, %k0 890; KNL_32-NEXT: kshiftrw $12, %k0, %k1 891; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 892; KNL_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm2 {%k1} 893; KNL_32-NEXT: vmovapd %ymm2, %ymm0 894; KNL_32-NEXT: retl 895; 896; SKX-LABEL: test16: 897; SKX: # %bb.0: 898; SKX-NEXT: vpslld $31, %xmm1, %xmm1 899; SKX-NEXT: vpmovd2m %xmm1, %k1 900; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1} 901; SKX-NEXT: vmovapd %ymm2, %ymm0 902; SKX-NEXT: retq 903; 904; SKX_32-LABEL: test16: 905; SKX_32: # %bb.0: 906; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 907; SKX_32-NEXT: vpmovd2m %xmm1, %k1 908; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 909; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1} 910; SKX_32-NEXT: vmovapd %ymm2, %ymm0 911; SKX_32-NEXT: retl 912 913 %sext_ind = sext <4 x i32> %ind to <4 x i64> 914 %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind 915 %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0) 916 ret <4 x double>%res 917} 918 919define <2 x double> 
@test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x double> %src0) { 920; KNL_64-LABEL: test17: 921; KNL_64: # %bb.0: 922; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 923; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 924; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 925; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 926; KNL_64-NEXT: kshiftlw $14, %k0, %k0 927; KNL_64-NEXT: kshiftrw $14, %k0, %k1 928; KNL_64-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1} 929; KNL_64-NEXT: vmovapd %xmm2, %xmm0 930; KNL_64-NEXT: vzeroupper 931; KNL_64-NEXT: retq 932; 933; KNL_32-LABEL: test17: 934; KNL_32: # %bb.0: 935; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 936; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 937; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 938; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 939; KNL_32-NEXT: kshiftlw $14, %k0, %k0 940; KNL_32-NEXT: kshiftrw $14, %k0, %k1 941; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 942; KNL_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm2 {%k1} 943; KNL_32-NEXT: vmovapd %xmm2, %xmm0 944; KNL_32-NEXT: vzeroupper 945; KNL_32-NEXT: retl 946; 947; SKX-LABEL: test17: 948; SKX: # %bb.0: 949; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 950; SKX-NEXT: vpmovq2m %xmm1, %k1 951; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %xmm2 {%k1} 952; SKX-NEXT: vmovapd %xmm2, %xmm0 953; SKX-NEXT: retq 954; 955; SKX_32-LABEL: test17: 956; SKX_32: # %bb.0: 957; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 958; SKX_32-NEXT: vpmovq2m %xmm1, %k1 959; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 960; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %xmm2 {%k1} 961; SKX_32-NEXT: vmovapd %xmm2, %xmm0 962; SKX_32-NEXT: retl 963 964 %sext_ind = sext <2 x i32> %ind to <2 x i64> 965 %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind 966 %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0) 967 ret <2 x double>%res 968} 969 970declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> ) 971declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> ) 972declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> ) 973declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> ) 974declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> ) 975 976define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { 977; KNL_64-LABEL: test18: 978; KNL_64: # %bb.0: 979; KNL_64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 980; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 981; KNL_64-NEXT: vpslld $31, %xmm2, %xmm2 982; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k0 983; KNL_64-NEXT: kshiftlw $12, %k0, %k0 984; KNL_64-NEXT: kshiftrw $12, %k0, %k1 985; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} 986; KNL_64-NEXT: vzeroupper 987; KNL_64-NEXT: retq 988; 989; KNL_32-LABEL: test18: 990; KNL_32: # %bb.0: 991; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 992; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 993; KNL_32-NEXT: vpslld $31, %xmm2, %xmm2 994; KNL_32-NEXT: vptestmd %zmm2, %zmm2, %k0 995; KNL_32-NEXT: kshiftlw $12, %k0, %k0 996; KNL_32-NEXT: kshiftrw $12, %k0, %k1 997; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1} 998; KNL_32-NEXT: vzeroupper 999; KNL_32-NEXT: retl 1000; 1001; SKX-LABEL: test18: 1002; SKX: # %bb.0: 1003; SKX-NEXT: vpslld $31, %xmm2, %xmm2 1004; SKX-NEXT: vpmovd2m %xmm2, %k1 1005; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} 1006; 
SKX-NEXT: vzeroupper 1007; SKX-NEXT: retq 1008; 1009; SKX_32-LABEL: test18: 1010; SKX_32: # %bb.0: 1011; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 1012; SKX_32-NEXT: vpmovd2m %xmm2, %k1 1013; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} 1014; SKX_32-NEXT: retl 1015 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) 1016 ret void 1017} 1018 1019define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind) { 1020; KNL_64-LABEL: test19: 1021; KNL_64: # %bb.0: 1022; KNL_64-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1023; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1024; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 1025; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 1026; KNL_64-NEXT: kshiftlw $12, %k0, %k0 1027; KNL_64-NEXT: kshiftrw $12, %k0, %k1 1028; KNL_64-NEXT: vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1} 1029; KNL_64-NEXT: vzeroupper 1030; KNL_64-NEXT: retq 1031; 1032; KNL_32-LABEL: test19: 1033; KNL_32: # %bb.0: 1034; KNL_32-NEXT: # kill: def $ymm2 killed $ymm2 def $zmm2 1035; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1036; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 1037; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 1038; KNL_32-NEXT: kshiftlw $12, %k0, %k0 1039; KNL_32-NEXT: kshiftrw $12, %k0, %k1 1040; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1041; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1} 1042; KNL_32-NEXT: vzeroupper 1043; KNL_32-NEXT: retl 1044; 1045; SKX-LABEL: test19: 1046; SKX: # %bb.0: 1047; SKX-NEXT: vpslld $31, %xmm1, %xmm1 1048; SKX-NEXT: vpmovd2m %xmm1, %k1 1049; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1} 1050; SKX-NEXT: vzeroupper 1051; SKX-NEXT: retq 1052; 1053; SKX_32-LABEL: test19: 1054; SKX_32: # %bb.0: 1055; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 1056; SKX_32-NEXT: vpmovd2m %xmm1, %k1 1057; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1058; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1} 1059; SKX_32-NEXT: vzeroupper 1060; SKX_32-NEXT: retl 1061 %gep = getelementptr double, double* %ptr, <4 x i64> %ind 1062 call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask) 1063 ret void 1064} 1065 1066; Data type requires widening 1067define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { 1068; KNL_64-LABEL: test20: 1069; KNL_64: # %bb.0: 1070; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1071; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1072; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 1073; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 1074; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1075; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1076; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k1} 1077; KNL_64-NEXT: vzeroupper 1078; KNL_64-NEXT: retq 1079; 1080; KNL_32-LABEL: test20: 1081; KNL_32: # %bb.0: 1082; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1083; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1084; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 1085; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 1086; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1087; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1088; KNL_32-NEXT: vscatterdps %zmm0, (,%zmm1) {%k1} 1089; KNL_32-NEXT: vzeroupper 1090; KNL_32-NEXT: retl 1091; 1092; SKX-LABEL: test20: 1093; SKX: # %bb.0: 1094; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 1095; SKX-NEXT: vpmovq2m %xmm2, %k1 1096; SKX-NEXT: vscatterqps %xmm0, (,%xmm1) {%k1} 1097; SKX-NEXT: retq 1098; 1099; SKX_32-LABEL: test20: 1100; SKX_32: # %bb.0: 1101; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 1102; SKX_32-NEXT: vpmovq2m %xmm2, %k1 1103; 
SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} 1104; SKX_32-NEXT: retl 1105 call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) 1106 ret void 1107} 1108 1109; Data type requires promotion 1110define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { 1111; KNL_64-LABEL: test21: 1112; KNL_64: # %bb.0: 1113; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1114; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1115; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2 1116; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0 1117; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1118; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1119; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1} 1120; KNL_64-NEXT: vzeroupper 1121; KNL_64-NEXT: retq 1122; 1123; KNL_32-LABEL: test21: 1124; KNL_32: # %bb.0: 1125; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1126; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1127; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2 1128; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0 1129; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1130; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1131; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1} 1132; KNL_32-NEXT: vzeroupper 1133; KNL_32-NEXT: retl 1134; 1135; SKX-LABEL: test21: 1136; SKX: # %bb.0: 1137; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 1138; SKX-NEXT: vpmovq2m %xmm2, %k1 1139; SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1} 1140; SKX-NEXT: retq 1141; 1142; SKX_32-LABEL: test21: 1143; SKX_32: # %bb.0: 1144; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 1145; SKX_32-NEXT: vpmovq2m %xmm2, %k1 1146; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} 1147; SKX_32-NEXT: retl 1148 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask) 1149 ret void 1150} 1151 1152; The result type requires widening 1153declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>) 1154 1155define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) { 1156; KNL_64-LABEL: test22: 1157; KNL_64: # %bb.0: 1158; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1159; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1160; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 1161; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 1162; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1163; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1164; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1} 1165; KNL_64-NEXT: vmovaps %xmm2, %xmm0 1166; KNL_64-NEXT: vzeroupper 1167; KNL_64-NEXT: retq 1168; 1169; KNL_32-LABEL: test22: 1170; KNL_32: # %bb.0: 1171; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1172; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1173; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 1174; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 1175; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1176; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1177; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1178; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm2 {%k1} 1179; KNL_32-NEXT: vmovaps %xmm2, %xmm0 1180; KNL_32-NEXT: vzeroupper 1181; KNL_32-NEXT: retl 1182; 1183; SKX-LABEL: test22: 1184; SKX: # %bb.0: 1185; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 1186; SKX-NEXT: vpmovq2m %xmm1, %k1 1187; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} 1188; SKX-NEXT: vmovaps %xmm2, %xmm0 1189; SKX-NEXT: retq 1190; 1191; SKX_32-LABEL: test22: 1192; SKX_32: # %bb.0: 1193; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 1194; SKX_32-NEXT: vpmovq2m %xmm1, %k1 1195; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1196; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} 1197; 
SKX_32-NEXT: vmovaps %xmm2, %xmm0 1198; SKX_32-NEXT: retl 1199 %sext_ind = sext <2 x i32> %ind to <2 x i64> 1200 %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind 1201 %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) 1202 ret <2 x float>%res 1203} 1204 1205define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x float> %src0) { 1206; KNL_64-LABEL: test22a: 1207; KNL_64: # %bb.0: 1208; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 1209; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1210; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 1211; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 1212; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1213; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1214; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1} 1215; KNL_64-NEXT: vmovaps %xmm2, %xmm0 1216; KNL_64-NEXT: vzeroupper 1217; KNL_64-NEXT: retq 1218; 1219; KNL_32-LABEL: test22a: 1220; KNL_32: # %bb.0: 1221; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 1222; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1223; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 1224; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 1225; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1226; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1227; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1228; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm2 {%k1} 1229; KNL_32-NEXT: vmovaps %xmm2, %xmm0 1230; KNL_32-NEXT: vzeroupper 1231; KNL_32-NEXT: retl 1232; 1233; SKX-LABEL: test22a: 1234; SKX: # %bb.0: 1235; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 1236; SKX-NEXT: vpmovq2m %xmm1, %k1 1237; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1} 1238; SKX-NEXT: vmovaps %xmm2, %xmm0 1239; SKX-NEXT: retq 1240; 1241; SKX_32-LABEL: test22a: 1242; SKX_32: # %bb.0: 1243; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 1244; SKX_32-NEXT: vpmovq2m %xmm1, %k1 1245; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1246; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1} 1247; SKX_32-NEXT: vmovaps %xmm2, %xmm0 1248; SKX_32-NEXT: retl 1249 %gep.random = getelementptr float, float* %base, <2 x i64> %ind 1250 %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) 1251 ret <2 x float>%res 1252} 1253 1254declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>) 1255declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>) 1256 1257define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) { 1258; KNL_64-LABEL: test23: 1259; KNL_64: # %bb.0: 1260; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1261; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1262; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 1263; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 1264; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1265; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1266; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1} 1267; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 1268; KNL_64-NEXT: vzeroupper 1269; KNL_64-NEXT: retq 1270; 1271; KNL_32-LABEL: test23: 1272; KNL_32: # %bb.0: 1273; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1274; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1275; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 1276; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 1277; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1278; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1279; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1280; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1} 1281; KNL_32-NEXT: vmovdqa 
%xmm2, %xmm0 1282; KNL_32-NEXT: vzeroupper 1283; KNL_32-NEXT: retl 1284; 1285; SKX-LABEL: test23: 1286; SKX: # %bb.0: 1287; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 1288; SKX-NEXT: vpmovq2m %xmm1, %k1 1289; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm2 {%k1} 1290; SKX-NEXT: vmovdqa %xmm2, %xmm0 1291; SKX-NEXT: retq 1292; 1293; SKX_32-LABEL: test23: 1294; SKX_32: # %bb.0: 1295; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 1296; SKX_32-NEXT: vpmovq2m %xmm1, %k1 1297; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1298; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm2 {%k1} 1299; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 1300; SKX_32-NEXT: retl 1301 %sext_ind = sext <2 x i32> %ind to <2 x i64> 1302 %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind 1303 %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0) 1304 ret <2 x i32>%res 1305} 1306 1307define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %src0) { 1308; KNL_64-LABEL: test23b: 1309; KNL_64: # %bb.0: 1310; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 1311; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1312; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 1313; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 1314; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1315; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1316; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1} 1317; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 1318; KNL_64-NEXT: vzeroupper 1319; KNL_64-NEXT: retq 1320; 1321; KNL_32-LABEL: test23b: 1322; KNL_32: # %bb.0: 1323; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2 1324; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1325; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 1326; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 1327; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1328; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1329; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1330; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1} 1331; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 1332; KNL_32-NEXT: vzeroupper 1333; KNL_32-NEXT: retl 1334; 1335; SKX-LABEL: test23b: 1336; SKX: # %bb.0: 1337; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 1338; SKX-NEXT: vpmovq2m %xmm1, %k1 1339; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm2 {%k1} 1340; SKX-NEXT: vmovdqa %xmm2, %xmm0 1341; SKX-NEXT: retq 1342; 1343; SKX_32-LABEL: test23b: 1344; SKX_32: # %bb.0: 1345; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 1346; SKX_32-NEXT: vpmovq2m %xmm1, %k1 1347; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1348; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm2 {%k1} 1349; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 1350; SKX_32-NEXT: retl 1351 %gep.random = getelementptr i32, i32* %base, <2 x i64> %ind 1352 %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0) 1353 ret <2 x i32>%res 1354} 1355 1356define <2 x i32> @test24(i32* %base, <2 x i32> %ind) { 1357; KNL_64-LABEL: test24: 1358; KNL_64: # %bb.0: 1359; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1360; KNL_64-NEXT: movw $3, %ax 1361; KNL_64-NEXT: kmovw %eax, %k1 1362; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1} 1363; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 1364; KNL_64-NEXT: vzeroupper 1365; KNL_64-NEXT: retq 1366; 1367; KNL_32-LABEL: test24: 1368; KNL_32: # %bb.0: 1369; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1370; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1371; KNL_32-NEXT: movw $3, %cx 1372; KNL_32-NEXT: kmovw %ecx, %k1 1373; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1} 1374; KNL_32-NEXT: vmovdqa %xmm1, %xmm0 1375; KNL_32-NEXT: 
vzeroupper 1376; KNL_32-NEXT: retl 1377; 1378; SKX-LABEL: test24: 1379; SKX: # %bb.0: 1380; SKX-NEXT: movb $3, %al 1381; SKX-NEXT: kmovw %eax, %k1 1382; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1} 1383; SKX-NEXT: vmovdqa %xmm1, %xmm0 1384; SKX-NEXT: retq 1385; 1386; SKX_32-LABEL: test24: 1387; SKX_32: # %bb.0: 1388; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1389; SKX_32-NEXT: movb $3, %cl 1390; SKX_32-NEXT: kmovw %ecx, %k1 1391; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1} 1392; SKX_32-NEXT: vmovdqa %xmm1, %xmm0 1393; SKX_32-NEXT: retl 1394 %sext_ind = sext <2 x i32> %ind to <2 x i64> 1395 %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind 1396 %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) 1397 ret <2 x i32>%res 1398} 1399 1400define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %src0) { 1401; KNL_64-LABEL: test25: 1402; KNL_64: # %bb.0: 1403; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1404; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1405; KNL_64-NEXT: vpsllq $63, %xmm1, %xmm1 1406; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k0 1407; KNL_64-NEXT: kshiftlw $14, %k0, %k0 1408; KNL_64-NEXT: kshiftrw $14, %k0, %k1 1409; KNL_64-NEXT: vpgatherdq (%rdi,%ymm0,8), %zmm2 {%k1} 1410; KNL_64-NEXT: vmovdqa %xmm2, %xmm0 1411; KNL_64-NEXT: vzeroupper 1412; KNL_64-NEXT: retq 1413; 1414; KNL_32-LABEL: test25: 1415; KNL_32: # %bb.0: 1416; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 1417; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1418; KNL_32-NEXT: vpsllq $63, %xmm1, %xmm1 1419; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k0 1420; KNL_32-NEXT: kshiftlw $14, %k0, %k0 1421; KNL_32-NEXT: kshiftrw $14, %k0, %k1 1422; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1423; KNL_32-NEXT: vpgatherdq (%eax,%ymm0,8), %zmm2 {%k1} 1424; KNL_32-NEXT: vmovdqa %xmm2, %xmm0 1425; KNL_32-NEXT: vzeroupper 1426; KNL_32-NEXT: retl 1427; 1428; SKX-LABEL: test25: 1429; SKX: # %bb.0: 1430; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 1431; SKX-NEXT: vpmovq2m %xmm1, %k1 1432; SKX-NEXT: vpgatherdq (%rdi,%xmm0,8), %xmm2 {%k1} 1433; SKX-NEXT: vmovdqa %xmm2, %xmm0 1434; SKX-NEXT: retq 1435; 1436; SKX_32-LABEL: test25: 1437; SKX_32: # %bb.0: 1438; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 1439; SKX_32-NEXT: vpmovq2m %xmm1, %k1 1440; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1441; SKX_32-NEXT: vpgatherdq (%eax,%xmm0,8), %xmm2 {%k1} 1442; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 1443; SKX_32-NEXT: retl 1444 %sext_ind = sext <2 x i32> %ind to <2 x i64> 1445 %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind 1446 %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0) 1447 ret <2 x i64>%res 1448} 1449 1450define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) { 1451; KNL_64-LABEL: test26: 1452; KNL_64: # %bb.0: 1453; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1454; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1455; KNL_64-NEXT: movb $3, %al 1456; KNL_64-NEXT: kmovw %eax, %k1 1457; KNL_64-NEXT: vpgatherdq (%rdi,%ymm0,8), %zmm1 {%k1} 1458; KNL_64-NEXT: vmovdqa %xmm1, %xmm0 1459; KNL_64-NEXT: vzeroupper 1460; KNL_64-NEXT: retq 1461; 1462; KNL_32-LABEL: test26: 1463; KNL_32: # %bb.0: 1464; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1465; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 1466; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 1467; KNL_32-NEXT: movb $3, %cl 1468; KNL_32-NEXT: 
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vpgatherdq (%eax,%ymm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test26:
; SKX: # %bb.0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherdq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test26:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
  ret <2 x i64>%res
}

; Result type requires widening; all-ones mask
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT: movw $3, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movw $3, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT: vmovaps %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
; SKX: # %bb.0:
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test27:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movb $3, %cl
; SKX_32-NEXT: kmovw %ecx, %k1
; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vmovaps %xmm1, %xmm0
; SKX_32-NEXT: retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
  ret <2 x float>%res
}

; Data type requires promotion, mask is all-ones
define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
; KNL_64-LABEL: test28:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test28:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: movw $3, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: vpscatterdd %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test28:
; SKX: # %bb.0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test28:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT: retl
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
  ret void
}

; SCALAR-LABEL: test29
; SCALAR: extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: extractelement <16 x float*>
; SCALAR-NEXT: load float

define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test29:
; KNL_64: # %bb.0:
; KNL_64-NEXT: movw $44, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test29:
; KNL_32: # %bb.0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movw $44, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT: vmovaps %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test29:
; SKX: # %bb.0:
; SKX-NEXT: movw $44, %ax
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test29:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movw $44, %cx
; SKX_32-NEXT: kmovw %ecx, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT: vmovaps %zmm1, %zmm0
; SKX_32-NEXT: retl

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
  ret <16 x float>%res
}

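; The SCALAR-prefixed checks above come from the scalarize-masked-mem-intrin
; RUN lines, which lower a masked gather into a per-lane branch-and-load
; sequence. Below is a minimal hand-written sketch of that shape for a
; hypothetical 2-element gather; the function and its names are illustrative
; only and are not part of any checked output. Note also that the `movw $44`
; immediate in the test29 checks is just the <16 x i1> constant above packed
; into bits: 44 = 0b101100, i.e. lanes 2, 3 and 5.
define <2 x float> @scalarized_gather_sketch(<2 x float*> %ptrs, <2 x i1> %mask, <2 x float> %passthru) {
entry:
  %m0 = extractelement <2 x i1> %mask, i32 0
  br i1 %m0, label %cond.load, label %else
cond.load:
  %p0 = extractelement <2 x float*> %ptrs, i32 0
  %v0 = load float, float* %p0, align 4
  %ins0 = insertelement <2 x float> %passthru, float %v0, i32 0
  br label %else
else:
  %res.phi.else = phi <2 x float> [ %ins0, %cond.load ], [ %passthru, %entry ]
  %m1 = extractelement <2 x i1> %mask, i32 1
  br i1 %m1, label %cond.load1, label %else2
cond.load1:
  %p1 = extractelement <2 x float*> %ptrs, i32 1
  %v1 = load float, float* %p1, align 4
  %ins1 = insertelement <2 x float> %res.phi.else, float %v1, i32 1
  br label %else2
else2:
  %res = phi <2 x float> [ %ins1, %cond.load1 ], [ %res.phi.else, %else ]
  ret <2 x float> %res
}
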
declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT: movw $-3, %ax
; KNL_64-NEXT: kmovw %eax, %k0
; KNL_64-NEXT: andl $1, %edi
; KNL_64-NEXT: kmovw %edi, %k1
; KNL_64-NEXT: kandw %k0, %k1, %k0
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $14, %k1, %k1
; KNL_64-NEXT: korw %k1, %k0, %k0
; KNL_64-NEXT: movw $-5, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: kandw %k1, %k0, %k0
; KNL_64-NEXT: kmovw %edx, %k1
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $13, %k1, %k1
; KNL_64-NEXT: korw %k1, %k0, %k0
; KNL_64-NEXT: kshiftlw $12, %k0, %k0
; KNL_64-NEXT: kshiftrw $12, %k0, %k1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm2 {%k1}
; KNL_64-NEXT: vmovdqa %xmm2, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT: movw $-3, %ax
; KNL_32-NEXT: kmovw %eax, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: andl $1, %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k0, %k1, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $14, %k1, %k1
; KNL_32-NEXT: korw %k1, %k0, %k0
; KNL_32-NEXT: movw $-5, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k1, %k0, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $13, %k1, %k1
; KNL_32-NEXT: korw %k1, %k0, %k0
; KNL_32-NEXT: kshiftlw $12, %k0, %k0
; KNL_32-NEXT: kshiftrw $12, %k0, %k1
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa %xmm2, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
; SKX: # %bb.0:
; SKX-NEXT: movb $-3, %al
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $7, %k1, %k1
; SKX-NEXT: kandw %k0, %k1, %k0
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k1, %k1
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: movb $-5, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: kandw %k1, %k0, %k0
; SKX-NEXT: kmovw %edx, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k1
; SKX-NEXT: korw %k1, %k0, %k1
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: vpgatherqd (,%ymm0), %xmm2 {%k1}
; SKX-NEXT: vmovdqa %xmm2, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $-3, %al
; SKX_32-NEXT: kmovw %eax, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $7, %k1, %k1
; SKX_32-NEXT: kandw %k0, %k1, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $6, %k1, %k1
; SKX_32-NEXT: korw %k1, %k0, %k0
; SKX_32-NEXT: movb $-5, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kandw %k1, %k0, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $5, %k1, %k1
; SKX_32-NEXT: korw %k1, %k0, %k1
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherdd (,%xmm0), %xmm2 {%k1}
; SKX_32-NEXT: vmovdqa %xmm2, %xmm0
; SKX_32-NEXT: retl

  %sext_ind = sext <3 x i32> %ind to <3 x i64>
  %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
  %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
  ret <3 x i32>%res
}

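; In test30 above, the <3 x i1> %mask argument arrives as three scalar bits
; (%edi, %esi, %edx in the KNL_64/SKX checks), which the backend reassembles
; into a single mask register with the kmovw/kshift/kor sequence. For
; reference, a minimal hand-written sketch of how such a mask value is
; typically built up on the IR side; the function and its names are
; illustrative only and nothing in the checks depends on it.
define <3 x i1> @build_v3i1_mask_sketch(i1 %c0, i1 %c1, i1 %c2) {
  %m0 = insertelement <3 x i1> undef, i1 %c0, i32 0
  %m1 = insertelement <3 x i1> %m0, i1 %c1, i32 1
  %m2 = insertelement <3 x i1> %m1, i1 %c2, i32 2
  ret <3 x i1> %m2
}
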
; Non-power of 2 scatter
declare void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32>, <3 x i32*>, i32, <3 x i1>)
define void @test30b(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30b:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT: movw $-3, %ax
; KNL_64-NEXT: kmovw %eax, %k0
; KNL_64-NEXT: andl $1, %edi
; KNL_64-NEXT: kmovw %edi, %k1
; KNL_64-NEXT: kandw %k0, %k1, %k0
; KNL_64-NEXT: kmovw %esi, %k1
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $14, %k1, %k1
; KNL_64-NEXT: korw %k1, %k0, %k0
; KNL_64-NEXT: movw $-5, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: kandw %k1, %k0, %k0
; KNL_64-NEXT: kmovw %edx, %k1
; KNL_64-NEXT: kshiftlw $15, %k1, %k1
; KNL_64-NEXT: kshiftrw $13, %k1, %k1
; KNL_64-NEXT: korw %k1, %k0, %k0
; KNL_64-NEXT: kshiftlw $12, %k0, %k0
; KNL_64-NEXT: kshiftrw $12, %k0, %k1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT: vpscatterqd %ymm2, (,%zmm0) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30b:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT: movw $-3, %ax
; KNL_32-NEXT: kmovw %eax, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: andl $1, %eax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k0, %k1, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $14, %k1, %k1
; KNL_32-NEXT: korw %k1, %k0, %k0
; KNL_32-NEXT: movw $-5, %ax
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kandw %k1, %k0, %k0
; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT: kmovw %eax, %k1
; KNL_32-NEXT: kshiftlw $15, %k1, %k1
; KNL_32-NEXT: kshiftrw $13, %k1, %k1
; KNL_32-NEXT: korw %k1, %k0, %k0
; KNL_32-NEXT: kshiftlw $12, %k0, %k0
; KNL_32-NEXT: kshiftrw $12, %k0, %k1
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30b:
; SKX: # %bb.0:
; SKX-NEXT: movb $-3, %al
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: kmovw %edi, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $7, %k1, %k1
; SKX-NEXT: kandw %k0, %k1, %k0
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k1, %k1
; SKX-NEXT: korw %k1, %k0, %k0
; SKX-NEXT: movb $-5, %al
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: kandw %k1, %k0, %k0
; SKX-NEXT: kmovw %edx, %k1
; SKX-NEXT: kshiftlb $7, %k1, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k1
; SKX-NEXT: korw %k1, %k0, %k1
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: vpscatterqd %xmm2, (,%ymm0) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30b:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movb $-3, %al
; SKX_32-NEXT: kmovw %eax, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $7, %k1, %k1
; SKX_32-NEXT: kandw %k0, %k1, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $6, %k1, %k1
; SKX_32-NEXT: korw %k1, %k0, %k0
; SKX_32-NEXT: movb $-5, %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kandw %k1, %k0, %k0
; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT: kmovw %eax, %k1
; SKX_32-NEXT: kshiftlb $7, %k1, %k1
; SKX_32-NEXT: kshiftrb $5, %k1, %k1
; SKX_32-NEXT: korw %k1, %k0, %k1
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT: vpscatterdd %xmm2, (,%xmm0) {%k1}
; SKX_32-NEXT: retl
  %sext_ind = sext <3 x i32> %ind to <3 x i64>
  %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
  call void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32> %src0, <3 x i32*> %gep.random, i32 4, <3 x i1> %mask)
  ret void
}

declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
define <16 x float*> @test31(<16 x float**> %ptrs) {
; KNL_64-LABEL: test31:
; KNL_64: # %bb.0:
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: kxnorw %k0, %k0, %k2
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test31:
; KNL_32: # %bb.0:
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test31:
; SKX: # %bb.0:
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kxnorw %k0, %k0, %k2
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm2 {%k2}
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm3 {%k1}
; SKX-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX-NEXT: vmovdqa64 %zmm3, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test31:
; SKX_32: # %bb.0:
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm1 {%k1}
; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT: retl

  %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
  ret <16 x float*>%res
}

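; test_gather_16i32 below takes its <16 x i1> mask as a function argument, so
; the checks have to materialize it from an xmm register (vpmovsxbd/vpslld
; followed by vptestmd on KNL or vpmovd2m on SKX). When the mask instead comes
; directly from a vector compare, the compare itself can feed the mask
; register, which is what the test_gather_setcc_split test later in this file
; relies on. A minimal hand-written sketch of that producer pattern follows;
; it is illustrative only and reuses the @llvm.masked.gather.v16i32.v16p0i32
; intrinsic this file already declares.
define <16 x i32> @gather_with_icmp_mask_sketch(<16 x i32*> %ptrs, <16 x i32> %trigger, <16 x i32> %src0) {
  %mask = icmp ne <16 x i32> %trigger, zeroinitializer
  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
  ret <16 x i32> %res
}
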
define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: test_gather_16i32:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16i32:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16i32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i32:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: retl
  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
  ret <16 x i32> %res
}
define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) {
; KNL_64-LABEL: test_gather_16i64:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT: kshiftrw $8, %k1, %k2
; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
; KNL_64-NEXT: vmovdqa64 %zmm3, %zmm0
; KNL_64-NEXT: vmovdqa64 %zmm4, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # %bb.0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; KNL_32-NEXT: kshiftrw $8, %k1, %k2
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_16i64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vpmovd2m %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1}
; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2}
; SKX-NEXT: vmovdqa64 %zmm3, %zmm0
; SKX-NEXT: vmovdqa64 %zmm4, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i64:
; SKX_32: # %bb.0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT: vpmovd2m %zmm1, %k1
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: retl
  %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
ret <16 x i64> %res 2028} 2029declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0) 2030define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { 2031; KNL_64-LABEL: test_gather_16f32: 2032; KNL_64: # %bb.0: 2033; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2034; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2035; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2036; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm2 2037; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2038; KNL_64-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2} 2039; KNL_64-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1} 2040; KNL_64-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0 2041; KNL_64-NEXT: retq 2042; 2043; KNL_32-LABEL: test_gather_16f32: 2044; KNL_32: # %bb.0: 2045; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2046; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2047; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2048; KNL_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} 2049; KNL_32-NEXT: vmovaps %zmm2, %zmm0 2050; KNL_32-NEXT: retl 2051; 2052; SKX-LABEL: test_gather_16f32: 2053; SKX: # %bb.0: 2054; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2055; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2056; SKX-NEXT: vpmovd2m %zmm2, %k1 2057; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2 2058; SKX-NEXT: kshiftrw $8, %k1, %k2 2059; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2} 2060; SKX-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1} 2061; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0 2062; SKX-NEXT: retq 2063; 2064; SKX_32-LABEL: test_gather_16f32: 2065; SKX_32: # %bb.0: 2066; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2067; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2068; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2069; SKX_32-NEXT: vgatherdps (,%zmm0), %zmm2 {%k1} 2070; SKX_32-NEXT: vmovaps %zmm2, %zmm0 2071; SKX_32-NEXT: retl 2072 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0) 2073 ret <16 x float> %res 2074} 2075define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { 2076; KNL_64-LABEL: test_gather_16f64: 2077; KNL_64: # %bb.0: 2078; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2079; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2080; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2081; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2082; KNL_64-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} 2083; KNL_64-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} 2084; KNL_64-NEXT: vmovapd %zmm3, %zmm0 2085; KNL_64-NEXT: vmovapd %zmm4, %zmm1 2086; KNL_64-NEXT: retq 2087; 2088; KNL_32-LABEL: test_gather_16f64: 2089; KNL_32: # %bb.0: 2090; KNL_32-NEXT: pushl %ebp 2091; KNL_32-NEXT: .cfi_def_cfa_offset 8 2092; KNL_32-NEXT: .cfi_offset %ebp, -8 2093; KNL_32-NEXT: movl %esp, %ebp 2094; KNL_32-NEXT: .cfi_def_cfa_register %ebp 2095; KNL_32-NEXT: andl $-64, %esp 2096; KNL_32-NEXT: subl $64, %esp 2097; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2098; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2099; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2100; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 2101; KNL_32-NEXT: kshiftrw $8, %k1, %k2 2102; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} 2103; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 2104; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} 2105; KNL_32-NEXT: vmovapd %zmm2, %zmm0 2106; KNL_32-NEXT: movl %ebp, %esp 2107; KNL_32-NEXT: popl %ebp 2108; KNL_32-NEXT: .cfi_def_cfa %esp, 4 2109; KNL_32-NEXT: retl 2110; 2111; SKX-LABEL: test_gather_16f64: 2112; SKX: # %bb.0: 2113; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2114; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2115; SKX-NEXT: vpmovd2m %zmm2, %k1 2116; SKX-NEXT: 
kshiftrw $8, %k1, %k2 2117; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} 2118; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} 2119; SKX-NEXT: vmovapd %zmm3, %zmm0 2120; SKX-NEXT: vmovapd %zmm4, %zmm1 2121; SKX-NEXT: retq 2122; 2123; SKX_32-LABEL: test_gather_16f64: 2124; SKX_32: # %bb.0: 2125; SKX_32-NEXT: pushl %ebp 2126; SKX_32-NEXT: .cfi_def_cfa_offset 8 2127; SKX_32-NEXT: .cfi_offset %ebp, -8 2128; SKX_32-NEXT: movl %esp, %ebp 2129; SKX_32-NEXT: .cfi_def_cfa_register %ebp 2130; SKX_32-NEXT: andl $-64, %esp 2131; SKX_32-NEXT: subl $64, %esp 2132; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2133; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2134; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2135; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 2136; SKX_32-NEXT: kshiftrw $8, %k1, %k2 2137; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} 2138; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 2139; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} 2140; SKX_32-NEXT: vmovapd %zmm2, %zmm0 2141; SKX_32-NEXT: movl %ebp, %esp 2142; SKX_32-NEXT: popl %ebp 2143; SKX_32-NEXT: .cfi_def_cfa %esp, 4 2144; SKX_32-NEXT: retl 2145 %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0) 2146 ret <16 x double> %res 2147} 2148declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0) 2149define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0) { 2150; KNL_64-LABEL: test_scatter_16i32: 2151; KNL_64: # %bb.0: 2152; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2153; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2154; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2155; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2156; KNL_64-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1} 2157; KNL_64-NEXT: vextracti64x4 $1, %zmm3, %ymm0 2158; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2} 2159; KNL_64-NEXT: vzeroupper 2160; KNL_64-NEXT: retq 2161; 2162; KNL_32-LABEL: test_scatter_16i32: 2163; KNL_32: # %bb.0: 2164; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2165; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2166; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2167; KNL_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} 2168; KNL_32-NEXT: vzeroupper 2169; KNL_32-NEXT: retl 2170; 2171; SKX-LABEL: test_scatter_16i32: 2172; SKX: # %bb.0: 2173; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2174; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2175; SKX-NEXT: vpmovd2m %zmm2, %k1 2176; SKX-NEXT: kshiftrw $8, %k1, %k2 2177; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1} 2178; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm0 2179; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2} 2180; SKX-NEXT: vzeroupper 2181; SKX-NEXT: retq 2182; 2183; SKX_32-LABEL: test_scatter_16i32: 2184; SKX_32: # %bb.0: 2185; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2186; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2187; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2188; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} 2189; SKX_32-NEXT: vzeroupper 2190; SKX_32-NEXT: retl 2191 call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask) 2192 ret void 2193} 2194define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0) { 2195; KNL_64-LABEL: test_scatter_16i64: 2196; KNL_64: # %bb.0: 2197; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2198; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2199; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2200; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2201; KNL_64-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} 2202; KNL_64-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} 2203; KNL_64-NEXT: vzeroupper 2204; KNL_64-NEXT: retq 2205; 
2206; KNL_32-LABEL: test_scatter_16i64: 2207; KNL_32: # %bb.0: 2208; KNL_32-NEXT: pushl %ebp 2209; KNL_32-NEXT: .cfi_def_cfa_offset 8 2210; KNL_32-NEXT: .cfi_offset %ebp, -8 2211; KNL_32-NEXT: movl %esp, %ebp 2212; KNL_32-NEXT: .cfi_def_cfa_register %ebp 2213; KNL_32-NEXT: andl $-64, %esp 2214; KNL_32-NEXT: subl $64, %esp 2215; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2216; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2217; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2218; KNL_32-NEXT: vmovdqa64 8(%ebp), %zmm1 2219; KNL_32-NEXT: kshiftrw $8, %k1, %k2 2220; KNL_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} 2221; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 2222; KNL_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} 2223; KNL_32-NEXT: movl %ebp, %esp 2224; KNL_32-NEXT: popl %ebp 2225; KNL_32-NEXT: .cfi_def_cfa %esp, 4 2226; KNL_32-NEXT: vzeroupper 2227; KNL_32-NEXT: retl 2228; 2229; SKX-LABEL: test_scatter_16i64: 2230; SKX: # %bb.0: 2231; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2232; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2233; SKX-NEXT: vpmovd2m %zmm2, %k1 2234; SKX-NEXT: kshiftrw $8, %k1, %k2 2235; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} 2236; SKX-NEXT: vpscatterqq %zmm4, (,%zmm1) {%k2} 2237; SKX-NEXT: vzeroupper 2238; SKX-NEXT: retq 2239; 2240; SKX_32-LABEL: test_scatter_16i64: 2241; SKX_32: # %bb.0: 2242; SKX_32-NEXT: pushl %ebp 2243; SKX_32-NEXT: .cfi_def_cfa_offset 8 2244; SKX_32-NEXT: .cfi_offset %ebp, -8 2245; SKX_32-NEXT: movl %esp, %ebp 2246; SKX_32-NEXT: .cfi_def_cfa_register %ebp 2247; SKX_32-NEXT: andl $-64, %esp 2248; SKX_32-NEXT: subl $64, %esp 2249; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2250; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2251; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2252; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 2253; SKX_32-NEXT: kshiftrw $8, %k1, %k2 2254; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} 2255; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 2256; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2} 2257; SKX_32-NEXT: movl %ebp, %esp 2258; SKX_32-NEXT: popl %ebp 2259; SKX_32-NEXT: .cfi_def_cfa %esp, 4 2260; SKX_32-NEXT: vzeroupper 2261; SKX_32-NEXT: retl 2262 call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask) 2263 ret void 2264} 2265declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask) 2266define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0) { 2267; KNL_64-LABEL: test_scatter_16f32: 2268; KNL_64: # %bb.0: 2269; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2270; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2271; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2272; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2273; KNL_64-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1} 2274; KNL_64-NEXT: vextractf64x4 $1, %zmm3, %ymm0 2275; KNL_64-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2} 2276; KNL_64-NEXT: vzeroupper 2277; KNL_64-NEXT: retq 2278; 2279; KNL_32-LABEL: test_scatter_16f32: 2280; KNL_32: # %bb.0: 2281; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2282; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2283; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2284; KNL_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} 2285; KNL_32-NEXT: vzeroupper 2286; KNL_32-NEXT: retl 2287; 2288; SKX-LABEL: test_scatter_16f32: 2289; SKX: # %bb.0: 2290; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2291; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2292; SKX-NEXT: vpmovd2m %zmm2, %k1 2293; SKX-NEXT: kshiftrw $8, %k1, %k2 2294; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1} 2295; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm0 2296; SKX-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2} 2297; SKX-NEXT: 
vzeroupper 2298; SKX-NEXT: retq 2299; 2300; SKX_32-LABEL: test_scatter_16f32: 2301; SKX_32: # %bb.0: 2302; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2303; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2304; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2305; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} 2306; SKX_32-NEXT: vzeroupper 2307; SKX_32-NEXT: retl 2308 call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask) 2309 ret void 2310} 2311declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask) 2312define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0) { 2313; KNL_64-LABEL: test_scatter_16f64: 2314; KNL_64: # %bb.0: 2315; KNL_64-NEXT: vpmovsxbd %xmm2, %zmm2 2316; KNL_64-NEXT: vpslld $31, %zmm2, %zmm2 2317; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1 2318; KNL_64-NEXT: kshiftrw $8, %k1, %k2 2319; KNL_64-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} 2320; KNL_64-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} 2321; KNL_64-NEXT: vzeroupper 2322; KNL_64-NEXT: retq 2323; 2324; KNL_32-LABEL: test_scatter_16f64: 2325; KNL_32: # %bb.0: 2326; KNL_32-NEXT: pushl %ebp 2327; KNL_32-NEXT: .cfi_def_cfa_offset 8 2328; KNL_32-NEXT: .cfi_offset %ebp, -8 2329; KNL_32-NEXT: movl %esp, %ebp 2330; KNL_32-NEXT: .cfi_def_cfa_register %ebp 2331; KNL_32-NEXT: andl $-64, %esp 2332; KNL_32-NEXT: subl $64, %esp 2333; KNL_32-NEXT: vpmovsxbd %xmm1, %zmm1 2334; KNL_32-NEXT: vpslld $31, %zmm1, %zmm1 2335; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k1 2336; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 2337; KNL_32-NEXT: kshiftrw $8, %k1, %k2 2338; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} 2339; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 2340; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} 2341; KNL_32-NEXT: movl %ebp, %esp 2342; KNL_32-NEXT: popl %ebp 2343; KNL_32-NEXT: .cfi_def_cfa %esp, 4 2344; KNL_32-NEXT: vzeroupper 2345; KNL_32-NEXT: retl 2346; 2347; SKX-LABEL: test_scatter_16f64: 2348; SKX: # %bb.0: 2349; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 2350; SKX-NEXT: vpslld $31, %zmm2, %zmm2 2351; SKX-NEXT: vpmovd2m %zmm2, %k1 2352; SKX-NEXT: kshiftrw $8, %k1, %k2 2353; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} 2354; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} 2355; SKX-NEXT: vzeroupper 2356; SKX-NEXT: retq 2357; 2358; SKX_32-LABEL: test_scatter_16f64: 2359; SKX_32: # %bb.0: 2360; SKX_32-NEXT: pushl %ebp 2361; SKX_32-NEXT: .cfi_def_cfa_offset 8 2362; SKX_32-NEXT: .cfi_offset %ebp, -8 2363; SKX_32-NEXT: movl %esp, %ebp 2364; SKX_32-NEXT: .cfi_def_cfa_register %ebp 2365; SKX_32-NEXT: andl $-64, %esp 2366; SKX_32-NEXT: subl $64, %esp 2367; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 2368; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 2369; SKX_32-NEXT: vpmovd2m %zmm1, %k1 2370; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 2371; SKX_32-NEXT: kshiftrw $8, %k1, %k2 2372; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} 2373; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 2374; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} 2375; SKX_32-NEXT: movl %ebp, %esp 2376; SKX_32-NEXT: popl %ebp 2377; SKX_32-NEXT: .cfi_def_cfa %esp, 4 2378; SKX_32-NEXT: vzeroupper 2379; SKX_32-NEXT: retl 2380 call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask) 2381 ret void 2382} 2383declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask) 2384 2385define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) { 2386; KNL_64-LABEL: test_pr28312: 2387; 
KNL_64: # %bb.0: 2388; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2389; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1 2390; KNL_64-NEXT: vptestmd %zmm1, %zmm1, %k0 2391; KNL_64-NEXT: kshiftlw $12, %k0, %k0 2392; KNL_64-NEXT: kshiftrw $12, %k0, %k1 2393; KNL_64-NEXT: vpgatherqq (,%zmm0), %zmm1 {%k1} 2394; KNL_64-NEXT: vpaddq %ymm1, %ymm1, %ymm0 2395; KNL_64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 2396; KNL_64-NEXT: retq 2397; 2398; KNL_32-LABEL: test_pr28312: 2399; KNL_32: # %bb.0: 2400; KNL_32-NEXT: pushl %ebp 2401; KNL_32-NEXT: .cfi_def_cfa_offset 8 2402; KNL_32-NEXT: .cfi_offset %ebp, -8 2403; KNL_32-NEXT: movl %esp, %ebp 2404; KNL_32-NEXT: .cfi_def_cfa_register %ebp 2405; KNL_32-NEXT: andl $-32, %esp 2406; KNL_32-NEXT: subl $32, %esp 2407; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 2408; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1 2409; KNL_32-NEXT: vptestmd %zmm1, %zmm1, %k0 2410; KNL_32-NEXT: kshiftlw $12, %k0, %k0 2411; KNL_32-NEXT: kshiftrw $12, %k0, %k1 2412; KNL_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k1} 2413; KNL_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 2414; KNL_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 2415; KNL_32-NEXT: movl %ebp, %esp 2416; KNL_32-NEXT: popl %ebp 2417; KNL_32-NEXT: .cfi_def_cfa %esp, 4 2418; KNL_32-NEXT: retl 2419; 2420; SKX-LABEL: test_pr28312: 2421; SKX: # %bb.0: 2422; SKX-NEXT: vpslld $31, %xmm1, %xmm1 2423; SKX-NEXT: vpmovd2m %xmm1, %k1 2424; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1} 2425; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0 2426; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 2427; SKX-NEXT: retq 2428; 2429; SKX_32-LABEL: test_pr28312: 2430; SKX_32: # %bb.0: 2431; SKX_32-NEXT: pushl %ebp 2432; SKX_32-NEXT: .cfi_def_cfa_offset 8 2433; SKX_32-NEXT: .cfi_offset %ebp, -8 2434; SKX_32-NEXT: movl %esp, %ebp 2435; SKX_32-NEXT: .cfi_def_cfa_register %ebp 2436; SKX_32-NEXT: andl $-32, %esp 2437; SKX_32-NEXT: subl $32, %esp 2438; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 2439; SKX_32-NEXT: vpmovd2m %xmm1, %k1 2440; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1} 2441; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 2442; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 2443; SKX_32-NEXT: movl %ebp, %esp 2444; SKX_32-NEXT: popl %ebp 2445; SKX_32-NEXT: .cfi_def_cfa %esp, 4 2446; SKX_32-NEXT: retl 2447 %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) 2448 %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) 2449 %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef) 2450 %a = add <4 x i64> %g1, %g2 2451 %b = add <4 x i64> %a, %g3 2452 ret <4 x i64> %b 2453} 2454declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>) 2455 2456define <8 x i32> @test_global_array(<8 x i64> %indxs) { 2457; KNL_64-LABEL: test_global_array: 2458; KNL_64: # %bb.0: 2459; KNL_64-NEXT: kxnorw %k0, %k0, %k1 2460; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2461; KNL_64-NEXT: vmovdqa %ymm1, %ymm0 2462; KNL_64-NEXT: retq 2463; 2464; KNL_32-LABEL: test_global_array: 2465; KNL_32: # %bb.0: 2466; KNL_32-NEXT: kxnorw %k0, %k0, %k1 2467; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2468; KNL_32-NEXT: vmovdqa %ymm1, %ymm0 2469; KNL_32-NEXT: retl 2470; 2471; SKX_SMALL-LABEL: test_global_array: 2472; SKX_SMALL: # %bb.0: 2473; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1 2474; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2475; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0 2476; SKX_SMALL-NEXT: retq 2477; 2478; 
SKX_LARGE-LABEL: test_global_array: 2479; SKX_LARGE: # %bb.0: 2480; SKX_LARGE-NEXT: movabsq $glob_array, %rax 2481; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 2482; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} 2483; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0 2484; SKX_LARGE-NEXT: retq 2485; 2486; SKX_32-LABEL: test_global_array: 2487; SKX_32: # %bb.0: 2488; SKX_32-NEXT: kxnorw %k0, %k0, %k1 2489; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2490; SKX_32-NEXT: vmovdqa %ymm1, %ymm0 2491; SKX_32-NEXT: retl 2492 %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs 2493 %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) 2494 ret <8 x i32> %g 2495} 2496 2497define <8 x i32> @test_global_array_zeroinitializer_index(<8 x i64> %indxs) { 2498; KNL_64-LABEL: test_global_array_zeroinitializer_index: 2499; KNL_64: # %bb.0: 2500; KNL_64-NEXT: kxnorw %k0, %k0, %k1 2501; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2502; KNL_64-NEXT: vmovdqa %ymm1, %ymm0 2503; KNL_64-NEXT: retq 2504; 2505; KNL_32-LABEL: test_global_array_zeroinitializer_index: 2506; KNL_32: # %bb.0: 2507; KNL_32-NEXT: kxnorw %k0, %k0, %k1 2508; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2509; KNL_32-NEXT: vmovdqa %ymm1, %ymm0 2510; KNL_32-NEXT: retl 2511; 2512; SKX_SMALL-LABEL: test_global_array_zeroinitializer_index: 2513; SKX_SMALL: # %bb.0: 2514; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1 2515; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2516; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0 2517; SKX_SMALL-NEXT: retq 2518; 2519; SKX_LARGE-LABEL: test_global_array_zeroinitializer_index: 2520; SKX_LARGE: # %bb.0: 2521; SKX_LARGE-NEXT: movabsq $glob_array, %rax 2522; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1 2523; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1} 2524; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0 2525; SKX_LARGE-NEXT: retq 2526; 2527; SKX_32-LABEL: test_global_array_zeroinitializer_index: 2528; SKX_32: # %bb.0: 2529; SKX_32-NEXT: kxnorw %k0, %k0, %k1 2530; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1} 2531; SKX_32-NEXT: vmovdqa %ymm1, %ymm0 2532; SKX_32-NEXT: retl 2533 %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, <8 x i64> zeroinitializer, <8 x i64> %indxs 2534 %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef) 2535 ret <8 x i32> %g 2536} 2537 2538define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) { 2539; KNL_64-LABEL: v1_scatter: 2540; KNL_64: # %bb.0: 2541; KNL_64-NEXT: testb $1, %dl 2542; KNL_64-NEXT: je .LBB45_2 2543; KNL_64-NEXT: # %bb.1: # %cond.store 2544; KNL_64-NEXT: movl %edi, (%rsi) 2545; KNL_64-NEXT: .LBB45_2: # %else 2546; KNL_64-NEXT: retq 2547; 2548; KNL_32-LABEL: v1_scatter: 2549; KNL_32: # %bb.0: 2550; KNL_32-NEXT: testb $1, {{[0-9]+}}(%esp) 2551; KNL_32-NEXT: je .LBB45_2 2552; KNL_32-NEXT: # %bb.1: # %cond.store 2553; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2554; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2555; KNL_32-NEXT: movl %ecx, (%eax) 2556; KNL_32-NEXT: .LBB45_2: # %else 2557; KNL_32-NEXT: retl 2558; 2559; SKX-LABEL: v1_scatter: 2560; SKX: # %bb.0: 2561; SKX-NEXT: testb $1, %dl 2562; SKX-NEXT: je .LBB45_2 2563; SKX-NEXT: # %bb.1: # %cond.store 2564; SKX-NEXT: movl %edi, (%rsi) 2565; SKX-NEXT: .LBB45_2: # %else 2566; SKX-NEXT: retq 2567; 
2568; SKX_32-LABEL: v1_scatter: 2569; SKX_32: # %bb.0: 2570; SKX_32-NEXT: testb $1, {{[0-9]+}}(%esp) 2571; SKX_32-NEXT: je .LBB45_2 2572; SKX_32-NEXT: # %bb.1: # %cond.store 2573; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2574; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2575; SKX_32-NEXT: movl %ecx, (%eax) 2576; SKX_32-NEXT: .LBB45_2: # %else 2577; SKX_32-NEXT: retl 2578 call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask) 2579 ret void 2580} 2581declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>) 2582 2583define <1 x i32> @v1_gather(<1 x i32*> %ptr, <1 x i1> %mask, <1 x i32> %src0) { 2584; KNL_64-LABEL: v1_gather: 2585; KNL_64: # %bb.0: 2586; KNL_64-NEXT: movl (%rdi), %eax 2587; KNL_64-NEXT: retq 2588; 2589; KNL_32-LABEL: v1_gather: 2590; KNL_32: # %bb.0: 2591; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2592; KNL_32-NEXT: movl (%eax), %eax 2593; KNL_32-NEXT: retl 2594; 2595; SKX-LABEL: v1_gather: 2596; SKX: # %bb.0: 2597; SKX-NEXT: movl (%rdi), %eax 2598; SKX-NEXT: retq 2599; 2600; SKX_32-LABEL: v1_gather: 2601; SKX_32: # %bb.0: 2602; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2603; SKX_32-NEXT: movl (%eax), %eax 2604; SKX_32-NEXT: retl 2605 %res = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptr, i32 4, <1 x i1> <i1 true>, <1 x i32> %src0) 2606 ret <1 x i32>%res 2607} 2608declare <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*>, i32, <1 x i1>, <1 x i32>) 2609 2610; Make sure we don't crash when the index element type is larger than i64 and we need to widen the result 2611; This experienced a bad interaction when we widened and then tried to split. 2612define <2 x float> @large_index(float* %base, <2 x i128> %ind, <2 x i1> %mask, <2 x float> %src0) { 2613; KNL_64-LABEL: large_index: 2614; KNL_64: # %bb.0: 2615; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 2616; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0 2617; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0 2618; KNL_64-NEXT: kshiftlw $14, %k0, %k0 2619; KNL_64-NEXT: kshiftrw $14, %k0, %k1 2620; KNL_64-NEXT: vmovq %rcx, %xmm0 2621; KNL_64-NEXT: vmovq %rsi, %xmm2 2622; KNL_64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 2623; KNL_64-NEXT: vgatherqps (%rdi,%zmm0,4), %ymm1 {%k1} 2624; KNL_64-NEXT: vmovaps %xmm1, %xmm0 2625; KNL_64-NEXT: vzeroupper 2626; KNL_64-NEXT: retq 2627; 2628; KNL_32-LABEL: large_index: 2629; KNL_32: # %bb.0: 2630; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 2631; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0 2632; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0 2633; KNL_32-NEXT: kshiftlw $14, %k0, %k0 2634; KNL_32-NEXT: kshiftrw $14, %k0, %k1 2635; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2636; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2637; KNL_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2638; KNL_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2639; KNL_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2640; KNL_32-NEXT: vgatherqps (%eax,%zmm0,4), %ymm1 {%k1} 2641; KNL_32-NEXT: vmovaps %xmm1, %xmm0 2642; KNL_32-NEXT: vzeroupper 2643; KNL_32-NEXT: retl 2644; 2645; SKX-LABEL: large_index: 2646; SKX: # %bb.0: 2647; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 2648; SKX-NEXT: vpmovq2m %xmm0, %k1 2649; SKX-NEXT: vmovq %rcx, %xmm0 2650; SKX-NEXT: vmovq %rsi, %xmm2 2651; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 2652; SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm1 {%k1} 2653; SKX-NEXT: vmovaps %xmm1, %xmm0 2654; SKX-NEXT: retq 2655; 2656; SKX_32-LABEL: large_index: 2657; SKX_32: # %bb.0: 2658; SKX_32-NEXT: 
vpsllq $63, %xmm0, %xmm0 2659; SKX_32-NEXT: vpmovq2m %xmm0, %k1 2660; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2661; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2662; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2663; SKX_32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2664; SKX_32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2665; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm1 {%k1} 2666; SKX_32-NEXT: vmovaps %xmm1, %xmm0 2667; SKX_32-NEXT: retl 2668 %gep.random = getelementptr float, float* %base, <2 x i128> %ind 2669 %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0) 2670 ret <2 x float>%res 2671} 2672 2673; Make sure we allow index to be sign extended from a smaller than i32 element size. 2674define <16 x float> @sext_i8_index(float* %base, <16 x i8> %ind) { 2675; KNL_64-LABEL: sext_i8_index: 2676; KNL_64: # %bb.0: 2677; KNL_64-NEXT: vpmovsxbd %xmm0, %zmm1 2678; KNL_64-NEXT: kxnorw %k0, %k0, %k1 2679; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} 2680; KNL_64-NEXT: retq 2681; 2682; KNL_32-LABEL: sext_i8_index: 2683; KNL_32: # %bb.0: 2684; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2685; KNL_32-NEXT: vpmovsxbd %xmm0, %zmm1 2686; KNL_32-NEXT: kxnorw %k0, %k0, %k1 2687; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 2688; KNL_32-NEXT: retl 2689; 2690; SKX-LABEL: sext_i8_index: 2691; SKX: # %bb.0: 2692; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 2693; SKX-NEXT: kxnorw %k0, %k0, %k1 2694; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} 2695; SKX-NEXT: retq 2696; 2697; SKX_32-LABEL: sext_i8_index: 2698; SKX_32: # %bb.0: 2699; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2700; SKX_32-NEXT: vpmovsxbd %xmm0, %zmm1 2701; SKX_32-NEXT: kxnorw %k0, %k0, %k1 2702; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 2703; SKX_32-NEXT: retl 2704 2705 %sext_ind = sext <16 x i8> %ind to <16 x i64> 2706 %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind 2707 2708 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 2709 ret <16 x float>%res 2710} 2711 2712; Make sure we allow index to be sign extended from a smaller than i32 element size. 
2713define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) { 2714; KNL_64-LABEL: sext_v8i8_index: 2715; KNL_64: # %bb.0: 2716; KNL_64-NEXT: vpmovsxbd %xmm0, %ymm1 2717; KNL_64-NEXT: movw $255, %ax 2718; KNL_64-NEXT: kmovw %eax, %k1 2719; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} 2720; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2721; KNL_64-NEXT: retq 2722; 2723; KNL_32-LABEL: sext_v8i8_index: 2724; KNL_32: # %bb.0: 2725; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2726; KNL_32-NEXT: vpmovsxbd %xmm0, %ymm1 2727; KNL_32-NEXT: movw $255, %cx 2728; KNL_32-NEXT: kmovw %ecx, %k1 2729; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 2730; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2731; KNL_32-NEXT: retl 2732; 2733; SKX-LABEL: sext_v8i8_index: 2734; SKX: # %bb.0: 2735; SKX-NEXT: vpmovsxbd %xmm0, %ymm1 2736; SKX-NEXT: kxnorw %k0, %k0, %k1 2737; SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1} 2738; SKX-NEXT: retq 2739; 2740; SKX_32-LABEL: sext_v8i8_index: 2741; SKX_32: # %bb.0: 2742; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2743; SKX_32-NEXT: vpmovsxbd %xmm0, %ymm1 2744; SKX_32-NEXT: kxnorw %k0, %k0, %k1 2745; SKX_32-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1} 2746; SKX_32-NEXT: retl 2747 2748 %sext_ind = sext <8 x i8> %ind to <8 x i64> 2749 %gep.random = getelementptr float, float *%base, <8 x i64> %sext_ind 2750 2751 %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef) 2752 ret <8 x float>%res 2753} 2754declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>) 2755 2756; Make sure we also allow index to be zero extended from a smaller than i32 element size. 
2757define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) { 2758; KNL_64-LABEL: zext_i8_index: 2759; KNL_64: # %bb.0: 2760; KNL_64-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2761; KNL_64-NEXT: kxnorw %k0, %k0, %k1 2762; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} 2763; KNL_64-NEXT: retq 2764; 2765; KNL_32-LABEL: zext_i8_index: 2766; KNL_32: # %bb.0: 2767; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2768; KNL_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2769; KNL_32-NEXT: kxnorw %k0, %k0, %k1 2770; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 2771; KNL_32-NEXT: retl 2772; 2773; SKX-LABEL: zext_i8_index: 2774; SKX: # %bb.0: 2775; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2776; SKX-NEXT: kxnorw %k0, %k0, %k1 2777; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} 2778; SKX-NEXT: retq 2779; 2780; SKX_32-LABEL: zext_i8_index: 2781; SKX_32: # %bb.0: 2782; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax 2783; SKX_32-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 2784; SKX_32-NEXT: kxnorw %k0, %k0, %k1 2785; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} 2786; SKX_32-NEXT: retl 2787 2788 %zext_ind = zext <16 x i8> %ind to <16 x i64> 2789 %gep.random = getelementptr float, float *%base, <16 x i64> %zext_ind 2790 2791 %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) 2792 ret <16 x float>%res 2793} 2794 2795; Make sure we also allow index to be zero extended from a smaller than i32 element size. 
define <8 x float> @zext_v8i8_index(float* %base, <8 x i8> %ind) {
; KNL_64-LABEL: zext_v8i8_index:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL_64-NEXT: movw $255, %ax
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: zext_v8i8_index:
; KNL_32: # %bb.0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL_32-NEXT: movw $255, %cx
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: zext_v8i8_index:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: zext_v8i8_index:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%ymm1,4), %ymm0 {%k1}
; SKX_32-NEXT: retl

  %zext_ind = zext <8 x i8> %ind to <8 x i64>
  %gep.random = getelementptr float, float *%base, <8 x i64> %zext_ind

  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
  ret <8 x float>%res
}

; Index requires promotion
define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> %ind, <2 x i1> %mask) {
; KNL_64-LABEL: test_scatter_2i32_index:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT: vptestmq %zmm2, %zmm2, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
; KNL_64-NEXT: vscatterdpd %zmm0, (%rdi,%ymm1,8) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_2i32_index:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT: vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vscatterdpd %zmm0, (%eax,%ymm1,8) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_2i32_index:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vpmovq2m %xmm2, %k1
; SKX-NEXT: vscatterdpd %xmm0, (%rdi,%xmm1,8) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_2i32_index:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vpmovq2m %xmm2, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vscatterdpd %xmm0, (%eax,%xmm1,8) {%k1}
; SKX_32-NEXT: retl
  %gep = getelementptr double, double *%base, <2 x i32> %ind
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)

define <16 x float> @zext_index(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: zext_index:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: zext_index:
; KNL_32: # %bb.0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: zext_index:
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: zext_index:
; SKX_LARGE: # %bb.0:
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vandps (%rax){1to16}, %zmm0, %zmm1
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: zext_index:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl
  %ind_masked = and <16 x i32> %ind, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %zext_ind = zext <16 x i32> %ind_masked to <16 x i64>
  %gep.random = getelementptr float, float *%base, <16 x i64> %zext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

define <16 x double> @test_gather_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %passthru) {
; KNL_64-LABEL: test_gather_setcc_split:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; KNL_64-NEXT: vptestnmd %zmm4, %zmm4, %k1
; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_64-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
; KNL_64-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
; KNL_64-NEXT: vmovapd %zmm2, %zmm0
; KNL_64-NEXT: vmovapd %zmm3, %zmm1
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_gather_setcc_split:
; KNL_32: # %bb.0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vmovapd 72(%ebp), %zmm3
; KNL_32-NEXT: movl 8(%ebp), %eax
; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; KNL_32-NEXT: vptestnmd %zmm4, %zmm4, %k1
; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
; KNL_32-NEXT: vmovapd %zmm3, %zmm1
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_gather_setcc_split:
; SKX: # %bb.0:
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; SKX-NEXT: vptestnmd %ymm4, %ymm4, %k1
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
; SKX-NEXT: vmovapd %zmm2, %zmm0
; SKX-NEXT: vmovapd %zmm3, %zmm1
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_setcc_split:
; SKX_32: # %bb.0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vmovapd 72(%ebp), %zmm3
; SKX_32-NEXT: movl 8(%ebp), %eax
; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; SKX_32-NEXT: vptestnmd %ymm4, %ymm4, %k1
; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
; SKX_32-NEXT: vmovapd %zmm2, %zmm0
; SKX_32-NEXT: vmovapd %zmm3, %zmm1
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: retl
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr double, double *%base, <16 x i64> %sext_ind

  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %gep.random, i32 4, <16 x i1> %mask, <16 x double> %passthru)
  ret <16 x double>%res
}

define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %src0) {
; KNL_64-LABEL: test_scatter_setcc_split:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; KNL_64-NEXT: vptestnmd %zmm4, %zmm4, %k1
; KNL_64-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_64-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; KNL_64-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT: vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_scatter_setcc_split:
; KNL_32: # %bb.0:
; KNL_32-NEXT: pushl %ebp
; KNL_32-NEXT: .cfi_def_cfa_offset 8
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
; KNL_32-NEXT: vmovapd 72(%ebp), %zmm3
; KNL_32-NEXT: movl 8(%ebp), %eax
; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; KNL_32-NEXT: vptestnmd %zmm4, %zmm4, %k1
; KNL_32-NEXT: vptestnmd %zmm1, %zmm1, %k2
; KNL_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
; KNL_32-NEXT: movl %ebp, %esp
; KNL_32-NEXT: popl %ebp
; KNL_32-NEXT: .cfi_def_cfa %esp, 4
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_scatter_setcc_split:
; SKX: # %bb.0:
; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; SKX-NEXT: vptestnmd %ymm4, %ymm4, %k1
; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX-NEXT: vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT: vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_scatter_setcc_split:
; SKX_32: # %bb.0:
; SKX_32-NEXT: pushl %ebp
; SKX_32-NEXT: .cfi_def_cfa_offset 8
; SKX_32-NEXT: .cfi_offset %ebp, -8
; SKX_32-NEXT: movl %esp, %ebp
; SKX_32-NEXT: .cfi_def_cfa_register %ebp
; SKX_32-NEXT: andl $-64, %esp
; SKX_32-NEXT: subl $64, %esp
; SKX_32-NEXT: vmovapd 72(%ebp), %zmm3
; SKX_32-NEXT: movl 8(%ebp), %eax
; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm4
; SKX_32-NEXT: vptestnmd %ymm4, %ymm4, %k1
; SKX_32-NEXT: vptestnmd %ymm1, %ymm1, %k2
; SKX_32-NEXT: vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
; SKX_32-NEXT: .cfi_def_cfa %esp, 4
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr double, double *%base, <16 x i64> %sext_ind

  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %gep.random, i32 4, <16 x i1> %mask)
  ret void
}

; This test case previously triggered an infinite loop when the two gathers became identical after DAG combine removed the sign extend.
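; The dword-index gather sign extends its indices itself and getelementptr sign extends i32 indices to pointer width, so both GEPs address the same elements; the CHECK lines therefore expect a single vgatherdps whose result is added to itself.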
define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %foo) {
; KNL_64-LABEL: test_sext_cse:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vmovaps %zmm0, (%rsi)
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT: vaddps %zmm1, %zmm1, %zmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_sext_cse:
; KNL_32: # %bb.0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT: vmovaps %zmm0, (%ecx)
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT: vaddps %zmm1, %zmm1, %zmm0
; KNL_32-NEXT: retl
;
; SKX-LABEL: test_sext_cse:
; SKX: # %bb.0:
; SKX-NEXT: vmovaps %zmm0, (%rsi)
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vaddps %zmm1, %zmm1, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_sext_cse:
; SKX_32: # %bb.0:
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SKX_32-NEXT: vmovaps %zmm0, (%ecx)
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT: vaddps %zmm1, %zmm1, %zmm0
; SKX_32-NEXT: retl
  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  store <16 x i32> %ind, <16 x i32>* %foo
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  %gep.random2 = getelementptr float, <16 x float*> %broadcast.splat, <16 x i32> %ind
  %res2 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random2, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  %res3 = fadd <16 x float> %res2, %res
  ret <16 x float>%res3
}

define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
; ALL-LABEL: zero_mask:
; ALL: # %bb.0:
; ALL-NEXT: ret{{[l|q]}}
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
  ret void
}

define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; KNL_64-LABEL: gather_2i64_constant_indices:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
; KNL_64-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpgatherdq (%rdi,%ymm1,8), %zmm0 {%k1}
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: gather_2i64_constant_indices:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT: vpgatherdq (%eax,%ymm1,8), %zmm0 {%k1}
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: gather_2i64_constant_indices:
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpmovq2m %xmm0, %k1
; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: gather_2i64_constant_indices:
; SKX_LARGE: # %bb.0:
; SKX_LARGE-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_LARGE-NEXT: vpmovq2m %xmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa (%rax), %xmm1
; SKX_LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_LARGE-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: gather_2i64_constant_indices:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT: vpmovq2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherdq (%eax,%xmm1,8), %xmm0 {%k1}
; SKX_32-NEXT: retl
  %gep = getelementptr i64, i64* %ptr, <2 x i64> <i64 0, i64 -2>
  %res = tail call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep, i32 8, <2 x i1> %mask, <2 x i64> zeroinitializer) #1
  ret <2 x i64> %res
}

define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; KNL_64-LABEL: gather_16i64_constant_indices:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: gather_16i64_constant_indices:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT: vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: gather_16i64_constant_indices:
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovd2m %zmm0, %k1
; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: gather_16i64_constant_indices:
; SKX_LARGE: # %bb.0:
; SKX_LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm1
; SKX_LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_LARGE-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: gather_16i64_constant_indices:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_32-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT: vpmovd2m %zmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl
  %gep = getelementptr i32, i32* %ptr, <16 x i64> <i64 0, i64 -2, i64 1, i64 -8, i64 10, i64 20, i64 50, i64 65536, i64 16777215, i64 2147483647, i64 100, i64 -2000, i64 -2147483648, i64 76897723, i64 7, i64 -67897687>
  %res = tail call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep, i32 4, <16 x i1> %mask, <16 x i32> zeroinitializer) #1
  ret <16 x i32> %res
}

define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: scatter_2i64_constant_indices:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: scatter_2i64_constant_indices:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: scatter_2i64_constant_indices:
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpmovq2m %xmm0, %k1
; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
; SKX_SMALL-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: scatter_2i64_constant_indices:
; SKX_LARGE: # %bb.0:
; SKX_LARGE-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_LARGE-NEXT: vpmovq2m %xmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa (%rax), %xmm0
; SKX_LARGE-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: scatter_2i64_constant_indices:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT: vpmovq2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
; SKX_32-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
; SKX_32-NEXT: retl
  %gep = getelementptr i32, i32* %ptr, <2 x i64> <i64 0, i64 -2>
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %src0, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
  ret void
}

define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: scatter_16i64_constant_indices:
; KNL_64: # %bb.0:
; KNL_64-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: scatter_16i64_constant_indices:
; KNL_32: # %bb.0:
; KNL_32-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: scatter_16i64_constant_indices:
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovd2m %zmm0, %k1
; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_SMALL-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_SMALL-NEXT: vzeroupper
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: scatter_16i64_constant_indices:
; SKX_LARGE: # %bb.0:
; SKX_LARGE-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm0
; SKX_LARGE-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_LARGE-NEXT: vzeroupper
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: scatter_16i64_constant_indices:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_32-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT: vpmovd2m %zmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
  %gep = getelementptr i32, i32* %ptr, <16 x i64> <i64 0, i64 -2, i64 1, i64 -8, i64 10, i64 20, i64 50, i64 65536, i64 16777215, i64 2147483647, i64 100, i64 -2000, i64 -2147483648, i64 76897723, i64 7, i64 -67897687>
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %gep, i32 4, <16 x i1> %mask)
  ret void
}

define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; KNL_64-LABEL: splat_ptr_gather:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $12, %k0, %k0
; KNL_64-NEXT: kshiftrw $12, %k0, %k1
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa %xmm1, %xmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: splat_ptr_gather:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: vpslld $31, %xmm0, %xmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftlw $12, %k0, %k0
; KNL_32-NEXT: kshiftrw $12, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa %xmm1, %xmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: splat_ptr_gather:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT: vmovdqa %xmm1, %xmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: splat_ptr_gather:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0
; SKX_32-NEXT: vpmovd2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT: vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT: vmovdqa %xmm1, %xmm0
; SKX_32-NEXT: retl
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  %3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)

define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
; KNL_64-LABEL: splat_ptr_scatter:
; KNL_64: # %bb.0:
; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT: vpslld $31, %xmm0, %xmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $12, %k0, %k0
; KNL_64-NEXT: kshiftrw $12, %k0, %k1
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: splat_ptr_scatter:
; KNL_32: # %bb.0:
; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: vpslld $31, %xmm0, %xmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftlw $12, %k0, %k0
; KNL_32-NEXT: kshiftrw $12, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
; SKX-LABEL: splat_ptr_scatter:
; SKX: # %bb.0:
; SKX-NEXT: vpslld $31, %xmm0, %xmm0
; SKX-NEXT: vpmovd2m %xmm0, %k1
; SKX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: splat_ptr_scatter:
; SKX_32: # %bb.0:
; SKX_32-NEXT: vpslld $31, %xmm0, %xmm0
; SKX_32-NEXT: vpmovd2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
; SKX_32-NEXT: retl
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
  ret void
}

%struct.foo = type { i8*, i64, i16, i16, i32 }

; This used to cause fast-isel to generate bad copy instructions that would
; cause an error in copyPhysReg.
define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
; KNL_64-LABEL: pr45906:
; KNL_64: # %bb.0: # %bb
; KNL_64-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
; KNL_64-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: pr45906:
; KNL_32: # %bb.0: # %bb
; KNL_32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm1
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
; KNL_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: pr45906:
; SKX_SMALL: # %bb.0: # %bb
; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: pr45906:
; SKX_LARGE: # %bb.0: # %bb
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT: vpgatherqq (,%zmm1), %zmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: pr45906:
; SKX_32: # %bb.0: # %bb
; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
; SKX_32-NEXT: vpgatherdq (,%ymm1), %zmm0 {%k1}
; SKX_32-NEXT: retl
bb:
  %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1
  %tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %tmp, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  ret <8 x i64> %tmp1
}
declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)