; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-speculative-load-hardening | FileCheck %s
;
; Gather intrinsics compiled with -x86-speculative-load-hardening.
;
; Every test below calls one gather (or gather-prefetch) intrinsic and returns
; its result. The expected assembly has the same shape in each function:
;   * a mask is formed in %rax from the stack pointer (movq %rsp / sarq $63),
;   * the base pointer in %rdi is OR'ed with that mask,
;   * the mask is broadcast into a vector register and OR'ed into the index
;     vector in %xmm0/%ymm0/%zmm0 before the gather executes,
;   * the mask is shifted (shlq $47) and OR'ed back into %rsp before returning.
;
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; AVX2 floating-point gathers (attribute group #0).

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(i8* %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(i8* %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(i8* %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(i8* %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(i8* %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(i8* %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(i8* %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(i8* %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

; AVX2 integer gathers (attribute group #0).

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(i8* %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(i8* %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(i8* %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(i8* %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(i8* %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(i8* %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(i8* %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(i8* %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

; AVX-512F 512-bit gathers (attribute group #1). The write mask is passed as
; an all-ones integer (-1) and shows up as kxnorw into %k1.

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, i8*, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, i8*, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, i8*, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, i8*, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, i8*, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(i8* %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, i8* %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, i8*, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(i8* %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}


declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, i8*, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, i8*, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(i8* %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, i8* %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

; Gather prefetch (attribute group #1). This intrinsic returns void; note the
; argument order is (index vector, base pointer), the reverse of the gathers.

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8*, i32, i32);

define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, i8* %b) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm1
; CHECK-NEXT:    vporq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, i8* %b, i32 4, i32 3)
  ret void
}

; AVX-512VL 128-bit and 256-bit floating-point gathers (attribute group #2).

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

; AVX-512VL 128-bit and 256-bit integer gathers (attribute group #2).

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(i8* %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, i8* %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(i8* %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, i8* %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(i8* %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(i8* %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, i8* %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

; Attribute groups select the vector ISA each test function is compiled for.
attributes #0 = { nounwind "target-features"="+avx2" }
attributes #1 = { nounwind "target-features"="+avx512f" }
attributes #2 = { nounwind "target-features"="+avx512vl" }