; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s

declare <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512 (i8*, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512 (i8*, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512 (i8*, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512 (i8*, i8, <8 x i64>, <8 x double>, i32)

define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x double> %x, i32 4)
  ret void
}
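
; AVX-512 gathers consume their mask register destructively (each completed
; element clears its mask bit, leaving the register zero at the end), so when
; the same mask also guards the following scatter it must be copied first;
; that is the kmovq %k1, %k2 checked in the tests above.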

;;
;; Integer Gather/Scatter
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32>, i8*, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512 (i8*, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64>, i8*, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512 (i8*, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512 (i8*, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64>, i8*, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512 (i8*, i8, <8 x i64>, <8 x i64>, i32)

define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512 (<16 x i32> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512 (<8 x i32> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512 (<8 x i64> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512 (<8 x i64> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind2, <8 x i64> %x, i32 4)
  ret void
}
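
; The *_execdomain tests below check that the moves of the passthru/result
; vector around an FP gather or scatter are emitted in the floating-point
; execution domain (vmovaps/vmovapd), not as integer-domain moves.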

define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512 (<8 x double> %src, i8* %base, <8 x i32>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, i8* %base, <8 x double>* %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512 (<8 x double> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  store <8 x double> %x, <8 x double>* %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512 (<16 x float> %src, i8* %base, <16 x i32>%ind, i16 %mask, i32 4)
  ret <16 x float> %res;
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, i8* %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 %mask, i32 4)
  ret <8 x float> %res;
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %zmm1
; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %ymm1
; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = load <8 x float>, <8 x float>* %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
  ret void
}
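
; A constant all-ones mask (i8/i16 -1) is materialized with a kxnorw of a
; mask register with itself rather than with a kmov from a GPR.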

define void @gather_qps(<8 x i64> %ind, <8 x float> %src, i8* %base, i8* %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512 (<8 x float> %src, i8* %base, <8 x i64>%ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 -1, <8 x i64>%ind2, <8 x float> %x, i32 4)
  ret void
}
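
; For the prefetch intrinsics the final i32 operand selects the locality
; hint: in this test a hint of 0 lowers to the T0 forms (vgatherpf0qps,
; vscatterpf0qps) and a hint of 1 to the T1 forms.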

declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, i8* , i32, i32);
define void @prefetch(<8 x i64> %ind, i8* %base) {
; CHECK-LABEL: prefetch:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 0)
  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 4, i32 1)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 0)
  call void @llvm.x86.avx512.scatterpf.qps.512(i8 -1, <8 x i64> %ind, i8* %base, i32 2, i32 1)
  ret void
}
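
; The gather3* tests below exercise the 128-bit and 256-bit (AVX512VL)
; gather forms. Each test issues the gather twice, typically once under the
; incoming mask and once under an all-ones mask with a different scale, and
; adds the two results.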

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, i8*, <2 x i64>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}
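
; When the two calls are identical (same mask and scale, as in several .di
; tests), the redundant gather is CSE'd away and the single result is added
; to itself.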

declare <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64>, i8*, <2 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, i8*, <4 x i64>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3div4_df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64>, i8*, <4 x i64>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, i8*, <2 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, i8*, <2 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, i8* %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, i8*, <4 x i64>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, i8* %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, i8*, <4 x i64>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, i8* %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}
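
; The gather3div* intrinsics above use 64-bit (qword) indices; the
; gather3siv* intrinsics below use 32-bit (dword) indices.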

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, i8*, <4 x i32>, i8, i32)

define <2 x double>@test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, i8*, <4 x i32>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, i8*, <4 x i32>, i8, i32)

define <4 x double>@test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, i8*, <4 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, i8*, <4 x i32>, i8, i32)

define <4 x float>@test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, i8*, <4 x i32>, i8, i32)

define <4 x i32>@test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, i8* %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, i8*, <8 x i32>, i8, i32)

define <8 x float>@test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm2 {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, i8* %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, i8*, <8 x i32>, i8, i32)

define <8 x i32>@test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vmovaps %zmm0, %zmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, i8* %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}
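
; The scatter tests below store the same vector twice, once under the
; incoming mask and once under a kxnorw-built all-ones mask, using different
; scales so the two stores remain distinct.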

declare void @llvm.x86.avx512.scatterdiv2.df(i8*, i8, <2 x i64>, <2 x double>, i32)

define void@test_int_x86_avx512_scatterdiv2_df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(i8*, i8, <2 x i64>, <2 x i64>, i32)

define void@test_int_x86_avx512_scatterdiv2_di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(i8* %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(i8*, i8, <4 x i64>, <4 x double>, i32)

define void@test_int_x86_avx512_scatterdiv4_df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(i8* %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(i8*, i8, <4 x i64>, <4 x i64>, i32)

define void@test_int_x86_avx512_scatterdiv4_di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(i8*, i8, <2 x i64>, <4 x float>, i32)

define void@test_int_x86_avx512_scatterdiv4_sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(i8* %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(i8*, i8, <2 x i64>, <4 x i32>, i32)

define void@test_int_x86_avx512_scatterdiv4_si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(i8* %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(i8*, i8, <4 x i64>, <4 x float>, i32)

define void@test_int_x86_avx512_scatterdiv8_sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(i8* %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.si(i8*, i8, <4 x i64>, <4 x i32>, i32)

define void@test_int_x86_avx512_scatterdiv8_si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.si(i8* %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}
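
; As with the gathers, the scatterdiv* intrinsics above use 64-bit indices
; and the scattersiv* intrinsics below use 32-bit indices.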

declare void @llvm.x86.avx512.scattersiv2.df(i8*, i8, <4 x i32>, <2 x double>, i32)

define void@test_int_x86_avx512_scattersiv2_df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.df(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.di(i8*, i8, <4 x i32>, <2 x i64>, i32)

define void@test_int_x86_avx512_scattersiv2_di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.di(i8* %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.df(i8*, i8, <4 x i32>, <4 x double>, i32)

define void@test_int_x86_avx512_scattersiv4_df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.df(i8* %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.di(i8*, i8, <4 x i32>, <4 x i64>, i32)

define void@test_int_x86_avx512_scattersiv4_di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    kxnorw %k2, %k2, %k2
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.di(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.sf(i8*, i8, <4 x i32>, <4 x float>, i32)

define void@test_int_x86_avx512_scattersiv4_sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.sf(i8* %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.si(i8*, i8, <4 x i32>, <4 x i32>, i32)

define void@test_int_x86_avx512_scattersiv4_si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.si(i8* %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.sf(i8*, i8, <8 x i32>, <8 x float>, i32)

define void@test_int_x86_avx512_scattersiv8_sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.sf(i8* %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.si(i8*, i8, <8 x i32>, <8 x i32>, i32)

define void@test_int_x86_avx512_scattersiv8_si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %esi, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k1, %k1, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(i8* %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}