; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

; Codegen tests for the 512-bit (and scalar) AVX-512DQ intrinsics.
; Most tests call the intrinsic under test once with the mask argument passed
; in by the caller and once with an all-ones mask (-1), then add the results
; together (presumably so that neither call is dead). The expected assembly
; is listed directly after each function header.

; --- Packed floating-point <-> 64-bit integer conversions --------------------
; The trailing i32 operand is the rounding/SAE selector. As the expected
; assembly in this file shows, 2 is printed as {ru-sae}, 0 as {rn-sae},
; 8 as {sae}, and 4 produces no annotation.

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

; --- VREDUCE / VRANGE (packed and scalar) ------------------------------------
; The immediate passed as an i32 operand appears as the $imm in the expected
; assembly; the final i32 operand selects {sae} (8) or no annotation (4).

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vreducepd $8, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreducepd $4, {sae}, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreduceps $44, {sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vreduceps $11, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vrangepd $8, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangepd $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangeps $88, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vrangeps $4, {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducess $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangess $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_reduce_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vreducesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vreducesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_range_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vrangesd $4, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vrangesd $4, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


; --- Sub-vector extract / insert ---------------------------------------------
; Each test issues three calls: merge-masked (pass-through vector + variable
; mask), zero-masked (zeroinitializer pass-through + variable mask, printed
; with {z}), and unmasked (mask -1).

declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT:    vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

; --- VFPCLASS (packed and scalar) --------------------------------------------
; These intrinsics return the resulting mask as an integer (i8 / i16); the
; masked and unmasked results are summed with an integer add.

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovb %k0, %ecx
; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    addl %ecx, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB28_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB28_2:
; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB28_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB28_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
; CHECK:       ## BB#0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0 {%k1}
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    je LBB29_2
; CHECK-NEXT:  ## BB#1:
; CHECK-NEXT:    movb $-1, %al
; CHECK-NEXT:  LBB29_2:
; CHECK-NEXT:    vfpclassss $4, %xmm0, %k0
; CHECK-NEXT:    kmovw %k0, %ecx
; CHECK-NEXT:    testb %cl, %cl
; CHECK-NEXT:    je LBB29_4
; CHECK-NEXT:  ## BB#3:
; CHECK-NEXT:    movb $-1, %cl
; CHECK-NEXT:  LBB29_4:
; CHECK-NEXT:    addb %cl, %al
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

; --- 32x2 broadcasts ---------------------------------------------------------
; Merge-masked, zero-masked and unmasked variants, as in the insert tests.

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT:    vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

; --- Vector <-> mask conversions ---------------------------------------------
; These intrinsics take no mask operand; each test is a single call.

declare i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32>)

define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtd2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovd2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
  ret i16 %res
}

declare i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64>)

define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtq2mask_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpmovq2m %zmm0, %k0
; CHECK-NEXT:    kmovb %k0, %eax
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq
  %res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
  ret i8 %res
}

declare <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16)

define <16 x i32>@test_int_x86_avx512_cvtmask2d_512(i16 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2d_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k0
; CHECK-NEXT:    vpmovm2d %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.cvtmask2d.512(i16 %x0)
  ret <16 x i32> %res
}

declare <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8)

define <8 x i64>@test_int_x86_avx512_cvtmask2q_512(i8 %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtmask2q_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovb %edi, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.cvtmask2q.512(i8 %x0)
  ret <8 x i64> %res
}

; --- 32x8 / 64x2 sub-vector broadcasts ---------------------------------------
; The expected assembly for these shows vshuf{f,i}32x4 / vshuf{f,i}64x2
; instructions rather than broadcast mnemonics. The call order here is
; unmasked, merge-masked, then zero-masked.

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddps %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> %x2, i16 %mask)
  %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res1, %res2
  %res5 = fadd <16 x float> %res3, %res4
  ret <16 x float> %res5
}

declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vaddpd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vaddpd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> %x2, i8 %mask)
  %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res1, %res2
  %res5 = fadd <8 x double> %res3, %res4
  ret <8 x double> %res5
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovw %edi, %k1
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask)
  %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res1, %res2
  %res5 = add <16 x i32> %res3, %res4
  ret <16 x i32> %res5
}

declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; CHECK:       ## BB#0:
; CHECK-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT:    kmovb %edi, %k1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    retq

  %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask)
  %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res1, %res2
  %res5 = add <8 x i64> %res3, %res4
  ret <8 x i64> %res5
}