; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512dq | FileCheck %s

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtpd2qq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtpd2qq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtpd2uqq {ru-sae}, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtpd2uqq {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtps2qq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2qq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtps2uqq {ru-sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtps2uqq {rn-sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 2)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvtps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 0)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_qq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_qq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)

define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1}
; CHECK-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 %x2, i32 4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x0, <8 x i64> %x1, i8 -1, i32 8)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64>, <8 x double>, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_cvt_uqq2pd_512(<8 x i64> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtuqq2pd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vcvtuqq2pd {rn-sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 %x2, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtuqq2pd.512(<8 x i64> %x0, <8 x double> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64>, <8 x float>, i8, i32)

define <8 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_512(<8 x i64> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vcvtuqq2ps %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vcvtuqq2ps {rn-sae}, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 %x2, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtuqq2ps.512(<8 x i64> %x0, <8 x float> %x1, i8 -1, i32 0)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducepd {{.*}}{%k1}
; CHECK: vreducepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_reduce_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
  %res = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 8, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.reduce.pd.512(<8 x double> %x0, i32 4, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreduceps
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vreduceps
define <16 x float>@test_int_x86_avx512_mask_reduce_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
  %res = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 44, <16 x float> %x2, i16 %x3, i32 8)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.reduce.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 4)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangepd
; CHECK: {%k1}
; CHECK: vrangepd
; CHECK: {sae}
define <8 x double>@test_int_x86_avx512_mask_range_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
  %res = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 8, <8 x double> %x3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.range.pd.512(<8 x double> %x0, <8 x double> %x1, i32 4, <8 x double> %x3, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_range_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangeps
; CHECK: {%k1}
; CHECK: vrangeps
; CHECK: {sae}
define <16 x float>@test_int_x86_avx512_mask_range_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
  %res = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 88, <16 x float> %x3, i16 %x4, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.range.ps.512(<16 x float> %x0, <16 x float> %x1, i32 4, <16 x float> %x3, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducess
; CHECK: {%k1}
; CHECK: vreducess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_reduce_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_ss
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangess
; CHECK: {sae}
; CHECK: {%k1}
; CHECK: vrangess
; CHECK: {sae}
define <4 x float>@test_int_x86_avx512_mask_range_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4, i32 8)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)

; CHECK-LABEL: @test_int_x86_avx512_mask_reduce_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vreducesd
; CHECK: {%k1}
; CHECK: vreducesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_reduce_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32, i32)
; CHECK-LABEL: @test_int_x86_avx512_mask_range_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vrangesd
; CHECK: {%k1}
; CHECK: vrangesd
; CHECK: {sae}
define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
  %res = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 4, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}


declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
; CHECK-NEXT: retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
; CHECK-NEXT: retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_pd_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasspd
; CHECK: {%k1}
; CHECK: vfpclasspd
; CHECK: kmovb %k0
define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}
declare i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float>, i32, i16)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ps_512
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclassps
; CHECK: vfpclassps
; CHECK: {%k1}
; CHECK: kmov
define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
  %res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
  %res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
  %res2 = add i16 %res, %res1
  ret i16 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_sd
; CHECK-NOT: call
; CHECK: kmov
; CHECK: vfpclasssd
; CHECK: %k0 {%k1}
; CHECK: vfpclasssd
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float>, i32, i8)

; CHECK-LABEL: @test_int_x86_avx512_mask_fpclass_ss
; CHECK-NOT: call
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
; CHECK: {%k1}
; CHECK: kmovw
; CHECK: vfpclassss
; CHECK: %k0
define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
  %res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
  %res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_broadcastf32x2_512(<4 x float> %x0, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcastf32x2 %xmm0, %zmm0
; CHECK-NEXT: vaddps %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 %x3)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %x3)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x2.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res3, %res2
  ret <16 x float> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x2_512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm1 {%k1}
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm2 {%k1} {z}
; CHECK-NEXT: vbroadcasti32x2 %xmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %x3)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x2.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res3, %res2
  ret <16 x i32> %res4
}