; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl| FileCheck %s

; Codegen test for 256-bit (ymm) and 128-bit (xmm) vector integer and
; floating-point arithmetic with AVX-512VL enabled on top of -mcpu=knl.
;
; Each function is paired with FileCheck patterns for the instruction it is
; expected to select. The variants exercised are:
;   *_test / test_*        - register-register form
;   *_fold_*               - full-width vector load folded as a (%rdi) operand
;   *_broadcast_*          - splat constant folded as a constant-pool {1toN}
;                            embedded broadcast
;   *_broadcast2_*         - splat of a scalar load folded as (%rdi){1toN}
;   *_mask_*               - merge-masking: a select between the result and a
;                            pass-through value becomes a {%k} write-mask
;   *_maskz_*              - zero-masking: a select between the result and
;                            zeroinitializer becomes {%k} {z}
; In all masked tests the mask is derived from "icmp ne %mask1, 0".

; 256-bit

; --- 256-bit integer add/sub/mul ---

; CHECK-LABEL: vpaddq256_test
; CHECK: vpaddq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddq256_fold_test
; CHECK: vpaddq (%rdi), %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
  %tmp = load <4 x i64>, <4 x i64>* %j, align 4
  %x = add <4 x i64> %i, %tmp
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddq256_broadcast_test
; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
  %x = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
  ret <4 x i64> %x
}

; The insertelement + zero-index shufflevector pair below builds a splat of
; the loaded scalar.
; CHECK-LABEL: vpaddq256_broadcast2_test
; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
  %j = load i64, i64* %j.ptr
  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
  %x = add <4 x i64> %i, %j.v
  ret <4 x i64> %x
}

; CHECK-LABEL: vpaddd256_test
; CHECK: vpaddd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddd256_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
  %tmp = load <8 x i32>, <8 x i32>* %j, align 4
  %x = add <8 x i32> %i, %tmp
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddd256_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %x
}

; CHECK-LABEL: vpaddd256_mask_test
; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_maskz_test
; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_mask_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
; CHECK: ret
define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_maskz_fold_test
; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, <8 x i32>* %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; CHECK-LABEL: vpaddd256_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

; CHECK-LABEL: vpsubq256_test
; CHECK: vpsubq %ymm{{.*}}
; CHECK: ret
define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK-LABEL: vpsubd256_test
; CHECK: vpsubd %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK-LABEL: vpmulld256_test
; CHECK: vpmulld %ymm{{.*}}
; CHECK: ret
define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; --- 256-bit floating point, unmasked ---

; CHECK-LABEL: test_vaddpd_256
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

; The constant operand is not a splat, so only a plain constant-pool operand
; (no {1toN} broadcast) is matched here.
; CHECK-LABEL: test_fold_vaddpd_256
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
  ret <4 x double> %add.i
}

; NOTE: despite the "vaddpd" in its name, this function adds <8 x float>
; values, so the single-precision mnemonic is the one pinned below. Matching
; the mnemonic keeps the pattern from being satisfied by an unrelated
; instruction that merely has a broadcast constant-pool operand.
; CHECK-LABEL: test_broadcast_vaddpd_256
; CHECK: vaddps LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
; CHECK: ret
define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <8 x float> %b
}

; --- 256-bit single precision, merge-masked into %dst ---
; min/max are expressed as fcmp olt/ogt followed by a select on the operands.

; CHECK-LABEL: test_mask_vaddps_256
; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fadd <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vmulps_256
; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fmul <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vminps_256
; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x float> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vmaxps_256
; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x float> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vsubps_256
; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fsub <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; CHECK-LABEL: test_mask_vdivps_256
; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
                                        <8 x float> %j, <8 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fdiv <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

; --- 256-bit double precision, merge-masked into %dst ---

; CHECK-LABEL: test_mask_vmulpd_256
; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fmul <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vminpd_256
; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x double> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vmaxpd_256
; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x double> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vsubpd_256
; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fsub <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vdivpd_256
; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fdiv <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double> %j, <4 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; --- 256-bit vaddpd: zero-masked, folded-load and broadcast variants ---

; CHECK-LABEL: test_maskz_vaddpd_256
; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
                                          <4 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; CHECK-LABEL: test_mask_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                        <4 x double>* %j, <4 x i64> %mask1)
                                        nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd_256
; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
                                          <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, <4 x double>* %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; CHECK-LABEL: test_broadcast2_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
; CHECK: ret
define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  ret <4 x double> %x
}

; NOTE(review): %dst is never used in this function; the select passes %i
; through on masked-off lanes instead. Confirm whether %dst was intended.
; CHECK-LABEL: test_mask_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                          double* %j, <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
  ret <4 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
                                           <4 x i64> %mask1) nounwind {
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef,
                     <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; 128-bit

; --- 128-bit integer add/sub/mul ---

; CHECK-LABEL: vpaddq128_test
; CHECK: vpaddq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

; CHECK-LABEL: vpaddq128_fold_test
; CHECK: vpaddq (%rdi), %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
  %tmp = load <2 x i64>, <2 x i64>* %j, align 4
  %x = add <2 x i64> %i, %tmp
  ret <2 x i64> %x
}

; Here the splat is built by inserting the loaded scalar into both lanes.
; CHECK-LABEL: vpaddq128_broadcast2_test
; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
  %tmp = load i64, i64* %j
  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
  %x = add <2 x i64> %i, %j.1
  ret <2 x i64> %x
}

; CHECK-LABEL: vpaddd128_test
; CHECK: vpaddd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

; CHECK-LABEL: vpaddd128_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
  %tmp = load <4 x i32>, <4 x i32>* %j, align 4
  %x = add <4 x i32> %i, %tmp
  ret <4 x i32> %x
}

; CHECK-LABEL: vpaddd128_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %x
}

; CHECK-LABEL: vpaddd128_mask_test
; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_maskz_test
; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_mask_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_mask_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
; CHECK: ret
define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_maskz_fold_test
; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, <4 x i32>* %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; CHECK-LABEL: vpaddd128_maskz_broadcast_test
; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
; CHECK: ret
define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

; CHECK-LABEL: vpsubq128_test
; CHECK: vpsubq %xmm{{.*}}
; CHECK: ret
define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

; CHECK-LABEL: vpsubd128_test
; CHECK: vpsubd %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

; CHECK-LABEL: vpmulld128_test
; CHECK: vpmulld %xmm{{.*}}
; CHECK: ret
define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}

; --- 128-bit floating point, unmasked ---

; CHECK-LABEL: test_vaddpd_128
; CHECK: vaddpd{{.*}}
; CHECK: ret
define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
entry:
  %add.i = fadd <2 x double> %x, %y
  ret <2 x double> %add.i
}

; The constant operand is not a splat, so only a plain constant-pool operand
; (no {1toN} broadcast) is matched here.
; CHECK-LABEL: test_fold_vaddpd_128
; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
; CHECK: ret
define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
entry:
  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
  ret <2 x double> %add.i
}

; NOTE: despite the "vaddpd" in its name, this function adds <4 x float>
; values, so the single-precision mnemonic is the one pinned below. Matching
; the mnemonic keeps the pattern from being satisfied by an unrelated
; instruction that merely has a broadcast constant-pool operand.
; CHECK-LABEL: test_broadcast_vaddpd_128
; CHECK: vaddps LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK: ret
define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <4 x float> %b
}

; --- 128-bit single precision, merge-masked into %dst ---
; min/max are expressed as fcmp olt/ogt followed by a select on the operands.

; CHECK-LABEL: test_mask_vaddps_128
; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fadd <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vmulps_128
; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fmul <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vminps_128
; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x float> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vmaxps_128
; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x float> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
  ret <4 x float> %r
}

; CHECK-LABEL: test_mask_vsubps_128
; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fsub <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}


; CHECK-LABEL: test_mask_vdivps_128
; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
                                        <4 x float> %j, <4 x i32> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fdiv <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

; --- 128-bit double precision, merge-masked into %dst ---

; CHECK-LABEL: test_mask_vmulpd_128
; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fmul <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vminpd_128
; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <2 x double> %i, %j
  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vmaxpd_128
; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <2 x double> %i, %j
  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vsubpd_128
; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fsub <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vdivpd_128
; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fdiv <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
; CHECK: ret
define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double> %j, <2 x i64> %mask1)
                                        nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; --- 128-bit vaddpd: zero-masked, folded-load and broadcast variants ---

; CHECK-LABEL: test_maskz_vaddpd_128
; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
; CHECK: ret
define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
                                          <2 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; CHECK-LABEL: test_mask_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                        <2 x double>* %j, <2 x i64> %mask1)
                                        nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

; CHECK-LABEL: test_maskz_fold_vaddpd_128
; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
                                          <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, <2 x double>* %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

; CHECK-LABEL: test_broadcast2_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
; CHECK: ret
define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  ret <2 x double> %x
}

; NOTE(review): %dst is never used in this function; the select passes %i
; through on masked-off lanes instead. Confirm whether %dst was intended.
; CHECK-LABEL: test_mask_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
; CHECK: ret
define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
                                          double* %j, <2 x i64> %mask1)
                                          nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
  ret <2 x double> %r
}

; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
; CHECK: ret
define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
                                           <2 x i64> %mask1) nounwind {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, double* %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}