; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; Tests the legacy vplzcnt{d,q} and vpconflict{d,q} mask intrinsics and the
; broadcastm{b,w} intrinsics at 128-bit and 256-bit vector widths.

declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_vplzcnt_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vplzcnt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

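; In the masked tests the instruction writes into the passthru register under
; %k1 and the result is then moved back to the return register; the maskz
; tests zero the inactive lanes with {z} instead of using a passthru value.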
define <2 x i64> @test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vplzcnt_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vplzcntq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vplzcntq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <8 x i32> @test_x86_vbroadcastmw_256(i16 %a0) {
; X86-LABEL: test_x86_vbroadcastmw_256:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpbroadcastd %eax, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_vbroadcastmw_256:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vpbroadcastd %eax, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16 %a0)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.broadcastmw.256(i16)

define <4 x i32> @test_x86_vbroadcastmw_128(i16 %a0) {
; X86-LABEL: test_x86_vbroadcastmw_128:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpbroadcastd %eax, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_vbroadcastmw_128:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    vpbroadcastd %eax, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16 %a0)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx512.broadcastmw.128(i16)

define <4 x i64> @test_x86_broadcastmb_256(i8 %a0) {
; X86-LABEL: test_x86_broadcastmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpbroadcastq %xmm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_broadcastmb_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vpbroadcastq %rax, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8 %a0)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx512.broadcastmb.256(i8)

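; The 128-bit broadcastmb test mirrors the 256-bit one above: the mask byte is
; zero-extended and broadcast as a quadword, via a 64-bit GPR on x86-64 and via
; vmovd plus an xmm broadcast on 32-bit targets, which lack 64-bit GPRs.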
define <2 x i64> @test_x86_broadcastmb_128(i8 %a0) {
; X86-LABEL: test_x86_broadcastmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_x86_broadcastmb_128:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    vpbroadcastq %rax, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8 %a0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx512.broadcastmb.128(i8)

declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_vpconflict_d_128(<4 x i32> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictd %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictd %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_vpconflict_q_128(<2 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_vpconflict_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpconflictq %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_mask_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

define <4 x i64> @test_int_x86_avx512_maskz_vpconflict_q_256(<4 x i64> %x0, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %eax, %k1
; X86-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_int_x86_avx512_maskz_vpconflict_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1
; X64-NEXT:    vpconflictq %ymm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> zeroinitializer, i8 %x2)
  ret <4 x i64> %res
}