; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl| FileCheck %s

; Codegen checks for shufflevector on i1 (mask) vectors with the AVX-512
; BW, DQ and VL features enabled. Each function places, concatenates or
; extracts a sub-vector of i1 elements; the expected assembly for each one
; is pinned by the autogenerated assertions inside the function.

; Places the two elements of %a in lanes 2-3 of an <8 x i1>; all other lanes
; are undef.
define <8 x i1> @test(<2 x i1> %a) {
; CHECK-LABEL: test:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Places the two elements of %a in lanes 4-5 of an <8 x i1>; all other lanes
; are undef.
define <8 x i1> @test1(<2 x i1> %a) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Places the two elements of %a in lanes 4-5 of an <8 x i1>. Lanes 0-1 select
; mask index 3, i.e. an element of the zeroinitializer operand; the remaining
; lanes are undef.
define <8 x i1> @test2(<2 x i1> %a) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpmovm2q %k0, %zmm0
; CHECK-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,0,1],zmm0[0,1,0,1]
; CHECK-NEXT:    vpsllq $63, %zmm0, %zmm0
; CHECK-NEXT:    vptestmq %zmm0, %zmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Concatenates %a (lanes 0-3) with a zero <4 x i1> (lanes 4-7).
define <8 x i1> @test3(<4 x i1> %a) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenates %a (lanes 0-3) with %b (lanes 4-7).
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k1, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenates %a (lanes 0-1) with %b (lanes 2-3).
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k1, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    kshiftrb $2, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i1> %res
}

; Concatenates %a and %b into lanes 0-3 of a <16 x i1>; lanes 4-15 are undef.
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm0
; CHECK-NEXT:    vptestmq %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k1, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    kshiftrb $2, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    kunpckbw %k0, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i1> %res
}

; Concatenates %a and %b into lanes 0-7 of a <32 x i1>; lanes 8-31 are undef.
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k1, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    korb %k1, %k0, %k0
; CHECK-NEXT:    kunpckbw %k0, %k0, %k0
; CHECK-NEXT:    kunpckwd %k0, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i1> %res
}

; Places %a in lanes 0-7 and %b in lanes 32-39 of a <64 x i1>; every other
; lane is undef.
define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
; CHECK-NEXT:    vpmovw2m %xmm1, %k0
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k1
; CHECK-NEXT:    kunpckdq %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %zmm0
; CHECK-NEXT:    retq

  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i1> %res
}

; Extracts elements 4-7 of %a as a <4 x i1>; no element of %b is selected.
define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k0
; CHECK-NEXT:    kshiftrw $4, %k0, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i1> %res
}

; Extracts elements 2-3 of %a as a <2 x i1>; no element of %b is selected.
define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %xmm0, %xmm0, %k0
; CHECK-NEXT:    kshiftrw $2, %k0, %k0
; CHECK-NEXT:    vpmovm2q %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
  ret <2 x i1> %res
}