; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512dq -mattr=+avx512vl | FileCheck %s

; Shufflevector patterns on <N x i1> vectors that insert, concatenate or
; extract i1 subvectors, compiled for x86-64 with AVX512BW+DQ+VL enabled.
; The expected assembly below is generated; regenerate it with the script
; named in the NOTE line rather than editing it by hand.

; Place the 2 elements of %a in lanes 2-3 of an 8-lane result; every other
; lane is undef.
define <8 x i1> @test(<2 x i1> %a) {
; CHECK-LABEL: test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Place the 2 elements of %a in lanes 4-5 of an 8-lane result; every other
; lane is undef.
define <8 x i1> @test1(<2 x i1> %a) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Same placement as test1 (lanes 4-5), but lanes 0-1 select index 3, i.e. an
; element of the zeroinitializer second operand; lanes 2-3 and 6-7 are undef.
define <8 x i1> @test2(<2 x i1> %a) {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef>
  ret <8 x i1> %res
}

; Concatenate %a (lanes 0-3) with a zero vector (lanes 4-7).
define <8 x i1> @test3(<4 x i1> %a) {
; CHECK-LABEL: test3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpmovd2m %xmm0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenate %a (lanes 0-3) and %b (lanes 4-7).
define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k0
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpmovd2m %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    korb %k0, %k1, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i1> %res
}

; Concatenate %a (lanes 0-1) and %b (lanes 2-3).
define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
; CHECK-NEXT:    vpmovq2m %xmm1, %k0
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    korw %k0, %k1, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i1> %res
}

; Concatenate %a and %b into lanes 0-3 of a 16-lane result; lanes 4-15 are
; undef.
define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) {
; CHECK-LABEL: test6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm1, %xmm1
; CHECK-NEXT:    vpmovq2m %xmm1, %k0
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k1
; CHECK-NEXT:    kshiftlb $2, %k0, %k0
; CHECK-NEXT:    korw %k0, %k1, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    retq

  %res = shufflevector <2 x i1> %a, <2 x i1> %b, <16 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i1> %res
}

; Concatenate %a and %b into lanes 0-7 of a 32-lane result; lanes 8-31 are
; undef.
define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k0
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpmovd2m %xmm0, %k1
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    korb %k0, %k1, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq

  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <32 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i1> %res
}

; Place %a in lanes 0-7 and %b in lanes 32-39 of a 64-lane result; all other
; lanes are undef.
define <64 x i1> @test8(<8 x i1> %a, <8 x i1>%b) {
; CHECK-LABEL: test8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm1, %xmm1
; CHECK-NEXT:    vpmovw2m %xmm1, %k0
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k1
; CHECK-NEXT:    kunpckdq %k1, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %zmm0
; CHECK-NEXT:    retq

  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i1> %res
}

; Extract lanes 4-7 of %a; %b is not referenced by the mask.
define <4 x i1> @test9(<8 x i1> %a, <8 x i1> %b) {
; CHECK-LABEL: test9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %xmm0, %k0
; CHECK-NEXT:    kshiftrb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2d %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <8 x i1> %a, <8 x i1> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i1> %res
}

; Extract lanes 2-3 of %a; %b is not referenced by the mask.
define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) {
; CHECK-LABEL: test10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpmovd2m %xmm0, %k0
; CHECK-NEXT:    kshiftrb $2, %k0, %k0
; CHECK-NEXT:    vpmovm2q %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i1> %a, <4 x i1> %b, <2 x i32> <i32 2, i32 3>
  ret <2 x i1> %res
}

; Place %a in lanes 4-7 of an 8-lane result; lanes 0-3 are undef and %b is
; not referenced.
define <8 x i1> @test11(<4 x i1> %a, <4 x i1>%b) {
; CHECK-LABEL: test11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vpmovd2m %xmm0, %k0
; CHECK-NEXT:    kshiftlb $4, %k0, %k0
; CHECK-NEXT:    vpmovm2w %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i1> %a, <4 x i1> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i1> %res
}

; Lanes 0-9 select elements of the zeroinitializer operand, lanes 10-11 hold
; %a, and lanes 12-15 are undef (16-lane result).
define <16 x i1> @test12(<2 x i1> %a) {
; CHECK-LABEL: test12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftlw $10, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %xmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <16 x i32> <
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i1> %res
}

; Same layout as test12 widened to 32 lanes: zeros in lanes 0-9, %a in lanes
; 10-11, undef from lane 12 upward.
define <32 x i1> @test13(<2 x i1> %a) {
; CHECK-LABEL: test13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftld $10, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %ymm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <32 x i32> <
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i1> %res
}

; Same layout as test12 widened to 64 lanes: zeros in lanes 0-9, %a in lanes
; 10-11, undef from lane 12 upward.
define <64 x i1> @test14(<2 x i1> %a) {
; CHECK-LABEL: test14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $63, %xmm0, %xmm0
; CHECK-NEXT:    vpmovq2m %xmm0, %k0
; CHECK-NEXT:    kshiftlq $10, %k0, %k0
; CHECK-NEXT:    vpmovm2b %k0, %zmm0
; CHECK-NEXT:    retq
  %res = shufflevector <2 x i1> %a, <2 x i1> zeroinitializer, <64 x i32> <
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
    i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <64 x i1> %res
}

; Make sure we can recognize this shuffle as an insertion into an all-zero
; vector: lanes 0-1 come from the compare result and lanes 2-7 all select
; index 2, which is the `i1 false` element of the second operand.
define i8 @test15(<2 x i64> %x) {
; CHECK-LABEL: test15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %a = icmp eq <2 x i64> %x, zeroinitializer
  %b = shufflevector <2 x i1> %a, <2 x i1> <i1 false, i1 undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %c = bitcast <8 x i1> %b to i8
  ret i8 %c
}