; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Tests lowering of the llvm.cttz intrinsic (count trailing zeros) on 512-bit
; vectors (v8i64/v16i32/v32i16/v64i8), for both the zero-defined (i1 0) and
; zero-undef (i1 -1) forms, across AVX512CD/AVX512BW feature combinations.
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=-avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW

define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; AVX512CD-LABEL: testv8i64:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512CD-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv8i64:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv8i64:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 0)
  ret <8 x i64> %out
}

define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; AVX512CD-LABEL: testv8i64u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
; AVX512CD-NEXT: vpsubq %zmm0, %zmm1, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv8i64u:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CDBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1
; AVX512CDBW-NEXT: vpsubq %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv8i64u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubq %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 -1)
  ret <8 x i64> %out
}

define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; AVX512CD-LABEL: testv16i32:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm3
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm3, %ymm4, %ymm3
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm4, %ymm1
; AVX512CD-NEXT: vpaddb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpxor %ymm3, %ymm3, %ymm3
; AVX512CD-NEXT: vpunpckhdq {{.*#+}} ymm5 = ymm1[2],ymm3[2],ymm1[3],ymm3[3],ymm1[6],ymm3[6],ymm1[7],ymm3[7]
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm5, %ymm5
; AVX512CD-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm3[0],ymm1[1],ymm3[1],ymm1[4],ymm3[4],ymm1[5],ymm3[5]
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm5
; AVX512CD-NEXT: vpshufb %ymm5, %ymm4, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm4, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm3[2],ymm0[3],ymm3[3],ymm0[6],ymm3[6],ymm0[7],ymm3[7]
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm2, %ymm2
; AVX512CD-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[1],ymm3[1],ymm0[4],ymm3[4],ymm0[5],ymm3[5]
; AVX512CD-NEXT: vpsadbw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vpackuswb %ymm2, %ymm0, %ymm0
; AVX512CD-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv16i32:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
; AVX512CDBW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512CDBW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv16i32:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 0)
  ret <16 x i32> %out
}

define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; AVX512CD-LABEL: testv16i32u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1
; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv16i32u:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CDBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv16i32u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2
; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm3
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm3, %zmm4, %zmm3
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm4, %zmm0
; AVX512BW-NEXT: vpaddb %zmm3, %zmm0, %zmm0
; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
; AVX512BW-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpackuswb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 -1)
  ret <16 x i32> %out
}

define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT: vpsubw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm5
; AVX512CD-NEXT: vpaddb %ymm0, %ymm5, %ymm0
; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512CD-NEXT: vpsubw %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsubw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsllw $8, %ymm1, %ymm2
; AVX512CD-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512CD-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv32i16:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsllw $8, %zmm0, %zmm1
; AVX512CDBW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv32i16:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 0)
  ret <32 x i16> %out
}

define <32 x i16> @testv32i16u(<32 x i16> %in) nounwind {
; AVX512CD-LABEL: testv32i16u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT: vpsubw %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT: vpsubw %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpsllw $8, %ymm0, %ymm5
; AVX512CD-NEXT: vpaddb %ymm0, %ymm5, %ymm0
; AVX512CD-NEXT: vpsrlw $8, %ymm0, %ymm0
; AVX512CD-NEXT: vpsubw %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsubw %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsllw $8, %ymm1, %ymm2
; AVX512CD-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX512CD-NEXT: vpsrlw $8, %ymm1, %ymm1
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv32i16u:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsllw $8, %zmm0, %zmm1
; AVX512CDBW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512CDBW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv32i16u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubw %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 -1)
  ret <32 x i16> %out
}

define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpsubb %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv64i8:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv64i8:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 0)
  ret <64 x i8> %out
}

define <64 x i8> @testv64i8u(<64 x i8> %in) nounwind {
; AVX512CD-LABEL: testv64i8u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: vpxor %ymm2, %ymm2, %ymm2
; AVX512CD-NEXT: vpsubb %ymm0, %ymm2, %ymm3
; AVX512CD-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512CD-NEXT: vpsubb %ymm3, %ymm0, %ymm0
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm5
; AVX512CD-NEXT: vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CD-NEXT: vpshufb %ymm5, %ymm6, %ymm5
; AVX512CD-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512CD-NEXT: vpand %ymm4, %ymm0, %ymm0
; AVX512CD-NEXT: vpshufb %ymm0, %ymm6, %ymm0
; AVX512CD-NEXT: vpaddb %ymm5, %ymm0, %ymm0
; AVX512CD-NEXT: vpsubb %ymm1, %ymm2, %ymm2
; AVX512CD-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: vpsubb %ymm3, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm2
; AVX512CD-NEXT: vpshufb %ymm2, %ymm6, %ymm2
; AVX512CD-NEXT: vpsrlw $4, %ymm1, %ymm1
; AVX512CD-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512CD-NEXT: vpshufb %ymm1, %ymm6, %ymm1
; AVX512CD-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; AVX512CD-NEXT: retq
;
; AVX512CDBW-LABEL: testv64i8u:
; AVX512CDBW: ## BB#0:
; AVX512CDBW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512CDBW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512CDBW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512CDBW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512CDBW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512CDBW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512CDBW-NEXT: retq
;
; AVX512BW-LABEL: testv64i8u:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT: vpsubb %zmm0, %zmm1, %zmm1
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsubb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: retq
  %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 -1)
  ret <64 x i8> %out
}

declare <8 x i64> @llvm.cttz.v8i64(<8 x i64>, i1)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)
declare <32 x i16> @llvm.cttz.v32i16(<32 x i16>, i1)
declare <64 x i8> @llvm.cttz.v64i8(<64 x i8>, i1)