1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW 4 5; 6; udiv by 7 7; 8 9define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind { 10; AVX-LABEL: test_div7_8i64: 11; AVX: # BB#0: 12; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 13; AVX-NEXT: vpextrq $1, %xmm1, %rcx 14; AVX-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 15; AVX-NEXT: movq %rcx, %rax 16; AVX-NEXT: mulq %rsi 17; AVX-NEXT: subq %rdx, %rcx 18; AVX-NEXT: shrq %rcx 19; AVX-NEXT: addq %rdx, %rcx 20; AVX-NEXT: shrq $2, %rcx 21; AVX-NEXT: vmovq %rcx, %xmm2 22; AVX-NEXT: vmovq %xmm1, %rcx 23; AVX-NEXT: movq %rcx, %rax 24; AVX-NEXT: mulq %rsi 25; AVX-NEXT: subq %rdx, %rcx 26; AVX-NEXT: shrq %rcx 27; AVX-NEXT: addq %rdx, %rcx 28; AVX-NEXT: shrq $2, %rcx 29; AVX-NEXT: vmovq %rcx, %xmm1 30; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 31; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 32; AVX-NEXT: vpextrq $1, %xmm2, %rcx 33; AVX-NEXT: movq %rcx, %rax 34; AVX-NEXT: mulq %rsi 35; AVX-NEXT: subq %rdx, %rcx 36; AVX-NEXT: shrq %rcx 37; AVX-NEXT: addq %rdx, %rcx 38; AVX-NEXT: shrq $2, %rcx 39; AVX-NEXT: vmovq %rcx, %xmm3 40; AVX-NEXT: vmovq %xmm2, %rcx 41; AVX-NEXT: movq %rcx, %rax 42; AVX-NEXT: mulq %rsi 43; AVX-NEXT: subq %rdx, %rcx 44; AVX-NEXT: shrq %rcx 45; AVX-NEXT: addq %rdx, %rcx 46; AVX-NEXT: shrq $2, %rcx 47; AVX-NEXT: vmovq %rcx, %xmm2 48; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 49; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 50; AVX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 51; AVX-NEXT: vpextrq $1, %xmm2, %rcx 52; AVX-NEXT: movq %rcx, %rax 53; AVX-NEXT: mulq %rsi 54; AVX-NEXT: subq %rdx, %rcx 55; AVX-NEXT: shrq %rcx 56; AVX-NEXT: addq %rdx, %rcx 57; AVX-NEXT: shrq $2, %rcx 58; AVX-NEXT: vmovq %rcx, %xmm3 59; AVX-NEXT: vmovq %xmm2, %rcx 60; AVX-NEXT: movq %rcx, %rax 61; AVX-NEXT: mulq %rsi 62; AVX-NEXT: subq %rdx, %rcx 63; AVX-NEXT: shrq %rcx 64; AVX-NEXT: addq %rdx, %rcx 65; AVX-NEXT: shrq $2, %rcx 66; AVX-NEXT: vmovq %rcx, %xmm2 67; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 68; AVX-NEXT: vpextrq $1, %xmm0, %rcx 69; AVX-NEXT: movq %rcx, %rax 70; AVX-NEXT: mulq %rsi 71; AVX-NEXT: subq %rdx, %rcx 72; AVX-NEXT: shrq %rcx 73; AVX-NEXT: addq %rdx, %rcx 74; AVX-NEXT: shrq $2, %rcx 75; AVX-NEXT: vmovq %rcx, %xmm3 76; AVX-NEXT: vmovq %xmm0, %rcx 77; AVX-NEXT: movq %rcx, %rax 78; AVX-NEXT: mulq %rsi 79; AVX-NEXT: subq %rdx, %rcx 80; AVX-NEXT: shrq %rcx 81; AVX-NEXT: addq %rdx, %rcx 82; AVX-NEXT: shrq $2, %rcx 83; AVX-NEXT: vmovq %rcx, %xmm0 84; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 85; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 86; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 87; AVX-NEXT: retq 88 %res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> 89 ret <8 x i64> %res 90} 91 92define <16 x i32> @test_div7_16i32(<16 x i32> %a) nounwind { 93; AVX-LABEL: test_div7_16i32: 94; AVX: # BB#0: 95; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 96; AVX-NEXT: vpextrd $1, %xmm1, %eax 97; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 98; AVX-NEXT: shrq $32, %rcx 99; AVX-NEXT: subl %ecx, %eax 100; AVX-NEXT: shrl %eax 101; AVX-NEXT: addl %ecx, %eax 102; AVX-NEXT: shrl $2, %eax 103; AVX-NEXT: vmovd %xmm1, %ecx 104; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 105; AVX-NEXT: shrq $32, %rdx 106; AVX-NEXT: subl %edx, %ecx 107; AVX-NEXT: shrl %ecx 108; AVX-NEXT: addl %edx, %ecx 109; AVX-NEXT: shrl $2, %ecx 110; AVX-NEXT: vmovd %ecx, %xmm2 111; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 112; AVX-NEXT: vpextrd $2, %xmm1, %eax 113; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 114; AVX-NEXT: shrq $32, %rcx 115; AVX-NEXT: subl %ecx, %eax 116; AVX-NEXT: shrl %eax 117; AVX-NEXT: addl %ecx, %eax 118; AVX-NEXT: shrl $2, %eax 119; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 120; AVX-NEXT: vpextrd $3, %xmm1, %eax 121; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 122; AVX-NEXT: shrq $32, %rcx 123; AVX-NEXT: subl %ecx, %eax 124; AVX-NEXT: shrl %eax 125; AVX-NEXT: addl %ecx, %eax 126; AVX-NEXT: shrl $2, %eax 127; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 128; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 129; AVX-NEXT: vpextrd $1, %xmm2, %eax 130; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 131; AVX-NEXT: shrq $32, %rcx 132; AVX-NEXT: subl %ecx, %eax 133; AVX-NEXT: shrl %eax 134; AVX-NEXT: addl %ecx, %eax 135; AVX-NEXT: shrl $2, %eax 136; AVX-NEXT: vmovd %xmm2, %ecx 137; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 138; AVX-NEXT: shrq $32, %rdx 139; AVX-NEXT: subl %edx, %ecx 140; AVX-NEXT: shrl %ecx 141; AVX-NEXT: addl %edx, %ecx 142; AVX-NEXT: shrl $2, %ecx 143; AVX-NEXT: vmovd %ecx, %xmm3 144; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 145; AVX-NEXT: vpextrd $2, %xmm2, %eax 146; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 147; AVX-NEXT: shrq $32, %rcx 148; AVX-NEXT: subl %ecx, %eax 149; AVX-NEXT: shrl %eax 150; AVX-NEXT: addl %ecx, %eax 151; AVX-NEXT: shrl $2, %eax 152; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 153; AVX-NEXT: vpextrd $3, %xmm2, %eax 154; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 155; AVX-NEXT: shrq $32, %rcx 156; AVX-NEXT: subl %ecx, %eax 157; AVX-NEXT: shrl %eax 158; AVX-NEXT: addl %ecx, %eax 159; AVX-NEXT: shrl $2, %eax 160; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2 161; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 162; AVX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 163; AVX-NEXT: vpextrd $1, %xmm2, %eax 164; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 165; AVX-NEXT: shrq $32, %rcx 166; AVX-NEXT: subl %ecx, %eax 167; AVX-NEXT: shrl %eax 168; AVX-NEXT: addl %ecx, %eax 169; AVX-NEXT: shrl $2, %eax 170; AVX-NEXT: vmovd %xmm2, %ecx 171; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 172; AVX-NEXT: shrq $32, %rdx 173; AVX-NEXT: subl %edx, %ecx 174; AVX-NEXT: shrl %ecx 175; AVX-NEXT: addl %edx, %ecx 176; AVX-NEXT: shrl $2, %ecx 177; AVX-NEXT: vmovd %ecx, %xmm3 178; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 179; AVX-NEXT: vpextrd $2, %xmm2, %eax 180; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 181; AVX-NEXT: shrq $32, %rcx 182; AVX-NEXT: subl %ecx, %eax 183; AVX-NEXT: shrl %eax 184; AVX-NEXT: addl %ecx, %eax 185; AVX-NEXT: shrl $2, %eax 186; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 187; AVX-NEXT: vpextrd $3, %xmm2, %eax 188; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 189; AVX-NEXT: shrq $32, %rcx 190; AVX-NEXT: subl %ecx, %eax 191; AVX-NEXT: shrl %eax 192; AVX-NEXT: addl %ecx, %eax 193; AVX-NEXT: shrl $2, %eax 194; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2 195; AVX-NEXT: vpextrd $1, %xmm0, %eax 196; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 197; AVX-NEXT: shrq $32, %rcx 198; AVX-NEXT: subl %ecx, %eax 199; AVX-NEXT: shrl %eax 200; AVX-NEXT: addl %ecx, %eax 201; AVX-NEXT: shrl $2, %eax 202; AVX-NEXT: vmovd %xmm0, %ecx 203; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 204; AVX-NEXT: shrq $32, %rdx 205; AVX-NEXT: subl %edx, %ecx 206; AVX-NEXT: shrl %ecx 207; AVX-NEXT: addl %edx, %ecx 208; AVX-NEXT: shrl $2, %ecx 209; AVX-NEXT: vmovd %ecx, %xmm3 210; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 211; AVX-NEXT: vpextrd $2, %xmm0, %eax 212; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 213; AVX-NEXT: shrq $32, %rcx 214; AVX-NEXT: subl %ecx, %eax 215; AVX-NEXT: shrl %eax 216; AVX-NEXT: addl %ecx, %eax 217; AVX-NEXT: shrl $2, %eax 218; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 219; AVX-NEXT: vpextrd $3, %xmm0, %eax 220; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 221; AVX-NEXT: shrq $32, %rcx 222; AVX-NEXT: subl %ecx, %eax 223; AVX-NEXT: shrl %eax 224; AVX-NEXT: addl %ecx, %eax 225; AVX-NEXT: shrl $2, %eax 226; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0 227; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 228; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 229; AVX-NEXT: retq 230 %res = udiv <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 231 ret <16 x i32> %res 232} 233 234define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind { 235; AVX512F-LABEL: test_div7_32i16: 236; AVX512F: # BB#0: 237; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] 238; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3 239; AVX512F-NEXT: vpsubw %ymm3, %ymm0, %ymm0 240; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 241; AVX512F-NEXT: vpaddw %ymm3, %ymm0, %ymm0 242; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0 243; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2 244; AVX512F-NEXT: vpsubw %ymm2, %ymm1, %ymm1 245; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 246; AVX512F-NEXT: vpaddw %ymm2, %ymm1, %ymm1 247; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1 248; AVX512F-NEXT: retq 249; 250; AVX512BW-LABEL: test_div7_32i16: 251; AVX512BW: # BB#0: 252; AVX512BW-NEXT: vpmulhuw {{.*}}(%rip), %zmm0, %zmm1 253; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0 254; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0 255; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 256; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm0 257; AVX512BW-NEXT: retq 258 %res = udiv <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 259 ret <32 x i16> %res 260} 261 262define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { 263; AVX512F-LABEL: test_div7_64i8: 264; AVX512F: # BB#0: 265; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] 266; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3 267; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero 268; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 269; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero 270; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4 271; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 272; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 273; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 274; AVX512F-NEXT: vpmullw %ymm2, %ymm5, %ymm5 275; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 276; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm5[2,3],ymm4[2,3] 277; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 278; AVX512F-NEXT: vpackuswb %ymm6, %ymm4, %ymm4 279; AVX512F-NEXT: vpsubb %ymm4, %ymm0, %ymm0 280; AVX512F-NEXT: vpsrlw $1, %ymm0, %ymm0 281; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 282; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 283; AVX512F-NEXT: vpaddb %ymm4, %ymm0, %ymm0 284; AVX512F-NEXT: vpsrlw $2, %ymm0, %ymm0 285; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 286; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 287; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm6 288; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero,xmm6[8],zero,xmm6[9],zero,xmm6[10],zero,xmm6[11],zero,xmm6[12],zero,xmm6[13],zero,xmm6[14],zero,xmm6[15],zero 289; AVX512F-NEXT: vpmullw %ymm3, %ymm6, %ymm3 290; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 291; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 292; AVX512F-NEXT: vpmullw %ymm2, %ymm6, %ymm2 293; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 294; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm2[2,3],ymm3[2,3] 295; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 296; AVX512F-NEXT: vpackuswb %ymm6, %ymm2, %ymm2 297; AVX512F-NEXT: vpsubb %ymm2, %ymm1, %ymm1 298; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 299; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1 300; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1 301; AVX512F-NEXT: vpsrlw $2, %ymm1, %ymm1 302; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 303; AVX512F-NEXT: retq 304; 305; AVX512BW-LABEL: test_div7_64i8: 306; AVX512BW: # BB#0: 307; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 308; AVX512BW-NEXT: vpextrb $1, %xmm1, %eax 309; AVX512BW-NEXT: imull $37, %eax, %ecx 310; AVX512BW-NEXT: shrl $8, %ecx 311; AVX512BW-NEXT: subb %cl, %al 312; AVX512BW-NEXT: shrb %al 313; AVX512BW-NEXT: addb %cl, %al 314; AVX512BW-NEXT: shrb $2, %al 315; AVX512BW-NEXT: movzbl %al, %eax 316; AVX512BW-NEXT: vpextrb $0, %xmm1, %ecx 317; AVX512BW-NEXT: imull $37, %ecx, %edx 318; AVX512BW-NEXT: shrl $8, %edx 319; AVX512BW-NEXT: subb %dl, %cl 320; AVX512BW-NEXT: shrb %cl 321; AVX512BW-NEXT: addb %dl, %cl 322; AVX512BW-NEXT: shrb $2, %cl 323; AVX512BW-NEXT: movzbl %cl, %ecx 324; AVX512BW-NEXT: vmovd %ecx, %xmm2 325; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 326; AVX512BW-NEXT: vpextrb $2, %xmm1, %eax 327; AVX512BW-NEXT: imull $37, %eax, %ecx 328; AVX512BW-NEXT: shrl $8, %ecx 329; AVX512BW-NEXT: subb %cl, %al 330; AVX512BW-NEXT: shrb %al 331; AVX512BW-NEXT: addb %cl, %al 332; AVX512BW-NEXT: shrb $2, %al 333; AVX512BW-NEXT: movzbl %al, %eax 334; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 335; AVX512BW-NEXT: vpextrb $3, %xmm1, %eax 336; AVX512BW-NEXT: imull $37, %eax, %ecx 337; AVX512BW-NEXT: shrl $8, %ecx 338; AVX512BW-NEXT: subb %cl, %al 339; AVX512BW-NEXT: shrb %al 340; AVX512BW-NEXT: addb %cl, %al 341; AVX512BW-NEXT: shrb $2, %al 342; AVX512BW-NEXT: movzbl %al, %eax 343; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 344; AVX512BW-NEXT: vpextrb $4, %xmm1, %eax 345; AVX512BW-NEXT: imull $37, %eax, %ecx 346; AVX512BW-NEXT: shrl $8, %ecx 347; AVX512BW-NEXT: subb %cl, %al 348; AVX512BW-NEXT: shrb %al 349; AVX512BW-NEXT: addb %cl, %al 350; AVX512BW-NEXT: shrb $2, %al 351; AVX512BW-NEXT: movzbl %al, %eax 352; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 353; AVX512BW-NEXT: vpextrb $5, %xmm1, %eax 354; AVX512BW-NEXT: imull $37, %eax, %ecx 355; AVX512BW-NEXT: shrl $8, %ecx 356; AVX512BW-NEXT: subb %cl, %al 357; AVX512BW-NEXT: shrb %al 358; AVX512BW-NEXT: addb %cl, %al 359; AVX512BW-NEXT: shrb $2, %al 360; AVX512BW-NEXT: movzbl %al, %eax 361; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 362; AVX512BW-NEXT: vpextrb $6, %xmm1, %eax 363; AVX512BW-NEXT: imull $37, %eax, %ecx 364; AVX512BW-NEXT: shrl $8, %ecx 365; AVX512BW-NEXT: subb %cl, %al 366; AVX512BW-NEXT: shrb %al 367; AVX512BW-NEXT: addb %cl, %al 368; AVX512BW-NEXT: shrb $2, %al 369; AVX512BW-NEXT: movzbl %al, %eax 370; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 371; AVX512BW-NEXT: vpextrb $7, %xmm1, %eax 372; AVX512BW-NEXT: imull $37, %eax, %ecx 373; AVX512BW-NEXT: shrl $8, %ecx 374; AVX512BW-NEXT: subb %cl, %al 375; AVX512BW-NEXT: shrb %al 376; AVX512BW-NEXT: addb %cl, %al 377; AVX512BW-NEXT: shrb $2, %al 378; AVX512BW-NEXT: movzbl %al, %eax 379; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 380; AVX512BW-NEXT: vpextrb $8, %xmm1, %eax 381; AVX512BW-NEXT: imull $37, %eax, %ecx 382; AVX512BW-NEXT: shrl $8, %ecx 383; AVX512BW-NEXT: subb %cl, %al 384; AVX512BW-NEXT: shrb %al 385; AVX512BW-NEXT: addb %cl, %al 386; AVX512BW-NEXT: shrb $2, %al 387; AVX512BW-NEXT: movzbl %al, %eax 388; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 389; AVX512BW-NEXT: vpextrb $9, %xmm1, %eax 390; AVX512BW-NEXT: imull $37, %eax, %ecx 391; AVX512BW-NEXT: shrl $8, %ecx 392; AVX512BW-NEXT: subb %cl, %al 393; AVX512BW-NEXT: shrb %al 394; AVX512BW-NEXT: addb %cl, %al 395; AVX512BW-NEXT: shrb $2, %al 396; AVX512BW-NEXT: movzbl %al, %eax 397; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 398; AVX512BW-NEXT: vpextrb $10, %xmm1, %eax 399; AVX512BW-NEXT: imull $37, %eax, %ecx 400; AVX512BW-NEXT: shrl $8, %ecx 401; AVX512BW-NEXT: subb %cl, %al 402; AVX512BW-NEXT: shrb %al 403; AVX512BW-NEXT: addb %cl, %al 404; AVX512BW-NEXT: shrb $2, %al 405; AVX512BW-NEXT: movzbl %al, %eax 406; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 407; AVX512BW-NEXT: vpextrb $11, %xmm1, %eax 408; AVX512BW-NEXT: imull $37, %eax, %ecx 409; AVX512BW-NEXT: shrl $8, %ecx 410; AVX512BW-NEXT: subb %cl, %al 411; AVX512BW-NEXT: shrb %al 412; AVX512BW-NEXT: addb %cl, %al 413; AVX512BW-NEXT: shrb $2, %al 414; AVX512BW-NEXT: movzbl %al, %eax 415; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 416; AVX512BW-NEXT: vpextrb $12, %xmm1, %eax 417; AVX512BW-NEXT: imull $37, %eax, %ecx 418; AVX512BW-NEXT: shrl $8, %ecx 419; AVX512BW-NEXT: subb %cl, %al 420; AVX512BW-NEXT: shrb %al 421; AVX512BW-NEXT: addb %cl, %al 422; AVX512BW-NEXT: shrb $2, %al 423; AVX512BW-NEXT: movzbl %al, %eax 424; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 425; AVX512BW-NEXT: vpextrb $13, %xmm1, %eax 426; AVX512BW-NEXT: imull $37, %eax, %ecx 427; AVX512BW-NEXT: shrl $8, %ecx 428; AVX512BW-NEXT: subb %cl, %al 429; AVX512BW-NEXT: shrb %al 430; AVX512BW-NEXT: addb %cl, %al 431; AVX512BW-NEXT: shrb $2, %al 432; AVX512BW-NEXT: movzbl %al, %eax 433; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 434; AVX512BW-NEXT: vpextrb $14, %xmm1, %eax 435; AVX512BW-NEXT: imull $37, %eax, %ecx 436; AVX512BW-NEXT: shrl $8, %ecx 437; AVX512BW-NEXT: subb %cl, %al 438; AVX512BW-NEXT: shrb %al 439; AVX512BW-NEXT: addb %cl, %al 440; AVX512BW-NEXT: shrb $2, %al 441; AVX512BW-NEXT: movzbl %al, %eax 442; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 443; AVX512BW-NEXT: vpextrb $15, %xmm1, %eax 444; AVX512BW-NEXT: imull $37, %eax, %ecx 445; AVX512BW-NEXT: shrl $8, %ecx 446; AVX512BW-NEXT: subb %cl, %al 447; AVX512BW-NEXT: shrb %al 448; AVX512BW-NEXT: addb %cl, %al 449; AVX512BW-NEXT: shrb $2, %al 450; AVX512BW-NEXT: movzbl %al, %eax 451; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 452; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 453; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax 454; AVX512BW-NEXT: imull $37, %eax, %ecx 455; AVX512BW-NEXT: shrl $8, %ecx 456; AVX512BW-NEXT: subb %cl, %al 457; AVX512BW-NEXT: shrb %al 458; AVX512BW-NEXT: addb %cl, %al 459; AVX512BW-NEXT: shrb $2, %al 460; AVX512BW-NEXT: movzbl %al, %eax 461; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx 462; AVX512BW-NEXT: imull $37, %ecx, %edx 463; AVX512BW-NEXT: shrl $8, %edx 464; AVX512BW-NEXT: subb %dl, %cl 465; AVX512BW-NEXT: shrb %cl 466; AVX512BW-NEXT: addb %dl, %cl 467; AVX512BW-NEXT: shrb $2, %cl 468; AVX512BW-NEXT: movzbl %cl, %ecx 469; AVX512BW-NEXT: vmovd %ecx, %xmm3 470; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 471; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax 472; AVX512BW-NEXT: imull $37, %eax, %ecx 473; AVX512BW-NEXT: shrl $8, %ecx 474; AVX512BW-NEXT: subb %cl, %al 475; AVX512BW-NEXT: shrb %al 476; AVX512BW-NEXT: addb %cl, %al 477; AVX512BW-NEXT: shrb $2, %al 478; AVX512BW-NEXT: movzbl %al, %eax 479; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 480; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax 481; AVX512BW-NEXT: imull $37, %eax, %ecx 482; AVX512BW-NEXT: shrl $8, %ecx 483; AVX512BW-NEXT: subb %cl, %al 484; AVX512BW-NEXT: shrb %al 485; AVX512BW-NEXT: addb %cl, %al 486; AVX512BW-NEXT: shrb $2, %al 487; AVX512BW-NEXT: movzbl %al, %eax 488; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 489; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax 490; AVX512BW-NEXT: imull $37, %eax, %ecx 491; AVX512BW-NEXT: shrl $8, %ecx 492; AVX512BW-NEXT: subb %cl, %al 493; AVX512BW-NEXT: shrb %al 494; AVX512BW-NEXT: addb %cl, %al 495; AVX512BW-NEXT: shrb $2, %al 496; AVX512BW-NEXT: movzbl %al, %eax 497; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 498; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax 499; AVX512BW-NEXT: imull $37, %eax, %ecx 500; AVX512BW-NEXT: shrl $8, %ecx 501; AVX512BW-NEXT: subb %cl, %al 502; AVX512BW-NEXT: shrb %al 503; AVX512BW-NEXT: addb %cl, %al 504; AVX512BW-NEXT: shrb $2, %al 505; AVX512BW-NEXT: movzbl %al, %eax 506; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 507; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax 508; AVX512BW-NEXT: imull $37, %eax, %ecx 509; AVX512BW-NEXT: shrl $8, %ecx 510; AVX512BW-NEXT: subb %cl, %al 511; AVX512BW-NEXT: shrb %al 512; AVX512BW-NEXT: addb %cl, %al 513; AVX512BW-NEXT: shrb $2, %al 514; AVX512BW-NEXT: movzbl %al, %eax 515; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 516; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax 517; AVX512BW-NEXT: imull $37, %eax, %ecx 518; AVX512BW-NEXT: shrl $8, %ecx 519; AVX512BW-NEXT: subb %cl, %al 520; AVX512BW-NEXT: shrb %al 521; AVX512BW-NEXT: addb %cl, %al 522; AVX512BW-NEXT: shrb $2, %al 523; AVX512BW-NEXT: movzbl %al, %eax 524; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 525; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax 526; AVX512BW-NEXT: imull $37, %eax, %ecx 527; AVX512BW-NEXT: shrl $8, %ecx 528; AVX512BW-NEXT: subb %cl, %al 529; AVX512BW-NEXT: shrb %al 530; AVX512BW-NEXT: addb %cl, %al 531; AVX512BW-NEXT: shrb $2, %al 532; AVX512BW-NEXT: movzbl %al, %eax 533; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 534; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax 535; AVX512BW-NEXT: imull $37, %eax, %ecx 536; AVX512BW-NEXT: shrl $8, %ecx 537; AVX512BW-NEXT: subb %cl, %al 538; AVX512BW-NEXT: shrb %al 539; AVX512BW-NEXT: addb %cl, %al 540; AVX512BW-NEXT: shrb $2, %al 541; AVX512BW-NEXT: movzbl %al, %eax 542; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 543; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax 544; AVX512BW-NEXT: imull $37, %eax, %ecx 545; AVX512BW-NEXT: shrl $8, %ecx 546; AVX512BW-NEXT: subb %cl, %al 547; AVX512BW-NEXT: shrb %al 548; AVX512BW-NEXT: addb %cl, %al 549; AVX512BW-NEXT: shrb $2, %al 550; AVX512BW-NEXT: movzbl %al, %eax 551; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 552; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax 553; AVX512BW-NEXT: imull $37, %eax, %ecx 554; AVX512BW-NEXT: shrl $8, %ecx 555; AVX512BW-NEXT: subb %cl, %al 556; AVX512BW-NEXT: shrb %al 557; AVX512BW-NEXT: addb %cl, %al 558; AVX512BW-NEXT: shrb $2, %al 559; AVX512BW-NEXT: movzbl %al, %eax 560; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 561; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax 562; AVX512BW-NEXT: imull $37, %eax, %ecx 563; AVX512BW-NEXT: shrl $8, %ecx 564; AVX512BW-NEXT: subb %cl, %al 565; AVX512BW-NEXT: shrb %al 566; AVX512BW-NEXT: addb %cl, %al 567; AVX512BW-NEXT: shrb $2, %al 568; AVX512BW-NEXT: movzbl %al, %eax 569; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 570; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax 571; AVX512BW-NEXT: imull $37, %eax, %ecx 572; AVX512BW-NEXT: shrl $8, %ecx 573; AVX512BW-NEXT: subb %cl, %al 574; AVX512BW-NEXT: shrb %al 575; AVX512BW-NEXT: addb %cl, %al 576; AVX512BW-NEXT: shrb $2, %al 577; AVX512BW-NEXT: movzbl %al, %eax 578; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 579; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax 580; AVX512BW-NEXT: imull $37, %eax, %ecx 581; AVX512BW-NEXT: shrl $8, %ecx 582; AVX512BW-NEXT: subb %cl, %al 583; AVX512BW-NEXT: shrb %al 584; AVX512BW-NEXT: addb %cl, %al 585; AVX512BW-NEXT: shrb $2, %al 586; AVX512BW-NEXT: movzbl %al, %eax 587; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 588; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax 589; AVX512BW-NEXT: imull $37, %eax, %ecx 590; AVX512BW-NEXT: shrl $8, %ecx 591; AVX512BW-NEXT: subb %cl, %al 592; AVX512BW-NEXT: shrb %al 593; AVX512BW-NEXT: addb %cl, %al 594; AVX512BW-NEXT: shrb $2, %al 595; AVX512BW-NEXT: movzbl %al, %eax 596; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 597; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 598; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 599; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax 600; AVX512BW-NEXT: imull $37, %eax, %ecx 601; AVX512BW-NEXT: shrl $8, %ecx 602; AVX512BW-NEXT: subb %cl, %al 603; AVX512BW-NEXT: shrb %al 604; AVX512BW-NEXT: addb %cl, %al 605; AVX512BW-NEXT: shrb $2, %al 606; AVX512BW-NEXT: movzbl %al, %eax 607; AVX512BW-NEXT: vpextrb $0, %xmm2, %ecx 608; AVX512BW-NEXT: imull $37, %ecx, %edx 609; AVX512BW-NEXT: shrl $8, %edx 610; AVX512BW-NEXT: subb %dl, %cl 611; AVX512BW-NEXT: shrb %cl 612; AVX512BW-NEXT: addb %dl, %cl 613; AVX512BW-NEXT: shrb $2, %cl 614; AVX512BW-NEXT: movzbl %cl, %ecx 615; AVX512BW-NEXT: vmovd %ecx, %xmm3 616; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 617; AVX512BW-NEXT: vpextrb $2, %xmm2, %eax 618; AVX512BW-NEXT: imull $37, %eax, %ecx 619; AVX512BW-NEXT: shrl $8, %ecx 620; AVX512BW-NEXT: subb %cl, %al 621; AVX512BW-NEXT: shrb %al 622; AVX512BW-NEXT: addb %cl, %al 623; AVX512BW-NEXT: shrb $2, %al 624; AVX512BW-NEXT: movzbl %al, %eax 625; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 626; AVX512BW-NEXT: vpextrb $3, %xmm2, %eax 627; AVX512BW-NEXT: imull $37, %eax, %ecx 628; AVX512BW-NEXT: shrl $8, %ecx 629; AVX512BW-NEXT: subb %cl, %al 630; AVX512BW-NEXT: shrb %al 631; AVX512BW-NEXT: addb %cl, %al 632; AVX512BW-NEXT: shrb $2, %al 633; AVX512BW-NEXT: movzbl %al, %eax 634; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 635; AVX512BW-NEXT: vpextrb $4, %xmm2, %eax 636; AVX512BW-NEXT: imull $37, %eax, %ecx 637; AVX512BW-NEXT: shrl $8, %ecx 638; AVX512BW-NEXT: subb %cl, %al 639; AVX512BW-NEXT: shrb %al 640; AVX512BW-NEXT: addb %cl, %al 641; AVX512BW-NEXT: shrb $2, %al 642; AVX512BW-NEXT: movzbl %al, %eax 643; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 644; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax 645; AVX512BW-NEXT: imull $37, %eax, %ecx 646; AVX512BW-NEXT: shrl $8, %ecx 647; AVX512BW-NEXT: subb %cl, %al 648; AVX512BW-NEXT: shrb %al 649; AVX512BW-NEXT: addb %cl, %al 650; AVX512BW-NEXT: shrb $2, %al 651; AVX512BW-NEXT: movzbl %al, %eax 652; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 653; AVX512BW-NEXT: vpextrb $6, %xmm2, %eax 654; AVX512BW-NEXT: imull $37, %eax, %ecx 655; AVX512BW-NEXT: shrl $8, %ecx 656; AVX512BW-NEXT: subb %cl, %al 657; AVX512BW-NEXT: shrb %al 658; AVX512BW-NEXT: addb %cl, %al 659; AVX512BW-NEXT: shrb $2, %al 660; AVX512BW-NEXT: movzbl %al, %eax 661; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 662; AVX512BW-NEXT: vpextrb $7, %xmm2, %eax 663; AVX512BW-NEXT: imull $37, %eax, %ecx 664; AVX512BW-NEXT: shrl $8, %ecx 665; AVX512BW-NEXT: subb %cl, %al 666; AVX512BW-NEXT: shrb %al 667; AVX512BW-NEXT: addb %cl, %al 668; AVX512BW-NEXT: shrb $2, %al 669; AVX512BW-NEXT: movzbl %al, %eax 670; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 671; AVX512BW-NEXT: vpextrb $8, %xmm2, %eax 672; AVX512BW-NEXT: imull $37, %eax, %ecx 673; AVX512BW-NEXT: shrl $8, %ecx 674; AVX512BW-NEXT: subb %cl, %al 675; AVX512BW-NEXT: shrb %al 676; AVX512BW-NEXT: addb %cl, %al 677; AVX512BW-NEXT: shrb $2, %al 678; AVX512BW-NEXT: movzbl %al, %eax 679; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 680; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax 681; AVX512BW-NEXT: imull $37, %eax, %ecx 682; AVX512BW-NEXT: shrl $8, %ecx 683; AVX512BW-NEXT: subb %cl, %al 684; AVX512BW-NEXT: shrb %al 685; AVX512BW-NEXT: addb %cl, %al 686; AVX512BW-NEXT: shrb $2, %al 687; AVX512BW-NEXT: movzbl %al, %eax 688; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 689; AVX512BW-NEXT: vpextrb $10, %xmm2, %eax 690; AVX512BW-NEXT: imull $37, %eax, %ecx 691; AVX512BW-NEXT: shrl $8, %ecx 692; AVX512BW-NEXT: subb %cl, %al 693; AVX512BW-NEXT: shrb %al 694; AVX512BW-NEXT: addb %cl, %al 695; AVX512BW-NEXT: shrb $2, %al 696; AVX512BW-NEXT: movzbl %al, %eax 697; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 698; AVX512BW-NEXT: vpextrb $11, %xmm2, %eax 699; AVX512BW-NEXT: imull $37, %eax, %ecx 700; AVX512BW-NEXT: shrl $8, %ecx 701; AVX512BW-NEXT: subb %cl, %al 702; AVX512BW-NEXT: shrb %al 703; AVX512BW-NEXT: addb %cl, %al 704; AVX512BW-NEXT: shrb $2, %al 705; AVX512BW-NEXT: movzbl %al, %eax 706; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 707; AVX512BW-NEXT: vpextrb $12, %xmm2, %eax 708; AVX512BW-NEXT: imull $37, %eax, %ecx 709; AVX512BW-NEXT: shrl $8, %ecx 710; AVX512BW-NEXT: subb %cl, %al 711; AVX512BW-NEXT: shrb %al 712; AVX512BW-NEXT: addb %cl, %al 713; AVX512BW-NEXT: shrb $2, %al 714; AVX512BW-NEXT: movzbl %al, %eax 715; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 716; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax 717; AVX512BW-NEXT: imull $37, %eax, %ecx 718; AVX512BW-NEXT: shrl $8, %ecx 719; AVX512BW-NEXT: subb %cl, %al 720; AVX512BW-NEXT: shrb %al 721; AVX512BW-NEXT: addb %cl, %al 722; AVX512BW-NEXT: shrb $2, %al 723; AVX512BW-NEXT: movzbl %al, %eax 724; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 725; AVX512BW-NEXT: vpextrb $14, %xmm2, %eax 726; AVX512BW-NEXT: imull $37, %eax, %ecx 727; AVX512BW-NEXT: shrl $8, %ecx 728; AVX512BW-NEXT: subb %cl, %al 729; AVX512BW-NEXT: shrb %al 730; AVX512BW-NEXT: addb %cl, %al 731; AVX512BW-NEXT: shrb $2, %al 732; AVX512BW-NEXT: movzbl %al, %eax 733; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 734; AVX512BW-NEXT: vpextrb $15, %xmm2, %eax 735; AVX512BW-NEXT: imull $37, %eax, %ecx 736; AVX512BW-NEXT: shrl $8, %ecx 737; AVX512BW-NEXT: subb %cl, %al 738; AVX512BW-NEXT: shrb %al 739; AVX512BW-NEXT: addb %cl, %al 740; AVX512BW-NEXT: shrb $2, %al 741; AVX512BW-NEXT: movzbl %al, %eax 742; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 743; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax 744; AVX512BW-NEXT: imull $37, %eax, %ecx 745; AVX512BW-NEXT: shrl $8, %ecx 746; AVX512BW-NEXT: subb %cl, %al 747; AVX512BW-NEXT: shrb %al 748; AVX512BW-NEXT: addb %cl, %al 749; AVX512BW-NEXT: shrb $2, %al 750; AVX512BW-NEXT: movzbl %al, %eax 751; AVX512BW-NEXT: vpextrb $0, %xmm0, %ecx 752; AVX512BW-NEXT: imull $37, %ecx, %edx 753; AVX512BW-NEXT: shrl $8, %edx 754; AVX512BW-NEXT: subb %dl, %cl 755; AVX512BW-NEXT: shrb %cl 756; AVX512BW-NEXT: addb %dl, %cl 757; AVX512BW-NEXT: shrb $2, %cl 758; AVX512BW-NEXT: movzbl %cl, %ecx 759; AVX512BW-NEXT: vmovd %ecx, %xmm3 760; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 761; AVX512BW-NEXT: vpextrb $2, %xmm0, %eax 762; AVX512BW-NEXT: imull $37, %eax, %ecx 763; AVX512BW-NEXT: shrl $8, %ecx 764; AVX512BW-NEXT: subb %cl, %al 765; AVX512BW-NEXT: shrb %al 766; AVX512BW-NEXT: addb %cl, %al 767; AVX512BW-NEXT: shrb $2, %al 768; AVX512BW-NEXT: movzbl %al, %eax 769; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 770; AVX512BW-NEXT: vpextrb $3, %xmm0, %eax 771; AVX512BW-NEXT: imull $37, %eax, %ecx 772; AVX512BW-NEXT: shrl $8, %ecx 773; AVX512BW-NEXT: subb %cl, %al 774; AVX512BW-NEXT: shrb %al 775; AVX512BW-NEXT: addb %cl, %al 776; AVX512BW-NEXT: shrb $2, %al 777; AVX512BW-NEXT: movzbl %al, %eax 778; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 779; AVX512BW-NEXT: vpextrb $4, %xmm0, %eax 780; AVX512BW-NEXT: imull $37, %eax, %ecx 781; AVX512BW-NEXT: shrl $8, %ecx 782; AVX512BW-NEXT: subb %cl, %al 783; AVX512BW-NEXT: shrb %al 784; AVX512BW-NEXT: addb %cl, %al 785; AVX512BW-NEXT: shrb $2, %al 786; AVX512BW-NEXT: movzbl %al, %eax 787; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 788; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax 789; AVX512BW-NEXT: imull $37, %eax, %ecx 790; AVX512BW-NEXT: shrl $8, %ecx 791; AVX512BW-NEXT: subb %cl, %al 792; AVX512BW-NEXT: shrb %al 793; AVX512BW-NEXT: addb %cl, %al 794; AVX512BW-NEXT: shrb $2, %al 795; AVX512BW-NEXT: movzbl %al, %eax 796; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 797; AVX512BW-NEXT: vpextrb $6, %xmm0, %eax 798; AVX512BW-NEXT: imull $37, %eax, %ecx 799; AVX512BW-NEXT: shrl $8, %ecx 800; AVX512BW-NEXT: subb %cl, %al 801; AVX512BW-NEXT: shrb %al 802; AVX512BW-NEXT: addb %cl, %al 803; AVX512BW-NEXT: shrb $2, %al 804; AVX512BW-NEXT: movzbl %al, %eax 805; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 806; AVX512BW-NEXT: vpextrb $7, %xmm0, %eax 807; AVX512BW-NEXT: imull $37, %eax, %ecx 808; AVX512BW-NEXT: shrl $8, %ecx 809; AVX512BW-NEXT: subb %cl, %al 810; AVX512BW-NEXT: shrb %al 811; AVX512BW-NEXT: addb %cl, %al 812; AVX512BW-NEXT: shrb $2, %al 813; AVX512BW-NEXT: movzbl %al, %eax 814; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 815; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax 816; AVX512BW-NEXT: imull $37, %eax, %ecx 817; AVX512BW-NEXT: shrl $8, %ecx 818; AVX512BW-NEXT: subb %cl, %al 819; AVX512BW-NEXT: shrb %al 820; AVX512BW-NEXT: addb %cl, %al 821; AVX512BW-NEXT: shrb $2, %al 822; AVX512BW-NEXT: movzbl %al, %eax 823; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 824; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax 825; AVX512BW-NEXT: imull $37, %eax, %ecx 826; AVX512BW-NEXT: shrl $8, %ecx 827; AVX512BW-NEXT: subb %cl, %al 828; AVX512BW-NEXT: shrb %al 829; AVX512BW-NEXT: addb %cl, %al 830; AVX512BW-NEXT: shrb $2, %al 831; AVX512BW-NEXT: movzbl %al, %eax 832; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 833; AVX512BW-NEXT: vpextrb $10, %xmm0, %eax 834; AVX512BW-NEXT: imull $37, %eax, %ecx 835; AVX512BW-NEXT: shrl $8, %ecx 836; AVX512BW-NEXT: subb %cl, %al 837; AVX512BW-NEXT: shrb %al 838; AVX512BW-NEXT: addb %cl, %al 839; AVX512BW-NEXT: shrb $2, %al 840; AVX512BW-NEXT: movzbl %al, %eax 841; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 842; AVX512BW-NEXT: vpextrb $11, %xmm0, %eax 843; AVX512BW-NEXT: imull $37, %eax, %ecx 844; AVX512BW-NEXT: shrl $8, %ecx 845; AVX512BW-NEXT: subb %cl, %al 846; AVX512BW-NEXT: shrb %al 847; AVX512BW-NEXT: addb %cl, %al 848; AVX512BW-NEXT: shrb $2, %al 849; AVX512BW-NEXT: movzbl %al, %eax 850; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 851; AVX512BW-NEXT: vpextrb $12, %xmm0, %eax 852; AVX512BW-NEXT: imull $37, %eax, %ecx 853; AVX512BW-NEXT: shrl $8, %ecx 854; AVX512BW-NEXT: subb %cl, %al 855; AVX512BW-NEXT: shrb %al 856; AVX512BW-NEXT: addb %cl, %al 857; AVX512BW-NEXT: shrb $2, %al 858; AVX512BW-NEXT: movzbl %al, %eax 859; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 860; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax 861; AVX512BW-NEXT: imull $37, %eax, %ecx 862; AVX512BW-NEXT: shrl $8, %ecx 863; AVX512BW-NEXT: subb %cl, %al 864; AVX512BW-NEXT: shrb %al 865; AVX512BW-NEXT: addb %cl, %al 866; AVX512BW-NEXT: shrb $2, %al 867; AVX512BW-NEXT: movzbl %al, %eax 868; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 869; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax 870; AVX512BW-NEXT: imull $37, %eax, %ecx 871; AVX512BW-NEXT: shrl $8, %ecx 872; AVX512BW-NEXT: subb %cl, %al 873; AVX512BW-NEXT: shrb %al 874; AVX512BW-NEXT: addb %cl, %al 875; AVX512BW-NEXT: shrb $2, %al 876; AVX512BW-NEXT: movzbl %al, %eax 877; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 878; AVX512BW-NEXT: vpextrb $15, %xmm0, %eax 879; AVX512BW-NEXT: imull $37, %eax, %ecx 880; AVX512BW-NEXT: shrl $8, %ecx 881; AVX512BW-NEXT: subb %cl, %al 882; AVX512BW-NEXT: shrb %al 883; AVX512BW-NEXT: addb %cl, %al 884; AVX512BW-NEXT: shrb $2, %al 885; AVX512BW-NEXT: movzbl %al, %eax 886; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 887; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 888; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 889; AVX512BW-NEXT: retq 890 %res = udiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7> 891 ret <64 x i8> %res 892} 893 894; 895; urem by 7 896; 897 898define <8 x i64> @test_rem7_8i64(<8 x i64> %a) nounwind { 899; AVX-LABEL: test_rem7_8i64: 900; AVX: # BB#0: 901; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 902; AVX-NEXT: vpextrq $1, %xmm1, %rcx 903; AVX-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493 904; AVX-NEXT: movq %rcx, %rax 905; AVX-NEXT: mulq %rsi 906; AVX-NEXT: movq %rcx, %rax 907; AVX-NEXT: subq %rdx, %rax 908; AVX-NEXT: shrq %rax 909; AVX-NEXT: addq %rdx, %rax 910; AVX-NEXT: shrq $2, %rax 911; AVX-NEXT: leaq (,%rax,8), %rdx 912; AVX-NEXT: subq %rax, %rdx 913; AVX-NEXT: subq %rdx, %rcx 914; AVX-NEXT: vmovq %rcx, %xmm2 915; AVX-NEXT: vmovq %xmm1, %rcx 916; AVX-NEXT: movq %rcx, %rax 917; AVX-NEXT: mulq %rsi 918; AVX-NEXT: movq %rcx, %rax 919; AVX-NEXT: subq %rdx, %rax 920; AVX-NEXT: shrq %rax 921; AVX-NEXT: addq %rdx, %rax 922; AVX-NEXT: shrq $2, %rax 923; AVX-NEXT: leaq (,%rax,8), %rdx 924; AVX-NEXT: subq %rax, %rdx 925; AVX-NEXT: subq %rdx, %rcx 926; AVX-NEXT: vmovq %rcx, %xmm1 927; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 928; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 929; AVX-NEXT: vpextrq $1, %xmm2, %rcx 930; AVX-NEXT: movq %rcx, %rax 931; AVX-NEXT: mulq %rsi 932; AVX-NEXT: movq %rcx, %rax 933; AVX-NEXT: subq %rdx, %rax 934; AVX-NEXT: shrq %rax 935; AVX-NEXT: addq %rdx, %rax 936; AVX-NEXT: shrq $2, %rax 937; AVX-NEXT: leaq (,%rax,8), %rdx 938; AVX-NEXT: subq %rax, %rdx 939; AVX-NEXT: subq %rdx, %rcx 940; AVX-NEXT: vmovq %rcx, %xmm3 941; AVX-NEXT: vmovq %xmm2, %rcx 942; AVX-NEXT: movq %rcx, %rax 943; AVX-NEXT: mulq %rsi 944; AVX-NEXT: movq %rcx, %rax 945; AVX-NEXT: subq %rdx, %rax 946; AVX-NEXT: shrq %rax 947; AVX-NEXT: addq %rdx, %rax 948; AVX-NEXT: shrq $2, %rax 949; AVX-NEXT: leaq (,%rax,8), %rdx 950; AVX-NEXT: subq %rax, %rdx 951; AVX-NEXT: subq %rdx, %rcx 952; AVX-NEXT: vmovq %rcx, %xmm2 953; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 954; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 955; AVX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 956; AVX-NEXT: vpextrq $1, %xmm2, %rcx 957; AVX-NEXT: movq %rcx, %rax 958; AVX-NEXT: mulq %rsi 959; AVX-NEXT: movq %rcx, %rax 960; AVX-NEXT: subq %rdx, %rax 961; AVX-NEXT: shrq %rax 962; AVX-NEXT: addq %rdx, %rax 963; AVX-NEXT: shrq $2, %rax 964; AVX-NEXT: leaq (,%rax,8), %rdx 965; AVX-NEXT: subq %rax, %rdx 966; AVX-NEXT: subq %rdx, %rcx 967; AVX-NEXT: vmovq %rcx, %xmm3 968; AVX-NEXT: vmovq %xmm2, %rcx 969; AVX-NEXT: movq %rcx, %rax 970; AVX-NEXT: mulq %rsi 971; AVX-NEXT: movq %rcx, %rax 972; AVX-NEXT: subq %rdx, %rax 973; AVX-NEXT: shrq %rax 974; AVX-NEXT: addq %rdx, %rax 975; AVX-NEXT: shrq $2, %rax 976; AVX-NEXT: leaq (,%rax,8), %rdx 977; AVX-NEXT: subq %rax, %rdx 978; AVX-NEXT: subq %rdx, %rcx 979; AVX-NEXT: vmovq %rcx, %xmm2 980; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 981; AVX-NEXT: vpextrq $1, %xmm0, %rcx 982; AVX-NEXT: movq %rcx, %rax 983; AVX-NEXT: mulq %rsi 984; AVX-NEXT: movq %rcx, %rax 985; AVX-NEXT: subq %rdx, %rax 986; AVX-NEXT: shrq %rax 987; AVX-NEXT: addq %rdx, %rax 988; AVX-NEXT: shrq $2, %rax 989; AVX-NEXT: leaq (,%rax,8), %rdx 990; AVX-NEXT: subq %rax, %rdx 991; AVX-NEXT: subq %rdx, %rcx 992; AVX-NEXT: vmovq %rcx, %xmm3 993; AVX-NEXT: vmovq %xmm0, %rcx 994; AVX-NEXT: movq %rcx, %rax 995; AVX-NEXT: mulq %rsi 996; AVX-NEXT: movq %rcx, %rax 997; AVX-NEXT: subq %rdx, %rax 998; AVX-NEXT: shrq %rax 999; AVX-NEXT: addq %rdx, %rax 1000; AVX-NEXT: shrq $2, %rax 1001; AVX-NEXT: leaq (,%rax,8), %rdx 1002; AVX-NEXT: subq %rax, %rdx 1003; AVX-NEXT: subq %rdx, %rcx 1004; AVX-NEXT: vmovq %rcx, %xmm0 1005; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 1006; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1007; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1008; AVX-NEXT: retq 1009 %res = urem <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7> 1010 ret <8 x i64> %res 1011} 1012 1013define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind { 1014; AVX-LABEL: test_rem7_16i32: 1015; AVX: # BB#0: 1016; AVX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 1017; AVX-NEXT: vpextrd $1, %xmm1, %eax 1018; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1019; AVX-NEXT: shrq $32, %rcx 1020; AVX-NEXT: movl %eax, %edx 1021; AVX-NEXT: subl %ecx, %edx 1022; AVX-NEXT: shrl %edx 1023; AVX-NEXT: addl %ecx, %edx 1024; AVX-NEXT: shrl $2, %edx 1025; AVX-NEXT: leal (,%rdx,8), %ecx 1026; AVX-NEXT: subl %edx, %ecx 1027; AVX-NEXT: subl %ecx, %eax 1028; AVX-NEXT: vmovd %xmm1, %ecx 1029; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 1030; AVX-NEXT: shrq $32, %rdx 1031; AVX-NEXT: movl %ecx, %esi 1032; AVX-NEXT: subl %edx, %esi 1033; AVX-NEXT: shrl %esi 1034; AVX-NEXT: addl %edx, %esi 1035; AVX-NEXT: shrl $2, %esi 1036; AVX-NEXT: leal (,%rsi,8), %edx 1037; AVX-NEXT: subl %esi, %edx 1038; AVX-NEXT: subl %edx, %ecx 1039; AVX-NEXT: vmovd %ecx, %xmm2 1040; AVX-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2 1041; AVX-NEXT: vpextrd $2, %xmm1, %eax 1042; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1043; AVX-NEXT: shrq $32, %rcx 1044; AVX-NEXT: movl %eax, %edx 1045; AVX-NEXT: subl %ecx, %edx 1046; AVX-NEXT: shrl %edx 1047; AVX-NEXT: addl %ecx, %edx 1048; AVX-NEXT: shrl $2, %edx 1049; AVX-NEXT: leal (,%rdx,8), %ecx 1050; AVX-NEXT: subl %edx, %ecx 1051; AVX-NEXT: subl %ecx, %eax 1052; AVX-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 1053; AVX-NEXT: vpextrd $3, %xmm1, %eax 1054; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1055; AVX-NEXT: shrq $32, %rcx 1056; AVX-NEXT: movl %eax, %edx 1057; AVX-NEXT: subl %ecx, %edx 1058; AVX-NEXT: shrl %edx 1059; AVX-NEXT: addl %ecx, %edx 1060; AVX-NEXT: shrl $2, %edx 1061; AVX-NEXT: leal (,%rdx,8), %ecx 1062; AVX-NEXT: subl %edx, %ecx 1063; AVX-NEXT: subl %ecx, %eax 1064; AVX-NEXT: vpinsrd $3, %eax, %xmm2, %xmm1 1065; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1066; AVX-NEXT: vpextrd $1, %xmm2, %eax 1067; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1068; AVX-NEXT: shrq $32, %rcx 1069; AVX-NEXT: movl %eax, %edx 1070; AVX-NEXT: subl %ecx, %edx 1071; AVX-NEXT: shrl %edx 1072; AVX-NEXT: addl %ecx, %edx 1073; AVX-NEXT: shrl $2, %edx 1074; AVX-NEXT: leal (,%rdx,8), %ecx 1075; AVX-NEXT: subl %edx, %ecx 1076; AVX-NEXT: subl %ecx, %eax 1077; AVX-NEXT: vmovd %xmm2, %ecx 1078; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 1079; AVX-NEXT: shrq $32, %rdx 1080; AVX-NEXT: movl %ecx, %esi 1081; AVX-NEXT: subl %edx, %esi 1082; AVX-NEXT: shrl %esi 1083; AVX-NEXT: addl %edx, %esi 1084; AVX-NEXT: shrl $2, %esi 1085; AVX-NEXT: leal (,%rsi,8), %edx 1086; AVX-NEXT: subl %esi, %edx 1087; AVX-NEXT: subl %edx, %ecx 1088; AVX-NEXT: vmovd %ecx, %xmm3 1089; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 1090; AVX-NEXT: vpextrd $2, %xmm2, %eax 1091; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1092; AVX-NEXT: shrq $32, %rcx 1093; AVX-NEXT: movl %eax, %edx 1094; AVX-NEXT: subl %ecx, %edx 1095; AVX-NEXT: shrl %edx 1096; AVX-NEXT: addl %ecx, %edx 1097; AVX-NEXT: shrl $2, %edx 1098; AVX-NEXT: leal (,%rdx,8), %ecx 1099; AVX-NEXT: subl %edx, %ecx 1100; AVX-NEXT: subl %ecx, %eax 1101; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 1102; AVX-NEXT: vpextrd $3, %xmm2, %eax 1103; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1104; AVX-NEXT: shrq $32, %rcx 1105; AVX-NEXT: movl %eax, %edx 1106; AVX-NEXT: subl %ecx, %edx 1107; AVX-NEXT: shrl %edx 1108; AVX-NEXT: addl %ecx, %edx 1109; AVX-NEXT: shrl $2, %edx 1110; AVX-NEXT: leal (,%rdx,8), %ecx 1111; AVX-NEXT: subl %edx, %ecx 1112; AVX-NEXT: subl %ecx, %eax 1113; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2 1114; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 1115; AVX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 1116; AVX-NEXT: vpextrd $1, %xmm2, %eax 1117; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1118; AVX-NEXT: shrq $32, %rcx 1119; AVX-NEXT: movl %eax, %edx 1120; AVX-NEXT: subl %ecx, %edx 1121; AVX-NEXT: shrl %edx 1122; AVX-NEXT: addl %ecx, %edx 1123; AVX-NEXT: shrl $2, %edx 1124; AVX-NEXT: leal (,%rdx,8), %ecx 1125; AVX-NEXT: subl %edx, %ecx 1126; AVX-NEXT: subl %ecx, %eax 1127; AVX-NEXT: vmovd %xmm2, %ecx 1128; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 1129; AVX-NEXT: shrq $32, %rdx 1130; AVX-NEXT: movl %ecx, %esi 1131; AVX-NEXT: subl %edx, %esi 1132; AVX-NEXT: shrl %esi 1133; AVX-NEXT: addl %edx, %esi 1134; AVX-NEXT: shrl $2, %esi 1135; AVX-NEXT: leal (,%rsi,8), %edx 1136; AVX-NEXT: subl %esi, %edx 1137; AVX-NEXT: subl %edx, %ecx 1138; AVX-NEXT: vmovd %ecx, %xmm3 1139; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 1140; AVX-NEXT: vpextrd $2, %xmm2, %eax 1141; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1142; AVX-NEXT: shrq $32, %rcx 1143; AVX-NEXT: movl %eax, %edx 1144; AVX-NEXT: subl %ecx, %edx 1145; AVX-NEXT: shrl %edx 1146; AVX-NEXT: addl %ecx, %edx 1147; AVX-NEXT: shrl $2, %edx 1148; AVX-NEXT: leal (,%rdx,8), %ecx 1149; AVX-NEXT: subl %edx, %ecx 1150; AVX-NEXT: subl %ecx, %eax 1151; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 1152; AVX-NEXT: vpextrd $3, %xmm2, %eax 1153; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1154; AVX-NEXT: shrq $32, %rcx 1155; AVX-NEXT: movl %eax, %edx 1156; AVX-NEXT: subl %ecx, %edx 1157; AVX-NEXT: shrl %edx 1158; AVX-NEXT: addl %ecx, %edx 1159; AVX-NEXT: shrl $2, %edx 1160; AVX-NEXT: leal (,%rdx,8), %ecx 1161; AVX-NEXT: subl %edx, %ecx 1162; AVX-NEXT: subl %ecx, %eax 1163; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm2 1164; AVX-NEXT: vpextrd $1, %xmm0, %eax 1165; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1166; AVX-NEXT: shrq $32, %rcx 1167; AVX-NEXT: movl %eax, %edx 1168; AVX-NEXT: subl %ecx, %edx 1169; AVX-NEXT: shrl %edx 1170; AVX-NEXT: addl %ecx, %edx 1171; AVX-NEXT: shrl $2, %edx 1172; AVX-NEXT: leal (,%rdx,8), %ecx 1173; AVX-NEXT: subl %edx, %ecx 1174; AVX-NEXT: subl %ecx, %eax 1175; AVX-NEXT: vmovd %xmm0, %ecx 1176; AVX-NEXT: imulq $613566757, %rcx, %rdx # imm = 0x24924925 1177; AVX-NEXT: shrq $32, %rdx 1178; AVX-NEXT: movl %ecx, %esi 1179; AVX-NEXT: subl %edx, %esi 1180; AVX-NEXT: shrl %esi 1181; AVX-NEXT: addl %edx, %esi 1182; AVX-NEXT: shrl $2, %esi 1183; AVX-NEXT: leal (,%rsi,8), %edx 1184; AVX-NEXT: subl %esi, %edx 1185; AVX-NEXT: subl %edx, %ecx 1186; AVX-NEXT: vmovd %ecx, %xmm3 1187; AVX-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 1188; AVX-NEXT: vpextrd $2, %xmm0, %eax 1189; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1190; AVX-NEXT: shrq $32, %rcx 1191; AVX-NEXT: movl %eax, %edx 1192; AVX-NEXT: subl %ecx, %edx 1193; AVX-NEXT: shrl %edx 1194; AVX-NEXT: addl %ecx, %edx 1195; AVX-NEXT: shrl $2, %edx 1196; AVX-NEXT: leal (,%rdx,8), %ecx 1197; AVX-NEXT: subl %edx, %ecx 1198; AVX-NEXT: subl %ecx, %eax 1199; AVX-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3 1200; AVX-NEXT: vpextrd $3, %xmm0, %eax 1201; AVX-NEXT: imulq $613566757, %rax, %rcx # imm = 0x24924925 1202; AVX-NEXT: shrq $32, %rcx 1203; AVX-NEXT: movl %eax, %edx 1204; AVX-NEXT: subl %ecx, %edx 1205; AVX-NEXT: shrl %edx 1206; AVX-NEXT: addl %ecx, %edx 1207; AVX-NEXT: shrl $2, %edx 1208; AVX-NEXT: leal (,%rdx,8), %ecx 1209; AVX-NEXT: subl %edx, %ecx 1210; AVX-NEXT: subl %ecx, %eax 1211; AVX-NEXT: vpinsrd $3, %eax, %xmm3, %xmm0 1212; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 1213; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 1214; AVX-NEXT: retq 1215 %res = urem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 1216 ret <16 x i32> %res 1217} 1218 1219define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind { 1220; AVX512F-LABEL: test_rem7_32i16: 1221; AVX512F: # BB#0: 1222; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] 1223; AVX512F-NEXT: vpmulhuw %ymm2, %ymm0, %ymm3 1224; AVX512F-NEXT: vpsubw %ymm3, %ymm0, %ymm4 1225; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4 1226; AVX512F-NEXT: vpaddw %ymm3, %ymm4, %ymm3 1227; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3 1228; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] 1229; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 1230; AVX512F-NEXT: vpsubw %ymm3, %ymm0, %ymm0 1231; AVX512F-NEXT: vpmulhuw %ymm2, %ymm1, %ymm2 1232; AVX512F-NEXT: vpsubw %ymm2, %ymm1, %ymm3 1233; AVX512F-NEXT: vpsrlw $1, %ymm3, %ymm3 1234; AVX512F-NEXT: vpaddw %ymm2, %ymm3, %ymm2 1235; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2 1236; AVX512F-NEXT: vpmullw %ymm4, %ymm2, %ymm2 1237; AVX512F-NEXT: vpsubw %ymm2, %ymm1, %ymm1 1238; AVX512F-NEXT: retq 1239; 1240; AVX512BW-LABEL: test_rem7_32i16: 1241; AVX512BW: # BB#0: 1242; AVX512BW-NEXT: vpmulhuw {{.*}}(%rip), %zmm0, %zmm1 1243; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 1244; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2 1245; AVX512BW-NEXT: vpaddw %zmm1, %zmm2, %zmm1 1246; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1 1247; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm1, %zmm1 1248; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0 1249; AVX512BW-NEXT: retq 1250 %res = urem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> 1251 ret <32 x i16> %res 1252} 1253 1254define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { 1255; AVX512F-LABEL: test_rem7_64i8: 1256; AVX512F: # BB#0: 1257; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] 1258; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm2 1259; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero 1260; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 1261; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero 1262; AVX512F-NEXT: vpmullw %ymm2, %ymm4, %ymm4 1263; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm5 1264; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero 1265; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 1266; AVX512F-NEXT: vpmullw %ymm4, %ymm3, %ymm3 1267; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 1268; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm6 = ymm3[2,3],ymm5[2,3] 1269; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm3, %ymm3 1270; AVX512F-NEXT: vpackuswb %ymm6, %ymm3, %ymm3 1271; AVX512F-NEXT: vpsubb %ymm3, %ymm0, %ymm5 1272; AVX512F-NEXT: vpsrlw $1, %ymm5, %ymm6 1273; AVX512F-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] 1274; AVX512F-NEXT: vpand %ymm5, %ymm6, %ymm6 1275; AVX512F-NEXT: vpaddb %ymm3, %ymm6, %ymm3 1276; AVX512F-NEXT: vpsrlw $2, %ymm3, %ymm3 1277; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] 1278; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm7 1279; AVX512F-NEXT: vpmovsxbw %xmm7, %ymm8 1280; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3 1281; AVX512F-NEXT: vpmullw %ymm3, %ymm8, %ymm8 1282; AVX512F-NEXT: vpmovsxwd %ymm8, %zmm8 1283; AVX512F-NEXT: vpmovdb %zmm8, %xmm8 1284; AVX512F-NEXT: vextracti128 $1, %ymm7, %xmm7 1285; AVX512F-NEXT: vpmovsxbw %xmm7, %ymm7 1286; AVX512F-NEXT: vpmullw %ymm3, %ymm7, %ymm7 1287; AVX512F-NEXT: vpmovsxwd %ymm7, %zmm7 1288; AVX512F-NEXT: vpmovdb %zmm7, %xmm7 1289; AVX512F-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 1290; AVX512F-NEXT: vpsubb %ymm7, %ymm0, %ymm0 1291; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm7 1292; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm7[0],zero,xmm7[1],zero,xmm7[2],zero,xmm7[3],zero,xmm7[4],zero,xmm7[5],zero,xmm7[6],zero,xmm7[7],zero,xmm7[8],zero,xmm7[9],zero,xmm7[10],zero,xmm7[11],zero,xmm7[12],zero,xmm7[13],zero,xmm7[14],zero,xmm7[15],zero 1293; AVX512F-NEXT: vpmullw %ymm2, %ymm7, %ymm2 1294; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 1295; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm7 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero 1296; AVX512F-NEXT: vpmullw %ymm4, %ymm7, %ymm4 1297; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 1298; AVX512F-NEXT: vperm2i128 {{.*#+}} ymm7 = ymm4[2,3],ymm2[2,3] 1299; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 1300; AVX512F-NEXT: vpackuswb %ymm7, %ymm2, %ymm2 1301; AVX512F-NEXT: vpsubb %ymm2, %ymm1, %ymm4 1302; AVX512F-NEXT: vpsrlw $1, %ymm4, %ymm4 1303; AVX512F-NEXT: vpand %ymm5, %ymm4, %ymm4 1304; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm2 1305; AVX512F-NEXT: vpsrlw $2, %ymm2, %ymm2 1306; AVX512F-NEXT: vpand %ymm6, %ymm2, %ymm2 1307; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm4 1308; AVX512F-NEXT: vpmullw %ymm3, %ymm4, %ymm4 1309; AVX512F-NEXT: vpmovsxwd %ymm4, %zmm4 1310; AVX512F-NEXT: vpmovdb %zmm4, %xmm4 1311; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm2 1312; AVX512F-NEXT: vpmovsxbw %xmm2, %ymm2 1313; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2 1314; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2 1315; AVX512F-NEXT: vpmovdb %zmm2, %xmm2 1316; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2 1317; AVX512F-NEXT: vpsubb %ymm2, %ymm1, %ymm1 1318; AVX512F-NEXT: retq 1319; 1320; AVX512BW-LABEL: test_rem7_64i8: 1321; AVX512BW: # BB#0: 1322; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm1 1323; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx 1324; AVX512BW-NEXT: imull $37, %edx, %ecx 1325; AVX512BW-NEXT: shrl $8, %ecx 1326; AVX512BW-NEXT: movl %edx, %eax 1327; AVX512BW-NEXT: subb %cl, %al 1328; AVX512BW-NEXT: shrb %al 1329; AVX512BW-NEXT: addb %cl, %al 1330; AVX512BW-NEXT: shrb $2, %al 1331; AVX512BW-NEXT: movb $7, %cl 1332; AVX512BW-NEXT: mulb %cl 1333; AVX512BW-NEXT: subb %al, %dl 1334; AVX512BW-NEXT: movzbl %dl, %edx 1335; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi 1336; AVX512BW-NEXT: imull $37, %esi, %edi 1337; AVX512BW-NEXT: shrl $8, %edi 1338; AVX512BW-NEXT: movl %esi, %eax 1339; AVX512BW-NEXT: subb %dil, %al 1340; AVX512BW-NEXT: shrb %al 1341; AVX512BW-NEXT: addb %dil, %al 1342; AVX512BW-NEXT: shrb $2, %al 1343; AVX512BW-NEXT: mulb %cl 1344; AVX512BW-NEXT: subb %al, %sil 1345; AVX512BW-NEXT: movzbl %sil, %eax 1346; AVX512BW-NEXT: vmovd %eax, %xmm2 1347; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm2, %xmm2 1348; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx 1349; AVX512BW-NEXT: imull $37, %edx, %esi 1350; AVX512BW-NEXT: shrl $8, %esi 1351; AVX512BW-NEXT: movl %edx, %eax 1352; AVX512BW-NEXT: subb %sil, %al 1353; AVX512BW-NEXT: shrb %al 1354; AVX512BW-NEXT: addb %sil, %al 1355; AVX512BW-NEXT: shrb $2, %al 1356; AVX512BW-NEXT: mulb %cl 1357; AVX512BW-NEXT: subb %al, %dl 1358; AVX512BW-NEXT: movzbl %dl, %eax 1359; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 1360; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx 1361; AVX512BW-NEXT: imull $37, %edx, %esi 1362; AVX512BW-NEXT: shrl $8, %esi 1363; AVX512BW-NEXT: movl %edx, %eax 1364; AVX512BW-NEXT: subb %sil, %al 1365; AVX512BW-NEXT: shrb %al 1366; AVX512BW-NEXT: addb %sil, %al 1367; AVX512BW-NEXT: shrb $2, %al 1368; AVX512BW-NEXT: mulb %cl 1369; AVX512BW-NEXT: subb %al, %dl 1370; AVX512BW-NEXT: movzbl %dl, %eax 1371; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 1372; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx 1373; AVX512BW-NEXT: imull $37, %edx, %esi 1374; AVX512BW-NEXT: shrl $8, %esi 1375; AVX512BW-NEXT: movl %edx, %eax 1376; AVX512BW-NEXT: subb %sil, %al 1377; AVX512BW-NEXT: shrb %al 1378; AVX512BW-NEXT: addb %sil, %al 1379; AVX512BW-NEXT: shrb $2, %al 1380; AVX512BW-NEXT: mulb %cl 1381; AVX512BW-NEXT: subb %al, %dl 1382; AVX512BW-NEXT: movzbl %dl, %eax 1383; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 1384; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx 1385; AVX512BW-NEXT: imull $37, %edx, %esi 1386; AVX512BW-NEXT: shrl $8, %esi 1387; AVX512BW-NEXT: movl %edx, %eax 1388; AVX512BW-NEXT: subb %sil, %al 1389; AVX512BW-NEXT: shrb %al 1390; AVX512BW-NEXT: addb %sil, %al 1391; AVX512BW-NEXT: shrb $2, %al 1392; AVX512BW-NEXT: mulb %cl 1393; AVX512BW-NEXT: subb %al, %dl 1394; AVX512BW-NEXT: movzbl %dl, %eax 1395; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 1396; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx 1397; AVX512BW-NEXT: imull $37, %edx, %esi 1398; AVX512BW-NEXT: shrl $8, %esi 1399; AVX512BW-NEXT: movl %edx, %eax 1400; AVX512BW-NEXT: subb %sil, %al 1401; AVX512BW-NEXT: shrb %al 1402; AVX512BW-NEXT: addb %sil, %al 1403; AVX512BW-NEXT: shrb $2, %al 1404; AVX512BW-NEXT: mulb %cl 1405; AVX512BW-NEXT: subb %al, %dl 1406; AVX512BW-NEXT: movzbl %dl, %eax 1407; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 1408; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx 1409; AVX512BW-NEXT: imull $37, %edx, %esi 1410; AVX512BW-NEXT: shrl $8, %esi 1411; AVX512BW-NEXT: movl %edx, %eax 1412; AVX512BW-NEXT: subb %sil, %al 1413; AVX512BW-NEXT: shrb %al 1414; AVX512BW-NEXT: addb %sil, %al 1415; AVX512BW-NEXT: shrb $2, %al 1416; AVX512BW-NEXT: mulb %cl 1417; AVX512BW-NEXT: subb %al, %dl 1418; AVX512BW-NEXT: movzbl %dl, %eax 1419; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 1420; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx 1421; AVX512BW-NEXT: imull $37, %edx, %esi 1422; AVX512BW-NEXT: shrl $8, %esi 1423; AVX512BW-NEXT: movl %edx, %eax 1424; AVX512BW-NEXT: subb %sil, %al 1425; AVX512BW-NEXT: shrb %al 1426; AVX512BW-NEXT: addb %sil, %al 1427; AVX512BW-NEXT: shrb $2, %al 1428; AVX512BW-NEXT: mulb %cl 1429; AVX512BW-NEXT: subb %al, %dl 1430; AVX512BW-NEXT: movzbl %dl, %eax 1431; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 1432; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx 1433; AVX512BW-NEXT: imull $37, %edx, %esi 1434; AVX512BW-NEXT: shrl $8, %esi 1435; AVX512BW-NEXT: movl %edx, %eax 1436; AVX512BW-NEXT: subb %sil, %al 1437; AVX512BW-NEXT: shrb %al 1438; AVX512BW-NEXT: addb %sil, %al 1439; AVX512BW-NEXT: shrb $2, %al 1440; AVX512BW-NEXT: mulb %cl 1441; AVX512BW-NEXT: subb %al, %dl 1442; AVX512BW-NEXT: movzbl %dl, %eax 1443; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 1444; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx 1445; AVX512BW-NEXT: imull $37, %edx, %esi 1446; AVX512BW-NEXT: shrl $8, %esi 1447; AVX512BW-NEXT: movl %edx, %eax 1448; AVX512BW-NEXT: subb %sil, %al 1449; AVX512BW-NEXT: shrb %al 1450; AVX512BW-NEXT: addb %sil, %al 1451; AVX512BW-NEXT: shrb $2, %al 1452; AVX512BW-NEXT: mulb %cl 1453; AVX512BW-NEXT: subb %al, %dl 1454; AVX512BW-NEXT: movzbl %dl, %eax 1455; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 1456; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx 1457; AVX512BW-NEXT: imull $37, %edx, %esi 1458; AVX512BW-NEXT: shrl $8, %esi 1459; AVX512BW-NEXT: movl %edx, %eax 1460; AVX512BW-NEXT: subb %sil, %al 1461; AVX512BW-NEXT: shrb %al 1462; AVX512BW-NEXT: addb %sil, %al 1463; AVX512BW-NEXT: shrb $2, %al 1464; AVX512BW-NEXT: mulb %cl 1465; AVX512BW-NEXT: subb %al, %dl 1466; AVX512BW-NEXT: movzbl %dl, %eax 1467; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 1468; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx 1469; AVX512BW-NEXT: imull $37, %edx, %esi 1470; AVX512BW-NEXT: shrl $8, %esi 1471; AVX512BW-NEXT: movl %edx, %eax 1472; AVX512BW-NEXT: subb %sil, %al 1473; AVX512BW-NEXT: shrb %al 1474; AVX512BW-NEXT: addb %sil, %al 1475; AVX512BW-NEXT: shrb $2, %al 1476; AVX512BW-NEXT: mulb %cl 1477; AVX512BW-NEXT: subb %al, %dl 1478; AVX512BW-NEXT: movzbl %dl, %eax 1479; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 1480; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx 1481; AVX512BW-NEXT: imull $37, %edx, %esi 1482; AVX512BW-NEXT: shrl $8, %esi 1483; AVX512BW-NEXT: movl %edx, %eax 1484; AVX512BW-NEXT: subb %sil, %al 1485; AVX512BW-NEXT: shrb %al 1486; AVX512BW-NEXT: addb %sil, %al 1487; AVX512BW-NEXT: shrb $2, %al 1488; AVX512BW-NEXT: mulb %cl 1489; AVX512BW-NEXT: subb %al, %dl 1490; AVX512BW-NEXT: movzbl %dl, %eax 1491; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 1492; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx 1493; AVX512BW-NEXT: imull $37, %edx, %esi 1494; AVX512BW-NEXT: shrl $8, %esi 1495; AVX512BW-NEXT: movl %edx, %eax 1496; AVX512BW-NEXT: subb %sil, %al 1497; AVX512BW-NEXT: shrb %al 1498; AVX512BW-NEXT: addb %sil, %al 1499; AVX512BW-NEXT: shrb $2, %al 1500; AVX512BW-NEXT: mulb %cl 1501; AVX512BW-NEXT: subb %al, %dl 1502; AVX512BW-NEXT: movzbl %dl, %eax 1503; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 1504; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx 1505; AVX512BW-NEXT: imull $37, %edx, %esi 1506; AVX512BW-NEXT: shrl $8, %esi 1507; AVX512BW-NEXT: movl %edx, %eax 1508; AVX512BW-NEXT: subb %sil, %al 1509; AVX512BW-NEXT: shrb %al 1510; AVX512BW-NEXT: addb %sil, %al 1511; AVX512BW-NEXT: shrb $2, %al 1512; AVX512BW-NEXT: mulb %cl 1513; AVX512BW-NEXT: subb %al, %dl 1514; AVX512BW-NEXT: movzbl %dl, %eax 1515; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1 1516; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2 1517; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx 1518; AVX512BW-NEXT: imull $37, %edx, %esi 1519; AVX512BW-NEXT: shrl $8, %esi 1520; AVX512BW-NEXT: movl %edx, %eax 1521; AVX512BW-NEXT: subb %sil, %al 1522; AVX512BW-NEXT: shrb %al 1523; AVX512BW-NEXT: addb %sil, %al 1524; AVX512BW-NEXT: shrb $2, %al 1525; AVX512BW-NEXT: mulb %cl 1526; AVX512BW-NEXT: subb %al, %dl 1527; AVX512BW-NEXT: movzbl %dl, %edx 1528; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi 1529; AVX512BW-NEXT: imull $37, %esi, %edi 1530; AVX512BW-NEXT: shrl $8, %edi 1531; AVX512BW-NEXT: movl %esi, %eax 1532; AVX512BW-NEXT: subb %dil, %al 1533; AVX512BW-NEXT: shrb %al 1534; AVX512BW-NEXT: addb %dil, %al 1535; AVX512BW-NEXT: shrb $2, %al 1536; AVX512BW-NEXT: mulb %cl 1537; AVX512BW-NEXT: subb %al, %sil 1538; AVX512BW-NEXT: movzbl %sil, %eax 1539; AVX512BW-NEXT: vmovd %eax, %xmm3 1540; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 1541; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx 1542; AVX512BW-NEXT: imull $37, %edx, %esi 1543; AVX512BW-NEXT: shrl $8, %esi 1544; AVX512BW-NEXT: movl %edx, %eax 1545; AVX512BW-NEXT: subb %sil, %al 1546; AVX512BW-NEXT: shrb %al 1547; AVX512BW-NEXT: addb %sil, %al 1548; AVX512BW-NEXT: shrb $2, %al 1549; AVX512BW-NEXT: mulb %cl 1550; AVX512BW-NEXT: subb %al, %dl 1551; AVX512BW-NEXT: movzbl %dl, %eax 1552; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 1553; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx 1554; AVX512BW-NEXT: imull $37, %edx, %esi 1555; AVX512BW-NEXT: shrl $8, %esi 1556; AVX512BW-NEXT: movl %edx, %eax 1557; AVX512BW-NEXT: subb %sil, %al 1558; AVX512BW-NEXT: shrb %al 1559; AVX512BW-NEXT: addb %sil, %al 1560; AVX512BW-NEXT: shrb $2, %al 1561; AVX512BW-NEXT: mulb %cl 1562; AVX512BW-NEXT: subb %al, %dl 1563; AVX512BW-NEXT: movzbl %dl, %eax 1564; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 1565; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx 1566; AVX512BW-NEXT: imull $37, %edx, %esi 1567; AVX512BW-NEXT: shrl $8, %esi 1568; AVX512BW-NEXT: movl %edx, %eax 1569; AVX512BW-NEXT: subb %sil, %al 1570; AVX512BW-NEXT: shrb %al 1571; AVX512BW-NEXT: addb %sil, %al 1572; AVX512BW-NEXT: shrb $2, %al 1573; AVX512BW-NEXT: mulb %cl 1574; AVX512BW-NEXT: subb %al, %dl 1575; AVX512BW-NEXT: movzbl %dl, %eax 1576; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 1577; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx 1578; AVX512BW-NEXT: imull $37, %edx, %esi 1579; AVX512BW-NEXT: shrl $8, %esi 1580; AVX512BW-NEXT: movl %edx, %eax 1581; AVX512BW-NEXT: subb %sil, %al 1582; AVX512BW-NEXT: shrb %al 1583; AVX512BW-NEXT: addb %sil, %al 1584; AVX512BW-NEXT: shrb $2, %al 1585; AVX512BW-NEXT: mulb %cl 1586; AVX512BW-NEXT: subb %al, %dl 1587; AVX512BW-NEXT: movzbl %dl, %eax 1588; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 1589; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx 1590; AVX512BW-NEXT: imull $37, %edx, %esi 1591; AVX512BW-NEXT: shrl $8, %esi 1592; AVX512BW-NEXT: movl %edx, %eax 1593; AVX512BW-NEXT: subb %sil, %al 1594; AVX512BW-NEXT: shrb %al 1595; AVX512BW-NEXT: addb %sil, %al 1596; AVX512BW-NEXT: shrb $2, %al 1597; AVX512BW-NEXT: mulb %cl 1598; AVX512BW-NEXT: subb %al, %dl 1599; AVX512BW-NEXT: movzbl %dl, %eax 1600; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 1601; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx 1602; AVX512BW-NEXT: imull $37, %edx, %esi 1603; AVX512BW-NEXT: shrl $8, %esi 1604; AVX512BW-NEXT: movl %edx, %eax 1605; AVX512BW-NEXT: subb %sil, %al 1606; AVX512BW-NEXT: shrb %al 1607; AVX512BW-NEXT: addb %sil, %al 1608; AVX512BW-NEXT: shrb $2, %al 1609; AVX512BW-NEXT: mulb %cl 1610; AVX512BW-NEXT: subb %al, %dl 1611; AVX512BW-NEXT: movzbl %dl, %eax 1612; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 1613; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx 1614; AVX512BW-NEXT: imull $37, %edx, %esi 1615; AVX512BW-NEXT: shrl $8, %esi 1616; AVX512BW-NEXT: movl %edx, %eax 1617; AVX512BW-NEXT: subb %sil, %al 1618; AVX512BW-NEXT: shrb %al 1619; AVX512BW-NEXT: addb %sil, %al 1620; AVX512BW-NEXT: shrb $2, %al 1621; AVX512BW-NEXT: mulb %cl 1622; AVX512BW-NEXT: subb %al, %dl 1623; AVX512BW-NEXT: movzbl %dl, %eax 1624; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 1625; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx 1626; AVX512BW-NEXT: imull $37, %edx, %esi 1627; AVX512BW-NEXT: shrl $8, %esi 1628; AVX512BW-NEXT: movl %edx, %eax 1629; AVX512BW-NEXT: subb %sil, %al 1630; AVX512BW-NEXT: shrb %al 1631; AVX512BW-NEXT: addb %sil, %al 1632; AVX512BW-NEXT: shrb $2, %al 1633; AVX512BW-NEXT: mulb %cl 1634; AVX512BW-NEXT: subb %al, %dl 1635; AVX512BW-NEXT: movzbl %dl, %eax 1636; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 1637; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx 1638; AVX512BW-NEXT: imull $37, %edx, %esi 1639; AVX512BW-NEXT: shrl $8, %esi 1640; AVX512BW-NEXT: movl %edx, %eax 1641; AVX512BW-NEXT: subb %sil, %al 1642; AVX512BW-NEXT: shrb %al 1643; AVX512BW-NEXT: addb %sil, %al 1644; AVX512BW-NEXT: shrb $2, %al 1645; AVX512BW-NEXT: mulb %cl 1646; AVX512BW-NEXT: subb %al, %dl 1647; AVX512BW-NEXT: movzbl %dl, %eax 1648; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 1649; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx 1650; AVX512BW-NEXT: imull $37, %edx, %esi 1651; AVX512BW-NEXT: shrl $8, %esi 1652; AVX512BW-NEXT: movl %edx, %eax 1653; AVX512BW-NEXT: subb %sil, %al 1654; AVX512BW-NEXT: shrb %al 1655; AVX512BW-NEXT: addb %sil, %al 1656; AVX512BW-NEXT: shrb $2, %al 1657; AVX512BW-NEXT: mulb %cl 1658; AVX512BW-NEXT: subb %al, %dl 1659; AVX512BW-NEXT: movzbl %dl, %eax 1660; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 1661; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx 1662; AVX512BW-NEXT: imull $37, %edx, %esi 1663; AVX512BW-NEXT: shrl $8, %esi 1664; AVX512BW-NEXT: movl %edx, %eax 1665; AVX512BW-NEXT: subb %sil, %al 1666; AVX512BW-NEXT: shrb %al 1667; AVX512BW-NEXT: addb %sil, %al 1668; AVX512BW-NEXT: shrb $2, %al 1669; AVX512BW-NEXT: mulb %cl 1670; AVX512BW-NEXT: subb %al, %dl 1671; AVX512BW-NEXT: movzbl %dl, %eax 1672; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 1673; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx 1674; AVX512BW-NEXT: imull $37, %edx, %esi 1675; AVX512BW-NEXT: shrl $8, %esi 1676; AVX512BW-NEXT: movl %edx, %eax 1677; AVX512BW-NEXT: subb %sil, %al 1678; AVX512BW-NEXT: shrb %al 1679; AVX512BW-NEXT: addb %sil, %al 1680; AVX512BW-NEXT: shrb $2, %al 1681; AVX512BW-NEXT: mulb %cl 1682; AVX512BW-NEXT: subb %al, %dl 1683; AVX512BW-NEXT: movzbl %dl, %eax 1684; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 1685; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx 1686; AVX512BW-NEXT: imull $37, %edx, %esi 1687; AVX512BW-NEXT: shrl $8, %esi 1688; AVX512BW-NEXT: movl %edx, %eax 1689; AVX512BW-NEXT: subb %sil, %al 1690; AVX512BW-NEXT: shrb %al 1691; AVX512BW-NEXT: addb %sil, %al 1692; AVX512BW-NEXT: shrb $2, %al 1693; AVX512BW-NEXT: mulb %cl 1694; AVX512BW-NEXT: subb %al, %dl 1695; AVX512BW-NEXT: movzbl %dl, %eax 1696; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1697; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx 1698; AVX512BW-NEXT: imull $37, %edx, %esi 1699; AVX512BW-NEXT: shrl $8, %esi 1700; AVX512BW-NEXT: movl %edx, %eax 1701; AVX512BW-NEXT: subb %sil, %al 1702; AVX512BW-NEXT: shrb %al 1703; AVX512BW-NEXT: addb %sil, %al 1704; AVX512BW-NEXT: shrb $2, %al 1705; AVX512BW-NEXT: mulb %cl 1706; AVX512BW-NEXT: subb %al, %dl 1707; AVX512BW-NEXT: movzbl %dl, %eax 1708; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 1709; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 1710; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2 1711; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx 1712; AVX512BW-NEXT: imull $37, %edx, %esi 1713; AVX512BW-NEXT: shrl $8, %esi 1714; AVX512BW-NEXT: movl %edx, %eax 1715; AVX512BW-NEXT: subb %sil, %al 1716; AVX512BW-NEXT: shrb %al 1717; AVX512BW-NEXT: addb %sil, %al 1718; AVX512BW-NEXT: shrb $2, %al 1719; AVX512BW-NEXT: mulb %cl 1720; AVX512BW-NEXT: subb %al, %dl 1721; AVX512BW-NEXT: movzbl %dl, %edx 1722; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi 1723; AVX512BW-NEXT: imull $37, %esi, %edi 1724; AVX512BW-NEXT: shrl $8, %edi 1725; AVX512BW-NEXT: movl %esi, %eax 1726; AVX512BW-NEXT: subb %dil, %al 1727; AVX512BW-NEXT: shrb %al 1728; AVX512BW-NEXT: addb %dil, %al 1729; AVX512BW-NEXT: shrb $2, %al 1730; AVX512BW-NEXT: mulb %cl 1731; AVX512BW-NEXT: subb %al, %sil 1732; AVX512BW-NEXT: movzbl %sil, %eax 1733; AVX512BW-NEXT: vmovd %eax, %xmm3 1734; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 1735; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx 1736; AVX512BW-NEXT: imull $37, %edx, %esi 1737; AVX512BW-NEXT: shrl $8, %esi 1738; AVX512BW-NEXT: movl %edx, %eax 1739; AVX512BW-NEXT: subb %sil, %al 1740; AVX512BW-NEXT: shrb %al 1741; AVX512BW-NEXT: addb %sil, %al 1742; AVX512BW-NEXT: shrb $2, %al 1743; AVX512BW-NEXT: mulb %cl 1744; AVX512BW-NEXT: subb %al, %dl 1745; AVX512BW-NEXT: movzbl %dl, %eax 1746; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 1747; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx 1748; AVX512BW-NEXT: imull $37, %edx, %esi 1749; AVX512BW-NEXT: shrl $8, %esi 1750; AVX512BW-NEXT: movl %edx, %eax 1751; AVX512BW-NEXT: subb %sil, %al 1752; AVX512BW-NEXT: shrb %al 1753; AVX512BW-NEXT: addb %sil, %al 1754; AVX512BW-NEXT: shrb $2, %al 1755; AVX512BW-NEXT: mulb %cl 1756; AVX512BW-NEXT: subb %al, %dl 1757; AVX512BW-NEXT: movzbl %dl, %eax 1758; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 1759; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx 1760; AVX512BW-NEXT: imull $37, %edx, %esi 1761; AVX512BW-NEXT: shrl $8, %esi 1762; AVX512BW-NEXT: movl %edx, %eax 1763; AVX512BW-NEXT: subb %sil, %al 1764; AVX512BW-NEXT: shrb %al 1765; AVX512BW-NEXT: addb %sil, %al 1766; AVX512BW-NEXT: shrb $2, %al 1767; AVX512BW-NEXT: mulb %cl 1768; AVX512BW-NEXT: subb %al, %dl 1769; AVX512BW-NEXT: movzbl %dl, %eax 1770; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 1771; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx 1772; AVX512BW-NEXT: imull $37, %edx, %esi 1773; AVX512BW-NEXT: shrl $8, %esi 1774; AVX512BW-NEXT: movl %edx, %eax 1775; AVX512BW-NEXT: subb %sil, %al 1776; AVX512BW-NEXT: shrb %al 1777; AVX512BW-NEXT: addb %sil, %al 1778; AVX512BW-NEXT: shrb $2, %al 1779; AVX512BW-NEXT: mulb %cl 1780; AVX512BW-NEXT: subb %al, %dl 1781; AVX512BW-NEXT: movzbl %dl, %eax 1782; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 1783; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx 1784; AVX512BW-NEXT: imull $37, %edx, %esi 1785; AVX512BW-NEXT: shrl $8, %esi 1786; AVX512BW-NEXT: movl %edx, %eax 1787; AVX512BW-NEXT: subb %sil, %al 1788; AVX512BW-NEXT: shrb %al 1789; AVX512BW-NEXT: addb %sil, %al 1790; AVX512BW-NEXT: shrb $2, %al 1791; AVX512BW-NEXT: mulb %cl 1792; AVX512BW-NEXT: subb %al, %dl 1793; AVX512BW-NEXT: movzbl %dl, %eax 1794; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 1795; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx 1796; AVX512BW-NEXT: imull $37, %edx, %esi 1797; AVX512BW-NEXT: shrl $8, %esi 1798; AVX512BW-NEXT: movl %edx, %eax 1799; AVX512BW-NEXT: subb %sil, %al 1800; AVX512BW-NEXT: shrb %al 1801; AVX512BW-NEXT: addb %sil, %al 1802; AVX512BW-NEXT: shrb $2, %al 1803; AVX512BW-NEXT: mulb %cl 1804; AVX512BW-NEXT: subb %al, %dl 1805; AVX512BW-NEXT: movzbl %dl, %eax 1806; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 1807; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx 1808; AVX512BW-NEXT: imull $37, %edx, %esi 1809; AVX512BW-NEXT: shrl $8, %esi 1810; AVX512BW-NEXT: movl %edx, %eax 1811; AVX512BW-NEXT: subb %sil, %al 1812; AVX512BW-NEXT: shrb %al 1813; AVX512BW-NEXT: addb %sil, %al 1814; AVX512BW-NEXT: shrb $2, %al 1815; AVX512BW-NEXT: mulb %cl 1816; AVX512BW-NEXT: subb %al, %dl 1817; AVX512BW-NEXT: movzbl %dl, %eax 1818; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 1819; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx 1820; AVX512BW-NEXT: imull $37, %edx, %esi 1821; AVX512BW-NEXT: shrl $8, %esi 1822; AVX512BW-NEXT: movl %edx, %eax 1823; AVX512BW-NEXT: subb %sil, %al 1824; AVX512BW-NEXT: shrb %al 1825; AVX512BW-NEXT: addb %sil, %al 1826; AVX512BW-NEXT: shrb $2, %al 1827; AVX512BW-NEXT: mulb %cl 1828; AVX512BW-NEXT: subb %al, %dl 1829; AVX512BW-NEXT: movzbl %dl, %eax 1830; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 1831; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx 1832; AVX512BW-NEXT: imull $37, %edx, %esi 1833; AVX512BW-NEXT: shrl $8, %esi 1834; AVX512BW-NEXT: movl %edx, %eax 1835; AVX512BW-NEXT: subb %sil, %al 1836; AVX512BW-NEXT: shrb %al 1837; AVX512BW-NEXT: addb %sil, %al 1838; AVX512BW-NEXT: shrb $2, %al 1839; AVX512BW-NEXT: mulb %cl 1840; AVX512BW-NEXT: subb %al, %dl 1841; AVX512BW-NEXT: movzbl %dl, %eax 1842; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 1843; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx 1844; AVX512BW-NEXT: imull $37, %edx, %esi 1845; AVX512BW-NEXT: shrl $8, %esi 1846; AVX512BW-NEXT: movl %edx, %eax 1847; AVX512BW-NEXT: subb %sil, %al 1848; AVX512BW-NEXT: shrb %al 1849; AVX512BW-NEXT: addb %sil, %al 1850; AVX512BW-NEXT: shrb $2, %al 1851; AVX512BW-NEXT: mulb %cl 1852; AVX512BW-NEXT: subb %al, %dl 1853; AVX512BW-NEXT: movzbl %dl, %eax 1854; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 1855; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx 1856; AVX512BW-NEXT: imull $37, %edx, %esi 1857; AVX512BW-NEXT: shrl $8, %esi 1858; AVX512BW-NEXT: movl %edx, %eax 1859; AVX512BW-NEXT: subb %sil, %al 1860; AVX512BW-NEXT: shrb %al 1861; AVX512BW-NEXT: addb %sil, %al 1862; AVX512BW-NEXT: shrb $2, %al 1863; AVX512BW-NEXT: mulb %cl 1864; AVX512BW-NEXT: subb %al, %dl 1865; AVX512BW-NEXT: movzbl %dl, %eax 1866; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 1867; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx 1868; AVX512BW-NEXT: imull $37, %edx, %esi 1869; AVX512BW-NEXT: shrl $8, %esi 1870; AVX512BW-NEXT: movl %edx, %eax 1871; AVX512BW-NEXT: subb %sil, %al 1872; AVX512BW-NEXT: shrb %al 1873; AVX512BW-NEXT: addb %sil, %al 1874; AVX512BW-NEXT: shrb $2, %al 1875; AVX512BW-NEXT: mulb %cl 1876; AVX512BW-NEXT: subb %al, %dl 1877; AVX512BW-NEXT: movzbl %dl, %eax 1878; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 1879; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx 1880; AVX512BW-NEXT: imull $37, %edx, %esi 1881; AVX512BW-NEXT: shrl $8, %esi 1882; AVX512BW-NEXT: movl %edx, %eax 1883; AVX512BW-NEXT: subb %sil, %al 1884; AVX512BW-NEXT: shrb %al 1885; AVX512BW-NEXT: addb %sil, %al 1886; AVX512BW-NEXT: shrb $2, %al 1887; AVX512BW-NEXT: mulb %cl 1888; AVX512BW-NEXT: subb %al, %dl 1889; AVX512BW-NEXT: movzbl %dl, %eax 1890; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 1891; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx 1892; AVX512BW-NEXT: imull $37, %edx, %esi 1893; AVX512BW-NEXT: shrl $8, %esi 1894; AVX512BW-NEXT: movl %edx, %eax 1895; AVX512BW-NEXT: subb %sil, %al 1896; AVX512BW-NEXT: shrb %al 1897; AVX512BW-NEXT: addb %sil, %al 1898; AVX512BW-NEXT: shrb $2, %al 1899; AVX512BW-NEXT: mulb %cl 1900; AVX512BW-NEXT: subb %al, %dl 1901; AVX512BW-NEXT: movzbl %dl, %eax 1902; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm2 1903; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx 1904; AVX512BW-NEXT: imull $37, %edx, %esi 1905; AVX512BW-NEXT: shrl $8, %esi 1906; AVX512BW-NEXT: movl %edx, %eax 1907; AVX512BW-NEXT: subb %sil, %al 1908; AVX512BW-NEXT: shrb %al 1909; AVX512BW-NEXT: addb %sil, %al 1910; AVX512BW-NEXT: shrb $2, %al 1911; AVX512BW-NEXT: mulb %cl 1912; AVX512BW-NEXT: subb %al, %dl 1913; AVX512BW-NEXT: movzbl %dl, %edx 1914; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi 1915; AVX512BW-NEXT: imull $37, %esi, %edi 1916; AVX512BW-NEXT: shrl $8, %edi 1917; AVX512BW-NEXT: movl %esi, %eax 1918; AVX512BW-NEXT: subb %dil, %al 1919; AVX512BW-NEXT: shrb %al 1920; AVX512BW-NEXT: addb %dil, %al 1921; AVX512BW-NEXT: shrb $2, %al 1922; AVX512BW-NEXT: mulb %cl 1923; AVX512BW-NEXT: subb %al, %sil 1924; AVX512BW-NEXT: movzbl %sil, %eax 1925; AVX512BW-NEXT: vmovd %eax, %xmm3 1926; AVX512BW-NEXT: vpinsrb $1, %edx, %xmm3, %xmm3 1927; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx 1928; AVX512BW-NEXT: imull $37, %edx, %esi 1929; AVX512BW-NEXT: shrl $8, %esi 1930; AVX512BW-NEXT: movl %edx, %eax 1931; AVX512BW-NEXT: subb %sil, %al 1932; AVX512BW-NEXT: shrb %al 1933; AVX512BW-NEXT: addb %sil, %al 1934; AVX512BW-NEXT: shrb $2, %al 1935; AVX512BW-NEXT: mulb %cl 1936; AVX512BW-NEXT: subb %al, %dl 1937; AVX512BW-NEXT: movzbl %dl, %eax 1938; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 1939; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx 1940; AVX512BW-NEXT: imull $37, %edx, %esi 1941; AVX512BW-NEXT: shrl $8, %esi 1942; AVX512BW-NEXT: movl %edx, %eax 1943; AVX512BW-NEXT: subb %sil, %al 1944; AVX512BW-NEXT: shrb %al 1945; AVX512BW-NEXT: addb %sil, %al 1946; AVX512BW-NEXT: shrb $2, %al 1947; AVX512BW-NEXT: mulb %cl 1948; AVX512BW-NEXT: subb %al, %dl 1949; AVX512BW-NEXT: movzbl %dl, %eax 1950; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 1951; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx 1952; AVX512BW-NEXT: imull $37, %edx, %esi 1953; AVX512BW-NEXT: shrl $8, %esi 1954; AVX512BW-NEXT: movl %edx, %eax 1955; AVX512BW-NEXT: subb %sil, %al 1956; AVX512BW-NEXT: shrb %al 1957; AVX512BW-NEXT: addb %sil, %al 1958; AVX512BW-NEXT: shrb $2, %al 1959; AVX512BW-NEXT: mulb %cl 1960; AVX512BW-NEXT: subb %al, %dl 1961; AVX512BW-NEXT: movzbl %dl, %eax 1962; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 1963; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx 1964; AVX512BW-NEXT: imull $37, %edx, %esi 1965; AVX512BW-NEXT: shrl $8, %esi 1966; AVX512BW-NEXT: movl %edx, %eax 1967; AVX512BW-NEXT: subb %sil, %al 1968; AVX512BW-NEXT: shrb %al 1969; AVX512BW-NEXT: addb %sil, %al 1970; AVX512BW-NEXT: shrb $2, %al 1971; AVX512BW-NEXT: mulb %cl 1972; AVX512BW-NEXT: subb %al, %dl 1973; AVX512BW-NEXT: movzbl %dl, %eax 1974; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 1975; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx 1976; AVX512BW-NEXT: imull $37, %edx, %esi 1977; AVX512BW-NEXT: shrl $8, %esi 1978; AVX512BW-NEXT: movl %edx, %eax 1979; AVX512BW-NEXT: subb %sil, %al 1980; AVX512BW-NEXT: shrb %al 1981; AVX512BW-NEXT: addb %sil, %al 1982; AVX512BW-NEXT: shrb $2, %al 1983; AVX512BW-NEXT: mulb %cl 1984; AVX512BW-NEXT: subb %al, %dl 1985; AVX512BW-NEXT: movzbl %dl, %eax 1986; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 1987; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx 1988; AVX512BW-NEXT: imull $37, %edx, %esi 1989; AVX512BW-NEXT: shrl $8, %esi 1990; AVX512BW-NEXT: movl %edx, %eax 1991; AVX512BW-NEXT: subb %sil, %al 1992; AVX512BW-NEXT: shrb %al 1993; AVX512BW-NEXT: addb %sil, %al 1994; AVX512BW-NEXT: shrb $2, %al 1995; AVX512BW-NEXT: mulb %cl 1996; AVX512BW-NEXT: subb %al, %dl 1997; AVX512BW-NEXT: movzbl %dl, %eax 1998; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 1999; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx 2000; AVX512BW-NEXT: imull $37, %edx, %esi 2001; AVX512BW-NEXT: shrl $8, %esi 2002; AVX512BW-NEXT: movl %edx, %eax 2003; AVX512BW-NEXT: subb %sil, %al 2004; AVX512BW-NEXT: shrb %al 2005; AVX512BW-NEXT: addb %sil, %al 2006; AVX512BW-NEXT: shrb $2, %al 2007; AVX512BW-NEXT: mulb %cl 2008; AVX512BW-NEXT: subb %al, %dl 2009; AVX512BW-NEXT: movzbl %dl, %eax 2010; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 2011; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx 2012; AVX512BW-NEXT: imull $37, %edx, %esi 2013; AVX512BW-NEXT: shrl $8, %esi 2014; AVX512BW-NEXT: movl %edx, %eax 2015; AVX512BW-NEXT: subb %sil, %al 2016; AVX512BW-NEXT: shrb %al 2017; AVX512BW-NEXT: addb %sil, %al 2018; AVX512BW-NEXT: shrb $2, %al 2019; AVX512BW-NEXT: mulb %cl 2020; AVX512BW-NEXT: subb %al, %dl 2021; AVX512BW-NEXT: movzbl %dl, %eax 2022; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 2023; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx 2024; AVX512BW-NEXT: imull $37, %edx, %esi 2025; AVX512BW-NEXT: shrl $8, %esi 2026; AVX512BW-NEXT: movl %edx, %eax 2027; AVX512BW-NEXT: subb %sil, %al 2028; AVX512BW-NEXT: shrb %al 2029; AVX512BW-NEXT: addb %sil, %al 2030; AVX512BW-NEXT: shrb $2, %al 2031; AVX512BW-NEXT: mulb %cl 2032; AVX512BW-NEXT: subb %al, %dl 2033; AVX512BW-NEXT: movzbl %dl, %eax 2034; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 2035; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx 2036; AVX512BW-NEXT: imull $37, %edx, %esi 2037; AVX512BW-NEXT: shrl $8, %esi 2038; AVX512BW-NEXT: movl %edx, %eax 2039; AVX512BW-NEXT: subb %sil, %al 2040; AVX512BW-NEXT: shrb %al 2041; AVX512BW-NEXT: addb %sil, %al 2042; AVX512BW-NEXT: shrb $2, %al 2043; AVX512BW-NEXT: mulb %cl 2044; AVX512BW-NEXT: subb %al, %dl 2045; AVX512BW-NEXT: movzbl %dl, %eax 2046; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 2047; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx 2048; AVX512BW-NEXT: imull $37, %edx, %esi 2049; AVX512BW-NEXT: shrl $8, %esi 2050; AVX512BW-NEXT: movl %edx, %eax 2051; AVX512BW-NEXT: subb %sil, %al 2052; AVX512BW-NEXT: shrb %al 2053; AVX512BW-NEXT: addb %sil, %al 2054; AVX512BW-NEXT: shrb $2, %al 2055; AVX512BW-NEXT: mulb %cl 2056; AVX512BW-NEXT: subb %al, %dl 2057; AVX512BW-NEXT: movzbl %dl, %eax 2058; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 2059; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx 2060; AVX512BW-NEXT: imull $37, %edx, %esi 2061; AVX512BW-NEXT: shrl $8, %esi 2062; AVX512BW-NEXT: movl %edx, %eax 2063; AVX512BW-NEXT: subb %sil, %al 2064; AVX512BW-NEXT: shrb %al 2065; AVX512BW-NEXT: addb %sil, %al 2066; AVX512BW-NEXT: shrb $2, %al 2067; AVX512BW-NEXT: mulb %cl 2068; AVX512BW-NEXT: subb %al, %dl 2069; AVX512BW-NEXT: movzbl %dl, %eax 2070; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 2071; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx 2072; AVX512BW-NEXT: imull $37, %edx, %esi 2073; AVX512BW-NEXT: shrl $8, %esi 2074; AVX512BW-NEXT: movl %edx, %eax 2075; AVX512BW-NEXT: subb %sil, %al 2076; AVX512BW-NEXT: shrb %al 2077; AVX512BW-NEXT: addb %sil, %al 2078; AVX512BW-NEXT: shrb $2, %al 2079; AVX512BW-NEXT: mulb %cl 2080; AVX512BW-NEXT: subb %al, %dl 2081; AVX512BW-NEXT: movzbl %dl, %eax 2082; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 2083; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx 2084; AVX512BW-NEXT: imull $37, %edx, %esi 2085; AVX512BW-NEXT: shrl $8, %esi 2086; AVX512BW-NEXT: movl %edx, %eax 2087; AVX512BW-NEXT: subb %sil, %al 2088; AVX512BW-NEXT: shrb %al 2089; AVX512BW-NEXT: addb %sil, %al 2090; AVX512BW-NEXT: shrb $2, %al 2091; AVX512BW-NEXT: mulb %cl 2092; AVX512BW-NEXT: subb %al, %dl 2093; AVX512BW-NEXT: movzbl %dl, %eax 2094; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm3, %xmm0 2095; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 2096; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 2097; AVX512BW-NEXT: retq 2098 %res = urem <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7> 2099 ret <64 x i8> %res 2100} 2101