; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=SSE41
; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX

target triple = "x86_64-unknown-unknown"

define <4 x i32> @test1(<4 x i32> %a) #0 {
; SSE41-LABEL: test1:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm2, %xmm3
; SSE41-NEXT: pmuludq %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: psrld $1, %xmm0
; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: psrld $2, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test1:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: psubd %xmm2, %xmm0
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test1:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrld $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %div
}
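
; test1/test2 exercise unsigned division by the non-power-of-2 constant 7.
; x86 has no vector integer divide, so the expected lowering is the usual
; unsigned "magic number" expansion. A rough per-element sketch, with
; M = 613566757 = 0x24924925 = ceil(2^32 / 7):
;   q = umulhi(n, M)    // the pmuludq/pshufd dance computes the high halves
;   t = (n - q) >> 1    // fixup because M does not fit in 32 bits exactly
;   q = (t + q) >> 2    // == floor(n / 7)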
define <8 x i32> @test2(<8 x i32> %a) #0 {
; SSE41-LABEL: test2:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmuludq %xmm2, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT: psubd %xmm5, %xmm0
; SSE41-NEXT: psrld $1, %xmm0
; SSE41-NEXT: paddd %xmm5, %xmm0
; SSE41-NEXT: psrld $2, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: pmuludq %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT: psubd %xmm2, %xmm1
; SSE41-NEXT: psrld $1, %xmm1
; SSE41-NEXT: paddd %xmm2, %xmm1
; SSE41-NEXT: psrld $2, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test2:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pmuludq %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE-NEXT: psubd %xmm3, %xmm0
; SSE-NEXT: psrld $1, %xmm0
; SSE-NEXT: paddd %xmm3, %xmm0
; SSE-NEXT: psrld $2, %xmm0
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: psrld $1, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psrld $2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test2:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm2
; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpsrld $1, %ymm0, %ymm0
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpsrld $2, %ymm0, %ymm0
; AVX-NEXT: retq
  %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %div
}

define <8 x i16> @test3(<8 x i16> %a) #0 {
; SSE41-LABEL: test3:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
; SSE41-NEXT: psubw %xmm1, %xmm0
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: paddw %xmm1, %xmm0
; SSE41-NEXT: psrlw $2, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test3:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT: pmulhuw %xmm0, %xmm1
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: psrlw $2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test3:
; AVX: # BB#0:
; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrlw $2, %xmm0, %xmm0
; AVX-NEXT: retq
  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %div
}
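
; test3/test4 are the i16 versions. pmulhuw returns the high half of the
; product directly, so no shuffling is needed; the multiplier here is
; M = 9363 = 0x2493 = ceil(2^16 / 7), followed by the same
; (n - q) >> 1, + q, >> 2 fixup as in the i32 tests above.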
define <16 x i16> @test4(<16 x i16> %a) #0 {
; SSE41-LABEL: test4:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: pmulhuw %xmm2, %xmm3
; SSE41-NEXT: psubw %xmm3, %xmm0
; SSE41-NEXT: psrlw $1, %xmm0
; SSE41-NEXT: paddw %xmm3, %xmm0
; SSE41-NEXT: psrlw $2, %xmm0
; SSE41-NEXT: pmulhuw %xmm1, %xmm2
; SSE41-NEXT: psubw %xmm2, %xmm1
; SSE41-NEXT: psrlw $1, %xmm1
; SSE41-NEXT: paddw %xmm2, %xmm1
; SSE41-NEXT: psrlw $2, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test4:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pmulhuw %xmm2, %xmm3
; SSE-NEXT: psubw %xmm3, %xmm0
; SSE-NEXT: psrlw $1, %xmm0
; SSE-NEXT: paddw %xmm3, %xmm0
; SSE-NEXT: psrlw $2, %xmm0
; SSE-NEXT: pmulhuw %xmm1, %xmm2
; SSE-NEXT: psubw %xmm2, %xmm1
; SSE-NEXT: psrlw $1, %xmm1
; SSE-NEXT: paddw %xmm2, %xmm1
; SSE-NEXT: psrlw $2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test4:
; AVX: # BB#0:
; AVX-NEXT: vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
; AVX-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpsrlw $1, %ymm0, %ymm0
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpsrlw $2, %ymm0, %ymm0
; AVX-NEXT: retq
  %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %div
}

define <8 x i16> @test5(<8 x i16> %a) #0 {
; SSE41-LABEL: test5:
; SSE41: # BB#0:
; SSE41-NEXT: pmulhw {{.*}}(%rip), %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrlw $15, %xmm1
; SSE41-NEXT: psraw $1, %xmm0
; SSE41-NEXT: paddw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test5:
; SSE: # BB#0:
; SSE-NEXT: pmulhw {{.*}}(%rip), %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrlw $15, %xmm1
; SSE-NEXT: psraw $1, %xmm0
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test5:
; AVX: # BB#0:
; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpsrlw $15, %xmm0, %xmm1
; AVX-NEXT: vpsraw $1, %xmm0, %xmm0
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  ret <8 x i16> %div
}

define <16 x i16> @test6(<16 x i16> %a) #0 {
; SSE41-LABEL: test6:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; SSE41-NEXT: pmulhw %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm3
; SSE41-NEXT: psrlw $15, %xmm3
; SSE41-NEXT: psraw $1, %xmm0
; SSE41-NEXT: paddw %xmm3, %xmm0
; SSE41-NEXT: pmulhw %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: psrlw $15, %xmm2
; SSE41-NEXT: psraw $1, %xmm1
; SSE41-NEXT: paddw %xmm2, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test6:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
; SSE-NEXT: pmulhw %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrlw $15, %xmm3
; SSE-NEXT: psraw $1, %xmm0
; SSE-NEXT: paddw %xmm3, %xmm0
; SSE-NEXT: pmulhw %xmm2, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrlw $15, %xmm2
; SSE-NEXT: psraw $1, %xmm1
; SSE-NEXT: paddw %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test6:
; AVX: # BB#0:
; AVX-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vpsrlw $15, %ymm0, %ymm1
; AVX-NEXT: vpsraw $1, %ymm0, %ymm0
; AVX-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
  ret <16 x i16> %div
}
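
; test5/test6 are the signed i16 variant: pmulhw with
; M = 18725 = 0x4925 = ceil(2^17 / 7), then roughly
;   q0 = mulhs(n, M)
;   q  = (q0 >> 1) + (q0 >>> 15)   // add the sign bit to round toward zero
; There is no vector multiply for i8, so the sdiv <16 x i8> below is
; expected to scalarize lane by lane.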
define <16 x i8> @test7(<16 x i8> %a) #0 {
; SSE41-LABEL: test7:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $1, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pextrb $0, %xmm0, %ecx
; SSE41-NEXT: movsbl %cl, %ecx
; SSE41-NEXT: imull $-109, %ecx, %edx
; SSE41-NEXT: shrl $8, %edx
; SSE41-NEXT: addb %dl, %cl
; SSE41-NEXT: movb %cl, %dl
; SSE41-NEXT: shrb $7, %dl
; SSE41-NEXT: sarb $2, %cl
; SSE41-NEXT: addb %dl, %cl
; SSE41-NEXT: movzbl %cl, %ecx
; SSE41-NEXT: movd %ecx, %xmm1
; SSE41-NEXT: pinsrb $1, %eax, %xmm1
; SSE41-NEXT: pextrb $2, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $2, %eax, %xmm1
; SSE41-NEXT: pextrb $3, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $3, %eax, %xmm1
; SSE41-NEXT: pextrb $4, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $4, %eax, %xmm1
; SSE41-NEXT: pextrb $5, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $5, %eax, %xmm1
; SSE41-NEXT: pextrb $6, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $6, %eax, %xmm1
; SSE41-NEXT: pextrb $7, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $7, %eax, %xmm1
; SSE41-NEXT: pextrb $8, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $8, %eax, %xmm1
; SSE41-NEXT: pextrb $9, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $9, %eax, %xmm1
; SSE41-NEXT: pextrb $10, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $10, %eax, %xmm1
; SSE41-NEXT: pextrb $11, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $11, %eax, %xmm1
; SSE41-NEXT: pextrb $12, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $12, %eax, %xmm1
; SSE41-NEXT: pextrb $13, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $13, %eax, %xmm1
; SSE41-NEXT: pextrb $14, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $14, %eax, %xmm1
; SSE41-NEXT: pextrb $15, %xmm0, %eax
; SSE41-NEXT: movsbl %al, %eax
; SSE41-NEXT: imull $-109, %eax, %ecx
; SSE41-NEXT: shrl $8, %ecx
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movb %al, %cl
; SSE41-NEXT: shrb $7, %cl
; SSE41-NEXT: sarb $2, %al
; SSE41-NEXT: addb %cl, %al
; SSE41-NEXT: movzbl %al, %eax
; SSE41-NEXT: pinsrb $15, %eax, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test7:
; SSE: # BB#0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: imull $-109, %eax, %ecx
; SSE-NEXT: shrl $8, %ecx
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: movb %cl, %al
; SSE-NEXT: shrb $7, %al
; SSE-NEXT: sarb $2, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r14d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %edx
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r9d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r11d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r8d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: imull $-109, %esi, %edi
; SSE-NEXT: shrl $8, %edi
; SSE-NEXT: addb %sil, %dil
; SSE-NEXT: movb %dil, %bl
; SSE-NEXT: shrb $7, %bl
; SSE-NEXT: sarb $2, %dil
; SSE-NEXT: addb %bl, %dil
; SSE-NEXT: movzbl %dil, %esi
; SSE-NEXT: movd %esi, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: imull $-109, %eax, %esi
; SSE-NEXT: shrl $8, %esi
; SSE-NEXT: addb %al, %sil
; SSE-NEXT: movb %sil, %al
; SSE-NEXT: shrb $7, %al
; SSE-NEXT: sarb $2, %sil
; SSE-NEXT: addb %al, %sil
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ebp
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %esi
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %r10d
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %edi
; SSE-NEXT: imull $-109, %edi, %ebx
; SSE-NEXT: shrl $8, %ebx
; SSE-NEXT: addb %dil, %bl
; SSE-NEXT: movb %bl, %al
; SSE-NEXT: shrb $7, %al
; SSE-NEXT: sarb $2, %bl
; SSE-NEXT: addb %al, %bl
; SSE-NEXT: movzbl %bl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: imull $-109, %edx, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movb %al, %dl
; SSE-NEXT: shrb $7, %dl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: imull $-109, %esi, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %sil, %al
; SSE-NEXT: movb %al, %dl
; SSE-NEXT: shrb $7, %dl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE-NEXT: imull $-109, %ecx, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movb %al, %cl
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %ecx
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: imull $-109, %eax, %edx
; SSE-NEXT: shrl $8, %edx
; SSE-NEXT: addb %al, %dl
; SSE-NEXT: movb %dl, %al
; SSE-NEXT: shrb $7, %al
; SSE-NEXT: sarb $2, %dl
; SSE-NEXT: addb %al, %dl
; SSE-NEXT: movzbl %dl, %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: imull $-109, %r14d, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %r14b, %al
; SSE-NEXT: movb %al, %dl
; SSE-NEXT: shrb $7, %dl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: imull $-109, %ebp, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %bpl, %al
; SSE-NEXT: movb %al, %dl
; SSE-NEXT: shrb $7, %dl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: imull $-109, %r11d, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %r11b, %al
; SSE-NEXT: movb %al, %dl
; SSE-NEXT: shrb $7, %dl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %dl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: imull $-109, %ecx, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movb %al, %cl
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: imull $-109, %r9d, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %r9b, %al
; SSE-NEXT: movb %al, %cl
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: imull $-109, %r10d, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %r10b, %al
; SSE-NEXT: movb %al, %cl
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: imull $-109, %r8d, %eax
; SSE-NEXT: shrl $8, %eax
; SSE-NEXT: addb %r8b, %al
; SSE-NEXT: movb %al, %cl
; SSE-NEXT: shrb $7, %cl
; SSE-NEXT: sarb $2, %al
; SSE-NEXT: addb %cl, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movsbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: imull $-109, %eax, %ecx
; SSE-NEXT: shrl $8, %ecx
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: movb %cl, %al
; SSE-NEXT: shrb $7, %al
; SSE-NEXT: sarb $2, %cl
; SSE-NEXT: addb %al, %cl
; SSE-NEXT: movzbl %cl, %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: test7:
; AVX: # BB#0:
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: movsbl %al, %eax
; AVX-NEXT: imull $-109, %eax, %ecx
; AVX-NEXT: shrl $8, %ecx
; AVX-NEXT: addb %cl, %al
; AVX-NEXT: movb %al, %cl
; AVX-NEXT: shrb $7, %cl
; AVX-NEXT: sarb $2, %al
; AVX-NEXT: addb %cl, %al
; AVX-NEXT: movzbl %al, %eax
; AVX-NEXT: vpextrb $0, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %dl
; AVX-NEXT: shrb $7, %dl
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movzbl %cl, %ecx
; AVX-NEXT: vmovd %ecx, %xmm1
; AVX-NEXT: vpextrb $2, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $3, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $4, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $5, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $6, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $7, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $8, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $9, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $10, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $11, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $12, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $13, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $14, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpextrb $15, %xmm0, %ecx
; AVX-NEXT: movsbl %cl, %ecx
; AVX-NEXT: imull $-109, %ecx, %edx
; AVX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm0
; AVX-NEXT: shrl $8, %edx
; AVX-NEXT: addb %dl, %cl
; AVX-NEXT: movb %cl, %al
; AVX-NEXT: shrb $7, %al
; AVX-NEXT: sarb $2, %cl
; AVX-NEXT: addb %al, %cl
; AVX-NEXT: movzbl %cl, %eax
; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX-NEXT: retq
  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %div
}
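
; Each scalarized i8 lane above follows the signed magic sequence with
; M = -109; roughly: q0 = ((sext(n) * -109) >> 8) + n, then
; q = (q0 >> 2) + (q0 >>> 7). test8/test9 below return to i32, but signed.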
define <4 x i32> @test8(<4 x i32> %a) #0 {
; SSE41-LABEL: test8:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm2, %xmm3
; SSE41-NEXT: pmuldq %xmm0, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
; SSE41-NEXT: psrad $2, %xmm1
; SSE41-NEXT: paddd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test8:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pand %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pand %xmm0, %xmm1
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: psrad $2, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test8:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; AVX-NEXT: vpaddd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX-NEXT: vpsrad $2, %xmm0, %xmm0
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
  ret <4 x i32> %div
}
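
; The signed i32 magic constant is 2454267027 = 0x92492493; the sketch is
; q0 = mulhs(n, M) + n; q = (q0 >> 2) + (q0 >>> 31). Without SSE4.1's
; pmuldq, the plain SSE lowering emulates the signed high multiply with
; pmuludq plus the psrad $31 / pand / paddd sign-correction terms.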
define <8 x i32> @test9(<8 x i32> %a) #0 {
; SSE41-LABEL: test9:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm4, %xmm5
; SSE41-NEXT: movdqa %xmm0, %xmm2
; SSE41-NEXT: pmuldq %xmm3, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; SSE41-NEXT: paddd %xmm0, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
; SSE41-NEXT: psrad $2, %xmm2
; SSE41-NEXT: paddd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm4, %xmm0
; SSE41-NEXT: pmuldq %xmm1, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm0[2,3],xmm3[4,5],xmm0[6,7]
; SSE41-NEXT: paddd %xmm1, %xmm3
; SSE41-NEXT: movdqa %xmm3, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
; SSE41-NEXT: psrad $2, %xmm3
; SSE41-NEXT: paddd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test9:
; SSE: # BB#0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: psrad $31, %xmm4
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm5
; SSE-NEXT: psrad $31, %xmm5
; SSE-NEXT: pand %xmm3, %xmm5
; SSE-NEXT: paddd %xmm0, %xmm5
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: pmuludq %xmm3, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[1,1,3,3]
; SSE-NEXT: pmuludq %xmm6, %xmm7
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
; SSE-NEXT: psubd %xmm5, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrld $31, %xmm2
; SSE-NEXT: psrad $2, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm4
; SSE-NEXT: movdqa %xmm1, %xmm5
; SSE-NEXT: psrad $31, %xmm5
; SSE-NEXT: pand %xmm3, %xmm5
; SSE-NEXT: paddd %xmm4, %xmm5
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm6, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm5, %xmm2
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: psrld $31, %xmm1
; SSE-NEXT: psrad $2, %xmm2
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test9:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpmuldq %ymm2, %ymm3, %ymm2
; AVX-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vpsrld $31, %ymm0, %ymm1
; AVX-NEXT: vpsrad $2, %ymm0, %ymm0
; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %div
}
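
; The remainder tests reuse the division expansion and multiply back:
; rem = n - (n / 7) * 7. The multiply by 7 is a single pmulld on
; SSE4.1/AVX2 and a pmuludq/pshufd/punpckldq sequence otherwise.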
define <8 x i32> @test10(<8 x i32> %a) #0 {
; SSE41-LABEL: test10:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmuludq %xmm2, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT: movdqa %xmm0, %xmm4
; SSE41-NEXT: psubd %xmm5, %xmm4
; SSE41-NEXT: psrld $1, %xmm4
; SSE41-NEXT: paddd %xmm5, %xmm4
; SSE41-NEXT: psrld $2, %xmm4
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [7,7,7,7]
; SSE41-NEXT: pmulld %xmm5, %xmm4
; SSE41-NEXT: psubd %xmm4, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: pmuludq %xmm3, %xmm4
; SSE41-NEXT: pmuludq %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm3
; SSE41-NEXT: psubd %xmm2, %xmm3
; SSE41-NEXT: psrld $1, %xmm3
; SSE41-NEXT: paddd %xmm2, %xmm3
; SSE41-NEXT: psrld $2, %xmm3
; SSE41-NEXT: pmulld %xmm5, %xmm3
; SSE41-NEXT: psubd %xmm3, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test10:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm3 = [613566757,613566757,613566757,613566757]
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: pmuludq %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
; SSE-NEXT: movdqa %xmm0, %xmm5
; SSE-NEXT: psubd %xmm2, %xmm5
; SSE-NEXT: psrld $1, %xmm5
; SSE-NEXT: paddd %xmm2, %xmm5
; SSE-NEXT: psrld $2, %xmm5
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [7,7,7,7]
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
; SSE-NEXT: pmuludq %xmm2, %xmm5
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE-NEXT: pmuludq %xmm2, %xmm6
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
; SSE-NEXT: psubd %xmm5, %xmm0
; SSE-NEXT: pmuludq %xmm1, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm5
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movdqa %xmm1, %xmm4
; SSE-NEXT: psubd %xmm3, %xmm4
; SSE-NEXT: psrld $1, %xmm4
; SSE-NEXT: paddd %xmm3, %xmm4
; SSE-NEXT: psrld $2, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE-NEXT: pmuludq %xmm2, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
; SSE-NEXT: pmuludq %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE-NEXT: psubd %xmm4, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test10:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpmuludq %ymm2, %ymm3, %ymm2
; AVX-NEXT: vpmuludq %ymm1, %ymm0, %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm2
; AVX-NEXT: vpsrld $1, %ymm2, %ymm2
; AVX-NEXT: vpaddd %ymm1, %ymm2, %ymm1
; AVX-NEXT: vpsrld $2, %ymm1, %ymm1
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
; AVX-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %rem
}
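
; test11 is the signed counterpart: the sdiv-by-7 expansion from test9
; followed by the same multiply-by-7 and subtract. test12 checks that a
; urem of a constant vector by itself is constant-folded away to zero.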
define <8 x i32> @test11(<8 x i32> %a) #0 {
; SSE41-LABEL: test11:
; SSE41: # BB#0:
; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm3, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: pmuldq %xmm2, %xmm5
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0,1],xmm4[2,3],xmm5[4,5],xmm4[6,7]
; SSE41-NEXT: paddd %xmm0, %xmm5
; SSE41-NEXT: movdqa %xmm5, %xmm4
; SSE41-NEXT: psrld $31, %xmm4
; SSE41-NEXT: psrad $2, %xmm5
; SSE41-NEXT: paddd %xmm4, %xmm5
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [7,7,7,7]
; SSE41-NEXT: pmulld %xmm4, %xmm5
; SSE41-NEXT: psubd %xmm5, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm3, %xmm5
; SSE41-NEXT: pmuldq %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm5[2,3],xmm2[4,5],xmm5[6,7]
; SSE41-NEXT: paddd %xmm1, %xmm2
; SSE41-NEXT: movdqa %xmm2, %xmm3
; SSE41-NEXT: psrld $31, %xmm3
; SSE41-NEXT: psrad $2, %xmm2
; SSE41-NEXT: paddd %xmm3, %xmm2
; SSE41-NEXT: pmulld %xmm4, %xmm2
; SSE41-NEXT: psubd %xmm2, %xmm1
; SSE41-NEXT: retq
;
; SSE-LABEL: test11:
; SSE: # BB#0:
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; SSE-NEXT: movdqa %xmm2, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: movdqa %xmm3, %xmm4
; SSE-NEXT: pand %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm0, %xmm6
; SSE-NEXT: psrad $31, %xmm6
; SSE-NEXT: pand %xmm2, %xmm6
; SSE-NEXT: paddd %xmm4, %xmm6
; SSE-NEXT: movdqa %xmm0, %xmm4
; SSE-NEXT: pmuludq %xmm2, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm5, %xmm4
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
; SSE-NEXT: psubd %xmm6, %xmm7
; SSE-NEXT: paddd %xmm0, %xmm7
; SSE-NEXT: movdqa %xmm7, %xmm4
; SSE-NEXT: psrld $31, %xmm4
; SSE-NEXT: psrad $2, %xmm7
; SSE-NEXT: paddd %xmm4, %xmm7
; SSE-NEXT: movdqa {{.*#+}} xmm4 = [7,7,7,7]
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm7
; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,2,2,3]
; SSE-NEXT: pmuludq %xmm4, %xmm6
; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1]
; SSE-NEXT: psubd %xmm7, %xmm0
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: movdqa %xmm1, %xmm6
; SSE-NEXT: psrad $31, %xmm6
; SSE-NEXT: pand %xmm2, %xmm6
; SSE-NEXT: paddd %xmm3, %xmm6
; SSE-NEXT: pmuludq %xmm1, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm5, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm6, %xmm2
; SSE-NEXT: paddd %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm3
; SSE-NEXT: psrld $31, %xmm3
; SSE-NEXT: psrad $2, %xmm2
; SSE-NEXT: paddd %xmm3, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
; SSE-NEXT: pmuludq %xmm4, %xmm2
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE-NEXT: pmuludq %xmm4, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: test11:
; AVX: # BB#0:
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpmuldq %ymm2, %ymm3, %ymm2
; AVX-NEXT: vpmuldq %ymm1, %ymm0, %ymm1
; AVX-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
; AVX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
; AVX-NEXT: vpaddd %ymm0, %ymm1, %ymm1
; AVX-NEXT: vpsrld $31, %ymm1, %ymm2
; AVX-NEXT: vpsrad $2, %ymm1, %ymm1
; AVX-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
; AVX-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; AVX-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
  ret <8 x i32> %rem
}

define <2 x i16> @test12() #0 {
; SSE41-LABEL: test12:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: test12:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test12:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
  %I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
  %I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
  %B9 = urem <2 x i16> %I9, %I9
  ret <2 x i16> %B9
}
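
; PR20355 covers sdiv by 3: the signed magic constant is
; 1431655766 = 0x55555556 and the post-shift is 0, so only the sign-bit
; correction (psrld $31 + paddd) remains after the high multiply.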
define <4 x i32> @PR20355(<4 x i32> %a) #0 {
; SSE41-LABEL: PR20355:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: pmuldq %xmm2, %xmm3
; SSE41-NEXT: pmuldq %xmm1, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: psrld $31, %xmm0
; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; SSE-LABEL: PR20355:
; SSE: # BB#0: # %entry
; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: pand %xmm0, %xmm2
; SSE-NEXT: movdqa %xmm0, %xmm3
; SSE-NEXT: psrad $31, %xmm3
; SSE-NEXT: pand %xmm1, %xmm3
; SSE-NEXT: paddd %xmm2, %xmm3
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE-NEXT: pmuludq %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE-NEXT: pmuludq %xmm2, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
; SSE-NEXT: psubd %xmm3, %xmm4
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: psrld $31, %xmm0
; SSE-NEXT: paddd %xmm4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: PR20355:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
; AVX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sdiv
}

attributes #0 = { nounwind }