; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

; Codegen tests for the signed fixed-point multiply intrinsic
; (llvm.smul.fix.*) at several bit widths and scale factors, on both
; 64-bit and 32-bit x86. The CHECK lines below are autogenerated;
; refresh them with update_llc_test_checks.py rather than editing by hand.

declare i4 @llvm.smul.fix.i4 (i4, i4, i32)
declare i32 @llvm.smul.fix.i32 (i32, i32, i32)
declare i64 @llvm.smul.fix.i64 (i64, i64, i32)
declare <4 x i32> @llvm.smul.fix.v4i32(<4 x i32>, <4 x i32>, i32)

; i32 smul.fix with scale 2: widening multiply followed by a funnel
; shift to drop the two scale bits.
define i32 @func(i32 %x, i32 %y) nounwind {
; X64-LABEL: func:
; X64:       # %bb.0:
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movslq %edi, %rcx
; X64-NEXT:    imulq %rax, %rcx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    shrq $32, %rax
; X64-NEXT:    shldl $30, %ecx, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; X86-LABEL: func:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    shrdl $2, %edx, %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

; i64 smul.fix with scale 2. No nounwind here, so the X86 checks also
; cover the CFI directives around the expanded 64-bit multiply.
define i64 @func2(i64 %x, i64 %y) {
; X64-LABEL: func2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $2, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    .cfi_offset %esi, -20
; X86-NEXT:    .cfi_offset %edi, -16
; X86-NEXT:    .cfi_offset %ebx, -12
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    addl %edi, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %ecx
; X86-NEXT:    addl %ebp, %eax
; X86-NEXT:    adcl %esi, %edx
; X86-NEXT:    movl %edi, %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %edx, %esi
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    subl %ecx, %ebp
; X86-NEXT:    testl %edi, %edi
; X86-NEXT:    cmovnsl %esi, %ebp
; X86-NEXT:    movl %ebp, %edx
; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebp, %edx
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    shldl $30, %ebx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

; i4 smul.fix with scale 2: the narrow operands are sign-extended in
; place (shl/sar by 4) before the multiply, and the scaled result is
; reassembled with shifts and an or.
define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3:
; X64:       # %bb.0:
; X64-NEXT:    shlb $4, %dil
; X64-NEXT:    sarb $4, %dil
; X64-NEXT:    shlb $4, %sil
; X64-NEXT:    sarb $4, %sil
; X64-NEXT:    movsbl %sil, %ecx
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrb $2, %cl
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    shlb $6, %al
; X64-NEXT:    orb %cl, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: func3:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    shlb $4, %al
; X86-NEXT:    sarb $4, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $4, %cl
; X86-NEXT:    sarb $4, %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    shlb $6, %ah
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    orb %ah, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

; <4 x i32> smul.fix with scale 2: vectorized on X64 via pmuludq with a
; sign fixup; scalarized into four widening imulls on X86 (sret return).
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64:       # %bb.0:
; X64-NEXT:    pxor %xmm2, %xmm2
; X64-NEXT:    pxor %xmm3, %xmm3
; X64-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-NEXT:    pand %xmm0, %xmm3
; X64-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-NEXT:    pand %xmm1, %xmm2
; X64-NEXT:    paddd %xmm3, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm3, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X64-NEXT:    psubd %xmm2, %xmm4
; X64-NEXT:    pslld $30, %xmm4
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    psrld $2, %xmm0
; X64-NEXT:    por %xmm4, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    shldl $30, %eax, %ebp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    shldl $30, %eax, %ebx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    shldl $30, %eax, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %ebp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
  ret <4 x i32> %tmp
}

; These result in regular integer multiplication
; (scale 0 lowers to a plain multiply with no post-shift).
define i32 @func4(i32 %x, i32 %y) nounwind {
; X64-LABEL: func4:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: func4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

; i64 smul.fix with scale 0: plain 64-bit multiply (expanded on X86).
define i64 @func5(i64 %x, i64 %y) {
; X64-LABEL: func5:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func5:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

; i4 smul.fix with scale 0: sign-extend the 4-bit inputs, then a plain
; byte multiply.
define i4 @func6(i4 %x, i4 %y) nounwind {
; X64-LABEL: func6:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shlb $4, %al
; X64-NEXT:    sarb $4, %al
; X64-NEXT:    shlb $4, %sil
; X64-NEXT:    sarb $4, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    retq
;
; X86-LABEL: func6:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    shlb $4, %al
; X86-NEXT:    sarb $4, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $4, %cl
; X86-NEXT:    sarb $4, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    retl
  %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

; <4 x i32> smul.fix with scale 0: a plain vector multiply.
define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
;
; X86-LABEL: vec2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

; i64 smul.fix with scale 32 (half the bit width): on X64 this is the
; middle 64 bits of the 128-bit product via shrdq $32.
define i64 @func7(i64 %x, i64 %y) nounwind {
; X64-LABEL: func7:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $32, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func7:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    movl %esi, %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    subl %ebp, %ebx
; X86-NEXT:    testl %esi, %esi
; X86-NEXT:    cmovnsl %edi, %ebx
; X86-NEXT:    movl %ebx, %edx
; X86-NEXT:    subl %ecx, %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

; i64 smul.fix with scale 63 (maximum scale for i64): extracts the top
; of the 128-bit product via shrdq $63.
define i64 @func8(i64 %x, i64 %y) nounwind {
; X64-LABEL: func8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $63, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    sbbl $0, %esi
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebp, %esi
; X86-NEXT:    cmovnsl %edx, %ecx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    sbbl $0, %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %esi, %edx
; X86-NEXT:    cmovnsl %ecx, %edi
; X86-NEXT:    shldl $1, %edi, %edx
; X86-NEXT:    shrdl $31, %edi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}