; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

; Codegen tests for the unsigned fixed-point multiply intrinsics
; (llvm.umul.fix.*) at several scales: 2, 0 (plain multiply), 32, 63 and 64.
; CHECK lines are autogenerated; regenerate with update_llc_test_checks.py
; rather than editing them by hand.

declare i4 @llvm.umul.fix.i4 (i4, i4, i32)
declare i32 @llvm.umul.fix.i32 (i32, i32, i32)
declare i64 @llvm.umul.fix.i64 (i64, i64, i32)
declare <4 x i32> @llvm.umul.fix.v4i32(<4 x i32>, <4 x i32>, i32)

define i32 @func(i32 %x, i32 %y) nounwind {
; X64-LABEL: func:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movl %edi, %ecx
; X64-NEXT:    imulq %rax, %rcx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    shrq $32, %rax
; X64-NEXT:    shldl $30, %ecx, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; X86-LABEL: func:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shrdl $2, %edx, %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

define i64 @func2(i64 %x, i64 %y) nounwind {
; X64-LABEL: func2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $2, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    addl %ebx, %ebp
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ebp, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %edx, %esi
; X86-NEXT:    shldl $30, %eax, %esi
; X86-NEXT:    shldl $30, %ecx, %eax
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andl $15, %esi
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    shrb $2, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: func3:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    movzbl %cl, %ecx
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT:    psrld $2, %xmm3
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    pslld $30, %xmm0
; X64-NEXT:    por %xmm3, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    shldl $30, %eax, %ebp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    shldl $30, %eax, %ebx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    shldl $30, %eax, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %ebp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
  ret <4 x i32> %tmp
}

; These result in regular integer multiplication
define i32 @func4(i32 %x, i32 %y) nounwind {
; X64-LABEL: func4:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: func4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.umul.fix.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

define i64 @func5(i64 %x, i64 %y) nounwind {
; X64-LABEL: func5:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func5:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

define i4 @func6(i4 %x, i4 %y) nounwind {
; X64-LABEL: func6:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $15, %al
; X64-NEXT:    andb $15, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    retq
;
; X86-LABEL: func6:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb $15, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $15, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    retl
  %tmp = call i4 @llvm.umul.fix.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
;
; X86-LABEL: vec2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.umul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

define i64 @func7(i64 %x, i64 %y) nounwind {
; X64-LABEL: func7:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $32, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func7:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

define i64 @func8(i64 %x, i64 %y) nounwind {
; X64-LABEL: func8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    shrdq $63, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    addl %ebp, %edx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    shldl $1, %edx, %ecx
; X86-NEXT:    shrdl $31, %edx, %eax
; X86-NEXT:    movl %ecx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}

define i64 @func9(i64 %x, i64 %y) nounwind {
; X64-LABEL: func9:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rsi
; X64-NEXT:    movq %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func9:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    addl %ebx, %ecx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    addl %ecx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    adcl $0, %ebx
; X86-NEXT:    addl %edx, %ebp
; X86-NEXT:    adcl $0, %ebx
; X86-NEXT:    movl %ebp, %eax
; X86-NEXT:    movl %ebx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.umul.fix.i64(i64 %x, i64 %y, i32 64)
  ret i64 %tmp
}