; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86

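; The smul.fix intrinsics perform signed fixed-point multiplication: with a
; scale of S, the result is (roughly speaking) the double-width signed product
; shifted right by S, i.e. (sext(x) * sext(y)) >> S, truncated back to the
; operand width.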
declare  i4  @llvm.smul.fix.i4   (i4,  i4, i32)
declare  i32 @llvm.smul.fix.i32  (i32, i32, i32)
declare  i64 @llvm.smul.fix.i64  (i64, i64, i32)
declare  <4 x i32> @llvm.smul.fix.v4i32(<4 x i32>, <4 x i32>, i32)

define i32 @func(i32 %x, i32 %y) nounwind {
; X64-LABEL: func:
; X64:       # %bb.0:
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movslq %edi, %rcx
; X64-NEXT:    imulq %rax, %rcx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    shrq $32, %rax
; X64-NEXT:    shldl $30, %ecx, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
;
; X86-LABEL: func:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    shrdl $2, %edx, %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 2)
  ret i32 %tmp
}

define i64 @func2(i64 %x, i64 %y) {
; X64-LABEL: func2:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $2, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    .cfi_offset %esi, -20
; X86-NEXT:    .cfi_offset %edi, -16
; X86-NEXT:    .cfi_offset %ebx, -12
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    movl %eax, %edi
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    mull %ecx
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    addl %edi, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    adcl $0, %esi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %ecx
; X86-NEXT:    addl %ebp, %eax
; X86-NEXT:    adcl %esi, %edx
; X86-NEXT:    movl %edi, %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %edx, %esi
; X86-NEXT:    movl %esi, %ebp
; X86-NEXT:    subl %ecx, %ebp
; X86-NEXT:    testl %edi, %edi
; X86-NEXT:    cmovnsl %esi, %ebp
; X86-NEXT:    movl %ebp, %edx
; X86-NEXT:    subl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebp, %edx
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    shldl $30, %ebx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2)
  ret i64 %tmp
}

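; The i4 operands arrive in wider registers, so they are first sign-extended
; from bit 3 (shlb $4 followed by sarb $4) before the widening multiply.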
define i4 @func3(i4 %x, i4 %y) nounwind {
; X64-LABEL: func3:
; X64:       # %bb.0:
; X64-NEXT:    shlb $4, %dil
; X64-NEXT:    sarb $4, %dil
; X64-NEXT:    shlb $4, %sil
; X64-NEXT:    sarb $4, %sil
; X64-NEXT:    movsbl %sil, %ecx
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    imull %ecx, %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrb $2, %cl
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    shlb $6, %al
; X64-NEXT:    orb %cl, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
;
; X86-LABEL: func3:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    shlb $4, %al
; X86-NEXT:    sarb $4, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $4, %cl
; X86-NEXT:    sarb $4, %cl
; X86-NEXT:    movsbl %cl, %ecx
; X86-NEXT:    movsbl %al, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    shlb $6, %ah
; X86-NEXT:    shrb $2, %al
; X86-NEXT:    orb %ah, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 2)
  ret i4 %tmp
}

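; On X64 the vector case uses pmuludq for the unsigned 32x32->64 multiplies
; plus a pcmpgtd/pand fixup to recover the signed high halves; on X86 it is
; scalarized into four imull/shldl sequences.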
define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec:
; X64:       # %bb.0:
; X64-NEXT:    pxor %xmm2, %xmm2
; X64-NEXT:    pxor %xmm3, %xmm3
; X64-NEXT:    pcmpgtd %xmm1, %xmm3
; X64-NEXT:    pand %xmm0, %xmm3
; X64-NEXT:    pcmpgtd %xmm0, %xmm2
; X64-NEXT:    pand %xmm1, %xmm2
; X64-NEXT:    paddd %xmm3, %xmm2
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm3, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,3,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
; X64-NEXT:    psubd %xmm2, %xmm4
; X64-NEXT:    pslld $30, %xmm4
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    psrld $2, %xmm0
; X64-NEXT:    por %xmm4, %xmm0
; X64-NEXT:    retq
;
; X86-LABEL: vec:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    shldl $30, %eax, %ebp
; X86-NEXT:    movl %ebx, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    shldl $30, %eax, %ebx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    shldl $30, %eax, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    shldl $30, %eax, %edx
; X86-NEXT:    movl %edx, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl %ebx, 4(%ecx)
; X86-NEXT:    movl %ebp, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 2)
  ret <4 x i32> %tmp
}

; These cases, with a scale of zero, result in regular integer multiplication.
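; For example, @llvm.smul.fix.i32(i32 %x, i32 %y, i32 0) is (x * y) >> 0,
; i.e. a plain 32-bit multiply, so it lowers to a single imull.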
define i32 @func4(i32 %x, i32 %y) nounwind {
; X64-LABEL: func4:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
;
; X86-LABEL: func4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %tmp = call i32 @llvm.smul.fix.i32(i32 %x, i32 %y, i32 0)
  ret i32 %tmp
}

define i64 @func5(i64 %x, i64 %y) {
; X64-LABEL: func5:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func5:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %esi, -8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    addl %esi, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 0)
  ret i64 %tmp
}

define i4 @func6(i4 %x, i4 %y) nounwind {
; X64-LABEL: func6:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shlb $4, %al
; X64-NEXT:    sarb $4, %al
; X64-NEXT:    shlb $4, %sil
; X64-NEXT:    sarb $4, %sil
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    mulb %sil
; X64-NEXT:    retq
;
; X86-LABEL: func6:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    shlb $4, %al
; X86-NEXT:    sarb $4, %al
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    shlb $4, %cl
; X86-NEXT:    sarb $4, %cl
; X86-NEXT:    mulb %cl
; X86-NEXT:    retl
  %tmp = call i4 @llvm.smul.fix.i4(i4 %x, i4 %y, i32 0)
  ret i4 %tmp
}

define <4 x i32> @vec2(<4 x i32> %x, <4 x i32> %y) nounwind {
; X64-LABEL: vec2:
; X64:       # %bb.0:
; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    pmuludq %xmm1, %xmm0
; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X64-NEXT:    pmuludq %xmm2, %xmm1
; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    retq
;
; X86-LABEL: vec2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %esi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl $4
  %tmp = call <4 x i32> @llvm.smul.fix.v4i32(<4 x i32> %x, <4 x i32> %y, i32 0)
  ret <4 x i32> %tmp
}

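; A scale of 32 selects the middle of the 128-bit product, i.e. bits [95:32],
; which on X64 is a single shrdq $32 after the widening imulq.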
define i64 @func7(i64 %x, i64 %y) nounwind {
; X64-LABEL: func7:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $32, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func7:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %ebp
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    movl %esi, %edi
; X86-NEXT:    imull {{[0-9]+}}(%esp), %edi
; X86-NEXT:    addl %edx, %edi
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    subl %ebp, %ebx
; X86-NEXT:    testl %esi, %esi
; X86-NEXT:    cmovnsl %edi, %ebx
; X86-NEXT:    movl %ebx, %edx
; X86-NEXT:    subl %ecx, %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebx, %edx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 32)
  ret i64 %tmp
}

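; A scale of 63 selects bits [126:63] of the 128-bit product; on X64 this is
; again imulq followed by shrdq $63.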
define i64 @func8(i64 %x, i64 %y) nounwind {
; X64-LABEL: func8:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    imulq %rsi
; X64-NEXT:    shrdq $63, %rdx, %rax
; X64-NEXT:    retq
;
; X86-LABEL: func8:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %edx, %ebx
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    imull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ebx, %eax
; X86-NEXT:    adcl %edi, %edx
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    sbbl $0, %esi
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %ebp, %esi
; X86-NEXT:    cmovnsl %edx, %ecx
; X86-NEXT:    movl %ecx, %edi
; X86-NEXT:    subl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    sbbl $0, %edx
; X86-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    cmovnsl %esi, %edx
; X86-NEXT:    cmovnsl %ecx, %edi
; X86-NEXT:    shldl $1, %edi, %edx
; X86-NEXT:    shrdl $31, %edi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 63)
  ret i64 %tmp
}