• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies a half value from %in to %out. No floating-point conversion is
; involved, so every run expects just a 16-bit load followed by a 16-bit
; store; only the form of the load differs between the runs.
11define void @test_load_store(half* %in, half* %out) #0 {
12; BWON-LABEL: test_load_store:
13; BWON:       # %bb.0:
14; BWON-NEXT:    movzwl (%rdi), %eax
15; BWON-NEXT:    movw %ax, (%rsi)
16; BWON-NEXT:    retq
17;
18; BWOFF-LABEL: test_load_store:
19; BWOFF:       # %bb.0:
20; BWOFF-NEXT:    movw (%rdi), %ax
21; BWOFF-NEXT:    movw %ax, (%rsi)
22; BWOFF-NEXT:    retq
23;
24; CHECK-I686-LABEL: test_load_store:
25; CHECK-I686:       # %bb.0:
26; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
28; CHECK-I686-NEXT:    movw (%ecx), %cx
29; CHECK-I686-NEXT:    movw %cx, (%eax)
30; CHECK-I686-NEXT:    retl
31  %val = load half, half* %in
32  store half %val, half* %out
33  ret void
34}
35
; Loads a half from %addr and returns its bits as an i16. The bitcast is
; expected to cost nothing: each run checks for a single 16-bit load into the
; return register and no conversion call.
36define i16 @test_bitcast_from_half(half* %addr) #0 {
37; BWON-LABEL: test_bitcast_from_half:
38; BWON:       # %bb.0:
39; BWON-NEXT:    movzwl (%rdi), %eax
40; BWON-NEXT:    retq
41;
42; BWOFF-LABEL: test_bitcast_from_half:
43; BWOFF:       # %bb.0:
44; BWOFF-NEXT:    movw (%rdi), %ax
45; BWOFF-NEXT:    retq
46;
47; CHECK-I686-LABEL: test_bitcast_from_half:
48; CHECK-I686:       # %bb.0:
49; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
50; CHECK-I686-NEXT:    movw (%eax), %ax
51; CHECK-I686-NEXT:    retl
52  %val = load half, half* %addr
53  %val_int = bitcast half %val to i16
54  ret i16 %val_int
55}
56
; Reinterprets the i16 argument %in as a half and stores it to %addr. The
; x86_64 runs share one expectation (a single 16-bit store of the incoming
; integer register); the i686 run first loads both arguments from the stack.
57define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
58; CHECK-LABEL: test_bitcast_to_half:
59; CHECK:       # %bb.0:
60; CHECK-NEXT:    movw %si, (%rdi)
61; CHECK-NEXT:    retq
62;
63; CHECK-I686-LABEL: test_bitcast_to_half:
64; CHECK-I686:       # %bb.0:
65; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
66; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
67; CHECK-I686-NEXT:    movw %ax, (%ecx)
68; CHECK-I686-NEXT:    retl
69  %val_fp = bitcast i16 %in to half
70  store half %val_fp, half* %addr
71  ret void
72}
73
; Loads a half from %addr and extends it to float. The x86_64 runs without
; F16C expect a tail call to __gnu_h2f_ieee, the F16C run expects an inline
; vcvtph2ps, and the i686 run expects a regular call to __gnu_h2f_ieee with the
; argument passed on the stack.
74define float @test_extend32(half* %addr) #0 {
75; CHECK-LIBCALL-LABEL: test_extend32:
76; CHECK-LIBCALL:       # %bb.0:
77; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
78; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee # TAILCALL
79;
80; BWON-F16C-LABEL: test_extend32:
81; BWON-F16C:       # %bb.0:
82; BWON-F16C-NEXT:    movswl (%rdi), %eax
83; BWON-F16C-NEXT:    vmovd %eax, %xmm0
84; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
85; BWON-F16C-NEXT:    retq
86;
87; CHECK-I686-LABEL: test_extend32:
88; CHECK-I686:       # %bb.0:
89; CHECK-I686-NEXT:    subl $12, %esp
90; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
91; CHECK-I686-NEXT:    movzwl (%eax), %eax
92; CHECK-I686-NEXT:    movl %eax, (%esp)
93; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
94; CHECK-I686-NEXT:    addl $12, %esp
95; CHECK-I686-NEXT:    retl
96  %val16 = load half, half* %addr
97  %val32 = fpext half %val16 to float
98  ret float %val32
99}
100
; Loads a half from %addr and extends it to double. On x86_64 the expected
; code goes through float first (__gnu_h2f_ieee or vcvtph2ps) and then widens
; with (v)cvtss2sd; the i686 run expects only the __gnu_h2f_ieee call, with no
; further conversion instruction before the return.
101define double @test_extend64(half* %addr) #0 {
102; CHECK-LIBCALL-LABEL: test_extend64:
103; CHECK-LIBCALL:       # %bb.0:
104; CHECK-LIBCALL-NEXT:    pushq %rax
105; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
106; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
107; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
108; CHECK-LIBCALL-NEXT:    popq %rax
109; CHECK-LIBCALL-NEXT:    retq
110;
111; BWON-F16C-LABEL: test_extend64:
112; BWON-F16C:       # %bb.0:
113; BWON-F16C-NEXT:    movswl (%rdi), %eax
114; BWON-F16C-NEXT:    vmovd %eax, %xmm0
115; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
116; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
117; BWON-F16C-NEXT:    retq
118;
119; CHECK-I686-LABEL: test_extend64:
120; CHECK-I686:       # %bb.0:
121; CHECK-I686-NEXT:    subl $12, %esp
122; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
123; CHECK-I686-NEXT:    movzwl (%eax), %eax
124; CHECK-I686-NEXT:    movl %eax, (%esp)
125; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
126; CHECK-I686-NEXT:    addl $12, %esp
127; CHECK-I686-NEXT:    retl
128  %val16 = load half, half* %addr
129  %val64 = fpext half %val16 to double
130  ret double %val64
131}
132
; Truncates the float argument to half and stores it to %addr. Runs without
; F16C expect a call to __gnu_f2h_ieee followed by a 16-bit store of the
; result; the F16C run expects an inline vcvtps2ph instead of the call.
133define void @test_trunc32(float %in, half* %addr) #0 {
134; CHECK-LIBCALL-LABEL: test_trunc32:
135; CHECK-LIBCALL:       # %bb.0:
136; CHECK-LIBCALL-NEXT:    pushq %rbx
137; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
138; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
139; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
140; CHECK-LIBCALL-NEXT:    popq %rbx
141; CHECK-LIBCALL-NEXT:    retq
142;
143; BWON-F16C-LABEL: test_trunc32:
144; BWON-F16C:       # %bb.0:
145; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
146; BWON-F16C-NEXT:    vmovd %xmm0, %eax
147; BWON-F16C-NEXT:    movw %ax, (%rdi)
148; BWON-F16C-NEXT:    retq
149;
150; CHECK-I686-LABEL: test_trunc32:
151; CHECK-I686:       # %bb.0:
152; CHECK-I686-NEXT:    pushl %esi
153; CHECK-I686-NEXT:    subl $8, %esp
154; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
155; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
156; CHECK-I686-NEXT:    movss %xmm0, (%esp)
157; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
158; CHECK-I686-NEXT:    movw %ax, (%esi)
159; CHECK-I686-NEXT:    addl $8, %esp
160; CHECK-I686-NEXT:    popl %esi
161; CHECK-I686-NEXT:    retl
162  %val16 = fptrunc float %in to half
163  store half %val16, half* %addr
164  ret void
165}
166
; Truncates the double argument to half and stores it to %addr. All runs,
; including the one with F16C enabled, expect a call to __truncdfhf2 followed
; by a 16-bit store of the returned value.
167define void @test_trunc64(double %in, half* %addr) #0 {
168; CHECK-LABEL: test_trunc64:
169; CHECK:       # %bb.0:
170; CHECK-NEXT:    pushq %rbx
171; CHECK-NEXT:    movq %rdi, %rbx
172; CHECK-NEXT:    callq __truncdfhf2
173; CHECK-NEXT:    movw %ax, (%rbx)
174; CHECK-NEXT:    popq %rbx
175; CHECK-NEXT:    retq
176;
177; CHECK-I686-LABEL: test_trunc64:
178; CHECK-I686:       # %bb.0:
179; CHECK-I686-NEXT:    pushl %esi
180; CHECK-I686-NEXT:    subl $8, %esp
181; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
182; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
183; CHECK-I686-NEXT:    movsd %xmm0, (%esp)
184; CHECK-I686-NEXT:    calll __truncdfhf2
185; CHECK-I686-NEXT:    movw %ax, (%esi)
186; CHECK-I686-NEXT:    addl $8, %esp
187; CHECK-I686-NEXT:    popl %esi
188; CHECK-I686-NEXT:    retl
189  %val16 = fptrunc double %in to half
190  store half %val16, half* %addr
191  ret void
192}
193
; Loads a half from %p and converts it to a signed i64. On x86_64 the half is
; first extended to float (__gnu_h2f_ieee or vcvtph2ps) and then converted
; with (v)cvttss2si; the i686 run expects __gnu_h2f_ieee followed by a call to
; __fixsfdi.
194define i64 @test_fptosi_i64(half* %p) #0 {
195; CHECK-LIBCALL-LABEL: test_fptosi_i64:
196; CHECK-LIBCALL:       # %bb.0:
197; CHECK-LIBCALL-NEXT:    pushq %rax
198; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
199; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
200; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
201; CHECK-LIBCALL-NEXT:    popq %rcx
202; CHECK-LIBCALL-NEXT:    retq
203;
204; BWON-F16C-LABEL: test_fptosi_i64:
205; BWON-F16C:       # %bb.0:
206; BWON-F16C-NEXT:    movswl (%rdi), %eax
207; BWON-F16C-NEXT:    vmovd %eax, %xmm0
208; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
209; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
210; BWON-F16C-NEXT:    retq
211;
212; CHECK-I686-LABEL: test_fptosi_i64:
213; CHECK-I686:       # %bb.0:
214; CHECK-I686-NEXT:    subl $12, %esp
215; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
216; CHECK-I686-NEXT:    movzwl (%eax), %eax
217; CHECK-I686-NEXT:    movl %eax, (%esp)
218; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
219; CHECK-I686-NEXT:    fstps (%esp)
220; CHECK-I686-NEXT:    calll __fixsfdi
221; CHECK-I686-NEXT:    addl $12, %esp
222; CHECK-I686-NEXT:    retl
223  %a = load half, half* %p, align 2
224  %r = fptosi half %a to i64
225  ret i64 %r
226}
227
; Converts the signed i64 argument to half and stores it to %p. On x86_64 the
; integer is converted to float with (v)cvtsi2ssq and then truncated to half
; (__gnu_f2h_ieee or vcvtps2ph); the i686 run expects an x87 fildll/fstps pair
; before the __gnu_f2h_ieee call.
228define void @test_sitofp_i64(i64 %a, half* %p) #0 {
229; CHECK-LIBCALL-LABEL: test_sitofp_i64:
230; CHECK-LIBCALL:       # %bb.0:
231; CHECK-LIBCALL-NEXT:    pushq %rbx
232; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
233; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
234; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
235; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
236; CHECK-LIBCALL-NEXT:    popq %rbx
237; CHECK-LIBCALL-NEXT:    retq
238;
239; BWON-F16C-LABEL: test_sitofp_i64:
240; BWON-F16C:       # %bb.0:
241; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
242; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
243; BWON-F16C-NEXT:    vmovd %xmm0, %eax
244; BWON-F16C-NEXT:    movw %ax, (%rsi)
245; BWON-F16C-NEXT:    retq
246;
247; CHECK-I686-LABEL: test_sitofp_i64:
248; CHECK-I686:       # %bb.0:
249; CHECK-I686-NEXT:    pushl %esi
250; CHECK-I686-NEXT:    subl $24, %esp
251; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
252; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
253; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
254; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
255; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
256; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
257; CHECK-I686-NEXT:    movss %xmm0, (%esp)
258; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
259; CHECK-I686-NEXT:    movw %ax, (%esi)
260; CHECK-I686-NEXT:    addl $24, %esp
261; CHECK-I686-NEXT:    popl %esi
262; CHECK-I686-NEXT:    retl
263  %r = sitofp i64 %a to half
264  store half %r, half* %p
265  ret void
266}
267
; Loads a half from %p and converts it to an unsigned i64. On x86_64 the half
; is extended to float and the expected sequence performs two (v)cvttss2si
; conversions (one on the value minus a constant loaded from memory, with the
; sign bit xor-ed in) and picks between them with cmovae after comparing
; against that constant. The i686 run expects a call to __fixunssfdi.
268define i64 @test_fptoui_i64(half* %p) #0 {
269; CHECK-LIBCALL-LABEL: test_fptoui_i64:
270; CHECK-LIBCALL:       # %bb.0:
271; CHECK-LIBCALL-NEXT:    pushq %rax
272; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
273; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
274; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
275; CHECK-LIBCALL-NEXT:    movaps %xmm0, %xmm2
276; CHECK-LIBCALL-NEXT:    subss %xmm1, %xmm2
277; CHECK-LIBCALL-NEXT:    cvttss2si %xmm2, %rax
278; CHECK-LIBCALL-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
279; CHECK-LIBCALL-NEXT:    xorq %rax, %rcx
280; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
281; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
282; CHECK-LIBCALL-NEXT:    cmovaeq %rcx, %rax
283; CHECK-LIBCALL-NEXT:    popq %rcx
284; CHECK-LIBCALL-NEXT:    retq
285;
286; BWON-F16C-LABEL: test_fptoui_i64:
287; BWON-F16C:       # %bb.0:
288; BWON-F16C-NEXT:    movswl (%rdi), %eax
289; BWON-F16C-NEXT:    vmovd %eax, %xmm0
290; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
291; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
292; BWON-F16C-NEXT:    vsubss %xmm1, %xmm0, %xmm2
293; BWON-F16C-NEXT:    vcvttss2si %xmm2, %rax
294; BWON-F16C-NEXT:    movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
295; BWON-F16C-NEXT:    xorq %rax, %rcx
296; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
297; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
298; BWON-F16C-NEXT:    cmovaeq %rcx, %rax
299; BWON-F16C-NEXT:    retq
300;
301; CHECK-I686-LABEL: test_fptoui_i64:
302; CHECK-I686:       # %bb.0:
303; CHECK-I686-NEXT:    subl $12, %esp
304; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
305; CHECK-I686-NEXT:    movzwl (%eax), %eax
306; CHECK-I686-NEXT:    movl %eax, (%esp)
307; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
308; CHECK-I686-NEXT:    fstps (%esp)
309; CHECK-I686-NEXT:    calll __fixunssfdi
310; CHECK-I686-NEXT:    addl $12, %esp
311; CHECK-I686-NEXT:    retl
312  %a = load half, half* %p, align 2
313  %r = fptoui half %a to i64
314  ret i64 %r
315}
316
; Converts the unsigned i64 argument to half and stores it to %p. The x86_64
; runs expect a branch on the sign bit of the input: the non-negative path
; converts directly with (v)cvtsi2ssq, while the other path halves the value
; (keeping the low bit), converts, and doubles the result. The float is then
; truncated to half (__gnu_f2h_ieee or vcvtps2ph). The i686 run expects fildll
; plus an fadds from a constant-pool entry indexed by the setns result.
317define void @test_uitofp_i64(i64 %a, half* %p) #0 {
318; CHECK-LIBCALL-LABEL: test_uitofp_i64:
319; CHECK-LIBCALL:       # %bb.0:
320; CHECK-LIBCALL-NEXT:    pushq %rbx
321; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
322; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
323; CHECK-LIBCALL-NEXT:    js .LBB10_1
324; CHECK-LIBCALL-NEXT:  # %bb.2:
325; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
326; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
327; CHECK-LIBCALL-NEXT:  .LBB10_1:
328; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
329; CHECK-LIBCALL-NEXT:    shrq %rax
330; CHECK-LIBCALL-NEXT:    andl $1, %edi
331; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
332; CHECK-LIBCALL-NEXT:    cvtsi2ssq %rdi, %xmm0
333; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
334; CHECK-LIBCALL-NEXT:  .LBB10_3:
335; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
336; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
337; CHECK-LIBCALL-NEXT:    popq %rbx
338; CHECK-LIBCALL-NEXT:    retq
339;
340; BWON-F16C-LABEL: test_uitofp_i64:
341; BWON-F16C:       # %bb.0:
342; BWON-F16C-NEXT:    testq %rdi, %rdi
343; BWON-F16C-NEXT:    js .LBB10_1
344; BWON-F16C-NEXT:  # %bb.2:
345; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
346; BWON-F16C-NEXT:    jmp .LBB10_3
347; BWON-F16C-NEXT:  .LBB10_1:
348; BWON-F16C-NEXT:    movq %rdi, %rax
349; BWON-F16C-NEXT:    shrq %rax
350; BWON-F16C-NEXT:    andl $1, %edi
351; BWON-F16C-NEXT:    orq %rax, %rdi
352; BWON-F16C-NEXT:    vcvtsi2ssq %rdi, %xmm0, %xmm0
353; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
354; BWON-F16C-NEXT:  .LBB10_3:
355; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
356; BWON-F16C-NEXT:    vmovd %xmm0, %eax
357; BWON-F16C-NEXT:    movw %ax, (%rsi)
358; BWON-F16C-NEXT:    retq
359;
360; CHECK-I686-LABEL: test_uitofp_i64:
361; CHECK-I686:       # %bb.0:
362; CHECK-I686-NEXT:    pushl %esi
363; CHECK-I686-NEXT:    subl $24, %esp
364; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
365; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
366; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
367; CHECK-I686-NEXT:    xorl %eax, %eax
368; CHECK-I686-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
369; CHECK-I686-NEXT:    setns %al
370; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
371; CHECK-I686-NEXT:    fadds {{\.LCPI.*}}(,%eax,4)
372; CHECK-I686-NEXT:    fstps (%esp)
373; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
374; CHECK-I686-NEXT:    movw %ax, (%esi)
375; CHECK-I686-NEXT:    addl $24, %esp
376; CHECK-I686-NEXT:    popl %esi
377; CHECK-I686-NEXT:    retl
378  %r = uitofp i64 %a to half
379  store half %r, half* %p
380  ret void
381}
382
; Loads a <4 x half> from %p and extends it to <4 x float>. In every run the
; expected code converts the four elements one at a time (four calls to
; __gnu_h2f_ieee, or four scalar vcvtph2ps with F16C) and then reassembles the
; result vector with unpack/insert shuffles.
383define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
384; CHECK-LIBCALL-LABEL: test_extend32_vec4:
385; CHECK-LIBCALL:       # %bb.0:
386; CHECK-LIBCALL-NEXT:    pushq %rbx
387; CHECK-LIBCALL-NEXT:    subq $48, %rsp
388; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
389; CHECK-LIBCALL-NEXT:    movzwl (%rdi), %edi
390; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
391; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
392; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
393; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
394; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
395; CHECK-LIBCALL-NEXT:    movzwl 4(%rbx), %edi
396; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
397; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
398; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
399; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
400; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
401; CHECK-LIBCALL-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
402; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
403; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
404; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
405; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
406; CHECK-LIBCALL-NEXT:    addq $48, %rsp
407; CHECK-LIBCALL-NEXT:    popq %rbx
408; CHECK-LIBCALL-NEXT:    retq
409;
410; BWON-F16C-LABEL: test_extend32_vec4:
411; BWON-F16C:       # %bb.0:
412; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
413; BWON-F16C-NEXT:    vmovd %eax, %xmm0
414; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
415; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
416; BWON-F16C-NEXT:    vmovd %eax, %xmm1
417; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
418; BWON-F16C-NEXT:    movswl (%rdi), %eax
419; BWON-F16C-NEXT:    vmovd %eax, %xmm2
420; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
421; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
422; BWON-F16C-NEXT:    vmovd %eax, %xmm3
423; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
424; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
425; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
426; BWON-F16C-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
427; BWON-F16C-NEXT:    retq
428;
429; CHECK-I686-LABEL: test_extend32_vec4:
430; CHECK-I686:       # %bb.0:
431; CHECK-I686-NEXT:    pushl %esi
432; CHECK-I686-NEXT:    subl $56, %esp
433; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
434; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
435; CHECK-I686-NEXT:    movl %eax, (%esp)
436; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
437; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
438; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
439; CHECK-I686-NEXT:    movl %eax, (%esp)
440; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
441; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
442; CHECK-I686-NEXT:    movzwl (%esi), %eax
443; CHECK-I686-NEXT:    movl %eax, (%esp)
444; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
445; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
446; CHECK-I686-NEXT:    movl %eax, (%esp)
447; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
448; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
449; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
450; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
451; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
452; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
453; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
454; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
455; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
456; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
457; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
458; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
459; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
460; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
461; CHECK-I686-NEXT:    addl $56, %esp
462; CHECK-I686-NEXT:    popl %esi
463; CHECK-I686-NEXT:    retl
464  %a = load <4 x half>, <4 x half>* %p, align 8
465  %b = fpext <4 x half> %a to <4 x float>
466  ret <4 x float> %b
467}
468
; Loads a <4 x half> from %p and extends it to <4 x double>. In every run the
; expected code handles the four elements one at a time: each half is first
; extended via __gnu_h2f_ieee (or scalar vcvtph2ps with F16C), then widened to
; double, and the results are packed back into vector registers.
469define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
470; CHECK-LIBCALL-LABEL: test_extend64_vec4:
471; CHECK-LIBCALL:       # %bb.0:
472; CHECK-LIBCALL-NEXT:    pushq %rbx
473; CHECK-LIBCALL-NEXT:    subq $16, %rsp
474; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
475; CHECK-LIBCALL-NEXT:    movzwl 4(%rdi), %edi
476; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
477; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
478; CHECK-LIBCALL-NEXT:    movzwl 6(%rbx), %edi
479; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
480; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
481; CHECK-LIBCALL-NEXT:    movzwl (%rbx), %edi
482; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
483; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
484; CHECK-LIBCALL-NEXT:    movzwl 2(%rbx), %edi
485; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
486; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
487; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
488; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
489; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
490; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
491; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
492; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
493; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm2
494; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
495; CHECK-LIBCALL-NEXT:    # xmm1 = mem[0],zero,zero,zero
496; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm1, %xmm1
497; CHECK-LIBCALL-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
498; CHECK-LIBCALL-NEXT:    addq $16, %rsp
499; CHECK-LIBCALL-NEXT:    popq %rbx
500; CHECK-LIBCALL-NEXT:    retq
501;
502; BWON-F16C-LABEL: test_extend64_vec4:
503; BWON-F16C:       # %bb.0:
504; BWON-F16C-NEXT:    movswl (%rdi), %eax
505; BWON-F16C-NEXT:    vmovd %eax, %xmm0
506; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
507; BWON-F16C-NEXT:    movswl 2(%rdi), %eax
508; BWON-F16C-NEXT:    vmovd %eax, %xmm1
509; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
510; BWON-F16C-NEXT:    movswl 4(%rdi), %eax
511; BWON-F16C-NEXT:    vmovd %eax, %xmm2
512; BWON-F16C-NEXT:    vcvtph2ps %xmm2, %xmm2
513; BWON-F16C-NEXT:    movswl 6(%rdi), %eax
514; BWON-F16C-NEXT:    vmovd %eax, %xmm3
515; BWON-F16C-NEXT:    vcvtph2ps %xmm3, %xmm3
516; BWON-F16C-NEXT:    vcvtss2sd %xmm3, %xmm3, %xmm3
517; BWON-F16C-NEXT:    vcvtss2sd %xmm2, %xmm2, %xmm2
518; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
519; BWON-F16C-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1
520; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
521; BWON-F16C-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
522; BWON-F16C-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
523; BWON-F16C-NEXT:    retq
524;
525; CHECK-I686-LABEL: test_extend64_vec4:
526; CHECK-I686:       # %bb.0:
527; CHECK-I686-NEXT:    pushl %esi
528; CHECK-I686-NEXT:    subl $88, %esp
529; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
530; CHECK-I686-NEXT:    movzwl 6(%esi), %eax
531; CHECK-I686-NEXT:    movl %eax, (%esp)
532; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
533; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
534; CHECK-I686-NEXT:    movzwl 4(%esi), %eax
535; CHECK-I686-NEXT:    movl %eax, (%esp)
536; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
537; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
538; CHECK-I686-NEXT:    movzwl 2(%esi), %eax
539; CHECK-I686-NEXT:    movl %eax, (%esp)
540; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
541; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
542; CHECK-I686-NEXT:    movzwl (%esi), %eax
543; CHECK-I686-NEXT:    movl %eax, (%esp)
544; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
545; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
546; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
547; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
548; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
549; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
550; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
551; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
552; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
553; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
554; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
555; CHECK-I686-NEXT:    movhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
556; CHECK-I686-NEXT:    addl $88, %esp
557; CHECK-I686-NEXT:    popl %esi
558; CHECK-I686-NEXT:    retl
559  %a = load <4 x half>, <4 x half>* %p, align 8
560  %b = fpext <4 x half> %a to <4 x double>
561  ret <4 x double> %b
562}
563
; Truncates the <4 x float> argument to <4 x half> and stores it to %p. In
; every run the expected code extracts and truncates the four elements one at
; a time (four calls to __gnu_f2h_ieee, or four scalar vcvtps2ph with F16C)
; and writes the results with four separate 16-bit stores. The two x86_64
; runs without F16C differ only in how the intermediate 16-bit results are
; copied between registers (movl vs. movw).
564define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
565; BWON-NOF16C-LABEL: test_trunc32_vec4:
566; BWON-NOF16C:       # %bb.0:
567; BWON-NOF16C-NEXT:    pushq %rbp
568; BWON-NOF16C-NEXT:    pushq %r15
569; BWON-NOF16C-NEXT:    pushq %r14
570; BWON-NOF16C-NEXT:    pushq %rbx
571; BWON-NOF16C-NEXT:    subq $24, %rsp
572; BWON-NOF16C-NEXT:    movq %rdi, %rbx
573; BWON-NOF16C-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
574; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
575; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
576; BWON-NOF16C-NEXT:    movl %eax, %r14d
577; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
578; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
579; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
580; BWON-NOF16C-NEXT:    movl %eax, %r15d
581; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
582; BWON-NOF16C-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
583; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
584; BWON-NOF16C-NEXT:    movl %eax, %ebp
585; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
586; BWON-NOF16C-NEXT:    callq __gnu_f2h_ieee
587; BWON-NOF16C-NEXT:    movw %ax, (%rbx)
588; BWON-NOF16C-NEXT:    movw %bp, 6(%rbx)
589; BWON-NOF16C-NEXT:    movw %r15w, 4(%rbx)
590; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
591; BWON-NOF16C-NEXT:    addq $24, %rsp
592; BWON-NOF16C-NEXT:    popq %rbx
593; BWON-NOF16C-NEXT:    popq %r14
594; BWON-NOF16C-NEXT:    popq %r15
595; BWON-NOF16C-NEXT:    popq %rbp
596; BWON-NOF16C-NEXT:    retq
597;
598; BWOFF-LABEL: test_trunc32_vec4:
599; BWOFF:       # %bb.0:
600; BWOFF-NEXT:    pushq %rbp
601; BWOFF-NEXT:    pushq %r15
602; BWOFF-NEXT:    pushq %r14
603; BWOFF-NEXT:    pushq %rbx
604; BWOFF-NEXT:    subq $24, %rsp
605; BWOFF-NEXT:    movq %rdi, %rbx
606; BWOFF-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
607; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
608; BWOFF-NEXT:    callq __gnu_f2h_ieee
609; BWOFF-NEXT:    movw %ax, %r14w
610; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
611; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
612; BWOFF-NEXT:    callq __gnu_f2h_ieee
613; BWOFF-NEXT:    movw %ax, %r15w
614; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
615; BWOFF-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
616; BWOFF-NEXT:    callq __gnu_f2h_ieee
617; BWOFF-NEXT:    movw %ax, %bp
618; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
619; BWOFF-NEXT:    callq __gnu_f2h_ieee
620; BWOFF-NEXT:    movw %ax, (%rbx)
621; BWOFF-NEXT:    movw %bp, 6(%rbx)
622; BWOFF-NEXT:    movw %r15w, 4(%rbx)
623; BWOFF-NEXT:    movw %r14w, 2(%rbx)
624; BWOFF-NEXT:    addq $24, %rsp
625; BWOFF-NEXT:    popq %rbx
626; BWOFF-NEXT:    popq %r14
627; BWOFF-NEXT:    popq %r15
628; BWOFF-NEXT:    popq %rbp
629; BWOFF-NEXT:    retq
630;
631; BWON-F16C-LABEL: test_trunc32_vec4:
632; BWON-F16C:       # %bb.0:
633; BWON-F16C-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
634; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
635; BWON-F16C-NEXT:    vmovd %xmm1, %eax
636; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
637; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
638; BWON-F16C-NEXT:    vmovd %xmm1, %ecx
639; BWON-F16C-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
640; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
641; BWON-F16C-NEXT:    vmovd %xmm1, %edx
642; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
643; BWON-F16C-NEXT:    vmovd %xmm0, %esi
644; BWON-F16C-NEXT:    movw %si, (%rdi)
645; BWON-F16C-NEXT:    movw %dx, 6(%rdi)
646; BWON-F16C-NEXT:    movw %cx, 4(%rdi)
647; BWON-F16C-NEXT:    movw %ax, 2(%rdi)
648; BWON-F16C-NEXT:    retq
649;
650; CHECK-I686-LABEL: test_trunc32_vec4:
651; CHECK-I686:       # %bb.0:
652; CHECK-I686-NEXT:    pushl %ebp
653; CHECK-I686-NEXT:    pushl %ebx
654; CHECK-I686-NEXT:    pushl %edi
655; CHECK-I686-NEXT:    pushl %esi
656; CHECK-I686-NEXT:    subl $44, %esp
657; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
658; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
659; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
660; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
661; CHECK-I686-NEXT:    movss %xmm1, (%esp)
662; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
663; CHECK-I686-NEXT:    movw %ax, %si
664; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
665; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
666; CHECK-I686-NEXT:    movss %xmm0, (%esp)
667; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
668; CHECK-I686-NEXT:    movw %ax, %di
669; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
670; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
671; CHECK-I686-NEXT:    movss %xmm0, (%esp)
672; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
673; CHECK-I686-NEXT:    movw %ax, %bx
674; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
675; CHECK-I686-NEXT:    movss %xmm0, (%esp)
676; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
677; CHECK-I686-NEXT:    movw %ax, (%ebp)
678; CHECK-I686-NEXT:    movw %bx, 6(%ebp)
679; CHECK-I686-NEXT:    movw %di, 4(%ebp)
680; CHECK-I686-NEXT:    movw %si, 2(%ebp)
681; CHECK-I686-NEXT:    addl $44, %esp
682; CHECK-I686-NEXT:    popl %esi
683; CHECK-I686-NEXT:    popl %edi
684; CHECK-I686-NEXT:    popl %ebx
685; CHECK-I686-NEXT:    popl %ebp
686; CHECK-I686-NEXT:    retl
687  %v = fptrunc <4 x float> %a to <4 x half>
688  store <4 x half> %v, <4 x half>* %p
689  ret void
690}
691
692define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
693; BWON-NOF16C-LABEL: test_trunc64_vec4:
694; BWON-NOF16C:       # %bb.0:
695; BWON-NOF16C-NEXT:    pushq %rbp
696; BWON-NOF16C-NEXT:    pushq %r15
697; BWON-NOF16C-NEXT:    pushq %r14
698; BWON-NOF16C-NEXT:    pushq %rbx
699; BWON-NOF16C-NEXT:    subq $40, %rsp
700; BWON-NOF16C-NEXT:    movq %rdi, %rbx
701; BWON-NOF16C-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
702; BWON-NOF16C-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
703; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
704; BWON-NOF16C-NEXT:    callq __truncdfhf2
705; BWON-NOF16C-NEXT:    movl %eax, %r14d
706; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
707; BWON-NOF16C-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
708; BWON-NOF16C-NEXT:    callq __truncdfhf2
709; BWON-NOF16C-NEXT:    movl %eax, %r15d
710; BWON-NOF16C-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711; BWON-NOF16C-NEXT:    callq __truncdfhf2
712; BWON-NOF16C-NEXT:    movl %eax, %ebp
713; BWON-NOF16C-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
714; BWON-NOF16C-NEXT:    callq __truncdfhf2
715; BWON-NOF16C-NEXT:    movw %ax, 4(%rbx)
716; BWON-NOF16C-NEXT:    movw %bp, (%rbx)
717; BWON-NOF16C-NEXT:    movw %r15w, 6(%rbx)
718; BWON-NOF16C-NEXT:    movw %r14w, 2(%rbx)
719; BWON-NOF16C-NEXT:    addq $40, %rsp
720; BWON-NOF16C-NEXT:    popq %rbx
721; BWON-NOF16C-NEXT:    popq %r14
722; BWON-NOF16C-NEXT:    popq %r15
723; BWON-NOF16C-NEXT:    popq %rbp
724; BWON-NOF16C-NEXT:    retq
725;
726; BWOFF-LABEL: test_trunc64_vec4:
727; BWOFF:       # %bb.0:
728; BWOFF-NEXT:    pushq %rbp
729; BWOFF-NEXT:    pushq %r15
730; BWOFF-NEXT:    pushq %r14
731; BWOFF-NEXT:    pushq %rbx
732; BWOFF-NEXT:    subq $40, %rsp
733; BWOFF-NEXT:    movq %rdi, %rbx
734; BWOFF-NEXT:    movaps %xmm1, (%rsp) # 16-byte Spill
735; BWOFF-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
736; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
737; BWOFF-NEXT:    callq __truncdfhf2
738; BWOFF-NEXT:    movw %ax, %r14w
739; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
740; BWOFF-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
741; BWOFF-NEXT:    callq __truncdfhf2
742; BWOFF-NEXT:    movw %ax, %r15w
743; BWOFF-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
744; BWOFF-NEXT:    callq __truncdfhf2
745; BWOFF-NEXT:    movw %ax, %bp
746; BWOFF-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
747; BWOFF-NEXT:    callq __truncdfhf2
748; BWOFF-NEXT:    movw %ax, 4(%rbx)
749; BWOFF-NEXT:    movw %bp, (%rbx)
750; BWOFF-NEXT:    movw %r15w, 6(%rbx)
751; BWOFF-NEXT:    movw %r14w, 2(%rbx)
752; BWOFF-NEXT:    addq $40, %rsp
753; BWOFF-NEXT:    popq %rbx
754; BWOFF-NEXT:    popq %r14
755; BWOFF-NEXT:    popq %r15
756; BWOFF-NEXT:    popq %rbp
757; BWOFF-NEXT:    retq
758;
759; BWON-F16C-LABEL: test_trunc64_vec4:
760; BWON-F16C:       # %bb.0:
761; BWON-F16C-NEXT:    pushq %rbp
762; BWON-F16C-NEXT:    pushq %r15
763; BWON-F16C-NEXT:    pushq %r14
764; BWON-F16C-NEXT:    pushq %rbx
765; BWON-F16C-NEXT:    subq $88, %rsp
766; BWON-F16C-NEXT:    movq %rdi, %rbx
767; BWON-F16C-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
768; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
769; BWON-F16C-NEXT:    vzeroupper
770; BWON-F16C-NEXT:    callq __truncdfhf2
771; BWON-F16C-NEXT:    movl %eax, %r14d
772; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
773; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
774; BWON-F16C-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
775; BWON-F16C-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
776; BWON-F16C-NEXT:    vzeroupper
777; BWON-F16C-NEXT:    callq __truncdfhf2
778; BWON-F16C-NEXT:    movl %eax, %r15d
779; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
780; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
781; BWON-F16C-NEXT:    vzeroupper
782; BWON-F16C-NEXT:    callq __truncdfhf2
783; BWON-F16C-NEXT:    movl %eax, %ebp
784; BWON-F16C-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
785; BWON-F16C-NEXT:    callq __truncdfhf2
786; BWON-F16C-NEXT:    movw %ax, 4(%rbx)
787; BWON-F16C-NEXT:    movw %bp, (%rbx)
788; BWON-F16C-NEXT:    movw %r15w, 6(%rbx)
789; BWON-F16C-NEXT:    movw %r14w, 2(%rbx)
790; BWON-F16C-NEXT:    addq $88, %rsp
791; BWON-F16C-NEXT:    popq %rbx
792; BWON-F16C-NEXT:    popq %r14
793; BWON-F16C-NEXT:    popq %r15
794; BWON-F16C-NEXT:    popq %rbp
795; BWON-F16C-NEXT:    retq
796;
797; CHECK-I686-LABEL: test_trunc64_vec4:
798; CHECK-I686:       # %bb.0:
799; CHECK-I686-NEXT:    pushl %ebp
800; CHECK-I686-NEXT:    pushl %ebx
801; CHECK-I686-NEXT:    pushl %edi
802; CHECK-I686-NEXT:    pushl %esi
803; CHECK-I686-NEXT:    subl $60, %esp
804; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
805; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
806; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
807; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
808; CHECK-I686-NEXT:    calll __truncdfhf2
809; CHECK-I686-NEXT:    movw %ax, %si
810; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
811; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
812; CHECK-I686-NEXT:    calll __truncdfhf2
813; CHECK-I686-NEXT:    movw %ax, %di
814; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
815; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
816; CHECK-I686-NEXT:    calll __truncdfhf2
817; CHECK-I686-NEXT:    movw %ax, %bx
818; CHECK-I686-NEXT:    movapd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
819; CHECK-I686-NEXT:    movhpd %xmm0, (%esp)
820; CHECK-I686-NEXT:    calll __truncdfhf2
821; CHECK-I686-NEXT:    movw %ax, 6(%ebp)
822; CHECK-I686-NEXT:    movw %bx, 4(%ebp)
823; CHECK-I686-NEXT:    movw %di, 2(%ebp)
824; CHECK-I686-NEXT:    movw %si, (%ebp)
825; CHECK-I686-NEXT:    addl $60, %esp
826; CHECK-I686-NEXT:    popl %esi
827; CHECK-I686-NEXT:    popl %edi
828; CHECK-I686-NEXT:    popl %ebx
829; CHECK-I686-NEXT:    popl %ebp
830; CHECK-I686-NEXT:    retl
831  %v = fptrunc <4 x double> %a to <4 x half>
832  store <4 x half> %v, <4 x half>* %p
833  ret void
834}
835
; External function returning float and taking no arguments. It is only
; declared here (no body), and is called by @test_f80trunc_nodagcombine.
836declare float @test_floatret();
837
838; On i686, if SSE2 is available, the return value from test_floatret is loaded
839; to f80 and then rounded to f32.  The DAG combiner should not combine this
840; fp_round and the subsequent fptrunc from float to half.
; Expected output per check prefix (see the assertions in the body):
;   - CHECK-LIBCALL: __gnu_f2h_ieee on the call result, then __gnu_h2f_ieee.
;   - BWON-F16C:     vcvtps2ph followed by vcvtph2ps, no libcalls.
;   - CHECK-I686:    the result of test_floatret is first stored with fstps,
;                    and only then passed to __gnu_f2h_ieee.
841define half @test_f80trunc_nodagcombine() #0 {
842; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
843; CHECK-LIBCALL:       # %bb.0:
844; CHECK-LIBCALL-NEXT:    pushq %rax
845; CHECK-LIBCALL-NEXT:    callq test_floatret
846; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
847; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
848; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
849; CHECK-LIBCALL-NEXT:    popq %rax
850; CHECK-LIBCALL-NEXT:    retq
851;
852; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
853; BWON-F16C:       # %bb.0:
854; BWON-F16C-NEXT:    pushq %rax
855; BWON-F16C-NEXT:    callq test_floatret
856; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
857; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
858; BWON-F16C-NEXT:    popq %rax
859; BWON-F16C-NEXT:    retq
860;
861; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
862; CHECK-I686:       # %bb.0:
863; CHECK-I686-NEXT:    subl $12, %esp
864; CHECK-I686-NEXT:    calll test_floatret
865; CHECK-I686-NEXT:    fstps (%esp)
866; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
867; CHECK-I686-NEXT:    movzwl %ax, %eax
868; CHECK-I686-NEXT:    movl %eax, (%esp)
869; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
870; CHECK-I686-NEXT:    addl $12, %esp
871; CHECK-I686-NEXT:    retl
; Call the external function, then truncate its float result to half and
; return it. The fptrunc below is the float-to-half step that must stay a
; separate conversion in the generated code.
872  %1 = call float @test_floatret()
873  %2 = fptrunc float %1 to half
874  ret half %2
875}
876
877
878
879
; Loads a half from %b, converts the i32 %a to half, adds the two values as
; half and returns the sum extended to float.
; In each expected output below, the converted integer goes through a
; float->half->float round trip (__gnu_f2h_ieee/__gnu_h2f_ieee, or
; vcvtps2ph/vcvtph2ps with F16C) before a single-precision add
; (addss/vaddss); no further conversion of the sum appears in the checks.
880define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
881; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
882; CHECK-LIBCALL:       # %bb.0:
883; CHECK-LIBCALL-NEXT:    pushq %rbx
884; CHECK-LIBCALL-NEXT:    subq $16, %rsp
885; CHECK-LIBCALL-NEXT:    movl %edi, %ebx
886; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %edi
887; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
888; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
889; CHECK-LIBCALL-NEXT:    cvtsi2ssl %ebx, %xmm0
890; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee
891; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
892; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee
893; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
894; CHECK-LIBCALL-NEXT:    addq $16, %rsp
895; CHECK-LIBCALL-NEXT:    popq %rbx
896; CHECK-LIBCALL-NEXT:    retq
897;
898; BWON-F16C-LABEL: test_sitofp_fadd_i32:
899; BWON-F16C:       # %bb.0:
900; BWON-F16C-NEXT:    movswl (%rsi), %eax
901; BWON-F16C-NEXT:    vmovd %eax, %xmm0
902; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
903; BWON-F16C-NEXT:    vcvtsi2ssl %edi, %xmm1, %xmm1
904; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
905; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
906; BWON-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
907; BWON-F16C-NEXT:    retq
908;
909; CHECK-I686-LABEL: test_sitofp_fadd_i32:
910; CHECK-I686:       # %bb.0:
911; CHECK-I686-NEXT:    subl $28, %esp
912; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
913; CHECK-I686-NEXT:    movzwl (%eax), %eax
914; CHECK-I686-NEXT:    movl %eax, (%esp)
915; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
916; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
917; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
918; CHECK-I686-NEXT:    movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
919; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
920; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
921; CHECK-I686-NEXT:    movss %xmm0, (%esp)
922; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
923; CHECK-I686-NEXT:    movzwl %ax, %eax
924; CHECK-I686-NEXT:    movl %eax, (%esp)
925; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
926; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
927; CHECK-I686-NEXT:    movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
928; CHECK-I686-NEXT:    # xmm0 = mem[0],zero,zero,zero
929; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
930; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
931; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
932; CHECK-I686-NEXT:    addl $28, %esp
933; CHECK-I686-NEXT:    retl
; %tmp0: half operand read from memory.
; %tmp1: signed i32 argument converted directly to half.
; %tmp2: half-typed sum, widened to float in %tmp3 for the return value.
934  %tmp0 = load half, half* %b
935  %tmp1 = sitofp i32 %a to half
936  %tmp2 = fadd half %tmp0, %tmp1
937  %tmp3 = fpext half %tmp2 to float
938  ret float %tmp3
939}
940
; Attribute group #0, referenced by the function definitions in this file
; (e.g. @test_f80trunc_nodagcombine and @test_sitofp_fadd_i32): nounwind only.
941attributes #0 = { nounwind }
942