• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
2; rdar://13082402
3
; Signed i32 -> float. The load is expected to land directly in s0 so that
; the conversion operates on FP registers only.
define float @t1(i32* nocapture %src) nounwind ssp {
; CHECK-LABEL: t1:
; CHECK: ldr s0, [x0]
; CHECK: scvtf s0, s0
entry:
  %word = load i32* %src, align 4
  %result = sitofp i32 %word to float
  ret float %result
}
13
; Unsigned i32 -> float. Same shape as the signed case: load into s0, then
; convert within the FP register file.
define float @t2(i32* nocapture %src) nounwind ssp {
; CHECK-LABEL: t2:
; CHECK: ldr s0, [x0]
; CHECK: ucvtf s0, s0
entry:
  %word = load i32* %src, align 4
  %result = uitofp i32 %word to float
  ret float %result
}
23
; Signed i64 -> double. The 64-bit value is expected to be loaded into d0 and
; converted in place.
define double @t3(i64* nocapture %src) nounwind ssp {
; CHECK-LABEL: t3:
; CHECK: ldr d0, [x0]
; CHECK: scvtf d0, d0
entry:
  %dword = load i64* %src, align 4
  %result = sitofp i64 %dword to double
  ret double %result
}
33
; Unsigned i64 -> double. The 64-bit value is expected to be loaded into d0
; and converted in place.
define double @t4(i64* nocapture %src) nounwind ssp {
; CHECK-LABEL: t4:
; CHECK: ldr d0, [x0]
; CHECK: ucvtf d0, d0
entry:
  %dword = load i64* %src, align 4
  %result = uitofp i64 %dword to double
  ret double %result
}
43
44; rdar://13136456
; Signed i32 -> double under optsize. Here the expectations keep the load in a
; w register and convert straight from the general-purpose register.
define double @t5(i32* nocapture %src) nounwind ssp optsize {
; CHECK-LABEL: t5:
; CHECK: ldr [[REG:w[0-9]+]], [x0]
; CHECK: scvtf d0, [[REG]]
entry:
  %word = load i32* %src, align 4
  %result = sitofp i32 %word to double
  ret double %result
}
54
; Check that we load directly into an FP register when the loaded value is
; about to be converted to floating point.
; This is much faster than loading into a GPR and then doing the GPR -> FPR
; conversion.
; <rdar://problem/14599607>
;
; Check the following patterns for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With loading size: 8, 16, 32, and 64-bits.
69
70; ********* 1. load with scaled imm to float. *********
; Unsigned i8 at element offset 1 -> float, squared. Expects a scaled-immediate
; load into a b register and ucvtf on the s view of that same register.
define float @fct1(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
83
; Unsigned i16 at element offset 1 (byte offset 2) -> float, squared. Expects
; the halfword to be loaded into an h register and converted from its s view.
define float @fct2(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
96
; Unsigned i32 at element offset 1 (byte offset 4) -> float, squared. Expects
; the word to be loaded into an s register and converted in the FP file.
define float @fct3(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
109
110; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at element offset 1 (byte offset 8) -> float, squared. The
; expectations keep the value in an x register and convert from there.
define float @fct4(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
123
124; ********* 2. load with scaled register to float. *********
; Unsigned i8 indexed by %offset -> float, squared. Expects a register-offset
; load into a b register followed by ucvtf on its s view.
define float @fct5(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
137
; Unsigned i16 indexed by %offset -> float, squared. Expects the index to be
; folded into the load as a register offset shifted left by 1.
define float @fct6(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
150
; Unsigned i32 indexed by %offset -> float, squared. Expects the index to be
; folded into the load as a register offset shifted left by 2.
define float @fct7(i32* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
163
164; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 indexed by %offset -> float, squared. The expectations keep the
; loaded value in an x register (offset shifted left by 3) and convert from it.
define float @fct8(i64* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
177
178
179; ********* 3. load with scaled imm to double. *********
; Unsigned i8 at element offset 1 -> double, squared. Expects the byte to be
; loaded into a b register and converted from the d view of that register.
define double @fct9(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
192
; Unsigned i16 at element offset 1 (byte offset 2) -> double, squared. Expects
; the halfword in an h register, converted from the matching d register.
define double @fct10(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
205
; Unsigned i32 at element offset 1 (byte offset 4) -> double, squared. Expects
; the word in an s register, converted from the matching d register.
define double @fct11(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
218
; Unsigned i64 at element offset 1 (byte offset 8) -> double, squared. Expects
; the doubleword to be loaded into a d register and converted in place.
define double @fct12(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
231
232; ********* 4. load with scaled register to double. *********
; Unsigned i8 indexed by %offset -> double, squared. Expects a register-offset
; load into a b register followed by ucvtf on its d view.
define double @fct13(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
245
; Unsigned i16 indexed by %offset -> double, squared. Expects the index folded
; into the load (shifted left by 1) and the conversion done on the d view.
define double @fct14(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
258
; Unsigned i32 indexed by %offset -> double, squared. Expects the index folded
; into the load (shifted left by 2) and the conversion done on the d view.
define double @fct15(i32* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
271
; Unsigned i64 indexed by %offset -> double, squared. Expects the index folded
; into the load (shifted left by 3) with the value landing in a d register.
define double @fct16(i64* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: fct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
284
285; ********* 5. load with unscaled imm to float. *********
; Unsigned i8 at byte offset -1 -> float, squared. The address is built with
; integer arithmetic; an unscaled load (ldur) into a b register is expected.
define float @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
300
; Unsigned i16 at byte offset 1 -> float, squared. The offset is not a multiple
; of the access size, and an unscaled load into an h register is expected.
define float @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
314
; Unsigned i32 at byte offset 1 -> float, squared. An unscaled load into an
; s register is expected, followed by ucvtf within the FP register file.
define float @fct19(i32* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
328
329; i64 -> f32 is not supported on floating point unit.
; Unsigned i64 at byte offset 1 -> float, squared. The expectations keep the
; unscaled load in an x register and convert from the GPR.
define float @fct20(i64* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
344
345; ********* 6. load with unscaled imm to double. *********
; Unsigned i8 at byte offset -1 -> double, squared. An unscaled load into a
; b register is expected, converted from the d view of that register.
define double @fct21(i8* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = uitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
360
; Unsigned i16 at byte offset 1 -> double, squared. An unscaled load into an
; h register is expected, converted from the d view of that register.
define double @fct22(i16* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = uitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
374
; Unsigned i32 at byte offset 1 -> double, squared. An unscaled load into an
; s register is expected, converted from the d view of that register.
define double @fct23(i32* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = uitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
388
; Unsigned i64 at byte offset 1 -> double, squared. An unscaled load straight
; into a d register is expected, converted in place.
define double @fct24(i64* nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: ucvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = uitofp i64 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
403
404; ********* 1s. load with scaled imm to float. *********
; Signed i8 at element offset 1 -> float, squared. Expects the byte in an FP
; register, sign-extended by two sshll steps (8h, then 4s) before scvtf.
define float @sfct1(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct1:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
419
; Signed i16 at element offset 1 (byte offset 2) -> float, squared. Expects the
; halfword in an FP register, sign-extended by one sshll.4s before scvtf.
define float @sfct2(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct2:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
433
; Signed i32 at element offset 1 (byte offset 4) -> float, squared. No
; sign-extension step is expected for a 32-bit source, so scvtf must read the
; very register the load wrote.
define float @sfct3(i32* nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
446
447; i64 -> f32 is not supported on floating point unit.
; Signed i64 at element offset 1 (byte offset 8) -> float, squared. The
; expectations keep the value in an x register and convert from the GPR.
define float @sfct4(i64* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct4:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, #8]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 1
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
460
461; ********* 2s. load with scaled register to float. *********
; Signed i8 indexed by %offset -> float, squared. Expects a register-offset
; load into an FP register, then two sshll steps (8h, 4s) before scvtf.
define float @sfct5(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct5:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
476
; Signed i16 indexed by %offset -> float, squared. Expects the index folded
; into the load (shifted left by 1), then one sshll.4s before scvtf.
define float @sfct6(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct6:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
490
; Signed i32 indexed by %offset -> float, squared. No sign-extension step is
; expected for a 32-bit source, so scvtf must read the register the load wrote.
define float @sfct7(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
503
504; i64 -> f32 is not supported on floating point unit.
; Signed i64 indexed by %offset -> float, squared. The expectations keep the
; loaded value in an x register (offset shifted left by 3) and convert from it.
define float @sfct8(i64* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct8:
; CHECK: ldr x[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %ptr = getelementptr i64* %sp0, i64 %offset
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
517
518; ********* 3s. load with scaled imm to double. *********
; Signed i8 at element offset 1 -> double, squared. Here the expectations use a
; sign-extending GPR load (ldrsb) and convert from the w register.
define double @sfct9(i8* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct9:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 1
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
531
; Signed i16 at element offset 1 (byte offset 2) -> double, squared. Expects the
; halfword in an FP register, widened by sshll.4s then sshll.2d before scvtf.
define double @sfct10(i16* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct10:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 1
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
546
; Signed i32 at element offset 1 (byte offset 4) -> double, squared. Expects
; the word in an FP register, widened by one sshll.2d before scvtf.
define double @sfct11(i32* nocapture %sp0) {
entry:
; CHECK-LABEL: sfct11:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 1
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
560
; Signed i64 at element offset 1 (byte offset 8) -> double, squared. No
; sign-extension step is expected for a 64-bit source, so scvtf must read the
; register the load wrote.
define double @sfct12(i64* nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
573
574; ********* 4s. load with scaled register to double. *********
; Signed i8 indexed by %offset -> double, squared. The expectations use a
; sign-extending register-offset GPR load and convert from the w register.
define double @sfct13(i8* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct13:
; CHECK: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i8* %sp0, i64 %offset
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
587
; Signed i16 indexed by %offset -> double, squared. Expects the index folded
; into the load (shifted left by 1), then sshll.4s and sshll.2d before scvtf.
define double @sfct14(i16* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct14:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i16* %sp0, i64 %offset
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
602
; Signed i32 indexed by %offset -> double, squared. Expects the index folded
; into the load (shifted left by 2), then one sshll.2d before scvtf.
define double @sfct15(i32* nocapture %sp0, i64 %offset) {
entry:
; CHECK-LABEL: sfct15:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %ptr = getelementptr i32* %sp0, i64 %offset
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
616
; Signed i64 indexed by %offset -> double, squared. No sign-extension step is
; expected for a 64-bit source, so scvtf must read the register the load wrote.
define double @sfct16(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, x1, lsl #3]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
629
630; ********* 5s. load with unscaled imm to float. *********
; Signed i8 at byte offset -1 -> float, squared. Expects an unscaled load into
; an FP register, then two sshll steps (8h, 4s) before scvtf.
define float @sfct17(i8* nocapture %sp0) {
; CHECK-LABEL: sfct17:
; CHECK: ldur b[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: sshll.8h [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
647
; Signed i16 at byte offset 1 -> float, squared. Expects an unscaled load into
; an FP register, then one sshll.4s before scvtf.
define float @sfct18(i16* nocapture %sp0) {
; CHECK-LABEL: sfct18:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
662
; Signed i32 at byte offset 1 -> float, squared. An unscaled load into an
; s register is expected; with no sign-extension step in between, scvtf must
; read the register the load wrote.
define float @sfct19(i32* nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], s[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
  %bitcast = ptrtoint i32* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i32*
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
676
677; i64 -> f32 is not supported on floating point unit.
; Signed i64 at byte offset 1 -> float, squared. The expectations keep the
; unscaled load in an x register and convert from the GPR.
define float @sfct20(i64* nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK: ldur x[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], x[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i64* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i64*
  %raw = load i64* %ptr, align 1
  %conv = sitofp i64 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
692
693; ********* 6s. load with unscaled imm to double. *********
; Signed i8 at byte offset -1 -> double, squared. The expectations use an
; unscaled sign-extending GPR load (ldursb) and convert from the w register.
define double @sfct21(i8* nocapture %sp0) {
; CHECK-LABEL: sfct21:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
708
; Signed i16 at byte offset 1 -> double, squared. Expects an unscaled load into
; an FP register, widened by sshll.4s then sshll.2d before scvtf.
define double @sfct22(i16* nocapture %sp0) {
; CHECK-LABEL: sfct22:
; CHECK: ldur h[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.4s [[SEXTREG1:v[0-9]+]], v[[REGNUM]], #0
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i16* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i16*
  %raw = load i16* %ptr, align 1
  %conv = sitofp i16 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
724
; Signed i32 at byte offset 1 -> double, squared. Expects an unscaled load into
; an FP register, widened by one sshll.2d before scvtf.
define double @sfct23(i32* nocapture %sp0) {
; CHECK-LABEL: sfct23:
; CHECK: ldur s[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], v[[REGNUM]], #0
; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i32* %sp0 to i64
  %shifted = add i64 %base, 1
  %ptr = inttoptr i64 %shifted to i32*
  %raw = load i32* %ptr, align 1
  %conv = sitofp i32 %raw to double
  %sq = fmul double %conv, %conv
  ret double %sq
}
739
; Signed i64 at byte offset 1 -> double, squared. An unscaled load into a
; d register is expected; with no sign-extension step in between, scvtf must
; read the register the load wrote.
define double @sfct24(i64* nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK: ldur d[[REGNUM:[0-9]+]], [x0, #1]
; The conversion source is tied to REGNUM, captured on the load above. This
; function defines no SEXTREG, so referring to it would silently reuse a
; capture left over from a previous function.
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], d[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
  %bitcast = ptrtoint i64* %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to i64*
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
754
755; Check that we do not use SSHLL code sequence when code size is a concern.
; optsize variant of the signed i8 (byte offset -1) -> float case. The
; expectations use a sign-extending GPR load plus a GPR-source scvtf rather
; than the sshll-based widening sequence.
define float @codesize_sfct17(i8* nocapture %sp0) optsize {
; CHECK-LABEL: codesize_sfct17:
; CHECK: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
; CHECK-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
entry:
  %base = ptrtoint i8* %sp0 to i64
  %shifted = add i64 %base, -1
  %ptr = inttoptr i64 %shifted to i8*
  %raw = load i8* %ptr, align 1
  %conv = sitofp i8 %raw to float
  %sq = fmul float %conv, %conv
  ret float %sq
}
770
; minsize variant of the signed i32 (element offset 1) -> double case. The
; expectations use a plain GPR load plus a GPR-source scvtf rather than the
; sshll-based widening sequence.
define double @codesize_sfct11(i32* nocapture %sp0) minsize {
; The label names this function in full; a bare "sfct11:" would only match
; here as a substring of the real label.
; CHECK-LABEL: codesize_sfct11:
; CHECK: ldr w[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
783
784; Adding fp128 custom lowering makes these a little fragile since we have to
785; return the correct mix of Legal/Expand from the custom method.
786;
787; rdar://problem/14991489
788
; Unsigned i128 -> float. The conversion is expected to become a call to
; __floatuntisf (optionally with a leading underscore).
define float @float_from_i128(i128 %in) {
entry:
; CHECK-LABEL: float_from_i128:
; CHECK: bl {{_?__floatuntisf}}
  %as.float = uitofp i128 %in to float
  ret float %as.float
}
795
; Signed i128 -> double. The conversion is expected to become a call to
; __floattidf (optionally with a leading underscore).
define double @double_from_i128(i128 %in) {
entry:
; CHECK-LABEL: double_from_i128:
; CHECK: bl {{_?__floattidf}}
  %as.double = sitofp i128 %in to double
  ret double %as.double
}
802
; Unsigned i128 -> fp128. The conversion is expected to become a call to
; __floatuntitf (optionally with a leading underscore).
define fp128 @fp128_from_i128(i128 %in) {
entry:
; CHECK-LABEL: fp128_from_i128:
; CHECK: bl {{_?__floatuntitf}}
  %as.quad = uitofp i128 %in to fp128
  ret fp128 %as.quad
}
809
; float -> signed i128. The conversion is expected to become a call to
; __fixsfti (optionally with a leading underscore).
define i128 @i128_from_float(float %in) {
; The trailing colon makes the label directive match the function's label
; definition rather than any line that merely mentions the symbol, in line
; with the other labels in this file.
; CHECK-LABEL: i128_from_float:
; CHECK: bl {{_?__fixsfti}}
  %conv = fptosi float %in to i128
  ret i128 %conv
}
816
; double -> unsigned i128. The conversion is expected to become a call to
; __fixunsdfti (optionally with a leading underscore).
define i128 @i128_from_double(double %in) {
; The trailing colon makes the label directive match the function's label
; definition rather than any line that merely mentions the symbol, in line
; with the other labels in this file.
; CHECK-LABEL: i128_from_double:
; CHECK: bl {{_?__fixunsdfti}}
  %conv = fptoui double %in to i128
  ret i128 %conv
}
823
; fp128 -> signed i128. The conversion is expected to become a call to
; __fixtfti (optionally with a leading underscore).
define i128 @i128_from_fp128(fp128 %in) {
; The trailing colon makes the label directive match the function's label
; definition rather than any line that merely mentions the symbol, in line
; with the other labels in this file.
; CHECK-LABEL: i128_from_fp128:
; CHECK: bl {{_?__fixtfti}}
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}
830
831