• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
2; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s
3
4; rdar://12648441
5; Generated from arm64-arguments.c with -O2.
6; Test passing structs with size < 8, < 16 and > 16
7; with alignment of 16 and without
8
; Aggregate types exercised by this test, followed by the globals that the
; caller functions load their arguments from.
9; Structs with size < 8
10%struct.s38 = type { i32, i16 }
11; With alignment of 16, the size will be padded to multiple of 16 bytes.
12%struct.s39 = type { i32, i16, [10 x i8] }
13; Structs with size < 16
14%struct.s40 = type { i32, i16, i32, i16 }
15%struct.s41 = type { i32, i16, i32, i16 }
16; Structs with size > 16
17%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
18%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }
19
; Every struct type has two globals (gNN and gNN_2) so that each caller can
; pass two distinct values as s1 and s2. g39, g41 and g43 are 16-byte aligned;
; g38, g40 and g42 are 4-byte aligned.
20@g38 = common global %struct.s38 zeroinitializer, align 4
21@g38_2 = common global %struct.s38 zeroinitializer, align 4
22@g39 = common global %struct.s39 zeroinitializer, align 16
23@g39_2 = common global %struct.s39 zeroinitializer, align 16
24@g40 = common global %struct.s40 zeroinitializer, align 4
25@g40_2 = common global %struct.s40 zeroinitializer, align 4
26@g41 = common global %struct.s41 zeroinitializer, align 16
27@g41_2 = common global %struct.s41 zeroinitializer, align 16
28@g42 = common global %struct.s42 zeroinitializer, align 4
29@g42_2 = common global %struct.s42 zeroinitializer, align 4
30@g43 = common global %struct.s43 zeroinitializer, align 16
31@g43_2 = common global %struct.s43 zeroinitializer, align 16
32
33; structs with size < 8 bytes, passed via i64 in x1 and x2
; Callee: returns %i plus, for each coerced argument, its low 32 bits and the
; sign-extended 16-bit value held in bits 32-47. The checks only pin the
; first two adds, which consume w0, w1 and w2 directly.
34define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
35entry:
36; CHECK-LABEL: f38
37; CHECK: add w[[A:[0-9]+]], w1, w0
38; CHECK: add {{w[0-9]+}}, w[[A]], w2
  ; Low 32 bits of each argument, and each argument shifted down by 32.
39  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
40  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
41  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
42  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
  ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit value of s1.
43  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
44  %sext = trunc i64 %sext8 to i32
45  %conv = ashr exact i32 %sext, 16
  ; Same sign-extension sequence for s2.
46  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
47  %sext10 = trunc i64 %sext1011 to i32
48  %conv6 = ashr exact i32 %sext10, 16
  ; Accumulate: i + s1.low32 + s2.low32 + s1.short + s2.short.
49  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
50  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
51  %add4 = add i32 %add3, %conv
52  %add7 = add i32 %add4, %conv6
53  ret i32 %add7
54}
55
; Caller: loads @g38 and @g38_2 as single i64 values and tail-calls f38 with
; the constant 3 as the leading i32. The checks expect the two loads to land
; in x1 and x2.
56define i32 @caller38() #1 {
57entry:
58; CHECK-LABEL: caller38
59; CHECK: ldr x1,
60; CHECK: ldr x2,
61  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
62  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
63  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
64  ret i32 %call
65}
66
; Callee with nine leading i32 parameters ahead of the two coerced structs,
; so %i9 and both i64 values are expected on the stack (see the checks in
; the caller below).
67declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
68                i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0
69
70; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
71; i9 at [sp]
; Caller: passes the constants 1..9 followed by @g38 and @g38_2 loaded as i64.
72define i32 @caller38_stack() #1 {
73entry:
74; CHECK-LABEL: caller38_stack
75; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
76; CHECK: mov w[[C:[0-9]+]], #9
77; CHECK: str w[[C]], [sp]
78  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
79  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
80  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
81                                   i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
82  ret i32 %call
83}
84
85; structs with size < 8 bytes, alignment of 16
86; passed via i128 in x1 and x3
; Callee: same arithmetic as the i64 variant, but each struct arrives as an
; i128. Returns %i plus, for each argument, its low 32 bits and the
; sign-extended 16-bit value held in bits 32-47. The checks pin only the
; first two adds, which read w0, w1 and w3.
87define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
88entry:
89; CHECK-LABEL: f39
90; CHECK: add w[[A:[0-9]+]], w1, w0
91; CHECK: add {{w[0-9]+}}, w[[A]], w3
  ; Low 32 bits of each argument, and each argument shifted down by 32.
92  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
93  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
94  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
95  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit value of s1.
96  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
97  %sext = trunc i128 %sext8 to i32
98  %conv = ashr exact i32 %sext, 16
  ; Same sign-extension sequence for s2.
99  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
100  %sext10 = trunc i128 %sext1011 to i32
101  %conv6 = ashr exact i32 %sext10, 16
  ; Accumulate: i + s1.low32 + s2.low32 + s1.short + s2.short.
102  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
103  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
104  %add4 = add i32 %add3, %conv
105  %add7 = add i32 %add4, %conv6
106  ret i32 %add7
107}
108
; Caller: loads @g39 and @g39_2 as 16-byte-aligned i128 values and tail-calls
; f39 with the constant 3. The checks expect each i128 to be loaded as a
; register pair: x1/x2 for the first and x3/x4 for the second.
109define i32 @caller39() #1 {
110entry:
111; CHECK-LABEL: caller39
112; CHECK: ldp x1, x2,
113; CHECK: ldp x3, x4,
114  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
115  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
116  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
117  ret i32 %call
118}
119
; Callee with nine leading i32 parameters ahead of two i128 values, so %i9
; and both i128 values are expected on the stack.
120declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
121                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
122
123; structs with size < 8 bytes, alignment 16
124; passed on stack at [sp+16] and [sp+32]
; Caller: passes the constants 1..9 followed by @g39 and @g39_2 loaded as
; i128. The checks expect the value 9 at [sp] and the two i128 values at the
; 16-byte-aligned offsets 16 and 32.
125define i32 @caller39_stack() #1 {
126entry:
127; CHECK-LABEL: caller39_stack
128; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
129; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
130; CHECK: mov w[[C:[0-9]+]], #9
131; CHECK: str w[[C]], [sp]
132  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
133  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
134  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
135                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
136  ret i32 %call
137}
138
139; structs with size < 16 bytes
140; passed via [2 x i64] in x1 and x3
; Callee: only element 0 of each [2 x i64] argument is used. Returns %i plus,
; for each element 0, its low 32 bits and the sign-extended 16-bit value held
; in bits 32-47. The checks pin only the first two adds, which read w0, w1
; and w3.
141define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
142entry:
143; CHECK-LABEL: f40
144; CHECK: add w[[A:[0-9]+]], w1, w0
145; CHECK: add {{w[0-9]+}}, w[[A]], w3
  ; First i64 of each array, and its low 32 bits.
146  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
147  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
148  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
149  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
  ; lshr 32 / shl 16 / trunc / ashr 16 sign-extends the 16-bit value of s1.
150  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
151  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
152  %sext = trunc i64 %sext8 to i32
153  %conv = ashr exact i32 %sext, 16
  ; Same sign-extension sequence for s2.
154  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
155  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
156  %sext10 = trunc i64 %sext1011 to i32
157  %conv6 = ashr exact i32 %sext10, 16
  ; Accumulate: i + s1.low32 + s2.low32 + s1.short + s2.short.
158  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
159  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
160  %add4 = add i32 %add3, %conv
161  %add7 = add i32 %add4, %conv6
162  ret i32 %add7
163}
164
; Caller: loads @g40 and @g40_2 as [2 x i64] values (4-byte aligned) and
; tail-calls f40 with the constant 3. The checks expect the arrays to be
; loaded as the register pairs x1/x2 and x3/x4.
165define i32 @caller40() #1 {
166entry:
167; CHECK-LABEL: caller40
168; CHECK: ldp x1, x2,
169; CHECK: ldp x3, x4,
170  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
171  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
172  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
173  ret i32 %call
174}
175
; Callee with nine leading i32 parameters ahead of two [2 x i64] values, so
; %i9 and both arrays are expected on the stack.
176declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
177                i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0
178
179; structs with size < 16 bytes
180; passed on stack at [sp+8] and [sp+24]
; Caller: passes the constants 1..9 followed by @g40 and @g40_2 loaded as
; [2 x i64]. Unlike the i128 variants, the checks expect the two arrays at
; the 8-byte-aligned offsets 8 and 24, with the value 9 at [sp].
181define i32 @caller40_stack() #1 {
182entry:
183; CHECK-LABEL: caller40_stack
184; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
185; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
186; CHECK: mov w[[C:[0-9]+]], #9
187; CHECK: str w[[C]], [sp]
188  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
189  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
190  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
191                         i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
192  ret i32 %call
193}
194
195; structs with size < 16 bytes, alignment of 16
196; passed via i128 in x1 and x3
; Callee: each struct arrives as an i128. Returns %i plus, for each argument,
; its low 32 bits and the sign-extended 16-bit value held in bits 32-47. The
; checks pin only the first two adds, which read w0, w1 and w3.
197define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
198entry:
199; CHECK-LABEL: f41
200; CHECK: add w[[A:[0-9]+]], w1, w0
201; CHECK: add {{w[0-9]+}}, w[[A]], w3
  ; Low 32 bits of each argument, and each argument shifted down by 32.
202  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
203  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
204  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
205  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc to i32 / ashr 16 sign-extends the 16-bit value of s1.
206  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
207  %sext = trunc i128 %sext8 to i32
208  %conv = ashr exact i32 %sext, 16
  ; Same sign-extension sequence for s2.
209  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
210  %sext10 = trunc i128 %sext1011 to i32
211  %conv6 = ashr exact i32 %sext10, 16
  ; Accumulate: i + s1.low32 + s2.low32 + s1.short + s2.short.
212  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
213  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
214  %add4 = add i32 %add3, %conv
215  %add7 = add i32 %add4, %conv6
216  ret i32 %add7
217}
218
; Caller: loads @g41 and @g41_2 as 16-byte-aligned i128 values and tail-calls
; f41 with the constant 3. The checks expect the register pairs x1/x2 and
; x3/x4.
219define i32 @caller41() #1 {
220entry:
221; CHECK-LABEL: caller41
222; CHECK: ldp x1, x2,
223; CHECK: ldp x3, x4,
224  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
225  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
226  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
227  ret i32 %call
228}
229
; Callee with nine leading i32 parameters ahead of two i128 values, so %i9
; and both i128 values are expected on the stack.
230declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
231                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0
232
233; structs with size < 16 bytes, alignment of 16
234; passed on stack at [sp+16] and [sp+32]
; Caller: passes the constants 1..9 followed by @g41 and @g41_2 loaded as
; i128. The checks expect the value 9 at [sp] and the two i128 values at the
; 16-byte-aligned offsets 16 and 32.
235define i32 @caller41_stack() #1 {
236entry:
237; CHECK-LABEL: caller41_stack
238; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
239; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
240; CHECK: mov w[[C:[0-9]+]], #9
241; CHECK: str w[[C]], [sp]
242  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
243  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
244  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
245                            i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
246  ret i32 %call
247}
248
249; structs with size of 22 bytes, passed indirectly in x1 and x2
; Callee: receives pointers to two %struct.s42 values. Returns %i plus, for
; each struct, its first i32 field and its sign-extended first i16 field.
; Both runs pin the two i32 loads through x1 and x2 and the first two adds.
250define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
251entry:
252; CHECK-LABEL: f42
253; CHECK: ldr w[[A:[0-9]+]], [x1]
254; CHECK: ldr w[[B:[0-9]+]], [x2]
255; CHECK: add w[[C:[0-9]+]], w[[A]], w0
256; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; The label form below matches every other function in the fast-isel run and
; anchors the following directives to this function's output.
257; FAST-LABEL: f42
258; FAST: ldr w[[A:[0-9]+]], [x1]
259; FAST: ldr w[[B:[0-9]+]], [x2]
260; FAST: add w[[C:[0-9]+]], w[[A]], w0
261; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  ; Field 0 (i32) of each struct.
262  %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
263  %0 = load i32, i32* %i1, align 4, !tbaa !0
264  %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
265  %1 = load i32, i32* %i2, align 4, !tbaa !0
  ; Field 1 (i16) of each struct, sign-extended to i32.
266  %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
267  %2 = load i16, i16* %s, align 2, !tbaa !3
268  %conv = sext i16 %2 to i32
269  %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
270  %3 = load i16, i16* %s5, align 2, !tbaa !3
271  %conv6 = sext i16 %3 to i32
  ; Accumulate: i + s1.field0 + s2.field0 + s1.field1 + s2.field1.
272  %add = add i32 %0, %i
273  %add3 = add i32 %add, %1
274  %add4 = add i32 %add3, %conv
275  %add7 = add i32 %add4, %conv6
276  ret i32 %add7
277}
278
279; For s1, we allocate a 22-byte space, pass its address via x1
; (the memcpy calls below copy 24 bytes into each temporary; the address of
; the second temporary is expected in x2)
; Caller: copies @g42 and @g42_2 into two stack temporaries and calls f42
; with the constant 3 and the addresses of those temporaries.
280define i32 @caller42() #3 {
281entry:
282; CHECK-LABEL: caller42
283; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
284; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
285; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
286; CHECK-DAG: str {{q[0-9]+}}, [sp]
287; CHECK: add x1, sp, #32
288; CHECK: mov x2, sp
289; Space for s1 is allocated at sp+32
290; Space for s2 is allocated at sp
291
292; FAST-LABEL: caller42
293; FAST: sub sp, sp, #96
294; Space for s1 is allocated at fp-24 = sp+56
295; FAST: sub x[[A:[0-9]+]], x29, #24
296; Call memcpy with size = 24 (0x18)
297; FAST: orr {{x[0-9]+}}, xzr, #0x18
298; Space for s2 is allocated at sp+32
299; FAST: add x[[A:[0-9]+]], sp, #32
300; FAST: bl _memcpy
  ; %tmp holds the copy of @g42 (s1); %tmp1 holds the copy of @g42_2 (s2).
301  %tmp = alloca %struct.s42, align 4
302  %tmp1 = alloca %struct.s42, align 4
303  %0 = bitcast %struct.s42* %tmp to i8*
304  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
305  %1 = bitcast %struct.s42* %tmp1 to i8*
306  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
307  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
308  ret i32 %call
309}
310
; memcpy intrinsic used by the callers that copy a struct into a stack
; temporary: (destination, source, byte count, volatile flag).
311declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4
312
; Callee with nine leading i32 parameters ahead of the two struct pointers,
; so %i9 and both pointers are expected on the stack.
313declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
314                       i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
315                       %struct.s42* nocapture %s2) #2
316
; Caller: copies @g42 and @g42_2 (24 bytes each) into two stack temporaries
; and calls f42_stack with the constants 1..9 and the addresses of the
; temporaries. Both runs expect the value 9 at [sp], the address of s1 at
; [sp, #8] and the address of s2 at [sp, #16].
317define i32 @caller42_stack() #3 {
318entry:
319; CHECK-LABEL: caller42_stack
320; CHECK: sub sp, sp, #112
321; CHECK: add x29, sp, #96
322; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
323; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
324; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
325; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
326; Space for s1 is allocated at x29-32 = sp+64
327; Space for s2 is allocated at sp+32
328; CHECK: add x[[B:[0-9]+]], sp, #32
329; CHECK: str x[[B]], [sp, #16]
330; CHECK: sub x[[A:[0-9]+]], x29, #32
331; Address of s1 is passed on stack at sp+8
332; CHECK: str x[[A]], [sp, #8]
333; CHECK: mov w[[C:[0-9]+]], #9
334; CHECK: str w[[C]], [sp]
335
336; FAST-LABEL: caller42_stack
337; Space for s1 is allocated at fp-24
338; FAST: sub x[[A:[0-9]+]], x29, #24
339; Call memcpy with size = 24 (0x18)
340; FAST: orr {{x[0-9]+}}, xzr, #0x18
341; FAST: bl _memcpy
342; Space for s2 is allocated at fp-48
343; FAST: sub x[[B:[0-9]+]], x29, #48
344; Call memcpy again
345; FAST: bl _memcpy
346; Address of s1 is passed on stack at sp+8
347; FAST: str {{w[0-9]+}}, [sp]
348; FAST: str {{x[0-9]+}}, [sp, #8]
349; FAST: str {{x[0-9]+}}, [sp, #16]
  ; %tmp holds the copy of @g42 (s1); %tmp1 holds the copy of @g42_2 (s2).
350  %tmp = alloca %struct.s42, align 4
351  %tmp1 = alloca %struct.s42, align 4
352  %0 = bitcast %struct.s42* %tmp to i8*
353  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
354  %1 = bitcast %struct.s42* %tmp1 to i8*
355  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
356  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
357                       i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
358  ret i32 %call
359}
360
361; structs with size of 22 bytes, alignment of 16
362; passed indirectly in x1 and x2
; Callee: receives pointers to two %struct.s43 values. Returns %i plus, for
; each struct, its first i32 field and its sign-extended first i16 field.
; Both runs pin the two i32 loads through x1 and x2 and the first two adds.
363define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
364entry:
365; CHECK-LABEL: f43
366; CHECK: ldr w[[A:[0-9]+]], [x1]
367; CHECK: ldr w[[B:[0-9]+]], [x2]
368; CHECK: add w[[C:[0-9]+]], w[[A]], w0
369; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
370; FAST-LABEL: f43
371; FAST: ldr w[[A:[0-9]+]], [x1]
372; FAST: ldr w[[B:[0-9]+]], [x2]
373; FAST: add w[[C:[0-9]+]], w[[A]], w0
374; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  ; Field 0 (i32) of each struct.
375  %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
376  %0 = load i32, i32* %i1, align 4, !tbaa !0
377  %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
378  %1 = load i32, i32* %i2, align 4, !tbaa !0
  ; Field 1 (i16) of each struct, sign-extended to i32.
379  %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
380  %2 = load i16, i16* %s, align 2, !tbaa !3
381  %conv = sext i16 %2 to i32
382  %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
383  %3 = load i16, i16* %s5, align 2, !tbaa !3
384  %conv6 = sext i16 %3 to i32
  ; Accumulate: i + s1.field0 + s2.field0 + s1.field1 + s2.field1.
385  %add = add i32 %0, %i
386  %add3 = add i32 %add, %1
387  %add4 = add i32 %add3, %conv
388  %add7 = add i32 %add4, %conv6
389  ret i32 %add7
390}
391
; Caller: copies @g43 and @g43_2 (32 bytes each, 16-byte aligned) into two
; stack temporaries and calls f43 with the constant 3 and the addresses of
; those temporaries. Both runs expect s1's address (sp+32) in x1 and s2's
; address (sp) in x2; the default run expects 16-byte q-register stores,
; the fast-isel run expects 8-byte x-register stores.
392define i32 @caller43() #3 {
393entry:
394; CHECK-LABEL: caller43
395; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
396; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
397; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
398; CHECK-DAG: str {{q[0-9]+}}, [sp]
399; CHECK: add x1, sp, #32
400; CHECK: mov x2, sp
401; Space for s1 is allocated at sp+32
402; Space for s2 is allocated at sp
403
404; FAST-LABEL: caller43
405; FAST: add x29, sp, #64
406; Space for s1 is allocated at sp+32
407; Space for s2 is allocated at sp
408; FAST: str {{x[0-9]+}}, [sp, #32]
409; FAST: str {{x[0-9]+}}, [sp, #40]
410; FAST: str {{x[0-9]+}}, [sp, #48]
411; FAST: str {{x[0-9]+}}, [sp, #56]
412; FAST: str {{x[0-9]+}}, [sp]
413; FAST: str {{x[0-9]+}}, [sp, #8]
414; FAST: str {{x[0-9]+}}, [sp, #16]
415; FAST: str {{x[0-9]+}}, [sp, #24]
416; FAST: add x1, sp, #32
417; FAST: mov x2, sp
  ; %tmp holds the copy of @g43 (s1); %tmp1 holds the copy of @g43_2 (s2).
418  %tmp = alloca %struct.s43, align 16
419  %tmp1 = alloca %struct.s43, align 16
420  %0 = bitcast %struct.s43* %tmp to i8*
421  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
422  %1 = bitcast %struct.s43* %tmp1 to i8*
423  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
424  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
425  ret i32 %call
426}
427
; Callee with nine leading i32 parameters ahead of the two struct pointers,
; so %i9 and both pointers are expected on the stack.
428declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
429                       i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
430                       %struct.s43* nocapture %s2) #2
431
; Caller: copies @g43 and @g43_2 (32 bytes each, 16-byte aligned) into two
; stack temporaries and calls f43_stack with the constants 1..9 and the
; addresses of the temporaries. Both runs expect the value 9 at [sp], the
; address of s1 at [sp, #8] and the address of s2 at [sp, #16].
432define i32 @caller43_stack() #3 {
433entry:
434; CHECK-LABEL: caller43_stack
435; CHECK: sub sp, sp, #112
436; CHECK: add x29, sp, #96
437; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
438; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
439; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
440; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
441; Space for s1 is allocated at x29-32 = sp+64
442; Space for s2 is allocated at sp+32
443; CHECK: add x[[B:[0-9]+]], sp, #32
444; CHECK: str x[[B]], [sp, #16]
445; CHECK: sub x[[A:[0-9]+]], x29, #32
446; Address of s1 is passed on stack at sp+8
447; CHECK: str x[[A]], [sp, #8]
448; CHECK: mov w[[C:[0-9]+]], #9
449; CHECK: str w[[C]], [sp]
450
451; FAST-LABEL: caller43_stack
452; FAST: sub sp, sp, #112
453; Space for s1 is allocated at fp-32 = sp+64
454; Space for s2 is allocated at sp+32
455; FAST: stur {{x[0-9]+}}, [x29, #-32]
456; FAST: stur {{x[0-9]+}}, [x29, #-24]
457; FAST: stur {{x[0-9]+}}, [x29, #-16]
458; FAST: stur {{x[0-9]+}}, [x29, #-8]
459; FAST: str {{x[0-9]+}}, [sp, #32]
460; FAST: str {{x[0-9]+}}, [sp, #40]
461; FAST: str {{x[0-9]+}}, [sp, #48]
462; FAST: str {{x[0-9]+}}, [sp, #56]
463; FAST: str {{w[0-9]+}}, [sp]
464; Address of s1 is passed on stack at sp+8
465; FAST: sub x[[A:[0-9]+]], x29, #32
466; FAST: str x[[A]], [sp, #8]
467; FAST: add x[[B:[0-9]+]], sp, #32
468; FAST: str x[[B]], [sp, #16]
  ; %tmp holds the copy of @g43 (s1); %tmp1 holds the copy of @g43_2 (s2).
469  %tmp = alloca %struct.s43, align 16
470  %tmp1 = alloca %struct.s43, align 16
471  %0 = bitcast %struct.s43* %tmp to i8*
472  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
473  %1 = bitcast %struct.s43* %tmp1 to i8*
474  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
475  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
476                       i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
477  ret i32 %call
478}
479
480; rdar://13668927
481; Check that we don't split an i128.
; NOTE(review): presumably "split" means dividing the i128 between the one
; argument register left after seven i32 arguments and the stack -- confirm
; against the radar. The checks below expect the whole value on the stack.
482declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
483                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)
484
; Caller: passes the constants 1..7, then @g41 loaded as an i128, then the
; constant 8.
485define i32 @i128_split() {
486entry:
487; CHECK-LABEL: i128_split
488; "i128 %0" should be on stack at [sp].
489; "i32 8" should be on stack at [sp, #16].
490; CHECK: str {{w[0-9]+}}, [sp, #16]
491; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
492; FAST-LABEL: i128_split
493; FAST: sub sp, sp
494; FAST: mov x[[ADDR:[0-9]+]], sp
495; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
496; Load/Store opt is disabled with -O0, so the i128 is split.
497; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
498; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
499  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
500  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
501                                           i32 6, i32 7, i128 %0, i32 8) #5
502  ret i32 %call
503}
504
; Counterpart of the i128 case with an i64 in the eighth position: seven i32
; parameters, then an i64, then a trailing i32.
505declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
506                               i32 %i6, i32 %i7, i64 %s1, i32 %i8)
507
; Caller: passes the constants 1..7, then the first 8 bytes of @g41 loaded
; as an i64, then the constant 8. Both runs expect the i64 in x7 and the
; trailing i32 on the stack at [sp].
508define i32 @i64_split() {
509entry:
510; CHECK-LABEL: i64_split
511; "i64 %0" should be in register x7.
512; "i32 8" should be on stack at [sp].
513; CHECK: ldr x7, [{{x[0-9]+}}]
514; CHECK: str {{w[0-9]+}}, [sp]
515; FAST-LABEL: i64_split
516; FAST: ldr x7, [{{x[0-9]+}}]
517; FAST: mov x[[R0:[0-9]+]], sp
518; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
519; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
520  %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
521  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
522                                    i32 6, i32 7, i64 %0, i32 8) #5
523  ret i32 %call
524}
525
; Attribute groups referenced above:
;   #0 - by-value callees and the by-value *_stack declarations (readnone, noinline)
;   #1 - callers that only load globals and tail-call (readonly)
;   #2 - pointer-taking callees and their *_stack declarations (readonly, noinline)
;   #3 - callers that copy the struct into stack temporaries
;   #4 - the memcpy intrinsic declaration
;   #5 - attached to the call sites of the test callees
526attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
527attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
528attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
529attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
530attributes #4 = { nounwind }
531attributes #5 = { nobuiltin }
532
; TBAA nodes: !0 (int) and !3 (short) both hang off !1 (omnipotent char),
; whose parent is the !2 root. !4 is the tbaa.struct operand of the memcpy
; calls; read as (offset, size, tag) triples it lists an int at offsets
; 0, 8 and 16 and a short at offsets 4, 12 and 20.
533!0 = !{!"int", !1}
534!1 = !{!"omnipotent char", !2}
535!2 = !{!"Simple C/C++ TBAA"}
536!3 = !{!"short", !1}
537!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}
538