; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
; RUN: llc < %s -O0 -disable-fp-elim | FileCheck -check-prefix=FAST %s
target triple = "arm64-apple-darwin"

; rdar://12648441
; Generated from arm64-arguments.c with -O2.
; Test passing structs with size < 8, < 16 and > 16
; with alignment of 16 and without
;
; The first llc invocation above is verified with the default check prefix.
; The second one compiles at -O0 and is verified with the alternate prefix
; selected by -check-prefix, so the two sets of expectations are independent.

; Structs with size < 8
%struct.s38 = type { i32, i16 }
; With alignment of 16, the size will be padded to multiple of 16 bytes.
; (The trailing [10 x i8] member is that padding.)
%struct.s39 = type { i32, i16, [10 x i8] }
; Structs with size < 16
; s40 and s41 have identical members; they differ only in the alignment of
; their globals below (4 vs. 16) and in how the callers coerce them.
%struct.s40 = type { i32, i16, i32, i16 }
%struct.s41 = type { i32, i16, i32, i16 }
; Structs with size > 16
%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }

; Two zero-initialized globals per struct type; each caller below reads one
; pair and passes both values (or copies of them) to the matching callee.
@g38 = common global %struct.s38 zeroinitializer, align 4
@g38_2 = common global %struct.s38 zeroinitializer, align 4
@g39 = common global %struct.s39 zeroinitializer, align 16
@g39_2 = common global %struct.s39 zeroinitializer, align 16
@g40 = common global %struct.s40 zeroinitializer, align 4
@g40_2 = common global %struct.s40 zeroinitializer, align 4
@g41 = common global %struct.s41 zeroinitializer, align 16
@g41_2 = common global %struct.s41 zeroinitializer, align 16
@g42 = common global %struct.s42 zeroinitializer, align 4
@g42_2 = common global %struct.s42 zeroinitializer, align 4
@g43 = common global %struct.s43 zeroinitializer, align 16
@g43_2 = common global %struct.s43 zeroinitializer, align 16

; structs with size < 8 bytes, passed via i64 in x1 and x2
;
; Returns %i plus, for each i64 argument, its low 32 bits and the
; sign-extended 16-bit value held in bits 32..47.
; The expectations only pin the first two additions: w1 + w0, then + w2.
define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f38
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w2
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends bits 32..47 of the argument to i32.
  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g38 and @g38_2 as single i64 values and tail-calls f38(3, ...).
; The loads are expected to target the argument registers x1 and x2 directly.
define i32 @caller38() #1 {
entry:
; CHECK-LABEL: caller38
; CHECK: ldr x1,
; CHECK: ldr x2,
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
  ret i32 %call
}

; Callee with nine leading i32 parameters, so the ninth i32 and both i64
; struct values no longer fit in argument registers.
declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0

; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
; i9 at [sp]
define i32 @caller38_stack() #1 {
entry:
; CHECK-LABEL: caller38_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
  ret i32 %call
}

; structs with size < 8 bytes, alignment of 16
; passed via i128 in x1 and x3
;
; Same arithmetic as f38, but on i128 arguments: returns %i plus, for each
; argument, its low 32 bits and the sign-extended value of bits 32..47.
; The expectations read the low words of the arguments from w1 and w3.
define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f39
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends bits 32..47 of the argument to i32.
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g39 and @g39_2 as i128 values and tail-calls f39(3, ...).
; Each i128 is expected to be loaded as a register pair: x1/x2 and x3/x4.
define i32 @caller39() #1 {
entry:
; CHECK-LABEL: caller39
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

; Callee with nine leading i32 parameters followed by two i128 values.
declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 8 bytes, alignment 16
; passed on stack at [sp+16] and [sp+32]
; (the ninth i32 is expected at [sp]; the i128 slots start at the next
; 16-byte boundary)
define i32 @caller39_stack() #1 {
entry:
; CHECK-LABEL: caller39_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size < 16 bytes
; passed via [2 x i64] in x1 and x3
;
; Only element 0 of each array argument is used: the function returns %i
; plus, for each argument, the low 32 bits of element 0 and the sign-extended
; value of its bits 32..47.
define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
entry:
; CHECK-LABEL: f40
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
  ; shl 16 / trunc / ashr 16 sign-extends bits 32..47 of element 0 to i32.
  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g40 and @g40_2 as [2 x i64] aggregates and tail-calls f40(3, ...).
; Each aggregate is expected to be loaded as a register pair: x1/x2, x3/x4.
define i32 @caller40() #1 {
entry:
; CHECK-LABEL: caller40
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

; Callee with nine leading i32 parameters followed by two [2 x i64] values.
declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0

; structs with size < 16 bytes
; passed on stack at [sp+8] and [sp+24]
; (the ninth i32 is expected at [sp]; unlike the i128 variants, the
; [2 x i64] slots are only 8-byte aligned)
define i32 @caller40_stack() #1 {
entry:
; CHECK-LABEL: caller40_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                         i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

; structs with size < 16 bytes, alignment of 16
; passed via i128 in x1 and x3
;
; Same body as f39: returns %i plus, for each i128 argument, its low 32 bits
; and the sign-extended value of bits 32..47.
define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f41
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  ; shl 16 / trunc / ashr 16 sign-extends bits 32..47 of the argument to i32.
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Loads @g41 and @g41_2 as i128 values and tail-calls f41(3, ...).
; Each i128 is expected to be loaded as a register pair: x1/x2 and x3/x4.
define i32 @caller41() #1 {
entry:
; CHECK-LABEL: caller41
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

; Callee with nine leading i32 parameters followed by two i128 values.
declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 16 bytes, alignment of 16
; passed on stack at [sp+16] and [sp+32]
; (the ninth i32 is expected at [sp])
define i32 @caller41_stack() #1 {
entry:
; CHECK-LABEL: caller41_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                            i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size of 22 bytes, passed indirectly in x1 and x2
;
; Returns %i plus member 0 (i32) and the sign-extended member 1 (i16) of each
; pointed-to struct. Both runs expect the two i32 loads to go through x1 and
; x2 and to be added to w0 in that order.
define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f42
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f42
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; For s1, we allocate a 22-byte space, pass its address via x1
;
; Copies @g42 and @g42_2 (24 bytes each, per the memcpy length) into two
; stack temporaries and calls f42(3, &tmp, &tmp1).
define i32 @caller42() #3 {
entry:
; CHECK-LABEL: caller42
; CHECK: str {{x[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK: str {{x[0-9]+}}, [sp, #16]
; CHECK: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller42
; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-24 = sp+72
; Space for s2 is allocated at sp+48
; FAST: sub x[[A:[0-9]+]], x29, #24
; FAST: add x[[B:[0-9]+]], sp, #48
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

; memcpy intrinsic used by the s42/s43 callers to build the by-reference
; argument copies (operands: dest, src, length, alignment, volatile flag).
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4

; Callee with nine leading i32 parameters followed by two struct pointers.
declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
                       %struct.s42* nocapture %s2) #2

; Copies @g42 and @g42_2 into two stack temporaries and calls f42_stack with
; the constants 1..9 followed by the addresses of the copies. The ninth i32
; and both addresses are expected in the outgoing stack area at [sp],
; [sp, #8] and [sp, #16].
define i32 @caller42_stack() #3 {
entry:
; CHECK-LABEL: caller42_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK: stur {{x[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{x[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller42_stack
; Space for s1 is allocated at fp-24
; Space for s2 is allocated at fp-48
; FAST: sub x[[A:[0-9]+]], x29, #24
; FAST: sub x[[B:[0-9]+]], x29, #48
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; FAST: str {{w[0-9]+}}, [sp]
; Address of s1 is passed on stack at sp+8
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4
  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

; structs with size of 22 bytes, alignment of 16
; passed indirectly in x1 and x2
;
; Same body as f42 on %struct.s43: returns %i plus member 0 (i32) and the
; sign-extended member 1 (i16) of each pointed-to struct.
define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f43
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f43
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Copies @g43 and @g43_2 (32 bytes each, 16-byte aligned, per the memcpy
; operands) into two stack temporaries and calls f43(3, &tmp, &tmp1).
define i32 @caller43() #3 {
entry:
; CHECK-LABEL: caller43
; CHECK: str {{q[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; CHECK: str {{q[0-9]+}}, [sp, #16]
; CHECK: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller43
; FAST: add x29, sp, #64
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp
; FAST: add x1, sp, #32
; FAST: mov x2, sp
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{x[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
; FAST: str {{x[0-9]+}}, [sp, #24]
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

; Callee with nine leading i32 parameters followed by two struct pointers.
declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
                       %struct.s43* nocapture %s2) #2

; Copies @g43 and @g43_2 into two 16-byte aligned stack temporaries and calls
; f43_stack with the constants 1..9 followed by the addresses of the copies.
; The ninth i32 and both addresses are expected in the outgoing stack area at
; [sp], [sp, #8] and [sp, #16].
define i32 @caller43_stack() #3 {
entry:
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK: stur {{q[0-9]+}}, [x29, #-16]
; CHECK: stur {{q[0-9]+}}, [x29, #-32]
; CHECK: str {{q[0-9]+}}, [sp, #48]
; CHECK: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller43_stack
; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-32 = sp+64
; Space for s2 is allocated at sp+32
; FAST: sub x[[A:[0-9]+]], x29, #32
; FAST: add x[[B:[0-9]+]], sp, #32
; FAST: stur {{x[0-9]+}}, [x29, #-32]
; FAST: stur {{x[0-9]+}}, [x29, #-24]
; FAST: stur {{x[0-9]+}}, [x29, #-16]
; FAST: stur {{x[0-9]+}}, [x29, #-8]
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{w[0-9]+}}, [sp]
; Address of s1 is passed on stack at sp+8
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4
  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                       i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

; rdar://13668927
; Check that we don't split an i128.
; The callee takes seven i32 values, then an i128, then one more i32, so the
; i128 cannot be placed in a full register pair after the leading arguments.
declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)

; Loads @g41 as an i128 and passes it as the eighth argument, followed by the
; constant 8.
define i32 @i128_split() {
entry:
; CHECK-LABEL: i128_split
; "i128 %0" should be on stack at [sp].
; "i32 8" should be on stack at [sp, #16].
; CHECK: str {{w[0-9]+}}, [sp, #16]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; FAST-LABEL: i128_split
; FAST: sub sp, sp
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
; Load/Store opt is disabled with -O0, so the i128 is split.
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i128 %0, i32 8) #5
  ret i32 %call
}

; Counterpart of callee_i128_split with an i64 in the eighth position.
declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7, i64 %s1, i32 %i8)

; Loads the first 8 bytes of @g41 as an i64 and passes it as the eighth
; argument, followed by the constant 8.
define i32 @i64_split() {
entry:
; CHECK-LABEL: i64_split
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
; CHECK: str {{w[0-9]+}}, [sp]
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
  %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                    i32 6, i32 7, i64 %0, i32 8) #5
  ret i32 %call
}

; Attribute groups:
;   #0 - f38..f41 and the f38_stack..f41_stack declarations (readnone)
;   #1 - caller38..caller41 and their _stack variants (readonly)
;   #2 - f42, f43 and the f42_stack/f43_stack declarations (readonly)
;   #3 - caller42/caller43 and their _stack variants
;   #4 - the llvm.memcpy declaration
;   #5 - applied at every call site in this file
attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #4 = { nounwind }
attributes #5 = { nobuiltin }

; TBAA metadata: !0 tags the i32 loads and !3 the i16 loads in f42/f43.
; !4 is the !tbaa.struct descriptor attached to the memcpy calls, listing
; (offset, size, tag) triples for three i32/i16 member pairs.
!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}
!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}
