; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false -disable-fp-elim | FileCheck %s
; RUN: llc -fast-isel-sink-local-values < %s -mtriple=arm64-apple-darwin -O0 -disable-fp-elim -fast-isel | FileCheck -check-prefix=FAST %s

; rdar://12648441
; Generated from arm64-arguments.c with -O2.
; Test passing structs with size < 8, < 16 and > 16
; with alignment of 16 and without
;
; Two configurations are exercised: the default llc pipeline tuned for cyclone
; (machine scheduler disabled), and -O0 with fast-isel, whose expectations use
; the separate prefix given on the second run line.

; Structs with size < 8
%struct.s38 = type { i32, i16 }
; With alignment of 16, the size will be padded to multiple of 16 bytes.
%struct.s39 = type { i32, i16, [10 x i8] }
; Structs with size < 16
%struct.s40 = type { i32, i16, i32, i16 }
%struct.s41 = type { i32, i16, i32, i16 }
; Structs with size > 16
%struct.s42 = type { i32, i16, i32, i16, i32, i16 }
%struct.s43 = type { i32, i16, i32, i16, i32, i16, [10 x i8] }

; Two globals per struct type; the callers below pass both as arguments.
@g38 = common global %struct.s38 zeroinitializer, align 4
@g38_2 = common global %struct.s38 zeroinitializer, align 4
@g39 = common global %struct.s39 zeroinitializer, align 16
@g39_2 = common global %struct.s39 zeroinitializer, align 16
@g40 = common global %struct.s40 zeroinitializer, align 4
@g40_2 = common global %struct.s40 zeroinitializer, align 4
@g41 = common global %struct.s41 zeroinitializer, align 16
@g41_2 = common global %struct.s41 zeroinitializer, align 16
@g42 = common global %struct.s42 zeroinitializer, align 4
@g42_2 = common global %struct.s42 zeroinitializer, align 4
@g43 = common global %struct.s43 zeroinitializer, align 16
@g43_2 = common global %struct.s43 zeroinitializer, align 16

; structs with size < 8 bytes, passed via i64 in x1 and x2
define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f38
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w2
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i64 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i64 %s2.coerce, 32
  %sext8 = shl nuw nsw i64 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i64 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Caller side: each struct is loaded as one i64, expected directly in x1 / x2.
define i32 @caller38() #1 {
entry:
; CHECK-LABEL: caller38
; CHECK: ldr x1,
; CHECK: ldr x2,
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
  ret i32 %call
}

declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i64 %s1.coerce, i64 %s2.coerce) #0

; structs with size < 8 bytes, passed on stack at [sp+8] and [sp+16]
; i9 at [sp]
define i32 @caller38_stack() #1 {
entry:
; CHECK-LABEL: caller38_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
  %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
  ret i32 %call
}

; structs with size < 8 bytes, alignment of 16
; passed via i128 in x1 and x3
define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f39
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Caller side: each i128 is expected in a register pair (x1/x2 and x3/x4).
define i32 @caller39() #1 {
entry:
; CHECK-LABEL: caller39
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 8 bytes, alignment 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller39_stack() #1 {
entry:
; CHECK-LABEL: caller39_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
  %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size < 16 bytes
; passed via [2 x i64] in x1/x2 and x3/x4
define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
entry:
; CHECK-LABEL: f40
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
  %s2.coerce.fca.0.extract = extractvalue [2 x i64] %s2.coerce, 0
  %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce.fca.0.extract to i32
  %s2.sroa.0.0.extract.trunc = trunc i64 %s2.coerce.fca.0.extract to i32
  %s1.sroa.0.4.extract.shift = lshr i64 %s1.coerce.fca.0.extract, 32
  %sext8 = shl nuw nsw i64 %s1.sroa.0.4.extract.shift, 16
  %sext = trunc i64 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %s2.sroa.0.4.extract.shift = lshr i64 %s2.coerce.fca.0.extract, 32
  %sext1011 = shl nuw nsw i64 %s2.sroa.0.4.extract.shift, 16
  %sext10 = trunc i64 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Caller side: each [2 x i64] is expected in a register pair (x1/x2 and x3/x4).
define i32 @caller40() #1 {
entry:
; CHECK-LABEL: caller40
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0

; structs with size < 16 bytes
; passed on stack at [sp+8] and [sp+24]
define i32 @caller40_stack() #1 {
entry:
; CHECK-LABEL: caller40_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
  %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
  ret i32 %call
}

; structs with size < 16 bytes, alignment of 16
; passed via i128 in x1 and x3
define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
entry:
; CHECK-LABEL: f41
; CHECK: add w[[A:[0-9]+]], w1, w0
; CHECK: add {{w[0-9]+}}, w[[A]], w3
  %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
  %s1.sroa.1.4.extract.shift = lshr i128 %s1.coerce, 32
  %s2.sroa.0.0.extract.trunc = trunc i128 %s2.coerce to i32
  %s2.sroa.1.4.extract.shift = lshr i128 %s2.coerce, 32
  %sext8 = shl nuw nsw i128 %s1.sroa.1.4.extract.shift, 16
  %sext = trunc i128 %sext8 to i32
  %conv = ashr exact i32 %sext, 16
  %sext1011 = shl nuw nsw i128 %s2.sroa.1.4.extract.shift, 16
  %sext10 = trunc i128 %sext1011 to i32
  %conv6 = ashr exact i32 %sext10, 16
  %add = add i32 %s1.sroa.0.0.extract.trunc, %i
  %add3 = add i32 %add, %s2.sroa.0.0.extract.trunc
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Caller side: each i128 is expected in a register pair (x1/x2 and x3/x4).
define i32 @caller41() #1 {
entry:
; CHECK-LABEL: caller41
; CHECK: ldp x1, x2,
; CHECK: ldp x3, x4,
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
  ret i32 %call
}

declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, i128 %s1.coerce, i128 %s2.coerce) #0

; structs with size < 16 bytes, alignment of 16
; passed on stack at [sp+16] and [sp+32]
define i32 @caller41_stack() #1 {
entry:
; CHECK-LABEL: caller41_stack
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
  %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                   i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
  ret i32 %call
}

; structs with size of 22 bytes, passed indirectly in x1 and x2
define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f42
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f42
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; For s1, we allocate stack space for a copy (the memcpy below copies 24 bytes)
; and pass its address via x1
define i32 @caller42() #3 {
entry:
; CHECK-LABEL: caller42
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller42
; FAST: sub sp, sp, #96
; Space for s1 is allocated at fp-24 = sp+56
; FAST: sub x[[A:[0-9]+]], x29, #24
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; Space for s2 is allocated at sp+32
; FAST: add x[[A:[0-9]+]], sp, #32
; FAST: bl _memcpy
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
  %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4

declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1,
                       %struct.s42* nocapture %s2) #2

; i9 and the addresses of the two struct copies are expected on the stack at
; [sp], [sp, #8] and [sp, #16] respectively.
define i32 @caller42_stack() #3 {
entry:
; CHECK-LABEL: caller42_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{x[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{x[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller42_stack
; Space for s1 is allocated at fp-24
; FAST: sub x[[A:[0-9]+]], x29, #24
; Call memcpy with size = 24 (0x18)
; FAST: orr {{x[0-9]+}}, xzr, #0x18
; FAST: bl _memcpy
; Space for s2 is allocated at fp-48
; FAST: sub x[[B:[0-9]+]], x29, #48
; Call memcpy again
; FAST: bl _memcpy
; Address of s1 is passed on stack at sp+8
; FAST: str {{w[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
  %tmp = alloca %struct.s42, align 4
  %tmp1 = alloca %struct.s42, align 4
  %0 = bitcast %struct.s42* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s42* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4
  %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                              i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5
  ret i32 %call
}

; structs with size of 22 bytes, alignment of 16
; passed indirectly in x1 and x2
define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
entry:
; CHECK-LABEL: f43
; CHECK: ldr w[[A:[0-9]+]], [x1]
; CHECK: ldr w[[B:[0-9]+]], [x2]
; CHECK: add w[[C:[0-9]+]], w[[A]], w0
; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
; FAST-LABEL: f43
; FAST: ldr w[[A:[0-9]+]], [x1]
; FAST: ldr w[[B:[0-9]+]], [x2]
; FAST: add w[[C:[0-9]+]], w[[A]], w0
; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
  %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
  %0 = load i32, i32* %i1, align 4, !tbaa !0
  %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
  %1 = load i32, i32* %i2, align 4, !tbaa !0
  %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
  %2 = load i16, i16* %s, align 2, !tbaa !3
  %conv = sext i16 %2 to i32
  %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
  %3 = load i16, i16* %s5, align 2, !tbaa !3
  %conv6 = sext i16 %3 to i32
  %add = add i32 %0, %i
  %add3 = add i32 %add, %1
  %add4 = add i32 %add3, %conv
  %add7 = add i32 %add4, %conv6
  ret i32 %add7
}

; Caller side for the 16-byte aligned struct: 32 bytes are copied per struct,
; and both configurations expect the copies as plain stores to the stack.
define i32 @caller43() #3 {
entry:
; CHECK-LABEL: caller43
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #16]
; CHECK-DAG: str {{q[0-9]+}}, [sp]
; CHECK: add x1, sp, #32
; CHECK: mov x2, sp
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp

; FAST-LABEL: caller43
; FAST: add x29, sp, #64
; Space for s1 is allocated at sp+32
; Space for s2 is allocated at sp
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{x[0-9]+}}, [sp]
; FAST: str {{x[0-9]+}}, [sp, #8]
; FAST: str {{x[0-9]+}}, [sp, #16]
; FAST: str {{x[0-9]+}}, [sp, #24]
; FAST: add x1, sp, #32
; FAST: mov x2, sp
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
  %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
                       i32 %i7, i32 %i8, i32 %i9, %struct.s43* nocapture %s1,
                       %struct.s43* nocapture %s2) #2

; i9 and the addresses of the two struct copies are expected on the stack at
; [sp], [sp, #8] and [sp, #16] respectively.
define i32 @caller43_stack() #3 {
entry:
; CHECK-LABEL: caller43_stack
; CHECK: sub sp, sp, #112
; CHECK: add x29, sp, #96
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-16]
; CHECK-DAG: stur {{q[0-9]+}}, [x29, #-32]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #48]
; CHECK-DAG: str {{q[0-9]+}}, [sp, #32]
; Space for s1 is allocated at x29-32 = sp+64
; Space for s2 is allocated at sp+32
; CHECK: add x[[B:[0-9]+]], sp, #32
; CHECK: str x[[B]], [sp, #16]
; CHECK: sub x[[A:[0-9]+]], x29, #32
; Address of s1 is passed on stack at sp+8
; CHECK: str x[[A]], [sp, #8]
; CHECK: mov w[[C:[0-9]+]], #9
; CHECK: str w[[C]], [sp]

; FAST-LABEL: caller43_stack
; FAST: sub sp, sp, #112
; Space for s1 is allocated at fp-32 = sp+64
; Space for s2 is allocated at sp+32
; FAST: stur {{x[0-9]+}}, [x29, #-32]
; FAST: stur {{x[0-9]+}}, [x29, #-24]
; FAST: stur {{x[0-9]+}}, [x29, #-16]
; FAST: stur {{x[0-9]+}}, [x29, #-8]
; FAST: str {{x[0-9]+}}, [sp, #32]
; FAST: str {{x[0-9]+}}, [sp, #40]
; FAST: str {{x[0-9]+}}, [sp, #48]
; FAST: str {{x[0-9]+}}, [sp, #56]
; FAST: str {{w[0-9]+}}, [sp]
; Address of s1 is passed on stack at sp+8
; FAST: sub x[[A:[0-9]+]], x29, #32
; FAST: str x[[A]], [sp, #8]
; FAST: add x[[B:[0-9]+]], sp, #32
; FAST: str x[[B]], [sp, #16]
  %tmp = alloca %struct.s43, align 16
  %tmp1 = alloca %struct.s43, align 16
  %0 = bitcast %struct.s43* %tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4
  %1 = bitcast %struct.s43* %tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4
  %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
                              i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5
  ret i32 %call
}

; rdar://13668927
; Check that we don't split an i128.
declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                               i32 %i6, i32 %i7, i128 %s1, i32 %i8)

define i32 @i128_split() {
entry:
; CHECK-LABEL: i128_split
; "i128 %0" should be on stack at [sp].
; "i32 8" should be on stack at [sp, #16].
; CHECK: str {{w[0-9]+}}, [sp, #16]
; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
; FAST-LABEL: i128_split
; FAST: sub sp, sp
; FAST: mov x[[ADDR:[0-9]+]], sp
; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
; Load/Store opt is disabled with -O0, so the i128 is split.
; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
  %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                           i32 6, i32 7, i128 %0, i32 8) #5
  ret i32 %call
}

declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
                        i32 %i6, i32 %i7, i64 %s1, i32 %i8)

define i32 @i64_split() {
entry:
; CHECK-LABEL: i64_split
; "i64 %0" should be in register x7.
; "i32 8" should be on stack at [sp].
; CHECK: ldr x7, [{{x[0-9]+}}]
; CHECK: str {{w[0-9]+}}, [sp]
; FAST-LABEL: i64_split
; FAST: ldr x7, [{{x[0-9]+}}]
; FAST: mov x[[R0:[0-9]+]], sp
; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
  %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
  %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                    i32 6, i32 7, i64 %0, i32 8) #5
  ret i32 %call
}

attributes #0 = { noinline nounwind readnone "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #1 = { nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #2 = { noinline nounwind readonly "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" }
attributes #4 = { nounwind }
attributes #5 = { nobuiltin }

!0 = !{!"int", !1}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}
!3 = !{!"short", !1}
!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}