; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
;
; Each function in this file clobbers a set of d-registers through an empty
; inline asm. The check lines pin the prologue/epilogue that llc emits around
; it, once with -align-neon-spills=0 (default prefix) and once with
; -align-neon-spills=1 (NEON prefix).
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios"

; Anchor on the emitted symbol (Mach-O prepends an underscore) so the checks
; below are matched inside this function's body only.
; CHECK-LABEL: _f:
; This function is forced to spill a double.
; Verify that the spill slot is properly aligned.
;
; The callee-saved r4 is used as a scratch register for stack realignment;
; it is pushed first so that it is free to hold the realigned stack address.
; CHECK: push {r4, r7, lr}
; CHECK: bfc r4, #0, #3
; CHECK: mov sp, r4
define void @f(double* nocapture %p) nounwind ssp {
entry:
  ; %0 is live across both the d8-d15 clobber and the call to @g.
  %0 = load double, double* %p, align 4
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
  tail call void @g() nounwind
  store double %0, double* %p, align 4
  ret void
}

; NEON-LABEL: _f:
; NEON: push {r4, r7, lr}
; NEON: sub.w r4, sp, #64
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
; subtract #8 here. That would leave sp less aligned than some stack slots,
; and would probably blow MFI's mind.
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop

declare void @g()

; Spill 7 d-registers.
define void @f7(double* nocapture %p) nounwind ssp {
entry:
  ; Clobber the contiguous range d8-d14 (seven registers, an odd count).
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
  ret void
}

; Anchor on the emitted symbol so that the epilogue checks cannot be
; satisfied by instructions belonging to a later function.
; NEON-LABEL: _f7:
; NEON: push {r4, r7, lr}
; NEON: sub.w r4, sp, #56
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13}, [r4:128]
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
; NEON: vld1.64 {d12, d13},
; NEON: vldr d14,
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop

; Spill 7 d-registers, leave a hole.
define void @f3plus4(double* nocapture %p) nounwind ssp {
entry:
  ; d11 is deliberately absent from the clobber list: d8-d10 and d12-d15.
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
  ret void
}

; Aligned spilling only works for contiguous ranges starting from d8.
; The rest goes to the standard vpush instructions.
; NEON-LABEL: _f3plus4:
; NEON: push {r4, r7, lr}
; NEON: vpush {d12, d13, d14, d15}
; NEON: sub.w r4, sp, #24
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
; NEON: vldr d10, [{{.*}}, #16]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: vpop {d12, d13, d14, d15}
; NEON: pop