; RUN: llc -mtriple=thumbv7k-apple-watchos2.0 -o - %s | FileCheck %s

; Layout probe: every interesting member is preceded by an i8, so the member's
; offset inside the struct exposes the alignment the target assigns to it.
%struct = type { i8, i64, i8, double, i8, <2 x float>, i8, <4 x float> }

; Each of the four functions below returns the offset of one member of %struct
; (a GEP from a null base converted to i32); the expected immediate is that
; member's offset.

define i32 @test_i64_align() {
; CHECK-LABEL: test_i64_align:
; CHECK: movs r0, #8
  ret i32 ptrtoint(i64* getelementptr(%struct, %struct* null, i32 0, i32 1) to i32)
}

define i32 @test_f64_align() {
; CHECK-LABEL: test_f64_align:
; CHECK: movs r0, #24
  ret i32 ptrtoint(double* getelementptr(%struct, %struct* null, i32 0, i32 3) to i32)
}

define i32 @test_v2f32_align() {
; CHECK-LABEL: test_v2f32_align:
; CHECK: movs r0, #40
  ret i32 ptrtoint(<2 x float>* getelementptr(%struct, %struct* null, i32 0, i32 5) to i32)
}

define i32 @test_v4f32_align() {
; CHECK-LABEL: test_v4f32_align:
; CHECK: movs r0, #64
  ret i32 ptrtoint(<4 x float>* getelementptr(%struct, %struct* null, i32 0, i32 7) to i32)
}

; Key point here is that an extra register has to be saved so that the DPRs end
; up in an aligned location (as prologue/epilogue inserter had calculated).
define void @test_dpr_unwind_align() {
; CHECK-LABEL: test_dpr_unwind_align:
; CHECK: push {r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -24
; CHECK: .cfi_offset d8, -32
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK-NOT: add sp,
; CHECK: pop {r5, r6, r7, pc}

  ; Only r6, d8 and d9 are clobbered; r5 in the expected push list is the extra
  ; register mentioned above.
  call void asm sideeffect "", "~{r6},~{d8},~{d9}"()

  ; Any call will do; it is what the bl line above matches.
  call i32 @test_i64_align()
  ret void
}

; This time, there's no viable way to tack CS-registers onto the list: a real SP
; adjustment needs to be performed to put d8 and d9 where they should be.
define void @test_dpr_unwind_align_manually() {
; CHECK-LABEL: test_dpr_unwind_align_manually:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK-NOT: sub sp
; CHECK: push.w {r8, r11}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -40
; CHECK: .cfi_offset d8, -48
; [...]
; CHECK: bl _test_i64_align
; CHECK-NOT: add sp,
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop.w {r8, r11}
; CHECK: pop {r4, r5, r6, r7, pc}

  ; Clobber r4-r8 together with d8/d9 so that all of them must be saved.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{d8},~{d9}"()

  ; The callee is irrelevant; this call is what the bl line above matches.
  call i32 @test_i64_align()
  ret void
}

; With nothing but a CS1 area, the 4-byte sub is expected directly after the
; first push and before the vpush:
define void @test_dpr_unwind_align_just_cs1() {
; CHECK-LABEL: test_dpr_unwind_align_just_cs1:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #4
; CHECK: vpush {d8, d9}
; CHECK: .cfi_offset d9, -32
; CHECK: .cfi_offset d8, -40
; CHECK: sub sp, #8
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #8
; CHECK: vpop {d8, d9}
; CHECK: add sp, #4
; CHECK: pop {r4, r5, r6, r7, pc}

  ; Same as the previous test but without r8 in the clobber list.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{d8},~{d9}"()

  ; The callee is irrelevant; this call is what the bl line above matches.
  call i32 @test_i64_align()
  ret void
}

; Without any DPRs to save there is no reason to align the stack in stages: a
; single SP adjustment is expected.
define void @test_dpr_unwind_align_no_dprs() {
; CHECK-LABEL: test_dpr_unwind_align_no_dprs:
; CHECK: push {r4, r5, r6, r7, lr}
; CHECK: sub sp, #12
; [...]
; CHECK: bl _test_i64_align
; CHECK: add sp, #12
; CHECK: pop {r4, r5, r6, r7, pc}

  ; GPR clobbers only; no d-registers are involved here.
  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"()

  ; The callee is irrelevant; this call is what the bl line above matches.
  call i32 @test_i64_align()
  ret void
}

; 128-bit vectors should use 128-bit (i.e. correctly aligned) slots on
; the stack.
define <4 x float> @test_v128_stack_pass([8 x double], float, <4 x float> %in) {
; The incoming vector is expected at sp+16 and is loaded with a 128-bit
; alignment hint.
; NOTE(review): presumably the leading [8 x double] uses up the FP argument
; registers so that the float and the vector land on the stack -- confirm.
; CHECK-LABEL: test_v128_stack_pass:
; CHECK: add r[[SLOT:[0-9]+]], sp, #16
; CHECK: vld1.64 {d0, d1}, [r[[SLOT]]:128]

  ret <4 x float> %in
}

declare void @varargs(i32, ...)

; Calls to variadic functions take a different lowering route, but the outgoing
; vector must still be given a 128-bit aligned slot.
define void @test_v128_stack_pass_varargs(<4 x float> %in) {
; CHECK-LABEL: test_v128_stack_pass_varargs:
; CHECK: add r[[SLOT:[0-9]+]], sp, #16
; CHECK: vst1.64 {d0, d1}, [r[[SLOT]]:128]

  ; The vector is the last argument; the checks above expect it stored at sp+16.
  call void(i32, ...) @varargs(i32 undef, [3 x i32] undef, float undef, <4 x float> %in)
  ret void
}

; To be compatible with AAPCS's va_start model (store r0-r3 at incoming SP, give
; a single pointer), 64-bit quantities must be passed in an aligned register
; pair: here the i64 is expected in r2/r3 (r1 is not read) and the trailing i32
; is read from the stack.
define i64 @test_64bit_gpr_align(i32, i64 %r2_r3, i32 %sp) {
; CHECK-LABEL: test_64bit_gpr_align:
; CHECK: ldr [[STACKARG:r[0-9]+]], [sp]
; CHECK: adds r0, [[STACKARG]], r2
; CHECK: adc r1, r3, #0

  ; Zero-extend the stack-passed i32 and add it to the register-passed i64.
  %wide = zext i32 %sp to i64
  %total = add i64 %wide, %r2_r3
  ret i64 %total
}