/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2015 Martin Storsjo
 * Copyright © 2015 Janne Grunau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

// Include guard: the macros below are defined once per translation unit.
#ifndef DAV1D_SRC_ARM_32_UTIL_S
#define DAV1D_SRC_ARM_32_UTIL_S

#include "config.h"
#include "src/arm/asm.S"

// Load the address of \val + \offset into \rd without going through any
// symbol-pointer indirection.
//
//   rd     - destination core register
//   val    - symbol whose address is wanted
//   offset - constant added to the symbol address (default 0)
//
// Two code sequences exist:
//   * movw/movt pair holding the absolute address. Used only for non-PIC
//     builds whose target is ARMv7 or ARMv6T2; those instructions do not
//     exist on older architecture versions, so they must not be emitted
//     unconditionally for every non-PIC build.
//   * Otherwise a PC-relative displacement is placed inline in the code,
//     loaded into \rd and added to pc. The branch skips the data word.
//
// The displacement is computed relative to the value pc has when read by
// the "add" at label 90002: the instruction address + 8 in ARM mode and
// + 4 in Thumb mode, which is what "8 - 4 * CONFIG_THUMB" expresses
// (assumes CONFIG_THUMB is 0 or 1; it is not defined in this file).
.macro movrel_local rd, val, offset=0
#if (__ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)) && !defined(PIC)
        movw            \rd, #:lower16:\val+\offset
        movt            \rd, #:upper16:\val+\offset
#else
        ldr             \rd,  90001f
        b               90002f
90001:
        .word           \val + \offset - (90002f + 8 - 4 * CONFIG_THUMB)
90002:
        add             \rd,  \rd,  pc
#endif
.endm

// Load the address of \val + \offset into \rd.
//
//   rd     - destination core register
//   val    - symbol whose address is wanted
//   offset - constant added to the symbol address (default 0)
//
// PIC builds for Apple targets fetch the address from a slot emitted into
// the non-lazy symbol pointer section: a PC-relative displacement to that
// slot is stored inline, loaded, and used as index for a pc-based load.
// \offset is applied afterwards with a separate add/sub; a negative
// offset is negated so that the immediate stays positive.
// All other configurations expand to movrel_local.
//
// Notes:
//   - The Apple path ends with a plain ".text" directive, so it returns to
//     the .text section regardless of which section was active before.
//   - The internal labels use the 9000x range, like movrel_local does.
//     Short labels such as 1: would capture a "1b"/"1f" reference that
//     code around the expansion site means for its own label.
.macro movrel rd, val, offset=0
#if defined(PIC) && defined(__APPLE__)
        ldr             \rd,  90001f
        b               90002f
90001:
        // Displacement from the pc value seen at 90002 to the pointer slot
        .word           90003f - (90002f + 8 - 4 * CONFIG_THUMB)
90002:
        ldr             \rd,  [pc, \rd]
.if \offset < 0
        sub             \rd,  \rd,  #-(\offset)
.elseif \offset > 0
        add             \rd,  \rd,  #\offset
.endif
        .non_lazy_symbol_pointer
90003:
        .indirect_symbol \val
        .word       0
        .text
#else
        movrel_local    \rd, \val, \offset
#endif
.endm

// Align the stack pointer down to a multiple of 16 and reserve \space
// bytes below it.
//
//   space - number of bytes to reserve; has to be an assemble-time
//           constant since it is evaluated by .if directives
//
// This macro clobbers r7 (and r12 on windows) and stores data at the
// bottom of the stack; sp is the start of the space allocated that
// the caller can use.
//
// Stack contents after the expansion, from high to low addresses:
//   - the alignment padding (old sp & 15 bytes),
//   - a 16 byte slot whose first word holds the padding amount,
//   - \space bytes; sp points at the lowest of them.
// add_sp_align with the same \space undoes all of this.
//
// \space is parenthesised wherever it is used, so that an argument written
// as an expression (e.g. 4096+4096) is treated as one value both in the
// conditions and in the operands.
.macro sub_sp_align space
#if CONFIG_THUMB
        // Thumb build: sp is copied to r7 first and masked there
        mov             r7,  sp
        and             r7,  r7,  #15
#else
        and             r7,  sp,  #15
#endif
        sub             sp,  sp,  r7
        // Now the stack is aligned, store the amount of adjustment back
        // on the stack, as we don't want to waste a register as frame
        // pointer.
        str             r7,  [sp, #-16]!
#ifdef _WIN32
.if (\space) > 8192
        // Here, we'd need to touch two (or more) pages while decrementing
        // the stack pointer.
        .error          "sub_sp_align doesn't support values over 8K at the moment"
.elseif (\space) > 4096
        // Read from the address 4096 bytes below sp (this touches that
        // page) before sp is moved past it; r12 only receives the dummy
        // load result.
        sub             r7,  sp,  #4096
        ldr             r12, [r7]
        sub             r7,  r7,  #((\space) - 4096)
        mov             sp,  r7
.else
        sub             sp,  sp,  #(\space)
.endif
#else
        // The amount is subtracted in two steps: the multiple of 4096
        // first, then the remainder (presumably so that each immediate
        // stays encodable - confirm against the instruction encodings).
.if (\space) >= 4096
        sub             sp,  sp,  #(\space)/4096*4096
.endif
.if ((\space) % 4096) != 0
        sub             sp,  sp,  #(\space)%4096
.endif
#endif
.endm

// Release the \space bytes reserved by sub_sp_align and restore the stack
// pointer to the value it had before sub_sp_align aligned it.
//
//   space - assemble-time constant; has to be the value that was given
//           to the matching sub_sp_align
//
// Clobbers r7, which receives the alignment amount that sub_sp_align
// stored on the stack.
//
// \space is parenthesised wherever it is used, so that an argument written
// as an expression (e.g. 4096+4096) is treated as one value both in the
// conditions and in the operands.
.macro add_sp_align space
.if (\space) >= 4096
        add             sp,  sp,  #(\space)/4096*4096
.endif
.if ((\space) % 4096) != 0
        add             sp,  sp,  #(\space)%4096
.endif
        // Fetch the saved alignment amount and step over its 16 byte slot
        ldr             r7,  [sp], #16
        // Add back the original stack adjustment
        add             sp,  sp,  r7
.endm

// In-place transpose of an 8x8 block of bytes, one row per d register.
//
//   q0-q3 - q registers used by the 32-bit stage
//   r0-r7 - d registers holding rows 0-7, used by the 16-bit and 8-bit
//           stages; they also receive the result
//
// The same storage is named twice. For the three stages to add up to a
// transpose, \q0 has to be the register pair \r0:\r1, \q1 = \r2:\r3,
// \q2 = \r4:\r5 and \q3 = \r6:\r7. The macro cannot check this - verify
// at the call sites.
.macro transpose_8x8b q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        // 32-bit stage: exchanges between rows i and i + 4
        vtrn.32         \q0,  \q2
        vtrn.32         \q1,  \q3

        // 16-bit stage: exchanges between rows i and i + 2
        vtrn.16         \r0,  \r2
        vtrn.16         \r1,  \r3
        vtrn.16         \r4,  \r6
        vtrn.16         \r5,  \r7

        // 8-bit stage: exchanges between neighbouring rows
        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
        vtrn.8          \r4,  \r5
        vtrn.8          \r6,  \r7
.endm

// In-place transpose of an 8x8 block of 16-bit elements, one row per
// q register.
//
//   r0-r7 - q registers holding rows 0-7; they also receive the result
//   d0-d7 - d registers exchanged pairwise (\d0 with \d4, ... \d3 with
//           \d7) in the first stage
//
// The vswp stage is the 64-bit step of the transpose. For that to hold,
// \d0-\d3 have to be the upper halves of \r0-\r3 and \d4-\d7 the lower
// halves of \r4-\r7. The macro cannot check this - verify at the call
// sites.
.macro transpose_8x8h r0, r1, r2, r3, r4, r5, r6, r7, d0, d1, d2, d3, d4, d5, d6, d7
        // 64-bit stage: exchanges between rows i and i + 4
        vswp            \d0,  \d4
        vswp            \d1,  \d5
        vswp            \d2,  \d6
        vswp            \d3,  \d7

        // 32-bit stage: exchanges between rows i and i + 2
        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3
        vtrn.32         \r4,  \r6
        vtrn.32         \r5,  \r7

        // 16-bit stage: exchanges between neighbouring rows
        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
        vtrn.16         \r4,  \r5
        vtrn.16         \r6,  \r7
.endm

// Byte transpose step made of one vtrn.16 on q registers followed by two
// vtrn.8 on d registers; going by the name it handles a 4x8 block of
// bytes. The result is left in place.
//
//   q0, q1 - q registers used by the 16-bit stage
//   r0-r3  - d registers used by the 8-bit stage
//
// NOTE(review): presumably \q0 is the pair \r0:\r1 and \q1 the pair
// \r2:\r3, i.e. both argument groups name the same storage - confirm
// against the call sites, which also define the exact row layout.
.macro transpose_4x8b q0, q1, r0, r1, r2, r3
        vtrn.16         \q0,  \q1

        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
.endm

// In-place transpose of a 4x4 block of 32-bit elements, one row per
// q register.
//
//   q0-q3 - q registers holding the rows; they also receive the result
//   r0-r7 - the d registers making up \q0-\q3. Only \r1, \r3, \r4 and
//           \r6 are used; the remaining ones are accepted but not
//           referenced.
//
// The 64-bit stage is written as vswp on d registers, standing in for
// "vtrn.64" on the q registers as noted next to each instruction. This
// requires \r1 and \r3 to be the upper halves of \q0 and \q1, and \r4 and
// \r6 the lower halves of \q2 and \q3.
.macro transpose_4x4s q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        vswp            \r1,  \r4 // vtrn.64 \q0, \q2
        vswp            \r3,  \r6 // vtrn.64 \q1, \q3

        // 32-bit stage: exchanges between neighbouring rows
        vtrn.32         \q0,  \q1
        vtrn.32         \q2,  \q3
.endm

// In-place transpose of a 4x4 block of 16-bit elements, one row per
// d register.
//
//   q0, q1 - q registers used by the 32-bit stage
//   r0-r3  - d registers holding rows 0-3, used by the 16-bit stage; they
//            also receive the result
//
// The same storage is named twice: for the two stages to add up to a
// transpose, \q0 has to be the pair \r0:\r1 and \q1 the pair \r2:\r3.
// The macro cannot check this - verify at the call sites.
.macro transpose_4x4h q0, q1, r0, r1, r2, r3
        // 32-bit stage: exchanges between rows i and i + 2
        vtrn.32         \q0,  \q1

        // 16-bit stage: exchanges between neighbouring rows
        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

// Transpose 16-bit elements across four registers, in place.
//
//   r0-r3 - registers holding rows 0-3; they also receive the result
//
// When q registers are passed, vtrn pairs up elements over the whole
// 128-bit width, so the lower and the upper 64-bit halves each go through
// the same 4x4 transpose independently of each other.
.macro transpose_4x8h r0, r1, r2, r3
        // 32-bit stage: exchanges between rows i and i + 2
        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3

        // 16-bit stage: exchanges between neighbouring rows
        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

#endif /* DAV1D_SRC_ARM_32_UTIL_S */