• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
#include "libavutil/arm/asm.S"

/*
 * Register roles for ff_startcode_find_candidate_armv6 (APCS names).
 *
 * RESULT and BUF deliberately share a1: the buffer start is kept in a1 for
 * the whole scan and the return value is only formed at exit as PTR - BUF.
 * TMP3 lives in lr, which the function saves on entry.
 */
RESULT  .req    a1      @ return value: byte offset from the start of the buffer
BUF     .req    a1      @ first argument: start of the buffer (never advanced)
SIZE    .req    a2      @ second argument: byte count, reused as a biased loop counter
PATTERN .req    a3      @ 0x80008000 mask (top bit of each halfword)
PTR     .req    a4      @ read pointer, advanced as data is consumed
DAT0    .req    v1      @ DAT0-DAT3: data words loaded from the buffer
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
TMP0    .req    v5      @ TMP0-TMP3: per-word test results for DAT0-DAT3
TMP1    .req    v6
TMP2    .req    ip
TMP3    .req    lr

/* How many 32-byte cachelines ahead of PTR the main loop issues its pld. */
#define PRELOAD_DISTANCE 4
39
/*
 * innerloop4: test one word (4 bytes) of the buffer.
 *
 * In:   PTR     = address of the word to test
 *       SIZE    = running byte counter
 *       PATTERN = 0x80008000 (loaded by the function before first use)
 * Out:  DAT0    = the word that was loaded; PTR advanced by 4
 *       SIZE    = SIZE - 4
 *       TMP0/Z  = non-zero / NE when at least one 16-bit half of DAT0 is
 *                 0x0000 or 0x0001; zero / EQ otherwise
 *       C       = carry from the SIZE subtraction (set when no borrow)
 *
 * The function runs this with big-endian data loads selected, so the two
 * halves of DAT0 are the byte pairs at PTR+0/1 and PTR+2/3 in memory order;
 * a hit therefore means an aligned byte pair of 00 00 or 00 01.
 */
.macro innerloop4
        ldr     DAT0, [PTR], #4
        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
        sub     TMP0, DAT0, PATTERN, lsr #14    @ subtract 0x00020002: 2 from each halfword
        bic     TMP0, TMP0, DAT0                @ keep only bits the subtraction switched on
        ands    TMP0, TMP0, PATTERN             @ top bit of a halfword set => it was below 2
.endm
47
/*
 * innerloop16: test four consecutive words (16 bytes) of the buffer.
 *
 * Arguments:
 *   decrement   if non-empty, this many bytes are subtracted from SIZE
 *               with the flags updated (the carry is left for the caller)
 *   do_preload  if non-empty, issue a pld PRELOAD_DISTANCE*32 bytes past
 *               the advanced PTR
 *
 * In:   PTR, SIZE, PATTERN (= 0x80008000) as for innerloop4
 * Out:  DAT0-DAT3 = the four words loaded; PTR advanced by 16
 *       Z flag    = EQ when none of the four words contains a 16-bit half
 *                   equal to 0x0000 or 0x0001, NE otherwise
 *       TMP0-TMP3 = each word is only masked with PATTERN while all the
 *                   earlier words tested clean, so the first TMPn that is
 *                   non-zero identifies the first word with a hit; TMPs
 *                   after that one hold unmasked intermediate values and
 *                   must not be interpreted
 *
 * The four per-word tests are interleaved rather than written one after
 * another; the it/itt lines cover the conditional instructions for
 * Thumb-2 assembly.
 */
.macro innerloop16  decrement, do_preload
        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
 .ifnc "\do_preload",""
        pld     [PTR, #PRELOAD_DISTANCE*32]
 .endif
 .ifnc "\decrement",""
        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
 .endif
        sub     TMP0, DAT0, PATTERN, lsr #14    @ subtract 2 from each halfword of word 0
        sub     TMP1, DAT1, PATTERN, lsr #14    @ ... and of word 1
        bic     TMP0, TMP0, DAT0
        bic     TMP1, TMP1, DAT1
        sub     TMP2, DAT2, PATTERN, lsr #14
        sub     TMP3, DAT3, PATTERN, lsr #14
        ands    TMP0, TMP0, PATTERN             @ NE => hit in word 0
        bic     TMP2, TMP2, DAT2
        it      eq
        andseq  TMP1, TMP1, PATTERN             @ only evaluated while no hit so far
        bic     TMP3, TMP3, DAT3
        itt     eq
        andseq  TMP2, TMP2, PATTERN
        andseq  TMP3, TMP3, PATTERN
.endm
71
/*
 * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
 *
 * In:   a1 = buf, a2 = size
 * Out:  a1 = byte offset from buf of the first start code candidate, or
 *            the offset at which scanning ended when nothing was found
 *            (0 for an empty buffer).
 *
 * Buffers shorter than (PRELOAD_DISTANCE+3)*32 - 1 bytes are scanned one
 * byte at a time and stop at the first zero byte.  Longer buffers are
 * scanned bytewise up to word alignment, then a word / four words at a
 * time with big-endian data loads, looking for an aligned byte pair of
 * 00 00 or 00 01; when one is found the result is moved back by one byte
 * if the byte in front of the pair is zero.  Trailing bytes are again
 * checked one at a time.
 *
 * Saves and restores v1-v6 and lr; ip is used as scratch.  Data endianness
 * is switched to big-endian for the word loops and is set back to
 * little-endian on every path that leaves them.
 */
function ff_startcode_find_candidate_armv6, export=1
        push    {v1-v6,lr}
        mov     PTR, BUF                @ BUF (a1) is left intact; the result is PTR - BUF
        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
        @ before using code that does preloads
        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
        blo     60f

        @ Get to word-alignment, 1 byte at a time
        tst     PTR, #3
        beq     2f
1:      ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     PTR, #3
        bne     1b
2:      @ Get to 4-word alignment, 1 word at a time
        ldr     PATTERN, =0x80008000
        setend  be                      @ word loads now keep memory byte order, first byte on top
        tst     PTR, #12
        beq     4f
3:      innerloop4
        bne     91f
        tst     PTR, #12
        bne     3b
4:      @ Get to cacheline (8-word) alignment
        tst     PTR, #16
        beq     5f
        innerloop16  16
        bne     93f
5:      @ Check complete cachelines, with preloading
        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
        @ complete cachelines to go
        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32    @ bias SIZE so the subs below borrows at the stop point
6:      innerloop16  , do_preload
        bne     93f
        innerloop16  32
        bne     93f
        bcs     6b
        @ Preload trailing part-cacheline, if any
        tst     SIZE, #31
        beq     7f
        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
        @ Check remaining data without doing any more preloads. First
        @ do in chunks of 4 words:
7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16       @ undo the bias, less one 16-byte chunk
        bmi     9f
8:      innerloop16  16
        bne     93f
        bcs     8b
        @ Then in words:
9:      adds    SIZE, SIZE, #16 - 4     @ re-bias for 4-byte steps
        bmi     11f
10:     innerloop4
        bne     91f
        bcs     10b
11:     setend  le
        @ Check second byte of final halfword
        ldrb    DAT0, [PTR, #-1]
        teq     DAT0, #0
        beq     90f
        @ Check any remaining bytes
        tst     SIZE, #3                @ low two bits of SIZE are tested as the leftover byte count
        beq     13f
12:     ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     SIZE, #3
        bne     12b
        @ No candidate found
13:     sub     RESULT, PTR, BUF
        b       99f

60:     @ Small buffer - simply check by looping over bytes
        subs    SIZE, SIZE, #1
        @ Empty buffer: RESULT shares a1 with BUF, so the offset (0) must
        @ still be computed instead of returning with the pointer in a1.
        bcc     62f
61:     ldrb    DAT0, [PTR], #1
        subs    SIZE, SIZE, #1          @ carry stays set while bytes remain
        teq     DAT0, #0
        beq     90f
        bcs     61b
        @ No candidate found
62:     sub     RESULT, PTR, BUF
        b       99f

90:     @ Found a candidate at the preceding byte
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #1
        b       99f

91:     @ Found a candidate somewhere in the preceding 4 bytes
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #4      @ offset of the word just tested
        sub     TMP0, DAT0, #0x20000    @ repeat the test on the upper halfword alone
        bics    TMP0, TMP0, DAT0        @ N set => hit is in the first byte pair
        itt     pl
        ldrbpl  DAT0, [PTR, #-3]        @ hit in second pair: fetch the byte in front of it
        addpl   RESULT, RESULT, #2
        bpl     92f
        teq     RESULT, #0
        beq     98f @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-5]        @ byte in front of the word
92:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1      @ zero byte in front: the candidate starts one earlier
        b       98f

93:     @ Found a candidate somewhere in the preceding 16 bytes
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #16     @ offset of the first of the four words
        teq     TMP0, #0
        beq     95f @ not in first 4 bytes
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0        @ N set => hit is in the first byte pair of word 0
        itt     pl
        ldrbpl  DAT0, [PTR, #-15]
        addpl   RESULT, RESULT, #2
        bpl     94f
        teq     RESULT, #0
        beq     98f @ do not look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-17]
94:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
95:     add     RESULT, RESULT, #4      @ move on to word 1
        teq     TMP1, #0
        beq     96f @ not in next 4 bytes
        sub     TMP1, DAT1, #0x20000
        bics    TMP1, TMP1, DAT1
        itee    mi
        ldrbmi  DAT0, [PTR, #-13]       @ first pair hit: byte in front of word 1
        ldrbpl  DAT0, [PTR, #-11]       @ second pair hit: byte in front of that pair
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
96:     add     RESULT, RESULT, #4      @ move on to word 2
        teq     TMP2, #0
        beq     97f @ not in next 4 bytes
        sub     TMP2, DAT2, #0x20000
        bics    TMP2, TMP2, DAT2
        itee    mi
        ldrbmi  DAT0, [PTR, #-9]
        ldrbpl  DAT0, [PTR, #-7]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
97:     add     RESULT, RESULT, #4      @ only word 3 is left, so no TMP3 test is needed
        sub     TMP3, DAT3, #0x20000
        bics    TMP3, TMP3, DAT3
        itee    mi
        ldrbmi  DAT0, [PTR, #-5]
        ldrbpl  DAT0, [PTR, #-3]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        @ drop through to 98f
98:     setend  le                      @ exits from the word loops restore little-endian loads
99:     pop     {v1-v6,pc}
endfunc
240
241        .unreq  RESULT
242        .unreq  BUF
243        .unreq  SIZE
244        .unreq  PATTERN
245        .unreq  PTR
246        .unreq  DAT0
247        .unreq  DAT1
248        .unreq  DAT2
249        .unreq  DAT3
250        .unreq  TMP0
251        .unreq  TMP1
252        .unreq  TMP2
253        .unreq  TMP3
254