;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


; Entry points made visible to the linker.
    EXPORT  |vp8_subtract_mby_armv6|
    EXPORT  |vp8_subtract_mbuv_armv6|
    EXPORT  |vp8_subtract_b_armv6|

; NOTE(review): presumably the source of the vp8_block_* / vp8_blockd_*
; offset symbols used by vp8_subtract_b_armv6 - confirm.
    INCLUDE asm_enc_offsets.asm

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vp8_subtract_b_armv6
;
; Writes four rows of four 16-bit differences: diff = src - pred.
;
; In:
;   r0    BLOCK *be
;   r1    BLOCKD *bd
;   r2    int pitch   (pred advances by pitch bytes per row,
;                      diff advances by 2 * pitch bytes per row)
;
; r4-r9 are saved on entry and restored before returning.
; r0-r3 are overwritten.
;-----------------------------------------------------------------------
|vp8_subtract_b_armv6| PROC

    stmfd   sp!, {r4-r9}

    ldr     r4, [r0, #vp8_block_base_src]
    ldr     r5, [r0, #vp8_block_src]
    ldr     r6, [r0, #vp8_block_src_diff]   ; r6 = diff write pointer

    ldr     r3, [r4]
    ldr     r7, [r0, #vp8_block_src_stride]
    add     r3, r3, r5          ; src = *base_src + src
    ldr     r8, [r1, #vp8_blockd_predictor]

    mov     r9, #4              ; loop count

loop_block

    ldr     r0, [r3], r7        ; src  (4 bytes, then src += src_stride)
    ldr     r1, [r8], r2        ; pred (4 bytes, then pred += pitch)

    ; Widen each byte to a 16-bit lane, even and odd bytes separately.
    uxtb16  r4, r0              ; [s2 | s0]
    uxtb16  r5, r1              ; [p2 | p0]
    uxtb16  r0, r0, ror #8      ; [s3 | s1]
    uxtb16  r1, r1, ror #8      ; [p3 | p1]

    usub16  r4, r4, r5          ; [d2 | d0]
    usub16  r5, r0, r1          ; [d3 | d1]

    ; Flags set here are still live at the bne below: none of the
    ; instructions in between update the condition flags.
    subs    r9, r9, #1          ; decrement loop counter

    ; Re-interleave the lanes back into memory order.
    pkhbt   r0, r4, r5, lsl #16 ; [d1 | d0]
    pkhtb   r1, r5, r4, asr #16 ; [d3 | d2]

    str     r0, [r6, #0]        ; diff
    str     r1, [r6, #4]        ; diff

    add     r6, r6, r2, lsl #1  ; update diff pointer (pitch shorts)
    bne     loop_block

    ldmfd   sp!, {r4-r9}
    bx      lr                  ; interworking-safe return

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mbuv_armv6
;
; Writes 8 rows of 8 differences (src - pred) for the U plane, followed
; by 8 rows of 8 for the V plane, as 16-bit values.
;
; In:
;   r0    short *diff           (output starts 512 bytes past this pointer)
;   r1    unsigned char *usrc
;   r2    unsigned char *vsrc
;   r3    unsigned char *pred   (reads start 256 bytes past this pointer)
;   stack int stride            (bytes between rows of usrc / vsrc)
;
; diff and pred are walked contiguously: the V loop continues where the
; U loop stopped in both buffers.
;
; r4-r11 are saved on entry and restored before returning.
; r0-r3 are overwritten. r12 and lr are never written, so they are not
; saved.
;-----------------------------------------------------------------------
|vp8_subtract_mbuv_armv6| PROC

    stmfd   sp!, {r4-r11}       ; 8 registers = 32 bytes

    add     r0, r0, #512        ; set *diff point to Cb
    add     r3, r3, #256        ; set *pred point to Cb

    mov     r4, #8              ; loop count
    ldr     r5, [sp, #32]       ; stride (first stack arg, above saved regs)

    ; Subtract U block
loop_u
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r3], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r3], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r1, r1, r5          ; update usrc pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_u

    mov     r4, #8              ; loop count

    ; Subtract V block
loop_v
    ldr     r6, [r2]            ; src       (A)
    ldr     r7, [r3], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r2, #4]       ; src       (B)
    ldr     r11, [r3], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    add     r2, r2, r5          ; update vsrc pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (B)

    bne     loop_v

    ldmfd   sp!, {r4-r11}
    bx      lr                  ; interworking-safe return

    ENDP


;-----------------------------------------------------------------------
; vp8_subtract_mby_armv6
;
; Writes 16 rows of 16 differences (src - pred) as 16-bit values.
;
; In:
;   r0    short *diff           (written contiguously, 32 bytes per row)
;   r1    unsigned char *src    (advanced by stride bytes per row)
;   r2    unsigned char *pred   (read contiguously, 16 bytes per row)
;   r3    int stride
;
; Each row is handled as four 4-pixel groups (A-D); the loads for the
; next group are interleaved with the arithmetic of the current one.
;
; r4-r11 are saved on entry and restored before returning.
; r0-r2 are overwritten.
;-----------------------------------------------------------------------
|vp8_subtract_mby_armv6| PROC

    stmfd   sp!, {r4-r11}

    mov     r4, #16             ; row count
loop_y
    ldr     r6, [r1]            ; src       (A)
    ldr     r7, [r2], #4        ; pred      (A)

    uxtb16  r8, r6              ; [s2 | s0] (A)
    uxtb16  r9, r7              ; [p2 | p0] (A)
    uxtb16  r10, r6, ror #8     ; [s3 | s1] (A)
    uxtb16  r11, r7, ror #8     ; [p3 | p1] (A)

    usub16  r6, r8, r9          ; [d2 | d0] (A)
    usub16  r7, r10, r11        ; [d3 | d1] (A)

    ldr     r10, [r1, #4]       ; src       (B)
    ldr     r11, [r2], #4       ; pred      (B)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (A)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (A)

    str     r8, [r0], #4        ; diff      (A)
    uxtb16  r8, r10             ; [s2 | s0] (B)
    str     r9, [r0], #4        ; diff      (A)

    uxtb16  r9, r11             ; [p2 | p0] (B)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (B)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (B)

    usub16  r6, r8, r9          ; [d2 | d0] (B)
    usub16  r7, r10, r11        ; [d3 | d1] (B)

    ldr     r10, [r1, #8]       ; src       (C)
    ldr     r11, [r2], #4       ; pred      (C)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (B)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (B)

    str     r8, [r0], #4        ; diff      (B)
    uxtb16  r8, r10             ; [s2 | s0] (C)
    str     r9, [r0], #4        ; diff      (B)

    uxtb16  r9, r11             ; [p2 | p0] (C)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (C)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (C)

    usub16  r6, r8, r9          ; [d2 | d0] (C)
    usub16  r7, r10, r11        ; [d3 | d1] (C)

    ldr     r10, [r1, #12]      ; src       (D)
    ldr     r11, [r2], #4       ; pred      (D)

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (C)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (C)

    str     r8, [r0], #4        ; diff      (C)
    uxtb16  r8, r10             ; [s2 | s0] (D)
    str     r9, [r0], #4        ; diff      (C)

    uxtb16  r9, r11             ; [p2 | p0] (D)
    uxtb16  r10, r10, ror #8    ; [s3 | s1] (D)
    uxtb16  r11, r11, ror #8    ; [p3 | p1] (D)

    usub16  r6, r8, r9          ; [d2 | d0] (D)
    usub16  r7, r10, r11        ; [d3 | d1] (D)

    add     r1, r1, r3          ; update src pointer

    pkhbt   r8, r6, r7, lsl #16 ; [d1 | d0] (D)
    pkhtb   r9, r7, r6, asr #16 ; [d3 | d2] (D)

    str     r8, [r0], #4        ; diff      (D)
    subs    r4, r4, #1          ; update loop counter
    str     r9, [r0], #4        ; diff      (D)

    bne     loop_y

    ldmfd   sp!, {r4-r11}
    bx      lr                  ; interworking-safe return

    ENDP

    END

