;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


12    EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6|
13
14    ARM
15    REQUIRE8
16    PRESERVE8
17
18    AREA ||.text||, CODE, READONLY, ALIGN=2
19
;-----------------------------------------------------------------------
; vp8_variance_halfpixvar16x16_hv_armv6
;
; Variance between a 16x16 reference block and a 16x16 source block that
; is interpolated at the half-pixel position both horizontally and
; vertically (each predicted pixel is the rounded average of the rounded
; horizontal averages of two adjacent source rows).
;
; In:
;   r0    unsigned char *src_ptr
;   r1    int source_stride
;   r2    unsigned char *ref_ptr
;   r3    int  recon_stride
;   stack unsigned int *sse
;
; Out:
;   r0    sse - ((sum * sum) >> 8)
;   *sse  sum of squared differences over the 256 pixels
;
; Register roles inside the loop:
;   r4-r7 scratch           r8   running signed sum of differences
;   r9    src_ptr + stride  r10  0x80808080 (per-byte rounding fix-up)
;   r11   running sse       r12  rows remaining
;   lr    constant zero
;
; r4-r12 and lr are saved on entry and restored on exit.
;
; Memory access notes:
;   * Each iteration reads bytes 0..16 of source rows N and N+1, so 17
;     bytes from each of 17 source rows are touched in total.
;   * The word loads at byte offsets +1/+5/+9/+13 cannot be word aligned
;     when the +0/+4/+8/+12 loads are, so unaligned LDR support is needed.
;
; Rounded byte average used throughout:
;   uhsub8(a, ~b) XOR 0x80  ==  (a + b + 1) >> 1   (per byte lane)
;-----------------------------------------------------------------------
|vp8_variance_halfpixvar16x16_hv_armv6| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes pushed
    mov     r8, #0              ; initialize sum = 0
    ldr     r10, c80808080
    mov     r11, #0             ; initialize sse = 0
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     lr, #0              ; constant zero
loop
    add     r9, r0, r1          ; pointer to pixels on the next row
    ; 1st 4 pixels
    ldr     r4, [r0, #0]        ; load source pixels a, row N
    ldr     r6, [r0, #1]        ; load source pixels b, row N
    ldr     r5, [r9, #0]        ; load source pixels c, row N+1
    ldr     r7, [r9, #1]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #0]        ; load 4 ref pixels
    eor     r4, r4, r10

    ; usub8 sets the per-byte GE flags; sel keeps the bytes whose
    ; subtraction did not borrow and substitutes zero (lr) elsewhere.
    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels
    ; calculate total sum (flags are not needed here, so no S suffix)
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 2nd 4 pixels
    ldr     r4, [r0, #4]        ; load source pixels a, row N
    ldr     r6, [r0, #5]        ; load source pixels b, row N
    ldr     r5, [r9, #4]        ; load source pixels c, row N+1

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ldr     r7, [r9, #5]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #4]        ; load 4 ref pixels
    eor     r4, r4, r10

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 3rd 4 pixels
    ldr     r4, [r0, #8]        ; load source pixels a, row N
    ldr     r6, [r0, #9]        ; load source pixels b, row N
    ldr     r5, [r9, #8]        ; load source pixels c, row N+1

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ldr     r7, [r9, #9]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #8]        ; load 4 ref pixels
    eor     r4, r4, r10

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; 4th 4 pixels
    ldr     r4, [r0, #12]       ; load source pixels a, row N
    ldr     r6, [r0, #13]       ; load source pixels b, row N
    ldr     r5, [r9, #12]       ; load source pixels c, row N+1
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    ldr     r7, [r9, #13]       ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #12]       ; load 4 ref pixels
    eor     r4, r4, r10

    ; All loads for this row are done, so the row pointers are advanced
    ; here, interleaved with the difference computation.
    usub8   r6, r4, r5          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    subs    r12, r12, #1        ; one row done; Z flag feeds the bne below
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    bne     loop

    ; return stuff
    ldr     r6, [sp, #40]       ; get address of sse (above the 40 saved bytes)
    mul     r0, r8, r8          ; sum * sum
    str     r11, [r6]           ; store sse
    ; sum * sum is non-negative and can reach 65280^2 = 4261478400, which
    ; needs all 32 bits, so the shift must be logical: an arithmetic
    ; shift would sign-extend whenever |sum| >= 46341.
    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}

    ENDP

; 0x80 in every byte lane. XORed with the result of UHSUB8 to complete
; the rounded per-byte average (a + b + 1) >> 1. Kept in the same AREA,
; directly after the routine, and loaded there with "ldr r10, c80808080".
c80808080
    DCD     0x80808080

    END
