;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


12    EXPORT  |vp8_mse16x16_armv6|
13
14    ARM
15
16    AREA ||.text||, CODE, READONLY, ALIGN=2
17
18; r0    unsigned char *src_ptr
19; r1    int source_stride
20; r2    unsigned char *ref_ptr
21; r3    int  recon_stride
22; stack unsigned int *sse
23;
24;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
25;      So, we can remove this part of calculation.
26
27|vp8_mse16x16_armv6| PROC
28
29    push    {r4-r9, lr}
30    mov     r12, #16            ; set loop counter to 16 (=block height)
31
32    mov     r4, #0              ; initialize sse = 0
33
34loop
35    ; 1st 4 pixels
36    ldr     r5, [r0, #0x0]      ; load 4 src pixels
37    ldr     r6, [r2, #0x0]      ; load 4 ref pixels
38
39    mov     lr, #0              ; constant zero
40
41    usub8   r8, r5, r6          ; calculate difference
42    sel     r7, r8, lr          ; select bytes with positive difference
43    usub8   r9, r6, r5          ; calculate difference with reversed operands
44    sel     r8, r9, lr          ; select bytes with negative difference
45
46    ; calculate partial sums
47    usad8   r5, r7, lr          ; calculate sum of positive differences
48    usad8   r6, r8, lr          ; calculate sum of negative differences
49    orr     r8, r8, r7          ; differences of all 4 pixels
50
51    ldr     r5, [r0, #0x4]      ; load 4 src pixels
52
53    ; calculate sse
54    uxtb16  r6, r8              ; byte (two pixels) to halfwords
55    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
56    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
57
58    ; 2nd 4 pixels
59    ldr     r6, [r2, #0x4]      ; load 4 ref pixels
60    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
61
62    usub8   r8, r5, r6          ; calculate difference
63    sel     r7, r8, lr          ; select bytes with positive difference
64    usub8   r9, r6, r5          ; calculate difference with reversed operands
65    sel     r8, r9, lr          ; select bytes with negative difference
66
67    ; calculate partial sums
68    usad8   r5, r7, lr          ; calculate sum of positive differences
69    usad8   r6, r8, lr          ; calculate sum of negative differences
70    orr     r8, r8, r7          ; differences of all 4 pixels
71    ldr     r5, [r0, #0x8]      ; load 4 src pixels
72    ; calculate sse
73    uxtb16  r6, r8              ; byte (two pixels) to halfwords
74    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
75    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
76
77    ; 3rd 4 pixels
78    ldr     r6, [r2, #0x8]      ; load 4 ref pixels
79    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
80
81    usub8   r8, r5, r6          ; calculate difference
82    sel     r7, r8, lr          ; select bytes with positive difference
83    usub8   r9, r6, r5          ; calculate difference with reversed operands
84    sel     r8, r9, lr          ; select bytes with negative difference
85
86    ; calculate partial sums
87    usad8   r5, r7, lr          ; calculate sum of positive differences
88    usad8   r6, r8, lr          ; calculate sum of negative differences
89    orr     r8, r8, r7          ; differences of all 4 pixels
90
91    ldr     r5, [r0, #0xc]      ; load 4 src pixels
92
93    ; calculate sse
94    uxtb16  r6, r8              ; byte (two pixels) to halfwords
95    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
96    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
97
98    ; 4th 4 pixels
99    ldr     r6, [r2, #0xc]      ; load 4 ref pixels
100    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
101
102    usub8   r8, r5, r6          ; calculate difference
103    add     r0, r0, r1          ; set src_ptr to next row
104    sel     r7, r8, lr          ; select bytes with positive difference
105    usub8   r9, r6, r5          ; calculate difference with reversed operands
106    add     r2, r2, r3          ; set dst_ptr to next row
107    sel     r8, r9, lr          ; select bytes with negative difference
108
109    ; calculate partial sums
110    usad8   r5, r7, lr          ; calculate sum of positive differences
111    usad8   r6, r8, lr          ; calculate sum of negative differences
112    orr     r8, r8, r7          ; differences of all 4 pixels
113
114    subs    r12, r12, #1        ; next row
115
116    ; calculate sse
117    uxtb16  r6, r8              ; byte (two pixels) to halfwords
118    uxtb16  r7, r8, ror #8      ; another two pixels to halfwords
119    smlad   r4, r6, r6, r4      ; dual signed multiply, add and accumulate (1)
120    smlad   r4, r7, r7, r4      ; dual signed multiply, add and accumulate (2)
121
122    bne     loop
123
124    ; return stuff
125    ldr     r1, [sp, #28]       ; get address of sse
126    mov     r0, r4              ; return sse
127    str     r4, [r1]            ; store sse
128
129    pop     {r4-r9, pc}
130
131    ENDP
132
133    END
134