• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11    EXPORT |vp8_short_walsh4x4_armv6|    ; make the entry point visible to the linker
12
13    ARM                                  ; assemble the following instructions as ARM (not Thumb)
14    REQUIRE8                             ; this file requires 8-byte stack alignment
15    PRESERVE8                            ; this file preserves 8-byte stack alignment
16
17    AREA    |.text|, CODE, READONLY  ; open the read-only code section named .text
18
;-----------------------------------------------------------------------
; void vp8_short_walsh4x4_armv6(short *input, short *output, int pitch)
;
; Forward 4x4 Walsh-Hadamard transform of sixteen signed 16-bit values,
; built from the ARMv6 packed-halfword saturating add/subtract
; instructions (two 16-bit lanes per register).
;
; In:    r0 = input   four rows of four halfwords; consecutive rows are
;                     'pitch' BYTES apart. Rows are read with word
;                     loads (presumably word aligned - confirm with the
;                     callers).
;        r1 = output  sixteen contiguous halfwords, written as 8 words
;        r2 = pitch   row stride of 'input', in bytes
; Out:   no result is placed in r0; on exit r0 points at the fourth
;        input row and r1 at the last output word.
; Saved: r4-r11 and lr are pushed on entry and restored on exit.
; Clobb: r0-r3, r12, condition flags
; Stack: 36 bytes (nine registers). If sp is 8-byte aligned on entry it
;        is only 4-byte aligned inside the body; the routine makes no
;        calls and restores sp before returning.
;
; Lane notation: [hi | lo] names the element index held in the upper
; and lower halfword of a register (labels assume little-endian data,
; i.e. the lower halfword is the element at the lower address).
;
; Pass 1, per row (x0..x3):      Pass 2, per column (same butterflies
;   a1 = x0 + x3                 applied to rows 0..3), followed by
;   b1 = x1 + x2                   out = (v + (v >= 0 ? 1 : 0)) >> 1
;   c1 = x1 - x2                 which equals (v + (v > 0)) >> 1 because
;   d1 = x0 - x3                 (0 + 1) >> 1 is still 0.
;   y0 = a1 + b1
;   y1 = c1 + d1
;   y2 = a1 - b1
;   y3 = d1 - c1
;
; All q* additions/subtractions saturate to the signed 16-bit range.
;   qsubaddx Rd, Rn, Rm : Rd.hi = Rn.hi - Rm.lo, Rd.lo = Rn.lo + Rm.hi
;   qaddsubx Rd, Rn, Rm : Rd.hi = Rn.hi + Rm.lo, Rd.lo = Rn.lo - Rm.hi
;-----------------------------------------------------------------------
|vp8_short_walsh4x4_armv6| PROC

    stmdb       sp!, {r4 - r11, lr}

    ; r2 is needed as a data register below, so the pitch is parked in
    ; r12 for the duration of the loads.
    mov         r12, r2
    ldr         r2, [r0]             ; [1  |  0]
    ldr         r3, [r0, #4]         ; [3  |  2]
    ldr         r4, [r0, r12]!       ; [5  |  4]   r0 -> row 1
    ldr         r5, [r0, #4]         ; [7  |  6]
    ldr         r6, [r0, r12]!       ; [9  |  8]   r0 -> row 2
    ldr         r7, [r0, #4]         ; [11 | 10]
    ldr         r8, [r0, r12]!       ; [13 | 12]   r0 -> row 3
    ldr         r9, [r0, #4]         ; [15 | 14]

    ; ---- pass 1: rows 0 and 1 (r12 is free again from here on) ----
    qsubaddx    r10, r2, r3          ; [c1|a1] [1-2   |   0+3]
    qaddsubx    r11, r2, r3          ; [b1|d1] [1+2   |   0-3]
    qsubaddx    r12, r4, r5          ; [c1|a1] [5-6   |   4+7]
    qaddsubx    lr, r4, r5           ; [b1|d1] [5+6   |   4-7]

    qaddsubx    r2, r10, r11         ; [1 | 2] [c1+d1 | a1-b1]
    qaddsubx    r3, r11, r10         ; [0 | 3] [b1+a1 | d1-c1]
    qaddsubx    r4, r12, lr          ; [5 | 6] [c1+d1 | a1-b1]
    qaddsubx    r5, lr, r12          ; [4 | 7] [b1+a1 | d1-c1]

    ; ---- pass 1: rows 2 and 3 ----
    qsubaddx    r10, r6, r7          ; [c1|a1] [9-10  |  8+11]
    qaddsubx    r11, r6, r7          ; [b1|d1] [9+10  |  8-11]
    qsubaddx    r12, r8, r9          ; [c1|a1] [13-14 | 12+15]
    qaddsubx    lr, r8, r9           ; [b1|d1] [13+14 | 12-15]

    qaddsubx    r6, r10, r11         ; [9 |10] [c1+d1 | a1-b1]
    qaddsubx    r7, r11, r10         ; [8 |11] [b1+a1 | d1-c1]
    qaddsubx    r8, r12, lr          ; [13|14] [c1+d1 | a1-b1]
    qaddsubx    r9, lr, r12          ; [12|15] [b1+a1 | d1-c1]

    ; Pass 1 complete. Row outputs are now held as
    ;   r3=[0|3] r2=[1|2]   r5=[4|7] r4=[5|6]
    ;   r7=[8|11] r6=[9|10] r9=[12|15] r8=[13|14]

    ; ---- pass 2: columns 0 and 3 ----
    qadd16      r10, r3, r9          ; a1 [0+12  |  3+15]
    qadd16      r11, r5, r7          ; b1 [4+8   |  7+11]
    qsub16      r12, r5, r7          ; c1 [4-8   |  7-11]
    qsub16      lr, r3, r9           ; d1 [0-12  |  3-15]

    qadd16      r3, r10, r11         ; a2 [a1+b1] [0 | 3]
    qadd16      r5, r12, lr          ; b2 [c1+d1] [4 | 7]
    qsub16      r7, r10, r11         ; c2 [a1-b1] [8 |11]
    qsub16      r9, lr, r12          ; d2 [d1-c1] [12|15]

    ; ---- pass 2: columns 1 and 2 ----
    qadd16      r10, r2, r8          ; a1 [1+13  |  2+14]
    qadd16      r11, r4, r6          ; b1 [5+9   |  6+10]
    qsub16      r12, r4, r6          ; c1 [5-9   |  6-10]
    qsub16      lr, r2, r8           ; d1 [1-13  |  2-14]

    qadd16      r2, r10, r11         ; a2 [a1+b1] [1 | 2]
    qadd16      r4, r12, lr          ; b2 [c1+d1] [5 | 6]
    qsub16      r6, r10, r11         ; c2 [a1-b1] [9 |10]
    qsub16      r8, lr, r12          ; d2 [d1-c1] [13|14]

    ; ---- rounding and store: out = (v + (v >= 0)) >> 1 ----
    ;
    ; High-halfword lanes: asrs sign-extends the lane and sets N, addpl
    ; adds the rounding 1 in full 32-bit precision.
    ;
    ; Low-halfword lanes: lsls moves the lane to the top (setting N),
    ; then the value is shifted down by one BEFORE the rounding bit is
    ; added. Adding 0x10000 to (v << 16) directly would overflow into
    ; the sign bit for v = 0x7FFF and produce -16384 instead of 16384;
    ; (v << 15) + 0x8000 never exceeds 2^30. The plain asr does not
    ; update the flags, so addpl still tests the sign found by lsls.

    asrs        r10, r3, #16
    addpl       r10, r10, #1         ; [~0]
    asrs        r11, r2, #16
    addpl       r11, r11, #1         ; [~1]
    lsl         r11, r11, #15        ; [1  |  x]
    pkhtb       r10, r11, r10, asr #1 ; [1  |  0]
    str         r10, [r1], #4

    lsls        r11, r2, #16
    asr         r11, r11, #1
    addpl       r11, r11, #0x8000    ; [~2] << 15
    lsls        r12, r3, #16
    asr         r12, r12, #1
    addpl       r12, r12, #0x8000    ; [3  |  x]
    pkhtb       r11, r12, r11, asr #16 ; [3  |  2]
    str         r11, [r1], #4

    ; r2/r3 have been consumed and serve as scratch from here on.
    asrs        r2, r5, #16
    addpl       r2, r2, #1           ; [~4]
    asrs        r3, r4, #16
    addpl       r3, r3, #1           ; [~5]
    lsl         r3, r3, #15          ; [5  |  x]
    pkhtb       r2, r3, r2, asr #1   ; [5  |  4]
    str         r2, [r1], #4

    lsls        r2, r4, #16
    asr         r2, r2, #1
    addpl       r2, r2, #0x8000      ; [~6] << 15
    lsls        r3, r5, #16
    asr         r3, r3, #1
    addpl       r3, r3, #0x8000      ; [7  |  x]
    pkhtb       r2, r3, r2, asr #16  ; [7  |  6]
    str         r2, [r1], #4

    asrs        r2, r7, #16
    addpl       r2, r2, #1           ; [~8]
    asrs        r3, r6, #16
    addpl       r3, r3, #1           ; [~9]
    lsl         r3, r3, #15          ; [9  |  x]
    pkhtb       r2, r3, r2, asr #1   ; [9  |  8]
    str         r2, [r1], #4

    lsls        r2, r6, #16
    asr         r2, r2, #1
    addpl       r2, r2, #0x8000      ; [~10] << 15
    lsls        r3, r7, #16
    asr         r3, r3, #1
    addpl       r3, r3, #0x8000      ; [11 |  x]
    pkhtb       r2, r3, r2, asr #16  ; [11 | 10]
    str         r2, [r1], #4

    asrs        r2, r9, #16
    addpl       r2, r2, #1           ; [~12]
    asrs        r3, r8, #16
    addpl       r3, r3, #1           ; [~13]
    lsl         r3, r3, #15          ; [13 |  x]
    pkhtb       r2, r3, r2, asr #1   ; [13 | 12]
    str         r2, [r1], #4

    lsls        r2, r8, #16
    asr         r2, r2, #1
    addpl       r2, r2, #0x8000      ; [~14] << 15
    lsls        r3, r9, #16
    asr         r3, r3, #1
    addpl       r3, r3, #0x8000      ; [15 |  x]
    pkhtb       r2, r3, r2, asr #16  ; [15 | 14]
    str         r2, [r1]

    ldmia       sp!, {r4 - r11, pc}  ; restore callee-saved registers and return
    ENDP        ; |vp8_short_walsh4x4_armv6|
144
145    END
146