• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1;
2;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3;
4;  Use of this source code is governed by a BSD-style license
5;  that can be found in the LICENSE file in the root of the source
6;  tree. An additional intellectual property rights grant can be found
7;  in the file PATENTS.  All contributing project authors may
8;  be found in the AUTHORS file in the root of the source tree.
9;
10
11
12    EXPORT |vp8_subtract_b_neon_func|
13    EXPORT |vp8_subtract_mby_neon|
14    EXPORT |vp8_subtract_mbuv_neon|
15
16    ARM
17    REQUIRE8
18    PRESERVE8
19
20    AREA ||.text||, CODE, READONLY, ALIGN=2
;=========================================
;void vp8_subtract_b_neon_func(short *diff, unsigned char *src, unsigned char *pred, int stride, int pitch);
;
; Computes diff = src - pred for a block of 4 rows x 4 columns.
;
;   r0   = diff    output; consecutive rows are pitch 16-bit elements apart
;   r1   = src     consecutive rows are stride bytes apart
;   r2   = pred    consecutive rows are pitch bytes apart
;   r3   = stride
;   [sp] = pitch   (fifth argument, passed on the stack)
;
; Clobbers: r0-r2, r12, d0-d7, q8-q11.
; Note: every row load fetches 8 bytes, but only the differences of the
; first 4 bytes of each row are written to diff.
|vp8_subtract_b_neon_func| PROC
    ldr             r12, [sp]               ; r12 = pitch

    ; src rows land in d0-d3, pred rows in d4-d7
    vld1.8          {d0}, [r1], r3          ; src  row 0
    vld1.8          {d4}, [r2], r12         ; pred row 0
    vld1.8          {d1}, [r1], r3          ; src  row 1
    vld1.8          {d5}, [r2], r12         ; pred row 1
    vld1.8          {d2}, [r1], r3          ; src  row 2
    vld1.8          {d6}, [r2], r12         ; pred row 2
    vld1.8          {d3}, [r1], r3          ; src  row 3
    vld1.8          {d7}, [r2], r12         ; pred row 3

    ; widen u8 -> 16 bit while subtracting: qN = src row - pred row
    vsubl.u8        q8, d0, d4
    vsubl.u8        q9, d1, d5
    vsubl.u8        q10, d2, d6
    vsubl.u8        q11, d3, d7

    ; diff holds 16-bit elements, so its row step in bytes is 2 * pitch
    add             r12, r12, r12

    ; only the low half of each result (4 x 16 bit) is stored per row
    vst1.16         {d16}, [r0], r12        ; diff row 0
    vst1.16         {d18}, [r0], r12        ; diff row 1
    vst1.16         {d20}, [r0], r12        ; diff row 2
    vst1.16         {d22}, [r0], r12        ; diff row 3

    bx              lr
    ENDP
49
;==========================================
;void vp8_subtract_mby_neon(short *diff, unsigned char *src, unsigned char *pred, int stride)
;
; Computes diff = src - pred for a block of 16 rows x 16 columns.
;
;   r0 = diff    output, written contiguously (16 x 16-bit per row)
;   r1 = src     consecutive rows are stride bytes apart
;   r2 = pred    read contiguously (16 bytes per row)
;   r3 = stride
;
; Clobbers: r0-r2, r12, flags, q0-q3, q8-q15.
;
; q4-q7 (d8-d15) are callee-saved under the AAPCS and are deliberately
; left untouched: rows 2-3 of each pass are loaded into q12-q15 and their
; differences are written into q0-q3, whose inputs have already been
; consumed by then. No stack space is used.
|vp8_subtract_mby_neon| PROC
    mov             r12, #4                 ; 4 passes of 4 rows each

subtract_mby_loop
    vld1.8          {q0}, [r1], r3          ; src  row 0
    vld1.8          {q1}, [r2]!             ; pred row 0
    vld1.8          {q2}, [r1], r3          ; src  row 1
    vld1.8          {q3}, [r2]!             ; pred row 1
    vld1.8          {q12}, [r1], r3         ; src  row 2
    vld1.8          {q13}, [r2]!            ; pred row 2
    vld1.8          {q14}, [r1], r3         ; src  row 3
    vld1.8          {q15}, [r2]!            ; pred row 3

    ; widen u8 -> 16 bit while subtracting; each row is two 8-column halves
    vsubl.u8        q8, d0, d2              ; row 0, columns 0-7
    vsubl.u8        q9, d1, d3              ; row 0, columns 8-15
    vsubl.u8        q10, d4, d6             ; row 1, columns 0-7
    vsubl.u8        q11, d5, d7             ; row 1, columns 8-15
    ; q0-q3 are dead from here on, so they take the results for rows 2-3
    vsubl.u8        q0, d24, d26            ; row 2, columns 0-7
    vsubl.u8        q1, d25, d27            ; row 2, columns 8-15
    vsubl.u8        q2, d28, d30            ; row 3, columns 0-7
    vsubl.u8        q3, d29, d31            ; row 3, columns 8-15

    vst1.16         {q8}, [r0]!             ; store diff, row 0
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!            ; row 1
    vst1.16         {q11}, [r0]!
    vst1.16         {q0}, [r0]!             ; row 2
    vst1.16         {q1}, [r0]!
    vst1.16         {q2}, [r0]!             ; row 3
    vst1.16         {q3}, [r0]!

    subs            r12, r12, #1
    bne             subtract_mby_loop

    bx              lr
    ENDP
88
;=================================
;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
;
; Computes diff = src - pred for two blocks of 8 rows x 8 columns (u, then v).
;
;   r0   = diff    u output starts at diff + 256 elements; the v output
;                  follows it directly (diff + 320)
;   r1   = usrc    consecutive rows are stride bytes apart
;   r2   = vsrc    consecutive rows are stride bytes apart
;   r3   = pred    u prediction starts at pred + 256 bytes; the v
;                  prediction follows it directly (pred + 320)
;   [sp] = stride  (fifth argument, passed on the stack)
;
; Clobbers: r0-r3, r12, q0-q3, q8-q15.
;
; d8-d15 (q4-q7) are callee-saved under the AAPCS and are deliberately
; left untouched: rows 4-7 of each plane are loaded into d24-d31 and their
; differences are written into q0-q3, whose inputs have already been
; consumed by then. No stack space is used.
|vp8_subtract_mbuv_neon| PROC
    ldr             r12, [sp]               ; r12 = stride

;u
    add             r0, r0, #512        ;   short *udiff = diff + 256;
    add             r3, r3, #256        ;   unsigned char *upred = pred + 256;

    vld1.8          {d0}, [r1], r12         ; usrc row 0
    vld1.8          {d1}, [r3]!             ; pred row 0
    vld1.8          {d2}, [r1], r12         ; row 1
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r1], r12         ; row 2
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r1], r12         ; row 3
    vld1.8          {d7}, [r3]!
    vld1.8          {d24}, [r1], r12        ; row 4
    vld1.8          {d25}, [r3]!
    vld1.8          {d26}, [r1], r12        ; row 5
    vld1.8          {d27}, [r3]!
    vld1.8          {d28}, [r1], r12        ; row 6
    vld1.8          {d29}, [r3]!
    vld1.8          {d30}, [r1], r12        ; row 7
    vld1.8          {d31}, [r3]!

    ; widen u8 -> 16 bit while subtracting: one q register per output row
    vsubl.u8        q8, d0, d1              ; row 0
    vsubl.u8        q9, d2, d3              ; row 1
    vsubl.u8        q10, d4, d5             ; row 2
    vsubl.u8        q11, d6, d7             ; row 3
    ; d0-d7 are dead from here on, so q0-q3 take the results for rows 4-7
    vsubl.u8        q0, d24, d25            ; row 4
    vsubl.u8        q1, d26, d27            ; row 5
    vsubl.u8        q2, d28, d29            ; row 6
    vsubl.u8        q3, d30, d31            ; row 7

    vst1.16         {q8}, [r0]!             ; store udiff rows 0-7
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q0}, [r0]!
    vst1.16         {q1}, [r0]!
    vst1.16         {q2}, [r0]!
    vst1.16         {q3}, [r0]!

;v
    ; r0 and r3 were advanced past the u data by the post-increments above,
    ; so they now address the v part of diff and pred.
    vld1.8          {d0}, [r2], r12         ; vsrc row 0
    vld1.8          {d1}, [r3]!             ; pred row 0
    vld1.8          {d2}, [r2], r12         ; row 1
    vld1.8          {d3}, [r3]!
    vld1.8          {d4}, [r2], r12         ; row 2
    vld1.8          {d5}, [r3]!
    vld1.8          {d6}, [r2], r12         ; row 3
    vld1.8          {d7}, [r3]!
    vld1.8          {d24}, [r2], r12        ; row 4
    vld1.8          {d25}, [r3]!
    vld1.8          {d26}, [r2], r12        ; row 5
    vld1.8          {d27}, [r3]!
    vld1.8          {d28}, [r2], r12        ; row 6
    vld1.8          {d29}, [r3]!
    vld1.8          {d30}, [r2], r12        ; row 7
    vld1.8          {d31}, [r3]!

    vsubl.u8        q8, d0, d1              ; row 0
    vsubl.u8        q9, d2, d3              ; row 1
    vsubl.u8        q10, d4, d5             ; row 2
    vsubl.u8        q11, d6, d7             ; row 3
    vsubl.u8        q0, d24, d25            ; row 4 (d0-d7 already consumed)
    vsubl.u8        q1, d26, d27            ; row 5
    vsubl.u8        q2, d28, d29            ; row 6
    vsubl.u8        q3, d30, d31            ; row 7

    vst1.16         {q8}, [r0]!             ; store vdiff rows 0-7
    vst1.16         {q9}, [r0]!
    vst1.16         {q10}, [r0]!
    vst1.16         {q11}, [r0]!
    vst1.16         {q0}, [r0]!
    vst1.16         {q1}, [r0]!
    vst1.16         {q2}, [r0]!
    vst1.16         {q3}, [r0]!

    bx              lr
    ENDP
171
172    END
173