;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


12    EXPORT  |vp8_yv12_copy_src_frame_func_neon|
13    ARM
14    REQUIRE8
15    PRESERVE8
16
17    INCLUDE asm_com_offsets.asm
18
19    AREA ||.text||, CODE, READONLY, ALIGN=2
20;Note: This function is used to copy source data in src_buffer[i] at beginning of
21;the encoding. The buffer has a width and height of cpi->oxcf.Width and cpi->oxcf.Height,
22;which can be ANY numbers(NOT always multiples of 16 or 4).
23
24;void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
25
26|vp8_yv12_copy_src_frame_func_neon| PROC
27    push            {r4 - r11, lr}
28    vpush           {d8 - d15}
29
30    ;Copy Y plane
31    ldr             r4, [r0, #yv12_buffer_config_y_height]
32    ldr             r5, [r0, #yv12_buffer_config_y_width]
33    ldr             r6, [r0, #yv12_buffer_config_y_stride]
34    ldr             r7, [r1, #yv12_buffer_config_y_stride]
35    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
36    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
37
38    add             r10, r2, r6             ;second row src
39    add             r11, r3, r7             ;second row dst
40    mov             r6, r6, lsl #1
41    mov             r7, r7, lsl #1
42    sub             r6, r6, r5              ;adjust stride
43    sub             r7, r7, r5
44
45    ; copy two rows at one time
46    mov             lr, r4, lsr #1
47
48cp_src_to_dst_height_loop
49    mov             r12, r5
50
51cp_width_128_loop
52    vld1.8          {q0, q1}, [r2]!
53    vld1.8          {q4, q5}, [r10]!
54    vld1.8          {q2, q3}, [r2]!
55    vld1.8          {q6, q7}, [r10]!
56    vld1.8          {q8, q9}, [r2]!
57    vld1.8          {q12, q13}, [r10]!
58    vld1.8          {q10, q11}, [r2]!
59    vld1.8          {q14, q15}, [r10]!
60    sub             r12, r12, #128
61    cmp             r12, #128
62    vst1.8          {q0, q1}, [r3]!
63    vst1.8          {q4, q5}, [r11]!
64    vst1.8          {q2, q3}, [r3]!
65    vst1.8          {q6, q7}, [r11]!
66    vst1.8          {q8, q9}, [r3]!
67    vst1.8          {q12, q13}, [r11]!
68    vst1.8          {q10, q11}, [r3]!
69    vst1.8          {q14, q15}, [r11]!
70    bhs             cp_width_128_loop
71
72    cmp             r12, #0
73    beq             cp_width_done
74
75cp_width_8_loop
76    vld1.8          {d0}, [r2]!
77    vld1.8          {d1}, [r10]!
78    sub             r12, r12, #8
79    cmp             r12, #8
80    vst1.8          {d0}, [r3]!
81    vst1.8          {d1}, [r11]!
82    bhs             cp_width_8_loop
83
84    cmp             r12, #0
85    beq             cp_width_done
86
87cp_width_1_loop
88    ldrb            r8, [r2], #1
89    subs            r12, r12, #1
90    strb            r8, [r3], #1
91    ldrb            r8, [r10], #1
92    strb            r8, [r11], #1
93    bne             cp_width_1_loop
94
95cp_width_done
96    subs            lr, lr, #1
97    add             r2, r2, r6
98    add             r3, r3, r7
99    add             r10, r10, r6
100    add             r11, r11, r7
101    bne             cp_src_to_dst_height_loop
102
103;copy last line for Y if y_height is odd
104    tst             r4, #1
105    beq             cp_width_done_1
106    mov             r12, r5
107
108cp_width_128_loop_1
109    vld1.8          {q0, q1}, [r2]!
110    vld1.8          {q2, q3}, [r2]!
111    vld1.8          {q8, q9}, [r2]!
112    vld1.8          {q10, q11}, [r2]!
113    sub             r12, r12, #128
114    cmp             r12, #128
115    vst1.8          {q0, q1}, [r3]!
116    vst1.8          {q2, q3}, [r3]!
117    vst1.8          {q8, q9}, [r3]!
118    vst1.8          {q10, q11}, [r3]!
119    bhs             cp_width_128_loop_1
120
121    cmp             r12, #0
122    beq             cp_width_done_1
123
124cp_width_8_loop_1
125    vld1.8          {d0}, [r2]!
126    sub             r12, r12, #8
127    cmp             r12, #8
128    vst1.8          {d0}, [r3]!
129    bhs             cp_width_8_loop_1
130
131    cmp             r12, #0
132    beq             cp_width_done_1
133
134cp_width_1_loop_1
135    ldrb            r8, [r2], #1
136    subs            r12, r12, #1
137    strb            r8, [r3], #1
138    bne             cp_width_1_loop_1
139cp_width_done_1
140
141;Copy U & V planes
142    ldr             r4, [r0, #yv12_buffer_config_uv_height]
143    ldr             r5, [r0, #yv12_buffer_config_uv_width]
144    ldr             r6, [r0, #yv12_buffer_config_uv_stride]
145    ldr             r7, [r1, #yv12_buffer_config_uv_stride]
146    ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
147    ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1
148
149    add             r10, r2, r6             ;second row src
150    add             r11, r3, r7             ;second row dst
151    mov             r6, r6, lsl #1
152    mov             r7, r7, lsl #1
153    sub             r6, r6, r5              ;adjust stride
154    sub             r7, r7, r5
155
156    mov             r9, #2
157
158cp_uv_loop
159    ;copy two rows at one time
160    mov             lr, r4, lsr #1
161
162cp_src_to_dst_height_uv_loop
163    mov             r12, r5
164
165cp_width_uv_64_loop
166    vld1.8          {q0, q1}, [r2]!
167    vld1.8          {q4, q5}, [r10]!
168    vld1.8          {q2, q3}, [r2]!
169    vld1.8          {q6, q7}, [r10]!
170    sub             r12, r12, #64
171    cmp             r12, #64
172    vst1.8          {q0, q1}, [r3]!
173    vst1.8          {q4, q5}, [r11]!
174    vst1.8          {q2, q3}, [r3]!
175    vst1.8          {q6, q7}, [r11]!
176    bhs             cp_width_uv_64_loop
177
178    cmp             r12, #0
179    beq             cp_width_uv_done
180
181cp_width_uv_8_loop
182    vld1.8          {d0}, [r2]!
183    vld1.8          {d1}, [r10]!
184    sub             r12, r12, #8
185    cmp             r12, #8
186    vst1.8          {d0}, [r3]!
187    vst1.8          {d1}, [r11]!
188    bhs             cp_width_uv_8_loop
189
190    cmp             r12, #0
191    beq             cp_width_uv_done
192
193cp_width_uv_1_loop
194    ldrb            r8, [r2], #1
195    subs            r12, r12, #1
196    strb            r8, [r3], #1
197    ldrb            r8, [r10], #1
198    strb            r8, [r11], #1
199    bne             cp_width_uv_1_loop
200
201cp_width_uv_done
202    subs            lr, lr, #1
203    add             r2, r2, r6
204    add             r3, r3, r7
205    add             r10, r10, r6
206    add             r11, r11, r7
207    bne             cp_src_to_dst_height_uv_loop
208
209;copy last line for U & V if uv_height is odd
210    tst             r4, #1
211    beq             cp_width_uv_done_1
212    mov             r12, r5
213
214cp_width_uv_64_loop_1
215    vld1.8          {q0, q1}, [r2]!
216    vld1.8          {q2, q3}, [r2]!
217    sub             r12, r12, #64
218    cmp             r12, #64
219    vst1.8          {q0, q1}, [r3]!
220    vst1.8          {q2, q3}, [r3]!
221    bhs             cp_width_uv_64_loop_1
222
223    cmp             r12, #0
224    beq             cp_width_uv_done_1
225
226cp_width_uv_8_loop_1
227    vld1.8          {d0}, [r2]!
228    sub             r12, r12, #8
229    cmp             r12, #8
230    vst1.8          {d0}, [r3]!
231    bhs             cp_width_uv_8_loop_1
232
233    cmp             r12, #0
234    beq             cp_width_uv_done_1
235
236cp_width_uv_1_loop_1
237    ldrb            r8, [r2], #1
238    subs            r12, r12, #1
239    strb            r8, [r3], #1
240    bne             cp_width_uv_1_loop_1
241cp_width_uv_done_1
242
243    subs            r9, r9, #1
244    ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
245    ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
246    ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
247    ldrne           r11, [r1, #yv12_buffer_config_uv_stride]
248
249    addne           r10, r2, r10                ;second row src
250    addne           r11, r3, r11                ;second row dst
251
252    bne             cp_uv_loop
253
254    vpop            {d8 - d15}
255    pop             {r4 - r11, pc}
256
257    ENDP
258    END
259