;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_yv12_copy_frame_func_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE asm_com_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
;void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y plane, then the U plane, then the V plane from src_ybc to
; dst_ybc using NEON loads/stores, two rows per pass.
;
; In:    r0 = src_ybc, r1 = dst_ybc
; Out:   nothing
; Saved: r4-r11 and d8-d15 are pushed on entry and restored on exit
;        (q4-q7, used as copy registers, alias d8-d15).
; Stack: 16 bytes of locals holding the chroma plane pointers:
;        [sp] = src u, [sp,#4] = dst u, [sp,#8] = src v, [sp,#12] = dst v
;
; Register roles:
;   r2 / r3   src / dst pointer to the current pair of rows
;   r4        plane height      r5  plane width
;   r6        src stride        r7  dst stride
;   r8 / r9   src / dst pointer walking the first row of the pair
;   r10 / r11 src / dst pointer walking the second row (block loops)
;   r12       block (or remaining-byte) counter, lr = row-pair counter
;   r1        reused as the chroma plane counter once dst_ybc is no
;             longer needed; r0 is reused as the second-row source
;             pointer in the remainder loops.
;
; Behaviour visible in the code that callers must be aware of:
;   * Only y_height, y_width and the two y_stride fields are read; the
;     chroma height/width/strides are obtained by halving them.
;   * Rows are copied in pairs (height lsr 1 passes), so a trailing odd
;     row is not copied.
;   * Each row is copied as full 128-byte (Y) / 64-byte (U,V) blocks
;     followed by a remainder copied in 16-byte (Y) / 8-byte (U,V)
;     chunks.  A remainder that is not a multiple of the chunk size is
;     rounded up to the next chunk, i.e. up to 15 (Y) / 7 (U,V) bytes
;     past the width are read and written in each row.
;     NOTE(review): this assumes the strides leave that much room after
;     the visible width - confirm against the buffer allocator.
;-----------------------------------------------------------------------

|vp8_yv12_copy_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    sub             sp, sp, #16

    ;Copy Y plane
    ldr             r8, [r0, #yv12_buffer_config_u_buffer]       ;src u plane
    ldr             r9, [r1, #yv12_buffer_config_u_buffer]       ;dst u plane
    ldr             r10, [r0, #yv12_buffer_config_v_buffer]      ;src v plane
    ldr             r11, [r1, #yv12_buffer_config_v_buffer]      ;dst v plane

    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    ; park the chroma pointers: r8-r11 are needed as row pointers below
    str             r8, [sp]
    str             r9, [sp, #4]
    str             r10, [sp, #8]
    str             r11, [sp, #12]

    ; copy two rows at one time
    ; The loops below test their counter after the body, so a zero count
    ; must be rejected up front or the counter would wrap.
    movs            lr, r4, lsr #1
    beq             end_of_cp_src_to_dst            ;fewer than two rows

    movs            r12, r5, lsr #7
    beq             extra_cp_needed                 ;y_width below 128: no full block

cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    mov             r12, r5, lsr #7                 ;128-byte blocks per row

cp_src_to_dst_width_loop
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!
    vld1.8          {q4, q5}, [r8]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q6, q7}, [r8]!
    vld1.8          {q14, q15}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!
    vst1.8          {q4, q5}, [r9]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q6, q7}, [r9]!
    vst1.8          {q14, q15}, [r11]!

    bne             cp_src_to_dst_width_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             cp_src_to_dst_height_loop

extra_cp_needed
    ands            r10, r5, #0x7f                  ;check to see if extra copy is needed
    sub             r11, r5, r10                    ;byte offset of the remainder in a row
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
    bne             extra_cp_src_to_dst_width       ;flags still from the ands above
end_of_cp_src_to_dst

;Copy U & V planes
    ldr             r2, [sp]                        ;srcptr1
    ldr             r3, [sp, #4]                    ;dstptr1
    mov             r4, r4, lsr #1                  ;src uv_height
    mov             r5, r5, lsr #1                  ;src uv_width
    mov             r6, r6, lsr #1                  ;src uv_stride
    mov             r7, r7, lsr #1                  ;dst uv_stride

    mov             r1, #2                          ;two chroma planes: U then V

cp_uv_loop

    ;copy two rows at one time
    movs            lr, r4, lsr #1
    beq             end_of_cp_src_to_dst_uv         ;fewer than two rows

    movs            r12, r5, lsr #6
    beq             extra_cp_uv_needed              ;uv_width below 64: no full block

cp_src_to_dst_height_uv_loop
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    mov             r12, r5, lsr #6                 ;64-byte blocks per row

cp_src_to_dst_width_uv_loop
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!

    bne             cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             cp_src_to_dst_height_uv_loop

extra_cp_uv_needed
    ands            r10, r5, #0x3f                  ;check to see if extra copy is needed
    sub             r11, r5, r10                    ;byte offset of the remainder in a row
    ldr             r2, [sp]                        ;srcptr1
    ldr             r3, [sp, #4]                    ;dstptr1
    bne             extra_cp_src_to_dst_uv_width    ;flags still from the ands above
end_of_cp_src_to_dst_uv

    subs            r1, r1, #1

    ; After U: drop the two U slots so [sp] / [sp,#4] become the V
    ; pointers, then run the loop once more.
    addne           sp, sp, #8

    ldrne           r2, [sp]                        ;srcptr1
    ldrne           r3, [sp, #4]                    ;dstptr1

    bne             cp_uv_loop

    add             sp, sp, #8                      ;release the remaining 8 bytes

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

;=============================
; Y plane remainder: r10 = bytes left per row, r11 = offset of those
; bytes from the start of the row, r2 / r3 = plane base pointers.
extra_cp_src_to_dst_width
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6                      ;second-row src (src_ybc no longer needed)
    add             r11, r9, r7                     ;second-row dst

    mov             r12, r10

extra_cp_src_to_dst_width_loop
    vld1.8          {q0}, [r8]!
    vld1.8          {q1}, [r0]!

    subs            r12, r12, #16

    vst1.8          {q0}, [r9]!
    vst1.8          {q1}, [r11]!
    ; bgt, not bne: a remainder that is not a multiple of 16 steps past
    ; zero and must still end the loop.
    bgt             extra_cp_src_to_dst_width_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_loop

    b               end_of_cp_src_to_dst

;=================================
; U / V plane remainder: same layout as above with 8-byte chunks.
extra_cp_src_to_dst_uv_width
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_uv_loop
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6                      ;second-row src
    add             r11, r9, r7                     ;second-row dst

    mov             r12, r10

extra_cp_src_to_dst_width_uv_loop
    vld1.8          {d0}, [r8]!
    vld1.8          {d1}, [r0]!

    subs            r12, r12, #8

    vst1.8          {d0}, [r9]!
    vst1.8          {d1}, [r11]!
    ; bgt, not bne: see the Y remainder loop.
    bgt             extra_cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_uv_loop

    b               end_of_cp_src_to_dst_uv

    ENDP
    END