;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_yv12_copy_src_frame_func_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE asm_com_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
;void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc,
;                                       YV12_BUFFER_CONFIG *dst_ybc);
;
; Note: This function is used to copy source data in src_buffer[i] at the
; beginning of the encoding. The buffer has a width and height of
; cpi->oxcf.Width and cpi->oxcf.Height, which can be ANY numbers (NOT
; always multiples of 16 or 4).
;
; Copies the Y plane, then the U plane, then the V plane from src_ybc to
; dst_ybc. Only "width" bytes of each row are copied. Width and height are
; read from src_ybc only; the stride is read from each buffer separately.
;
; Rows are copied two at a time. Each row is split into large NEON chunks
; (128 bytes for Y, 64 bytes for U/V), then 8-byte chunks, then single
; bytes. Every one of those loops is guarded by a test before it is
; entered, so a width smaller than the chunk size, a remainder of 1..7
; bytes and a height of 0 or 1 are all handled without the counter
; wrapping around.
;
; In:    r0 = src_ybc, r1 = dst_ybc
; Regs:  r2  = source pointer, first row of the current pair
;        r3  = destination pointer, first row of the current pair
;        r4  = plane height
;        r5  = plane width
;        r6  = 2 * source stride - width      (step to the next row pair)
;        r7  = 2 * destination stride - width (step to the next row pair)
;        r8  = scratch byte for the byte-wise tail copy
;        r9  = number of chroma planes still to copy
;        r10 = source pointer, second row of the current pair
;        r11 = destination pointer, second row of the current pair
;        r12 = bytes left in the current row (compared as unsigned)
;        lr  = row pairs left in the current plane
;        q0 - q15 = copy scratch
; Saved: r4 - r11, lr and d8 - d15 are pushed on entry and restored on exit.
;-----------------------------------------------------------------------
|vp8_yv12_copy_src_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    ;Copy Y plane
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    ; copy two rows at one time
    movs            lr, r4, lsr #1          ; lr = number of row pairs
    beq             cp_y_last_row           ; y_height < 2: no pair to copy

cp_src_to_dst_height_loop
    mov             r12, r5

    cmp             r12, #128               ; fewer than 128 bytes in the row?
    blo             cp_width_8_check

cp_width_128_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q14, q15}, [r10]!
    sub             r12, r12, #128
    cmp             r12, #128               ; flags are consumed by bhs below
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q10, q11}, [r3]!
    vst1.8          {q14, q15}, [r11]!
    bhs             cp_width_128_loop

cp_width_8_check
    cmp             r12, #8                 ; fewer than 8 bytes left?
    blo             cp_width_1_check

cp_width_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_8_loop

cp_width_1_check
    cmp             r12, #0                 ; r12 is 0..7 here
    beq             cp_width_done

cp_width_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_1_loop

cp_width_done
    subs            lr, lr, #1
    add             r2, r2, r6              ; step all four pointers to the
    add             r3, r3, r7              ; next pair of rows
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_loop

;copy last line for Y if y_height is odd
cp_y_last_row
    tst             r4, #1
    beq             cp_width_done_1
    mov             r12, r5

    cmp             r12, #128
    blo             cp_width_8_check_1

cp_width_128_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q10, q11}, [r2]!
    sub             r12, r12, #128
    cmp             r12, #128
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q10, q11}, [r3]!
    bhs             cp_width_128_loop_1

cp_width_8_check_1
    cmp             r12, #8
    blo             cp_width_1_check_1

cp_width_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_8_loop_1

cp_width_1_check_1
    cmp             r12, #0
    beq             cp_width_done_1

cp_width_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_1_loop_1
cp_width_done_1

;Copy U & V planes
    ldr             r4, [r0, #yv12_buffer_config_uv_height]
    ldr             r5, [r0, #yv12_buffer_config_uv_width]
    ldr             r6, [r0, #yv12_buffer_config_uv_stride]
    ldr             r7, [r1, #yv12_buffer_config_uv_stride]
    ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    mov             r9, #2                  ; two chroma planes: U, then V

cp_uv_loop
    ;copy two rows at one time
    movs            lr, r4, lsr #1          ; lr = number of row pairs
    beq             cp_uv_last_row          ; uv_height < 2: no pair to copy

cp_src_to_dst_height_uv_loop
    mov             r12, r5

    cmp             r12, #64                ; fewer than 64 bytes in the row?
    blo             cp_width_uv_8_check

cp_width_uv_64_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    sub             r12, r12, #64
    cmp             r12, #64                ; flags are consumed by bhs below
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    bhs             cp_width_uv_64_loop

cp_width_uv_8_check
    cmp             r12, #8                 ; fewer than 8 bytes left?
    blo             cp_width_uv_1_check

cp_width_uv_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_uv_8_loop

cp_width_uv_1_check
    cmp             r12, #0                 ; r12 is 0..7 here
    beq             cp_width_uv_done

cp_width_uv_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_uv_1_loop

cp_width_uv_done
    subs            lr, lr, #1
    add             r2, r2, r6              ; step all four pointers to the
    add             r3, r3, r7              ; next pair of rows
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_uv_loop

;copy last line for U & V if uv_height is odd
cp_uv_last_row
    tst             r4, #1
    beq             cp_width_uv_done_1
    mov             r12, r5

    cmp             r12, #64
    blo             cp_width_uv_8_check_1

cp_width_uv_64_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    sub             r12, r12, #64
    cmp             r12, #64
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    bhs             cp_width_uv_64_loop_1

cp_width_uv_8_check_1
    cmp             r12, #8
    blo             cp_width_uv_1_check_1

cp_width_uv_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_uv_8_loop_1

cp_width_uv_1_check_1
    cmp             r12, #0
    beq             cp_width_uv_done_1

cp_width_uv_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_uv_1_loop_1
cp_width_uv_done_1

    ; After U, reload the pointers for V and go round once more. r4 - r7
    ; (height, width, adjusted strides) are shared by both chroma planes.
    ; The flags set by subs survive the conditional loads/adds below.
    subs            r9, r9, #1
    ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
    ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
    ldrne           r11, [r1, #yv12_buffer_config_uv_stride]

    addne           r10, r2, r10                ;second row src
    addne           r11, r3, r11                ;second row dst

    bne             cp_uv_loop

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

    ENDP
    END