@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_fmt_conv_420sp_to_420sp.s
@*
@* @brief
@*  contains function definitions for format conversions
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  ihevcd_fmt_conv_420sp_to_420sp_a9q()
@*
@* @remarks
@*  Syntax : GNU as, ARM (AArch32) with NEON; '@' starts a comment.
@*
@*******************************************************************************/
    .equ DO1STROUNDING, 0

@ Byte offsets of the stack-passed arguments, measured from sp AFTER the
@ prologue has pushed r4-r12 and lr (10 registers = 40 bytes).
    .equ SAVED_REGS_BYTES,  40
    .equ ARG_WIDTH,         SAVED_REGS_BYTES + 0
    .equ ARG_HEIGHT,        SAVED_REGS_BYTES + 4
    .equ ARG_SRC_STRIDE_Y,  SAVED_REGS_BYTES + 8
    .equ ARG_SRC_STRIDE_UV, SAVED_REGS_BYTES + 12
    .equ ARG_DST_STRIDE_Y,  SAVED_REGS_BYTES + 16
    .equ ARG_DST_STRIDE_UV, SAVED_REGS_BYTES + 20

@ Bytes moved per vector iteration for each plane.
    .equ Y_CHUNK,           32
    .equ UV_CHUNK,          16

    @ ARM
    @
    @ PRESERVE8

.text
.p2align 2

@/*****************************************************************************
@*                                                                            *
@*  Function Name   : ihevcd_fmt_conv_420sp_to_420sp_a9q()                    *
@*                                                                            *
@*  Description     : Copies a YUV 4:2:0 semi-planar image (Y plane followed  *
@*                    by an interleaved UV plane) to a destination with the   *
@*                    same layout but possibly different strides.             *
@*                    'height' rows of 'width' bytes are copied for luma and  *
@*                    'height / 2' rows of 'width' bytes for chroma.          *
@*                                                                            *
@*  Arguments       : R0            pu1_y                                     *
@*                    R1            pu1_uv                                    *
@*                    R2            pu1_dest_y                                *
@*                    R3            pu1_dest_uv                               *
@*                    [R13 #40]     u2_width                                  *
@*                    [R13 #44]     u2_height                                 *
@*                    [R13 #48]     u2_stridey                                *
@*                    [R13 #52]     u2_stridechroma                           *
@*                    [R13 #56]     u2_dest_stridey                           *
@*                    [R13 #60]     u2_dest_stridechroma                      *
@*                    (offsets are relative to sp after the 40-byte push;     *
@*                    each stack argument is read as a 32-bit word)           *
@*                                                                            *
@*  Values Returned : None                                                    *
@*                                                                            *
@*  Register Usage  : R0 - R12 and D0 - D3 are written. R4 - R12 and LR are   *
@*                    saved on entry and restored on exit.                    *
@*                      r0 / r1   current source pointer (Y / UV)             *
@*                      r2        current destination pointer                 *
@*                      r4        rows remaining                              *
@*                      r6        bytes remaining in the current row          *
@*                      r8        width in bytes                              *
@*                      r9        height in rows                              *
@*                      r10/r11   src / dst stride minus width                *
@*                      r12       scratch byte (narrow-row path only)         *
@*                                                                            *
@*  Stack Usage     : 40 Bytes                                                *
@*                                                                            *
@*  Interruptibility: Interruptible                                           *
@*                                                                            *
@*  Known Limitations                                                         *
@*       Assumptions: Image Width:  Assumed to be multiple of 2 and           *
@*                    Image Height: Assumed to be even.                       *
@*       Rows at least one vector chunk wide (32 bytes luma, 16 bytes chroma) *
@*       finish with a chunk that ends exactly at the row end, so some bytes  *
@*       of such a row may be read and written twice. Narrower rows are       *
@*       copied one byte at a time and never touch memory outside the row.    *
@*       A width or height that is zero or negative copies nothing.           *
@*                                                                            *
@*  Revision History  :                                                       *
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
@*         16 05 2012   Naveen SR     draft                                   *
@*                                                                            *
@*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_a9q
.type ihevcd_fmt_conv_420sp_to_420sp_a9q, %function
ihevcd_fmt_conv_420sp_to_420sp_a9q:

    STMFD       sp!, {r4-r12, lr}

    LDR         r8, [sp, #ARG_WIDTH]            @ r8 = width
    LDR         r9, [sp, #ARG_HEIGHT]           @ r9 = height
    LDR         r7, [sp, #ARG_SRC_STRIDE_Y]     @ r7 = source luma stride
    LDR         r5, [sp, #ARG_DST_STRIDE_Y]     @ r5 = destination luma stride

    @ The row and column loops below test their counters only after the
    @ first copy, so an empty image has to be rejected up front.
    CMP         r8, #0
    BLE         .Lexit
    CMP         r9, #0
    BLE         .Lexit

    SUB         r10, r7, r8                     @ src step from row end to next row
    SUB         r11, r5, r8                     @ dst step from row end to next row

    @/* Copy Y */

    MOV         r4, r9                          @ r4 = luma rows remaining (> 0)
    CMP         r8, #Y_CHUNK
    BLT         .Ly_narrow_row                  @ row shorter than one vector chunk

.Ly_row_loop:
    MOV         r6, r8                          @ r6 = bytes remaining (>= Y_CHUNK)

.Ly_col_loop:
    PLD         [r0, #128]
    SUB         r6, r6, #Y_CHUNK
    VLD1.8      {d0, d1, d2, d3}, [r0]!
    VST1.8      {d0, d1, d2, d3}, [r2]!
    CMP         r6, #Y_CHUNK
    BGE         .Ly_col_loop
    CMP         r6, #0
    BEQ         .Ly_row_end

    @ 0 < r6 < Y_CHUNK bytes are left. Step both pointers back by
    @ (Y_CHUNK - r6) so that one more full chunk ends exactly at the end of
    @ the row. Example: width 162 -> the loop above copied 160 bytes, the
    @ pointers move back 30 bytes to offset 130, and bytes 130..161 are
    @ copied (130..159 for the second time).
    RSB         r6, r6, #Y_CHUNK
    SUB         r0, r0, r6
    SUB         r2, r2, r6
    VLD1.8      {d0, d1, d2, d3}, [r0]!
    VST1.8      {d0, d1, d2, d3}, [r2]!

.Ly_row_end:
    ADD         r0, r0, r10
    ADD         r2, r2, r11
    SUBS        r4, r4, #1
    BGT         .Ly_row_loop

    @/* Copy UV */

.Lcopy_uv:
    LDR         r5, [sp, #ARG_DST_STRIDE_UV]    @ r5 = destination chroma stride
    LDR         r7, [sp, #ARG_SRC_STRIDE_UV]    @ r7 = source chroma stride

    MOV         r4, r9, LSR #1                  @ r4 = chroma rows = height / 2
    CMP         r4, #0
    BEQ         .Lexit                          @ no chroma rows to copy

    MOV         r2, r3                          @ r2 = pu1_dest_uv

    SUB         r10, r7, r8                     @ src step from row end to next row
    SUB         r11, r5, r8                     @ dst step from row end to next row

    CMP         r8, #UV_CHUNK
    BLT         .Luv_narrow_row                 @ row shorter than one vector chunk

.Luv_row_loop:
    MOV         r6, r8                          @ r6 = bytes remaining (>= UV_CHUNK)

.Luv_col_loop:
    PLD         [r1, #128]
    SUB         r6, r6, #UV_CHUNK
    VLD1.8      {d0, d1}, [r1]!
    VST1.8      {d0, d1}, [r2]!
    CMP         r6, #UV_CHUNK
    BGE         .Luv_col_loop
    CMP         r6, #0
    BEQ         .Luv_row_end

    @ 0 < r6 < UV_CHUNK bytes are left. Same overlap trick as for luma, with
    @ a 16-byte chunk. Example: width 162 -> 160 bytes copied above, the
    @ pointers move back 14 bytes to offset 146, and bytes 146..161 are copied.
    RSB         r6, r6, #UV_CHUNK
    SUB         r1, r1, r6
    SUB         r2, r2, r6
    VLD1.8      {d0, d1}, [r1]!
    VST1.8      {d0, d1}, [r2]!

.Luv_row_end:
    ADD         r1, r1, r10
    ADD         r2, r2, r11
    SUBS        r4, r4, #1
    BGT         .Luv_row_loop

.Lexit:
    LDMFD       sp!, {r4-r12, pc}

    @ ------------------------------------------------------------------
    @ Out-of-line paths for rows narrower than one vector chunk. Stepping
    @ back for an overlapping chunk would land before the start of the
    @ row, so these rows are copied one byte at a time instead.
    @ ------------------------------------------------------------------

.Ly_narrow_row:
    MOV         r6, r8                          @ r6 = bytes remaining (1..31)
.Ly_narrow_col:
    LDRB        r12, [r0], #1
    SUBS        r6, r6, #1                      @ STRB leaves the flags untouched
    STRB        r12, [r2], #1
    BGT         .Ly_narrow_col
    ADD         r0, r0, r10
    ADD         r2, r2, r11
    SUBS        r4, r4, #1
    BGT         .Ly_narrow_row
    B           .Lcopy_uv

.Luv_narrow_row:
    MOV         r6, r8                          @ r6 = bytes remaining (1..15)
.Luv_narrow_col:
    LDRB        r12, [r1], #1
    SUBS        r6, r6, #1                      @ STRB leaves the flags untouched
    STRB        r12, [r2], #1
    BGT         .Luv_narrow_col
    ADD         r1, r1, r10
    ADD         r2, r2, r11
    SUBS        r4, r4, #1
    BGT         .Luv_narrow_row
    B           .Lexit

    .size ihevcd_fmt_conv_420sp_to_420sp_a9q, .-ihevcd_fmt_conv_420sp_to_420sp_a9q

    .section .note.GNU-stack,"",%progbits