@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_fmt_conv_420sp_to_420sp.s
@*
@* @brief
@*  contains function definitions for format conversions
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  ihevcd_fmt_conv_420sp_to_420sp_a9q
@*
@* @remarks
@*  Syntax : GNU as, ARM (A32) instruction set with NEON, ELF target.
@*  ABI    : AAPCS (first four arguments in r0-r3, the rest on the stack).
@*
@*******************************************************************************/

        .equ    DO1STROUNDING, 0

        @ ARM
        @
        @ PRESERVE8

        .text
        .p2align 2

@/*****************************************************************************
@*                                                                            *
@*  Function Name    : ihevcd_fmt_conv_420sp_to_420sp_a9q()                   *
@*                                                                            *
@*  Description      : Copies a YUV 420 semi-planar image (Y plane followed   *
@*                     by an interleaved UV plane) into a second 420SP        *
@*                     buffer that may use different row strides.             *
@*                     'height' rows of 'width' bytes are copied for luma,    *
@*                     then 'height / 2' rows of 'width' bytes for the        *
@*                     interleaved chroma plane.                              *
@*                                                                            *
@*  Arguments        : r0          pu1_y                                      *
@*                     r1          pu1_uv                                     *
@*                     r2          pu1_dest_y                                 *
@*                     r3          pu1_dest_uv                                *
@*                     [sp, #40]   u2_width                                   *
@*                     [sp, #44]   u2_height                                  *
@*                     [sp, #48]   u2_stridey                                 *
@*                     [sp, #52]   u2_stridechroma                            *
@*                     [sp, #56]   u2_dest_stridey                            *
@*                     [sp, #60]   u2_dest_stridechroma                       *
@*                     (stack offsets are relative to sp AFTER the 40-byte    *
@*                     register save done in the prologue; each stack         *
@*                     argument is read as a full 32-bit word)                *
@*                                                                            *
@*  Values Returned  : None                                                   *
@*                                                                            *
@*  Register Usage   : r0 - r12, d0 - d3                                      *
@*                       r0  / r2  : luma source / destination cursor         *
@*                       r1  / r2  : chroma source / destination cursor       *
@*                       r4        : rows remaining                           *
@*                       r6        : bytes remaining in the current row       *
@*                       r8        : width in bytes                           *
@*                       r10 / r11 : source / destination (stride - width)    *
@*                       r12       : scratch byte for narrow rows             *
@*                                                                            *
@*  Stack Usage      : 40 bytes (r4-r12 and lr)                               *
@*                                                                            *
@*  Interruptibility : Interruptible                                          *
@*                                                                            *
@*  Known Limitations                                                         *
@*       Assumptions : Image width is assumed to be a multiple of 2.          *
@*                     Image height is assumed to be even.                    *
@*                     Nothing is copied when width or height is not          *
@*                     positive (compared as signed 32-bit values).           *
@*                     Rows narrower than one vector chunk (32 bytes for      *
@*                     luma, 16 bytes for chroma) are copied bytewise so      *
@*                     that no access falls outside the row.                  *
@*                                                                            *
@*  Revision History :                                                        *
@*     DD MM YYYY   Author(s)   Changes (Describe the changes made)           *
@*     16 05 2012   Naveen SR   draft                                         *
@*                                                                            *
@*****************************************************************************/

        .global ihevcd_fmt_conv_420sp_to_420sp_a9q
        .type   ihevcd_fmt_conv_420sp_to_420sp_a9q, %function

ihevcd_fmt_conv_420sp_to_420sp_a9q:

        STMFD   sp!, {r4-r12, lr}           @ 10 words = 40 bytes; stack args now start at sp+40

        LDR     r5, [sp, #56]               @ r5 = u2_dest_stridey
        LDR     r7, [sp, #48]               @ r7 = u2_stridey
        LDR     r8, [sp, #40]               @ r8 = u2_width
        LDR     r9, [sp, #44]               @ r9 = u2_height

        @ The row loops below test their counters only after the first pass,
        @ so an empty image has to be rejected before entering them.
        CMP     r8, #0
        BLE     .Lfmt_conv_exit
        CMP     r9, #0
        BLE     .Lfmt_conv_exit

        SUB     r10, r7, r8                 @ source luma: end of row -> start of next row
        SUB     r11, r5, r8                 @ dest   luma: end of row -> start of next row

@/* Copy Y */
        MOV     r4, r9                      @ r4 = luma rows remaining (> 0 here)

.Ly_row_loop:
        MOV     r6, r8                      @ r6 = bytes remaining in this row (> 0 here)
        CMP     r6, #32
        BLT     .Ly_narrow_row              @ row shorter than one 32-byte chunk

.Ly_col_loop:
        PLD     [r0, #128]
        SUB     r6, r6, #32
        VLD1.8  {d0-d3}, [r0]!              @ move 32 bytes, both cursors post-incremented
        VST1.8  {d0-d3}, [r2]!
        CMP     r6, #32
        BGE     .Ly_col_loop

        CMP     r6, #0
        BEQ     .Ly_row_end

        @ 1..31 bytes are left and at least 32 bytes of this row have already
        @ been copied. Step both cursors back so that one more 32-byte chunk
        @ ends exactly at the end of the row; the overlapping bytes are simply
        @ copied a second time.
        @ Example: width 162 -> the loop covers bytes 0..159, the cursors are
        @ moved back to byte 130 and bytes 130..161 are copied.
        RSB     r6, r6, #32                 @ r6 = 32 - remaining = overlap length
        SUB     r0, r0, r6
        SUB     r2, r2, r6
        VLD1.8  {d0-d3}, [r0]!
        VST1.8  {d0-d3}, [r2]!
        B       .Ly_row_end

.Ly_narrow_row:
        @ width < 32: there is no earlier chunk to overlap with, so copy the
        @ row one byte at a time (r6 >= 1 on entry).
        LDRB    r12, [r0], #1
        STRB    r12, [r2], #1
        SUBS    r6, r6, #1
        BGT     .Ly_narrow_row

.Ly_row_end:
        ADD     r0, r0, r10                 @ advance both cursors to the next row
        ADD     r2, r2, r11
        SUBS    r4, r4, #1
        BGT     .Ly_row_loop

@/* Copy UV */
        LDR     r5, [sp, #60]               @ r5 = u2_dest_stridechroma
        LDR     r7, [sp, #52]               @ r7 = u2_stridechroma

        MOV     r4, r9, LSR #1              @ r4 = chroma rows = height / 2
        CMP     r4, #0
        BEQ     .Lfmt_conv_exit             @ height == 1 leaves no chroma row to copy

        @ The width is deliberately not halved: an interleaved UV row holds
        @ width/2 U bytes plus width/2 V bytes, i.e. 'width' bytes in total.
        MOV     r2, r3                      @ r2 = pu1_dest_uv
        SUB     r10, r7, r8                 @ source chroma: end of row -> start of next row
        SUB     r11, r5, r8                 @ dest   chroma: end of row -> start of next row

.Luv_row_loop:
        MOV     r6, r8                      @ r6 = bytes remaining in this row (> 0 here)
        CMP     r6, #16
        BLT     .Luv_narrow_row             @ row shorter than one 16-byte chunk

.Luv_col_loop:
        PLD     [r1, #128]
        SUB     r6, r6, #16
        VLD1.8  {d0-d1}, [r1]!              @ move 16 bytes, both cursors post-incremented
        VST1.8  {d0-d1}, [r2]!
        CMP     r6, #16
        BGE     .Luv_col_loop

        CMP     r6, #0
        BEQ     .Luv_row_end

        @ 1..15 bytes are left and at least 16 bytes of this row have already
        @ been copied: step back and copy one overlapping 16-byte chunk that
        @ ends exactly at the end of the row.
        @ Example: width 162 -> the loop covers bytes 0..159, the cursors are
        @ moved back to byte 146 and bytes 146..161 are copied.
        RSB     r6, r6, #16                 @ r6 = 16 - remaining = overlap length
        SUB     r1, r1, r6
        SUB     r2, r2, r6
        VLD1.8  {d0-d1}, [r1]!
        VST1.8  {d0-d1}, [r2]!
        B       .Luv_row_end

.Luv_narrow_row:
        @ width < 16: copy the row one byte at a time (r6 >= 1 on entry).
        LDRB    r12, [r1], #1
        STRB    r12, [r2], #1
        SUBS    r6, r6, #1
        BGT     .Luv_narrow_row

.Luv_row_end:
        ADD     r1, r1, r10                 @ advance both cursors to the next row
        ADD     r2, r2, r11
        SUBS    r4, r4, #1
        BGT     .Luv_row_loop

.Lfmt_conv_exit:
        LDMFD   sp!, {r4-r12, pc}           @ restore callee-saved registers and return

        .size   ihevcd_fmt_conv_420sp_to_420sp_a9q, . - ihevcd_fmt_conv_420sp_to_420sp_a9q

        .section .note.GNU-stack,"",%progbits