///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* //file
//*  ihevcd_fmt_conv_420sp_to_420sp.s
//*
//* //brief
//*  contains function definitions for format conversions
//*
//* //author
//*  ittiam
//*
//* //par list of functions:
//*  ihevcd_fmt_conv_420sp_to_420sp_av8
//*
//* //remarks
//*  GNU assembler syntax, AArch64 (ARMv8-A + NEON), AAPCS64 calling convention.
//*
//*******************************************************************************/
    .equ DO1STROUNDING, 0

    // ARM
    //
    // PRESERVE8

.text
.p2align 2

.include "ihevc_neon_macros.s"




///*****************************************************************************
//*                                                                            *
//*  Function Name    : ihevcd_fmt_conv_420sp_to_420sp_av8()                   *
//*                                                                            *
//*  Description      : Copies a YUV 420SP image (Y plane followed by an       *
//*                     interleaved UV plane) into a 420SP destination that    *
//*                     may use different row strides.                         *
//*                                                                            *
//*  Arguments        : x0        pu1_y                 (source Y plane)       *
//*                     x1        pu1_uv                (source UV plane)      *
//*                     x2        pu1_dest_y            (destination Y plane)  *
//*                     x3        pu1_dest_uv           (destination UV plane) *
//*                     w4        u2_width              (bytes per row)        *
//*                     w5        u2_height             (luma rows)            *
//*                     w6        u2_stridey            (source Y stride)      *
//*                     w7        u2_stridechroma       (source UV stride)     *
//*                     [sp, #0]  u2_dest_stridey       (at function entry)    *
//*                     [sp, #8]  u2_dest_stridechroma  (at function entry)    *
//*                                                                            *
//*                     The integer arguments are treated as 32-bit signed     *
//*                     values and sign-extended before use.                   *
//*                     NOTE(review): this matches how the stack arguments     *
//*                     are loaded; confirm against the C prototype.           *
//*                                                                            *
//*  Values Returned  : None                                                   *
//*                                                                            *
//*  Register Usage   : x0 - x12, x20, v0 - v3                                 *
//*                                                                            *
//*  Stack Usage      : 16 bytes (x19/x20) plus whatever push_v_regs saves.    *
//*                     The #80/#88 argument offsets below assume that         *
//*                     push_v_regs reserves 64 bytes - confirm in             *
//*                     ihevc_neon_macros.s.                                   *
//*                                                                            *
//*  Interruptibility : Interruptible                                          *
//*                                                                            *
//*  Known Limitations                                                         *
//*       Assumptions: Image Width:  Assumed to be multiple of 2 and           *
//*                    Image Height: Assumed to be even.                       *
//*       The first 32-byte (Y) / 16-byte (UV) chunk of every row is copied    *
//*       unconditionally and a ragged tail is handled by backing up and       *
//*       re-copying one full chunk. For widths below 32 this accesses bytes   *
//*       before the start of the row in both source and destination.          *
//*       The row loops are do-while shaped: one row is copied even when the   *
//*       row count is zero.                                                   *
//*                                                                            *
//*  Revision History :                                                        *
//*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
//*         16 05 2012   Naveen SR       draft                                 *
//*                                                                            *
//*****************************************************************************/

    .global ihevcd_fmt_conv_420sp_to_420sp_av8
    .type   ihevcd_fmt_conv_420sp_to_420sp_av8, %function
ihevcd_fmt_conv_420sp_to_420sp_av8:

    push_v_regs
    stp         x19, x20, [sp, #-16]!       // x20 is used as a scratch register below

    // The upper 32 bits of a register carrying a 32-bit argument are
    // unspecified under AAPCS64, so widen explicitly before 64-bit arithmetic.
    sxtw        x12, w7                     // x12 = u2_stridechroma (read before x7 is reused)
    sxtw        x8, w4                      // x8  = u2_width
    sxtw        x9, w5                      // x9  = u2_height
    sxtw        x7, w6                      // x7  = u2_stridey

    ldr         w5, [sp, #80]               // u2_dest_stridey (first stack argument)
    sxtw        x5, w5

    sub         x10, x7, x8                 // source Y: bytes to skip after each row
    sub         x11, x5, x8                 // dest   Y: bytes to skip after each row

    // ---------------------------------------------------------------------
    // Copy Y plane
    // ---------------------------------------------------------------------

    mov         x4, x9                      // x4 = rows remaining
y_row_loop:
    mov         x6, x8                      // x6 = bytes remaining in this row

y_col_loop:
    prfm        PLDL1KEEP, [x0, #128]
    sub         x6, x6, #32
    ld1         {v0.8b}, [x0], #8
    ld1         {v1.8b}, [x0], #8
    ld1         {v2.8b}, [x0], #8
    ld1         {v3.8b}, [x0], #8
    st1         {v0.8b}, [x2], #8
    st1         {v1.8b}, [x2], #8
    st1         {v2.8b}, [x2], #8
    st1         {v3.8b}, [x2], #8
    cmp         x6, #32
    bge         y_col_loop                  // another full 32-byte chunk is available
    cmp         x6, #0
    beq         y_col_loop_end              // width was a multiple of 32

    // Ragged tail: x6 bytes remain. Step both pointers back by (32 - x6)
    // so that one more full 32-byte copy ends exactly at the end of the row.
    // Example: width 162 -> the loop copied 160 bytes, 2 remain, the pointers
    // move back by 30 and bytes 130..161 are copied (130..159 a second time).
    sub         x20, x6, #32
    neg         x6, x20                     // x6 = 32 - remaining
    sub         x0, x0, x6
    sub         x2, x2, x6
    ld1         {v0.8b}, [x0], #8
    ld1         {v1.8b}, [x0], #8
    ld1         {v2.8b}, [x0], #8
    ld1         {v3.8b}, [x0], #8
    st1         {v0.8b}, [x2], #8
    st1         {v1.8b}, [x2], #8
    st1         {v2.8b}, [x2], #8
    st1         {v3.8b}, [x2], #8

y_col_loop_end:
    add         x0, x0, x10                 // advance to the next source row
    add         x2, x2, x11                 // advance to the next destination row
    subs        x4, x4, #1
    bgt         y_row_loop

    // ---------------------------------------------------------------------
    // Copy interleaved UV plane: height/2 rows of u2_width bytes each
    // ---------------------------------------------------------------------

    ldr         w5, [sp, #88]               // u2_dest_stridechroma (second stack argument)
    sxtw        x5, w5

    asr         x9, x9, #1                  // chroma rows = height / 2

    mov         x2, x3                      // x2 = pu1_dest_uv

    sub         x10, x12, x8                // source UV increment, from u2_stridechroma
    sub         x11, x5, x8                 // dest   UV increment

    mov         x4, x9                      // x4 = rows remaining
uv_row_loop:
    mov         x6, x8                      // x6 = bytes remaining in this row

uv_col_loop:
    prfm        PLDL1KEEP, [x1, #128]
    sub         x6, x6, #16
    ld1         {v0.8b}, [x1], #8
    ld1         {v1.8b}, [x1], #8
    st1         {v0.8b}, [x2], #8
    st1         {v1.8b}, [x2], #8
    cmp         x6, #16
    bge         uv_col_loop                 // another full 16-byte chunk is available
    cmp         x6, #0
    beq         uv_col_loop_end             // width was a multiple of 16

    // Ragged tail: step back by (16 - x6) and copy one more full 16-byte
    // chunk so that it ends exactly at the end of the row.
    // Example: width 162 -> the loop copied 160 bytes, 2 remain, the pointers
    // move back by 14 and bytes 146..161 are copied.
    sub         x20, x6, #16
    neg         x6, x20                     // x6 = 16 - remaining
    sub         x1, x1, x6
    sub         x2, x2, x6
    ld1         {v0.8b}, [x1], #8
    ld1         {v1.8b}, [x1], #8
    st1         {v0.8b}, [x2], #8
    st1         {v1.8b}, [x2], #8

uv_col_loop_end:
    add         x1, x1, x10                 // advance to the next source row
    add         x2, x2, x11                 // advance to the next destination row
    subs        x4, x4, #1
    bgt         uv_row_loop

exit:
    ldp         x19, x20, [sp], #16
    pop_v_regs
    ret


    .section .note.GNU-stack,"",%progbits
