@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@*******************************************************************************
@* @file
@*  ihevc_intra_pred_chroma_ver_neon.s
@*
@* @brief
@*  contains function definitions for chroma vertical intra prediction.
@*  functions are coded using neon assembly and can be compiled using
@*  rvct
@*
@* @author
@*  yogeswaran rs
@*
@* @par list of functions:
@*  - ihevc_intra_pred_chroma_ver_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@/**
@*******************************************************************************
@*
@* @brief
@*  chroma intra prediction, vertical mode: one row of reference bytes is
@*  replicated into every row of the destination block
@*
@* @par description:
@*  the row to replicate starts at pu1_ref + 4 * nt + 2 (byte offset) and is
@*  2 * nt bytes long. it is stored unchanged at pu1_dst + row * dst_strd for
@*  every row of the block. three size paths exist:
@*      nt == 4 :  4 rows of  8 bytes
@*      nt == 8 :  8 rows of 16 bytes
@*      nt  > 8 : 32 bytes per row; the row counter is written for nt == 16
@*                (16 rows) and other values above 8 are not supported
@*  the data is presumably interleaved cb/cr samples (the routine only copies
@*  bytes, so the sample layout does not affect the result)
@*
@* @param[in] pu1_ref
@*  uword8 pointer to the source (reference sample array)
@*
@* @param[out] pu1_dst
@*  uword8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] nt
@*  size of transform block (4, 8 or 16)
@*
@* @param[in] mode
@*  prediction mode (not read by this routine)
@*
@* @returns
@*  none
@*
@* @remarks
@*  r4-r12 are saved on entry and restored on exit. r2, the condition flags
@*  and the neon registers d0 and d20-d23 are modified.
@*
@*******************************************************************************
@*/

@void ihevc_intra_pred_chroma_ver(uword8 *pu1_ref,
@                                 word32 src_strd,
@                                 uword8 *pu1_dst,
@                                 word32 dst_strd,
@                                 word32 nt,
@                                 word32 mode)
@**************variables vs registers*****************************************
@r0 => *pu1_ref
@r1 => src_strd
@r2 => *pu1_dst
@r3 => dst_strd

@stack contents from #40
@   nt
@   mode

.text
.align 4




.globl ihevc_intra_pred_chroma_ver_a9q

.type ihevc_intra_pred_chroma_ver_a9q, %function

ihevc_intra_pred_chroma_ver_a9q:

    stmfd       sp!, {r4-r12, r14}          @save r4-r12 and lr (10 words = 40 bytes)

    ldr         r4, [sp, #40]               @r4 = nt, first stack argument above the saved registers
    lsl         r5, r4, #2                  @r5 = 4 * nt
    add         r5, r5, #2                  @r5 = 4 * nt + 2
    add         r6, r0, r5                  @r6 = pu1_ref + 4 * nt + 2, start of the row to replicate

    @row pointers shared by all three size paths (none of these touch the flags)
    add         r5, r2, r3                  @r5  = pu1_dst + 1 * dst_strd
    add         r8, r5, r3                  @r8  = pu1_dst + 2 * dst_strd
    add         r10, r8, r3                 @r10 = pu1_dst + 3 * dst_strd

    cmp         r4, #8
    beq         blk_8                       @nt == 8
    blt         blk_4                       @nt  < 8
                                            @nt  > 8 falls through

copy_16:
    vld2.8      {d20, d21}, [r6]!           @row bytes  0:15 (vld2/vst2 pair restores the original byte order)
    vld2.8      {d22, d23}, [r6]            @row bytes 16:31

    lsl         r11, r3, #2                 @r11 = 4 * dst_strd
    sub         r11, r11, #16               @minus the 16 bytes already added by the first store's writeback

    @rows 0-3: first half of each row, then second half; afterwards every
    @row pointer has moved down by exactly 4 rows
    vst2.8      {d20, d21}, [r2]!
    vst2.8      {d20, d21}, [r5]!
    vst2.8      {d20, d21}, [r8]!
    vst2.8      {d20, d21}, [r10]!

    vst2.8      {d22, d23}, [r2], r11
    vst2.8      {d22, d23}, [r5], r11
    vst2.8      {d22, d23}, [r8], r11
    vst2.8      {d22, d23}, [r10], r11

    sub         r4, r4, #4                  @4 rows done

kernel_copy_16:
    @each pass of this loop writes 12 rows (three groups of four)
    vst2.8      {d20, d21}, [r2]!
    vst2.8      {d20, d21}, [r5]!
    vst2.8      {d20, d21}, [r8]!
    vst2.8      {d20, d21}, [r10]!

    vst2.8      {d22, d23}, [r2], r11
    vst2.8      {d22, d23}, [r5], r11
    vst2.8      {d22, d23}, [r8], r11
    vst2.8      {d22, d23}, [r10], r11

    vst2.8      {d20, d21}, [r2]!
    vst2.8      {d20, d21}, [r5]!
    vst2.8      {d20, d21}, [r8]!
    vst2.8      {d20, d21}, [r10]!

    vst2.8      {d22, d23}, [r2], r11
    vst2.8      {d22, d23}, [r5], r11
    vst2.8      {d22, d23}, [r8], r11
    vst2.8      {d22, d23}, [r10], r11

    vst2.8      {d20, d21}, [r2]!
    vst2.8      {d20, d21}, [r5]!
    vst2.8      {d20, d21}, [r8]!
    vst2.8      {d20, d21}, [r10]!

    vst2.8      {d22, d23}, [r2], r11
    vst2.8      {d22, d23}, [r5], r11
    vst2.8      {d22, d23}, [r8], r11
    vst2.8      {d22, d23}, [r10], r11

    subs        r4, r4, #12                 @12 more rows done; reaches 0 after one pass when nt == 16
    bne         kernel_copy_16

    b           end_func

blk_8:
    vld2.8      {d20, d21}, [r6]            @the whole 16-byte row; nothing beyond it is needed

    lsl         r11, r3, #2                 @r11 = 4 * dst_strd

    @rows 0-3, each pointer then steps down 4 rows
    vst2.8      {d20, d21}, [r2], r11
    vst2.8      {d20, d21}, [r5], r11
    vst2.8      {d20, d21}, [r8], r11
    vst2.8      {d20, d21}, [r10], r11

    @rows 4-7
    vst2.8      {d20, d21}, [r2]
    vst2.8      {d20, d21}, [r5]
    vst2.8      {d20, d21}, [r8]
    vst2.8      {d20, d21}, [r10]

    b           end_func                    @this path is only entered with nt == 8, so all rows are done

blk_4:
    vld1.8      {d0}, [r6]                  @the whole 8-byte row

    vst1.8      {d0}, [r2]                  @row 0
    vst1.8      {d0}, [r5]                  @row 1
    vst1.8      {d0}, [r8]                  @row 2
    vst1.8      {d0}, [r10]                 @row 3

end_func:
    ldmfd       sp!, {r4-r12, r15}          @restore r4-r12 and return by loading the saved lr into pc