@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/**
@******************************************************************************
@*
@* @brief : Evaluate the best intra 16x16 mode (among VERT, HORZ and DC)
@*          and perform the prediction.
@*
@* @par Description
@*  Computes the SAD of the 16x16 source block against the vertical,
@*  horizontal and DC predictors, picks the mode with the smallest SAD among
@*  the modes flagged as valid, and writes the predicted 16x16 block for that
@*  mode to the destination buffer.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] pu1_ngbr_pels_i16
@*  UWORD8 pointer to neighbouring pels. 33 bytes are addressed:
@*    bytes  0..15 : column used for horizontal prediction; byte 15 predicts
@*                   row 0 and byte 0 predicts row 15
@*    byte   16    : skipped by this routine (presumably the top-left pel -
@*                   TODO confirm against the caller)
@*    bytes 17..32 : row used for vertical prediction
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] u4_n_avblty
@*  availability of neighbouring pixels (bit 0: left, bit 2: top)
@*
@* @param[out] u4_intra_mode
@*  Pointer to the variable in which best mode is returned
@*  (0: vertical, 1: horizontal, 2: DC)
@*
@* @param[out] pu4_sadmin
@*  Pointer to the variable in which minimum sad is returned
@*
@* @param[in] u4_valid_intra_modes
@*  Says what all modes are valid (bit 0: vertical, bit 1: horizontal,
@*  bit 2: DC)
@*
@* @return      none
@*
@* @remarks
@*  - The DC sum always covers all 32 loaded neighbour bytes; only the
@*    rounding term and the shift depend on u4_n_avblty.
@*    NOTE(review): this presumably relies on the caller zeroing the
@*    unavailable neighbours - confirm.
@*  - A mode that is not valid gets the SAD (1 << 30) before the comparison.
@*  - Ties: vertical is chosen when its SAD is <= both others; otherwise
@*    horizontal is chosen when vertical > horizontal and horizontal <= DC;
@*    otherwise DC.
@*
@******************************************************************************
@*/
@
@void ih264e_evaluate_intra16x16_modes_a9q(UWORD8 *pu1_src,
@                                          UWORD8 *pu1_ngbr_pels_i16,
@                                          UWORD8 *pu1_dst,
@                                          UWORD32 src_strd,
@                                          UWORD32 dst_strd,
@                                          WORD32 u4_n_avblty,
@                                          UWORD32 *u4_intra_mode,
@                                          WORD32 *pu4_sadmin,
@                                          UWORD32 u4_valid_intra_modes)
@
.text
.p2align 2

@ Stack frame bookkeeping (all sizes in bytes).
    .equ    I16_GPR_SAVE_SIZE,   40     @ r4-r12, lr pushed on entry
    .equ    I16_VFP_SAVE_SIZE,   64     @ d8-d15 pushed afterwards
    .equ    I16_FRAME_SIZE,      I16_GPR_SAVE_SIZE + I16_VFP_SAVE_SIZE

@ Offsets of the stack-passed arguments relative to sp at function entry.
    .equ    I16_ARG_DST_STRD,    0
    .equ    I16_ARG_N_AVBLTY,    4
    .equ    I16_ARG_INTRA_MODE,  8
    .equ    I16_ARG_SADMIN,      12
    .equ    I16_ARG_VALID_MODES, 16

    .global ih264e_evaluate_intra16x16_modes_a9q

ih264e_evaluate_intra16x16_modes_a9q:

@ On entry:
@   r0 = pu1_src, r1 = pu1_ngbr_pels_i16, r2 = pu1_dst, r3 = src_strd
@   remaining arguments are on the stack and are loaded below into
@   r4 = dst_strd, r5 = u4_n_avblty, r6 = u4_intra_mode, r7 = pu4_sadmin

    push          {r4-r12, lr}          @ save callee-saved core registers
    ldr           r5, [sp, #(I16_GPR_SAVE_SIZE + I16_ARG_N_AVBLTY)]  @ r5 = u4_n_avblty

    vpush         {d8-d15}              @ q4-q7 are used below
    vld1.32       {d8, d9}, [r1]!       @ q4 = neighbour bytes 0..15
    sub           r6, r1, #1            @ r6 -> neighbour byte 15 (predictor of row 0)
    add           r1, r1, #1            @ step over neighbour byte 16
    mov           r10, #0               @ r10 = left-available flag
    vld1.32       {d10, d11}, [r1]!     @ q5 = neighbour bytes 17..32
    mov           r11, #0               @ r11 = top-available flag
    mov           r4, #0                @ r4 = DC offset (128 if nothing available)

    @ left available?
    ands          r7, r5, #1
    movne         r10, #1

    @ top available?
    ands          r8, r5, #4
    lsl           r9, r10, #3           @ 8 * left flag (does not touch flags)
    movne         r11, #1
    lsl           r12, r11, #3          @ 8 * top flag
    adds          r8, r9, r12           @ r8 = rounding term: 0, 8 or 16

    @ neither available: DC falls back to 128
    moveq         r4, #128

    @ ---- DC value: sum of the 32 loaded neighbour bytes ----
    vaddl.u8      q15, d8, d9
    vaddl.u8      q14, d10, d11
    vadd.u16      q15, q14, q15
    vadd.u16      d30, d31, d30
    vpadd.u16     d30, d30, d30
    vpadd.u16     d30, d30, d30         @ d30[0] = total

    vmov.u16      r7, d30[0]
    add           r7, r7, r8            @ add rounding term
    add           r11, r11, #3
    add           r8, r10, r11          @ shift = 3 + left flag + top flag

    lsr           r7, r7, r8
    add           r7, r4, r7            @ r7 = DC value
    vld1.32       {d0, d1}, [r0], r3    @ source row 0
    vdup.8        q15, r7               @ q15 = DC predictor row

    @ ---- SADs of row 0 for all three modes ----
    ldrb          r7, [r6]
    vdup.8        q10, r7               @ q10 = horizontal predictor, row 0
    vabdl.u8      q8, d0, d10           @ vertical   -> q8/q9
    vabdl.u8      q9, d1, d11
    sub           r6, r6, #1            @ next row's predictor byte is one lower
    vabdl.u8      q13, d0, d20          @ horizontal -> q13/q14
    vabdl.u8      q14, d1, d21
    mov           r1, #15               @ 15 rows left
    vabdl.u8      q11, d0, d30          @ DC         -> q11/q12
    vabdl.u8      q12, d1, d31

.Li16_row_loop:
    vld1.32       {d2, d3}, [r0], r3    @ source row i
    vabal.u8      q11, d2, d30          @ DC
    ldrb          r7, [r6]
    vabal.u8      q12, d3, d31          @ DC

    vabal.u8      q8, d2, d10           @ vertical
    vdup.8        q10, r7               @ q10 = horizontal predictor, row i
    sub           r6, r6, #1
    vabal.u8      q9, d3, d11           @ vertical

    subs          r1, r1, #1
    vabal.u8      q13, d2, d20          @ horizontal
    vabal.u8      q14, d3, d21          @ horizontal
    bne           .Li16_row_loop

    @ ---- reduce the per-lane accumulators to one SAD per mode ----
    vadd.i16      q9, q9, q8            @ vertical
    vadd.i16      d18, d19, d18         @ vertical
    vpaddl.u16    d18, d18              @ vertical
    vadd.i16      q14, q13, q14         @ horizontal
    vadd.i16      d28, d29, d28         @ horizontal
    vpaddl.u32    d18, d18              @ vertical
    vpaddl.u16    d28, d28              @ horizontal

    vpaddl.u32    d28, d28              @ horizontal
    vmov.u32      r8, d18[0]            @ r8 = vertical SAD
    vadd.i16      q12, q11, q12         @ DC
    vmov.u32      r9, d28[0]            @ r9 = horizontal SAD
    mov           r11, #1
    vadd.i16      d24, d24, d25         @ DC
    lsl           r11, r11, #30         @ r11 = SAD substituted for invalid modes

    ldr           r0, [sp, #(I16_FRAME_SIZE + I16_ARG_VALID_MODES)]  @ r0 = u4_valid_intra_modes

    ands          r7, r0, #1            @ vertical mode valid?
    moveq         r8, r11
    vpaddl.u16    d24, d24              @ DC

    ands          r6, r0, #2            @ horizontal mode valid?
    moveq         r9, r11
    vpaddl.u32    d24, d24              @ DC

    vmov.u32      r10, d24[0]           @ r10 = DC SAD

    ldr           r4, [sp, #(I16_FRAME_SIZE + I16_ARG_DST_STRD)]     @ r4 = dst_strd
    ldr           r7, [sp, #(I16_FRAME_SIZE + I16_ARG_SADMIN)]       @ r7 = pu4_sadmin

    ands          r6, r0, #4            @ DC mode valid?
    moveq         r10, r11

    ldr           r6, [sp, #(I16_FRAME_SIZE + I16_ARG_INTRA_MODE)]   @ r6 = u4_intra_mode

    @ ---- pick the smallest SAD ----
    cmp           r8, r9
    bgt           .Li16_horz_or_dc      @ vertical > horizontal
    cmp           r8, r10
    bgt           .Li16_pick_dc         @ vertical > DC

    @ ---- vertical prediction ----
    str           r8, [r7]              @ *pu4_sadmin = vertical SAD
    mov           r8, #0
    str           r8, [r6]              @ *u4_intra_mode = 0
    vmov          q15, q5               @ every output row = neighbour bytes 17..32

    b             .Li16_store_rows

.Li16_horz_or_dc:
    cmp           r9, r10
    bgt           .Li16_pick_dc         @ horizontal > DC

    @ ---- horizontal prediction ----
    @ Row n is filled with neighbour byte (15 - n); broadcasts and stores are
    @ interleaved, so the trailing number on each line is the row it serves.
    vdup.8        q5, d9[7]             @ row 0
    str           r9, [r7]              @ *pu4_sadmin = horizontal SAD
    vdup.8        q6, d9[6]             @ row 1
    mov           r9, #1
    vdup.8        q7, d9[5]             @ row 2
    vst1.32       {d10, d11}, [r2], r4  @ row 0
    vdup.8        q8, d9[4]             @ row 3
    str           r9, [r6]              @ *u4_intra_mode = 1
    vdup.8        q9, d9[3]             @ row 4
    vst1.32       {d12, d13}, [r2], r4  @ row 1
    vdup.8        q10, d9[2]            @ row 5
    vst1.32       {d14, d15}, [r2], r4  @ row 2
    vdup.8        q11, d9[1]            @ row 6
    vst1.32       {d16, d17}, [r2], r4  @ row 3
    vdup.8        q12, d9[0]            @ row 7
    vst1.32       {d18, d19}, [r2], r4  @ row 4
    vdup.8        q13, d8[7]            @ row 8
    vst1.32       {d20, d21}, [r2], r4  @ row 5
    vdup.8        q14, d8[6]            @ row 9
    vst1.32       {d22, d23}, [r2], r4  @ row 6
    vdup.8        q15, d8[5]            @ row 10
    vst1.32       {d24, d25}, [r2], r4  @ row 7
    vdup.8        q1, d8[4]             @ row 11
    vst1.32       {d26, d27}, [r2], r4  @ row 8
    vdup.8        q2, d8[3]             @ row 12
    vst1.32       {d28, d29}, [r2], r4  @ row 9
    vdup.8        q3, d8[2]             @ row 13
    vst1.32       {d30, d31}, [r2], r4  @ row 10
    vdup.8        q5, d8[1]             @ row 14 (q5 reused, row 0 already stored)
    vst1.32       {d2, d3}, [r2], r4    @ row 11
    vdup.8        q6, d8[0]             @ row 15 (q6 reused, row 1 already stored)
    vst1.32       {d4, d5}, [r2], r4    @ row 12
    vst1.32       {d6, d7}, [r2], r4    @ row 13
    vst1.32       {d10, d11}, [r2], r4  @ row 14
    vst1.32       {d12, d13}, [r2], r4  @ row 15
    b             .Li16_exit

.Li16_pick_dc:
    @ ---- DC prediction ----
    str           r10, [r7]             @ *pu4_sadmin = DC SAD
    mov           r10, #2
    str           r10, [r6]             @ *u4_intra_mode = 2

.Li16_store_rows:
    @ Shared by the vertical and DC paths: q15 holds the row that is written
    @ to each of the 16 destination rows.
    .rept         16
    vst1.32       {d30, d31}, [r2], r4
    .endr

.Li16_exit:
    vpop          {d8-d15}
    pop           {r4-r12, pc}          @ restore registers and return