@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_padding_neon.s
@ *
@ * @brief
@ *  contains function definitions for left/right padding of luma and
@ *  interleaved chroma planes
@ *
@ * @author
@ *  naveen sr
@ *
@ * @par list of functions:
@ *  - ihevc_pad_left_luma_a9q()
@ *  - ihevc_pad_left_chroma_a9q()
@ *  - ihevc_pad_right_luma_a9q()
@ *  - ihevc_pad_right_chroma_a9q()
@ *
@ * @remarks
@ *  Syntax : GNU as, ARM (AArch32) with NEON, '@' starts a comment.
@ *  ABI    : arguments arrive in r0-r3, r4-r11 are saved and restored by
@ *           every function, q0-q3 are used as scratch.
@ *  Every function stores a FIXED 80 bytes (5 x 16-byte NEON stores) per row
@ *  and processes rows in groups of 4. See the per-function notes.
@ *
@ *******************************************************************************
@*/

@/**
@*******************************************************************************
@*
@* @brief
@*   padding (luma block) at the left of a 2d array
@*
@* @par description:
@*   for every row, the byte at pu1_src[0] is replicated into the 80 bytes
@*   starting at (pu1_src - pad_size)
@*
@* @param[in] pu1_src   (r0)
@*  uword8 pointer to the first (left-most) sample of the first row
@*
@* @param[in] src_strd  (r1)
@*  integer source stride in bytes
@*
@* @param[in] ht        (r2)
@*  integer height of the array, expected to be a positive multiple of 4.
@*  ht <= 0 returns without storing anything. Any other value is rounded
@*  up to the next multiple of 4 rows.
@*
@* @param[in] pad_size  (r3)
@*  integer padding size. Only used to locate the start of the padding
@*  (pu1_src - pad_size). The number of bytes stored per row is always 80,
@*  so the result is a correct left pad only when pad_size == 80.
@*
@* @returns
@*  none
@*
@* @remarks
@*  C prototype:
@*    void ihevc_pad_left_luma(uword8 *pu1_src, word32 src_strd,
@*                             word32 ht, word32 pad_size)
@*  clobbers r0, r2, q0-q3 and the flags
@*
@*******************************************************************************
@*/

.text
.align 4

.globl ihevc_pad_left_luma_a9q

.type ihevc_pad_left_luma_a9q, %function

ihevc_pad_left_luma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and return address

    cmp         r2, #0                  @ nothing to pad when ht <= 0
    ble         end_luma_left

loop_start_luma_left:
    @ one iteration pads 4 rows, 80 bytes per row
    sub         r4, r0, r3              @ r4 = row 0 destination = pu1_src - pad_size

    ldrb        r8, [r0]                @ r8  = left-most sample of row 0
    add         r0, r0, r1
    ldrb        r9, [r0]                @ r9  = left-most sample of row 1
    add         r0, r0, r1
    ldrb        r10, [r0]               @ r10 = left-most sample of row 2
    add         r0, r0, r1
    ldrb        r11, [r0]               @ r11 = left-most sample of row 3
    add         r0, r0, r1              @ r0 now points at row 4 for the next iteration

    vdup.u8     q0, r8                  @ 16 copies of each sample
    vdup.u8     q1, r9
    vdup.u8     q2, r10
    vdup.u8     q3, r11

    add         r5, r4, r1              @ r5 = row 1 destination

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 x 16 = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = row 2 destination

    vst1.8      {d2,d3}, [r5]!          @ row 1: 5 x 16 = 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = row 3 destination

    vst1.8      {d4,d5}, [r6]!          @ row 2: 5 x 16 = 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ 4 rows done, the NEON stores below keep the flags

    vst1.8      {d6,d7}, [r7]!          @ row 3: 5 x 16 = 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]           @ no writeback needed, r7 is recomputed per iteration

    @ total of 4 rows * (16 * 5) = 4 * 80 bytes stored per iteration

    bgt         loop_start_luma_left    @ signed test so a non-multiple-of-4 ht still terminates

end_luma_left:
    ldmfd       sp!, {r4-r11,pc}        @ restore registers and return


@/**
@*******************************************************************************
@*
@* @brief
@*   padding (chroma block) at the left of a 2d array
@*
@* @par description:
@*   for every row, the 2-byte pair at pu1_src[0..1] is replicated into the
@*   80 bytes (40 pairs) starting at (pu1_src - pad_size)
@*
@* @param[in] pu1_src   (r0)
@*  uword8 pointer to the first (left-most) 2-byte pair of the first row
@*
@* @param[in] src_strd  (r1)
@*  integer source stride in bytes
@*
@* @param[in] ht        (r2)
@*  integer height of the array, expected to be a positive multiple of 4.
@*  ht <= 0 returns without storing anything. Any other value is rounded
@*  up to the next multiple of 4 rows.
@*
@* @param[in] pad_size  (r3)
@*  integer padding size in bytes. Only used to locate the start of the
@*  padding (pu1_src - pad_size). The number of bytes stored per row is
@*  always 80, so the result is a correct left pad only when pad_size == 80.
@*
@* @returns
@*  none
@*
@* @remarks
@*  C prototype:
@*    void ihevc_pad_left_chroma(uword8 *pu1_src, word32 src_strd,
@*                               word32 ht, word32 pad_size)
@*  clobbers r0, r2, q0-q3 and the flags
@*
@*******************************************************************************
@*/

.globl ihevc_pad_left_chroma_a9q

.type ihevc_pad_left_chroma_a9q, %function

ihevc_pad_left_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and return address

    cmp         r2, #0                  @ nothing to pad when ht <= 0
    ble         end_chroma_left

loop_start_chroma_left:
    @ one iteration pads 4 rows, 80 bytes per row
    sub         r4, r0, r3              @ r4 = row 0 destination = pu1_src - pad_size

    ldrh        r8, [r0]                @ r8  = left-most 2-byte pair of row 0
    add         r0, r0, r1
    ldrh        r9, [r0]                @ r9  = left-most 2-byte pair of row 1
    add         r0, r0, r1
    ldrh        r10, [r0]               @ r10 = left-most 2-byte pair of row 2
    add         r0, r0, r1
    ldrh        r11, [r0]               @ r11 = left-most 2-byte pair of row 3
    add         r0, r0, r1              @ r0 now points at row 4 for the next iteration

    vdup.u16    q0, r8                  @ 8 copies of each 2-byte pair
    vdup.u16    q1, r9
    vdup.u16    q2, r10
    vdup.u16    q3, r11

    add         r5, r4, r1              @ r5 = row 1 destination

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 x 16 = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = row 2 destination

    vst1.8      {d2,d3}, [r5]!          @ row 1: 5 x 16 = 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = row 3 destination

    vst1.8      {d4,d5}, [r6]!          @ row 2: 5 x 16 = 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ 4 rows done, the NEON stores below keep the flags

    vst1.8      {d6,d7}, [r7]!          @ row 3: 5 x 16 = 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]           @ no writeback needed, r7 is recomputed per iteration

    @ total of 4 rows * (16 * 5) = 4 * 80 bytes stored per iteration

    bgt         loop_start_chroma_left  @ signed test so a non-multiple-of-4 ht still terminates

end_chroma_left:
    ldmfd       sp!, {r4-r11,pc}        @ restore registers and return


@/**
@*******************************************************************************
@*
@* @brief
@*   padding (luma block) at the right of a 2d array
@*
@* @par description:
@*   for every row, the byte at pu1_src[-1] is replicated into the 80 bytes
@*   starting at pu1_src. Reference C implementation:
@*     for each row in [0, ht): memset(pu1_src, *(pu1_src - 1), pad_size)
@*                              then pu1_src += src_strd
@*
@* @param[in] pu1_src   (r0)
@*  uword8 pointer to the first byte to be written in the first row, i.e.
@*  one past the right-most sample
@*
@* @param[in] src_strd  (r1)
@*  integer source stride in bytes
@*
@* @param[in] ht        (r2)
@*  integer height of the array, expected to be a positive multiple of 4.
@*  ht <= 0 returns without storing anything. Any other value is rounded
@*  up to the next multiple of 4 rows.
@*
@* @param[in] pad_size  (r3)
@*  integer padding size. NOT read by this implementation, which always
@*  stores 80 bytes per row. It matches the reference only when
@*  pad_size == 80.
@*
@* @returns
@*  none
@*
@* @remarks
@*  C prototype:
@*    void ihevc_pad_right_luma(uword8 *pu1_src, word32 src_strd,
@*                              word32 ht, word32 pad_size)
@*  clobbers r0, r2, q0-q3 and the flags
@*
@*******************************************************************************
@*/

.globl ihevc_pad_right_luma_a9q

.type ihevc_pad_right_luma_a9q, %function

ihevc_pad_right_luma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and return address

    cmp         r2, #0                  @ nothing to pad when ht <= 0
    ble         end_luma_right

loop_start_luma_right:
    @ one iteration pads 4 rows, 80 bytes per row
    mov         r4, r0                  @ r4 = row 0 destination = pu1_src

    ldrb        r8, [r0, #-1]           @ r8  = right-most sample of row 0
    add         r0, r0, r1
    ldrb        r9, [r0, #-1]           @ r9  = right-most sample of row 1
    add         r0, r0, r1
    ldrb        r10, [r0, #-1]          @ r10 = right-most sample of row 2
    add         r0, r0, r1
    ldrb        r11, [r0, #-1]          @ r11 = right-most sample of row 3
    add         r0, r0, r1              @ r0 now points at row 4 for the next iteration

    add         r5, r4, r1              @ r5 = row 1 destination
    add         r6, r5, r1              @ r6 = row 2 destination
    add         r7, r6, r1              @ r7 = row 3 destination

    vdup.u8     q0, r8                  @ 16 copies of each sample
    vdup.u8     q1, r9
    vdup.u8     q2, r10
    vdup.u8     q3, r11

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 x 16 = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    vst1.8      {d2,d3}, [r5]!          @ row 1: 5 x 16 = 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    subs        r2, r2, #4              @ 4 rows done, the NEON stores below keep the flags

    vst1.8      {d4,d5}, [r6]!          @ row 2: 5 x 16 = 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    vst1.8      {d6,d7}, [r7]!          @ row 3: 5 x 16 = 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4 rows * (16 * 5) = 4 * 80 bytes stored per iteration

    bgt         loop_start_luma_right   @ signed test so a non-multiple-of-4 ht still terminates

end_luma_right:
    ldmfd       sp!, {r4-r11,pc}        @ restore registers and return


@/**
@*******************************************************************************
@*
@* @brief
@*   padding (chroma block) at the right of a 2d array
@*
@* @par description:
@*   for every row, the 2-byte pair at pu1_src[-2..-1] is replicated into the
@*   80 bytes (40 pairs) starting at pu1_src
@*
@* @param[in] pu1_src   (r0)
@*  uword8 pointer to the first byte to be written in the first row, i.e.
@*  one past the right-most 2-byte pair
@*
@* @param[in] src_strd  (r1)
@*  integer source stride in bytes
@*
@* @param[in] ht        (r2)
@*  integer height of the array, expected to be a positive multiple of 4.
@*  ht <= 0 returns without storing anything. Any other value is rounded
@*  up to the next multiple of 4 rows.
@*
@* @param[in] pad_size  (r3)
@*  integer padding size in bytes. NOT read by this implementation, which
@*  always stores 80 bytes per row.
@*
@* @returns
@*  none
@*
@* @remarks
@*  C prototype:
@*    void ihevc_pad_right_chroma(uword8 *pu1_src, word32 src_strd,
@*                                word32 ht, word32 pad_size)
@*  clobbers r0, r2, q0-q3 and the flags
@*
@*******************************************************************************
@*/

.globl ihevc_pad_right_chroma_a9q

.type ihevc_pad_right_chroma_a9q, %function

ihevc_pad_right_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and return address

    cmp         r2, #0                  @ nothing to pad when ht <= 0
    ble         end_chroma_right

loop_start_chroma_right:
    @ one iteration pads 4 rows, 80 bytes per row
    mov         r4, r0                  @ r4 = row 0 destination = pu1_src

    ldrh        r8, [r0, #-2]           @ r8  = right-most 2-byte pair of row 0
    add         r0, r0, r1
    ldrh        r9, [r0, #-2]           @ r9  = right-most 2-byte pair of row 1
    add         r0, r0, r1
    ldrh        r10, [r0, #-2]          @ r10 = right-most 2-byte pair of row 2
    add         r0, r0, r1
    ldrh        r11, [r0, #-2]          @ r11 = right-most 2-byte pair of row 3
    add         r0, r0, r1              @ r0 now points at row 4 for the next iteration

    vdup.u16    q0, r8                  @ 8 copies of each 2-byte pair
    vdup.u16    q1, r9
    vdup.u16    q2, r10
    vdup.u16    q3, r11

    add         r5, r4, r1              @ r5 = row 1 destination

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 x 16 = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = row 2 destination

    vst1.8      {d2,d3}, [r5]!          @ row 1: 5 x 16 = 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = row 3 destination

    vst1.8      {d4,d5}, [r6]!          @ row 2: 5 x 16 = 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ 4 rows done, the NEON stores below keep the flags

    vst1.8      {d6,d7}, [r7]!          @ row 3: 5 x 16 = 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4 rows * (16 * 5) = 4 * 80 bytes stored per iteration

    bgt         loop_start_chroma_right @ signed test so a non-multiple-of-4 ht still terminates

end_chroma_right:
    ldmfd       sp!, {r4-r11,pc}        @ restore registers and return