//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//*  ih264_intra_pred_luma_8x8_av8.s
//*
//* @brief
//*  Contains function definitions for intra 8x8 Luma prediction .
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*
//*  -ih264_intra_pred_luma_8x8_mode_vert_av8
//*  -ih264_intra_pred_luma_8x8_mode_horz_av8
//*  -ih264_intra_pred_luma_8x8_mode_dc_av8
//*  -ih264_intra_pred_luma_8x8_mode_diag_dl_av8
//*  -ih264_intra_pred_luma_8x8_mode_diag_dr_av8
//*  -ih264_intra_pred_luma_8x8_mode_vert_r_av8
//*  -ih264_intra_pred_luma_8x8_mode_horz_d_av8
//*  -ih264_intra_pred_luma_8x8_mode_vert_l_av8
//*  -ih264_intra_pred_luma_8x8_mode_horz_u_av8
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/

// All the functions here are replicated from ih264_intra_pred_filters.c

.text
.p2align 2
.include "ih264_neon_macros.s"

.extern ih264_gai1_intrapred_luma_8x8_horz_u



///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_vert
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:vertical
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src                  (advanced by 9 before the load)
//    x1 => *pu1_dst                  (post-incremented by dst_strd per row)
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//    v0 => the eight bytes pu1_src[9..16], replicated into every row


    .global ih264_intra_pred_luma_8x8_mode_vert_av8

ih264_intra_pred_luma_8x8_mode_vert_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here); presumably they spill and reload the callee-saved SIMD registers.
    push_v_regs

    add       x0, x0, #9                // x0 = &pu1_src[9]
    ld1       {v0.8b}, [x0]             // v0 = pu1_src[9..16]

    // Every one of the 8 output rows is a copy of those eight bytes.
    .rept 8
    st1       {v0.8b}, [x1], x3         // write one row, step to the next
    .endr

    pop_v_regs
    ret





///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:horizontal
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 =>
//          *pu1_dst
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
// Register roles inside ih264_intra_pred_luma_8x8_mode_horz_av8:
//    x0      walks pu1_src[7] down to pu1_src[0], one byte per output row
//    x1      destination row pointer, post-incremented by dst_strd (x3)
//    w5-w8   the byte fetched for a row (ldrb zero-extends, so no further
//            extension is needed before dup)
//    v0-v3   that byte broadcast to 8 lanes and stored as the row


    .global ih264_intra_pred_luma_8x8_mode_horz_av8

ih264_intra_pred_luma_8x8_mode_horz_av8:

    // NOTE(review): only v0-v3 are written below; push_v_regs/pop_v_regs
    // (from ih264_neon_macros.s, not visible here) are kept as in the
    // original but look unnecessary for this function - confirm.
    push_v_regs
    add       x0, x0, #7                // row 0 uses pu1_src[7]

    ldrb      w5, [x0], #-1             // pu1_src[7]
    ldrb      w6, [x0], #-1             // pu1_src[6]
    dup       v0.8b, w5
    st1       {v0.8b}, [x1], x3         // row 0
    ldrb      w7, [x0], #-1             // pu1_src[5]
    dup       v1.8b, w6
    st1       {v1.8b}, [x1], x3         // row 1
    dup       v2.8b, w7
    ldrb      w8, [x0], #-1             // pu1_src[4]
    dup       v3.8b, w8
    st1       {v2.8b}, [x1], x3         // row 2
    ldrb      w5, [x0], #-1             // pu1_src[3]
    st1       {v3.8b}, [x1], x3         // row 3
    dup       v0.8b, w5
    ldrb      w6, [x0], #-1             // pu1_src[2]
    st1       {v0.8b}, [x1], x3         // row 4
    ldrb      w7, [x0], #-1             // pu1_src[1]
    dup       v1.8b, w6
    dup       v2.8b, w7
    st1       {v1.8b}, [x1], x3         // row 5
    ldrb      w8, [x0], #-1             // pu1_src[0]
    dup       v3.8b, w8
    st1       {v2.8b}, [x1], x3         // row 6
    st1       {v3.8b}, [x1], x3         // row 7

    pop_v_regs
    ret







///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_dc
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:DC
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
//                                       UWORD8 *pu1_dst,
//                                       WORD32 src_strd,
//                                       WORD32 dst_strd,
//                                       WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst                  (post-incremented by dst_strd per row)
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (bit 0x01: left, bit 0x04: top)
//
//    x5      running sum of the eight left bytes pu1_src[7..0]
//    x10     scratch source pointer
//    v31     the DC value broadcast to 8 lanes; every path leaves its result
//            here and joins the shared store loop
//
// DC value per availability case:
//    left and top : (sum(pu1_src[0..7]) + sum(pu1_src[9..16]) + 8) >> 4
//    top only     : (sum(pu1_src[9..16]) + 4) >> 3
//    left only    : (sum(pu1_src[0..7]) + 4) >> 3
//    neither      : 128


    .global ih264_intra_pred_luma_8x8_mode_dc_av8

ih264_intra_pred_luma_8x8_mode_dc_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here); the code below writes v8, v10 and v12-v14 and assumes the macros
    // preserve those callee-saved registers - confirm.
    push_v_regs

    tst       x4, #0x01                 // left neighbours available?
    beq       top_available             // no: try the top row alone

    // Sum the eight left bytes, loads interleaved with the adds.
    // ldrb zero-extends, so the values can be added as 64-bit directly.
    add       x10, x0, #7
    ldrb      w5, [x10], #-1
    ldrb      w6, [x10], #-1
    ldrb      w7, [x10], #-1
    add       x5, x5, x6
    ldrb      w8, [x10], #-1
    add       x5, x5, x7
    ldrb      w6, [x10], #-1
    add       x5, x5, x8
    ldrb      w7, [x10], #-1
    add       x5, x5, x6
    ldrb      w8, [x10], #-1
    add       x5, x5, x7
    ldrb      w6, [x10], #-1
    add       x5, x5, x8
    add       x5, x5, x6                // x5 = sum of pu1_src[0..7]

    tst       x4, #0x04                 // top neighbours available too?
    beq       left_available            // no: left-only average

    // Both left and top available.
    add       x10, x0, #9
    ld1       {v0.8b}, [x10]            // pu1_src[9..16]
    uaddlp    v1.4h, v0.8b              // pairwise widen-add: 8 -> 4 -> 2 -> 1
    uaddlp    v3.2s, v1.4h
    uaddlp    v2.1d, v3.2s              // v2.h[0] = sum of the top row
    dup       v10.8h, w5                // left sum in every lane
    dup       v8.8h, v2.h[0]            // top sum in every lane
    add       v12.8h, v8.8h, v10.8h
    sqrshrun  v31.8b, v12.8h, #4        // (left + top + 8) >> 4
    b         .Ldc_store_rows

top_available:                          // left is not available
    tst       x4, #0x04                 // top available?
    beq       none_available

    add       x10, x0, #9
    ld1       {v10.8b}, [x10]           // pu1_src[9..16]
    uaddlp    v14.4h, v10.8b
    uaddlp    v13.2s, v14.4h
    uaddlp    v12.1d, v13.2s            // v12.h[0] = sum of the top row
    rshrn     v4.8b, v12.8h, #3         // (top + 4) >> 3 in byte lane 0
    dup       v31.8b, v4.b[0]
    b         .Ldc_store_rows

left_available:                         // only left available
    add       x5, x5, #4
    lsr       x5, x5, #3                // (left + 4) >> 3
    dup       v31.8b, w5
    b         .Ldc_store_rows

none_available:                         // no neighbours: mid-grey
    movi      v31.8b, #128

.Ldc_store_rows:
    // All four cases fill the 8x8 block with the same byte.
    .rept 8
    st1       {v31.8b}, [x1], x3
    .endr

end_func:
    pop_v_regs
    ret






///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_diag_dl
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src                  (advanced by 9 before the load)
//    x1 => *pu1_dst
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
//    x5      dst_strd - 4: row step after a 4-byte post-incremented store
//    x6      &pu1_src[24], the last byte of the 16-byte run that is loaded
//
// With t[i] = pu1_src[9 + i] (i = 0..15) and t[16] taken as t[15], the code
// computes f[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2 for i = 0..14 and writes
// output row r as f[r .. r+7].

    .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8

ih264_intra_pred_luma_8x8_mode_diag_dl_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here); v14 is written below and is assumed to be preserved by them -
    // confirm. No general-purpose callee-saved register is touched, so
    // nothing else is spilled.
    push_v_regs

    add       x0, x0, #9
    sub       x5, x3, #4
    add       x6, x0, #15               // x6 = &t[15]
    ld1       {v0.16b}, [x0]            // v0 = t[0..15]
    mov       v1.d[0], v0.d[1]          // v1 = t[8..15]
    ext       v4.16b, v0.16b, v0.16b, #2    // v4 = t[2..15], t[0], t[1]
    mov       v5.d[0], v4.d[1]          // v5 = t[10..15], t[0], t[1]
    ext       v2.16b, v0.16b, v0.16b, #1    // v2 = t[1..15], t[0]
    mov       v3.d[0], v2.d[1]          // v3 = t[9..15], t[0]
    ld1       {v5.b}[6], [x6]           // third tap of f[14] becomes t[15]
    // v5 lane 7 still holds t[1]; it only feeds f[15], which is never stored.

    uaddl     v20.8h, v0.8b, v2.8b      // t[i]   + t[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // t[i+1] + t[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // t[i] + 2*t[i+1] + t[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v24.8h, #2         // f[0..7]
    sqrshrun  v5.8b, v26.8h, #2         // f[8..15]
    mov       v4.d[1], v5.d[0]          // v4 = f[0..15]

    st1       {v4.8b}, [x1], x3         // row 0 = f[0..7]
    ext       v18.16b, v4.16b, v4.16b, #1   // v18 = f[1..15], f[0]
    ext       v16.16b, v18.16b, v18.16b, #1 // v16 = f[2..15], f[0], f[1]
    st1       {v18.8b}, [x1], x3        // row 1 = f[1..8]
    ext       v14.16b, v16.16b, v16.16b, #1 // v14 = f[3..15], f[0..2]
    st1       {v16.8b}, [x1], x3        // row 2 = f[2..9]
    st1       {v14.8b}, [x1], x3        // row 3 = f[3..10]

    // Rows 4..7 are written as two 32-bit lanes each.
    st1       {v4.s}[1], [x1], #4       // row 4 = f[4..7]
    st1       {v5.s}[0], [x1], x5       //         f[8..11]
    st1       {v18.s}[1], [x1], #4      // row 5 = f[5..8]
    st1       {v18.s}[2], [x1], x5      //         f[9..12]
    st1       {v16.s}[1], [x1], #4      // row 6 = f[6..9]
    st1       {v16.s}[2], [x1], x5      //         f[10..13]
    st1       {v14.s}[1], [x1], #4      // row 7 = f[7..10]
    st1       {v14.s}[2], [x1], x5      //         f[11..14]


end_func_diag_dl:
    pop_v_regs
    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_diag_dr
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
//    x5      dst_strd - 4: row step after a 4-byte post-incremented store
//
// With s[i] = pu1_src[i], the code computes
// f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2 for i = 0..14 and writes output
// row r as f[7-r .. 14-r].


    .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8

ih264_intra_pred_luma_8x8_mode_diag_dr_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here). x19/x20 are saved and restored although this function never
    // writes them.
    push_v_regs
    stp       x19, x20, [sp, #-16]!


    ld1       {v0.16b}, [x0]            // v0 = s[0..15]
    mov       v1.d[0], v0.d[1]          // v1 = s[8..15]
    add       x0, x0, #1
    ld1       {v2.16b}, [x0]            // v2 = s[1..16]
    mov       v3.d[0], v2.d[1]          // v3 = s[9..16]
    ext       v4.16b, v2.16b, v2.16b, #1    // v4 = s[2..16], s[1]
    mov       v5.d[0], v4.d[1]          // v5 = s[10..16], s[1]
    // v5 lane 7 (s[1]) only feeds f[15], which is never stored.
    uaddl     v20.8h, v0.8b, v2.8b      // s[i]   + s[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // s[i+1] + s[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // s[i] + 2*s[i+1] + s[i+2]
    add       v26.8h, v22.8h, v26.8h
    sqrshrun  v4.8b, v24.8h, #2         // f[0..7]
    sqrshrun  v5.8b, v26.8h, #2         // f[8..15]
    mov       v4.d[1], v5.d[0]          // v4 = f[0..15]

    sub       x5, x3, #4
    ext       v18.16b, v4.16b, v4.16b, #15  // v18 = f[15], f[0..14]
    st1       {v18.d}[1], [x1], x3      // row 0 = f[7..14]
    ext       v16.16b, v18.16b, v18.16b, #15    // v16 = f[14], f[15], f[0..13]
    st1       {v16.d}[1], [x1], x3      // row 1 = f[6..13]
    ext       v14.16b, v16.16b, v16.16b, #15    // v14 = f[13..15], f[0..12]
    st1       {v14.d}[1], [x1], x3      // row 2 = f[5..12]
    // Rows 3..6 are written as two 32-bit lanes each.
    st1       {v4.s}[1], [x1], #4       // row 3 = f[4..7]
    st1       {v5.s}[0], [x1], x5       //         f[8..11]
    st1       {v18.s}[1], [x1], #4      // row 4 = f[3..6]
    st1       {v18.s}[2], [x1], x5      //         f[7..10]
    st1       {v16.s}[1], [x1], #4      // row 5 = f[2..5]
    st1       {v16.s}[2], [x1], x5      //         f[6..9]
    st1       {v14.s}[1], [x1], #4      // row 6 = f[1..4]
    st1       {v14.s}[2], [x1], x5      //         f[5..8]
    st1       {v4.8b}, [x1], x3         // row 7 = f[0..7]

end_func_diag_dr:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_vert_r
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
//    x5      dst_strd - 6: row step after 2 + 4 bytes of post-increment
//    x6      dst_strd - 4: row step after 4 bytes of post-increment
//
// With s[i] = pu1_src[i]:
//    a[i] = (s[i] + s[i+1] + 1) >> 1                 (two-tap average)
//    b[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (1-2-1 filter)
// The rows are assembled from a[] and b[] as noted at each store.


    .global ih264_intra_pred_luma_8x8_mode_vert_r_av8

ih264_intra_pred_luma_8x8_mode_vert_r_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here). x19/x20 are saved and restored although this function never
    // writes them.
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    ld1       {v0.16b}, [x0]            // v0 = s[0..15]
    mov       v1.d[0], v0.d[1]
    add       x0, x0, #1
    ld1       {v2.16b}, [x0]            // v2 = s[1..16]
    mov       v3.d[0], v2.d[1]
    ext       v4.16b, v2.16b, v2.16b, #1    // v4 = s[2..16], s[1]
    mov       v5.d[0], v4.d[1]
    uaddl     v20.8h, v0.8b, v2.8b      // s[i]   + s[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // s[i+1] + s[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // s[i] + 2*s[i+1] + s[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1
    mov       v4.d[1], v5.d[0]          // v4 = a[0..15]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2
    mov       v6.d[1], v7.d[0]          // v6 = b[0..15]

    sub       x5, x3, #6
    sub       x6, x3, #4
    st1       {v5.8b}, [x1], x3         // row 0 = a[8..15]
    ext       v18.16b, v6.16b, v6.16b, #15  // v18 = b[15], b[0..14]
    mov       v22.16b, v18.16b          // keep a copy; v18 is overwritten below
    ext       v16.16b, v4.16b, v4.16b, #1   // v16 = a[1..15], a[0]
    st1       {v18.d}[1], [x1], x3      // row 1 = b[7..14]
    mov       v14.16b, v16.16b          // keep a copy; v16 is overwritten below
    ext       v20.16b, v4.16b, v4.16b, #15  // v20 = a[15], a[0..14]
    // De-interleave: v17 = even bytes of v16 then of v18, i.e.
    //   a1,a3,...,a15, b15,b1,b3,...,b13
    // and v18 = odd bytes, i.e. a2,a4,...,a14,a0, b0,b2,...,b14.
    uzp1      v17.16b, v16.16b, v18.16b
    uzp2      v18.16b, v16.16b, v18.16b
    mov       v16.16b, v17.16b
    // row 2 = b[6], a[8..14]: 8 bytes a[7..14] are written first, then the
    // first byte is overwritten with b[6].
    ext       v12.16b, v16.16b, v16.16b, #1 // v12 = v17 rotated by one byte
    st1       {v20.d}[1], [x1]
    st1       {v6.b}[6], [x1], x3
    // row 3 = b[5], b[7], b[8..13]

    st1       {v12.h}[5], [x1], #2
    st1       {v6.s}[2], [x1], #4
    st1       {v6.h}[6], [x1], x5
    // row 4 = b[4], b[6], a[8..13]
    st1       {v18.h}[5], [x1], #2
    st1       {v4.s}[2], [x1], #4
    st1       {v4.h}[6], [x1], x5
    // row 5 = b[3], b[5], b[7..12]
    ext       v26.16b, v18.16b, v18.16b, #1 // v26 = v18 rotated by one byte
    st1       {v16.h}[5], [x1], #2
    st1       {v22.s}[2], [x1], #4
    st1       {v22.h}[6], [x1], x5
    // row 6 = b[2], b[4], b[6], a[8..12]
    st1       {v26.h}[4], [x1], #2
    st1       {v26.b}[10], [x1], #1
    st1       {v4.b}[8], [x1], #1
    st1       {v14.s}[2], [x1], x6
    // row 7 = b[1], b[3], b[5], b[7], b[8..11]
    st1       {v12.s}[2], [x1], #4
    st1       {v6.s}[2], [x1], #4

end_func_vert_r:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz_d
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Down
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
//    x5      dst_strd - 4: row step after 4 bytes of post-increment
//    x6      dst_strd - 6: row step after 6 bytes of post-increment
//
// With s[i] = pu1_src[i]:
//    a[i] = (s[i] + s[i+1] + 1) >> 1                 (two-tap average)
//    b[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (1-2-1 filter)
// "(ak,bk)" below denotes the byte pair a[k], b[k].

    .global ih264_intra_pred_luma_8x8_mode_horz_d_av8

ih264_intra_pred_luma_8x8_mode_horz_d_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here). x19/x20 are saved and restored although this function never
    // writes them.
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    ld1       {v0.16b}, [x0]            // v0 = s[0..15]
    mov       v1.d[0], v0.d[1]
    add       x0, x0, #1
    ld1       {v2.16b}, [x0]            // v2 = s[1..16]
    mov       v3.d[0], v2.d[1]
    ext       v4.16b, v2.16b, v2.16b, #1    // v4 = s[2..16], s[1]
    mov       v5.d[0], v4.d[1]
    uaddl     v20.8h, v0.8b, v2.8b      // s[i]   + s[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // s[i+1] + s[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // s[i] + 2*s[i+1] + s[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1
    mov       v4.d[1], v5.d[0]          // v4 = a[0..15]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2         // v7 = b[8..15]
    mov       v6.d[1], v7.d[0]          // v6 = b[0..15]

    mov       v8.16b, v4.16b
    mov       v10.16b, v6.16b
    sub       x6, x3, #6
    // Byte transpose pairs each a[k] with its b[k]:
    //   v8  = (a0,b0),(a2,b2),(a4,b4),...   even k
    //   v10 = (a1,b1),(a3,b3),(a5,b5),...   odd k
    trn1      v9.16b, v8.16b, v10.16b
    trn2      v10.16b, v8.16b, v10.16b
    mov       v8.16b, v9.16b
    mov       v12.16b, v8.16b
    mov       v14.16b, v10.16b
    sub       x5, x3, #4
    // Halfword transpose regroups the pairs:
    //   v12 = (a0,b0),(a1,b1),(a4,b4),(a5,b5),...
    //   v14 = (a2,b2),(a3,b3),(a6,b6),(a7,b7),...
    trn1      v13.8h, v12.8h, v14.8h
    trn2      v14.8h, v12.8h, v14.8h
    mov       v12.16b, v13.16b
    ext       v16.16b, v6.16b, v6.16b, #14  // v16 = b[14], b[15], b[0..13]
    // row 0 = (a7,b7), b[8..13]: 8 bytes b[6..13] are written first, then
    // the first two bytes are overwritten with (a7,b7).
    st1       {v16.d}[1], [x1]
    st1       {v10.h}[3], [x1], x3

    // row 1 = (a6,b6),(a7,b7), b[8..11]
    st1       {v14.s}[1], [x1], #4
    st1       {v6.s}[2], [x1], x5
    // row 2 = (a5,b5),(a6,b6),(a7,b7), b[8..9]
    st1       {v10.h}[2], [x1], #2
    st1       {v14.s}[1], [x1], #4
    st1       {v7.h}[0], [x1], x6
    // row 3 = (a4,b4),(a5,b5),(a6,b6),(a7,b7)
    st1       {v12.s}[1], [x1], #4
    st1       {v14.s}[1], [x1], x5
    // row 4 = (a3,b3),(a4,b4),(a5,b5),(a6,b6)
    st1       {v14.h}[1], [x1], #2
    st1       {v12.s}[1], [x1], #4
    st1       {v14.h}[2], [x1], x6
    // row 5 = (a2,b2),(a3,b3),(a4,b4),(a5,b5)
    st1       {v14.s}[0], [x1], #4
    st1       {v12.s}[1], [x1], x5
    // row 6 = (a1,b1),(a2,b2),(a3,b3),(a4,b4)
    st1       {v10.h}[0], [x1], #2
    st1       {v8.h}[1], [x1], #2
    st1       {v14.h}[1], [x1], #2
    st1       {v12.h}[2], [x1], x6
    // row 7 = (a0,b0),(a1,b1),(a2,b2),(a3,b3)
    st1       {v12.s}[0], [x1], #4
    st1       {v14.s}[0], [x1], x5

end_func_horz_d:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_vert_l
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src                  (advanced by 9 before the load)
//    x1 => *pu1_dst                  (post-incremented by dst_strd per row)
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
// With t[i] = pu1_src[9 + i] (i = 0..15):
//    a[i] = (t[i] + t[i+1] + 1) >> 1                 (two-tap average)
//    b[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2      (1-2-1 filter)
// Even row 2k is a[k .. k+7] and odd row 2k+1 is b[k .. k+7], so only
// a[0..10] and b[0..10] (inputs t[0..12]) ever reach the destination.
//
// The shifted operands are derived from the single 16-byte load with ext.
// A second load from pu1_src + 10 would touch pu1_src[25], one byte past
// pu1_src[24], which is the last neighbour byte any routine in this file
// reads. The lanes that differ (a[15], b[14], b[15]) are never stored.


    .global ih264_intra_pred_luma_8x8_mode_vert_l_av8

ih264_intra_pred_luma_8x8_mode_vert_l_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here); v8, v10, v12 and v14 are written below and are assumed to be
    // preserved by them - confirm. No general-purpose callee-saved register
    // is touched, so nothing else is spilled.
    push_v_regs
    add       x0, x0, #9
    ld1       {v0.16b}, [x0]            // v0 = t[0..15]
    mov       v1.d[0], v0.d[1]          // v1 = t[8..15]
    ext       v2.16b, v0.16b, v0.16b, #1    // v2 = t[1..15], t[0]
    mov       v3.d[0], v2.d[1]          // v3 = t[9..15], t[0]
    ext       v4.16b, v2.16b, v2.16b, #1    // v4 = t[2..15], t[0], t[1]
    mov       v5.d[0], v4.d[1]          // v5 = t[10..15], t[0], t[1]
    uaddl     v20.8h, v0.8b, v2.8b      // t[i]   + t[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // t[i+1] + t[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // t[i] + 2*t[i+1] + t[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1
    mov       v4.d[1], v5.d[0]          // v4 = a[0..15]
    sqrshrun  v6.8b, v24.8h, #2
    ext       v8.16b, v4.16b, v4.16b, #1    // v8 starts at a[1]
    sqrshrun  v7.8b, v26.8h, #2
    mov       v6.d[1], v7.d[0]          // v6 = b[0..15]

    ext       v10.16b, v6.16b, v6.16b, #1   // v10 starts at b[1]
    // rows 0, 1
    st1       {v4.8b}, [x1], x3         // a[0..7]
    st1       {v6.8b}, [x1], x3         // b[0..7]

    ext       v12.16b, v8.16b, v8.16b, #1   // v12 starts at a[2]
    ext       v14.16b, v10.16b, v10.16b, #1 // v14 starts at b[2]
    // rows 2, 3
    st1       {v8.8b}, [x1], x3         // a[1..8]
    st1       {v10.8b}, [x1], x3        // b[1..8]

    ext       v16.16b, v12.16b, v12.16b, #1 // v16 starts at a[3]
    ext       v18.16b, v14.16b, v14.16b, #1 // v18 starts at b[3]
    // rows 4, 5
    st1       {v12.8b}, [x1], x3        // a[2..9]
    st1       {v14.8b}, [x1], x3        // b[2..9]
    // rows 6, 7
    st1       {v16.8b}, [x1], x3        // a[3..10]
    st1       {v18.8b}, [x1], x3        // b[3..10]

end_func_vert_l:
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz_u
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Up
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst                  (post-incremented by dst_strd per row)
//    x2 => src_strd                  (not read)
//    x3 => dst_strd
//    x4 => ui_neighboravailability   (not read)
//
//    x12     address of ih264_gai1_intrapred_luma_8x8_horz_u, fetched through
//            the GOT; 16 bytes are loaded from it and used as tbl indices
//
// With s[i] = pu1_src[i] (i = 0..7):
//    a[i] = (s[i] + s[i+1] + 1) >> 1                 (two-tap average)
//    b[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      (1-2-1 filter)
// The table (defined outside this file) picks 16 bytes out of the 32-byte
// set {a[0..15], b[0..15]}; output rows are 8-byte windows of that pick,
// sliding by two bytes per row and padded with s[0] once they run off the
// end.

    .global ih264_intra_pred_luma_8x8_mode_horz_u_av8

ih264_intra_pred_luma_8x8_mode_horz_u_av8:

    // push_v_regs / pop_v_regs come from ih264_neon_macros.s (not visible
    // here). x19/x20 are saved and restored although this function never
    // writes them.
    push_v_regs
    stp       x19, x20, [sp, #-16]!

    ld1       {v0.8b}, [x0]             // v0 low half = s[0..7]
    ld1       {v1.b}[7], [x0]           // v1 lane 7 = s[0]; lanes 0..6 keep
                                        // whatever v1 held on entry
    mov       v0.d[1], v1.d[0]          // v0 = s[0..7], 7 stale bytes, s[0]
    // NOTE(review): the stale bytes reach a[7..14] and b[6..14]; presumably
    // the lookup table never selects those lanes - verify against the table.
    ext       v2.16b, v0.16b, v0.16b, #1    // v0 rotated by one byte
    mov       v3.d[0], v2.d[1]
    ext       v4.16b, v2.16b, v2.16b, #1    // v0 rotated by two bytes
    mov       v5.d[0], v4.d[1]

    adrp      x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
    ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
    uaddl     v20.8h, v0.8b, v2.8b      // x[i]   + x[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      //                  i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // x[i+1] + x[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      //                  i = 8..15
    add       v24.8h, v20.8h, v24.8h    // x[i] + 2*x[i+1] + x[i+2]
    add       v26.8h, v22.8h, v26.8h
    ld1       {v10.16b}, [x12]          // 16 shuffle indices
    mov       v11.d[0], v10.d[1]        // v11 = upper 8 indices
    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1         // lane 7 = (s[0] + s[0] + 1) >> 1 = s[0]
    mov       v4.d[1], v5.d[0]          // v4 = a[0..15]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2         // lane 7 = (3*s[0] + s[1] + 2) >> 2
    mov       v6.d[1], v7.d[0]          // v6 = b[0..15]

    // tbl needs consecutive registers: indices 0..15 address v30 (a[]),
    // indices 16..31 address v31 (b[]).
    mov       v30.16b, v4.16b
    mov       v31.16b, v6.16b
    tbl       v12.8b, {v30.16b, v31.16b}, v10.8b    // picks 0..7
    dup       v14.16b, v5.b[7]          // padding byte s[0] in every lane
    tbl       v13.8b, {v30.16b, v31.16b}, v11.8b    // picks 8..15
    mov       v12.d[1], v13.d[0]        // v12 = all 16 picks, p[0..15]
    ext       v16.16b, v12.16b, v14.16b, #2 // p[2..15] + 2 padding bytes
    ext       v18.16b, v16.16b, v14.16b, #2 // p[4..15] + 4 padding bytes
    st1       {v12.8b}, [x1], x3        // row 0 = p[0..7]
    ext       v20.16b, v18.16b, v14.16b, #2 // p[6..15] + 6 padding bytes
    st1       {v16.8b}, [x1], x3        // row 1 = p[2..9]
    st1       {v18.8b}, [x1], x3        // row 2 = p[4..11]
    st1       {v20.8b}, [x1], x3        // row 3 = p[6..13]
    st1       {v13.8b}, [x1], x3        // row 4 = p[8..15]
    st1       {v16.d}[1], [x1], x3      // row 5 = p[10..15] + 2 padding bytes
    st1       {v18.d}[1], [x1], x3      // row 6 = p[12..15] + 4 padding bytes
    st1       {v20.d}[1], [x1], x3      // row 7 = p[14..15] + 6 padding bytes


end_func_horz_u:
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret

