//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//*  ih264_intra_pred_luma_4x4_av8.s
//*
//* @brief
//*  Contains function definitions for intra 4x4 Luma prediction.
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*
//*  -ih264_intra_pred_luma_4x4_mode_vert_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_av8
//*  -ih264_intra_pred_luma_4x4_mode_dc_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @remarks
//*  Target: AArch64, GNU assembler syntax ('//' comments).
//*
//*  All functions share the C signature
//*      void f(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd,
//*             WORD32 dst_strd, WORD32 ui_neighboravailability)
//*  with x0 = pu1_src, x1 = pu1_dst, x2 = src_strd, x3 = dst_strd,
//*  x4 = ui_neighboravailability.  x2 is not read by any function here.
//*
//*  pu1_src is indexed as a flat byte array.  As read by the code below:
//*      pu1_src[0..3]   are consumed as the left column, stored bottom-up
//*                      (pu1_src[3] feeds output row 0 in the horizontal mode)
//*      pu1_src[5..8]   are consumed as the row above the block
//*      pu1_src[9..12]  are additionally read by the diag_dl / vert_l modes
//*      pu1_src[4]      presumably the top-left neighbour - TODO confirm
//*                      against ih264_intra_pred_filters.c
//*
//*  push_v_regs / pop_v_regs come from ih264_neon_macros.s, which is not
//*  visible from this file; they are assumed to save/restore the callee-saved
//*  SIMD registers and are kept as a balanced pair in every function.
//*  NOTE(review): no function in this file writes v8-v15 any more, so the
//*  pair can probably be dropped once the macro definition is confirmed.
//*
//*  In the per-instruction comments:
//*      s[i] = pu1_src[i]
//*      a[i] = (s[i] + s[i+1] + 1) >> 1                   (2-tap average)
//*      f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2        (3-tap filter)
//*
//*******************************************************************************
//*/

///* All the functions here are replicated from ih264_intra_pred_filters.c
//

.text
.p2align 2
.include "ih264_neon_macros.s"


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:vertical
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:vertical, described in
//*  sec 8.3.1.2.1.  The four bytes pu1_src[5..8] are copied to each of the
//*  four destination rows.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_av8

ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs

    add       x0, x0, #5                    // x0 -> s[5], first byte of the row above

    ld1       {v0.s}[0], [x0]               // v0.s[0] = s[5..8]
    st1       {v0.s}[0], [x1], x3           // row 0
    st1       {v0.s}[0], [x1], x3           // row 1
    st1       {v0.s}[0], [x1], x3           // row 2
    st1       {v0.s}[0], [x1], x3           // row 3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:horizontal
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:horizontal, described in
//*  sec 8.3.1.2.2.  Output row r is filled with the single byte pu1_src[3 - r].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not used in this function)
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_horz_av8

ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs

    ld1       {v1.s}[0], [x0]               // v1.b[0..3] = s[0..3]
    dup       v0.8b, v1.b[3]                // broadcast s[3]
    dup       v2.8b, v1.b[2]                // broadcast s[2]
    st1       {v0.s}[0], [x1], x3           // row 0 = s[3] x 4
    dup       v3.8b, v1.b[1]                // broadcast s[1]
    st1       {v2.s}[0], [x1], x3           // row 1 = s[2] x 4
    dup       v4.8b, v1.b[0]                // broadcast s[0]
    st1       {v3.s}[0], [x1], x3           // row 2 = s[1] x 4
    st1       {v4.s}[0], [x1], x3           // row 3 = s[0] x 4

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_dc
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:DC
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:DC, described in sec 8.3.1.2.3.
//*  All 16 output bytes receive one value, chosen from the availability flags:
//*      bit 0 and bit 2 set : (s[0]+..+s[3] + s[5]+..+s[8] + 4) >> 3
//*      only bit 0 set      : (s[0]+..+s[3] + 2) >> 2
//*      only bit 2 set      : (s[5]+..+s[8] + 2) >> 2
//*      neither set         : 128
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels; bit 0 (0x01) gates the use of
//*  pu1_src[0..3] (left), bit 2 (0x04) gates the use of pu1_src[5..8] (top)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
//                                       UWORD8 *pu1_dst,
//                                       WORD32 src_strd,
//                                       WORD32 dst_strd,
//                                       WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x5 => running sum, then the DC value
//    w6-w9 => individual neighbour bytes (ldrb zero-extends, no sxtw needed)

    .global ih264_intra_pred_luma_4x4_mode_dc_av8

ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs

    tbz       w4, #0, .Ldc_left_missing     // bit 0 clear: left column not available

    // Left column available: x5 = s[3] + s[2] + s[1] + s[0]
    ldrb      w5, [x0, #3]
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       x5, x5, x6
    add       x5, x5, x7
    add       x5, x5, x8
    tbz       w4, #2, .Ldc_left_only        // bit 2 clear: top row not available

    // Both left and top available: add s[5..8], DC = (sum of 8 + 4) >> 3
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       x5, x5, x6
    add       x5, x5, x7
    add       x5, x5, x8
    add       x5, x5, x9
    add       x5, x5, #4
    lsr       x5, x5, #3
    b         .Ldc_store

.Ldc_left_missing:
    tbz       w4, #2, .Ldc_none_available   // neither left nor top available

    // Only top available: DC = (s[5] + s[6] + s[7] + s[8] + 2) >> 2
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       x5, x6, x7
    add       x5, x5, x8
    add       x5, x5, x9
    add       x5, x5, #2
    lsr       x5, x5, #2
    b         .Ldc_store

.Ldc_left_only:
    // Only left available: DC = (left sum + 2) >> 2
    add       x5, x5, #2
    lsr       x5, x5, #2
    b         .Ldc_store

.Ldc_none_available:
    mov       x5, #128                      // fixed value when no neighbour is usable

.Ldc_store:
    // Shared tail: replicate the DC byte and write four rows of four bytes.
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3           // row 0
    st1       {v0.s}[0], [x1], x3           // row 1
    st1       {v0.s}[0], [x1], x3           // row 2
    st1       {v0.s}[0], [x1], x3           // row 3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dl
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left, described
//*  in sec 8.3.1.2.4.  With t[i] = pu1_src[5 + i] (i = 0..7) the code forms
//*      p[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2   for i = 0..5
//*      p[6] = (t[6] + 3*t[7] + 2) >> 2
//*  and writes output row r as p[r .. r+3].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[5..12] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x5 => dst_strd - 2 (advance to next row after a 2+2 byte split store)

    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8

ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs

    add       x0, x0, #5                    // x0 -> t[0] = s[5]
    sub       x5, x3, #2
    add       x6, x0, #7                    // x6 -> t[7] = s[12]
    ld1       {v0.8b}, [x0]                 // v0 = t0 t1 t2 t3 t4 t5 t6 t7
    ext       v1.8b, v0.8b, v0.8b, #1       // v1 = t1 .. t7 t0
    ext       v2.8b, v0.8b, v0.8b, #2       // v2 = t2 .. t7 t0 t1
    ld1       {v2.b}[6], [x6]               // lane 6 <- t7, so lane 6 sum is t6 + 3*t7
    uaddl     v20.8h, v0.8b, v1.8b          // t[i] + t[i+1]
    uaddl     v22.8h, v1.8b, v2.8b          // t[i+1] + t[i+2]
    add       v24.8h, v20.8h, v22.8h        // t[i] + 2*t[i+1] + t[i+2]
    sqrshrun  v3.8b, v24.8h, #2             // v3 = p0 p1 p2 p3 p4 p5 p6 (lane 7 unused)
    st1       {v3.s}[0], [x1], x3           // row 0 = p0 p1 p2 p3
    ext       v4.8b, v3.8b, v3.8b, #1       // v4 = p1 p2 p3 p4 p5 p6 ..
    st1       {v4.s}[0], [x1], x3           // row 1 = p1 p2 p3 p4
    st1       {v3.h}[1], [x1], #2           // row 2 = p2 p3 ..
    st1       {v3.h}[2], [x1], x5           //         .. p4 p5
    st1       {v4.h}[1], [x1], #2           // row 3 = p3 p4 ..
    st1       {v4.h}[2], [x1]               //         .. p5 p6

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dr
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right, described
//*  in sec 8.3.1.2.5.  Output row r is written as f[3-r .. 6-r].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[0..8] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x5 => dst_strd - 2

    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8

ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s0 .. s7
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s1 .. s8
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s2 .. s8 s1 (lane 7 not used below)
    uaddl     v20.8h, v0.8b, v1.8b          // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b          // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h
    sqrshrun  v3.8b, v24.8h, #2             // v3 = f0 f1 f2 f3 f4 f5 f6 ..

    ext       v4.8b, v3.8b, v3.8b, #1       // v4 = f1 f2 f3 f4 f5 f6 ..
    sub       x5, x3, #2
    st1       {v4.h}[1], [x1], #2           // row 0 = f3 f4 ..
    st1       {v4.h}[2], [x1], x5           //         .. f5 f6
    st1       {v3.h}[1], [x1], #2           // row 1 = f2 f3 ..
    st1       {v3.h}[2], [x1], x5           //         .. f4 f5
    st1       {v4.s}[0], [x1], x3           // row 2 = f1 f2 f3 f4
    st1       {v3.s}[0], [x1], x3           // row 3 = f0 f1 f2 f3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_r
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right, described in
//*  sec 8.3.1.2.6.  Rows are assembled from the 2-tap averages a[] and the
//*  3-tap filter outputs f[] as annotated on the stores below.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[0..8] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x8 => dst_strd - 3 (advance to next row after a 1+2+1 byte split store)

    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8

ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s0 .. s7
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s1 .. s8
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s2 .. s8 s1
    uaddl     v20.8h, v0.8b, v1.8b          // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b          // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a0 .. a7
    sqrshrun  v3.8b, v24.8h, #2             // v3 = f0 .. f6 ..
    ext       v5.8b, v3.8b, v3.8b, #3       // v5 = f3 f4 f5 f6 ..
    st1       {v4.s}[1], [x1], x3           // row 0 = a4 a5 a6 a7
    st1       {v5.s}[0], [x1], x3           // row 1 = f3 f4 f5 f6
    sub       x8, x3, #3
    st1       {v3.b}[2], [x1], #1           // row 2 = f2 ..
    st1       {v4.h}[2], [x1], #2           //         .. a4 a5 ..
    st1       {v4.b}[6], [x1], x8           //         .. a6
    st1       {v3.b}[1], [x1], #1           // row 3 = f1 ..
    st1       {v5.h}[0], [x1], #2           //         .. f3 f4 ..
    st1       {v5.b}[2], [x1]               //         .. f5

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_d
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down, described in
//*  sec 8.3.1.2.7.  The averages a[] and filter outputs f[] are interleaved
//*  with trn1/trn2 so each row can be written as two halfword stores.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[0..8] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x5 => dst_strd - 2

    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8

ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s0 .. s7
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s1 .. s8
    ext       v2.8b, v1.8b, v0.8b, #1       // v2 = s2 .. s8 s0 (lane 7 not used below)
    uaddl     v20.8h, v0.8b, v1.8b          // s[i] + s[i+1]
    uaddl     v22.8h, v1.8b, v2.8b          // s[i+1] + s[i+2]
    add       v24.8h, v20.8h, v22.8h
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a0 .. a7
    sqrshrun  v5.8b, v24.8h, #2             // v5 = f0 .. f6 ..
    sub       x5, x3, #2
    mov       v6.8b, v5.8b                  // v6 keeps the un-interleaved f[] for row 0
    trn1      v16.8b, v4.8b, v5.8b          // v16 = a0 f0 a2 f2 a4 f4 a6 f6
    trn2      v5.8b, v4.8b, v5.8b           // v5  = a1 f1 a3 f3 a5 f5 a7 f7
    st1       {v5.h}[1], [x1], #2           // row 0 = a3 f3 ..
    st1       {v6.h}[2], [x1], x5           //         .. f4 f5
    st1       {v16.h}[1], [x1], #2          // row 1 = a2 f2 ..
    st1       {v5.h}[1], [x1], x5           //         .. a3 f3
    st1       {v5.h}[0], [x1], #2           // row 2 = a1 f1 ..
    st1       {v16.h}[1], [x1], x5          //         .. a2 f2
    st1       {v16.h}[0], [x1], #2          // row 3 = a0 f0 ..
    st1       {v5.h}[0], [x1], x5           //         .. a1 f1

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_l
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left, described in
//*  sec 8.3.1.2.8.  With u[i] = pu1_src[4 + i], A[i] = (u[i] + u[i+1] + 1) >> 1
//*  and F[i] = (u[i] + 2*u[i+1] + u[i+2] + 2) >> 2, the rows written are
//*      row 0 = A[1..4], row 1 = F[1..4], row 2 = A[2..5], row 3 = F[2..5].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[4..12] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8

ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs

    add       x0, x0, #4                    // x0 -> u[0] = s[4]
    ld1       {v0.8b}, [x0]                 // v0 = u0 .. u7
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = u1 .. u8
    ext       v2.8b, v1.8b, v0.8b, #1       // v2 = u2 .. u8 u0 (lane 7 not used below)
    uaddl     v20.8h, v0.8b, v1.8b          // u[i] + u[i+1]
    uaddl     v22.8h, v1.8b, v2.8b          // u[i+1] + u[i+2]
    add       v24.8h, v20.8h, v22.8h
    sqrshrun  v4.8b, v20.8h, #1             // v4 = A0 .. A7
    sqrshrun  v5.8b, v24.8h, #2             // v5 = F0 .. F6 ..
    ext       v6.8b, v4.8b, v4.8b, #1       // v6 = A1 A2 A3 A4 ..
    ext       v7.8b, v5.8b, v5.8b, #1       // v7 = F1 F2 F3 F4 ..
    st1       {v6.s}[0], [x1], x3           // row 0 = A1 A2 A3 A4
    ext       v16.8b, v4.8b, v4.8b, #2      // v16 = A2 A3 A4 A5 ..
    ext       v17.8b, v5.8b, v5.8b, #2      // v17 = F2 F3 F4 F5 ..
    st1       {v7.s}[0], [x1], x3           // row 1 = F1 F2 F3 F4
    st1       {v16.s}[0], [x1], x3          // row 2 = A2 A3 A4 A5
    st1       {v17.s}[0], [x1], x3          // row 3 = F2 F3 F4 F5

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_u
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up, described in
//*  sec 8.3.1.2.9.  Lane 7 of the first operand is overwritten with s[0], so
//*  the last filter lane evaluates to g = (3*s[0] + s[1] + 2) >> 2.  Rows:
//*      row 0 = a2 f1 a1 f0
//*      row 1 = a1 f0 a0 g
//*      row 2 = a0 g  s0 s0
//*      row 3 = s0 s0 s0 s0
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source; bytes pu1_src[0..7] are read
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not used in this function)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read by this implementation)
//*
//* @returns
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    x2 => src_strd
//    x3 => dst_strd
//    x4 => ui_neighboravailability
//    x5 => dst_strd - 2
//    w9 => s[0], replicated into the tail of rows 2 and 3

    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8

ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s0 .. s7
    ldrb      w9, [x0]                      // w9 = s0 (zero-extended)
    ext       v1.8b, v0.8b, v0.8b, #1       // v1 = s1 .. s7 s0 (taken before lane 7 is patched)
    ld1       {v0.b}[7], [x0]               // v0 lane 7 <- s0
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s2 .. s7 s0 s1
    uaddl     v20.8h, v0.8b, v1.8b          // lanes 0..6: s[i] + s[i+1]; lane 7: 2*s0
    uaddl     v22.8h, v1.8b, v2.8b          // lanes 0..5: s[i+1] + s[i+2]; lane 7: s0 + s1
    add       v24.8h, v20.8h, v22.8h        // lane 7: 3*s0 + s1
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a0 a1 a2 ..
    sqrshrun  v5.8b, v24.8h, #2             // v5 = f0 f1 f2 .. g (lane 7)
    ext       v6.8b, v5.8b, v4.8b, #1       // v6 = f1 f2 .. g a0
    st1       {v4.b}[2], [x1], #1           // row 0 = a2 ..
    st1       {v6.b}[0], [x1], #1           //         .. f1 ..
    trn1      v16.8b, v6.8b, v5.8b          // v16 = f1 f0 f3 f2 ..
    trn2      v5.8b, v6.8b, v5.8b           // v5 lanes 6,7 = a0 g
    sub       x5, x3, #2
    trn2      v6.8b, v4.8b, v16.8b          // v6 lanes 0,1 = a1 f0
    dup       v7.8b, w9                     // v7 = s0 in every lane
    st1       {v6.h}[0], [x1], x5           //         .. a1 f0
    st1       {v6.h}[0], [x1], #2           // row 1 = a1 f0 ..
    st1       {v5.h}[3], [x1], x5           //         .. a0 g
    st1       {v5.h}[3], [x1], #2           // row 2 = a0 g ..
    st1       {v7.h}[3], [x1], x5           //         .. s0 s0
    st1       {v7.s}[0], [x1], x3           // row 3 = s0 s0 s0 s0

    pop_v_regs
    ret
