//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//*  ih264_intra_pred_luma_4x4_av8.s
//*
//* @brief
//*  Contains function definitions for intra 4x4 Luma prediction .
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*
//*  -ih264_intra_pred_luma_4x4_mode_vert_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_av8
//*  -ih264_intra_pred_luma_4x4_mode_dc_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
//*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
//*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @remarks
//*  Syntax: GNU as, AArch64 (A64 + Advanced SIMD).
//*
//*  All functions share the C prototype
//*      void fn(UWORD8 *pu1_src, UWORD8 *pu1_dst,
//*              WORD32 src_strd, WORD32 dst_strd,
//*              WORD32 ui_neighboravailability)
//*  with x0 = pu1_src, x1 = pu1_dst, w2 = src_strd, w3 = dst_strd,
//*  w4 = ui_neighboravailability. src_strd (w2) is never read in this file;
//*  ui_neighboravailability (w4) is read only by the DC mode.
//*
//*  Neighbour buffer layout, as indexed by the code below:
//*      pu1_src[0..3]  left column, stored bottom-to-top
//*                     (pu1_src[3] feeds output row 0, pu1_src[0] row 3)
//*      pu1_src[4]     sample between the left and top runs
//*                     (presumably the top-left corner - confirm with caller)
//*      pu1_src[5..8]  the four samples directly above the block
//*      pu1_src[9..12] the next four samples to the right of those
//*
//*  push_v_regs / pop_v_regs come from ih264_neon_macros.s, which is not part
//*  of this file; presumably they save/restore the callee-saved SIMD
//*  registers (v8-v15) - confirm against the macro file.
//*
//*******************************************************************************
//*/

// All the functions here are replicated from ih264_intra_pred_filters.c

.text
.p2align 2
.include "ih264_neon_macros.s"




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:vertical
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
//*  Copies the four bytes pu1_src[5..8] into each of the four output rows.
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_av8

ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is a signed 32-bit int; widen for addressing

    add       x0, x0, #5                // x0 -> pu1_src[5], first sample above the block

    ld1       {v0.s}[0], [x0]           // v0.s[0] = pu1_src[5..8]
    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3

    pop_v_regs
    ret





///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:horizontal
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
//*  Row r of the output is filled with the single byte pu1_src[3 - r].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_horz_av8

ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    ld1       {v1.s}[0], [x0]           // v1.b[0..3] = pu1_src[0..3] (left column, bottom-to-top)
    dup       v0.8b, v1.b[3]            // splat for row 0
    dup       v2.8b, v1.b[2]            // splat for row 1
    st1       {v0.s}[0], [x1], x3       // row 0
    dup       v3.8b, v1.b[1]            // splat for row 2
    st1       {v2.s}[0], [x1], x3       // row 1
    dup       v4.8b, v1.b[0]            // splat for row 3
    st1       {v3.s}[0], [x1], x3       // row 2
    st1       {v4.s}[0], [x1], x3       // row 3

    pop_v_regs
    ret







///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_dc
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:DC
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:DC ,described in sec 8.3.1.2.3
//*  Fills the 4x4 block with one value chosen from the availability flags:
//*      left and top : (sum(pu1_src[0..3]) + sum(pu1_src[5..8]) + 4) >> 3
//*      top only     : (sum(pu1_src[5..8]) + 2) >> 2
//*      left only    : (sum(pu1_src[0..3]) + 2) >> 2
//*      neither      : 128
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*  (bit 0 = left available, bit 2 = top available)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
//                                       UWORD8 *pu1_dst,
//                                       WORD32 src_strd,
//                                       WORD32 dst_strd,
//                                       WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    w5 =>  running sum, then the final DC value
//    x10 => walking pointer into pu1_src

    .global ih264_intra_pred_luma_4x4_mode_dc_av8

ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    tbz       w4, #0, top_available     // LEFT NOT AVAILABLE

    // Left is available: w5 = pu1_src[3] + pu1_src[2] + pu1_src[1] + pu1_src[0]
    add       x10, x0, #3
    ldrb      w5, [x10], #-1
    ldrb      w6, [x10], #-1
    ldrb      w7, [x10], #-1
    add       w5, w5, w6
    ldrb      w8, [x10], #-1
    add       w5, w5, w7
    add       w5, w5, w8
    tbz       w4, #2, left_available    // TOP NOT AVAILABLE -> ONLY LEFT AVAILABLE

    // BOTH LEFT AND TOP AVAILABLE: add pu1_src[5..8], round, divide by 8
    add       x10, x0, #5
    ldrb      w6, [x10], #1
    ldrb      w7, [x10], #1
    add       w5, w5, w6
    ldrb      w8, [x10], #1
    add       w5, w5, w7
    ldrb      w9, [x10], #1
    add       w5, w5, w8
    add       w5, w5, w9
    add       w5, w5, #4
    lsr       w5, w5, #3
    b         store_dc_4x4

top_available:                          // LEFT NOT AVAILABLE; CHECK TOP
    tbz       w4, #2, none_available    // TOP NOT AVAILABLE EITHER

    // ONLY TOP AVAILABLE: w5 = (sum(pu1_src[5..8]) + 2) >> 2
    add       x10, x0, #5
    ldrb      w6, [x10], #1
    ldrb      w7, [x10], #1
    ldrb      w8, [x10], #1
    add       w5, w6, w7
    ldrb      w9, [x10], #1
    add       w5, w5, w8
    add       w5, w5, w9
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         store_dc_4x4

left_available:                         // ONLY LEFT AVAILABLE: w5 already holds the left sum
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         store_dc_4x4

none_available:                         // NONE AVAILABLE: fixed value
    mov       w5, #128
    // falls through

store_dc_4x4:                           // common tail: splat w5 over the 4x4 block
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3       // row 0
    st1       {v0.s}[0], [x1], x3       // row 1
    st1       {v0.s}[0], [x1], x3       // row 2
    st1       {v0.s}[0], [x1], x3       // row 3

end_func:

    pop_v_regs
    ret







///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_diag_dl
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
//*  With t[i] = pu1_src[5 + i] (i = 0..7) the code forms
//*      f[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2   for i = 0..5
//*      f[6] = (t[6] + 3*t[7] + 2) >> 2
//*  and writes row r = f[r .. r+3].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    x5 =>  dst_strd - 2 (advance to next row after a 2-byte post-increment)

    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8

ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    add       x0, x0, #5                // x0 -> t[0]
    sub       x5, x3, #2
    add       x6, x0, #7                // x6 -> t[7]
    ld1       {v0.8b}, [x0]             // v0 = t[0..7]
    ext       v1.8b, v0.8b , v0.8b , #1 // v1 = t[1..7], t[0]
    ext       v2.8b, v0.8b , v0.8b , #2 // v2 = t[2..7], t[0], t[1]
    ld1       {v2.b}[6], [x6]           // lane 6 := t[7], so lane 6 of the sum is t6 + 3*t7
    uaddl     v20.8h, v0.8b, v1.8b      // t[i] + t[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // t[i+1] + t[i+2]
    add       v24.8h, v20.8h , v22.8h   // t[i] + 2*t[i+1] + t[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[]  (rounded >> 2; lane 7 is never stored)
    st1       {v3.s}[0], [x1], x3       // row 0 = f[0..3]
    ext       v4.8b, v3.8b , v3.8b , #1 // v4[i] = f[i+1]
    st1       {v4.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v3.h}[1], [x1], #2       // row 2 = f[2..3] ...
    st1       {v3.h}[2], [x1], x5       //         ... f[4..5]
    st1       {v4.h}[1], [x1], #2       // row 3 = f[3..4] ...
    st1       {v4.h}[2], [x1]           //         ... f[5..6]

end_func_diag_dl:

    pop_v_regs
    ret









///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_diag_dr
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
//*  With p[i] = pu1_src[i] the code forms
//*      f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2   for i = 0..6
//*  and writes row r = f[3-r .. 6-r].
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    x5 =>  dst_strd - 2

    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8

ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b , v1.8b , #1 // v2 = p[2..8], p[1]
    uaddl     v20.8h, v0.8b, v1.8b      // p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // p[i+1] + p[i+2]
    add       v24.8h, v20.8h , v22.8h   // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[]  (lane 7 is never stored)

    ext       v4.8b, v3.8b , v3.8b , #1 // v4[i] = f[i+1]
    sub       x5, x3, #2
    st1       {v4.h}[1], [x1], #2       // row 0 = f[3..4] ...
    st1       {v4.h}[2], [x1], x5       //         ... f[5..6]
    st1       {v3.h}[1], [x1], #2       // row 1 = f[2..3] ...
    st1       {v3.h}[2], [x1], x5       //         ... f[4..5]
    st1       {v4.s}[0], [x1], x3       // row 2 = f[1..4]
    st1       {v3.s}[0], [x1], x3       // row 3 = f[0..3]

end_func_diag_dr:
    pop_v_regs
    ret







///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert_r
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
//*  With p[i] = pu1_src[i] the code forms
//*      a[i] = (p[i] + p[i+1] + 1) >> 1
//*      f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
//*  and writes
//*      row 0 = a[4], a[5], a[6], a[7]
//*      row 1 = f[3], f[4], f[5], f[6]
//*      row 2 = f[2], a[4], a[5], a[6]
//*      row 3 = f[1], f[3], f[4], f[5]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    x8 =>  dst_strd - 3 (advance to next row after 1 + 2 bytes of post-increment)

    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8

ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b , v1.8b , #1 // v2 = p[2..8], p[1]
    uaddl     v20.8h, v0.8b, v1.8b      // p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // p[i+1] + p[i+2]
    add       v24.8h, v20.8h , v22.8h   // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[]
    sqrshrun  v3.8b, v24.8h, #2         // v3 = f[]
    ext       v5.8b, v3.8b , v3.8b , #3 // v5[i] = f[i+3]
    st1       {v4.s}[1], [x1], x3       // row 0 = a[4..7]
    st1       {v5.s}[0], [x1], x3       // row 1 = f[3..6]
    sub       x8, x3, #3
    st1       {v3.b}[2], [x1], #1       // row 2 = f[2] ...
    st1       {v4.h}[2], [x1], #2       //         ... a[4], a[5] ...
    st1       {v4.b}[6], [x1], x8       //         ... a[6]
    st1       {v3.b}[1], [x1], #1       // row 3 = f[1] ...
    st1       {v5.h}[0], [x1], #2       //         ... f[3], f[4] ...
    st1       {v5.b}[2], [x1]           //         ... f[5]


end_func_vert_r:
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz_d
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
//*  With p[i] = pu1_src[i] the code forms
//*      a[i] = (p[i] + p[i+1] + 1) >> 1
//*      f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
//*  and writes
//*      row 0 = a[3], f[3], f[4], f[5]
//*      row 1 = a[2], f[2], a[3], f[3]
//*      row 2 = a[1], f[1], a[2], f[2]
//*      row 3 = a[0], f[0], a[1], f[1]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    x5 =>  dst_strd - 2

    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8

ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing

    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = p[1..8]
    ext       v2.8b, v1.8b , v0.8b , #1 // v2 = p[2..8], p[0]
    uaddl     v20.8h, v0.8b, v1.8b      // p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // p[i+1] + p[i+2]
    add       v24.8h, v20.8h , v22.8h   // p[i] + 2*p[i+1] + p[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[]
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[]
    sub       x5, x3, #2
    mov       v6.8b, v5.8b              // keep an untransposed copy of f[] for row 0
    // Interleave a[] and f[]; v10 is a temporary because trn2 still needs
    // the original v4 and v5.
    trn1      v10.8b, v4.8b, v5.8b      // a0, f0, a2, f2, ...
    trn2      v5.8b, v4.8b, v5.8b       // v5 = a1, f1, a3, f3, ...
    mov       v4.8b, v10.8b             // v4 = a0, f0, a2, f2, ...
    st1       {v5.h}[1], [x1], #2       // row 0 = a3, f3 ...
    st1       {v6.h}[2], [x1], x5       //         ... f4, f5
    st1       {v4.h}[1], [x1], #2       // row 1 = a2, f2 ...
    st1       {v5.h}[1], [x1], x5       //         ... a3, f3
    st1       {v5.h}[0], [x1], #2       // row 2 = a1, f1 ...
    st1       {v4.h}[1], [x1], x5       //         ... a2, f2
    st1       {v4.h}[0], [x1], #2       // row 3 = a0, f0 ...
    st1       {v5.h}[0], [x1], x5       //         ... a1, f1

end_func_horz_d:
    pop_v_regs
    ret







///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_vert_l
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
//*  With q[i] = pu1_src[4 + i] the code forms
//*      a[i] = (q[i] + q[i+1] + 1) >> 1
//*      f[i] = (q[i] + 2*q[i+1] + q[i+2] + 2) >> 2
//*  and writes
//*      row 0 = a[1..4]
//*      row 1 = f[1..4]
//*      row 2 = a[2..5]
//*      row 3 = f[2..5]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability

    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8

ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing
    add       x0, x0, #4                // x0 -> q[0] = pu1_src[4]
    ld1       {v0.8b}, [x0]             // v0 = q[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]             // v1 = q[1..8]
    ext       v2.8b, v1.8b , v0.8b , #1 // v2 = q[2..8], q[0]
    uaddl     v20.8h, v0.8b, v1.8b      // q[i] + q[i+1]
    uaddl     v22.8h, v1.8b, v2.8b      // q[i+1] + q[i+2]
    add       v24.8h, v20.8h , v22.8h   // q[i] + 2*q[i+1] + q[i+2]
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[]
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[]
    ext       v6.8b, v4.8b , v4.8b , #1 // v6[i] = a[i+1]
    ext       v7.8b, v5.8b , v5.8b , #1 // v7[i] = f[i+1]
    st1       {v6.s}[0], [x1], x3       // row 0 = a[1..4]
    ext       v8.8b, v4.8b , v4.8b , #2 // v8[i] = a[i+2]
    ext       v9.8b, v5.8b , v5.8b , #2 // v9[i] = f[i+2]
    st1       {v7.s}[0], [x1], x3       // row 1 = f[1..4]
    st1       {v8.s}[0], [x1], x3       // row 2 = a[2..5]
    st1       {v9.s}[0], [x1], x3       // row 3 = f[2..5]

end_func_vert_l:
    pop_v_regs
    ret







///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_4x4_mode_horz_u
//*
//* @brief
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up
//*
//* @par Description:
//*  Perform Intra prediction for luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
//*  With p[i] = pu1_src[i] (only p[0..3] reach the output) the code forms
//*      a[i] = (p[i] + p[i+1] + 1) >> 1
//*      f[i] = (p[i] + 2*p[i+1] + p[i+2] + 2) >> 2
//*      g    = (3*p[0] + p[1] + 2) >> 2
//*  and writes
//*      row 0 = a[2], f[1], a[1], f[0]
//*      row 1 = a[1], f[0], a[0], g
//*      row 2 = a[0], g,    p[0], p[0]
//*      row 3 = p[0], p[0], p[0], p[0]
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride (not read)
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels (not read)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//    x0 => *pu1_src
//    x1 => *pu1_dst
//    w2 =>  src_strd
//    w3 =>  dst_strd
//    w4 =>  ui_neighboravailability
//    x5 =>  dst_strd - 2
//    w9 =>  p[0]
//    x10 => copy of the original pu1_src

    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8

ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs
    sxtw      x3, w3                    // widen dst_strd for addressing
    mov       x10, x0
    ld1       {v0.8b}, [x0]             // v0 = p[0..7]
    ldrb      w9, [x0], #1              // w9 = p[0]
    ext       v1.8b, v0.8b , v0.8b , #1 // v1 = p[1..7], p[0]
    ld1       {v0.b}[7], [x10]          // v0 lane 7 := p[0], so sum lane 7 becomes 3*p0 + p1
    ext       v2.8b, v1.8b , v1.8b , #1 // v2 = p[2..7], p[0], p[1]
    uaddl     v20.8h, v0.8b, v1.8b      // lanes 0..6: p[i] + p[i+1]
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h , v22.8h   // lanes 0..5: p[i] + 2*p[i+1] + p[i+2]; lane 7: 3*p0 + p1
    sqrshrun  v4.8b, v20.8h, #1         // v4 = a[]
    sqrshrun  v5.8b, v24.8h, #2         // v5 = f[0..6], g in lane 7
    ext       v6.8b, v5.8b , v4.8b , #1 // v6 = f[1..6], g, a[0]
    st1       {v4.b}[2], [x1], #1       // row 0, byte 0 = a[2]
    st1       {v6.b}[0], [x1], #1       // row 0, byte 1 = f[1]
    // First interleave; v10 is a temporary because trn2 still needs the
    // original v6 and v5.
    trn1      v10.8b, v6.8b, v5.8b      // f1, f0, f3, f2, ...
    trn2      v5.8b, v6.8b, v5.8b       // v5 lanes 6..7 = a0, g
    mov       v6.8b , v10.8b
    sub       x5, x3, #2
    trn2      v6.8b, v4.8b, v6.8b       // v6 lanes 0..1 = a1, f0
    dup       v7.8b, w9                 // v7 = p[0] in every lane
    st1       {v6.h}[0], [x1], x5       // row 0, bytes 2..3 = a1, f0
    st1       {v6.h}[0], [x1], #2       // row 1 = a1, f0 ...
    st1       {v5.h}[3], [x1], x5       //         ... a0, g
    st1       {v5.h}[3], [x1], #2       // row 2 = a0, g ...
    st1       {v7.h}[3], [x1], x5       //         ... p0, p0
    st1       {v7.s}[0], [x1], x3       // row 3 = p0 x 4

end_func_horz_u:
    pop_v_regs
    ret