@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/*
@//----------------------------------------------------------------------------
@// File Name            : impeg2_inter_pred.s
@//
@// Description          : This file has motion compensation related
@//                        interpolation functions on Neon + CortexA-8 platform
@//
@// Syntax               : GNU-style directives (.text, .global, .p2align);
@//                        '@' starts a comment. 32-bit ARM mnemonics with
@//                        NEON (vld1/vst1/vrhadd/...) instructions.
@//
@// Reference Document   :
@//
@// Revision History     :
@//      Date            Author                  Detail Description
@//   ------------    ----------------    ----------------------------------
@//   18 jun 2010     S Hamsalekha        Created
@//
@//-------------------------------------------------------------------------
@*/

@/*
@// ----------------------------------------------------------------------------
@// Include Files
@// ----------------------------------------------------------------------------
@*/
.text
.p2align 2


@/*
@// ----------------------------------------------------------------------------
@// Struct/Union Types and Define
@// ----------------------------------------------------------------------------
@*/


@/*
@// ----------------------------------------------------------------------------
@// Static Global Data section variables
@// ----------------------------------------------------------------------------
@*/
@// -------------------------- NONE --------------------------------------------


@/*
@// ----------------------------------------------------------------------------
@// Static Prototype Functions
@// ----------------------------------------------------------------------------
@*/
@// -------------------------- NONE --------------------------------------------

@/*
@// ----------------------------------------------------------------------------
@// Exported functions
@// ----------------------------------------------------------------------------
@*/

@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_copy_mb_a9q()
@//
@// Detail Description : Copies one macroblock from src to dst:
@//                        - 16 rows of 16 bytes for the y plane
@//                        -  8 rows of  8 bytes for the u plane
@//                        -  8 rows of  8 bytes for the v plane
@//
@// Inputs             : r0 - src: address of three consecutive 32-bit plane
@//                           pointers (y at +0, u at +4, v at +8)
@//                      r1 - dst: same layout as src
@//                      r2 - source row step in bytes for y
@//                           (halved with lsr #1 for u and v)
@//                      r3 - destination row step in bytes for y
@//                           (halved with lsr #1 for u and v)
@//
@// Registers Used     : r4, r5 (saved/restored), d0, d1;
@//                      r2 and r3 are overwritten with the halved steps
@//
@// Stack Usage        : 12 bytes
@//
@// Outputs            : dst planes are overwritten with the src data
@//
@// Return Data        : None
@//
@// Programming Note   : Each row is moved with one vld1.8/vst1.8 pair; the
@//                      .rept blocks below expand to straight-line code
@//                      (no loop counter, no branches).
@//-----------------------------------------------------------------------------
@*/

    .global impeg2_copy_mb_a9q

impeg2_copy_mb_a9q:

    stmfd           r13!, {r4, r5, r14}

    @ ---- y plane: 16 rows, 16 bytes each ----
    ldr             r4, [r0]                @ r4 = src->y
    ldr             r5, [r1]                @ r5 = dst->y
    .rept           16
    vld1.8          {d0, d1}, [r4], r2      @ fetch one row, r4 += src step
    vst1.8          {d0, d1}, [r5], r3      @ write one row, r5 += dst step
    .endr

    @ ---- chroma planes use half the luma row step ----
    mov             r2, r2, lsr #1          @ src step /= 2
    mov             r3, r3, lsr #1          @ dst step /= 2

    @ ---- u plane: 8 rows, 8 bytes each ----
    ldr             r4, [r0, #4]            @ r4 = src->u
    ldr             r5, [r1, #4]            @ r5 = dst->u
    .rept           8
    vld1.8          {d0}, [r4], r2          @ fetch one row, r4 += src step
    vst1.8          {d0}, [r5], r3          @ write one row, r5 += dst step
    .endr

    @ ---- v plane: 8 rows, 8 bytes each ----
    ldr             r4, [r0, #8]            @ r4 = src->v
    ldr             r5, [r1, #8]            @ r5 = dst->v
    .rept           8
    vld1.8          {d0}, [r4], r2          @ fetch one row, r4 += src step
    vst1.8          {d0}, [r5], r3          @ write one row, r5 += dst step
    .endr

    ldmfd           r13!, {r4, r5, pc}      @ restore and return




@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_mc_fullx_halfy_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer. This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution.
@//
@//                      Operation: for each of the 8 output rows,
@//                        out[y][x] = (ref[y][x] + ref[y+1][x] + 1) >> 1
@//                      for x = 0..7, i.e. 9 reference rows of 8 bytes
@//                      are read and 8 rows of 8 bytes are written.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row step, bytes)
@//                      r3 - out_wid : Current Block Width (row step, bytes)
@//
@// Registers Used     : r14, d0-d9 (d8, d9 are saved and restored;
@//                      r0-r3 are modified)
@//
@// Stack Usage        : 20 bytes (4 for lr + 16 for d8, d9)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Even and odd rows are walked with two pointers
@//                      (r1/r14 for ref, r0/r14 for out), each stepping by
@//                      twice the row width.
@//-----------------------------------------------------------------------------
@*/

    .global impeg2_mc_fullx_halfy_8x8_a9q

impeg2_mc_fullx_halfy_8x8_a9q:

    stmfd           r13!, {r14}
    vpush           {d8, d9}                @ d8-d15 are callee-saved (AAPCS);
                                            @ d8 and d9 are used as scratch below
    add             r14, r1, r2             @ r14 = ref row 2 (odd-row pointer)
    mov             r2, r2, lsl #1          @ both ref pointers step 2 rows

@/* Load 8 + 1 rows from the reference block.                               */
@/* vrhadd.u8 computes (a + b + 1) >> 1 per byte, so the rounding offset    */
@/* of 1 is built into the instruction and no separate add is needed.       */
    vld1.8          {d0}, [r1], r2          @ ref row 1 -> d0
    vld1.8          {d2}, [r14], r2         @ ref row 2 -> d2
    vld1.8          {d4}, [r1], r2          @ ref row 3 -> d4
    vld1.8          {d6}, [r14], r2         @ ref row 4 -> d6
    vld1.8          {d1}, [r1], r2          @ ref row 5 -> d1
    vld1.8          {d3}, [r14], r2         @ ref row 6 -> d3
    vrhadd.u8       d9, d1, d6              @ out row 4 = avg(row 5, row 4) -> d9
    vld1.8          {d5}, [r1], r2          @ ref row 7 -> d5
    vrhadd.u8       q0, q0, q1              @ out row 1 -> d0, out row 5 -> d1
    vld1.8          {d7}, [r14], r2         @ ref row 8 -> d7
    vrhadd.u8       q1, q1, q2              @ out row 2 -> d2, out row 6 -> d3
    vld1.8          {d8}, [r1], r2          @ ref row 9 -> d8
    vrhadd.u8       q2, q2, q3              @ out row 3 -> d4, out row 7 -> d5

    add             r14, r0, r3             @ r14 = out row 2 (odd-row pointer)
    mov             r3, r3, lsl #1          @ both out pointers step 2 rows

@/* Store the eight rows calculated above */
    vst1.8          {d2}, [r14], r3         @ out row 2
    vrhadd.u8       d7, d7, d8              @ out row 8 = avg(row 8, row 9) -> d7
    vst1.8          {d0}, [r0], r3          @ out row 1
    vst1.8          {d9}, [r14], r3         @ out row 4
    vst1.8          {d4}, [r0], r3          @ out row 3
    vst1.8          {d3}, [r14], r3         @ out row 6
    vst1.8          {d1}, [r0], r3          @ out row 5
    vst1.8          {d7}, [r14], r3         @ out row 8
    vst1.8          {d5}, [r0], r3          @ out row 7

    vpop            {d8, d9}                @ restore callee-saved registers
    ldmfd           sp!, {pc}






@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_mc_halfx_fully_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer. This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution in the x direction.
@//
@//                      Operation: for each of the 8 rows,
@//                        out[y][x] = (ref[y][x] + ref[y][x+1] + 1) >> 1
@//                      for x = 0..7.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row step, bytes)
@//                      r3 - out_wid : Current Block Width (row step, bytes)
@//
@// Registers Used     : r12, r14, d0-d10, d12-d14, d16-d18, d20-d22
@//                      (d8-d14 are saved and restored)
@//
@// Stack Usage        : 64 bytes (8 for r12, lr + 56 for d8-d14)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Each reference row is fetched as 16 bytes although
@//                      only bytes 0..8 contribute to the result, so the
@//                      7 bytes following them must also be readable.
@//-----------------------------------------------------------------------------
@*/



    .global impeg2_mc_halfx_fully_8x8_a9q



impeg2_mc_halfx_fully_8x8_a9q:

    stmfd           sp!, {r12, lr}
    vpush           {d8-d14}                @ d8-d15 are callee-saved (AAPCS);
                                            @ d8-d10 and d12-d14 are scratch below

    add             r14, r1, r2, lsl #2     @ r14 = ref row 5
    add             r12, r0, r3, lsl #2     @ r12 = out row 5

    vld1.8          {d0, d1}, [r1], r2      @ load 16 pixels of row1
    vld1.8          {d2, d3}, [r14], r2     @ load 16 pixels of row5

    vld1.8          {d4, d5}, [r1], r2      @ load 16 pixels of row2
    vld1.8          {d6, d7}, [r14], r2     @ load 16 pixels of row6

    vext.8          d8, d0, d1, #1          @ pixels (1-8) of row1
    vext.8          d12, d2, d3, #1         @ pixels (1-8) of row5
    vext.8          d16, d4, d5, #1         @ pixels (1-8) of row2
    vext.8          d20, d6, d7, #1         @ pixels (1-8) of row6

    vld1.8          {d9, d10}, [r1], r2     @ load row3 (pixels 0-7 in d9)
    vld1.8          {d13, d14}, [r14], r2   @ load row7 (pixels 0-7 in d13)
    vld1.8          {d17, d18}, [r1], r2    @ load row4 (pixels 0-7 in d17)
    vld1.8          {d21, d22}, [r14], r2   @ load row8 (pixels 0-7 in d21)

    @ The shifted copies of rows 3/7/4/8 go into the (now free) odd halves
    @ of q0-q3 so that each q-wide vrhadd below averages two rows at once.
    vext.8          d1, d9, d10, #1         @ pixels (1-8) of row3
    vext.8          d3, d13, d14, #1        @ pixels (1-8) of row7
    vext.8          d5, d17, d18, #1        @ pixels (1-8) of row4
    vext.8          d7, d21, d22, #1        @ pixels (1-8) of row8

    vrhadd.u8       q0, q0, q4              @ d0 = out row1, d1 = out row3
    vrhadd.u8       q1, q1, q6              @ d2 = out row5, d3 = out row7
    vrhadd.u8       q2, q2, q8              @ d4 = out row2, d5 = out row4
    vrhadd.u8       q3, q3, q10             @ d6 = out row6, d7 = out row8

    vst1.8          {d0}, [r0], r3          @ store row1
    vst1.8          {d2}, [r12], r3         @ store row5
    vst1.8          {d4}, [r0], r3          @ store row2
    vst1.8          {d6}, [r12], r3         @ store row6
    vst1.8          {d1}, [r0], r3          @ store row3
    vst1.8          {d3}, [r12], r3         @ store row7
    vst1.8          {d5}, [r0], r3          @ store row4
    vst1.8          {d7}, [r12], r3         @ store row8

    vpop            {d8-d14}                @ restore callee-saved registers
    ldmfd           sp!, {r12, pc}








@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_mc_halfx_halfy_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer. This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution in both x and y.
@//
@//                      Operation: for each of the 8 output rows,
@//                        out[y][x] = (ref[y][x]   + ref[y][x+1] +
@//                                     ref[y+1][x] + ref[y+1][x+1] + 2) >> 2
@//                      for x = 0..7, i.e. 9 reference rows are read.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row step, bytes)
@//                      r3 - out_wid : Current Block Width (row step, bytes)
@//
@// Registers Used     : r14, q0-q15 (d8-d15, i.e. q4-q7, are saved and
@//                      restored)
@//
@// Stack Usage        : 68 bytes (4 for lr + 64 for d8-d15)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Each reference row is fetched as 16 bytes although
@//                      only bytes 0..8 contribute to the result, so the
@//                      7 bytes following them must also be readable.
@//-----------------------------------------------------------------------------
@*/


    .global impeg2_mc_halfx_halfy_8x8_a9q

impeg2_mc_halfx_halfy_8x8_a9q:

    stmfd           sp!, {r14}
    vpush           {d8-d15}                @ d8-d15 are callee-saved (AAPCS);
                                            @ q4-q7 are used as scratch below

    add             r14, r1, r2, lsl #2     @ r14 = ref row 5

    vld1.8          {d0, d1}, [r1], r2      @ load 16 pixels of row1
    vld1.8          {d2, d3}, [r14], r2     @ load 16 pixels of row5
    vld1.8          {d4, d5}, [r1], r2      @ load 16 pixels of row2
    vld1.8          {d6, d7}, [r14], r2     @ load 16 pixels of row6

    vext.8          d1, d0, d1, #1          @ pixels (1-8) of row1
    vext.8          d3, d2, d3, #1          @ pixels (1-8) of row5
    vext.8          d5, d4, d5, #1          @ pixels (1-8) of row2
    vext.8          d7, d6, d7, #1          @ pixels (1-8) of row6

    vld1.8          {d8, d9}, [r1], r2      @ load row3
    vld1.8          {d10, d11}, [r14], r2   @ load row7
    vld1.8          {d12, d13}, [r1], r2    @ load row4
    vld1.8          {d14, d15}, [r14], r2   @ load row8

    vext.8          d9, d8, d9, #1          @ pixels (1-8) of row3

    vld1.8          {d16, d17}, [r14], r2   @ load row9

    vext.8          d11, d10, d11, #1       @ pixels (1-8) of row7
    vext.8          d13, d12, d13, #1       @ pixels (1-8) of row4
    vext.8          d15, d14, d15, #1       @ pixels (1-8) of row8
    vext.8          d17, d16, d17, #1       @ pixels (1-8) of row9

    @ Interpolation in x direction: widen to 16 bit and add each pixel to
    @ its right-hand neighbour (qN = row[x] + row[x+1], 8 lanes).
    vaddl.u8        q0, d0, d1              @ row1
    vaddl.u8        q1, d2, d3              @ row5
    vaddl.u8        q2, d4, d5              @ row2
    vaddl.u8        q3, d6, d7              @ row6
    vaddl.u8        q4, d8, d9              @ row3
    vaddl.u8        q5, d10, d11            @ row7
    vaddl.u8        q6, d12, d13            @ row4
    vaddl.u8        q7, d14, d15            @ row8
    vaddl.u8        q8, d16, d17            @ row9

    @ Interpolation in y direction: add vertically adjacent row sums, then
    @ vrshrn #2 rounds ((sum + 2) >> 2) and narrows back to 8 bit.
    add             r14, r0, r3, lsl #2     @ r14 = out row 5

    vadd.u16        q9, q0, q2              @ row1 + row2
    vadd.u16        q13, q1, q3             @ row5 + row6
    vadd.u16        q10, q2, q4             @ row2 + row3
    vadd.u16        q14, q3, q5             @ row6 + row7

    vrshrn.u16      d18, q9, #2             @ out row1
    vrshrn.u16      d26, q13, #2            @ out row5
    vrshrn.u16      d20, q10, #2            @ out row2
    vrshrn.u16      d28, q14, #2            @ out row6

    vadd.u16        q11, q4, q6             @ row3 + row4
    vst1.8          {d18}, [r0], r3         @ store row1
    vadd.u16        q15, q5, q7             @ row7 + row8
    vst1.8          {d26}, [r14], r3        @ store row5
    vadd.u16        q12, q6, q1             @ row4 + row5
    vst1.8          {d20}, [r0], r3         @ store row2
    vadd.u16        q7, q7, q8              @ row8 + row9
    vst1.8          {d28}, [r14], r3        @ store row6

    vrshrn.u16      d22, q11, #2            @ out row3
    vrshrn.u16      d30, q15, #2            @ out row7
    vrshrn.u16      d24, q12, #2            @ out row4
    vrshrn.u16      d14, q7, #2             @ out row8

    vst1.8          {d22}, [r0], r3         @ store row3
    vst1.8          {d30}, [r14], r3        @ store row7
    vst1.8          {d24}, [r0], r3         @ store row4
    vst1.8          {d14}, [r14], r3        @ store row8

    vpop            {d8-d15}                @ restore callee-saved registers
    ldmfd           sp!, {pc}





@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_mc_fullx_fully_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer. This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a full pel resolution in both x and y.
@//
@//                      Operation: plain copy of 8 rows of 8 bytes from
@//                      ref to out; no interpolation is performed.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row step, bytes)
@//                      r3 - out_wid : Current Block Width (row step, bytes)
@//
@// Registers Used     : r12, r14, d0-d3
@//
@// Stack Usage        : 8 bytes
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Rows 1-4 and rows 5-8 are copied as two interleaved
@//                      streams (r1 -> r0 and r14 -> r12).
@//-----------------------------------------------------------------------------
@*/


    .global impeg2_mc_fullx_fully_8x8_a9q
impeg2_mc_fullx_fully_8x8_a9q:


    stmfd           sp!, {r12, lr}

    add             r14, r1, r2, lsl #2     @ r14 = ref row 5
    add             r12, r0, r3, lsl #2     @ r12 = out row 5

    vld1.8          {d0}, [r1], r2          @ load row1
    vld1.8          {d1}, [r14], r2         @ load row5
    vld1.8          {d2}, [r1], r2          @ load row2
    vld1.8          {d3}, [r14], r2         @ load row6

    vst1.8          {d0}, [r0], r3          @ store row1
    vst1.8          {d1}, [r12], r3         @ store row5
    vst1.8          {d2}, [r0], r3          @ store row2
    vst1.8          {d3}, [r12], r3         @ store row6

    vld1.8          {d0}, [r1], r2          @ load row3
    vld1.8          {d1}, [r14], r2         @ load row7
    vld1.8          {d2}, [r1], r2          @ load row4
    vld1.8          {d3}, [r14], r2         @ load row8

    vst1.8          {d0}, [r0], r3          @ store row3
    vst1.8          {d1}, [r12], r3         @ store row7
    vst1.8          {d2}, [r0], r3          @ store row4
    vst1.8          {d3}, [r12], r3         @ store row8

    ldmfd           sp!, {r12, pc}





@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_interpolate_a9q()
@//
@// Detail Description : Averages two source macroblocks into the
@//                      destination: dst = (src1 + src2 + 1) >> 1 per byte,
@//                      for a 16x16 y plane and 8x8 u and v planes.
@//
@// Inputs             : r0 - src1: address of three consecutive 32-bit plane
@//                           pointers (y at +0, u at +4, v at +8)
@//                      r1 - src2: same layout as src1
@//                      r2 - dest buf: same layout as src1
@//                      r3 - dst stride in bytes for y
@//                           (halved with lsr #1 for u and v)
@//
@// Registers Used     : r4, r5, r7, r12, d0-d15
@//                      (r4, r5, r7, r12 and d8-d15 are saved and restored;
@//                      r3 is overwritten with the halved stride)
@//
@// Stack Usage        : 84 bytes (20 for core registers + 64 for d8-d15)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Both source planes are read contiguously (pointer
@//                      post-increment by the load size): 16 bytes per y row
@//                      and 8 bytes per u/v row. Only dst uses a stride.
@//-----------------------------------------------------------------------------
@*/


    .global impeg2_interpolate_a9q


impeg2_interpolate_a9q:

    stmfd           r13!, {r4, r5, r7, r12, r14}
    vpush           {d8-d15}                @ d8-d15 are callee-saved (AAPCS);
                                            @ q4-q7 hold the src2 rows below

    ldr             r4, [r0, #0]            @ ptr_y src1
    ldr             r5, [r1, #0]            @ ptr_y src2
    ldr             r7, [r2, #0]            @ ptr_y dst buf

    mov             r12, #4                 @ 4 passes x 4 rows = 16 y rows


interp_lumablocks_stride:

    vld1.8          {d0, d1}, [r4]!         @ row1 src1
    vld1.8          {d2, d3}, [r4]!         @ row2 src1
    vld1.8          {d4, d5}, [r4]!         @ row3 src1
    vld1.8          {d6, d7}, [r4]!         @ row4 src1

    vld1.8          {d8, d9}, [r5]!         @ row1 src2
    vld1.8          {d10, d11}, [r5]!       @ row2 src2
    vld1.8          {d12, d13}, [r5]!       @ row3 src2
    vld1.8          {d14, d15}, [r5]!       @ row4 src2

    vrhadd.u8       q0, q0, q4              @ operate on row1
    vrhadd.u8       q1, q1, q5              @ operate on row2
    vrhadd.u8       q2, q2, q6              @ operate on row3
    vrhadd.u8       q3, q3, q7              @ operate on row4

    vst1.8          {d0, d1}, [r7], r3      @ row1
    vst1.8          {d2, d3}, [r7], r3      @ row2
    vst1.8          {d4, d5}, [r7], r3      @ row3
    vst1.8          {d6, d7}, [r7], r3      @ row4

    subs            r12, r12, #1
    bne             interp_lumablocks_stride


    mov             r3, r3, lsr #1          @ stride >> 1 for the chroma planes

    ldr             r4, [r0, #4]            @ ptr_u src1
    ldr             r5, [r1, #4]            @ ptr_u src2
    ldr             r7, [r2, #4]            @ ptr_u dst buf

    mov             r12, #2                 @ 2 passes: first u, then v


@ chroma blocks: each pass handles one whole 8x8 plane

interp_chromablocks_stride:

    vld1.8          {d0, d1}, [r4]!         @ row1 & 2 src1
    vld1.8          {d2, d3}, [r4]!         @ row3 & 4 src1
    vld1.8          {d4, d5}, [r4]!         @ row5 & 6 src1
    vld1.8          {d6, d7}, [r4]!         @ row7 & 8 src1

    vld1.8          {d8, d9}, [r5]!         @ row1 & 2 src2
    vld1.8          {d10, d11}, [r5]!       @ row3 & 4 src2
    vld1.8          {d12, d13}, [r5]!       @ row5 & 6 src2
    vld1.8          {d14, d15}, [r5]!       @ row7 & 8 src2

    vrhadd.u8       q0, q0, q4              @ operate on row1 & 2
    vrhadd.u8       q1, q1, q5              @ operate on row3 & 4
    vrhadd.u8       q2, q2, q6              @ operate on row5 & 6
    vrhadd.u8       q3, q3, q7              @ operate on row7 & 8

    vst1.8          {d0}, [r7], r3          @ row1
    vst1.8          {d1}, [r7], r3          @ row2
    vst1.8          {d2}, [r7], r3          @ row3
    vst1.8          {d3}, [r7], r3          @ row4
    vst1.8          {d4}, [r7], r3          @ row5
    vst1.8          {d5}, [r7], r3          @ row6
    vst1.8          {d6}, [r7], r3          @ row7
    vst1.8          {d7}, [r7], r3          @ row8

    @ Switch to the v plane for the second pass. These loads also execute
    @ after the v pass, where their results are simply unused.
    ldr             r4, [r0, #8]            @ ptr_v src1
    ldr             r5, [r1, #8]            @ ptr_v src2
    ldr             r7, [r2, #8]            @ ptr_v dst buf

    subs            r12, r12, #1
    bne             interp_chromablocks_stride


    vpop            {d8-d15}                @ restore callee-saved registers
    ldmfd           r13!, {r4, r5, r7, r12, pc}




