1@/****************************************************************************** 2@ * 3@ * Copyright (C) 2015 The Android Open Source Project 4@ * 5@ * Licensed under the Apache License, Version 2.0 (the "License"); 6@ * you may not use this file except in compliance with the License. 7@ * You may obtain a copy of the License at: 8@ * 9@ * http://www.apache.org/licenses/LICENSE-2.0 10@ * 11@ * Unless required by applicable law or agreed to in writing, software 12@ * distributed under the License is distributed on an "AS IS" BASIS, 13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14@ * See the License for the specific language governing permissions and 15@ * limitations under the License. 16@ * 17@ ***************************************************************************** 18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19@*/ 20 21@/* 22@//---------------------------------------------------------------------------- 23@// File Name : impeg2_inter_pred.s 24@// 25@// Description : This file has motion compensation related 26@// interpolation functions on Neon + CortexA-8 platform 27@// 28@// Reference Document : 29@// 30@// Revision History : 31@// Date Author Detail Description 32@// ------------ ---------------- ---------------------------------- 33@// 18 jun 2010 S Hamsalekha Created 34@// 35@//------------------------------------------------------------------------- 36@*/ 37 38@/* 39@// ---------------------------------------------------------------------------- 40@// Include Files 41@// ---------------------------------------------------------------------------- 42@*/ 43.text 44.p2align 2 45 46 47@/* 48@// ---------------------------------------------------------------------------- 49@// Struct/Union Types and Define 50@// ---------------------------------------------------------------------------- 51@*/ 52 53 54@/* 55@// ---------------------------------------------------------------------------- 
@//  Static Global Data section variables
@// ----------------------------------------------------------------------------
@*/
@// -------------------------- NONE --------------------------------------------


@/*
@// ----------------------------------------------------------------------------
@//  Static Prototype Functions
@// ----------------------------------------------------------------------------
@*/
@// -------------------------- NONE --------------------------------------------

@/*
@// ----------------------------------------------------------------------------
@//  Exported functions
@// ----------------------------------------------------------------------------
@*/

@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_copy_mb_a9q()
@//
@// Detail Description : Copies one macroblock from src to dst: 16 rows of
@//                      16 bytes for Y, then 8 rows of 8 bytes for U and
@//                      8 rows of 8 bytes for V. The chroma planes are
@//                      stepped with half of the strides passed in r2/r3.
@//
@// Inputs             : r0 - src : address of three consecutive words that
@//                                 hold the Y, U and V plane pointers
@//                      r1 - dst : same three-word layout as src
@//                      r2 - source stride in bytes (luma)
@//                      r3 - destination stride in bytes (luma)
@//
@// Registers Used     : r2-r5, d0, d1 (r2 and r3 are halved on the way)
@//
@// Stack Usage        : 12 bytes (r4, r5, lr)
@//
@// Outputs            : dst planes hold a copy of the src macroblock
@//
@// Return Data        : None
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_copy_mb_a9q

impeg2_copy_mb_a9q:

        push            {r4, r5, lr}

        @ ---- Y plane: 16 rows, 16 bytes per row ----
        ldr             r4, [r0]                    @ r4 = src->y
        ldr             r5, [r1]                    @ r5 = dst->y
        .rept           16
        vld1.8          {d0, d1}, [r4], r2          @ fetch a row, r4 += src stride
        vst1.8          {d0, d1}, [r5], r3          @ write a row, r5 += dst stride
        .endr

        @ Chroma rows are stepped with half of the luma strides
        mov             r2, r2, lsr #1
        mov             r3, r3, lsr #1

        @ ---- U plane: 8 rows, 8 bytes per row ----
        ldr             r4, [r0, #4]                @ r4 = src->u
        ldr             r5, [r1, #4]                @ r5 = dst->u
        .rept           8
        vld1.8          {d0}, [r4], r2
        vst1.8          {d0}, [r5], r3
        .endr

        @ ---- V plane: 8 rows, 8 bytes per row ----
        ldr             r4, [r0, #8]                @ r4 = src->v
        ldr             r5, [r1, #8]                @ r5 = dst->v
        .rept           8
        vld1.8          {d0}, [r4], r2
        vst1.8          {d0}, [r5], r3
        .endr

        pop             {r4, r5, pc}




@/*
@//---------------------------------------------------------------------------
@// Function Name      :   impeg2_mc_fullx_halfy_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer.This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row stride)
@//                      r3 - out_wid : Current Block Width (row stride)
@//
@// Registers Used     : r14, d0-d9
@//
@// Stack Usage        : 20 bytes (lr, plus d8-d9 which are saved/restored)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Nine reference rows of 8 bytes are read; output
@//                      row n is vrhadd.u8 (rounding halving add) of
@//                      reference rows n and n+1.
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_mc_fullx_halfy_8x8_a9q

impeg2_mc_fullx_halfy_8x8_a9q:

        push            {lr}
        vpush           {d8-d9}

        @ r1 walks the odd reference rows, lr the even ones; after the
        @ doubling below each post-increment skips two rows.
        add             lr, r1, r2
        mov             r2, r2, lsl #1

        @ Fetch reference rows 1..9, averaging as soon as operands are in
        vld1.8          {d0}, [r1], r2              @ d0 = ref row 1
        vld1.8          {d2}, [lr], r2              @ d2 = ref row 2
        vld1.8          {d4}, [r1], r2              @ d4 = ref row 3
        vld1.8          {d6}, [lr], r2              @ d6 = ref row 4
        vld1.8          {d1}, [r1], r2              @ d1 = ref row 5
        vld1.8          {d3}, [lr], r2              @ d3 = ref row 6
        vrhadd.u8       d9, d1, d6                  @ d9 = out row 4 (ref 4, ref 5)
        vld1.8          {d5}, [r1], r2              @ d5 = ref row 7
        vrhadd.u8       q0, q0, q1                  @ d0 = out row 1, d1 = out row 5
        vld1.8          {d7}, [lr], r2              @ d7 = ref row 8
        vrhadd.u8       q1, q1, q2                  @ d2 = out row 2, d3 = out row 6
        vld1.8          {d8}, [r1], r2              @ d8 = ref row 9
        vrhadd.u8       q2, q2, q3                  @ d4 = out row 3, d5 = out row 7

        @ Same odd/even split for the destination pointers
        add             lr, r0, r3
        mov             r3, r3, lsl #1

        vst1.8          {d2}, [lr], r3              @ out row 2
        vrhadd.u8       d7, d7, d8                  @ d7 = out row 8 (ref 8, ref 9)
        vst1.8          {d0}, [r0], r3              @ out row 1
        vst1.8          {d9}, [lr], r3              @ out row 4
        vst1.8          {d4}, [r0], r3              @ out row 3
        vst1.8          {d3}, [lr], r3              @ out row 6
        vst1.8          {d1}, [r0], r3              @ out row 5
        vst1.8          {d7}, [lr], r3              @ out row 8
        vst1.8          {d5}, [r0], r3              @ out row 7

        vpop            {d8-d9}
        pop             {pc}




@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_mc_halfx_fully_8x8_a9q()
@//
@// Detail Description : Writes an 8x8 block in which every output byte is
@//                      vrhadd.u8 (rounding halving add) of a reference
@//                      byte and its right-hand neighbour in the same row.
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row stride)
@//                      r3 - out_wid : Current Block Width (row stride)
@//
@// Registers Used     : r12, r14, d0-d7, d16-d18, d20-d22, d24-d26, d28-d30
@//
@// Stack Usage        : 8 bytes (r12, lr)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : 16 bytes are loaded from each of the 8 reference
@//                      rows although only bytes 0..8 feed the result.
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_mc_halfx_fully_8x8_a9q

impeg2_mc_halfx_fully_8x8_a9q:

        push            {r12, lr}

        add             lr, r1, r2, lsl #2          @ lr  = ref + 4 * ref_wid (rows 5-8)
        add             r12, r0, r3, lsl #2         @ r12 = out + 4 * out_wid (rows 5-8)

        vld1.8          {d0, d1}, [r1], r2          @ row 1
        vld1.8          {d2, d3}, [lr], r2          @ row 5
        vld1.8          {d4, d5}, [r1], r2          @ row 2
        vld1.8          {d6, d7}, [lr], r2          @ row 6

        @ Build the one-byte-shifted copy (bytes 1..8) of each row
        vext.8          d24, d0, d1, #1             @ row 1 shifted
        vext.8          d28, d2, d3, #1             @ row 5 shifted
        vext.8          d16, d4, d5, #1             @ row 2 shifted
        vext.8          d20, d6, d7, #1             @ row 6 shifted

        @ Rows 3/7/4/8 land in the upper halves of q12/q14/q8/q10 so that
        @ each q-wide vrhadd below handles two rows at once.
        vld1.8          {d25, d26}, [r1], r2        @ row 3
        vld1.8          {d29, d30}, [lr], r2        @ row 7
        vld1.8          {d17, d18}, [r1], r2        @ row 4
        vld1.8          {d21, d22}, [lr], r2        @ row 8

        vext.8          d1, d25, d26, #1            @ row 3 shifted
        vext.8          d3, d29, d30, #1            @ row 7 shifted
        vext.8          d5, d17, d18, #1            @ row 4 shifted
        vext.8          d7, d21, d22, #1            @ row 8 shifted

        vrhadd.u8       q0, q0, q12                 @ d0 = out row 1, d1 = out row 3
        vrhadd.u8       q1, q1, q14                 @ d2 = out row 5, d3 = out row 7
        vrhadd.u8       q2, q2, q8                  @ d4 = out row 2, d5 = out row 4
        vrhadd.u8       q3, q3, q10                 @ d6 = out row 6, d7 = out row 8

        vst1.8          {d0}, [r0], r3              @ out row 1
        vst1.8          {d2}, [r12], r3             @ out row 5
        vst1.8          {d4}, [r0], r3              @ out row 2
        vst1.8          {d6}, [r12], r3             @ out row 6
        vst1.8          {d1}, [r0], r3              @ out row 3
        vst1.8          {d3}, [r12], r3             @ out row 7
        vst1.8          {d5}, [r0], r3              @ out row 4
        vst1.8          {d7}, [r12], r3             @ out row 8

        pop             {r12, pc}




@/*
@//---------------------------------------------------------------------------
@// Function Name      :   impeg2_mc_halfx_halfy_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer.This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution and VopRoundingType is 0 ..
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row stride)
@//                      r3 - out_wid : Current Block Width (row stride)
@//
@// Registers Used     : r14, q0-q15
@//
@// Stack Usage        : 68 bytes (lr, plus d8-d15 which are saved/restored)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Nine reference rows of 16 bytes are loaded. Each
@//                      output byte is (a + b + c + d + 2) >> 2 over the
@//                      2x2 reference neighbourhood starting at that byte.
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_mc_halfx_halfy_8x8_a9q

impeg2_mc_halfx_halfy_8x8_a9q:

        push            {lr}
        vpush           {d8-d15}

        add             lr, r1, r2, lsl #2          @ lr = ref + 4 * ref_wid (rows 5-9)

        vld1.8          {d0, d1}, [r1], r2          @ row 1
        vld1.8          {d2, d3}, [lr], r2          @ row 5
        vld1.8          {d4, d5}, [r1], r2          @ row 2
        vld1.8          {d6, d7}, [lr], r2          @ row 6

        @ The odd d register of every pair is replaced by bytes 1..8 of
        @ the row, i.e. the right-hand neighbours of the even register.
        vext.8          d1, d0, d1, #1              @ row 1 shifted
        vext.8          d3, d2, d3, #1              @ row 5 shifted
        vext.8          d5, d4, d5, #1              @ row 2 shifted
        vext.8          d7, d6, d7, #1              @ row 6 shifted

        vld1.8          {d8, d9}, [r1], r2          @ row 3
        vld1.8          {d10, d11}, [lr], r2        @ row 7
        vld1.8          {d12, d13}, [r1], r2        @ row 4
        vld1.8          {d14, d15}, [lr], r2        @ row 8
        vext.8          d9, d8, d9, #1              @ row 3 shifted
        vld1.8          {d16, d17}, [lr], r2        @ row 9
        vext.8          d11, d10, d11, #1           @ row 7 shifted
        vext.8          d13, d12, d13, #1           @ row 4 shifted
        vext.8          d15, d14, d15, #1           @ row 8 shifted
        vext.8          d17, d16, d17, #1           @ row 9 shifted

        @ Horizontal pass: widen to 16 bit, pixel + right neighbour
        vaddl.u8        q0, d0, d1                  @ q0 = hsum row 1
        vaddl.u8        q1, d2, d3                  @ q1 = hsum row 5
        vaddl.u8        q2, d4, d5                  @ q2 = hsum row 2
        vaddl.u8        q3, d6, d7                  @ q3 = hsum row 6
        vaddl.u8        q4, d8, d9                  @ q4 = hsum row 3
        vaddl.u8        q5, d10, d11                @ q5 = hsum row 7
        vaddl.u8        q6, d12, d13                @ q6 = hsum row 4
        vaddl.u8        q7, d14, d15                @ q7 = hsum row 8
        vaddl.u8        q8, d16, d17                @ q8 = hsum row 9

        @ Vertical pass: add adjacent row sums, then round, shift by 2 and
        @ narrow back to bytes. r0 writes rows 1-4, lr writes rows 5-8.
        add             lr, r0, r3, lsl #2

        vadd.i16        q9, q0, q2                  @ rows 1 + 2
        vadd.i16        q13, q1, q3                 @ rows 5 + 6
        vadd.i16        q10, q2, q4                 @ rows 2 + 3
        vadd.i16        q14, q3, q5                 @ rows 6 + 7

        vrshrn.i16      d18, q9, #2                 @ out row 1
        vrshrn.i16      d26, q13, #2                @ out row 5
        vrshrn.i16      d20, q10, #2                @ out row 2
        vrshrn.i16      d28, q14, #2                @ out row 6

        vadd.i16        q11, q4, q6                 @ rows 3 + 4
        vst1.8          {d18}, [r0], r3             @ store out row 1
        vadd.i16        q15, q5, q7                 @ rows 7 + 8
        vst1.8          {d26}, [lr], r3             @ store out row 5
        vadd.i16        q12, q6, q1                 @ rows 4 + 5
        vst1.8          {d20}, [r0], r3             @ store out row 2
        vadd.i16        q7, q7, q8                  @ rows 8 + 9
        vst1.8          {d28}, [lr], r3             @ store out row 6

        vrshrn.i16      d22, q11, #2                @ out row 3
        vrshrn.i16      d30, q15, #2                @ out row 7
        vrshrn.i16      d24, q12, #2                @ out row 4
        vrshrn.i16      d14, q7, #2                 @ out row 8

        vst1.8          {d22}, [r0], r3             @ store out row 3
        vst1.8          {d30}, [lr], r3             @ store out row 7
        vst1.8          {d24}, [r0], r3             @ store out row 4
        vst1.8          {d14}, [lr], r3             @ store out row 8

        vpop            {d8-d15}
        pop             {pc}




@/*
@//---------------------------------------------------------------------------
@// Function Name      :   impeg2_mc_fullx_fully_8x8_a9q()
@//
@// Detail Description : This function pastes the reference block in the
@//                      current frame buffer.This function is called for
@//                      blocks that are not coded and have motion vectors
@//                      with a half pel resolution and ..
@//
@// Inputs             : r0 - out     : Current Block Pointer
@//                      r1 - ref     : Reference Block Pointer
@//                      r2 - ref_wid : Reference Block Width (row stride)
@//                      r3 - out_wid : Current Block Width (row stride)
@//
@// Registers Used     : r12, r14, d0-d3
@//
@// Stack Usage        : 8 bytes (r12, lr)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Plain 8x8 byte copy from ref to out, no averaging.
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_mc_fullx_fully_8x8_a9q

impeg2_mc_fullx_fully_8x8_a9q:

        push            {r12, lr}

        @ Two pointer pairs, four rows apart: r1/r0 handle rows 1-4,
        @ lr/r12 handle rows 5-8.
        add             lr, r1, r2, lsl #2          @ lr  = ref + 4 * ref_wid
        add             r12, r0, r3, lsl #2         @ r12 = out + 4 * out_wid

        vld1.8          {d0}, [r1], r2              @ load row 1
        vld1.8          {d1}, [lr], r2              @ load row 5
        vld1.8          {d2}, [r1], r2              @ load row 2
        vld1.8          {d3}, [lr], r2              @ load row 6

        vst1.8          {d0}, [r0], r3              @ store row 1
        vst1.8          {d1}, [r12], r3             @ store row 5
        vst1.8          {d2}, [r0], r3              @ store row 2
        vst1.8          {d3}, [r12], r3             @ store row 6

        vld1.8          {d0}, [r1], r2              @ load row 3
        vld1.8          {d1}, [lr], r2              @ load row 7
        vld1.8          {d2}, [r1], r2              @ load row 4
        vld1.8          {d3}, [lr], r2              @ load row 8

        vst1.8          {d0}, [r0], r3              @ store row 3
        vst1.8          {d1}, [r12], r3             @ store row 7
        vst1.8          {d2}, [r0], r3              @ store row 4
        vst1.8          {d3}, [r12], r3             @ store row 8

        pop             {r12, pc}




@/*
@//---------------------------------------------------------------------------
@// Function Name      : impeg2_interpolate_a9q()
@//
@// Detail Description : Averages two macroblocks byte by byte with
@//                      vrhadd.u8 (rounding halving add) and writes the
@//                      result to the destination planes.
@//
@// Inputs             : r0 - pointer to src1 (three consecutive words: Y, U
@//                           and V plane pointers)
@//                      r1 - pointer to src2 (same layout)
@//                      r2 - dest buf (same layout)
@//                      r3 - dst stride (halved internally for chroma)
@// Registers Used     : r3, r4, r5, r7, r12, d0-d15
@//
@// Stack Usage        : 84 bytes (r4, r5, r7, r12, lr, plus d8-d15)
@//
@// Outputs            : The Motion Compensated Block
@//
@// Return Data        : None
@//
@// Programming Note   : Both sources are read as contiguous bytes (16 per
@//                      luma row, 8 per chroma row); only the destination
@//                      is strided.
@//-----------------------------------------------------------------------------
@*/

        .global impeg2_interpolate_a9q

impeg2_interpolate_a9q:

        push            {r4, r5, r7, r12, lr}
        vpush           {d8-d15}

        @ ---- Luma: 4 passes, each producing 4 rows of 16 bytes ----
        ldr             r4, [r0, #0]                @ r4 = src1 Y plane
        ldr             r5, [r1, #0]                @ r5 = src2 Y plane
        ldr             r7, [r2, #0]                @ r7 = dst  Y plane
        mov             r12, #4                     @ passes left

.Linterp_luma_pass:

        @ Sources advance by 16 bytes per row (writeback, no stride)
        vld1.8          {d0, d1}, [r4]!             @ src1 row 1
        vld1.8          {d2, d3}, [r4]!             @ src1 row 2
        vld1.8          {d4, d5}, [r4]!             @ src1 row 3
        vld1.8          {d6, d7}, [r4]!             @ src1 row 4

        vld1.8          {d8, d9}, [r5]!             @ src2 row 1
        vld1.8          {d10, d11}, [r5]!           @ src2 row 2
        vld1.8          {d12, d13}, [r5]!           @ src2 row 3
        vld1.8          {d14, d15}, [r5]!           @ src2 row 4

        vrhadd.u8       q0, q0, q4                  @ averaged row 1
        vrhadd.u8       q1, q1, q5                  @ averaged row 2
        vrhadd.u8       q2, q2, q6                  @ averaged row 3
        vrhadd.u8       q3, q3, q7                  @ averaged row 4

        vst1.8          {d0, d1}, [r7], r3          @ dst row 1
        vst1.8          {d2, d3}, [r7], r3          @ dst row 2
        vst1.8          {d4, d5}, [r7], r3          @ dst row 3
        vst1.8          {d6, d7}, [r7], r3          @ dst row 4

        subs            r12, r12, #1
        bne             .Linterp_luma_pass

        @ ---- Chroma: pass 1 handles U, pass 2 handles V ----
        mov             r3, r3, lsr #1              @ chroma dst stride = stride >> 1

        ldr             r4, [r0, #4]                @ r4 = src1 U plane
        ldr             r5, [r1, #4]                @ r5 = src2 U plane
        ldr             r7, [r2, #4]                @ r7 = dst  U plane
        mov             r12, #2                     @ planes left

.Linterp_chroma_pass:

        @ Each q register holds two consecutive 8-byte source rows
        vld1.8          {d0, d1}, [r4]!             @ src1 rows 1-2
        vld1.8          {d2, d3}, [r4]!             @ src1 rows 3-4
        vld1.8          {d4, d5}, [r4]!             @ src1 rows 5-6
        vld1.8          {d6, d7}, [r4]!             @ src1 rows 7-8

        vld1.8          {d8, d9}, [r5]!             @ src2 rows 1-2
        vld1.8          {d10, d11}, [r5]!           @ src2 rows 3-4
        vld1.8          {d12, d13}, [r5]!           @ src2 rows 5-6
        vld1.8          {d14, d15}, [r5]!           @ src2 rows 7-8

        vrhadd.u8       q0, q0, q4                  @ averaged rows 1-2
        vrhadd.u8       q1, q1, q5                  @ averaged rows 3-4
        vrhadd.u8       q2, q2, q6                  @ averaged rows 5-6
        vrhadd.u8       q3, q3, q7                  @ averaged rows 7-8

        vst1.8          {d0}, [r7], r3              @ dst row 1
        vst1.8          {d1}, [r7], r3              @ dst row 2
        vst1.8          {d2}, [r7], r3              @ dst row 3
        vst1.8          {d3}, [r7], r3              @ dst row 4
        vst1.8          {d4}, [r7], r3              @ dst row 5
        vst1.8          {d5}, [r7], r3              @ dst row 6
        vst1.8          {d6}, [r7], r3              @ dst row 7
        vst1.8          {d7}, [r7], r3              @ dst row 8

        @ Point at the V planes for the second pass. These loads also run
        @ after the V pass, where the values are simply not used.
        ldr             r4, [r0, #8]                @ r4 = src1 V plane
        ldr             r5, [r1, #8]                @ r5 = src2 V plane
        ldr             r7, [r2, #8]                @ r7 = dst  V plane

        subs            r12, r12, #1
        bne             .Linterp_chroma_pass

        vpop            {d8-d15}
        pop             {r4, r5, r7, r12, pc}