1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "oscl_base_macros.h" // for OSCL_UNUSED_ARG 19 #include "mp4enc_lib.h" 20 #include "mp4lib_int.h" 21 #include "dct_inline.h" 22 23 #define FDCT_SHIFT 10 24 25 #ifdef __cplusplus 26 extern "C" 27 { 28 #endif 29 30 /**************************************************************************/ 31 /* Function: BlockDCT_AANwSub 32 Date: 7/31/01 33 Input: 34 Output: out[64] ==> next block 35 Purpose: Do subtraction for zero MV first 36 Modified: 37 **************************************************************************/ 38 BlockDCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)39 Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 40 { 41 Short *dst; 42 Int k0, k1, k2, k3, k4, k5, k6, k7; 43 Int round; 44 Int k12 = 0x022A02D4; 45 Int k14 = 0x0188053A; 46 Int abs_sum; 47 Int mask; 48 Int tmp, tmp2; 49 Int ColTh; 50 51 dst = out + 64 ; 52 ColTh = *dst; 53 out += 128; 54 round = 1 << (FDCT_SHIFT - 1); 55 56 do /* fdct_nextrow */ 57 { 58 /* assuming the block is word-aligned */ 59 mask = 0x1FE; 60 tmp = *((Int*) cur); /* contains 4 pixels */ 61 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 62 k0 = tmp2 & 0xFF; 63 k1 = mask & (tmp << 1); 64 k0 = k1 - (k0 << 1); 65 k1 = (tmp2 >> 8) & 0xFF; 66 k2 = mask & (tmp >> 7); 67 k1 = k2 - (k1 << 1); 68 k2 = (tmp2 >> 16) & 0xFF; 69 k3 = mask & (tmp >> 15); 70 k2 = k3 - (k2 << 1); 71 k3 = (tmp2 >> 24) & 0xFF; 72 k4 = mask & (tmp >> 23); 73 k3 = k4 - (k3 << 1); 74 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 75 tmp2 = *((Int*)(pred + 4)); 76 k4 = tmp2 & 0xFF; 77 k5 = mask & (tmp << 1); 78 k4 = k5 - (k4 << 1); 79 k5 = (tmp2 >> 8) & 0xFF; 80 k6 = mask & (tmp >> 7); 81 k5 = k6 - (k5 << 1); 82 k6 = (tmp2 >> 16) & 0xFF; 83 k7 = mask & (tmp >> 15); 84 k6 = k7 - (k6 << 1); 85 k7 = (tmp2 >> 24) & 0xFF; 86 tmp = mask & (tmp >> 23); 87 k7 = tmp - (k7 << 1); 88 cur += width; 89 pred += 16; 90 91 /* fdct_1 */ 92 k0 = k0 + k7; 93 k7 = k0 - (k7 << 1); 94 k1 = k1 + k6; 95 k6 = k1 - (k6 << 1); 96 k2 = k2 + k5; 97 k5 = k2 - (k5 << 1); 98 k3 = k3 + k4; 99 k4 = k3 - (k4 << 1); 100 101 k0 = k0 + k3; 102 k3 = k0 - (k3 << 1); 103 k1 = k1 + k2; 104 k2 = k1 - (k2 << 1); 105 106 k0 = k0 + k1; 107 k1 = k0 - (k1 << 1); 108 /**********/ 109 dst[0] = k0; 110 dst[4] = k1; /* col. 4 */ 111 /* fdct_2 */ 112 k4 = k4 + k5; 113 k5 = k5 + k6; 114 k6 = k6 + k7; 115 k2 = k2 + k3; 116 /* MUL2C k2,k5,724,FDCT_SHIFT */ 117 /* k0, k1 become scratch */ 118 /* assume FAST MULTIPLY */ 119 k1 = mla724(k12, k5, round); 120 k0 = mla724(k12, k2, round); 121 122 k5 = k1 >> FDCT_SHIFT; 123 k2 = k0 >> FDCT_SHIFT; 124 /*****************/ 125 k2 = k2 + k3; 126 k3 = (k3 << 1) - k2; 127 /********/ 128 dst[2] = k2; /* col. 2 */ 129 k3 <<= 1; /* scale up col. 6 */ 130 dst[6] = k3; /* col. 6 */ 131 /* fdct_3 */ 132 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 133 /* assume FAST MULTIPLY */ 134 /* k0, k1 are output */ 135 k0 = k4 - k6; 136 137 k1 = mla392(k0, k14, round); 138 k0 = mla554(k4, k12, k1); 139 k1 = mla1338(k6, k14, k1); 140 141 k4 = k0 >> FDCT_SHIFT; 142 k6 = k1 >> FDCT_SHIFT; 143 /***********************/ 144 k5 = k5 + k7; 145 k7 = (k7 << 1) - k5; 146 k4 = k4 + k7; 147 k7 = (k7 << 1) - k4; 148 k5 = k5 + k6; 149 k4 <<= 1; /* scale up col.5 */ 150 k6 = k5 - (k6 << 1); 151 /********/ 152 dst[5] = k4; /* col. 5 */ 153 k6 <<= 2; /* scale up col. 7 */ 154 dst[1] = k5; /* col. 1 */ 155 dst[7] = k6; /* col. 7 */ 156 dst[3] = k7; /* col. 3 */ 157 dst += 8; 158 } 159 while (dst < out); 160 161 out -= 64; 162 dst = out + 8; 163 164 /* Vertical Block Loop */ 165 do /* Vertical 8xDCT loop */ 166 { 167 k0 = out[0]; 168 k1 = out[8]; 169 k2 = out[16]; 170 k3 = out[24]; 171 k4 = out[32]; 172 k5 = out[40]; 173 k6 = out[48]; 174 k7 = out[56]; 175 /* deadzone thresholding for column */ 176 177 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 178 179 if (abs_sum < ColTh) 180 { 181 out[0] = 0x7fff; 182 out++; 183 continue; 184 } 185 186 /* fdct_1 */ 187 k0 = k0 + k7; 188 k7 = k0 - (k7 << 1); 189 k1 = k1 + k6; 190 k6 = k1 - (k6 << 1); 191 k2 = k2 + k5; 192 k5 = k2 - (k5 << 1); 193 k3 = k3 + k4; 194 k4 = k3 - (k4 << 1); 195 196 k0 = k0 + k3; 197 k3 = k0 - (k3 << 1); 198 k1 = k1 + k2; 199 k2 = k1 - (k2 << 1); 200 201 k0 = k0 + k1; 202 k1 = k0 - (k1 << 1); 203 /**********/ 204 out[32] = k1; /* row 4 */ 205 out[0] = k0; /* row 0 */ 206 /* fdct_2 */ 207 k4 = k4 + k5; 208 k5 = k5 + k6; 209 k6 = k6 + k7; 210 k2 = k2 + k3; 211 /* MUL2C k2,k5,724,FDCT_SHIFT */ 212 /* k0, k1 become scratch */ 213 /* assume FAST MULTIPLY */ 214 k1 = mla724(k12, k5, round); 215 k0 = mla724(k12, k2, round); 216 217 k5 = k1 >> FDCT_SHIFT; 218 k2 = k0 >> FDCT_SHIFT; 219 /*****************/ 220 k2 = k2 + k3; 221 k3 = (k3 << 1) - k2; 222 k3 <<= 1; /* scale up col. 6 */ 223 /********/ 224 out[48] = k3; /* row 6 */ 225 out[16] = k2; /* row 2 */ 226 /* fdct_3 */ 227 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 228 /* assume FAST MULTIPLY */ 229 /* k0, k1 are output */ 230 k0 = k4 - k6; 231 232 k1 = mla392(k0, k14, round); 233 k0 = mla554(k4, k12, k1); 234 k1 = mla1338(k6, k14, k1); 235 236 k4 = k0 >> FDCT_SHIFT; 237 k6 = k1 >> FDCT_SHIFT; 238 /***********************/ 239 k5 = k5 + k7; 240 k7 = (k7 << 1) - k5; 241 k4 = k4 + k7; 242 k7 = (k7 << 1) - k4; 243 k5 = k5 + k6; 244 k4 <<= 1; /* scale up col. 5 */ 245 k6 = k5 - (k6 << 1); 246 /********/ 247 out[24] = k7 ; /* row 3 */ 248 k6 <<= 2; /* scale up col. 7 */ 249 out[56] = k6 ; /* row 7 */ 250 out[8] = k5 ; /* row 1 */ 251 out[40] = k4 ; /* row 5 */ 252 out++; 253 } 254 while ((UInt)out < (UInt)dst) ; 255 256 return ; 257 } 258 259 /**************************************************************************/ 260 /* Function: Block4x4DCT_AANwSub 261 Date: 7/31/01 262 Input: 263 Output: out[64] ==> next block 264 Purpose: Do subtraction for zero MV first before 4x4 DCT 265 Modified: 266 **************************************************************************/ 267 Block4x4DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)268 Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 269 { 270 Short *dst; 271 register Int k0, k1, k2, k3, k4, k5, k6, k7; 272 Int round; 273 Int k12 = 0x022A02D4; 274 Int k14 = 0x0188053A; 275 Int mask; 276 Int tmp, tmp2; 277 Int abs_sum; 278 Int ColTh; 279 280 dst = out + 64 ; 281 ColTh = *dst; 282 out += 128; 283 round = 1 << (FDCT_SHIFT - 1); 284 285 do /* fdct_nextrow */ 286 { 287 /* assuming the block is word-aligned */ 288 mask = 0x1FE; 289 tmp = *((Int*) cur); /* contains 4 pixels */ 290 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 291 k0 = tmp2 & 0xFF; 292 k1 = mask & (tmp << 1); 293 k0 = k1 - (k0 << 1); 294 k1 = (tmp2 >> 8) & 0xFF; 295 k2 = mask & (tmp >> 7); 296 k1 = k2 - (k1 << 1); 297 k2 = (tmp2 >> 16) & 0xFF; 298 k3 = mask & (tmp >> 15); 299 k2 = k3 - (k2 << 1); 300 k3 = (tmp2 >> 24) & 0xFF; 301 k4 = mask & (tmp >> 23); 302 k3 = k4 - (k3 << 1); 303 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 304 tmp2 = *((Int*)(pred + 4)); 305 k4 = tmp2 & 0xFF; 306 k5 = mask & (tmp << 1); 307 k4 = k5 - (k4 << 1); 308 k5 = (tmp2 >> 8) & 0xFF; 309 k6 = mask & (tmp >> 7); 310 k5 = k6 - (k5 << 1); 311 k6 = (tmp2 >> 16) & 0xFF; 312 k7 = mask & (tmp >> 15); 313 k6 = k7 - (k6 << 1); 314 k7 = (tmp2 >> 24) & 0xFF; 315 tmp = mask & (tmp >> 23); 316 k7 = tmp - (k7 << 1); 317 cur += width; 318 pred += 16; 319 320 /* fdct_1 */ 321 k0 = k0 + k7; 322 k7 = k0 - (k7 << 1); 323 k1 = k1 + k6; 324 k6 = k1 - (k6 << 1); 325 k2 = k2 + k5; 326 k5 = k2 - (k5 << 1); 327 k3 = k3 + k4; 328 k4 = k3 - (k4 << 1); 329 330 k0 = k0 + k3; 331 k3 = k0 - (k3 << 1); 332 k1 = k1 + k2; 333 k2 = k1 - (k2 << 1); 334 335 k0 = k0 + k1; 336 /**********/ 337 dst[0] = k0; 338 /* fdct_2 */ 339 k4 = k4 + k5; 340 k5 = k5 + k6; 341 k6 = k6 + k7; 342 k2 = k2 + k3; 343 /* MUL2C k2,k5,724,FDCT_SHIFT */ 344 /* k0, k1 become scratch */ 345 /* assume FAST MULTIPLY */ 346 k1 = mla724(k12, k5, round); 347 k0 = mla724(k12, k2, round); 348 349 k5 = k1 >> FDCT_SHIFT; 350 k2 = k0 >> FDCT_SHIFT; 351 /*****************/ 352 k2 = k2 + k3; 353 /********/ 354 dst[2] = k2; /* col. 2 */ 355 /* fdct_3 */ 356 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 357 /* assume FAST MULTIPLY */ 358 /* k0, k1 are output */ 359 k0 = k4 - k6; 360 361 k1 = mla392(k0, k14, round); 362 k0 = mla554(k4, k12, k1); 363 k1 = mla1338(k6, k14, k1); 364 365 k4 = k0 >> FDCT_SHIFT; 366 k6 = k1 >> FDCT_SHIFT; 367 /***********************/ 368 k5 = k5 + k7; 369 k7 = (k7 << 1) - k5; 370 k7 = k7 - k4; 371 k5 = k5 + k6; 372 /********/ 373 dst[1] = k5; /* col. 1 */ 374 dst[3] = k7; /* col. 3 */ 375 dst += 8; 376 } 377 while (dst < out); 378 379 out -= 64; 380 dst = out + 4; 381 382 /* Vertical Block Loop */ 383 do /* Vertical 8xDCT loop */ 384 { 385 k0 = out[0]; 386 k1 = out[8]; 387 k2 = out[16]; 388 k3 = out[24]; 389 k4 = out[32]; 390 k5 = out[40]; 391 k6 = out[48]; 392 k7 = out[56]; 393 394 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 395 396 if (abs_sum < ColTh) 397 { 398 out[0] = 0x7fff; 399 out++; 400 continue; 401 } 402 /* fdct_1 */ 403 k0 = k0 + k7; 404 k7 = k0 - (k7 << 1); 405 k1 = k1 + k6; 406 k6 = k1 - (k6 << 1); 407 k2 = k2 + k5; 408 k5 = k2 - (k5 << 1); 409 k3 = k3 + k4; 410 k4 = k3 - (k4 << 1); 411 412 k0 = k0 + k3; 413 k3 = k0 - (k3 << 1); 414 k1 = k1 + k2; 415 k2 = k1 - (k2 << 1); 416 417 k0 = k0 + k1; 418 /**********/ 419 out[0] = k0; /* row 0 */ 420 /* fdct_2 */ 421 k4 = k4 + k5; 422 k5 = k5 + k6; 423 k6 = k6 + k7; 424 k2 = k2 + k3; 425 /* MUL2C k2,k5,724,FDCT_SHIFT */ 426 /* k0, k1 become scratch */ 427 /* assume FAST MULTIPLY */ 428 k1 = mla724(k12, k5, round); 429 k0 = mla724(k12, k2, round); 430 431 k5 = k1 >> FDCT_SHIFT; 432 k2 = k0 >> FDCT_SHIFT; 433 /*****************/ 434 k2 = k2 + k3; 435 /********/ 436 out[16] = k2; /* row 2 */ 437 /* fdct_3 */ 438 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 439 /* assume FAST MULTIPLY */ 440 /* k0, k1 are output */ 441 k0 = k4 - k6; 442 443 k1 = mla392(k0, k14, round); 444 k0 = mla554(k4, k12, k1); 445 k1 = mla1338(k6, k14, k1); 446 447 k4 = k0 >> FDCT_SHIFT; 448 k6 = k1 >> FDCT_SHIFT; 449 /***********************/ 450 k5 = k5 + k7; 451 k7 = (k7 << 1) - k5; 452 k7 = k7 - k4 ; 453 k5 = k5 + k6; 454 /********/ 455 out[24] = k7 ; /* row 3 */ 456 out[8] = k5 ; /* row 1 */ 457 out++; 458 } 459 while ((UInt)out < (UInt)dst) ; 460 461 return ; 462 } 463 464 /**************************************************************************/ 465 /* Function: Block2x2DCT_AANwSub 466 Date: 7/31/01 467 Input: 468 Output: out[64] ==> next block 469 Purpose: Do subtraction for zero MV first before 2x2 DCT 470 Modified: 471 **************************************************************************/ 472 473 Block2x2DCT_AANwSub(Short * out,UChar * cur,UChar * pred,Int width)474 Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 475 { 476 Short *dst; 477 register Int k0, k1, k2, k3, k4, k5, k6, k7; 478 Int round; 479 Int k12 = 0x022A02D4; 480 Int k14 = 0x018803B2; 481 Int mask; 482 Int tmp, tmp2; 483 Int abs_sum; 484 Int ColTh; 485 486 dst = out + 64 ; 487 ColTh = *dst; 488 out += 128; 489 round = 1 << (FDCT_SHIFT - 1); 490 491 do /* fdct_nextrow */ 492 { 493 /* assuming the block is word-aligned */ 494 mask = 0x1FE; 495 tmp = *((Int*) cur); /* contains 4 pixels */ 496 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 497 k0 = tmp2 & 0xFF; 498 k1 = mask & (tmp << 1); 499 k0 = k1 - (k0 << 1); 500 k1 = (tmp2 >> 8) & 0xFF; 501 k2 = mask & (tmp >> 7); 502 k1 = k2 - (k1 << 1); 503 k2 = (tmp2 >> 16) & 0xFF; 504 k3 = mask & (tmp >> 15); 505 k2 = k3 - (k2 << 1); 506 k3 = (tmp2 >> 24) & 0xFF; 507 k4 = mask & (tmp >> 23); 508 k3 = k4 - (k3 << 1); 509 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 510 tmp2 = *((Int*)(pred + 4)); 511 k4 = tmp2 & 0xFF; 512 k5 = mask & (tmp << 1); 513 k4 = k5 - (k4 << 1); 514 k5 = (tmp2 >> 8) & 0xFF; 515 k6 = mask & (tmp >> 7); 516 k5 = k6 - (k5 << 1); 517 k6 = (tmp2 >> 16) & 0xFF; 518 k7 = mask & (tmp >> 15); 519 k6 = k7 - (k6 << 1); 520 k7 = (tmp2 >> 24) & 0xFF; 521 tmp = mask & (tmp >> 23); 522 k7 = tmp - (k7 << 1); 523 cur += width; 524 pred += 16; 525 526 /* fdct_1 */ 527 k0 = k0 + k7; 528 k7 = k0 - (k7 << 1); 529 k1 = k1 + k6; 530 k6 = k1 - (k6 << 1); 531 k2 = k2 + k5; 532 k5 = k2 - (k5 << 1); 533 k3 = k3 + k4; 534 k4 = k3 - (k4 << 1); 535 536 k0 = k0 + k3; 537 k3 = k0 - (k3 << 1); 538 k1 = k1 + k2; 539 k2 = k1 - (k2 << 1); 540 541 k0 = k0 + k1; 542 /**********/ 543 dst[0] = k0; 544 /* fdct_2 */ 545 k4 = k4 + k5; 546 k5 = k5 + k6; 547 k6 = k6 + k7; 548 /* MUL2C k2,k5,724,FDCT_SHIFT */ 549 /* k0, k1 become scratch */ 550 /* assume FAST MULTIPLY */ 551 k1 = mla724(k12, k5, round); 552 553 k5 = k1 >> FDCT_SHIFT; 554 /*****************/ 555 /********/ 556 /* fdct_3 */ 557 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 558 /* assume FAST MULTIPLY */ 559 /* k0, k1 are output */ 560 k1 = mla392(k4, k14, round); 561 k1 = mla946(k6, k14, k1); 562 563 k6 = k1 >> FDCT_SHIFT; 564 /***********************/ 565 k5 = k5 + k7; 566 k5 = k5 + k6; 567 /********/ 568 dst[1] = k5; 569 dst += 8; 570 } 571 while (dst < out); 572 out -= 64; 573 dst = out + 2; 574 /* Vertical Block Loop */ 575 do /* Vertical 8xDCT loop */ 576 { 577 k0 = out[0]; 578 k1 = out[8]; 579 k2 = out[16]; 580 k3 = out[24]; 581 k4 = out[32]; 582 k5 = out[40]; 583 k6 = out[48]; 584 k7 = out[56]; 585 586 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 587 588 if (abs_sum < ColTh) 589 { 590 out[0] = 0x7fff; 591 out++; 592 continue; 593 } 594 /* fdct_1 */ 595 k0 = k0 + k7; 596 k7 = k0 - (k7 << 1); 597 k1 = k1 + k6; 598 k6 = k1 - (k6 << 1); 599 k2 = k2 + k5; 600 k5 = k2 - (k5 << 1); 601 k3 = k3 + k4; 602 k4 = k3 - (k4 << 1); 603 604 k0 = k0 + k3; 605 k3 = k0 - (k3 << 1); 606 k1 = k1 + k2; 607 k2 = k1 - (k2 << 1); 608 609 k0 = k0 + k1; 610 /**********/ 611 out[0] = k0; /* row 0 */ 612 /* fdct_2 */ 613 k4 = k4 + k5; 614 k5 = k5 + k6; 615 k6 = k6 + k7; 616 /* MUL2C k2,k5,724,FDCT_SHIFT */ 617 /* k0, k1 become scratch */ 618 /* assume FAST MULTIPLY */ 619 k1 = mla724(k12, k5, round); 620 621 k5 = k1 >> FDCT_SHIFT; 622 /*****************/ 623 /********/ 624 /* fdct_3 */ 625 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 626 /* assume FAST MULTIPLY */ 627 /* k0, k1 are output */ 628 k1 = mla392(k4, k14, round); 629 k1 = mla946(k6, k14, k1); 630 631 k6 = k1 >> FDCT_SHIFT; 632 /***********************/ 633 k5 = k5 + k7; 634 k5 = k5 + k6; 635 /********/ 636 out[8] = k5 ; /* row 1 */ 637 out++; 638 } 639 while ((UInt)out < (UInt)dst) ; 640 641 return ; 642 } 643 644 /**************************************************************************/ 645 /* Function: BlockDCT_AANIntra 646 Date: 8/9/01 647 Input: rec 648 Output: out[64] ==> next block 649 Purpose: Input directly from rec frame. 650 Modified: 651 **************************************************************************/ 652 BlockDCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)653 Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 654 { 655 Short *dst; 656 Int k0, k1, k2, k3, k4, k5, k6, k7; 657 Int round; 658 Int k12 = 0x022A02D4; 659 Int k14 = 0x0188053A; 660 Int abs_sum; 661 Int mask; 662 Int *curInt, tmp; 663 Int ColTh; 664 665 OSCL_UNUSED_ARG(dummy2); 666 667 dst = out + 64 ; 668 ColTh = *dst; 669 out += 128; 670 round = 1 << (FDCT_SHIFT - 1); 671 672 do /* fdct_nextrow */ 673 { 674 mask = 0x1FE; 675 curInt = (Int*) cur; 676 tmp = curInt[0]; /* contains 4 pixels */ 677 k0 = mask & (tmp << 1); 678 k1 = mask & (tmp >> 7); 679 k2 = mask & (tmp >> 15); 680 k3 = mask & (tmp >> 23); 681 tmp = curInt[1]; /* another 4 pixels */ 682 k4 = mask & (tmp << 1); 683 k5 = mask & (tmp >> 7); 684 k6 = mask & (tmp >> 15); 685 k7 = mask & (tmp >> 23); 686 cur += width; 687 /* fdct_1 */ 688 k0 = k0 + k7; 689 k7 = k0 - (k7 << 1); 690 k1 = k1 + k6; 691 k6 = k1 - (k6 << 1); 692 k2 = k2 + k5; 693 k5 = k2 - (k5 << 1); 694 k3 = k3 + k4; 695 k4 = k3 - (k4 << 1); 696 697 k0 = k0 + k3; 698 k3 = k0 - (k3 << 1); 699 k1 = k1 + k2; 700 k2 = k1 - (k2 << 1); 701 702 k0 = k0 + k1; 703 k1 = k0 - (k1 << 1); 704 /**********/ 705 dst[0] = k0; 706 dst[4] = k1; /* col. 4 */ 707 /* fdct_2 */ 708 k4 = k4 + k5; 709 k5 = k5 + k6; 710 k6 = k6 + k7; 711 k2 = k2 + k3; 712 /* MUL2C k2,k5,724,FDCT_SHIFT */ 713 /* k0, k1 become scratch */ 714 /* assume FAST MULTIPLY */ 715 k1 = mla724(k12, k5, round); 716 k0 = mla724(k12, k2, round); 717 718 k5 = k1 >> FDCT_SHIFT; 719 k2 = k0 >> FDCT_SHIFT; 720 /*****************/ 721 k2 = k2 + k3; 722 k3 = (k3 << 1) - k2; 723 /********/ 724 dst[2] = k2; /* col. 2 */ 725 k3 <<= 1; /* scale up col. 6 */ 726 dst[6] = k3; /* col. 6 */ 727 /* fdct_3 */ 728 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 729 /* assume FAST MULTIPLY */ 730 /* k0, k1 are output */ 731 k0 = k4 - k6; 732 733 k1 = mla392(k0, k14, round); 734 k0 = mla554(k4, k12, k1); 735 k1 = mla1338(k6, k14, k1); 736 737 k4 = k0 >> FDCT_SHIFT; 738 k6 = k1 >> FDCT_SHIFT; 739 /***********************/ 740 k5 = k5 + k7; 741 k7 = (k7 << 1) - k5; 742 k4 = k4 + k7; 743 k7 = (k7 << 1) - k4; 744 k5 = k5 + k6; 745 k4 <<= 1; /* scale up col.5 */ 746 k6 = k5 - (k6 << 1); 747 /********/ 748 dst[5] = k4; /* col. 5 */ 749 k6 <<= 2; /* scale up col. 7 */ 750 dst[1] = k5; /* col. 1 */ 751 dst[7] = k6; /* col. 7 */ 752 dst[3] = k7; /* col. 3 */ 753 dst += 8; 754 } 755 while (dst < out); 756 757 out -= 64; 758 dst = out + 8; 759 760 /* Vertical Block Loop */ 761 do /* Vertical 8xDCT loop */ 762 { 763 k0 = out[0]; 764 k1 = out[8]; 765 k2 = out[16]; 766 k3 = out[24]; 767 k4 = out[32]; 768 k5 = out[40]; 769 k6 = out[48]; 770 k7 = out[56]; 771 /* deadzone thresholding for column */ 772 773 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 774 775 if (abs_sum < ColTh) 776 { 777 out[0] = 0x7fff; 778 out++; 779 continue; 780 } 781 782 /* fdct_1 */ 783 k0 = k0 + k7; 784 k7 = k0 - (k7 << 1); 785 k1 = k1 + k6; 786 k6 = k1 - (k6 << 1); 787 k2 = k2 + k5; 788 k5 = k2 - (k5 << 1); 789 k3 = k3 + k4; 790 k4 = k3 - (k4 << 1); 791 792 k0 = k0 + k3; 793 k3 = k0 - (k3 << 1); 794 k1 = k1 + k2; 795 k2 = k1 - (k2 << 1); 796 797 k0 = k0 + k1; 798 k1 = k0 - (k1 << 1); 799 /**********/ 800 out[32] = k1; /* row 4 */ 801 out[0] = k0; /* row 0 */ 802 /* fdct_2 */ 803 k4 = k4 + k5; 804 k5 = k5 + k6; 805 k6 = k6 + k7; 806 k2 = k2 + k3; 807 /* MUL2C k2,k5,724,FDCT_SHIFT */ 808 /* k0, k1 become scratch */ 809 /* assume FAST MULTIPLY */ 810 k1 = mla724(k12, k5, round); 811 k0 = mla724(k12, k2, round); 812 813 k5 = k1 >> FDCT_SHIFT; 814 k2 = k0 >> FDCT_SHIFT; 815 /*****************/ 816 k2 = k2 + k3; 817 k3 = (k3 << 1) - k2; 818 k3 <<= 1; /* scale up col. 6 */ 819 /********/ 820 out[48] = k3; /* row 6 */ 821 out[16] = k2; /* row 2 */ 822 /* fdct_3 */ 823 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 824 /* assume FAST MULTIPLY */ 825 /* k0, k1 are output */ 826 k0 = k4 - k6; 827 828 k1 = mla392(k0, k14, round); 829 k0 = mla554(k4, k12, k1); 830 k1 = mla1338(k6, k14, k1); 831 832 k4 = k0 >> FDCT_SHIFT; 833 k6 = k1 >> FDCT_SHIFT; 834 /***********************/ 835 k5 = k5 + k7; 836 k7 = (k7 << 1) - k5; 837 k4 = k4 + k7; 838 k7 = (k7 << 1) - k4; 839 k5 = k5 + k6; 840 k4 <<= 1; /* scale up col. 5 */ 841 k6 = k5 - (k6 << 1); 842 /********/ 843 out[24] = k7 ; /* row 3 */ 844 k6 <<= 2; /* scale up col. 7 */ 845 out[56] = k6 ; /* row 7 */ 846 out[8] = k5 ; /* row 1 */ 847 out[40] = k4 ; /* row 5 */ 848 out++; 849 } 850 while ((UInt)out < (UInt)dst) ; 851 852 return ; 853 } 854 855 /**************************************************************************/ 856 /* Function: Block4x4DCT_AANIntra 857 Date: 8/9/01 858 Input: prev 859 Output: out[64] ==> next block 860 Purpose: Input directly from prev frame. output 2x2 DCT 861 Modified: 862 **************************************************************************/ 863 Block4x4DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)864 Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 865 { 866 Short *dst; 867 register Int k0, k1, k2, k3, k4, k5, k6, k7; 868 Int round; 869 Int k12 = 0x022A02D4; 870 Int k14 = 0x0188053A; 871 Int mask; 872 Int *curInt, tmp; 873 Int abs_sum; 874 Int ColTh; 875 876 OSCL_UNUSED_ARG(dummy2); 877 878 dst = out + 64 ; 879 ColTh = *dst; 880 out += 128; 881 round = 1 << (FDCT_SHIFT - 1); 882 883 do /* fdct_nextrow */ 884 { 885 mask = 0x1FE; 886 curInt = (Int*) cur; 887 tmp = curInt[0]; /* contains 4 pixels */ 888 k0 = mask & (tmp << 1); 889 k1 = mask & (tmp >> 7); 890 k2 = mask & (tmp >> 15); 891 k3 = mask & (tmp >> 23); 892 tmp = curInt[1]; /* another 4 pixels */ 893 k4 = mask & (tmp << 1); 894 k5 = mask & (tmp >> 7); 895 k6 = mask & (tmp >> 15); 896 k7 = mask & (tmp >> 23); 897 cur += width; 898 /* fdct_1 */ 899 k0 = k0 + k7; 900 k7 = k0 - (k7 << 1); 901 k1 = k1 + k6; 902 k6 = k1 - (k6 << 1); 903 k2 = k2 + k5; 904 k5 = k2 - (k5 << 1); 905 k3 = k3 + k4; 906 k4 = k3 - (k4 << 1); 907 908 k0 = k0 + k3; 909 k3 = k0 - (k3 << 1); 910 k1 = k1 + k2; 911 k2 = k1 - (k2 << 1); 912 913 k0 = k0 + k1; 914 /**********/ 915 dst[0] = k0; 916 /* fdct_2 */ 917 k4 = k4 + k5; 918 k5 = k5 + k6; 919 k6 = k6 + k7; 920 k2 = k2 + k3; 921 /* MUL2C k2,k5,724,FDCT_SHIFT */ 922 /* k0, k1 become scratch */ 923 /* assume FAST MULTIPLY */ 924 k1 = mla724(k12, k5, round); 925 k0 = mla724(k12, k2, round); 926 927 k5 = k1 >> FDCT_SHIFT; 928 k2 = k0 >> FDCT_SHIFT; 929 /*****************/ 930 k2 = k2 + k3; 931 /********/ 932 dst[2] = k2; /* col. 2 */ 933 /* fdct_3 */ 934 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 935 /* assume FAST MULTIPLY */ 936 /* k0, k1 are output */ 937 k0 = k4 - k6; 938 939 k1 = mla392(k0, k14, round); 940 k0 = mla554(k4, k12, k1); 941 k1 = mla1338(k6, k14, k1); 942 943 k4 = k0 >> FDCT_SHIFT; 944 k6 = k1 >> FDCT_SHIFT; 945 /***********************/ 946 k5 = k5 + k7; 947 k7 = (k7 << 1) - k5; 948 k7 = k7 - k4; 949 k5 = k5 + k6; 950 /********/ 951 dst[1] = k5; /* col. 1 */ 952 dst[3] = k7; /* col. 3 */ 953 dst += 8; 954 } 955 while (dst < out); 956 957 out -= 64; 958 dst = out + 4; 959 960 /* Vertical Block Loop */ 961 do /* Vertical 8xDCT loop */ 962 { 963 k0 = out[0]; 964 k1 = out[8]; 965 k2 = out[16]; 966 k3 = out[24]; 967 k4 = out[32]; 968 k5 = out[40]; 969 k6 = out[48]; 970 k7 = out[56]; 971 972 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 973 974 if (abs_sum < ColTh) 975 { 976 out[0] = 0x7fff; 977 out++; 978 continue; 979 } 980 /* fdct_1 */ 981 k0 = k0 + k7; 982 k7 = k0 - (k7 << 1); 983 k1 = k1 + k6; 984 k6 = k1 - (k6 << 1); 985 k2 = k2 + k5; 986 k5 = k2 - (k5 << 1); 987 k3 = k3 + k4; 988 k4 = k3 - (k4 << 1); 989 990 k0 = k0 + k3; 991 k3 = k0 - (k3 << 1); 992 k1 = k1 + k2; 993 k2 = k1 - (k2 << 1); 994 995 k0 = k0 + k1; 996 /**********/ 997 out[0] = k0; /* row 0 */ 998 /* fdct_2 */ 999 k4 = k4 + k5; 1000 k5 = k5 + k6; 1001 k6 = k6 + k7; 1002 k2 = k2 + k3; 1003 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1004 /* k0, k1 become scratch */ 1005 /* assume FAST MULTIPLY */ 1006 k1 = mla724(k12, k5, round); 1007 k0 = mla724(k12, k2, round); 1008 1009 k5 = k1 >> FDCT_SHIFT; 1010 k2 = k0 >> FDCT_SHIFT; 1011 /*****************/ 1012 k2 = k2 + k3; 1013 /********/ 1014 out[16] = k2; /* row 2 */ 1015 /* fdct_3 */ 1016 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1017 /* assume FAST MULTIPLY */ 1018 /* k0, k1 are output */ 1019 k0 = k4 - k6; 1020 1021 k1 = mla392(k0, k14, round); 1022 k0 = mla554(k4, k12, k1); 1023 k1 = mla1338(k6, k14, k1); 1024 1025 k4 = k0 >> FDCT_SHIFT; 1026 k6 = k1 >> FDCT_SHIFT; 1027 /***********************/ 1028 k5 = k5 + k7; 1029 k7 = (k7 << 1) - k5; 1030 k7 = k7 - k4 ; 1031 k5 = k5 + k6; 1032 /********/ 1033 out[24] = k7 ; /* row 3 */ 1034 out[8] = k5 ; /* row 1 */ 1035 out++; 1036 } 1037 while ((UInt)out < (UInt)dst) ; 1038 1039 return ; 1040 } 1041 1042 /**************************************************************************/ 1043 /* Function: Block2x2DCT_AANIntra 1044 Date: 8/9/01 1045 Input: prev 1046 Output: out[64] ==> next block 1047 Purpose: Input directly from prev frame. output 2x2 DCT 1048 Modified: 1049 **************************************************************************/ 1050 Block2x2DCT_AANIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1051 Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 1052 { 1053 Short *dst; 1054 register Int k0, k1, k2, k3, k4, k5, k6, k7; 1055 Int round; 1056 Int k12 = 0x022A02D4; 1057 Int k14 = 0x018803B2; 1058 Int mask; 1059 Int *curInt, tmp; 1060 Int abs_sum; 1061 Int ColTh; 1062 1063 OSCL_UNUSED_ARG(dummy2); 1064 1065 dst = out + 64 ; 1066 ColTh = *dst; 1067 out += 128; 1068 round = 1 << (FDCT_SHIFT - 1); 1069 1070 do /* fdct_nextrow */ 1071 { 1072 mask = 0x1FE; 1073 curInt = (Int*) cur; 1074 tmp = curInt[0]; /* contains 4 pixels */ 1075 k0 = mask & (tmp << 1); 1076 k1 = mask & (tmp >> 7); 1077 k2 = mask & (tmp >> 15); 1078 k3 = mask & (tmp >> 23); 1079 tmp = curInt[1]; /* another 4 pixels */ 1080 k4 = mask & (tmp << 1); 1081 k5 = mask & (tmp >> 7); 1082 k6 = mask & (tmp >> 15); 1083 k7 = mask & (tmp >> 23); 1084 cur += width; 1085 1086 /* fdct_1 */ 1087 k0 = k0 + k7; 1088 k7 = k0 - (k7 << 1); 1089 k1 = k1 + k6; 1090 k6 = k1 - (k6 << 1); 1091 k2 = k2 + k5; 1092 k5 = k2 - (k5 << 1); 1093 k3 = k3 + k4; 1094 k4 = k3 - (k4 << 1); 1095 1096 k0 = k0 + k3; 1097 k3 = k0 - (k3 << 1); 1098 k1 = k1 + k2; 1099 k2 = k1 - (k2 << 1); 1100 1101 k0 = k0 + k1; 1102 /**********/ 1103 dst[0] = k0; 1104 /* fdct_2 */ 1105 k4 = k4 + k5; 1106 k5 = k5 + k6; 1107 k6 = k6 + k7; 1108 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1109 /* k0, k1 become scratch */ 1110 /* assume FAST MULTIPLY */ 1111 k1 = mla724(k12, k5, round); 1112 1113 k5 = k1 >> FDCT_SHIFT; 1114 /*****************/ 1115 /********/ 1116 /* fdct_3 */ 1117 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1118 /* assume FAST MULTIPLY */ 1119 /* k0, k1 are output */ 1120 k1 = mla392(k4, k14, round); 1121 k1 = mla946(k6, k14, k1); 1122 1123 k6 = k1 >> FDCT_SHIFT; 1124 /***********************/ 1125 k5 = k5 + k7; 1126 k5 = k5 + k6; 1127 /********/ 1128 dst[1] = k5; 1129 dst += 8; 1130 } 1131 while (dst < out); 1132 out -= 64; 1133 dst = out + 2; 1134 /* Vertical Block Loop */ 1135 do /* Vertical 8xDCT loop */ 1136 { 1137 k0 = out[0]; 1138 k1 = out[8]; 1139 k2 = out[16]; 1140 k3 = out[24]; 1141 k4 = out[32]; 1142 k5 = out[40]; 1143 k6 = out[48]; 1144 k7 = out[56]; 1145 1146 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 1147 1148 if (abs_sum < ColTh) 1149 { 1150 out[0] = 0x7fff; 1151 out++; 1152 continue; 1153 } 1154 /* fdct_1 */ 1155 k0 = k0 + k7; 1156 k7 = k0 - (k7 << 1); 1157 k1 = k1 + k6; 1158 k6 = k1 - (k6 << 1); 1159 k2 = k2 + k5; 1160 k5 = k2 - (k5 << 1); 1161 k3 = k3 + k4; 1162 k4 = k3 - (k4 << 1); 1163 1164 k0 = k0 + k3; 1165 k3 = k0 - (k3 << 1); 1166 k1 = k1 + k2; 1167 k2 = k1 - (k2 << 1); 1168 1169 k0 = k0 + k1; 1170 /**********/ 1171 out[0] = k0; /* row 0 */ 1172 /* fdct_2 */ 1173 k4 = k4 + k5; 1174 k5 = k5 + k6; 1175 k6 = k6 + k7; 1176 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1177 /* k0, k1 become scratch */ 1178 /* assume FAST MULTIPLY */ 1179 k1 = mla724(k12, k5, round); 1180 1181 k5 = k1 >> FDCT_SHIFT; 1182 /*****************/ 1183 /********/ 1184 /* fdct_3 */ 1185 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1186 /* assume FAST MULTIPLY */ 1187 /* k0, k1 are output */ 1188 k1 = mla392(k4, k14, round); 1189 k1 = mla946(k6, k14, k1); 1190 1191 k6 = k1 >> FDCT_SHIFT; 1192 /***********************/ 1193 k5 = k5 + k7; 1194 k5 = k5 + k6; 1195 /********/ 1196 out[8] = k5 ; /* row 1 */ 1197 out++; 1198 } 1199 while ((UInt)out < (UInt)dst) ; 1200 1201 return ; 1202 } 1203 /**************************************************************************/ 1204 /* Function: Block1x1DCTwSub 1205 Date: 8/9/01 1206 Input: block 1207 Output: y 1208 Purpose: Compute DC value only 1209 Modified: 1210 **************************************************************************/ Block1x1DCTwSub(Short * out,UChar * cur,UChar * pred,Int width)1211 void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width) 1212 { 1213 UChar *end; 1214 Int temp = 0; 1215 Int offset2; 1216 1217 offset2 = width - 8; 1218 end = pred + (16 << 3); 1219 do 1220 { 1221 temp += (*cur++ - *pred++); 1222 temp += (*cur++ - *pred++); 1223 temp += (*cur++ - *pred++); 1224 temp += (*cur++ - *pred++); 1225 temp += (*cur++ - *pred++); 1226 temp += (*cur++ - *pred++); 1227 temp += (*cur++ - *pred++); 1228 temp += (*cur++ - *pred++); 1229 cur += offset2; 1230 pred += 8; 1231 } 1232 while (pred < end) ; 1233 1234 out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; 1235 out[0] = temp >> 3; 1236 1237 return ; 1238 } 1239 1240 /**************************************************************************/ 1241 /* Function: Block1x1DCTIntra 1242 Date: 8/9/01 1243 Input: prev 1244 Output: out 1245 Purpose: Compute DC value only 1246 Modified: 1247 **************************************************************************/ Block1x1DCTIntra(Short * out,UChar * cur,UChar * dummy2,Int width)1248 void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 1249 { 1250 UChar *end; 1251 Int temp = 0; 1252 ULong word; 1253 1254 OSCL_UNUSED_ARG(dummy2); 1255 1256 end = cur + (width << 3); 1257 do 1258 { 1259 word = *((ULong*)cur); 1260 temp += (word >> 24); 1261 temp += ((word >> 16) & 0xFF); 1262 temp += ((word >> 8) & 0xFF); 1263 temp += (word & 0xFF); 1264 1265 word = *((ULong*)(cur + 4)); 1266 temp += (word >> 24); 1267 temp += ((word >> 16) & 0xFF); 1268 temp += ((word >> 8) & 0xFF); 1269 temp += (word & 0xFF); 1270 1271 cur += width; 1272 } 1273 while (cur < end) ; 1274 1275 out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; 1276 out[0] = temp >> 3; 1277 1278 return ; 1279 } 1280 1281 #ifdef __cplusplus 1282 } 1283 #endif 1284 1285