; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2018-02-06: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
; That code is tightly coupled with LzmaDec_TryDummy()
; and with other functions in LzmaDec.c file.
; CLzmaDec structure, (probs) array layout, input and output of
; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
;
; Syntax: MASM (Intel operand order). Register names x0..x14 / r0..r15 and
; the MY_* macros used in this file come from 7zAsm.asm.

ifndef x64
; x64=1
; .err <x64_IS_REQUIRED>
endif

include 7zAsm.asm

MY_ASM_START

_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'

; ---------- alignment helpers ----------

MY_ALIGN macro num:req
        align  num
endm

MY_ALIGN_16 macro
        MY_ALIGN 16
endm

MY_ALIGN_32 macro
        MY_ALIGN 32
endm

MY_ALIGN_64 macro
        MY_ALIGN 64
endm


; Build-time switches (both disabled here):
;   _LZMA_SIZE_OPT : use loops instead of unrolled bit-tree decoding
;   _LZMA_PROB32   : probabilities are 32-bit instead of 16-bit

; _LZMA_SIZE_OPT  equ 1

; _LZMA_PROB32 equ 1

; PSHIFT        : log2 of the size in bytes of one probability
; PLOAD  d, m   : d = probability at [m] (zero-extended in 16-bit mode)
; PSTORE s, m   : probability at [m] = s
;                 (16-bit mode stores the register named <s>_W)
ifdef _LZMA_PROB32
        PSHIFT  equ 2
        PLOAD macro dest, mem
                mov     dest, dword ptr [mem]
        endm
        PSTORE  macro src, mem
                mov     dword ptr [mem], src
        endm
else
        PSHIFT  equ 1
        PLOAD macro dest, mem
                movzx   dest, word ptr [mem]
        endm
        PSTORE macro src, mem
                mov     word ptr [mem], @CatStr(src, _W)
        endm
endif

; Byte-size multipliers for indexing the probability array.
PMULT           equ (1 SHL PSHIFT)
PMULT_HALF      equ (1 SHL (PSHIFT - 1))
PMULT_2         equ (1 SHL (PSHIFT + 1))


; Register allocation. Several roles share one register; the role that
; is live depends on the decoding phase.
;
;       x0      range
;       x1      pbPos / (prob) TREE
;       x2      probBranch / prm (MATCHED) / pbPos / cnt
;       x3      sym
;====== r4 ===  RSP
;       x5      cod
;       x6      t1 NORM_CALC / probs_state / dist
;       x7      t0 NORM_CALC / prob2 IF_BIT_1
;       x8      state
;       x9      match (MATCHED) / sym2 / dist2 / lpMask_reg
;       x10     kBitModelTotal_reg
;       r11     probs
;       x12     offs (MATCHED) / dic / len_temp
;       x13     processedPos
;       x14     bit (MATCHED) / dicPos
;       r15     buf


cod     equ x5
cod_L   equ x5_L
range   equ x0
state   equ x8
state_R equ r8
buf     equ r15
processedPos equ x13
kBitModelTotal_reg equ x10

probBranch   equ x2
probBranch_R equ r2
probBranch_W equ x2_W

pbPos   equ x1
pbPos_R equ r1

cnt     equ x2
cnt_R   equ r2
; ---------- register aliases (continued) ----------
; NOTE: x9 is shared by lpMask_reg / match / sym2 / dist2,
;       r12 by dic / offs / len_temp, and r14 by dicPos / bit.
;       Code that switches role must save/reload the previous value.

lpMask_reg equ x9
dicPos  equ r14

sym     equ x3
sym_R   equ r3
sym_L   equ x3_L

probs   equ r11
dic     equ r12

t0      equ x7
t0_W    equ x7_W
t0_R    equ r7

prob2   equ t0
prob2_W equ t0_W

t1      equ x6
t1_R    equ r6

probs_state     equ t1
probs_state_R   equ t1_R

prm     equ r2
match   equ x9
match_R equ r9
offs    equ x12
offs_R  equ r12
bit     equ x14
bit_R   equ r14

sym2    equ x9
sym2_R  equ r9

len_temp equ x12

dist    equ sym
dist2   equ x9



; ---------- range coder constants ----------

kNumBitModelTotalBits   equ 11
kBitModelTotal          equ (1 SHL kNumBitModelTotalBits)
kNumMoveBits            equ 5
kBitModelOffset         equ ((1 SHL kNumMoveBits) - 1)
kTopValue               equ (1 SHL 24)

; NORM_2: unconditional normalization step.
; Shifts range and cod left by 8 and places the next input byte from [buf]
; into the low byte of cod; advances buf.
NORM_2 macro
        ; movzx   t0, BYTE PTR [buf]
        shl     cod, 8
        mov     cod_L, BYTE PTR [buf]
        shl     range, 8
        ; or      cod, t0
        inc     buf
endm


; NORM: performs NORM_2 only when range < kTopValue.
NORM macro
        cmp     range, kTopValue
        jae     SHORT @F
        NORM_2
@@:
endm


; ---------- Branch MACROS ----------
; These macros decode one bit with a conditional jump.
; The probability is addressed as
;   [probOffset * 1 + probsArray + probDisp * PMULT].

; UPDATE_0: bit == 0 path. range already holds the bound (see CMP_COD).
; probBranch += (kBitModelTotal_reg - probBranch) >> kNumMoveBits; stored back.
; Clobbers prob2.
UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
        mov     prob2, kBitModelTotal_reg
        sub     prob2, probBranch
        shr     prob2, kNumMoveBits
        add     probBranch, prob2
        PSTORE  probBranch, probOffset * 1 + probsArray + probDisp * PMULT
endm


; UPDATE_1: bit == 1 path. Expects prob2 = range before the split and
; range = bound, as left by CMP_COD.
;   range = prob2 - bound;  cod -= bound;
;   stored prob = probBranch - (probBranch >> kNumMoveBits)
UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
        sub     prob2, range
        sub     cod, range
        mov     range, prob2
        mov     prob2, probBranch
        shr     probBranch, kNumMoveBits
        sub     prob2, probBranch
        PSTORE  prob2, probOffset * 1 + probsArray + probDisp * PMULT
endm


; CMP_COD: loads the probability into probBranch, normalizes, saves the
; current range in prob2, sets range = (range >> kNumBitModelTotalBits) * prob
; and compares cod with that bound. Result is left in the flags
; (below: bit 0, above-or-equal: bit 1).
CMP_COD macro probsArray:req, probOffset:req, probDisp:req
        PLOAD   probBranch, probOffset * 1 + probsArray + probDisp * PMULT
        NORM
        mov     prob2, range
        shr     range, kNumBitModelTotalBits
        imul    range, probBranch
        cmp     cod, range
endm


; IF_BIT_1_NOUP: jump to toLabel when the decoded bit is 1.
; No probability update is done on either path.
IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jae     toLabel
endm


; IF_BIT_1: jump to toLabel when the bit is 1 (target must do UPDATE_1);
; on fall-through (bit 0) the probability is updated with UPDATE_0.
IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
        UPDATE_0 probsArray, probOffset, probDisp
endm


; IF_BIT_0_NOUP: jump to toLabel when the decoded bit is 0.
; No probability update is done on either path.
IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jb      toLabel
endm


; ---------- CMOV MACROS ----------
; Branch-free bit decoding. After NORM_CALC the carry flag tells the bit
; (CF = 1: cod was below the bound, bit 0). The instructions that follow
; in the users of NORM_CALC (mov / lea / cmov / PLOAD) do not modify flags,
; so CF is still valid for the later cmovb / cmovae / sbb.
; Do not reorder or replace them with flag-writing instructions.

; NORM_CALC: after normalization
;   bound = (range >> kNumBitModelTotalBits) * prob
;   t0    = range - bound      (range to use if bit is 1)
;   range = bound              (range to use if bit is 0)
;   t1    = cod                (value to restore if bit is 0)
;   cod  -= bound              (sets CF)
NORM_CALC macro prob:req
        NORM
        mov     t0, range
        shr     range, kNumBitModelTotalBits
        imul    range, prob
        sub     t0, range
        mov     t1, cod
        sub     cod, range
endm


; PUP: probability update. t0 holds the target selected by the caller
; (kBitModelTotal_reg or kBitModelOffset).
;   new prob = prob + ((t0 - prob) sar kNumMoveBits); stored at probPtr.
PUP macro prob:req, probPtr:req
        sub     t0, prob
        ; only sar works for both 16/32 bit prob modes
        sar     t0, kNumMoveBits
        add     t0, prob
        PSTORE  t0, probPtr
endm


; PUP_SUB: sym = sym - symSub - CF (CF from NORM_CALC), then PUP.
PUP_SUB macro prob:req, probPtr:req, symSub:req
        sbb     sym, symSub
        PUP prob, probPtr
endm


; PUP_COD: completes a cmov-decoded bit:
;   if CF: cod = t1 (undo subtraction), t0 = kBitModelTotal_reg
;   else : t0 = kBitModelOffset
;   t1 = sym (value before the sbb; callers use it to address probPtr)
PUP_COD macro prob:req, probPtr:req, symSub:req
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        mov     t1, sym
        cmovb   t0, kBitModelTotal_reg
        PUP_SUB prob, probPtr, symSub
endm


; BIT_0: first bit of a bit-tree rooted at probs.
; Loads prob from index 1 and preloads probNext from index 2, replaced by
; index 3 when the bit is 1. Leaves sym = 2 + bit.
BIT_0 macro prob:req, probNext:req
        PLOAD   prob, probs + 1 * PMULT
        PLOAD   probNext, probs + 1 * PMULT_2

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 1 * PMULT_2 + PMULT
        cmovae  probNext, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        mov     sym, 2
        PUP_SUB prob, probs + 1 * PMULT, 0 - 1
endm


; BIT_1: middle bit of a bit-tree. prob must already hold the probability
; for node sym. Preloads probNext for both children, sets sym = 2*sym + bit.
BIT_1 macro prob:req, probNext:req
        PLOAD   probNext, probs + sym_R * PMULT_2
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + sym_R * PMULT + PMULT
        cmovae  probNext, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
endm


; BIT_2: last bit of a bit-tree; no next probability is loaded.
; sym = 2*sym - symSub - CF.
BIT_2 macro prob:req, symSub:req
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
endm


; ---------- MATCHED LITERAL ----------
; match holds the match byte, pre-scaled so that its current top bit lines
; up with offs (256 * PMULT). bit = offs & match selects the probability
; sub-table; offs is cleared once the decoded bit differs from the match bit.

; LITM_0: first bit of a matched literal (tree node 1).
LITM_0 macro
        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, offs
        and     bit, match
        PLOAD   x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
        lea     prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
        ; lea     prm, [probs + 256 * PMULT + 1 * PMULT]
        ; add     prm, bit_R
        xor     offs, bit
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        ; "mov sym, 0" (not xor) is required: CF must survive until the sbb.
        mov     sym, 0
        PUP_SUB x1, prm, -2-1
endm


; LITM: middle bit of a matched literal.
LITM macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        xor     offs, bit
        add     sym, sym
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
endm


; LITM_2: last bit of a matched literal; offs / match are not advanced.
; The symSub value (256 - 1) also removes the leading 1 of the tree index.
LITM_2 macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        add     sym, sym

        NORM_CALC x1

        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
endm


; ---------- REVERSE BITS ----------
; Reverse bit-tree decoding. sym2_R is a pointer to the
; probability of the next node; sym accumulates the decoded value.

; REV_0: first reverse bit. Expects prob = probability at probs + 1 * PMULT
; and sym2_R = probs + 2 * PMULT (both set up by the caller).
REV_0 macro prob:req, probNext:req
        ; PLOAD   prob, probs + 1 * PMULT
        ; lea     sym2_R, [probs + 2 * PMULT]
        ; PLOAD   probNext, probs + 2 * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 3 * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [probs + 3 * PMULT]
        cmovae  sym2_R, t1_R
        PUP prob, probs + 1 * PMULT
endm


; REV_1: middle reverse bit; step is the tree-index increment of this level.
REV_1 macro prob:req, probNext:req, step:req
        add     sym2_R, step * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, sym2_R + step * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [sym2_R + step * PMULT]
        cmovae  sym2_R, t1_R
        PUP prob, t1_R - step * PMULT_2
endm


; REV_2: last reverse bit. Converts sym2_R back from pointer to index,
; merges it into sym, and subtracts step from sym when the bit is 0.
REV_2 macro prob:req, step:req
        sub     sym2_R, probs
        shr     sym2, PSHIFT
        or      sym, sym2

        NORM_CALC prob

        cmovae  range, t0
        lea     t0, [sym - step]
        cmovb   sym, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        PUP prob, probs + sym2_R * PMULT
endm


; REV_1_VAR: one reverse bit with a variable step.
; sym_R is the pointer to the current probability, sym2 the current step
; in bytes (doubled on every call). probs is used as scratch for the
; store address, so the caller must reload probs afterwards.
REV_1_VAR macro prob:req
        PLOAD   prob, sym_R
        mov     probs, sym_R
        add     sym_R, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        lea     t0_R, [sym_R + sym2_R]
        cmovae  sym_R, t0_R
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        ; mov     t1, kBitModelTotal
        ; cmovb   t0, t1
        cmovb   t0, kBitModelTotal_reg
        add     sym2, sym2
        PUP prob, probs
endm




; LIT_PROBS: selects the literal probability table and finishes the
; pending IsMatch bit-0 update.
; On entry sym holds the previous byte and probBranch the IsMatch
; probability loaded by the preceding *_NOUP compare.
; On exit probs points to the literal sub-table, processedPos is incremented.
; Clobbers t0, x1 (pbPos) and sym; reads LOC lc2.
LIT_PROBS macro lpMaskParam:req
        ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
        mov     t0, processedPos
        shl     t0, 8
        add     sym, t0
        and     sym, lpMaskParam
        add     probs_state_R, pbPos_R
        mov     x1, LOC lc2
        lea     sym, dword ptr[sym_R + 2 * sym_R]
        add     probs, Literal * PMULT
        shl     sym, x1_L
        add     probs, sym_R
        UPDATE_0 probs_state_R, 0, IsMatch
        inc     processedPos
endm



; ---------- probability array layout ----------
; All offsets below are in probability units, relative to the biased base
; pointer (probs_1664); kAlign is required to sit at offset 0.

kNumPosBitsMax          equ 4
kNumPosStatesMax        equ (1 SHL kNumPosBitsMax)

kLenNumLowBits          equ 3
kLenNumLowSymbols       equ (1 SHL kLenNumLowBits)
kLenNumHighBits         equ 8
kLenNumHighSymbols      equ (1 SHL kLenNumHighBits)
kNumLenProbs            equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

LenLow                  equ 0
LenChoice               equ LenLow
LenChoice2              equ (LenLow + kLenNumLowSymbols)
LenHigh                 equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

kNumStates              equ 12
kNumStates2             equ 16
kNumLitStates           equ 7

kStartPosModelIndex     equ 4
kEndPosModelIndex       equ 14
kNumFullDistances       equ (1 SHL (kEndPosModelIndex SHR 1))

kNumPosSlotBits         equ 6
kNumLenToPosStates      equ 4

kNumAlignBits           equ 4
kAlignTableSize         equ (1 SHL kNumAlignBits)

kMatchMinLen            equ 2
kMatchSpecLenStart      equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

kStartOffset    equ 1664
SpecPos         equ (-kStartOffset)
IsRep0Long      equ (SpecPos + kNumFullDistances)
RepLenCoder     equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
LenCoder        equ (RepLenCoder + kNumLenProbs)
IsMatch         equ (LenCoder + kNumLenProbs)
kAlign          equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
IsRep           equ (kAlign + kAlignTableSize)
IsRepG0         equ (IsRep + kNumStates)
IsRepG1         equ (IsRepG0 + kNumStates)
IsRepG2         equ (IsRepG1 + kNumStates)
PosSlot         equ (IsRepG2 + kNumStates)
Literal         equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
NUM_BASE_PROBS  equ (Literal + kStartOffset)

; Layout sanity checks (assembly stops if the constants are inconsistent).
if kAlign ne 0
  .err <Stop_Compiling_Bad_LZMA_kAlign>
endif

if NUM_BASE_PROBS ne 1984
  .err <Stop_Compiling_Bad_LZMA_PROBS>
endif


PTR_FIELD equ dq ?

; Decoder state as seen from assembly. Per the file header, this layout
; must match the CLzmaDec structure of the C version.
CLzmaDec_Asm struct
        lc      db ?
        lp      db ?
        pb      db ?
        _pad_   db ?
        dicSize dd ?

        probs_Spec      PTR_FIELD
        probs_1664      PTR_FIELD
        dic_Spec        PTR_FIELD
        dicBufSize      PTR_FIELD
        dicPos_Spec     PTR_FIELD
        buf_Spec        PTR_FIELD

        range_Spec      dd ?
        code_Spec       dd ?
        processedPos_Spec  dd ?
        checkDicSize    dd ?
        rep0    dd ?
        rep1    dd ?
        rep2    dd ?
        rep3    dd ?
        state_Spec      dd ?
        remainLen dd ?
CLzmaDec_Asm ends


; Stack frame of LzmaDec_DecodeReal_3 (addressed through LOC / LOC_0).
CLzmaDec_Asm_Loc struct
        OLD_RSP    PTR_FIELD
        lzmaPtr    PTR_FIELD
        _pad0_     PTR_FIELD
        _pad1_     PTR_FIELD
        _pad2_     PTR_FIELD
        dicBufSize PTR_FIELD
        probs_Spec PTR_FIELD
        dic_Spec   PTR_FIELD

        limit      PTR_FIELD
        bufLimit   PTR_FIELD
        lc2       dd ?
        lpMask    dd ?
        pbMask    dd ?
        checkDicSize   dd ?

        _pad_     dd ?
        remainLen dd ?
        dicPos_Spec     PTR_FIELD
        rep0      dd ?
        rep1      dd ?
        rep2      dd ?
        rep3      dd ?
CLzmaDec_Asm_Loc ends


; Field accessors: the field name is appended after the trailing dot.
;   GLOB_2 : CLzmaDec_Asm through sym_R   (used while loading state)
;   GLOB   : CLzmaDec_Asm through r1      (used while storing state)
;   LOC_0  : stack frame through r0       (used before RSP is switched / range is loaded)
;   LOC    : stack frame through RSP
GLOB_2  equ [sym_R].CLzmaDec_Asm.
GLOB    equ [r1].CLzmaDec_Asm.
LOC_0   equ [r0].CLzmaDec_Asm_Loc.
LOC     equ [RSP].CLzmaDec_Asm_Loc.
; COPY_VAR: copies a 32-bit field from CLzmaDec_Asm (GLOB_2) to the
; stack frame (LOC_0). Clobbers t0.
COPY_VAR macro name
        mov     t0, GLOB_2 name
        mov     LOC_0 name, t0
endm


; RESTORE_VAR: copies a 32-bit field from the stack frame (LOC) back to
; CLzmaDec_Asm (GLOB). Clobbers t0.
RESTORE_VAR macro name
        mov     t0, LOC name
        mov     GLOB name, t0
endm



; IsMatchBranch_Pre: prepares the address parts of the IsMatch probability:
;   pbPos         = (processedPos & pbMask) << (kLenNumLowBits + 1 + PSHIFT)
;   probs_state_R = probs + state   (state is kept pre-scaled by PMULT)
; The "reg" parameter is unused; callers pass nothing.
IsMatchBranch_Pre macro reg
        ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        mov     pbPos, LOC pbMask
        and     pbPos, processedPos
        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
        lea     probs_state_R, [probs + state_R]
endm


; IsMatchBranch: IsMatchBranch_Pre followed by the IsMatch bit decode.
; The "reg" parameter is unused.
IsMatchBranch macro reg
        IsMatchBranch_Pre
        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
endm


; CheckLimits: leaves the main loop through fin_OK when the input pointer
; reached bufLimit or the output pointer reached limit.
; The "reg" parameter is unused.
CheckLimits macro reg
        cmp     buf, LOC bufLimit
        jae     fin_OK
        cmp     dicPos, LOC limit
        jae     fin_OK
endm



; RSP is (16x + 8) bytes aligned in WIN64-x64
; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)

PARAM_lzma      equ REG_PARAM_0
PARAM_limit     equ REG_PARAM_1
PARAM_bufLimit  equ REG_PARAM_2

;-----------------------------------------------------------------------
; LzmaDec_DecodeReal_3 (PARAM_lzma, PARAM_limit, PARAM_bufLimit)
;
; In:    PARAM_lzma     = pointer to CLzmaDec_Asm
;        PARAM_limit    = output limit as offset inside dic
;        PARAM_bufLimit = input limit pointer
; Out:   x0 = 0 (fin_OK) or 1 (fin_ERROR); decoder state is written back
;        to CLzmaDec_Asm.
; Stack: a CLzmaDec_Asm_Loc frame is placed below RSP, aligned down to
;        128 bytes; the original RSP is kept in LOC OLD_RSP.
; Register save/restore and argument registers are handled by
; MY_PUSH_PRESERVED_REGS / MY_POP_PRESERVED_REGS / REG_PARAM_* from
; 7zAsm.asm (not visible here).
;-----------------------------------------------------------------------
; MY_ALIGN_64
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_REGS

        ; Build the local frame. LOC_0 (r0-based) is used until RSP is
        ; switched and must not be used once range (x0) is loaded.
        lea     r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
        and     r0, -128
        mov     r5, RSP
        mov     RSP, r0
        mov     LOC_0 OLD_RSP, r5
        mov     LOC_0 lzmaPtr, PARAM_lzma

        mov     LOC_0 remainLen, 0  ; remainLen must be ZERO

        mov     LOC_0 bufLimit, PARAM_bufLimit
        mov     sym_R, PARAM_lzma  ;  CLzmaDec_Asm pointer for GLOB_2
        mov     dic, GLOB_2 dic_Spec
        add     PARAM_limit, dic        ; limit becomes an absolute pointer
        mov     LOC_0 limit, PARAM_limit

        COPY_VAR(rep0)
        COPY_VAR(rep1)
        COPY_VAR(rep2)
        COPY_VAR(rep3)

        mov     dicPos, GLOB_2 dicPos_Spec
        add     dicPos, dic             ; dicPos is kept as an absolute pointer
        mov     LOC_0 dicPos_Spec, dicPos
        mov     LOC_0 dic_Spec, dic

        mov     x1_L, GLOB_2 pb
        mov     t0, 1
        shl     t0, x1_L
        dec     t0
        mov     LOC_0 pbMask, t0

        ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
        ; unsigned lc = p->prop.lc;
        ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);

        mov     x1_L, GLOB_2 lc
        mov     x2, 100h
        mov     t0, x2
        shr     x2, x1_L
        ; inc     x1
        add     x1_L, PSHIFT            ; lc2 = lc + PSHIFT (shift used by LIT_PROBS)
        mov     LOC_0 lc2, x1
        mov     x1_L, GLOB_2 lp
        shl     t0, x1_L
        sub     t0, x2
        mov     LOC_0 lpMask, t0
        mov     lpMask_reg, t0

        ; mov     probs, GLOB_2 probs_Spec
        ; add     probs, kStartOffset SHL PSHIFT
        mov     probs, GLOB_2 probs_1664
        mov     LOC_0 probs_Spec, probs

        mov     t0_R, GLOB_2 dicBufSize
        mov     LOC_0 dicBufSize, t0_R

        mov     x1, GLOB_2 checkDicSize
        mov     LOC_0 checkDicSize, x1

        mov     processedPos, GLOB_2 processedPos_Spec

        mov     state, GLOB_2 state_Spec
        shl     state, PSHIFT           ; state is kept pre-scaled by PMULT

        mov     buf,   GLOB_2 buf_Spec
        mov     range, GLOB_2 range_Spec
        mov     cod,   GLOB_2 code_Spec
        mov     kBitModelTotal_reg, kBitModelTotal
        xor     sym, sym

        ; if (processedPos != 0 || checkDicSize != 0)
        or      x1, processedPos
        jz      @f

        ; sym = previous byte: dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]
        add     t0_R, dic
        cmp     dicPos, dic
        cmovnz  t0_R, dicPos
        movzx   sym, byte ptr[t0_R - 1]

@@:
        ; Enter the main loop at the point that matches the saved state.
        IsMatchBranch_Pre
        cmp     state, 4 * PMULT
        jb      lit_end
        cmp     state, kNumLitStates * PMULT
        jb      lit_matched_end
        jmp     lz_end




; ---------- LITERAL ----------
MY_ALIGN_64
lit_start:
        xor     state, state
lit_start_2:
        LIT_PROBS lpMask_reg

    ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
lit_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 127
        jbe     lit_loop

    else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

    endif

        BIT_2   x2, 256 - 1

        ; mov     dic, LOC dic_Spec
        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_end:
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start

        ; jmp     IsMatch_label

; ---------- MATCHES ----------
; MY_ALIGN_32
IsMatch_label:
        UPDATE_1 probs_state_R, pbPos_R, IsMatch
        IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label

        add     probs, LenCoder * PMULT
        add     state, kNumStates * PMULT

; ---------- LEN DECODE ----------
; len_temp carries the symSub value for the final BIT_2, so that sym ends
; up as the match length (including kMatchMinLen) for the selected range.
len_decode:
        mov     len_temp, 8 - 1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, (1 SHL (kLenNumLowBits + PSHIFT))
        mov     len_temp, -1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
        mov     sym, 1
        PLOAD   x1, probs + 1 * PMULT

MY_ALIGN_32
len8_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 64
        jb      len8_loop

        mov     len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
        jmp     len_mid_2

MY_ALIGN_32
len_mid_0:
        UPDATE_0 probs, 0, 0
        add     probs, pbPos_R
        BIT_0   x2, x1
len_mid_2:
        BIT_1   x1, x2
        BIT_2   x2, len_temp
        mov     probs, LOC probs_Spec
        ; state below kNumStates here means a rep match: distance is already set.
        cmp     state, kNumStates * PMULT
        jb      copy_match


; ---------- DECODE DISTANCE ----------
        ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);

        mov     t0, 3 + kMatchMinLen
        cmp     sym, 3 + kMatchMinLen
        cmovb   t0, sym
        add     probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
        shl     t0, (kNumPosSlotBits + PSHIFT)
        add     probs, t0_R

        ; sym = Len
        ; mov     LOC remainLen, sym
        mov     len_temp, sym

    ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
slot_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 32
        jb      slot_loop

    else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

    endif

        mov     x1, sym
        BIT_2   x2, 64-1

        and     sym, 3
        mov     probs, LOC probs_Spec
        cmp     x1, 32 + kEndPosModelIndex / 2
        jb      short_dist

        ;  unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
        sub     x1, (32 + 1 + kNumAlignBits)
        ;  distance = (2 | (distance & 1));
        or      sym, 2
        PLOAD   x2, probs + 1 * PMULT
        shl     sym, kNumAlignBits + 1
        lea     sym2_R, [probs + 2 * PMULT]

        jmp     direct_norm
        ; lea     t1, [sym_R + (1 SHL kNumAlignBits)]
        ; cmp     range, kTopValue
        ; jb      direct_norm

; ---------- DIRECT DISTANCE ----------
; One direct (probability 1/2) bit per iteration; x1 counts remaining bits.
; t1 = sym with the current bit set, selected when cod >= range/2.
MY_ALIGN_32
direct_loop:
        shr     range, 1
        mov     t0, cod
        sub     cod, range
        cmovs   cod, t0
        cmovns  sym, t1

        comment ~
        sub     cod, range
        mov     x2, cod
        sar     x2, 31
        lea     sym, dword ptr [r2 + sym_R * 2 + 1]
        and     x2, range
        add     cod, x2
        ~
        dec     x1
        je      direct_end

        add     sym, sym
direct_norm:
        lea     t1, [sym_R + (1 SHL kNumAlignBits)]
        cmp     range, kTopValue
        jae     near ptr direct_loop
        ; we align for 32 here with "near ptr" command above
        NORM_2
        jmp     direct_loop

MY_ALIGN_32
direct_end:
        ;  prob =  + kAlign;
        ;  distance <<= kNumAlignBits;
        REV_0   x2, x1
        REV_1   x1, x2, 2
        REV_1   x2, x1, 4
        REV_2   x1, 8

decode_dist_end:

        ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))

        mov     t0, LOC checkDicSize
        test    t0, t0
        cmove   t0, processedPos
        cmp     sym, t0
        jae     end_of_payload

        ; rep3 = rep2;
        ; rep2 = rep1;
        ; rep1 = rep0;
        ; rep0 = distance + 1;

        inc     sym
        mov     t0, LOC rep0
        mov     t1, LOC rep1
        mov     x1, LOC rep2
        mov     LOC rep0, sym
        ; mov     sym, LOC remainLen
        mov     sym, len_temp
        mov     LOC rep1, t0
        mov     LOC rep2, t1
        mov     LOC rep3, x1

        ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        ; (the two mov instructions below do not change the flags of the cmp)
        cmp     state, (kNumStates + kNumLitStates) * PMULT
        mov     state, kNumLitStates * PMULT
        mov     t0, (kNumLitStates + 3) * PMULT
        cmovae  state, t0


; ---------- COPY MATCH ----------
copy_match:

        ; len += kMatchMinLen;
        ; add     sym, kMatchMinLen

        ; if ((rem = limit - dicPos) == 0)
        ; {
        ;   p->dicPos = dicPos;
        ;   return SZ_ERROR_DATA;
        ; }
        mov     cnt_R, LOC limit
        sub     cnt_R, dicPos
        jz      fin_ERROR

        ; curLen = ((rem < len) ? (unsigned)rem : len);
        cmp     cnt_R, sym_R
        ; cmovae  cnt_R, sym_R ; 64-bit
        cmovae  cnt, sym ; 32-bit

        ; r12 was used as len_temp above; reload it as dic.
        mov     dic, LOC dic_Spec
        mov     x1, LOC rep0

        mov     t0_R, dicPos
        add     dicPos, cnt_R
        ; processedPos += curLen;
        add     processedPos, cnt
        ; len -= curLen;
        sub     sym, cnt
        mov     LOC remainLen, sym

        sub     t0_R, dic

        ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
        sub     t0_R, r1
        jae     @f

        mov     r1, LOC dicBufSize
        add     t0_R, r1
        sub     r1, t0_R
        cmp     cnt_R, r1
        ja      copy_match_cross
@@:
        ; if (curLen <= dicBufSize - pos)

; ---------- COPY MATCH FAST ----------
        ; Byte *dest = dic + dicPos;
        ; mov     r1, dic
        ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
        ; sub   t0_R, dicPos
        ; dicPos += curLen;

        ; const Byte *lim = dest + curLen;
        add     t0_R, dic
        movzx   sym, byte ptr[t0_R]
        add     t0_R, cnt_R
        neg     cnt_R
        ; lea     r1, [dicPos - 1]
copy_common:
        dec     dicPos
        ; cmp   LOC rep0, 1
        ; je    rep0Label

        ; t0_R - src_lim
        ; r1 - dest_lim - 1
        ; cnt_R - (-cnt)

        IsMatchBranch_Pre
        inc     cnt_R
        jz      copy_end
MY_ALIGN_16
@@:
        ; Byte-by-byte copy; cnt_R counts up from a negative value to 0.
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        movzx   sym, byte ptr[cnt_R * 1 + t0_R]
        inc     cnt_R
        jnz     @b

copy_end:
lz_end_match:
        ; Store the last byte; sym keeps it as "previous byte".
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        ; IsMatchBranch_Pre
        CheckLimits
lz_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label



; ---------- LITERAL MATCHED ----------

        ; x9 may hold sym2 / dist2 here, so lpMask is read from the frame.
        LIT_PROBS LOC lpMask

        ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     x1, LOC rep0
        ; mov     dic, LOC dic_Spec
        ; r14 is reused as "bit" by LITM*, so dicPos is saved in the frame.
        mov     LOC dicPos_Spec, dicPos

        ; state -= (state < 10) ? 3 : 6;
        lea     t0, [state_R - 6 * PMULT]
        sub     state, 3 * PMULT
        cmp     state, 7 * PMULT
        cmovae  state, t0

        sub     dicPos, dic
        sub     dicPos, r1
        jae     @f
        add     dicPos, LOC dicBufSize
@@:
        comment ~
        xor     t0, t0
        sub     dicPos, r1
        cmovb   t0_R, LOC dicBufSize
        ~

        movzx   match, byte ptr[dic + dicPos * 1]

    ifdef _LZMA_SIZE_OPT

        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, match
        mov     sym, 1
MY_ALIGN_16
litm_loop:
        LITM
        cmp     sym, 256
        jb      litm_loop
        sub     sym, 256

    else

        LITM_0
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM_2

    endif

        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        ; mov     dic, LOC dic_Spec
        mov     dicPos, LOC dicPos_Spec
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_matched_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
        ; IsMatchBranch
        ; x9 was used as match; restore lpMask_reg for the plain literal path.
        mov     lpMask_reg, LOC lpMask
        sub     state, 3 * PMULT
        jmp     lit_start_2



; ---------- REP 0 LITERAL ----------
MY_ALIGN_32
IsRep0Short_label:
        UPDATE_0 probs_state_R, pbPos_R, IsRep0Long

        ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     dic, LOC dic_Spec
        mov     t0_R, dicPos
        mov     probBranch, LOC rep0
        sub     t0_R, dic

        sub     probs, RepLenCoder * PMULT
        inc     processedPos
        ; state = state < kNumLitStates ? 9 : 11;
        or      state, 1 * PMULT
        IsMatchBranch_Pre

        sub     t0_R, probBranch_R
        jae     @f
        add     t0_R, LOC dicBufSize
@@:
        movzx   sym, byte ptr[dic + t0_R * 1]
        jmp     lz_end_match


MY_ALIGN_32
IsRep_label:
        UPDATE_1 probs_state_R, 0, IsRep

        ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
        ; So we don't check it here.

        ; mov     t0, processedPos
        ; or      t0, LOC checkDicSize
        ; jz      fin_ERROR_2

        ; state = state < kNumLitStates ? 8 : 11;
        cmp     state, kNumLitStates * PMULT
        mov     state, 8 * PMULT
        mov     probBranch, 11 * PMULT
        cmovae  state, probBranch

        ; prob = probs + RepLenCoder;
        add     probs, RepLenCoder * PMULT

        IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
        UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
        jmp     len_decode

MY_ALIGN_32
IsRepG0_label:
        UPDATE_1 probs_state_R, 0, IsRepG0
        mov     dist2, LOC rep0
        mov     dist, LOC rep1
        mov     LOC rep1, dist2         ; rep1 = old rep0

        IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
        mov     LOC rep0, dist          ; rep0 = old rep1
        jmp     len_decode

; MY_ALIGN_32
IsRepG1_label:
        UPDATE_1 probs_state_R, 0, IsRepG1
        mov     dist2, LOC rep2
        mov     LOC rep2, dist          ; rep2 = old rep1

        IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
        mov     LOC rep0, dist2         ; rep0 = old rep2
        jmp     len_decode

; MY_ALIGN_32
IsRepG2_label:
        UPDATE_1 probs_state_R, 0, IsRepG2
        mov     dist, LOC rep3
        mov     LOC rep3, dist2         ; rep3 = old rep2
        mov     LOC rep0, dist          ; rep0 = old rep3
        jmp     len_decode



; ---------- SPEC SHORT DISTANCE ----------

MY_ALIGN_32
short_dist:
        sub     x1, 32 + 1
        jbe     decode_dist_end
        or      sym, 2
        shl     sym, x1_L
        ; sym_R becomes a pointer into the SpecPos probabilities;
        ; it is converted back to a distance after the loop.
        lea     sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
        mov     sym2, PMULT ; step
MY_ALIGN_32
spec_loop:
        REV_1_VAR x2
        dec     x1
        jnz     spec_loop

        ; REV_1_VAR used probs as scratch; reload it before converting.
        mov     probs, LOC probs_Spec
        sub     sym, sym2
        sub     sym, SpecPos * PMULT
        sub     sym_R, probs
        shr     sym, PSHIFT

        jmp     decode_dist_end


; ---------- COPY MATCH CROSS ----------
; The source range wraps around the end of the dictionary buffer:
; copy up to dicBufSize here, then continue from dic[0] in copy_common.
copy_match_cross:
        ; t0_R - src pos
        ; r1 - len to dicBufSize
        ; cnt_R - total copy len

        mov     t1_R, t0_R         ; srcPos
        mov     t0_R, dic
        mov     r1, LOC dicBufSize   ;
        neg     cnt_R
@@:
        movzx   sym, byte ptr[t1_R * 1 + t0_R]
        inc     t1_R
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        inc     cnt_R
        cmp     t1_R, r1
        jne     @b

        movzx   sym, byte ptr[t0_R]
        sub     t0_R, cnt_R
        jmp     copy_common




fin_ERROR:
        mov     LOC remainLen, len_temp
; fin_ERROR_2:
        mov     sym, 1
        jmp     fin

end_of_payload:
        ; Only distance 0xFFFFFFFF is accepted here; anything else is an error.
        cmp     sym, 0FFFFFFFFh ; -1
        jne     fin_ERROR

        mov     LOC remainLen, kMatchSpecLenStart
        sub     state, kNumStates * PMULT

fin_OK:
        xor     sym, sym

fin:
        ; sym = result code (0 or 1). Normalize, then write state back.
        NORM

        mov     r1, LOC lzmaPtr

        sub     dicPos, LOC dic_Spec    ; back to an offset inside dic
        mov     GLOB dicPos_Spec, dicPos
        mov     GLOB buf_Spec, buf
        mov     GLOB range_Spec, range
        mov     GLOB code_Spec, cod
        shr     state, PSHIFT           ; undo the PMULT pre-scaling
        mov     GLOB state_Spec, state
        mov     GLOB processedPos_Spec, processedPos

        RESTORE_VAR(remainLen)
        RESTORE_VAR(rep0)
        RESTORE_VAR(rep1)
        RESTORE_VAR(rep2)
        RESTORE_VAR(rep3)

        mov     x0, sym

        mov     RSP, LOC OLD_RSP

MY_POP_PRESERVED_REGS
MY_ENDP

_TEXT$LZMADECOPT ENDS

end