; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2021-02-23: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of the LzmaDec_DecodeReal_*()
;     function, for checking at link time.
;     That code is tightly coupled with LzmaDec_TryDummy()
;     and with other functions in the LzmaDec.c file.
;     CLzmaDec structure, (probs) array layout, input and output of
;     LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).

ifndef x64
; x64=1
; .err <x64_IS_REQUIRED>
endif

include 7zAsm.asm

MY_ASM_START

_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'

MY_ALIGN macro num:req
        align   num
endm

MY_ALIGN_16 macro
        MY_ALIGN 16
endm

MY_ALIGN_32 macro
        MY_ALIGN 32
endm

MY_ALIGN_64 macro
        MY_ALIGN 64
endm


; _LZMA_SIZE_OPT equ 1

; _LZMA_PROB32 equ 1

ifdef _LZMA_PROB32
        PSHIFT  equ 2
        PLOAD macro dest, mem
                mov     dest, dword ptr [mem]
        endm
        PSTORE macro src, mem
                mov     dword ptr [mem], src
        endm
else
        PSHIFT  equ 1
        PLOAD macro dest, mem
                movzx   dest, word ptr [mem]
        endm
        PSTORE macro src, mem
                mov     word ptr [mem], @CatStr(src, _W)
        endm
endif

PMULT           equ (1 SHL PSHIFT)
PMULT_HALF      equ (1 SHL (PSHIFT - 1))
PMULT_2         equ (1 SHL (PSHIFT + 1))

kMatchSpecLen_Error_Data equ (1 SHL 9)

;       x0      range
;       x1      pbPos / (prob) TREE
;       x2      probBranch / prm (MATCHED) / pbPos / cnt
;       x3      sym
;====== r4 ===  RSP
;       x5      cod
;       x6      t1 NORM_CALC / probs_state / dist
;       x7      t0 NORM_CALC / prob2 IF_BIT_1
;       x8      state
;       x9      match (MATCHED) / sym2 / dist2 / lpMask_reg
;       x10     kBitModelTotal_reg
;       r11     probs
;       x12     offs (MATCHED) / dic / len_temp
;       x13     processedPos
;       x14     bit (MATCHED) / dicPos
;       r15     buf


cod     equ x5
cod_L   equ x5_L
range   equ x0
state   equ x8
state_R equ r8
buf     equ r15
processedPos equ x13
kBitModelTotal_reg equ x10

probBranch   equ x2
probBranch_R equ r2
probBranch_W equ x2_W

pbPos   equ x1
pbPos_R equ r1

cnt     equ x2
cnt_R   equ r2

lpMask_reg equ x9
dicPos  equ r14

sym     equ x3
sym_R   equ r3
sym_L   equ x3_L

probs   equ r11
dic     equ r12

t0      equ x7
t0_W    equ x7_W
t0_R    equ r7

prob2   equ t0
prob2_W equ t0_W

t1      equ x6
t1_R    equ r6

probs_state     equ t1
probs_state_R   equ t1_R

prm     equ r2
match   equ x9
match_R equ r9
offs    equ x12
offs_R  equ r12
bit     equ x14
bit_R   equ r14

sym2    equ x9
sym2_R  equ r9

len_temp equ x12

dist    equ sym
dist2   equ x9



kNumBitModelTotalBits   equ 11
kBitModelTotal          equ (1 SHL kNumBitModelTotalBits)
kNumMoveBits            equ 5
kBitModelOffset         equ ((1 SHL kNumMoveBits) - 1)
kTopValue               equ (1 SHL 24)

NORM_2 macro
        ; movzx t0, BYTE PTR [buf]
        shl     cod, 8
        mov     cod_L, BYTE PTR [buf]
        shl     range, 8
        ; or    cod, t0
        inc     buf
endm


NORM macro
        cmp     range, kTopValue
        jae     SHORT @F
        NORM_2
@@:
endm
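
; The branch macros below implement the standard adaptive binary range decoder:
; 11-bit probabilities (kBitModelTotal = 1 SHL 11) adapted with kNumMoveBits = 5.
; Illustrative C sketch only (not the exact LzmaDec.c source); range / cod / buf /
; prob correspond to the register equates above:
;
;   if (range < kTopValue) { range <<= 8; cod = (cod << 8) | *buf++; }   // NORM
;   bound = (range >> kNumBitModelTotalBits) * prob;                     // CMP_COD
;   if (cod < bound)                                  // bit 0: UPDATE_0
;   {
;     range = bound;
;     prob += (kBitModelTotal - prob) >> kNumMoveBits;
;   }
;   else                                              // bit 1: UPDATE_1
;   {
;     range -= bound;
;     cod   -= bound;
;     prob  -= prob >> kNumMoveBits;
;   }
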
; ---------- Branch MACROS ----------

UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
        mov     prob2, kBitModelTotal_reg
        sub     prob2, probBranch
        shr     prob2, kNumMoveBits
        add     probBranch, prob2
        PSTORE  probBranch, probOffset * 1 + probsArray + probDisp * PMULT
endm


UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
        sub     prob2, range
        sub     cod, range
        mov     range, prob2
        mov     prob2, probBranch
        shr     probBranch, kNumMoveBits
        sub     prob2, probBranch
        PSTORE  prob2, probOffset * 1 + probsArray + probDisp * PMULT
endm


CMP_COD macro probsArray:req, probOffset:req, probDisp:req
        PLOAD   probBranch, probOffset * 1 + probsArray + probDisp * PMULT
        NORM
        mov     prob2, range
        shr     range, kNumBitModelTotalBits
        imul    range, probBranch
        cmp     cod, range
endm


IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jae     toLabel
endm


IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
        UPDATE_0 probsArray, probOffset, probDisp
endm


IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jb      toLabel
endm


; ---------- CMOV MACROS ----------

NORM_CALC macro prob:req
        NORM
        mov     t0, range
        shr     range, kNumBitModelTotalBits
        imul    range, prob
        sub     t0, range
        mov     t1, cod
        sub     cod, range
endm


PUP macro prob:req, probPtr:req
        sub     t0, prob
        ; only sar works for both 16/32 bit prob modes
        sar     t0, kNumMoveBits
        add     t0, prob
        PSTORE  t0, probPtr
endm


PUP_SUB macro prob:req, probPtr:req, symSub:req
        sbb     sym, symSub
        PUP     prob, probPtr
endm


PUP_COD macro prob:req, probPtr:req, symSub:req
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        mov     t1, sym
        cmovb   t0, kBitModelTotal_reg
        PUP_SUB prob, probPtr, symSub
endm


BIT_0 macro prob:req, probNext:req
        PLOAD   prob, probs + 1 * PMULT
        PLOAD   probNext, probs + 1 * PMULT_2

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 1 * PMULT_2 + PMULT
        cmovae  probNext, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        mov     sym, 2
        PUP_SUB prob, probs + 1 * PMULT, 0 - 1
endm


BIT_1 macro prob:req, probNext:req
        PLOAD   probNext, probs + sym_R * PMULT_2
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + sym_R * PMULT + PMULT
        cmovae  probNext, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
endm


BIT_2 macro prob:req, symSub:req
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
endm


; ---------- MATCHED LITERAL ----------

LITM_0 macro
        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, offs
        and     bit, match
        PLOAD   x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
        lea     prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
        ; lea   prm, [probs + 256 * PMULT + 1 * PMULT]
        ; add   prm, bit_R
        xor     offs, bit
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        mov     sym, 0
        PUP_SUB x1, prm, -2-1
endm


LITM macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        xor     offs, bit
        add     sym, sym
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
endm


LITM_2 macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        add     sym, sym

        NORM_CALC x1

        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
endm
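
; Illustrative C sketch (not the exact LzmaDec.c source) of the matched-literal
; decode that LITM_0 / LITM / LITM_2 implement with CMOV. The literal coder holds
; 3 * 0x100 probs per context; offs stays 0x100 while the decoded bits still equal
; the match-byte bits, so the match byte steers the sub-table selection. DecodeBit()
; is a hypothetical helper that decodes one adaptive bit:
;
;   unsigned offs = 0x100;
;   unsigned sym = 1;
;   do
;   {
;     unsigned matchBit, bit;
;     matchByte <<= 1;
;     matchBit = offs & matchByte & 0x100;
;     bit = DecodeBit(&probs[offs + matchBit + sym]);
;     sym = (sym << 1) | bit;
;     if (bit != (matchBit >> 8))
;       offs = 0;                 // diverged: continue in the plain literal table
;   }
;   while (sym < 0x100);
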
; ---------- REVERSE BITS ----------

REV_0 macro prob:req, probNext:req
        ; PLOAD prob, probs + 1 * PMULT
        ; lea   sym2_R, [probs + 2 * PMULT]
        ; PLOAD probNext, probs + 2 * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 3 * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [probs + 3 * PMULT]
        cmovae  sym2_R, t1_R
        PUP     prob, probs + 1 * PMULT
endm


REV_1 macro prob:req, probNext:req, step:req
        add     sym2_R, step * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, sym2_R + step * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [sym2_R + step * PMULT]
        cmovae  sym2_R, t1_R
        PUP     prob, t1_R - step * PMULT_2
endm


REV_2 macro prob:req, step:req
        sub     sym2_R, probs
        shr     sym2, PSHIFT
        or      sym, sym2

        NORM_CALC prob

        cmovae  range, t0
        lea     t0, [sym - step]
        cmovb   sym, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        PUP     prob, probs + sym2_R * PMULT
endm


REV_1_VAR macro prob:req
        PLOAD   prob, sym_R
        mov     probs, sym_R
        add     sym_R, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        lea     t0_R, [sym_R + 1 * sym2_R]
        cmovae  sym_R, t0_R
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        ; mov   t1, kBitModelTotal
        ; cmovb t0, t1
        cmovb   t0, kBitModelTotal_reg
        add     sym2, sym2
        PUP     prob, probs
endm
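
; The REV_* macros above decode a reverse bit tree (used for the 4 align bits of
; large distances and, via REV_1_VAR, for the SpecPos distance slots): the decoded
; bits are accumulated lowest-first. Illustrative C sketch only; DecodeBit() is a
; hypothetical helper that decodes one adaptive bit:
;
;   unsigned m = 1, sym = 0;
;   for (unsigned i = 0; i < numBits; i++)
;   {
;     unsigned b = DecodeBit(&probs[m]);
;     m = (m << 1) + b;
;     sym |= b << i;
;   }
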
LIT_PROBS macro lpMaskParam:req
        ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
        mov     t0, processedPos
        shl     t0, 8
        add     sym, t0
        and     sym, lpMaskParam
        add     probs_state_R, pbPos_R
        mov     x1, LOC lc2
        lea     sym, dword ptr[sym_R + 2 * sym_R]
        add     probs, Literal * PMULT
        shl     sym, x1_L
        add     probs, sym_R
        UPDATE_0 probs_state_R, 0, IsMatch
        inc     processedPos
endm



kNumPosBitsMax          equ 4
kNumPosStatesMax        equ (1 SHL kNumPosBitsMax)

kLenNumLowBits          equ 3
kLenNumLowSymbols       equ (1 SHL kLenNumLowBits)
kLenNumHighBits         equ 8
kLenNumHighSymbols      equ (1 SHL kLenNumHighBits)
kNumLenProbs            equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

LenLow                  equ 0
LenChoice               equ LenLow
LenChoice2              equ (LenLow + kLenNumLowSymbols)
LenHigh                 equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

kNumStates              equ 12
kNumStates2             equ 16
kNumLitStates           equ 7

kStartPosModelIndex     equ 4
kEndPosModelIndex       equ 14
kNumFullDistances       equ (1 SHL (kEndPosModelIndex SHR 1))

kNumPosSlotBits         equ 6
kNumLenToPosStates      equ 4

kNumAlignBits           equ 4
kAlignTableSize         equ (1 SHL kNumAlignBits)

kMatchMinLen            equ 2
kMatchSpecLenStart      equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

kStartOffset    equ 1664
SpecPos         equ (-kStartOffset)
IsRep0Long      equ (SpecPos + kNumFullDistances)
RepLenCoder     equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
LenCoder        equ (RepLenCoder + kNumLenProbs)
IsMatch         equ (LenCoder + kNumLenProbs)
kAlign          equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
IsRep           equ (kAlign + kAlignTableSize)
IsRepG0         equ (IsRep + kNumStates)
IsRepG1         equ (IsRepG0 + kNumStates)
IsRepG2         equ (IsRepG1 + kNumStates)
PosSlot         equ (IsRepG2 + kNumStates)
Literal         equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
NUM_BASE_PROBS  equ (Literal + kStartOffset)

if kAlign ne 0
  .err <Stop_Compiling_Bad_LZMA_kAlign>
endif

if NUM_BASE_PROBS ne 1984
  .err <Stop_Compiling_Bad_LZMA_PROBS>
endif
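
; Worked offsets for reference (relative to probs_1664 = p->probs + 1664 probs),
; derived only from the equates above:
;   kNumFullDistances = 1 SHL (14 SHR 1)        = 128
;   kNumLenProbs      = 2 * 8 * 16 + 256        = 512
;   SpecPos     = -1664
;   IsRep0Long  = -1664 + 128                   = -1536
;   RepLenCoder = -1536 + (16 SHL 4)            = -1280
;   LenCoder    = -1280 + 512                   = -768
;   IsMatch     = -768  + 512                   = -256
;   kAlign      = -256  + (16 SHL 4)            = 0       (checked above)
;   IsRep = 16, IsRepG0 = 28, IsRepG1 = 40, IsRepG2 = 52
;   PosSlot     = 52 + 12                       = 64
;   Literal     = 64 + (4 SHL 6)                = 320
;   NUM_BASE_PROBS = 320 + 1664                 = 1984    (checked above)
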
PTR_FIELD equ dq ?

CLzmaDec_Asm struct
        lc      db ?
        lp      db ?
        pb      db ?
        _pad_   db ?
        dicSize dd ?

        probs_Spec      PTR_FIELD
        probs_1664      PTR_FIELD
        dic_Spec        PTR_FIELD
        dicBufSize      PTR_FIELD
        dicPos_Spec     PTR_FIELD
        buf_Spec        PTR_FIELD

        range_Spec      dd ?
        code_Spec       dd ?
        processedPos_Spec dd ?
        checkDicSize    dd ?
        rep0    dd ?
        rep1    dd ?
        rep2    dd ?
        rep3    dd ?
        state_Spec      dd ?
        remainLen       dd ?
CLzmaDec_Asm ends


CLzmaDec_Asm_Loc struct
        OLD_RSP         PTR_FIELD
        lzmaPtr         PTR_FIELD
        _pad0_          PTR_FIELD
        _pad1_          PTR_FIELD
        _pad2_          PTR_FIELD
        dicBufSize      PTR_FIELD
        probs_Spec      PTR_FIELD
        dic_Spec        PTR_FIELD

        limit           PTR_FIELD
        bufLimit        PTR_FIELD
        lc2             dd ?
        lpMask          dd ?
        pbMask          dd ?
        checkDicSize    dd ?

        _pad_           dd ?
        remainLen       dd ?
        dicPos_Spec     PTR_FIELD
        rep0    dd ?
        rep1    dd ?
        rep2    dd ?
        rep3    dd ?
CLzmaDec_Asm_Loc ends


GLOB_2  equ [sym_R].CLzmaDec_Asm.
GLOB    equ [r1].CLzmaDec_Asm.
LOC_0   equ [r0].CLzmaDec_Asm_Loc.
LOC     equ [RSP].CLzmaDec_Asm_Loc.


COPY_VAR macro name
        mov     t0, GLOB_2 name
        mov     LOC_0 name, t0
endm


RESTORE_VAR macro name
        mov     t0, LOC name
        mov     GLOB name, t0
endm



IsMatchBranch_Pre macro reg
        ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        mov     pbPos, LOC pbMask
        and     pbPos, processedPos
        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
        lea     probs_state_R, [probs + 1 * state_R]
endm


IsMatchBranch macro reg
        IsMatchBranch_Pre
        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
endm


CheckLimits macro reg
        cmp     buf, LOC bufLimit
        jae     fin_OK
        cmp     dicPos, LOC limit
        jae     fin_OK
endm



; RSP is (16x + 8) bytes aligned in WIN64-x64
; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)

PARAM_lzma      equ REG_ABI_PARAM_0
PARAM_limit     equ REG_ABI_PARAM_1
PARAM_bufLimit  equ REG_ABI_PARAM_2

; MY_ALIGN_64
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_ABI_REGS

        lea     r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
        and     r0, -128
        mov     r5, RSP
        mov     RSP, r0
        mov     LOC_0 Old_RSP, r5
        mov     LOC_0 lzmaPtr, PARAM_lzma

        mov     LOC_0 remainLen, 0      ; remainLen must be ZERO

        mov     LOC_0 bufLimit, PARAM_bufLimit
        mov     sym_R, PARAM_lzma       ; CLzmaDec_Asm pointer for GLOB_2
        mov     dic, GLOB_2 dic_Spec
        add     PARAM_limit, dic
        mov     LOC_0 limit, PARAM_limit

        COPY_VAR(rep0)
        COPY_VAR(rep1)
        COPY_VAR(rep2)
        COPY_VAR(rep3)

        mov     dicPos, GLOB_2 dicPos_Spec
        add     dicPos, dic
        mov     LOC_0 dicPos_Spec, dicPos
        mov     LOC_0 dic_Spec, dic

        mov     x1_L, GLOB_2 pb
        mov     t0, 1
        shl     t0, x1_L
        dec     t0
        mov     LOC_0 pbMask, t0

        ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
        ; unsigned lc = p->prop.lc;
        ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);

        mov     x1_L, GLOB_2 lc
        mov     x2, 100h
        mov     t0, x2
        shr     x2, x1_L
        ; inc   x1
        add     x1_L, PSHIFT
        mov     LOC_0 lc2, x1
        mov     x1_L, GLOB_2 lp
        shl     t0, x1_L
        sub     t0, x2
        mov     LOC_0 lpMask, t0
        mov     lpMask_reg, t0

        ; mov   probs, GLOB_2 probs_Spec
        ; add   probs, kStartOffset SHL PSHIFT
        mov     probs, GLOB_2 probs_1664
        mov     LOC_0 probs_Spec, probs

        mov     t0_R, GLOB_2 dicBufSize
        mov     LOC_0 dicBufSize, t0_R

        mov     x1, GLOB_2 checkDicSize
        mov     LOC_0 checkDicSize, x1

        mov     processedPos, GLOB_2 processedPos_Spec

        mov     state, GLOB_2 state_Spec
        shl     state, PSHIFT

        mov     buf, GLOB_2 buf_Spec
        mov     range, GLOB_2 range_Spec
        mov     cod, GLOB_2 code_Spec
        mov     kBitModelTotal_reg, kBitModelTotal
        xor     sym, sym

        ; if (processedPos != 0 || checkDicSize != 0)
        or      x1, processedPos
        jz      @f

        add     t0_R, dic
        cmp     dicPos, dic
        cmovnz  t0_R, dicPos
        movzx   sym, byte ptr[t0_R - 1]

@@:
        IsMatchBranch_Pre
        cmp     state, 4 * PMULT
        jb      lit_end
        cmp     state, kNumLitStates * PMULT
        jb      lit_matched_end
        jmp     lz_end




; ---------- LITERAL ----------
MY_ALIGN_64
lit_start:
        xor     state, state
lit_start_2:
        LIT_PROBS lpMask_reg

        ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
lit_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 127
        jbe     lit_loop

        else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

        endif

        BIT_2   x2, 256 - 1

        ; mov   dic, LOC dic_Spec
        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_end:
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start

        ; jmp   IsMatch_label

; ---------- MATCHES ----------
; MY_ALIGN_32
IsMatch_label:
        UPDATE_1 probs_state_R, pbPos_R, IsMatch
        IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label

        add     probs, LenCoder * PMULT
        add     state, kNumStates * PMULT

; ---------- LEN DECODE ----------
len_decode:
        mov     len_temp, 8 - 1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, (1 SHL (kLenNumLowBits + PSHIFT))
        mov     len_temp, -1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
        mov     sym, 1
        PLOAD   x1, probs + 1 * PMULT

MY_ALIGN_32
len8_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 64
        jb      len8_loop

        mov     len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
        jmp     short len_mid_2 ; we use "short" here because MASM doesn't optimize that jump as other assemblers do

MY_ALIGN_32
len_mid_0:
        UPDATE_0 probs, 0, 0
        add     probs, pbPos_R
        BIT_0   x2, x1
len_mid_2:
        BIT_1   x1, x2
        BIT_2   x2, len_temp
        mov     probs, LOC probs_Spec
        cmp     state, kNumStates * PMULT
        jb      copy_match
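
; Illustrative C view of the length decode above (not the exact LzmaDec.c source).
; LenChoice / LenChoice2 live at offsets 0 and 8 of the LenLow area; the low/mid
; trees are 3-bit trees per posState, the high tree is an 8-bit tree. The asm keeps
; the length in a biased form in sym/len_temp; this sketch shows the plain value.
; DecodeBit() and TreeDecode() are hypothetical helpers:
;
;   if (DecodeBit(&len[LenChoice]) == 0)
;     length = kMatchMinLen +     TreeDecode(&len[LenLow + (posState << (kLenNumLowBits + 1))],     3);
;   else if (DecodeBit(&len[LenChoice2]) == 0)
;     length = kMatchMinLen + 8 + TreeDecode(&len[LenLow + (posState << (kLenNumLowBits + 1)) + 8], 3);
;   else
;     length = kMatchMinLen + 16 + TreeDecode(&len[LenHigh], 8);
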
; ---------- DECODE DISTANCE ----------
        ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);

        mov     t0, 3 + kMatchMinLen
        cmp     sym, 3 + kMatchMinLen
        cmovb   t0, sym
        add     probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
        shl     t0, (kNumPosSlotBits + PSHIFT)
        add     probs, t0_R

        ; sym = Len
        ; mov   LOC remainLen, sym
        mov     len_temp, sym

        ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
slot_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 32
        jb      slot_loop

        else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

        endif

        mov     x1, sym
        BIT_2   x2, 64-1

        and     sym, 3
        mov     probs, LOC probs_Spec
        cmp     x1, 32 + kEndPosModelIndex / 2
        jb      short_dist

        ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
        sub     x1, (32 + 1 + kNumAlignBits)
        ; distance = (2 | (distance & 1));
        or      sym, 2
        PLOAD   x2, probs + 1 * PMULT
        shl     sym, kNumAlignBits + 1
        lea     sym2_R, [probs + 2 * PMULT]

        jmp     direct_norm
        ; lea   t1, [sym_R + (1 SHL kNumAlignBits)]
        ; cmp   range, kTopValue
        ; jb    direct_norm

; ---------- DIRECT DISTANCE ----------
MY_ALIGN_32
direct_loop:
        shr     range, 1
        mov     t0, cod
        sub     cod, range
        cmovs   cod, t0
        cmovns  sym, t1

        comment ~
        sub     cod, range
        mov     x2, cod
        sar     x2, 31
        lea     sym, dword ptr [r2 + sym_R * 2 + 1]
        and     x2, range
        add     cod, x2
        ~
        dec     x1
        je      direct_end

        add     sym, sym
direct_norm:
        lea     t1, [sym_R + (1 SHL kNumAlignBits)]
        cmp     range, kTopValue
        jae     near ptr direct_loop
        ; we align for 32 here with the "near ptr" encoding above
        NORM_2
        jmp     direct_loop

MY_ALIGN_32
direct_end:
        ; prob = probs + kAlign;
        ; distance <<= kNumAlignBits;
        REV_0   x2, x1
        REV_1   x1, x2, 2
        REV_1   x2, x1, 4
        REV_2   x1, 8

decode_dist_end:

        ; if (distance >= (checkDicSize == 0 ? processedPos : checkDicSize))

        mov     t1, LOC rep0
        mov     x1, LOC rep1
        mov     x2, LOC rep2

        mov     t0, LOC checkDicSize
        test    t0, t0
        cmove   t0, processedPos
        cmp     sym, t0
        jae     end_of_payload
        ; jmp   end_of_payload ; for debug

        ; rep3 = rep2;
        ; rep2 = rep1;
        ; rep1 = rep0;
        ; rep0 = distance + 1;

        inc     sym
        mov     LOC rep0, sym
        ; mov   sym, LOC remainLen
        mov     sym, len_temp
        mov     LOC rep1, t1
        mov     LOC rep2, x1
        mov     LOC rep3, x2

        ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        cmp     state, (kNumStates + kNumLitStates) * PMULT
        mov     state, kNumLitStates * PMULT
        mov     t0, (kNumLitStates + 3) * PMULT
        cmovae  state, t0
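
; Illustrative C summary of the distance decode that ends here (not the exact
; LzmaDec.c source). posSlot is the 6-bit tree value decoded above;
; DecodeDirectBits() and ReverseTreeDecode() are hypothetical helpers:
;
;   if (posSlot < kStartPosModelIndex)
;     dist = posSlot;
;   else
;   {
;     unsigned numDirectBits = (posSlot >> 1) - 1;
;     dist = (2 | (posSlot & 1)) << numDirectBits;
;     if (posSlot < kEndPosModelIndex)      // slots 4..13: reverse tree over SpecPos probs
;       dist += ReverseTreeDecode(&probs[SpecPos + dist], numDirectBits);
;     else                                  // slots 14..63: direct bits, then 4 align bits
;     {
;       dist += DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
;       dist += ReverseTreeDecode(&probs[kAlign], kNumAlignBits);
;     }
;   }
;   // then: rep3 = rep2; rep2 = rep1; rep1 = rep0; rep0 = dist + 1;
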
; ---------- COPY MATCH ----------
copy_match:

        ; len += kMatchMinLen;
        ; add   sym, kMatchMinLen

        ; if ((rem = limit - dicPos) == 0)
        ; {
        ;   p->dicPos = dicPos;
        ;   return SZ_ERROR_DATA;
        ; }
        mov     cnt_R, LOC limit
        sub     cnt_R, dicPos
        jz      fin_dicPos_LIMIT

        ; curLen = ((rem < len) ? (unsigned)rem : len);
        cmp     cnt_R, sym_R
        ; cmovae cnt_R, sym_R ; 64-bit
        cmovae  cnt, sym      ; 32-bit

        mov     dic, LOC dic_Spec
        mov     x1, LOC rep0

        mov     t0_R, dicPos
        add     dicPos, cnt_R
        ; processedPos += curLen;
        add     processedPos, cnt
        ; len -= curLen;
        sub     sym, cnt
        mov     LOC remainLen, sym

        sub     t0_R, dic

        ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
        sub     t0_R, r1
        jae     @f

        mov     r1, LOC dicBufSize
        add     t0_R, r1
        sub     r1, t0_R
        cmp     cnt_R, r1
        ja      copy_match_cross
@@:
        ; if (curLen <= dicBufSize - pos)

; ---------- COPY MATCH FAST ----------
        ; Byte *dest = dic + dicPos;
        ; mov   r1, dic
        ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
        ; sub   t0_R, dicPos
        ; dicPos += curLen;

        ; const Byte *lim = dest + curLen;
        add     t0_R, dic
        movzx   sym, byte ptr[t0_R]
        add     t0_R, cnt_R
        neg     cnt_R
        ; lea   r1, [dicPos - 1]
copy_common:
        dec     dicPos
        ; cmp   LOC rep0, 1
        ; je    rep0Label

        ; t0_R  - src_lim
        ; r1    - dest_lim - 1
        ; cnt_R - (-cnt)

        IsMatchBranch_Pre
        inc     cnt_R
        jz      copy_end
MY_ALIGN_16
@@:
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        movzx   sym, byte ptr[cnt_R * 1 + t0_R]
        inc     cnt_R
        jnz     @b

copy_end:
lz_end_match:
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        ; IsMatchBranch_Pre
        CheckLimits
lz_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
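
; Illustrative C sketch of the copy loops above (not the exact LzmaDec.c source);
; rem / curLen bounding and the cross-wrap case are handled by the code above,
; this only shows the basic dictionary-window copy:
;
;   SizeT pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
;   do
;   {
;     dic[dicPos++] = dic[pos];
;     if (++pos == dicBufSize)
;       pos = 0;                  // the COPY MATCH CROSS path handles this wrap
;   }
;   while (--curLen != 0);
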
; ---------- LITERAL MATCHED ----------

        LIT_PROBS LOC lpMask

        ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     x1, LOC rep0
        ; mov   dic, LOC dic_Spec
        mov     LOC dicPos_Spec, dicPos

        ; state -= (state < 10) ? 3 : 6;
        lea     t0, [state_R - 6 * PMULT]
        sub     state, 3 * PMULT
        cmp     state, 7 * PMULT
        cmovae  state, t0

        sub     dicPos, dic
        sub     dicPos, r1
        jae     @f
        add     dicPos, LOC dicBufSize
@@:
        comment ~
        xor     t0, t0
        sub     dicPos, r1
        cmovb   t0_R, LOC dicBufSize
        ~

        movzx   match, byte ptr[dic + dicPos * 1]

        ifdef _LZMA_SIZE_OPT

        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, match
        mov     sym, 1
MY_ALIGN_16
litm_loop:
        LITM
        cmp     sym, 256
        jb      litm_loop
        sub     sym, 256

        else

        LITM_0
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM_2

        endif

        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        ; mov   dic, LOC dic_Spec
        mov     dicPos, LOC dicPos_Spec
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_matched_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
        ; IsMatchBranch
        mov     lpMask_reg, LOC lpMask
        sub     state, 3 * PMULT
        jmp     lit_start_2



; ---------- REP 0 LITERAL ----------
MY_ALIGN_32
IsRep0Short_label:
        UPDATE_0 probs_state_R, pbPos_R, IsRep0Long

        ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     dic, LOC dic_Spec
        mov     t0_R, dicPos
        mov     probBranch, LOC rep0
        sub     t0_R, dic

        sub     probs, RepLenCoder * PMULT

        ; state = state < kNumLitStates ? 9 : 11;
        or      state, 1 * PMULT

        ; the caller doesn't allow (dicPos >= limit) case for REP_SHORT
        ; so we don't need the following (dicPos == limit) check here:
        ;       cmp     dicPos, LOC limit
        ;       jae     fin_dicPos_LIMIT_REP_SHORT

        inc     processedPos

        IsMatchBranch_Pre

;       xor     sym, sym
;       sub     t0_R, probBranch_R
;       cmovb   sym_R, LOC dicBufSize
;       add     t0_R, sym_R
        sub     t0_R, probBranch_R
        jae     @f
        add     t0_R, LOC dicBufSize
@@:
        movzx   sym, byte ptr[dic + t0_R * 1]
        jmp     lz_end_match


MY_ALIGN_32
IsRep_label:
        UPDATE_1 probs_state_R, 0, IsRep

        ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
        ; So we don't check it here.

        ; mov   t0, processedPos
        ; or    t0, LOC checkDicSize
        ; jz    fin_ERROR_2

        ; state = state < kNumLitStates ? 8 : 11;
        cmp     state, kNumLitStates * PMULT
        mov     state, 8 * PMULT
        mov     probBranch, 11 * PMULT
        cmovae  state, probBranch

        ; prob = probs + RepLenCoder;
        add     probs, RepLenCoder * PMULT

        IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
        UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
        jmp     len_decode

MY_ALIGN_32
IsRepG0_label:
        UPDATE_1 probs_state_R, 0, IsRepG0
        mov     dist2, LOC rep0
        mov     dist, LOC rep1
        mov     LOC rep1, dist2

        IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
        mov     LOC rep0, dist
        jmp     len_decode

; MY_ALIGN_32
IsRepG1_label:
        UPDATE_1 probs_state_R, 0, IsRepG1
        mov     dist2, LOC rep2
        mov     LOC rep2, dist

        IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
        mov     LOC rep0, dist2
        jmp     len_decode

; MY_ALIGN_32
IsRepG2_label:
        UPDATE_1 probs_state_R, 0, IsRepG2
        mov     dist, LOC rep3
        mov     LOC rep3, dist2
        mov     LOC rep0, dist
        jmp     len_decode
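
; Illustrative C view of the rep-distance selection above (not the exact LzmaDec.c
; source); DecodeBit() is a hypothetical helper that decodes one adaptive bit:
;
;   if (DecodeBit(&probs[IsRepG0 + state]) == 0)
;     dist = rep0;                                    // rep0 reused (IsRep0Long / REP 0 LITERAL)
;   else if (DecodeBit(&probs[IsRepG1 + state]) == 0)
;   { dist = rep1; rep1 = rep0; }
;   else if (DecodeBit(&probs[IsRepG2 + state]) == 0)
;   { dist = rep2; rep2 = rep1; rep1 = rep0; }
;   else
;   { dist = rep3; rep3 = rep2; rep2 = rep1; rep1 = rep0; }
;   rep0 = dist;
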
; ---------- SPEC SHORT DISTANCE ----------

MY_ALIGN_32
short_dist:
        sub     x1, 32 + 1
        jbe     decode_dist_end
        or      sym, 2
        shl     sym, x1_L
        lea     sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
        mov     sym2, PMULT     ; step
MY_ALIGN_32
spec_loop:
        REV_1_VAR x2
        dec     x1
        jnz     spec_loop

        mov     probs, LOC probs_Spec
        sub     sym, sym2
        sub     sym, SpecPos * PMULT
        sub     sym_R, probs
        shr     sym, PSHIFT

        jmp     decode_dist_end


; ---------- COPY MATCH CROSS ----------
copy_match_cross:
        ; t0_R  - src pos
        ; r1    - len to dicBufSize
        ; cnt_R - total copy len

        mov     t1_R, t0_R      ; srcPos
        mov     t0_R, dic
        mov     r1, LOC dicBufSize
        neg     cnt_R
@@:
        movzx   sym, byte ptr[t1_R * 1 + t0_R]
        inc     t1_R
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        inc     cnt_R
        cmp     t1_R, r1
        jne     @b

        movzx   sym, byte ptr[t0_R]
        sub     t0_R, cnt_R
        jmp     copy_common




; fin_dicPos_LIMIT_REP_SHORT:
        ; mov   sym, 1

fin_dicPos_LIMIT:
        mov     LOC remainLen, sym
        jmp     fin_OK
        ; For more strict mode we can stop decoding with error
        ; mov   sym, 1
        ; jmp   fin


fin_ERROR_MATCH_DIST:

        ; rep3 = rep2;
        ; rep2 = rep1;
        ; rep1 = rep0;
        ; rep0 = distance + 1;

        add     len_temp, kMatchSpecLen_Error_Data
        mov     LOC remainLen, len_temp

        mov     LOC rep0, sym
        mov     LOC rep1, t1
        mov     LOC rep2, x1
        mov     LOC rep3, x2

        ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        cmp     state, (kNumStates + kNumLitStates) * PMULT
        mov     state, kNumLitStates * PMULT
        mov     t0, (kNumLitStates + 3) * PMULT
        cmovae  state, t0

        ; jmp   fin_OK
        mov     sym, 1
        jmp     fin

end_of_payload:
        inc     sym
        jnz     fin_ERROR_MATCH_DIST

        mov     LOC remainLen, kMatchSpecLenStart
        sub     state, kNumStates * PMULT

fin_OK:
        xor     sym, sym

fin:
        NORM

        mov     r1, LOC lzmaPtr

        sub     dicPos, LOC dic_Spec
        mov     GLOB dicPos_Spec, dicPos
        mov     GLOB buf_Spec, buf
        mov     GLOB range_Spec, range
        mov     GLOB code_Spec, cod
        shr     state, PSHIFT
        mov     GLOB state_Spec, state
        mov     GLOB processedPos_Spec, processedPos

        RESTORE_VAR(remainLen)
        RESTORE_VAR(rep0)
        RESTORE_VAR(rep1)
        RESTORE_VAR(rep2)
        RESTORE_VAR(rep3)

        mov     x0, sym

        mov     RSP, LOC Old_RSP

MY_POP_PRESERVED_ABI_REGS
MY_ENDP

_TEXT$LZMADECOPT ENDS

end