1// Copyright 2019 The Wuffs Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// ---------------- 16 17// Package flatecut produces DEFLATE-formatted data subject to a maximum 18// compressed size. 19// 20// The typical compression problem is to encode all of the given source data in 21// some number of bytes. This package's problem is finding a reasonably long 22// prefix of the source data that encodes in up to a given number of bytes. 23package flatecut 24 25import ( 26 "bytes" 27 "compress/flate" 28 "errors" 29 "io" 30) 31 32var ( 33 errMaxEncodedLenTooSmall = errors.New("flatecut: maxEncodedLen is too small") 34 35 errInternalInconsistentDecodedLen = errors.New("flatecut: internal: inconsistent decodedLen") 36 errInternalNoProgress = errors.New("flatecut: internal: no progress") 37 errInternalReplaceWithSingleBlock = errors.New("flatecut: internal: replace with single block") 38 errInternalSomeProgress = errors.New("flatecut: internal: some progress") 39 40 errInvalidBadBlockLength = errors.New("flatecut: invalid input: bad block length") 41 errInvalidBadBlockType = errors.New("flatecut: invalid input: bad block type") 42 errInvalidBadCodeLengths = errors.New("flatecut: invalid input: bad code lengths") 43 errInvalidBadHuffmanTree = errors.New("flatecut: invalid input: bad Huffman tree") 44 errInvalidBadSymbol = errors.New("flatecut: invalid input: bad symbol") 45 errInvalidNoEndOfBlock = errors.New("flatecut: invalid input: no end-of-block") 46 errInvalidNotEnoughData = errors.New("flatecut: invalid input: not enough data") 47 errInvalidTooManyCodes = errors.New("flatecut: invalid input: too many codes") 48) 49 50var ( 51 // codeOrder is defined in RFC 1951 section 3.2.7. 52 codeOrder = [19]uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} 53 54 // These tables are defined in RFC 1951 section 3.2.5. 55 // 56 // The l-tables' indexes are biased by 256. 57 lBases = [32]int32{ 58 mostNegativeInt32, 59 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 60 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 61 mostNegativeInt32, mostNegativeInt32, 62 } 63 lExtras = [32]uint32{ 64 0, 65 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 66 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 67 0, 0, 68 } 69 dBases = [32]int32{ 70 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 71 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 72 mostNegativeInt32, mostNegativeInt32, 73 } 74 dExtras = [32]uint32{ 75 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 76 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 77 0, 0, 78 } 79) 80 81const ( 82 // SmallestValidMaxEncodedLen is the length in bytes of the smallest valid 83 // DEFLATE-encoded data. 84 SmallestValidMaxEncodedLen = 2 85 86 maxCodeBits = 15 87 maxNumCodes = 288 88 89 mostNegativeInt32 = -0x80000000 90) 91 92func loadU64LE(b []byte) uint64 { 93 _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 94 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | 95 uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 96 97} 98 99type bitstream struct { 100 // bytes[index] is the next byte to load into the 'bits' field. 101 bytes []byte 102 index int 103 104 // The low nBits bits of the 'bits' field hold the next bits (in LSB-first 105 // order). 106 bits uint64 107 nBits uint32 108} 109 110func (b *bitstream) take(nBits uint32) int32 { 111 for b.nBits < nBits { 112 if b.index >= len(b.bytes) { 113 return mostNegativeInt32 114 } 115 b.bits |= uint64(b.bytes[b.index]) << b.nBits 116 b.nBits += 8 117 b.index++ 118 } 119 120 mask := ((uint32(1)) << nBits) - 1 121 ret := uint32(b.bits) & mask 122 b.bits >>= nBits 123 b.nBits -= nBits 124 return int32(ret) 125} 126 127// huffman represents a DEFLATE Huffman tree. 128// 129// For the "Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3, 3, 2, 130// 4, 4)" example from RFC 1951 section 3.2.2, the huffman struct is 131// initialized by calling: 132// 133// h.construct([]uint32{ 134// 'A': 3, 'B': 3, 'C': 3, 'D': 3, 'E': 3, 'F': 2, 'G': 4, 'H': 4, 135// }) 136// 137// which results in: 138// 139// huffman{ 140// counts: [maxCodeBits + 1]uint32{ 141// 2: 1, 3: 5, 4: 2, 142// }, 143// symbols: [maxNumCodes]int32{ 144// 0: 'F', 1: 'A', 2: 'B', 3: 'C', 4: 'D', 5: 'E', 6: 'G', 7: 'H', 145// }, 146// lookUpTable: [256]uint32{ 147// etc, 148// }, 149// } 150// 151// Continuing the example from the RFC, decoding "1110" from the bitstream will 152// produce the 'G' symbol. 153type huffman struct { 154 counts [maxCodeBits + 1]uint32 155 symbols [maxNumCodes]int32 156 157 // lookUpTable holds cached results of the slowDecode method. 158 // 159 // The uint8 key is the next 8 bits of input. 160 // 161 // The uint32 value's low 16 bits are the symbol, the high 16 bits are the 162 // number of bits used to encode that symbol. 163 // 164 // Zero means that the entry is invalid. For example, the encoded symbol 165 // might be longer than 8 bits. 166 lookUpTable [256]uint32 167} 168 169func (h *huffman) decode(b *bitstream) int32 { 170 if b.nBits >= 8 { 171 // No-op. 172 } else if b.index < (len(b.bytes) - 8) { 173 // This is "Variant 4" of 174 // https://fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/ 175 u := loadU64LE(b.bytes[b.index:]) 176 b.bits |= u << b.nBits 177 b.index += int((63 - b.nBits) >> 3) 178 b.nBits |= 56 179 } else if b.index < len(b.bytes) { 180 b.bits |= uint64(b.bytes[b.index]) << b.nBits 181 b.nBits += 8 182 b.index++ 183 } else { 184 goto slow 185 } 186 187 if x := h.lookUpTable[b.bits&0xFF]; x != 0 { 188 n := x >> 16 189 b.bits >>= n 190 b.nBits -= n 191 return int32(x & 0xFFFF) 192 } 193 194slow: 195 return h.slowDecode(b) 196} 197 198func (h *huffman) slowDecode(b *bitstream) int32 { 199 code := uint32(0) // The i bits from the bitstream. 200 first := uint32(0) // The first Huffman code of length i. 201 symIndex := uint32(0) // How many elements of h.symbols we've gone past. 202 203 // The "at this point" comments in the loop detail the `"1110" decodes to 204 // 'G'` example discussed above in the huffman type's doc comment. 205 // 206 // Note that, as a loop invariant, code >= first. 207 for i := 1; i <= maxCodeBits; i++ { 208 if b.nBits == 0 { 209 if b.index >= len(b.bytes) { 210 return mostNegativeInt32 211 } 212 b.bits = uint64(b.bytes[b.index]) 213 b.nBits = 8 214 b.index++ 215 } 216 217 code |= uint32(b.bits & 1) 218 b.bits >>= 1 219 b.nBits -= 1 220 221 // At this point: 222 // - When i == 1, code is 1, symIndex is 0, first is 0. 223 // - When i == 2, code is 3, symIndex is 0, first is 0. 224 // - When i == 3, code is 7, symIndex is 1, first is 2. 225 // - When i == 4, code is 14, symIndex is 6, first is 14. 226 227 // Recall that h.counts is: {0, 0, 1, 5, 2, 0, 0, ...}. 228 count := h.counts[i] 229 if code < (count + first) { 230 // At this point: 231 // - When i == 4, code is 14, symIndex is 6, first is 14. 232 // 233 // h.symbols[6+14-14] is indeed 'G'. 234 return h.symbols[symIndex+code-first] 235 } 236 237 symIndex += count 238 first += count 239 first <<= 1 240 code <<= 1 241 242 // At this point: 243 // - When i == 1, code is 2, symIndex is 0, first is 0. 244 // - When i == 2, code is 6, symIndex is 1, first is 2. 245 // - When i == 3, code is 14, symIndex is 6, first is 14. 246 } 247 return mostNegativeInt32 248} 249 250func (h *huffman) construct(lengths []uint32) (endCodeBits uint32, endCodeNBits uint32, retErr error) { 251 for i := range h.counts { 252 h.counts[i] = 0 253 } 254 for _, x := range lengths { 255 h.counts[x]++ 256 } 257 if h.counts[0] >= uint32(len(lengths)) { 258 return 0, 0, errInvalidBadHuffmanTree 259 } 260 261 // Check for an over- or under-subscribed Huffman tree, and for the 262 // end-of-block code (with value 256). 263 const endCode = 256 264 remaining := uint32(1) 265 endCodeLength := uint32(0) 266 if len(lengths) > endCode { 267 endCodeLength = lengths[endCode] 268 } 269 for i := uint32(1); i <= maxCodeBits; i++ { 270 remaining *= 2 271 if remaining < h.counts[i] { 272 return 0, 0, errInvalidBadHuffmanTree 273 } 274 remaining -= h.counts[i] 275 276 if i == endCodeLength { 277 remainingForEndCode := remaining 278 for _, l := range lengths[endCode+1:] { 279 if l == endCodeLength { 280 remainingForEndCode++ 281 } 282 } 283 endCodeBits = ((uint32(1) << endCodeLength) - 1) - remainingForEndCode 284 endCodeNBits = endCodeLength 285 } 286 } 287 if remaining != 0 { 288 if ((h.counts[0] + 1) == uint32(len(lengths))) && (h.counts[1] == 1) { 289 // No-op. We allow a degenerate Huffman tree with only one code (of 290 // length 1 bit). 291 } else { 292 return 0, 0, errInvalidBadHuffmanTree 293 } 294 } 295 296 offsets := [maxCodeBits + 1]uint32{} 297 for i := 1; i < maxCodeBits; i++ { 298 offsets[i+1] = offsets[i] + h.counts[i] 299 } 300 301 for symbol, length := range lengths { 302 if length != 0 { 303 h.symbols[offsets[length]] = int32(symbol) 304 offsets[length]++ 305 } 306 } 307 h.constructLookUpTable() 308 return endCodeBits, endCodeNBits, nil 309} 310 311func (h *huffman) constructLookUpTable() { 312 b := bitstream{ 313 bytes: []byte{0x00}, 314 } 315 for i := range h.lookUpTable { 316 b.bytes[0] = uint8(i) 317 b.index = 0 318 b.bits = 0 319 b.nBits = 0 320 if x := h.slowDecode(&b); x >= 0 { 321 h.lookUpTable[i] = ((8 - b.nBits) << 16) | uint32(x) 322 } else { 323 h.lookUpTable[i] = 0 324 } 325 } 326} 327 328// cutSingleBlock is an implementation of Cut whose output consists of a single 329// DEFLATE block. 330// 331// If maxEncodedLen is sufficiently large, this will be a Stored block (i.e. a 332// header followed by literal bytes). Otherwise, it will set encoding[:2] so 333// that decoding produces zero bytes. 334// 335// A precondition is that maxEncodedLen >= SmallestValidMaxEncodedLen. 336func cutSingleBlock(encoded []byte, maxEncodedLen int) (encodedLen int, decodedLen int, retErr error) { 337 if maxEncodedLen < SmallestValidMaxEncodedLen { 338 panic("unreachable") 339 } 340 341 // Try re-encoding as a single Stored block: up to 0xFFFF literal bytes, 342 // with a 5 byte prefix. 343 if maxEncodedLen > 5 { 344 n := maxEncodedLen - 5 345 if n > 0xFFFF { 346 n = 0xFFFF 347 } 348 349 buf := make([]byte, n) 350 n, err := io.ReadFull(flate.NewReader(bytes.NewReader(encoded)), buf) 351 if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { 352 return 0, 0, err 353 } 354 355 if n > 0 { 356 encoded[0] = 0x01 // finalBlock = true, blockType = 0 (Stored). 357 encoded[1] = uint8(n >> 0) 358 encoded[2] = uint8(n >> 8) 359 encoded[3] = ^encoded[1] 360 encoded[4] = ^encoded[2] 361 copy(encoded[5:], buf[:n]) 362 return n + 5, n, nil 363 } 364 } 365 366 // Set encoded[:2] to hold: 367 // - 1 bit ...._...._...._...1 finalBlock = true. 368 // - 2 bits ...._...._...._.01. blockType = 1 (Static Huffman). 369 // - 7 bits ...._..00_0000_0... litLenSymbol = 256 (end-of-block). 370 // - 6 bits 0000_00.._...._.... padding. 371 encoded[0] = 0x03 372 encoded[1] = 0x00 373 return 2, 0, nil 374} 375 376// Cut modifies encoded's contents such that encoded[:encodedLen] is valid 377// DEFLATE-compressed data, assuming that encoded starts off containing valid 378// DEFLATE-compressed data. 379// 380// If a nil error is returned, then encodedLen <= maxEncodedLen will hold. 381// 382// Decompressing that modified, shorter byte slice produces a prefix (of length 383// decodedLen) of the decompression of the original, longer byte slice. 384// 385// If w is non-nil, that prefix is also written to w. If a non-nil error is 386// returned, incomplete data might still be written to w. 387// 388// It does not necessarily return the largest possible decodedLen. 389func Cut(w io.Writer, encoded []byte, maxEncodedLen int) (encodedLen int, decodedLen int, retErr error) { 390 if maxEncodedLen < SmallestValidMaxEncodedLen { 391 return 0, 0, errMaxEncodedLenTooSmall 392 } 393 const m = 1 << 30 394 if uint64(maxEncodedLen) > m { 395 maxEncodedLen = m 396 } 397 if maxEncodedLen > len(encoded) { 398 maxEncodedLen = len(encoded) 399 } 400 if maxEncodedLen < SmallestValidMaxEncodedLen { 401 return 0, 0, errInvalidNotEnoughData 402 } 403 404 c := cutter{ 405 bits: bitstream{ 406 bytes: encoded, 407 }, 408 maxEncodedLen: maxEncodedLen, 409 } 410 encodedLen, decodedLen, err := c.cut() 411 if err != nil { 412 return 0, 0, err 413 } 414 415 if w != nil { 416 // TODO: writing to w directly, in cutter's doStored and doHuffman, 417 // might be faster than re-walking the bitstream with compress/flate. 418 r := flate.NewReader(bytes.NewReader(encoded[:encodedLen])) 419 if n, err := io.Copy(w, r); err != nil { 420 r.Close() 421 return 0, 0, err 422 } else if n != int64(decodedLen) { 423 r.Close() 424 return 0, 0, errInternalInconsistentDecodedLen 425 } 426 if err := r.Close(); err != nil { 427 return 0, 0, err 428 } 429 } 430 431 return encodedLen, decodedLen, nil 432} 433 434type cutter struct { 435 bits bitstream 436 437 maxEncodedLen int 438 decodedLen int32 439 440 endCodeBits uint32 441 endCodeNBits uint32 442 443 lHuff huffman 444 dHuff huffman 445} 446 447func (c *cutter) cut() (encodedLen int, decodedLen int, retErr error) { 448 prevFinalBlockIndex := -1 449 prevFinalBlockNBits := uint32(0) 450 451 for { 452 finalBlock := c.bits.take(1) 453 if finalBlock < 0 { 454 return 0, 0, errInvalidNotEnoughData 455 } 456 457 finalBlockIndex := c.bits.index 458 finalBlockNBits := c.bits.nBits 459 for finalBlockNBits >= 8 { 460 finalBlockIndex-- 461 finalBlockNBits -= 8 462 } 463 464 blockType := c.bits.take(2) 465 if blockType < 0 { 466 return 0, 0, errInvalidNotEnoughData 467 } 468 469 err := error(nil) 470 switch blockType { 471 case 0: 472 err = c.doStored() 473 case 1: 474 err = c.doStaticHuffman(prevFinalBlockIndex < 0) 475 case 2: 476 err = c.doDynamicHuffman(prevFinalBlockIndex < 0) 477 case 3: 478 return 0, 0, errInvalidBadBlockType 479 } 480 481 for c.bits.nBits >= 8 { 482 c.bits.index-- 483 c.bits.nBits -= 8 484 } 485 486 switch err { 487 case nil: 488 if finalBlock == 0 { 489 prevFinalBlockIndex = finalBlockIndex 490 prevFinalBlockNBits = finalBlockNBits 491 continue 492 } 493 494 case errInternalNoProgress: 495 if prevFinalBlockIndex < 0 { 496 return cutSingleBlock(c.bits.bytes, c.maxEncodedLen) 497 } 498 499 // Un-read to just before the finalBlock bit. 500 c.bits.index = finalBlockIndex 501 c.bits.nBits = finalBlockNBits + 1 502 for c.bits.nBits >= 8 { 503 c.bits.index-- 504 c.bits.nBits -= 8 505 } 506 507 finalBlockIndex = prevFinalBlockIndex 508 finalBlockNBits = prevFinalBlockNBits 509 fallthrough 510 511 case errInternalSomeProgress: 512 // Set the n'th bit (LSB=0, MSB=7) of 513 // c.bits.bytes[finalBlockIndex-1] to be 1. 514 n := 7 - finalBlockNBits 515 mask := uint32(1) << n 516 c.bits.bytes[finalBlockIndex-1] |= uint8(mask) 517 518 case errInternalReplaceWithSingleBlock: 519 return cutSingleBlock(c.bits.bytes, c.maxEncodedLen) 520 521 default: 522 return 0, 0, err 523 524 } 525 break 526 } 527 528 if c.bits.nBits != 0 { 529 // Clear the high c.bits.nBits bits of c.bits.bytes[c.bits.index-1]. 530 mask := (uint32(1) << (8 - c.bits.nBits)) - 1 531 c.bits.bytes[c.bits.index-1] &= uint8(mask) 532 } 533 534 return c.bits.index, int(c.decodedLen), nil 535} 536 537func (c *cutter) doStored() error { 538 for c.bits.nBits >= 8 { 539 c.bits.index-- 540 c.bits.nBits -= 8 541 } 542 if (c.maxEncodedLen < c.bits.index) || ((c.maxEncodedLen - c.bits.index) < 4) { 543 return errInternalNoProgress 544 } 545 546 length := uint32(c.bits.bytes[c.bits.index+0]) | uint32(c.bits.bytes[c.bits.index+1])<<8 547 invLen := uint32(c.bits.bytes[c.bits.index+2]) | uint32(c.bits.bytes[c.bits.index+3])<<8 548 if length+invLen != 0xFFFF { 549 return errInvalidBadBlockLength 550 } 551 552 // Check for potential overflow. 553 if (c.decodedLen + int32(length)) < 0 { 554 return errInternalNoProgress 555 } 556 557 index := c.bits.index + 4 558 if remaining := c.maxEncodedLen - index; remaining >= int(length) { 559 c.bits.index = index + int(length) 560 c.bits.bits = 0 561 c.bits.nBits = 0 562 c.decodedLen += int32(length) 563 return nil 564 } else if remaining == 0 { 565 return errInternalNoProgress 566 } else { 567 length = uint32(remaining) 568 invLen = 0xFFFF - length 569 } 570 571 c.bits.bytes[c.bits.index+0] = uint8(length >> 0) 572 c.bits.bytes[c.bits.index+1] = uint8(length >> 8) 573 c.bits.bytes[c.bits.index+2] = uint8(invLen >> 0) 574 c.bits.bytes[c.bits.index+3] = uint8(invLen >> 8) 575 c.bits.index = index + int(length) 576 c.bits.bits = 0 577 c.bits.nBits = 0 578 c.decodedLen += int32(length) 579 return errInternalSomeProgress 580} 581 582func (c *cutter) doStaticHuffman(isFirstBlock bool) error { 583 const ( 584 numLCodes = 288 585 numDCodes = 32 586 ) 587 588 // Initialize lengths as per RFC 1951 section 3.2.6. 589 lengths := make([]uint32, numLCodes+numDCodes) 590 i := 0 591 for ; i < 144; i++ { 592 lengths[i] = 8 593 } 594 for ; i < 256; i++ { 595 lengths[i] = 9 596 } 597 for ; i < 280; i++ { 598 lengths[i] = 7 599 } 600 for ; i < 288; i++ { 601 lengths[i] = 8 602 } 603 for ; i < 320; i++ { 604 lengths[i] = 5 605 } 606 607 return c.doHuffman(isFirstBlock, lengths[:numLCodes], lengths[numLCodes:]) 608} 609 610func (c *cutter) doDynamicHuffman(isFirstBlock bool) error { 611 numLCodes := 257 + c.bits.take(5) 612 if numLCodes < 0 { 613 return errInvalidNotEnoughData 614 } 615 616 numDCodes := 1 + c.bits.take(5) 617 if numDCodes < 0 { 618 return errInvalidNotEnoughData 619 } 620 621 numCodeLengths := 4 + c.bits.take(4) 622 if numCodeLengths < 0 { 623 return errInvalidNotEnoughData 624 } 625 626 // The 286 and 30 numbers come from RFC 1951 section 3.2.5. 627 if (numLCodes > 286) || (numDCodes > 30) { 628 return errInvalidTooManyCodes 629 } 630 631 lengths := make([]uint32, numLCodes+numDCodes) 632 for i := int32(0); i < numCodeLengths; i++ { 633 x := c.bits.take(3) 634 if x < 0 { 635 return errInvalidNotEnoughData 636 } 637 lengths[codeOrder[i]] = uint32(x) 638 } 639 640 if _, _, err := c.lHuff.construct(lengths); err != nil { 641 return err 642 } 643 644 for i := int32(0); i < (numLCodes + numDCodes); { 645 symbol := c.lHuff.decode(&c.bits) 646 if symbol < 0 { 647 return errInvalidBadCodeLengths 648 } 649 value, count := uint32(0), int32(0) 650 651 switch symbol { 652 default: 653 lengths[i] = uint32(symbol) 654 i++ 655 continue 656 657 case 16: 658 if i == 0 { 659 return errInvalidBadCodeLengths 660 } 661 value = lengths[i-1] 662 count = 3 + c.bits.take(2) 663 664 case 17: 665 count = 3 + c.bits.take(3) 666 667 case 18: 668 count = 11 + c.bits.take(7) 669 } 670 if count < 0 { 671 return errInvalidNotEnoughData 672 } 673 674 if (i + count) > (numLCodes + numDCodes) { 675 return errInvalidBadCodeLengths 676 } 677 for ; count > 0; count-- { 678 lengths[i] = value 679 i++ 680 } 681 } 682 683 return c.doHuffman(isFirstBlock, lengths[:numLCodes], lengths[numLCodes:]) 684} 685 686func (c *cutter) doHuffman(isFirstBlock bool, lLengths []uint32, dLengths []uint32) error { 687 err := error(nil) 688 if c.endCodeBits, c.endCodeNBits, err = c.lHuff.construct(lLengths); err != nil { 689 return err 690 } 691 if c.endCodeNBits == 0 { 692 return errInvalidNoEndOfBlock 693 } 694 if _, _, err := c.dHuff.construct(dLengths); err != nil { 695 return err 696 } 697 698 for c.bits.nBits >= 8 { 699 c.bits.index-- 700 c.bits.nBits -= 8 701 } 702 if c.bits.index > c.maxEncodedLen { 703 return errInternalNoProgress 704 } 705 706 checkpointIndex := -1 707 checkpointNBits := uint32(0) 708 decodedLen := c.decodedLen 709 710 for { 711 lSymbol := c.lHuff.decode(&c.bits) 712 if lSymbol < 0 { 713 return errInvalidBadSymbol 714 715 } else if lSymbol < 256 { 716 // It's a literal byte. 717 decodedLen++ 718 719 } else if lSymbol > 256 { 720 // It's a length/distance copy. 721 length := lBases[lSymbol-256] + c.bits.take(lExtras[lSymbol-256]) 722 if length < 0 { 723 if lBases[lSymbol-256] < 0 { 724 return errInvalidBadSymbol 725 } 726 return errInvalidNotEnoughData 727 } 728 729 dSymbol := c.dHuff.decode(&c.bits) 730 if dSymbol < 0 { 731 return errInvalidBadSymbol 732 } 733 distance := dBases[dSymbol] + c.bits.take(dExtras[dSymbol]) 734 if distance < 0 { 735 if dBases[dSymbol] < 0 { 736 return errInvalidBadSymbol 737 } 738 return errInvalidNotEnoughData 739 } 740 741 decodedLen += length 742 743 } else { 744 // It's the end-of-block. 745 return nil 746 } 747 748 // Check for overflow. 749 if decodedLen < 0 { 750 break 751 } 752 753 // Check the maxEncodedLen budget, considering that we might still need 754 // to write an end-of-block code. 755 encodedBits := 8*uint64(c.bits.index) - uint64(c.bits.nBits) 756 maxEncodedBits := 8 * uint64(c.maxEncodedLen) 757 if encodedBits+uint64(c.endCodeNBits) > maxEncodedBits { 758 break 759 } 760 761 checkpointIndex = c.bits.index 762 checkpointNBits = c.bits.nBits 763 c.decodedLen = decodedLen 764 } 765 766 if checkpointIndex < 0 { 767 return errInternalNoProgress 768 } 769 770 // If we're the first block in the DEFLATE stream, check if we would be 771 // better off replacing the Huffman block with a Stored block. 772 if isFirstBlock && (c.maxEncodedLen > 5) { 773 n := c.maxEncodedLen - 5 774 if n > 0xFFFF { 775 n = 0xFFFF 776 } 777 if c.decodedLen < int32(n) { 778 return errInternalReplaceWithSingleBlock 779 } 780 } 781 782 c.bits.index = checkpointIndex 783 c.bits.nBits = checkpointNBits 784 for c.bits.nBits >= 8 { 785 c.bits.index-- 786 c.bits.nBits -= 8 787 } 788 c.writeEndCode() 789 return errInternalSomeProgress 790} 791 792func (c *cutter) writeEndCode() { 793 // Change c.bits.bytes[c.bits.index-1:]'s bits to have the end-of-block 794 // code. That code's bits are given MSB-to-LSB but the wire format reads 795 // LSB-to-MSB. 796 for j := c.endCodeNBits; j > 0; j-- { 797 if c.bits.nBits == 0 { 798 c.bits.index++ 799 c.bits.nBits = 8 800 } 801 c.bits.nBits-- 802 803 // Set the n'th bit (LSB=0, MSB=7) of c.bits.bytes[c.bits.index-1] to 804 // be b. 805 n := 7 - c.bits.nBits 806 b := (c.endCodeBits >> (j - 1)) & 1 807 mask := uint32(1) << n 808 c.bits.bytes[c.bits.index-1] &^= uint8(mask) 809 c.bits.bytes[c.bits.index-1] |= uint8(mask * b) 810 } 811} 812