• Home
  • Raw
  • Download

Lines Matching +full:results +full:- +full:code

1 /* enough.c -- determine the maximum size of inflate's Huffman code tables over
15 Fix bug for initial root table size == max - 1
19 Clean up code indentation
20 1.5 5 Aug 2018 Clean up code style, formatting, and comments
26 maximum code length in bits to determine the maximum table size for zlib's
31 in the same code for the counting, as do permutations of the assignments of
34 We build a code from shorter to longer lengths, determining how many symbols
36 be coded, what the last code length used was, and how many bit patterns of
37 that length remain unused. Then we add one to the code length and double the
38 number of unused patterns to graduate to the next code length. We then
39 assign all portions of the remaining symbols to that code length that
40 preserve the properties of a correct and eventually complete code. Those
45 The inflate Huffman decoding algorithm uses two-level lookup tables for
46 speed. There is a single first-level table to decode codes up to root bits
51 pointed to regardless of the bits that follow the short code. If the code is
52 longer than root bits, then the table entry points to a second-level table.
53 The size of that table is determined by the longest code with that root-bit
54 prefix. If that longest code has length len, then the table has size 1 <<
55 (len - root), to index the remaining bits in that set of codes. Each
56 subsequent root-bit prefix then has its own sub-table. The total number of
57 table entries required by the code is calculated incrementally as the number
59 than root bits, then root is reduced to the longest code length, resulting
60 in a single, smaller, one-level table.
63 the log2 of the number of symbols), where the shortest code has more bits
65 code. This program, by design, does not handle that case, so it is verified
69 the default arguments), the intermediate states in the build-up of a code
72 the maximum code length in bits. However this is a very small price to pay
76 intermediate states are noted by a non-zero count in a saved-results array.
78 are used to look at all sub-codes from those junctures for their inflate
81 construction of those sub-codes and the associated calculation of the table
83 Beginning the code examination at (root + 1) bit codes, which is enabled by
87 approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes
89 for the default arguments of 286 symbols limited to 15-bit codes.
92 exceed the capacity of an eight-byte integer with a large number of symbols
93 and a large maximum code length, so multiple-precision arithmetic would need
99 remaining at the maximum length. This limits the maximum code length to the
101 the symbols in a flat code. The code_t type identifies where the bit-pattern
115 typedef uintmax_t big_t; // type for code counting
118 struct tab { // type for been-here check
123 /* The array for saving results, num[], is indexed with this triplet:
125 syms: number of symbols remaining to code
129 Those indices are constrained thusly when saving results:
131 syms: 3..totsym (totsym == total symbols to code)
132 left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
133 len: 1..max - 1 (max == maximum code length in bits)
135 syms == 2 is not saved since that immediately leads to a single code. left
138 ends at syms-1 since left == syms immediately results in a single code.
139 (left > sym is not allowed since that would result in an incomplete code.)
140 len is less than max, since the code completes immediately when len == max.
144 the array with length max-1 lists for the len index, with syms-3 of those
145 for each symbol. There are totsym-2 of those, with each one varying in
149 For the deflate example of 286 symbols limited to 15-bit codes, the array
150 has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half
151 of the space allocated for saved results is actually used -- not all
159 remaining unused entries in the current inflate sub-table. Each indexed
169 For the deflate example of 286 symbols limited to 15-bit codes, the bit
173 // Type for a variable-length, allocated string.
182 s->str[0] = 0; in string_clear()
183 s->len = 0; in string_clear()
188 s->size = 16; in string_init()
189 s->str = malloc(s->size); in string_init()
190 assert(s->str != NULL && "out of memory"); in string_init()
196 free(s->str); in string_free()
197 s->str = NULL; in string_free()
198 s->size = 0; in string_free()
199 s->len = 0; in string_free()
202 // Save the results of printf with fmt and the subsequent argument list to s.
207 size_t len = s->len; in string_printf()
208 int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); in string_printf()
210 s->len += ret; in string_printf()
211 if (s->size < s->len + 1) { in string_printf()
213 s->size <<= 1; in string_printf()
214 assert(s->size != 0 && "overflow"); in string_printf()
215 } while (s->size < s->len + 1); in string_printf()
216 s->str = realloc(s->str, s->size); in string_printf()
217 assert(s->str != NULL && "out of memory"); in string_printf()
218 vsnprintf(s->str + len, s->size - len, fmt, ap); in string_printf()
226 int root; // size of base code table in bits
227 int large; // largest code table so far
231 int *code; // number of symbols assigned to each bit length member
232 big_t *num; // saved results array for code counting
238 return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + in map()
239 (left >> 1) - 1) * (g.max - 1) + in map()
240 len - 1; in map()
253 free(g.code); g.code = NULL; in cleanup()
259 // len unused -- return -1 if there is an overflow in the counting. Keep a
260 // record of previous results in num to prevent repeating the same calculation.
262 // see if only one possible code in count()
273 return got; // we have -- return the saved result in count()
275 // we need to use at least this many bit patterns so that the code won't be in count()
277 int least = (left << 1) - syms; in count()
283 // no limit to the code length, this would become: most = left - 1) in count()
284 int most = (((code_t)left << (g.max - len)) - syms) / in count()
285 (((code_t)1 << (g.max - len)) - 1); in count()
290 got = count(syms - use, (left - use) << 1, len + 1); in count()
292 if (got == (big_t)-1 || sum < got) // overflow in count()
293 return (big_t)-1; in count()
311 mem -= 1 << g.root; // mem always includes the root table in been_here()
323 // we haven't been here before -- set the bit to show we have now in been_here()
335 memset(vector + g.done[index].len, 0, length - g.done[index].len); in been_here()
359 // number of code structures used so far is mem, and the number remaining in
360 // the current sub-table is rem.
362 // see if we have a complete code in examine()
364 // set the last code entry in examine()
365 g.code[len] = left; in examine()
367 // complete computation of memory used by this code in examine()
369 left -= rem; in examine()
370 rem = 1 << (len - g.root); in examine()
375 // if this is at the maximum, show the sub-code in examine()
378 // printed sub-codes from the previous maximum in examine()
384 // compute the starting state for this sub-code in examine()
387 for (int bits = g.max; bits > g.root; bits--) { in examine()
388 syms += g.code[bits]; in examine()
389 left -= g.code[bits]; in examine()
394 // print the starting state and the resulting sub-code to g.out in examine()
396 syms, g.root + 1, ((1 << g.root) - left) << 1); in examine()
398 if (g.code[bits]) in examine()
399 string_printf(&g.out, " %d[%d]", g.code[bits], bits); in examine()
404 g.code[len] = 0; in examine()
412 // we need to use at least this many bit patterns so that the code won't be in examine()
414 int least = (left << 1) - syms; in examine()
420 // no limit to the code length, this would become: most = left - 1) in examine()
421 int most = (((code_t)left << (g.max - len)) - syms) / in examine()
422 (((code_t)1 << (g.max - len)) - 1); in examine()
424 // occupy least table spaces, creating new sub-tables as needed in examine()
427 use -= rem; in examine()
428 rem = 1 << (len - g.root); in examine()
431 rem -= use; in examine()
435 g.code[len] = use; in examine()
436 examine(syms - use, (left - use) << 1, len + 1, in examine()
437 mem + (rem ? 1 << (len - g.root) : 0), rem << 1); in examine()
439 rem = 1 << (len - g.root); in examine()
442 rem--; in examine()
446 g.code[len] = 0; in examine()
449 // Look at all sub-codes starting with root + 1 bits. Look at only the valid
450 // intermediate code states (syms, left, len). For each completed code,
455 // clear code in enough()
457 g.code[n] = 0; in enough()
460 string_clear(&g.out); // empty saved results in enough()
473 if (g.num[index - 1] && n <= left << 1) in enough()
474 examine((n - left) << 1, (n - left) << 1, g.root + 1, in enough()
484 // maximum number of symbols, initial root table size, and maximum code length
485 // in bits -- those are the command arguments in that order. The default values
486 // are 286, 9, and 15 respectively, for the deflate literal/length code. The
491 // associated sub-code (starting at root + 1 == 10 bits) is shown.
493 // To count and examine prefix codes that are not length-limited, provide a
496 // For the deflate literal/length code, use "enough". For the deflate distance
497 // code, use "enough 30 6".
500 g.code = NULL; in main()
505 // get arguments -- default to the deflate literal/length code in main()
523 // if not restricting the code length, the longest is syms - 1 in main()
524 if (g.max > syms - 1) in main()
525 g.max = syms - 1; in main()
533 if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { in main()
534 fputs("abort: code length too long for internal types\n", stderr); in main()
538 // reject impossible code requests in main()
539 if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { in main()
545 // allocate code vector in main()
546 g.code = calloc(g.max + 1, sizeof(int)); in main()
547 assert(g.code != NULL && "out of memory"); in main()
549 // determine size of saved results array, checking for overflows, in main()
552 g.num = NULL; // won't be saving any results in main()
555 int n = (syms - 1) >> 1; in main()
556 assert(g.size <= (size_t)-1 / n && "overflow"); in main()
558 n = g.max - 1; in main()
559 assert(g.size <= (size_t)-1 / n && "overflow"); in main()
570 assert(got != (big_t)-1 && sum >= got && "overflow"); in main()
573 if (g.max < syms - 1) in main()
574 printf(" (%d-bit length limit)\n", g.max); in main()
592 fputs("cannot handle minimum code lengths > root", stderr); in main()