• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1diff --git a/jdhuff.c b/jdhuff.c
2index 679d221..b5a91eb 100644
3--- a/jdhuff.c
4+++ b/jdhuff.c
5@@ -28,6 +28,7 @@
6 #include "jpegcomp.h"
7 #include "jstdhuff.c"
8
9+#define INNER_BUF_SIZE 40960
10
11 /*
12  * Expanded entropy decoder object for Huffman decoding.
13@@ -51,6 +52,7 @@ typedef struct {
14
15   /* These fields are NOT loaded into local working state. */
16   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
17+  boolean use_inner_buf;        /* TRUE when entropy data is read from `buffer` instead of cinfo->src */
18
19   /* Pointers to derived tables (these workspaces have image lifespan) */
20   d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
21@@ -64,10 +66,45 @@ typedef struct {
22   /* Whether we care about the DC and AC coefficient values for each block */
23   boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
24   boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
25+
26+  JOCTET *buffer;                /* inner staging buffer; allocated with INNER_BUF_SIZE bytes in start_pass */
27+  const JOCTET *next_input_byte; /* next unread byte inside `buffer` */
28+  size_t bytes_in_buffer;        /* number of unread bytes remaining in `buffer` */
29+  size_t out_buf_size;           /* src->bytes_in_buffer as observed right after the refill done in start_pass */
30 } huff_entropy_decoder;
31
32 typedef huff_entropy_decoder *huff_entropy_ptr;
33
34+/*
35+ * Figure F.12: extend sign bit.
36+ * On some machines, a shift and add will be faster than a table lookup.
37+ */
38+
39+#define AVOID_TABLES
40+#ifdef AVOID_TABLES
41+
42+#define NEG_1  ((unsigned int)-1)
43+#define HUFF_EXTEND(x, s) \
44+  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
45+
46+#else
47+
48+#define HUFF_EXTEND(x, s) \
49+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
50+
51+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
52+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
53+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
54+};
55+
56+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
57+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
58+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
59+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
60+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
61+};
62+
63+#endif /* AVOID_TABLES */
64
65 /*
66  * Initialize for a Huffman-compressed scan.
67@@ -127,6 +164,30 @@ start_pass_huff_decoder(j_decompress_ptr cinfo)
68
69   /* Initialize restart counter */
70   entropy->restarts_to_go = cinfo->restart_interval;
71+
72+  // The inner buffer is only enabled when the source manager's buffer is small.
73+  entropy->use_inner_buf = FALSE;
74+  // src describes the bytes left over after the headers have been read.
75+  struct jpeg_source_mgr *src  = cinfo->src;
76+
77+  if (cinfo->restart_interval) {
78+      return;  // the inner buffer is never enabled when restart intervals are in use
79+  }
80+  // Exclude the mem_src case (data already ends in FF D9); at least 2 bytes are needed to inspect the tail.
81+  if (src->bytes_in_buffer >= 2 && !(src->next_input_byte[src->bytes_in_buffer - 1] == JPEG_EOI
82+      && src->next_input_byte[src->bytes_in_buffer - 2] == 0xFF)) {
83+    if (src->bytes_in_buffer <= INNER_BUF_SIZE) {
84+      entropy->use_inner_buf = TRUE;
85+      entropy->buffer =  (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, INNER_BUF_SIZE);
86+
87+      memcpy(entropy->buffer, src->next_input_byte, src->bytes_in_buffer);
88+      entropy->bytes_in_buffer = src->bytes_in_buffer;
89+      entropy->next_input_byte = entropy->buffer;
90+
91+      (*src->fill_input_buffer)(cinfo); // refill src; NOTE(review): the return value (suspension) is ignored here
92+      entropy->out_buf_size = src->bytes_in_buffer;
93+    }
94+  }
95 }
96
97
98@@ -172,7 +233,7 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
99   /* Figure C.1: make table of Huffman code length for each symbol */
100
101   p = 0;
102-  for (l = 1; l <= 16; l++) {
103+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
104     i = (int)htbl->bits[l];
105     if (i < 0 || p + i > 256)   /* protect against table overrun */
106       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
107@@ -205,7 +266,7 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
108   /* Figure F.15: generate decoding tables for bit-sequential decoding */
109
110   p = 0;
111-  for (l = 1; l <= 16; l++) {
112+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
113     if (htbl->bits[l]) {
114       /* valoffset[l] = huffval[] index of 1st symbol of code length l,
115        * minus the minimum code of length l
116@@ -227,18 +288,107 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
117    * with that code.
118    */
119
120-  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
121+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) {
122     dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
123-
124-  p = 0;
125-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
126-    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
127-      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
128-      /* Generate left-justified code followed by all possible bit sequences */
129-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
130-      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
131-        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
132-        lookbits++;
133+  }
134+  if (!isDC) {
135+    // Lookup tables for AC: the index is the (left-justified) Huffman code, the value packs symbol and length.
136+    // htbl->bits[l]: number of symbols whose code length is l
137+    // htbl->huffval[p]: the symbols, indexed in the same order as huffcode[]
138+
139+    // First-level table: codes of length <= HUFF_LOOKAHEAD
140+    p = 0;
141+    int coef0;
142+    for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
143+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
144+        /* l = current code's length, p = its index in huffcode[] & huffval[]. */
145+        /* Generate left-justified code followed by all possible bit sequences */
146+        UINT8 rs = htbl->huffval[p]; // run/size symbol: high nibble = zero run, low nibble = coefficient bit count
147+        UINT8 coef_bits = rs & 0x0f;
148+        if ((l + coef_bits) <= HUFF_LOOKAHEAD) {
149+          // code and coefficient bits both fit in the lookahead: store the decoded coefficient in the upper bits
150+          for (coef0 = 0; coef0 < 1 << coef_bits; coef0++) {
151+            int16_t coef_value = coef_bits ? HUFF_EXTEND(coef0, coef_bits) : 0;  // HUFF_EXTEND shifts by (s - 1), so it must not be used with s == 0
152+            lookbits = (huffcode[p] << (HUFF_LOOKAHEAD - l)) | (coef0 << (HUFF_LOOKAHEAD - l - coef_bits));
153+            for (ctr = 1 << (HUFF_LOOKAHEAD - l - coef_bits); ctr > 0; ctr--) {
154+              if (coef_bits == 0 && (rs >> 4) != 0xF) { // size 0 and run != 15 (e.g. EOB, 0x00): a run of 63 ends the block
155+                dtbl->lookup[lookbits] = MAKE_COEF1(coef_value) | MAKE_NB(l + coef_bits) | MAKE_ZERO_NUM1(63);
156+              } else { // ZRL (0xF0) and all symbols that carry a coefficient
157+                dtbl->lookup[lookbits] = MAKE_COEF1(coef_value) | MAKE_NB(l + coef_bits) | MAKE_ZERO_NUM1(rs >> 4);
158+              }
159+              lookbits++;
160+            }
161+          }
162+        } else {
163+          // coefficient bits do not fit: store code length and raw symbol only, as the original lookup table did
164+          lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
165+          for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
166+            dtbl->lookup[lookbits] = MAKE_NB(l) | MAKE_SYM(rs);
167+            lookbits++;
168+          }
169+        }
170+      }
171+    }
172+    // Codes longer than HUFF_LOOKAHEAD bits: build 2nd-level tables stored after the first (1 << HUFF_LOOKAHEAD) entries of lookup[].
173+    int offset = 0;
174+    int base = 1 << HUFF_LOOKAHEAD;
175+    int short_tbl_index = 0xFFFFFFFF;  // 1st-table slot of the 2nd-level table currently being filled (none yet)
176+    int cur_long_tbl_base = 1 << HUFF_LOOKAHEAD;
177+    int left;
178+    int offset_bit = 0;
179+    int first = p;  // the index of the first code of this length.
180+    int max_code_len;
181+    for (max_code_len = MAX_HUFF_CODE_LEN; max_code_len >= 1; max_code_len--) {  // find the longest code length in use
182+      if(htbl->bits[max_code_len]) {
183+          break;
184+      }
185+    }
186+    for (l = HUFF_LOOKAHEAD + 1; l <= MAX_HUFF_CODE_LEN; l++) {
187+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
188+        UINT8 rs = htbl->huffval[p]; // run/size symbol: high nibble = zero run, low nibble = coefficient bit count
189+        UINT8 coef_bits = rs & 0x0f;
190+        // the leading HUFF_LOOKAHEAD bits of the code select the 1st-table slot
191+        lookbits = huffcode[p] >> (l - HUFF_LOOKAHEAD); // index in 1st table
192+        // start a new 2nd-level table whenever the 1st-table slot changes
193+        if (lookbits != short_tbl_index) {
194+          short_tbl_index = lookbits;
195+          cur_long_tbl_base += offset;
196+          offset = 0;
197+          offset_bit = l - HUFF_LOOKAHEAD;
198+          left = (1 << offset_bit) - (htbl->bits[l] - (p - first));  // presumably: free slots left after the remaining codes of length l -- confirm
199+          while (offset_bit + HUFF_LOOKAHEAD < max_code_len && left > 0) {
200+            offset_bit++;
201+            left = (left << 1) - htbl->bits[offset_bit + HUFF_LOOKAHEAD];
202+          }
203+        }
204+        base = cur_long_tbl_base;
205+        // 1st-table entry: sub-table base, code length l (rewritten for every code sharing the slot), sub-table index width
206+        dtbl->lookup[lookbits] = MAKE_BASE(base) | MAKE_NB(l) | MAKE_EXTRA_BITS(offset_bit);
207+        // 2nd-table entries; NOTE(review): base + offset is not checked against the size of lookup[] -- confirm the bound holds for all tables
208+        for (ctr = 0; ctr < (1 << (offset_bit - (l - HUFF_LOOKAHEAD))); ctr++) {
209+          if (coef_bits == 0) {  // size-0 symbols are flagged with COEF_BITS = 0xF, which the fast decoder tests for
210+            dtbl->lookup[base + offset] = MAKE_NB(l) | MAKE_SYM(rs) | MAKE_COEF_BITS(0xF);
211+          } else {
212+            dtbl->lookup[base + offset] = MAKE_NB(l) | MAKE_SYM(rs);
213+          }
214+          offset++;
215+        }
216+      }
217+      first = p;
218+    }
219+  } else {
220+    for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
221+      dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
222+    p = 0;
223+    for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
224+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
225+        /* l = current code's length, p = its index in huffcode[] & huffval[]. */
226+        /* Generate left-justified code followed by all possible bit sequences */
227+        lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
228+        for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
229+          dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
230+          lookbits++;
231+        }
232       }
233     }
234   }
235@@ -291,6 +441,8 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
236   register const JOCTET *next_input_byte = state->next_input_byte;
237   register size_t bytes_in_buffer = state->bytes_in_buffer;
238   j_decompress_ptr cinfo = state->cinfo;
239+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
240+  struct jpeg_source_mgr *src  = cinfo->src;
241
242   /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
243   /* (It is assumed that no request will be for more than that many bits.) */
244@@ -302,10 +454,50 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
245
246       /* Attempt to read a byte */
247       if (bytes_in_buffer == 0) {
248-        if (!(*cinfo->src->fill_input_buffer) (cinfo))
249-          return FALSE;
250-        next_input_byte = cinfo->src->next_input_byte;
251-        bytes_in_buffer = cinfo->src->bytes_in_buffer;
252+        if (!entropy->use_inner_buf) {
253+          if (!(*src->fill_input_buffer)(cinfo))
254+            return FALSE;
255+          next_input_byte = src->next_input_byte;
256+          bytes_in_buffer = src->bytes_in_buffer;
257+        } else {
258+          if (entropy->out_buf_size <= INNER_BUF_SIZE) {
259+            entropy->bytes_in_buffer = 0;  // small source buffers: gather several of them into the inner buffer
260+            while (entropy->bytes_in_buffer <= INNER_BUF_SIZE - entropy->out_buf_size) {
261+              memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte, src->bytes_in_buffer);  // NOTE(review): assumes no refill returns more than out_buf_size bytes
262+              entropy->bytes_in_buffer += src->bytes_in_buffer;
263+              if (src->bytes_in_buffer >= 2 && src->next_input_byte[src->bytes_in_buffer - 1] == JPEG_EOI
264+                  && src->next_input_byte[src->bytes_in_buffer - 2] == 0xFF) {
265+                src->next_input_byte += src->bytes_in_buffer - 2;  // leave the trailing FF D9 visible in src
266+                src->bytes_in_buffer = 2;
267+                break;
268+              }
269+              if (!(*src->fill_input_buffer) (cinfo)) {
270+                return FALSE;
271+              }
272+            }
273+          } else {
274+            // large source buffers: move up to INNER_BUF_SIZE bytes from src into the inner buffer
275+            size_t fill_size = MIN(INNER_BUF_SIZE, src->bytes_in_buffer);
276+            memcpy(entropy->buffer, src->next_input_byte, fill_size);
277+            src->bytes_in_buffer -= fill_size;
278+            src->next_input_byte += fill_size;
279+            entropy->bytes_in_buffer = fill_size;
280+            if (!src->bytes_in_buffer && !(entropy->bytes_in_buffer >= 2 && entropy->buffer[entropy->bytes_in_buffer - 1] == JPEG_EOI
281+                && entropy->buffer[entropy->bytes_in_buffer - 2] == 0xFF)) {
282+              if (!(*src->fill_input_buffer)(cinfo)) {
283+                return FALSE;
284+              }
285+              fill_size = MIN(INNER_BUF_SIZE - entropy->bytes_in_buffer, src->bytes_in_buffer);  // never copy more than src actually holds
286+              memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte, fill_size);
287+              src->bytes_in_buffer -= fill_size;
288+              src->next_input_byte += fill_size;
289+              entropy->bytes_in_buffer += fill_size;
290+            }
291+          }
292+          entropy->next_input_byte = entropy->buffer;
293+          next_input_byte = entropy->buffer;
294+          bytes_in_buffer = entropy->bytes_in_buffer;
295+        }
296       }
297       bytes_in_buffer--;
298       c = *next_input_byte++;
299@@ -319,10 +511,50 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
300          */
301         do {
302           if (bytes_in_buffer == 0) {
303-            if (!(*cinfo->src->fill_input_buffer) (cinfo))
304-              return FALSE;
305-            next_input_byte = cinfo->src->next_input_byte;
306-            bytes_in_buffer = cinfo->src->bytes_in_buffer;
307+            if (!entropy->use_inner_buf) {
308+              if (!(*src->fill_input_buffer)(cinfo))
309+                return FALSE;
310+              next_input_byte = src->next_input_byte;
311+              bytes_in_buffer = src->bytes_in_buffer;
312+            } else {
313+              if (entropy->out_buf_size <= INNER_BUF_SIZE) {
314+                entropy->bytes_in_buffer = 0;  // small source buffers: gather several of them into the inner buffer
315+                while (entropy->bytes_in_buffer <= INNER_BUF_SIZE - entropy->out_buf_size) {
316+                  memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte, src->bytes_in_buffer);  // NOTE(review): assumes no refill returns more than out_buf_size bytes
317+                  entropy->bytes_in_buffer += src->bytes_in_buffer;
318+                  if (src->bytes_in_buffer >= 2 && src->next_input_byte[src->bytes_in_buffer - 1] == JPEG_EOI
319+                      && src->next_input_byte[src->bytes_in_buffer - 2] == 0xFF) {
320+                    src->next_input_byte += src->bytes_in_buffer - 2;  // leave the trailing FF D9 visible in src
321+                    src->bytes_in_buffer = 2;
322+                    break;
323+                  }
324+                  if (!(*src->fill_input_buffer) (cinfo)) {
325+                    return FALSE;
326+                  }
327+                }
328+              } else {
329+                // large source buffers: move up to INNER_BUF_SIZE bytes from src into the inner buffer
330+                size_t fill_size = MIN(INNER_BUF_SIZE, src->bytes_in_buffer);
331+                memcpy(entropy->buffer, src->next_input_byte, fill_size);
332+                src->bytes_in_buffer -= fill_size;
333+                src->next_input_byte += fill_size;
334+                entropy->bytes_in_buffer = fill_size;
335+                if (!src->bytes_in_buffer && !(entropy->bytes_in_buffer >= 2 && entropy->buffer[entropy->bytes_in_buffer - 1] == JPEG_EOI
336+                    && entropy->buffer[entropy->bytes_in_buffer - 2] == 0xFF)) {
337+                  if (!(*src->fill_input_buffer)(cinfo)) {
338+                    return FALSE;
339+                  }
340+                  fill_size = MIN(INNER_BUF_SIZE - entropy->bytes_in_buffer, src->bytes_in_buffer);  // never copy more than src actually holds
341+                  memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte, fill_size);
342+                  src->bytes_in_buffer -= fill_size;
343+                  src->next_input_byte += fill_size;
344+                  entropy->bytes_in_buffer += fill_size;
345+                }
346+              }
347+              entropy->next_input_byte = entropy->buffer;
348+              next_input_byte = entropy->buffer;
349+              bytes_in_buffer = entropy->bytes_in_buffer;
350+            }
351           }
352           bytes_in_buffer--;
353           c = *next_input_byte++;
354@@ -460,7 +692,7 @@ jpeg_huff_decode(bitread_working_state *state,
355
356   /* With garbage input we may reach the sentinel value l = 17. */
357
358-  if (l > 16) {
359+  if (l > MAX_HUFF_CODE_LEN) {
360     WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
361     return 0;                   /* fake a zero as the safest result */
362   }
363@@ -468,39 +700,6 @@ jpeg_huff_decode(bitread_working_state *state,
364   return htbl->pub->huffval[(int)(code + htbl->valoffset[l])];
365 }
366
367-
368-/*
369- * Figure F.12: extend sign bit.
370- * On some machines, a shift and add will be faster than a table lookup.
371- */
372-
373-#define AVOID_TABLES
374-#ifdef AVOID_TABLES
375-
376-#define NEG_1  ((unsigned int)-1)
377-#define HUFF_EXTEND(x, s) \
378-  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
379-
380-#else
381-
382-#define HUFF_EXTEND(x, s) \
383-  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
384-
385-static const int extend_test[16] = {   /* entry n is 2**(n-1) */
386-  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
387-  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
388-};
389-
390-static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
391-  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
392-  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
393-  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
394-  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
395-};
396-
397-#endif /* AVOID_TABLES */
398-
399-
400 /*
401  * Check for a restart marker & resynchronize decoder.
402  * Returns FALSE if must suspend.
403@@ -556,7 +755,11 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
404   /* Outer loop handles each block in the MCU */
405
406   /* Load up working state */
407-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
408+  if (entropy->use_inner_buf) {
409+    BITREAD_LOAD_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
410+  } else {
411+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
412+  }
413   state = entropy->saved;
414
415   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
416@@ -599,25 +802,62 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
417       /* Section F.2.2.2: decode the AC coefficients */
418       /* Since zeroes are skipped, output area must be cleared beforehand */
419       for (k = 1; k < DCTSIZE2; k++) {
420-        HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
421-
422-        r = s >> 4;
423-        s &= 15;
424-
425-        if (s) {
426-          k += r;
427-          CHECK_BIT_BUFFER(br_state, s, return FALSE);
428-          r = GET_BITS(s);
429-          s = HUFF_EXTEND(r, s);
430-          /* Output coefficient in natural (dezigzagged) order.
431-           * Note: the extra entries in jpeg_natural_order[] will save us
432-           * if k >= DCTSIZE2, which could happen if the data is corrupted.
433-           */
434-          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
435+        register int nb, look;
436+        if (bits_left < HUFF_LOOKAHEAD) {
437+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
438+            return FALSE;
439+          }
440+          get_buffer = br_state.get_buffer;
441+          bits_left = br_state.bits_left;
442+          if (bits_left < HUFF_LOOKAHEAD) {
443+            nb = 1;
444+            goto slowlabel;
445+          }
446+        }
447+        look = PEEK_BITS(HUFF_LOOKAHEAD);
448+        r = actbl->lookup[look];
449+        nb = GET_NB(r);
450+        uint32_t zero_num;
451+        uint32_t coef_bits = GET_COEF_BITS(r);
452+        if (nb <= HUFF_LOOKAHEAD) {
453+          DROP_BITS(nb);
454+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1);
455+          zero_num = GET_ZERO_NUM1(r);
456+          k += zero_num;
457+          if (coef_bits == 0) {  /* entry already holds the decoded coefficient (or 0 for EOB/ZRL) */
458+            s = GET_COEF1(r);
459+            if (k < DCTSIZE2) (*block)[jpeg_natural_order[k]] = (JCOEF)s;  /* EOB entries add 63 to k, which would index past jpeg_natural_order[] */
460+          } else {
461+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
462+            r = GET_BITS(coef_bits);
463+            s = HUFF_EXTEND(r, coef_bits);
464+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
465+          }
466         } else {
467-          if (r != 15)
468-            break;
469-          k += 15;
470+        slowlabel:
471+          nb = 1;
472+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
473+          get_buffer = br_state.get_buffer;
474+          bits_left = br_state.bits_left;
475+
476+          r = s >> 4;
477+          s &= 15;
478+
479+          if (s) {
480+            k += r;
481+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
482+            r = GET_BITS(s);
483+            s = HUFF_EXTEND(r, s);
484+            /* Output coefficient in natural (dezigzagged) order.
485+             * Note: the extra entries in jpeg_natural_order[] will save us
486+             * if k >= DCTSIZE2, which could happen if the data is corrupted.
487+             */
488+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
489+          } else {
490+            if (r != 15)
491+              break;
492+            k += 15;
493+          }
494         }
495       }
496
497@@ -626,26 +866,62 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
498       /* Section F.2.2.2: decode the AC coefficients */
499       /* In this path we just discard the values */
500       for (k = 1; k < DCTSIZE2; k++) {
501-        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
502-
503-        r = s >> 4;
504-        s &= 15;
505-
506-        if (s) {
507-          k += r;
508-          CHECK_BIT_BUFFER(br_state, s, return FALSE);
509-          DROP_BITS(s);
510+        register int nb, look;
511+        if (bits_left < HUFF_LOOKAHEAD) {
512+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
513+            return FALSE;
514+          }
515+          get_buffer = br_state.get_buffer;
516+          bits_left = br_state.bits_left;
517+          if (bits_left < HUFF_LOOKAHEAD) {
518+            nb = 1;
519+            goto slowlabel2;
520+          }
521+        }
522+        look = PEEK_BITS(HUFF_LOOKAHEAD);
523+        r = actbl->lookup[look];
524+        nb = GET_NB(r);
525+        uint32_t zero_num;
526+        uint32_t coef_bits = GET_COEF_BITS(r);
527+        if (nb <= HUFF_LOOKAHEAD) {
528+          DROP_BITS(nb);
529+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1);
530+          zero_num = GET_ZERO_NUM1(r);
531+          k += zero_num;
532+          if (coef_bits != 0) {
533+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
534+            DROP_BITS(coef_bits);
535+          }
536         } else {
537-          if (r != 15)
538-            break;
539-          k += 15;
540+        slowlabel2:
541+          nb = 1;
542+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
543+          get_buffer = br_state.get_buffer;
544+          bits_left = br_state.bits_left;
545+
546+          r = s >> 4;
547+          s &= 15;
548+
549+          if (s) {
550+            k += r;
551+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
552+            DROP_BITS(s);
553+          } else {
554+            if (r != 15)
555+              break;
556+            k += 15;
557+          }
558         }
559       }
560     }
561   }
562
563   /* Completed MCU, so update state */
564-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
565+  if (entropy->use_inner_buf) {
566+    BITREAD_SAVE_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
567+  } else {
568+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
569+  }
570   entropy->saved = state;
571   return TRUE;
572 }
573@@ -668,7 +944,11 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
574   /* Outer loop handles each block in the MCU */
575
576   /* Load up working state */
577-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
578+  if (entropy->use_inner_buf) {
579+    BITREAD_LOAD_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
580+  } else {
581+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
582+  }
583   buffer = (JOCTET *)br_state.next_input_byte;
584   state = entropy->saved;
585
586@@ -678,6 +958,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
587     d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
588     register int s, k, r, l;
589
590+    // DC
591     HUFF_DECODE_FAST(s, l, dctbl);
592     if (s) {
593       FILL_BIT_BUFFER_FAST
594@@ -699,36 +980,93 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
595     if (entropy->ac_needed[blkn] && block) {
596
597       for (k = 1; k < DCTSIZE2; k++) {
598-        HUFF_DECODE_FAST(s, l, actbl);
599-        r = s >> 4;
600-        s &= 15;
601-
602-        if (s) {
603-          k += r;
604-          FILL_BIT_BUFFER_FAST
605-          r = GET_BITS(s);
606-          s = HUFF_EXTEND(r, s);
607-          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
608+        FILL_BIT_BUFFER_FAST;
609+        r = PEEK_BITS(HUFF_LOOKAHEAD);    // peek HUFF_LOOKAHEAD bits first
610+        r = actbl->lookup[r];
611+        l = GET_NB(r);
612+        uint32_t zero_num;
613+        uint32_t coef_bits = GET_COEF_BITS(r);
614+
615+        if (l <= HUFF_LOOKAHEAD) {
616+          zero_num = GET_ZERO_NUM1(r);
617+          DROP_BITS(l);
618+          if (coef_bits == 0) {  // entry already holds the decoded coefficient (or 0 for EOB/ZRL)
619+            s = GET_COEF1(r);
620+            k += zero_num;
621+            if (k < DCTSIZE2) (*block)[jpeg_natural_order[k]] = (JCOEF)s;  // EOB entries add 63 to k, which would index past jpeg_natural_order[]
622+          } else {
623+            FILL_BIT_BUFFER_FAST
624+            r = GET_BITS(coef_bits);
625+            s = HUFF_EXTEND(r, coef_bits);
626+            k += zero_num;
627+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
628+          }
629         } else {
630-          if (r != 15) break;
631-          k += 15;
632+          uint32_t base = GET_BASE(r);  // the upper 16 bits hold the 2nd-level table base
633+          uint32_t offset_bits = GET_EXTRA_BITS(r);  // the low 4 bits hold offset_bits; l = nb is the longest code length in the 2nd-level table
634+          r = PEEK_BITS(l); // the first HUFF_LOOKAHEAD bits are already consumed by the 1st lookup; the low offset_bits index the 2nd-level table
635+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
636+          l = GET_NB(s); // actual code length
637+          coef_bits = GET_COEF_BITS(s);
638+          zero_num = GET_ZERO_NUM1(s);
639+          DROP_BITS(l);
640+          if (coef_bits == 0xF) {  // 0xF flags a size-0 symbol: ZRL when the run is 15, otherwise end of block
641+            if (zero_num != 0xF) {
642+              break;
643+            } else {
644+              k += 15;
645+            }
646+          } else {
647+            FILL_BIT_BUFFER_FAST
648+            r = GET_BITS(coef_bits);
649+            s = HUFF_EXTEND(r, coef_bits);
650+            k += zero_num;
651+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
652+          }
653         }
654       }
655
656     } else {
657
658       for (k = 1; k < DCTSIZE2; k++) {
659-        HUFF_DECODE_FAST(s, l, actbl);
660-        r = s >> 4;
661-        s &= 15;
662-
663-        if (s) {
664-          k += r;
665-          FILL_BIT_BUFFER_FAST
666-          DROP_BITS(s);
667+        FILL_BIT_BUFFER_FAST;
668+        r = PEEK_BITS(HUFF_LOOKAHEAD);    // peek HUFF_LOOKAHEAD bits first
669+        r = actbl->lookup[r];
670+        l = GET_NB(r);
671+        uint32_t zero_num;
672+        uint32_t coef_bits = GET_COEF_BITS(r);
673+
674+        if (l <= HUFF_LOOKAHEAD) {
675+          zero_num = GET_ZERO_NUM1(r);
676+          DROP_BITS(l);
677+          if (coef_bits == 0) {  // coefficient bits (if any) were already consumed as part of l
678+            s = GET_COEF1(r);
679+            k += zero_num;
680+          } else {
681+            FILL_BIT_BUFFER_FAST
682+            DROP_BITS(coef_bits);
683+            k += zero_num;
684+          }
685         } else {
686-          if (r != 15) break;
687-          k += 15;
688+          uint32_t base = GET_BASE(r);  // the upper 16 bits hold the 2nd-level table base
689+          uint32_t offset_bits = GET_EXTRA_BITS(r);  // the low 4 bits hold offset_bits; l = nb is the longest code length in the 2nd-level table
690+          r = PEEK_BITS(l); // the first HUFF_LOOKAHEAD bits are already consumed by the 1st lookup; the low offset_bits index the 2nd-level table
691+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
692+          l = GET_NB(s); // actual code length
693+          coef_bits = GET_COEF_BITS(s);
694+          zero_num = GET_ZERO_NUM1(s);
695+          DROP_BITS(l);
696+          if (coef_bits == 0xF) {  // 0xF flags a size-0 symbol: ZRL when the run is 15, otherwise end of block
697+            if (zero_num != 0xF) {
698+              break;
699+            } else {
700+              k += 15;
701+            }
702+          } else {
703+            FILL_BIT_BUFFER_FAST
704+            DROP_BITS(coef_bits);
705+            k += zero_num;
706+          }
707         }
708       }
709     }
710@@ -741,7 +1079,11 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
711
712   br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
713   br_state.next_input_byte = buffer;
714-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
715+  if (entropy->use_inner_buf) {
716+    BITREAD_SAVE_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
717+  } else {
718+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
719+  }
720   entropy->saved = state;
721   return TRUE;
722 }
723@@ -778,9 +1120,16 @@ decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
724     usefast = 0;
725   }
726
727-  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
728-      cinfo->unread_marker != 0)
729-    usefast = 0;
730+  if (entropy->use_inner_buf) {
731+      if (entropy->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
732+          cinfo->unread_marker != 0)
733+          usefast = 0;
734+  } else {
735+      if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
736+          cinfo->unread_marker != 0)
737+      usefast = 0;
738+  }
739+
740
741   /* If we've run out of data, just leave the MCU set to zeroes.
742    * This way, we return uniform gray for the remainder of the segment.
743@@ -826,6 +1175,8 @@ jinit_huff_decoder(j_decompress_ptr cinfo)
744   cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
745   entropy->pub.start_pass = start_pass_huff_decoder;
746   entropy->pub.decode_mcu = decode_mcu;
747+  entropy->next_input_byte =  NULL;
748+  entropy->bytes_in_buffer = 0;
749
750   /* Mark tables unallocated */
751   for (i = 0; i < NUM_HUFF_TBLS; i++) {
752diff --git a/jdhuff.h b/jdhuff.h
753index cfa0b7f..b83393b 100644
754--- a/jdhuff.h
755+++ b/jdhuff.h
756@@ -16,10 +16,53 @@
757
758 #include "jconfigint.h"
759
760+#include <stdint.h>
761+#include "jconfigint.h"
762
763 /* Derived data constructed for each Huffman table */
764+#define MAX_HUFF_CODE_LEN 16    /* longest Huffman code length handled by the table builders */
765+
766+#define HUFF_LOOKAHEAD  10      /* # of bits of lookahead  9-13 maybe */
767+#define HUFF_AC_SYMBOLS 192     /* symbol count assumed when sizing the 2nd-level area -- TODO confirm against AC tables with more symbols */
768+
769+#define HUFF_L_REM (MAX_HUFF_CODE_LEN - HUFF_LOOKAHEAD)  /* code bits left over after the lookahead */
770+#define HUFF_L_DUP ((1 << HUFF_L_REM) - (HUFF_L_REM + 1))
771+#define HUFF_L_UNUSED ((1 << HUFF_L_REM) - (1 << ((HUFF_L_REM)/2)) - (1 << ((HUFF_L_REM + 1)/2)) + 1)
772+#define HUFF_L_SIZE (HUFF_AC_SYMBOLS + HUFF_L_DUP + HUFF_L_UNUSED)
773+#define HUFF_CODE_LARGE_LONG_ALIGNED (HUFF_L_SIZE + (-HUFF_L_SIZE & 0xf))  /* HUFF_L_SIZE rounded up to a multiple of 16 */
774+
775+#define COEF_BITS_OFFSET 0   /* lookup entry layout: bits 0-3 coefficient bit count */
776+#define COEF_BITS_BITS 4
777+#define ZERO_NUM1_OFFSET 4   /* bits 4-10 zero run */
778+#define ZERO_NUM_BITS 7
779+#define NB_OFFSET 11         /* bits 11-15 number of bits to drop */
780+#define NB_BITS 5
781+#define COEF1_OFFSET 16      /* bits 16-31 decoded coefficient, or 2nd-level table base */
782+#define COEF_VALUE_BITS 16
783+
784+#define EXTRA_BITS_OFFSET COEF_BITS_OFFSET  // index width of the 2nd-level table (shares the coefficient-bits field)
785+#define EXTRA_BITS_BITS COEF_BITS_BITS
786+
787+#define SYM_OFFSET COEF_BITS_OFFSET  /* a raw run/size symbol occupies bits 0-7 */
788+
789+#define MAKE_BITS(x, s)  ((x) << (s))  /* fully parenthesized so the expansion is safe next to any operator */
790+#define GETS_BITS(x, s, l)  (((x) >> (s)) & ((0x1L << (l)) - 1))
791+
792+#define MAKE_ZERO_NUM1(x) MAKE_BITS(x, ZERO_NUM1_OFFSET)
793+#define MAKE_COEF_BITS(x) MAKE_BITS(x, COEF_BITS_OFFSET)
794+#define MAKE_SYM(x) MAKE_BITS(x, SYM_OFFSET)
795+#define MAKE_NB(x) MAKE_BITS(x, NB_OFFSET)
796+#define MAKE_COEF1(x) (((uint64_t)(uint16_t)(x)) << COEF1_OFFSET)  /* widen before shifting so the shift never overflows int */
797+#define MAKE_BASE(x) MAKE_BITS(x, COEF1_OFFSET)
798+#define MAKE_EXTRA_BITS(x) MAKE_BITS(x, EXTRA_BITS_OFFSET)
799+
800+#define GET_ZERO_NUM1(x) GETS_BITS(x, ZERO_NUM1_OFFSET, ZERO_NUM_BITS)
801+#define GET_COEF_BITS(x) GETS_BITS(x, COEF_BITS_OFFSET, COEF_BITS_BITS)
802+#define GET_NB(x) GETS_BITS(x, NB_OFFSET, NB_BITS)
803+#define GET_COEF1(x) GETS_BITS(x, COEF1_OFFSET, COEF_VALUE_BITS)
804+#define GET_BASE(x) GETS_BITS(x, COEF1_OFFSET, COEF_VALUE_BITS)
805+#define GET_EXTRA_BITS(x) GETS_BITS(x, EXTRA_BITS_OFFSET, EXTRA_BITS_BITS)
806
807-#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
808
809 typedef struct {
810   /* Basic tables: (element [0] of each array is unused) */
811@@ -44,7 +87,7 @@ typedef struct {
812    * if too long.  The next 8 bits of each entry contain the
813    * symbol.
814    */
815-  int lookup[1 << HUFF_LOOKAHEAD];
816+  int lookup[(1 << HUFF_LOOKAHEAD) + HUFF_CODE_LARGE_LONG_ALIGNED];
817 } d_derived_tbl;
818
819 /* Expand a Huffman table definition into the derived format */
820@@ -130,12 +173,25 @@ typedef struct {                /* Bitreading working state within an MCU */
821   get_buffer = permstate.get_buffer; \
822   bits_left = permstate.bits_left;
823
824+#define BITREAD_LOAD_STATE_INNER_BUF(cinfop, entropy, permstate) /* load bit-reader working state; the input pointer and byte count come from entropy's own fields */ \
825+  br_state.cinfo = cinfop; \
826+  br_state.next_input_byte = entropy->next_input_byte; \
827+  br_state.bytes_in_buffer = entropy->bytes_in_buffer; \
828+  get_buffer = permstate.get_buffer; \
829+  bits_left = permstate.bits_left; /* the expansion already ends in ';' and relies on br_state, get_buffer and bits_left being in scope at the use site */
830+
831 #define BITREAD_SAVE_STATE(cinfop, permstate) \
832   cinfop->src->next_input_byte = br_state.next_input_byte; \
833   cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
834   permstate.get_buffer = get_buffer; \
835   permstate.bits_left = bits_left
836
837+#define BITREAD_SAVE_STATE_INNER_BUF(cinfop, entropy, permstate) /* store bit-reader working state back into entropy's own fields; cinfop is accepted but not used */ \
838+  entropy->next_input_byte = br_state.next_input_byte;            \
839+  entropy->bytes_in_buffer = br_state.bytes_in_buffer;           \
840+  permstate.get_buffer = get_buffer; \
841+  permstate.bits_left = bits_left
842+
843 /*
844  * These macros provide the in-line portion of bit fetching.
845  * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
846diff --git a/jdphuff.c b/jdphuff.c
847index c6d82ca..00ed535 100644
848--- a/jdphuff.c
849+++ b/jdphuff.c
850@@ -52,7 +52,7 @@ typedef struct {
851
852   /* These fields are NOT loaded into local working state. */
853   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
854-
855+  boolean use_inner_buf;
856   /* Pointers to derived tables (these workspaces have image lifespan) */
857   d_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
858
859@@ -71,6 +71,126 @@ METHODDEF(boolean) decode_mcu_DC_refine(j_decompress_ptr cinfo,
860 METHODDEF(boolean) decode_mcu_AC_refine(j_decompress_ptr cinfo,
861                                         JBLOCKROW *MCU_data);
862
863+LOCAL(void)  /* Expand DC or AC Huffman table number tblno into the derived decoding table *pdtbl. */
864+jpeg_make_dp_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
865+                        d_derived_tbl **pdtbl)
866+{
867+  JHUFF_TBL *htbl;
868+  d_derived_tbl *dtbl;
869+  int p, i, l, si, numsymbols;
870+  int lookbits, ctr;
871+  char huffsize[257];
872+  unsigned int huffcode[257];
873+  unsigned int code;
874+  /* *pdtbl is allocated on first use.  Only maxcode[], valoffset[] and the first (1 << HUFF_LOOKAHEAD) entries of lookup[] are written here. */
875+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
876+   * paralleling the order of the symbols themselves in htbl->huffval[].
877+   */
878+
879+  /* Find the input Huffman table; an out-of-range or missing table is reported through ERREXIT1 */
880+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
881+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
882+  htbl =
883+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
884+  if (htbl == NULL)
885+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
886+
887+  /* Allocate a workspace (from the JPOOL_IMAGE pool) if we haven't already done so. */
888+  if (*pdtbl == NULL)
889+    *pdtbl = (d_derived_tbl *)
890+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
891+                                  sizeof(d_derived_tbl));
892+  dtbl = *pdtbl;
893+  dtbl->pub = htbl;             /* fill in back link */
894+
895+  /* Figure C.1: make table of Huffman code length for each symbol */
896+
897+  p = 0;
898+  for (l = 1; l <= 16; l++) {
899+    i = (int)htbl->bits[l];
900+    if (i < 0 || p + i > 256)   /* protect against table overrun */
901+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
902+    while (i--)
903+      huffsize[p++] = (char)l;
904+  }
905+  huffsize[p] = 0;
906+  numsymbols = p;
907+
908+  /* Figure C.2: generate the codes themselves */
909+  /* We also validate that the counts represent a legal Huffman code tree. */
910+
911+  code = 0;
912+  si = huffsize[0];
913+  p = 0;
914+  while (huffsize[p]) {
915+    while (((int)huffsize[p]) == si) {
916+      huffcode[p++] = code;
917+      code++;
918+    }
919+    /* code is now 1 more than the last code used for codelength si; but
920+     * it must still fit in si bits, since no code is allowed to be all ones.
921+     */
922+    if (((JLONG)code) >= (((JLONG)1) << si))
923+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
924+    code <<= 1;
925+    si++;
926+  }
927+
928+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
929+
930+  p = 0;
931+  for (l = 1; l <= 16; l++) {
932+    if (htbl->bits[l]) {
933+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
934+       * minus the minimum code of length l
935+       */
936+      dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p];
937+      p += htbl->bits[l];
938+      dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */
939+    } else {
940+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
941+    }
942+  }
943+  dtbl->valoffset[17] = 0;
944+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
945+
946+  /* Compute lookahead tables to speed up decoding.
947+   * First we set every entry to (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD, i.e. a
948+   * length field of HUFF_LOOKAHEAD + 1 meaning "too long"; then we iterate
949+   * through the Huffman codes that are short enough and fill in all the
950+   * entries that correspond to bit sequences starting with that code.
951+   */
952+
953+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
954+    dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
955+
956+  p = 0;
957+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
958+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
959+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
960+      /* Generate left-justified code followed by all possible bit sequences */
961+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
962+      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
963+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p]; /* code length above bit HUFF_LOOKAHEAD, symbol in the low bits */
964+        lookbits++;
965+      }
966+    }
967+  }
968+
969+  /* Validate symbols as being reasonable.
970+   * For AC tables, we make no check, but accept all byte values 0..255.
971+   * For DC tables, we require the symbols to be in range 0..15.
972+   * (Tighter bounds could be applied depending on the data depth and mode,
973+   * but this is sufficient to ensure safe decoding.)
974+   */
975+  if (isDC) {
976+    for (i = 0; i < numsymbols; i++) {
977+      int sym = htbl->huffval[i];
978+      if (sym < 0 || sym > 15)
979+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
980+    }
981+  }
982+}
983
984 /*
985  * Initialize for a Huffman-compressed scan.
986@@ -163,12 +283,12 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
987       if (cinfo->Ah == 0) {     /* DC refinement needs no table */
988         tbl = compptr->dc_tbl_no;
989         pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
990-        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
991+        jpeg_make_dp_derived_tbl(cinfo, TRUE, tbl, pdtbl);
992       }
993     } else {
994       tbl = compptr->ac_tbl_no;
995       pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
996-      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
997+      jpeg_make_dp_derived_tbl(cinfo, FALSE, tbl, pdtbl);
998       /* remember the single active table */
999       entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
1000     }
1001@@ -657,6 +777,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
1002   entropy = (phuff_entropy_ptr)
1003     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1004                                 sizeof(phuff_entropy_decoder));
1005+  entropy->use_inner_buf = 0;
1006   cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
1007   entropy->pub.start_pass = start_pass_phuff_decoder;
1008
1009diff --git a/jutils.c b/jutils.c
1010index 5c5bb17..23931b7 100644
1011--- a/jutils.c
1012+++ b/jutils.c
1013@@ -53,7 +53,7 @@ const int jpeg_zigzag_order[DCTSIZE2] = {
1014  * fake entries.
1015  */
1016
1017-const int jpeg_natural_order[DCTSIZE2 + 16] = {
1018+const int jpeg_natural_order[DCTSIZE2 + 64] = {
1019   0,  1,  8, 16,  9,  2,  3, 10,
1020  17, 24, 32, 25, 18, 11,  4,  5,
1021  12, 19, 26, 33, 40, 48, 41, 34,
1022@@ -63,6 +63,12 @@ const int jpeg_natural_order[DCTSIZE2 + 16] = {
1023  58, 59, 52, 45, 38, 31, 39, 46,
1024  53, 60, 61, 54, 47, 55, 62, 63,
1025  63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
1026+ 63, 63, 63, 63, 63, 63, 63, 63,
1027+ 63, 63, 63, 63, 63, 63, 63, 63,
1028+ 63, 63, 63, 63, 63, 63, 63, 63,
1029+ 63, 63, 63, 63, 63, 63, 63, 63,
1030+ 63, 63, 63, 63, 63, 63, 63, 63,
1031+ 63, 63, 63, 63, 63, 63, 63, 63,
1032  63, 63, 63, 63, 63, 63, 63, 63
1033 };
1034
1035