diff --git a/jdhuff.c b/jdhuff.c
index 679d221..b5a91eb 100644
--- a/jdhuff.c
+++ b/jdhuff.c
@@ -28,6 +28,7 @@
 #include "jpegcomp.h"
 #include "jstdhuff.c"
 
+#define INNER_BUF_SIZE  40960   /* bytes in the decoder's inner input buffer */
 
 /*
  * Expanded entropy decoder object for Huffman decoding.
@@ -51,6 +52,7 @@ typedef struct {
 
   /* These fields are NOT loaded into local working state. */
   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  boolean use_inner_buf;        /* TRUE if input is read via buffer[] below */
 
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
@@ -64,10 +66,45 @@ typedef struct {
   /* Whether we care about the DC and AC coefficient values for each block */
   boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
   boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
+
+  JOCTET *buffer;                 /* inner buffer, INNER_BUF_SIZE bytes */
+  const JOCTET *next_input_byte;  /* next unread byte within buffer[] */
+  size_t bytes_in_buffer;         /* unread bytes remaining in buffer[] */
+  size_t out_buf_size;            /* bytes the outer fill gave at pass start */
 } huff_entropy_decoder;
 
 typedef huff_entropy_decoder *huff_entropy_ptr;
 
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#define AVOID_TABLES
+#ifdef AVOID_TABLES
+
+#define NEG_1  ((unsigned int)-1)
+#define HUFF_EXTEND(x, s) \
+  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
+
+#else
+
+#define HUFF_EXTEND(x, s) \
+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
+};
+
+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
+};
+
+#endif /* AVOID_TABLES */
 
 /*
  * Initialize for a Huffman-compressed scan.
@@ -127,6 +164,116 @@ start_pass_huff_decoder(j_decompress_ptr cinfo)
 
   /* Initialize restart counter */
   entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* The inner buffer is only used when the outer buffer is very small */
+  entropy->use_inner_buf = FALSE;
+  /* bytes left after the header has been read */
+  struct jpeg_source_mgr *src = cinfo->src;
+
+  if (cinfo->restart_interval) {
+    return;
+  }
+  /* Exclude the memory-source case, where the buffer already ends in EOI.
+   * Only peek at the tail when at least two bytes are really there.
+   */
+  if (!(src->bytes_in_buffer >= 2 &&
+        src->next_input_byte[src->bytes_in_buffer - 1] == JPEG_EOI &&
+        src->next_input_byte[src->bytes_in_buffer - 2] == 0xFF)) {
+    if (src->bytes_in_buffer <= INNER_BUF_SIZE) {
+      entropy->use_inner_buf = TRUE;
+      entropy->buffer = (JOCTET *)
+        (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                    INNER_BUF_SIZE);
+
+      memcpy(entropy->buffer, src->next_input_byte, src->bytes_in_buffer);
+      entropy->bytes_in_buffer = src->bytes_in_buffer;
+      entropy->next_input_byte = entropy->buffer;
+
+      /* The outer buffer has been copied, so prefetch the next one.
+       * NOTE(review): a source that refuses here presumably leaves its state
+       * untouched, so fall back to decoding straight from the outer buffer.
+       */
+      if (!(*src->fill_input_buffer) (cinfo)) {
+        entropy->use_inner_buf = FALSE;
+      } else {
+        entropy->out_buf_size = src->bytes_in_buffer;
+      }
+    }
+  }
 }
 
+
+/*
+ * Refill the inner buffer of the Huffman decoder from the source manager.
+ * Returns FALSE if the source manager could not supply more data.
+ *
+ * NOTE(review): a refill overwrites bytes that a suspended MCU would have to
+ * re-read, so the inner buffer presumably cannot support suspension.
+ */
+
+LOCAL(boolean)
+refill_inner_buffer(j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  struct jpeg_source_mgr *src = cinfo->src;
+  size_t fill_size, room;
+  boolean at_eoi;
+
+  entropy->bytes_in_buffer = 0;
+  if (entropy->out_buf_size <= INNER_BUF_SIZE) {
+    /* Small outer buffer: gather whole outer buffers until one more might
+     * not fit.
+     */
+    while (entropy->bytes_in_buffer <=
+           INNER_BUF_SIZE - entropy->out_buf_size) {
+      room = INNER_BUF_SIZE - entropy->bytes_in_buffer;
+      fill_size = MIN(src->bytes_in_buffer, room);
+      at_eoi = (src->bytes_in_buffer >= 2 &&
+                src->next_input_byte[src->bytes_in_buffer - 1] == JPEG_EOI &&
+                src->next_input_byte[src->bytes_in_buffer - 2] == 0xFF);
+      memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte,
+             fill_size);
+      entropy->bytes_in_buffer += fill_size;
+      if (fill_size < src->bytes_in_buffer) {
+        /* Outer buffer was larger than expected: keep the remainder there */
+        src->next_input_byte += fill_size;
+        src->bytes_in_buffer -= fill_size;
+        break;
+      }
+      if (at_eoi) {
+        /* Keep the two trailing marker bytes available in the outer buffer */
+        src->next_input_byte += src->bytes_in_buffer - 2;
+        src->bytes_in_buffer = 2;
+        break;
+      }
+      if (!(*src->fill_input_buffer) (cinfo))
+        return FALSE;
+    }
+  } else {
+    /* Large outer buffer: take up to one inner buffer's worth from it */
+    fill_size = MIN(INNER_BUF_SIZE, src->bytes_in_buffer);
+    memcpy(entropy->buffer, src->next_input_byte, fill_size);
+    src->bytes_in_buffer -= fill_size;
+    src->next_input_byte += fill_size;
+    entropy->bytes_in_buffer = fill_size;
+    at_eoi = (fill_size >= 2 &&
+              entropy->buffer[fill_size - 1] == JPEG_EOI &&
+              entropy->buffer[fill_size - 2] == 0xFF);
+    if (!src->bytes_in_buffer && !at_eoi) {
+      if (!(*src->fill_input_buffer) (cinfo))
+        return FALSE;
+      /* Never copy more than the source actually delivered */
+      fill_size = MIN(INNER_BUF_SIZE - entropy->bytes_in_buffer,
+                      src->bytes_in_buffer);
+      memcpy(entropy->buffer + entropy->bytes_in_buffer, src->next_input_byte,
+             fill_size);
+      src->bytes_in_buffer -= fill_size;
+      src->next_input_byte += fill_size;
+      entropy->bytes_in_buffer += fill_size;
+    }
+  }
+  entropy->next_input_byte = entropy->buffer;
+  return TRUE;
+}
+
 
@@ -172,7 +319,7 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
   /* Figure C.1: make table of Huffman code length for each symbol */
 
   p = 0;
-  for (l = 1; l <= 16; l++) {
+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
     i = (int)htbl->bits[l];
     if (i < 0 || p + i > 256)   /* protect against table overrun */
       ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
@@ -205,7 +352,7 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
   /* Figure F.15: generate decoding tables for bit-sequential decoding */
 
   p = 0;
-  for (l = 1; l <= 16; l++) {
+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
     if (htbl->bits[l]) {
       /* valoffset[l] = huffval[] index of 1st symbol of code length l,
        * minus the minimum code of length l
@@ -227,18 +374,127 @@ jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
    * with that code.
    */
 
-  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++) {
     dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
-
-  p = 0;
-  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
-    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
-      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
-      /* Generate left-justified code followed by all possible bit sequences */
-      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
-      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
-        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
-        lookbits++;
+  }
+  if (!isDC) {
+    /* Lookup tables for AC: the index is the Huffman code, the value holds
+     * the symbol and the code length.
+     * htbl->bits[l] is the number of symbols whose code length is l;
+     * htbl->huffval[] lists the symbols in code order.
+     */
+    const int lookup_size =
+      (int)(sizeof(dtbl->lookup) / sizeof(dtbl->lookup[0]));
+    int coef0;
+
+    /* Second-level slots that no code reaches are never written below, so
+     * preset them to an end-of-block entry rather than leaving them
+     * uninitialized.
+     */
+    for (i = 1 << HUFF_LOOKAHEAD; i < lookup_size; i++)
+      dtbl->lookup[i] = MAKE_NB(MAX_HUFF_CODE_LEN) | MAKE_COEF_BITS(0xF);
+
+    /* nb <= LOOKAHEAD */
+    p = 0;
+    for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
+        /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+        /* Generate left-justified code followed by all possible bit sequences */
+        UINT8 rs = htbl->huffval[p];    /* run/size symbol (zero run + coef bits) */
+        UINT8 coef_bits = rs & 0x0f;
+        if ((l + coef_bits) <= HUFF_LOOKAHEAD) {
+          /* save DCT coeffs in higher bits */
+          for (coef0 = 0; coef0 < 1 << coef_bits; coef0++) {
+            /* HUFF_EXTEND() shifts by (s - 1), so never hand it s == 0 */
+            int16_t coef_value =
+              coef_bits ? HUFF_EXTEND(coef0, coef_bits) : 0;
+            lookbits = (huffcode[p] << (HUFF_LOOKAHEAD - l)) |
+                       (coef0 << (HUFF_LOOKAHEAD - l - coef_bits));
+            for (ctr = 1 << (HUFF_LOOKAHEAD - l - coef_bits); ctr > 0; ctr--) {
+              if (coef_bits == 0 && (rs >> 4) != 0xF) {     /* end of block */
+                dtbl->lookup[lookbits] = MAKE_COEF1(coef_value) |
+                  MAKE_NB(l + coef_bits) | MAKE_ZERO_NUM1(63);
+              } else {                  /* 0xF0 and all other symbols */
+                dtbl->lookup[lookbits] = MAKE_COEF1(coef_value) |
+                  MAKE_NB(l + coef_bits) | MAKE_ZERO_NUM1(rs >> 4);
+              }
+              lookbits++;
+            }
+          }
+        } else {
+          /* same as the original lookup table */
+          lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
+          for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
+            dtbl->lookup[lookbits] = MAKE_NB(l) | MAKE_SYM(rs);
+            lookbits++;
+          }
+        }
+      }
+    }
+    /* nb > LOOKAHEAD */
+    int offset = 0;
+    int base = 1 << HUFF_LOOKAHEAD;
+    int short_tbl_index = -1;   /* no first-level slot seen yet */
+    int cur_long_tbl_base = 1 << HUFF_LOOKAHEAD;
+    int left;
+    int offset_bit = 0;
+    int first = p;      /* the index of the first code of this length */
+    int max_code_len;
+    for (max_code_len = MAX_HUFF_CODE_LEN; max_code_len >= 1; max_code_len--) {
+      if (htbl->bits[max_code_len]) {
+        break;
+      }
+    }
+    for (l = HUFF_LOOKAHEAD + 1; l <= MAX_HUFF_CODE_LEN; l++) {
+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
+        UINT8 rs = htbl->huffval[p];    /* run/size symbol */
+        UINT8 coef_bits = rs & 0x0f;
+        /* similar to the first-level table above */
+        lookbits = huffcode[p] >> (l - HUFF_LOOKAHEAD); /* index in 1st table */
+        /* check if a new 2nd table should be created */
+        if (lookbits != short_tbl_index) {
+          short_tbl_index = lookbits;
+          cur_long_tbl_base += offset;
+          offset = 0;
+          offset_bit = l - HUFF_LOOKAHEAD;
+          left = (1 << offset_bit) - (htbl->bits[l] - (p - first));
+          while (offset_bit + HUFF_LOOKAHEAD < max_code_len && left > 0) {
+            offset_bit++;
+            left = (left << 1) - htbl->bits[offset_bit + HUFF_LOOKAHEAD];
+          }
+        }
+        base = cur_long_tbl_base;
+        /* set 1st table value */
+        dtbl->lookup[lookbits] = MAKE_BASE(base) | MAKE_NB(l) |
+                                 MAKE_EXTRA_BITS(offset_bit);
+        /* set 2nd table value */
+        for (ctr = 0; ctr < (1 << (offset_bit - (l - HUFF_LOOKAHEAD))); ctr++) {
+          /* A table with enough long codes could run past lookup[] */
+          if (base + offset >= lookup_size)
+            ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+          if (coef_bits == 0) {
+            dtbl->lookup[base + offset] =
+              MAKE_NB(l) | MAKE_SYM(rs) | MAKE_COEF_BITS(0xF);
+          } else {
+            dtbl->lookup[base + offset] = MAKE_NB(l) | MAKE_SYM(rs);
+          }
+          offset++;
+        }
+      }
+      first = p;
+    }
+  } else {
+    /* lookup[] was already preset to "too long" by the loop above */
+    p = 0;
+    for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+      for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
+        /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+        /* Generate left-justified code followed by all possible bit sequences */
+        lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
+        for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
+          dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+          lookbits++;
+        }
       }
     }
   }
@@ -291,6 +547,13 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
   register const JOCTET *next_input_byte = state->next_input_byte;
   register size_t bytes_in_buffer = state->bytes_in_buffer;
   j_decompress_ptr cinfo = state->cinfo;
+  struct jpeg_source_mgr *src = cinfo->src;
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  /* NOTE(review): jdphuff.c appears to share this routine through the
+   * jdhuff.h macros, and its entropy object is a different struct whose
+   * use_inner_buf is presumably at another offset; only consult ours when
+   * decoding sequentially.  Confirm against the phuff struct layout. */
+  boolean use_inner_buf = !cinfo->progressive_mode && entropy->use_inner_buf;
 
   /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
   /* (It is assumed that no request will be for more than that many bits.) */
@@ -302,10 +565,17 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
 
       /* Attempt to read a byte */
       if (bytes_in_buffer == 0) {
-        if (!(*cinfo->src->fill_input_buffer) (cinfo))
-          return FALSE;
-        next_input_byte = cinfo->src->next_input_byte;
-        bytes_in_buffer = cinfo->src->bytes_in_buffer;
+        if (!use_inner_buf) {
+          if (!(*src->fill_input_buffer) (cinfo))
+            return FALSE;
+          next_input_byte = src->next_input_byte;
+          bytes_in_buffer = src->bytes_in_buffer;
+        } else {
+          if (!refill_inner_buffer(cinfo))
+            return FALSE;
+          next_input_byte = entropy->next_input_byte;
+          bytes_in_buffer = entropy->bytes_in_buffer;
+        }
       }
       bytes_in_buffer--;
       c = *next_input_byte++;
@@ -319,10 +589,17 @@ jpeg_fill_bit_buffer(bitread_working_state *state,
          */
         do {
           if (bytes_in_buffer == 0) {
-            if (!(*cinfo->src->fill_input_buffer) (cinfo))
-              return FALSE;
-            next_input_byte = cinfo->src->next_input_byte;
-            bytes_in_buffer = cinfo->src->bytes_in_buffer;
+            if (!use_inner_buf) {
+              if (!(*src->fill_input_buffer) (cinfo))
+                return FALSE;
+              next_input_byte = src->next_input_byte;
+              bytes_in_buffer = src->bytes_in_buffer;
+            } else {
+              if (!refill_inner_buffer(cinfo))
+                return FALSE;
+              next_input_byte = entropy->next_input_byte;
+              bytes_in_buffer = entropy->bytes_in_buffer;
+            }
           }
           bytes_in_buffer--;
           c = *next_input_byte++;
@@ -460,7 +737,7 @@ jpeg_huff_decode(bitread_working_state *state,
 
   /* With garbage input we may reach the sentinel value l = 17. */
 
-  if (l > 16) {
+  if (l > MAX_HUFF_CODE_LEN) {
     WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
     return 0;                   /* fake a zero as the safest result */
   }
@@ -468,39 +745,6 @@ jpeg_huff_decode(bitread_working_state *state,
   return htbl->pub->huffval[(int)(code + htbl->valoffset[l])];
 }
 
-
-/*
- * Figure F.12: extend sign bit.
- * On some machines, a shift and add will be faster than a table lookup.
- */
-
-#define AVOID_TABLES
-#ifdef AVOID_TABLES
-
-#define NEG_1  ((unsigned int)-1)
-#define HUFF_EXTEND(x, s) \
-  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
-
-#else
-
-#define HUFF_EXTEND(x, s) \
-  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
-
-static const int extend_test[16] = {   /* entry n is 2**(n-1) */
-  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
-  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
-};
-
-static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
-  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
-  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
-  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
-  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
-};
-
-#endif /* AVOID_TABLES */
-
-
 /*
  * Check for a restart marker & resynchronize decoder.
  * Returns FALSE if must suspend.
@@ -556,7 +755,11 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Outer loop handles each block in the MCU */
 
   /* Load up working state */
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  if (entropy->use_inner_buf) {
+    BITREAD_LOAD_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
+  } else {
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  }
   state = entropy->saved;
 
   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
@@ -599,25 +802,62 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
       /* Section F.2.2.2: decode the AC coefficients */
       /* Since zeroes are skipped, output area must be cleared beforehand */
       for (k = 1; k < DCTSIZE2; k++) {
-        HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
-
-        r = s >> 4;
-        s &= 15;
-
-        if (s) {
-          k += r;
-          CHECK_BIT_BUFFER(br_state, s, return FALSE);
-          r = GET_BITS(s);
-          s = HUFF_EXTEND(r, s);
-          /* Output coefficient in natural (dezigzagged) order.
-           * Note: the extra entries in jpeg_natural_order[] will save us
-           * if k >= DCTSIZE2, which could happen if the data is corrupted.
-           */
-          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+        register int nb, look;
+        if (bits_left < HUFF_LOOKAHEAD) {
+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
+            return FALSE;
+          }
+          get_buffer = br_state.get_buffer;
+          bits_left = br_state.bits_left;
+          if (bits_left < HUFF_LOOKAHEAD) {
+            nb = 1;
+            goto slowlabel;
+          }
+        }
+        look = PEEK_BITS(HUFF_LOOKAHEAD);
+        r = actbl->lookup[look];
+        nb = GET_NB(r);
+        uint32_t zero_num;
+        uint32_t coef_bits = GET_COEF_BITS(r);
+        if (nb <= HUFF_LOOKAHEAD) {
+          DROP_BITS(nb);
+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1);
+          zero_num = GET_ZERO_NUM1(r);
+          k += zero_num;
+          if (coef_bits == 0) {
+            s = GET_COEF1(r);
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          } else {
+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
+            r = GET_BITS(coef_bits);
+            s = HUFF_EXTEND(r, coef_bits);
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          }
         } else {
-          if (r != 15)
-            break;
-          k += 15;
+        slowlabel:
+          nb = 1;
+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
+          get_buffer = br_state.get_buffer;
+          bits_left = br_state.bits_left;
+
+          r = s >> 4;
+          s &= 15;
+
+          if (s) {
+            k += r;
+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
+            r = GET_BITS(s);
+            s = HUFF_EXTEND(r, s);
+            /* Output coefficient in natural (dezigzagged) order.
+             * Note: the extra entries in jpeg_natural_order[] will save us
+             * if k >= DCTSIZE2, which could happen if the data is corrupted.
+             */
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          } else {
+            if (r != 15)
+              break;
+            k += 15;
+          }
         }
       }
 
@@ -626,26 +866,62 @@ decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
       /* Section F.2.2.2: decode the AC coefficients */
       /* In this path we just discard the values */
       for (k = 1; k < DCTSIZE2; k++) {
-        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
-
-        r = s >> 4;
-        s &= 15;
-
-        if (s) {
-          k += r;
-          CHECK_BIT_BUFFER(br_state, s, return FALSE);
-          DROP_BITS(s);
+        register int nb, look;
+        if (bits_left < HUFF_LOOKAHEAD) {
+          if (!jpeg_fill_bit_buffer(&br_state, get_buffer, bits_left, 0)) {
+            return FALSE;
+          }
+          get_buffer = br_state.get_buffer;
+          bits_left = br_state.bits_left;
+          if (bits_left < HUFF_LOOKAHEAD) {
+            nb = 1;
+            goto slowlabel2;
+          }
+        }
+        look = PEEK_BITS(HUFF_LOOKAHEAD);
+        r = actbl->lookup[look];
+        nb = GET_NB(r);
+        uint32_t zero_num;
+        uint32_t coef_bits = GET_COEF_BITS(r);
+        if (nb <= HUFF_LOOKAHEAD) {
+          DROP_BITS(nb);
+          s = actbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1);
+          zero_num = GET_ZERO_NUM1(r);
+          k += zero_num;
+          if (coef_bits != 0) {
+            CHECK_BIT_BUFFER(br_state, (int)coef_bits, return FALSE);
+            DROP_BITS(coef_bits);
+          }
         } else {
-          if (r != 15)
-            break;
-          k += 15;
+        slowlabel2:
+          nb = 1;
+          if ((s = jpeg_huff_decode(&br_state, get_buffer, bits_left, actbl, nb)) < 0) { return FALSE; }
+          get_buffer = br_state.get_buffer;
+          bits_left = br_state.bits_left;
+
+          r = s >> 4;
+          s &= 15;
+
+          if (s) {
+            k += r;
+            CHECK_BIT_BUFFER(br_state, s, return FALSE);
+            DROP_BITS(s);
+          } else {
+            if (r != 15)
+              break;
+            k += 15;
+          }
         }
       }
     }
   }
 
   /* Completed MCU, so update state */
-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  if (entropy->use_inner_buf) {
+    BITREAD_SAVE_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
+  } else {
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  }
   entropy->saved = state;
   return TRUE;
 }
@@ -668,7 +944,11 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
   /* Outer loop handles each block in the MCU */
 
   /* Load up working state */
-  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  if (entropy->use_inner_buf) {
+    BITREAD_LOAD_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
+  } else {
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  }
   buffer = (JOCTET *)br_state.next_input_byte;
   state = entropy->saved;
 
@@ -678,6 +958,7 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
     register int s, k, r, l;
 
+    /* DC */
     HUFF_DECODE_FAST(s, l, dctbl);
     if (s) {
       FILL_BIT_BUFFER_FAST
@@ -699,36 +980,93 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     if (entropy->ac_needed[blkn] && block) {
 
       for (k = 1; k < DCTSIZE2; k++) {
-        HUFF_DECODE_FAST(s, l, actbl);
-        r = s >> 4;
-        s &= 15;
-
-        if (s) {
-          k += r;
-          FILL_BIT_BUFFER_FAST
-          r = GET_BITS(s);
-          s = HUFF_EXTEND(r, s);
-          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+        FILL_BIT_BUFFER_FAST;
+        r = PEEK_BITS(HUFF_LOOKAHEAD);  /* peek at the first HUFF_LOOKAHEAD bits */
+        r = actbl->lookup[r];
+        l = GET_NB(r);
+        uint32_t zero_num;
+        uint32_t coef_bits = GET_COEF_BITS(r);
+
+        if (l <= HUFF_LOOKAHEAD) {
+          zero_num = GET_ZERO_NUM1(r);
+          DROP_BITS(l);
+          if (coef_bits == 0) {
+            s = GET_COEF1(r);
+            k += zero_num;
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          } else {
+            FILL_BIT_BUFFER_FAST
+            r = GET_BITS(coef_bits);
+            s = HUFF_EXTEND(r, coef_bits);
+            k += zero_num;
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          }
         } else {
-          if (r != 15) break;
-          k += 15;
+          uint32_t base = GET_BASE(r);  /* the high 16 bits hold the base */
+          uint32_t offset_bits = GET_EXTRA_BITS(r);  /* the low bits hold offset_bits; l = nb is the longest code length in the 2nd-level table */
+          r = PEEK_BITS(l);  /* the first HUFF_LOOKAHEAD bits are already used; the low offset_bits of the rest index the 2nd-level table */
+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
+          l = GET_NB(s);  /* actual code length */
+          coef_bits = GET_COEF_BITS(s);
+          zero_num = GET_ZERO_NUM1(s);
+          DROP_BITS(l);
+          if (coef_bits == 0xF) {
+            if (zero_num != 0xF) {
+              break;
+            } else {
+              k += 15;
+            }
+          } else {
+            FILL_BIT_BUFFER_FAST
+            r = GET_BITS(coef_bits);
+            s = HUFF_EXTEND(r, coef_bits);
+            k += zero_num;
+            (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+          }
         }
       }
 
     } else {
 
       for (k = 1; k < DCTSIZE2; k++) {
-        HUFF_DECODE_FAST(s, l, actbl);
-        r = s >> 4;
-        s &= 15;
-
-        if (s) {
-          k += r;
-          FILL_BIT_BUFFER_FAST
-          DROP_BITS(s);
+        FILL_BIT_BUFFER_FAST;
+        r = PEEK_BITS(HUFF_LOOKAHEAD);  /* peek at the first HUFF_LOOKAHEAD bits */
+        r = actbl->lookup[r];
+        l = GET_NB(r);
+        uint32_t zero_num;
+        uint32_t coef_bits = GET_COEF_BITS(r);
+
+        if (l <= HUFF_LOOKAHEAD) {
+          zero_num = GET_ZERO_NUM1(r);
+          DROP_BITS(l);
+          if (coef_bits == 0) {
+            s = GET_COEF1(r);
+            k += zero_num;
+          } else {
+            FILL_BIT_BUFFER_FAST
+            DROP_BITS(coef_bits);
+            k += zero_num;
+          }
         } else {
-          if (r != 15) break;
-          k += 15;
+          uint32_t base = GET_BASE(r);  /* the high 16 bits hold the base */
+          uint32_t offset_bits = GET_EXTRA_BITS(r);  /* the low bits hold offset_bits; l = nb is the longest code length in the 2nd-level table */
+          r = PEEK_BITS(l);  /* the first HUFF_LOOKAHEAD bits are already used; the low offset_bits of the rest index the 2nd-level table */
+          s = actbl->lookup[base + (r & ((1 << offset_bits) - 1))];
+          l = GET_NB(s);  /* actual code length */
+          coef_bits = GET_COEF_BITS(s);
+          zero_num = GET_ZERO_NUM1(s);
+          DROP_BITS(l);
+          if (coef_bits == 0xF) {
+            if (zero_num != 0xF) {
+              break;
+            } else {
+              k += 15;
+            }
+          } else {
+            FILL_BIT_BUFFER_FAST
+            DROP_BITS(coef_bits);
+            k += zero_num;
+          }
         }
       }
     }
@@ -741,7 +1079,11 @@ decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 
   br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
   br_state.next_input_byte = buffer;
-  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  if (entropy->use_inner_buf) {
+    BITREAD_SAVE_STATE_INNER_BUF(cinfo, entropy, entropy->bitstate);
+  } else {
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  }
   entropy->saved = state;
   return TRUE;
 }
@@ -778,9 +1120,16 @@ decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
     usefast = 0;
   }
 
-  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
-      cinfo->unread_marker != 0)
-    usefast = 0;
+  if (entropy->use_inner_buf) {
+    if (entropy->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
+        cinfo->unread_marker != 0)
+      usefast = 0;
+  } else {
+    if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
+        cinfo->unread_marker != 0)
+      usefast = 0;
+  }
+
 
   /* If we've run out of data, just leave the MCU set to zeroes.
    * This way, we return uniform gray for the remainder of the segment.
@@ -826,6 +1175,8 @@ jinit_huff_decoder(j_decompress_ptr cinfo)
   cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
   entropy->pub.start_pass = start_pass_huff_decoder;
   entropy->pub.decode_mcu = decode_mcu;
+  entropy->next_input_byte = NULL;
+  entropy->bytes_in_buffer = 0;
 
   /* Mark tables unallocated */
   for (i = 0; i < NUM_HUFF_TBLS; i++) {
diff --git a/jdhuff.h b/jdhuff.h
index cfa0b7f..b83393b 100644
--- a/jdhuff.h
+++ b/jdhuff.h
@@ -16,10 +16,58 @@
 
 #include "jconfigint.h"
+#include <stdint.h>
 
 
 /* Derived data constructed for each Huffman table */
+#define MAX_HUFF_CODE_LEN  16
+
+#define HUFF_LOOKAHEAD  10      /* # of bits of lookahead 9-13 maybe */
+#define HUFF_AC_SYMBOLS  192
+
+#define HUFF_L_REM  (MAX_HUFF_CODE_LEN - HUFF_LOOKAHEAD)
+#define HUFF_L_DUP  ((1 << HUFF_L_REM) - (HUFF_L_REM + 1))
+#define HUFF_L_UNUSED  ((1 << HUFF_L_REM) - (1 << ((HUFF_L_REM) / 2)) - (1 << ((HUFF_L_REM + 1) / 2)) + 1)
+#define HUFF_L_SIZE  (HUFF_AC_SYMBOLS + HUFF_L_DUP + HUFF_L_UNUSED)
+#define HUFF_CODE_LARGE_LONG_ALIGNED  (HUFF_L_SIZE + (-HUFF_L_SIZE & 0xf))
+
+/* Layout of a lookup[] entry, as given by the offsets and widths below:
+ *   bits  0-3   coefficient bit count, or 2nd-table index width (SYM overlaps)
+ *   bits  4-10  zero run
+ *   bits 11-15  number of bits consumed (nb)
+ *   bits 16-31  coefficient value, or base index of the 2nd-level table
+ */
+#define COEF_BITS_OFFSET  0
+#define COEF_BITS_BITS  4
+#define ZERO_NUM1_OFFSET  4
+#define ZERO_NUM_BITS  7
+#define NB_OFFSET  11
+#define NB_BITS  5
+#define COEF1_OFFSET  16
+#define COEF_VALUE_BITS  16
+
+#define EXTRA_BITS_OFFSET  COEF_BITS_OFFSET     /* 2nd table offset bits */
+#define EXTRA_BITS_BITS  COEF_BITS_BITS
+
+#define SYM_OFFSET  COEF_BITS_OFFSET
+
+#define MAKE_BITS(x, s)  ((x) << (s))
+#define GETS_BITS(x, s, l)  (((x) >> (s)) & ((0x1L << (l)) - 1))
+
+#define MAKE_ZERO_NUM1(x)  MAKE_BITS(x, ZERO_NUM1_OFFSET)
+#define MAKE_COEF_BITS(x)  MAKE_BITS(x, COEF_BITS_OFFSET)
+#define MAKE_SYM(x)  MAKE_BITS(x, SYM_OFFSET)
+#define MAKE_NB(x)  MAKE_BITS(x, NB_OFFSET)
+#define MAKE_COEF1(x)  ((uint64_t)(uint16_t)(x) << COEF1_OFFSET)
+#define MAKE_BASE(x)  MAKE_BITS(x, COEF1_OFFSET)
+#define MAKE_EXTRA_BITS(x)  MAKE_BITS(x, EXTRA_BITS_OFFSET)
+
+#define GET_ZERO_NUM1(x)  GETS_BITS(x, ZERO_NUM1_OFFSET, ZERO_NUM_BITS)
+#define GET_COEF_BITS(x)  GETS_BITS(x, COEF_BITS_OFFSET, COEF_BITS_BITS)
+#define GET_NB(x)  GETS_BITS(x, NB_OFFSET, NB_BITS)
+#define GET_COEF1(x)  GETS_BITS(x, COEF1_OFFSET, COEF_VALUE_BITS)
+#define GET_BASE(x)  GETS_BITS(x, COEF1_OFFSET, COEF_VALUE_BITS)
+#define GET_EXTRA_BITS(x)  GETS_BITS(x, EXTRA_BITS_OFFSET, EXTRA_BITS_BITS)
 
-#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
 
 typedef struct {
   /* Basic tables: (element [0] of each array is unused) */
@@ -44,7 +92,7 @@ typedef struct {
    * if too long.  The next 8 bits of each entry contain the
    * symbol.
    */
-  int lookup[1 << HUFF_LOOKAHEAD];
+  int lookup[(1 << HUFF_LOOKAHEAD) + HUFF_CODE_LARGE_LONG_ALIGNED];
 } d_derived_tbl;
 
 /* Expand a Huffman table definition into the derived format */
@@ -130,12 +178,25 @@ typedef struct { /* Bitreading working state within an MCU */
   get_buffer = permstate.get_buffer; \
   bits_left = permstate.bits_left;
 
+#define BITREAD_LOAD_STATE_INNER_BUF(cinfop, entropy, permstate) \
+  br_state.cinfo = cinfop; \
+  br_state.next_input_byte = (entropy)->next_input_byte; \
+  br_state.bytes_in_buffer = (entropy)->bytes_in_buffer; \
+  get_buffer = permstate.get_buffer; \
+  bits_left = permstate.bits_left;
+
 #define BITREAD_SAVE_STATE(cinfop, permstate) \
   cinfop->src->next_input_byte = br_state.next_input_byte; \
   cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
   permstate.get_buffer = get_buffer; \
   permstate.bits_left = bits_left
 
+#define BITREAD_SAVE_STATE_INNER_BUF(cinfop, entropy, permstate) \
+  (entropy)->next_input_byte = br_state.next_input_byte; \
+  (entropy)->bytes_in_buffer = br_state.bytes_in_buffer; \
+  permstate.get_buffer = get_buffer; \
+  permstate.bits_left = bits_left
+
 /*
  * These macros provide the in-line portion of bit fetching.
  * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
diff --git a/jdphuff.c b/jdphuff.c
index c6d82ca..00ed535 100644
--- a/jdphuff.c
+++ b/jdphuff.c
@@ -52,7 +52,7 @@ typedef struct {
 
   /* These fields are NOT loaded into local working state. */
   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
-
+  boolean use_inner_buf;
   /* Pointers to derived tables (these workspaces have image lifespan) */
   d_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
 
@@ -71,6 +71,126 @@ METHODDEF(boolean) decode_mcu_DC_refine(j_decompress_ptr cinfo,
 METHODDEF(boolean) decode_mcu_AC_refine(j_decompress_ptr cinfo,
                                         JBLOCKROW *MCU_data);
 
+LOCAL(void)
+jpeg_make_dp_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
+                         d_derived_tbl **pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;             /* fill in back link */
+
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
+    i = (int)htbl->bits[l];
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char)l;
+  }
+  huffsize[p] = 0;
+  numsymbols = p;
+
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int)huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((JLONG)code) >= (((JLONG)1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= MAX_HUFF_CODE_LEN; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
+    }
+  }
+  dtbl->valoffset[17] = 0;
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
+    dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+        lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15.
+   * (Tighter bounds could be applied depending on the data depth and mode,
+   * but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > 15)
+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
 
 /*
  * Initialize for a Huffman-compressed scan.
@@ -163,12 +283,12 @@ start_pass_phuff_decoder(j_decompress_ptr cinfo)
       if (cinfo->Ah == 0) {     /* DC refinement needs no table */
         tbl = compptr->dc_tbl_no;
         pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
-        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
+        jpeg_make_dp_derived_tbl(cinfo, TRUE, tbl, pdtbl);
       }
     } else {
       tbl = compptr->ac_tbl_no;
       pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
-      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
+      jpeg_make_dp_derived_tbl(cinfo, FALSE, tbl, pdtbl);
       /* remember the single active table */
       entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
     }
@@ -657,6 +777,7 @@ jinit_phuff_decoder(j_decompress_ptr cinfo)
   entropy = (phuff_entropy_ptr)
     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                 sizeof(phuff_entropy_decoder));
+  entropy->use_inner_buf = FALSE;       /* never set TRUE in this file */
   cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
   entropy->pub.start_pass = start_pass_phuff_decoder;
 
diff --git a/jutils.c b/jutils.c
index 5c5bb17..23931b7 100644
--- a/jutils.c
+++ b/jutils.c
@@ -53,7 +53,7 @@ const int jpeg_zigzag_order[DCTSIZE2] = {
  * fake entries.
  */
 
-const int jpeg_natural_order[DCTSIZE2 + 16] = {
+const int jpeg_natural_order[DCTSIZE2 + 64] = {
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
@@ -63,6 +63,12 @@ const int jpeg_natural_order[DCTSIZE2 + 16] = {
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63,
+  63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63, 63
 };
 