/*
 * jcphuff.c
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
 * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
 * Copyright (C) 2016, 2018, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
 * This file contains Huffman entropy encoding routines for progressive JPEG.
 *
 * We do not support output suspension in this module, since the library
 * currently does not allow multiple-scan files to be written with output
 * suspension.
 */
19 
20 #define JPEG_INTERNALS
21 #include "jinclude.h"
22 #include "jpeglib.h"
23 #include "jsimd.h"
24 #include "jconfigint.h"
25 #include <limits.h>
26 
27 #ifdef HAVE_INTRIN_H
28 #include <intrin.h>
29 #ifdef _MSC_VER
30 #ifdef HAVE_BITSCANFORWARD64
31 #pragma intrinsic(_BitScanForward64)
32 #endif
33 #ifdef HAVE_BITSCANFORWARD
34 #pragma intrinsic(_BitScanForward)
35 #endif
36 #endif
37 #endif
38 
39 #ifdef C_PROGRESSIVE_SUPPORTED
40 
/*
 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
 * used for bit counting rather than the lookup table.  This will reduce the
 * memory footprint by 64k, which is important for some mobile applications
 * that create many isolated instances of libjpeg-turbo (web browsers, for
 * instance.)  This may improve performance on some mobile platforms as well.
 * This feature is enabled by default only on Arm processors, because some x86
 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
 * shown to have a significant performance impact even on the x86 chips that
 * have a fast implementation of it.  When building for Armv6, you can
 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
 * flags (this defines __thumb__).
 */

#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \
    defined(_M_ARM64)
#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif

/*
 * JPEG_NBITS(x) gives the number of bits needed to hold the magnitude x and
 * accepts x == 0 (yielding 0 in the clz variant).  JPEG_NBITS_NONZERO(x) is
 * for call sites that already know x != 0 and so skips the zero test.
 * In the table variant both macros are the same lookup; the table itself is
 * defined in jpeg_nbits_table.h (presumably one entry per 16-bit magnitude;
 * confirm there before indexing with anything larger).
 */
#ifdef USE_CLZ_INTRINSIC
#if defined(_MSC_VER) && !defined(__clang__)
#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
#else
#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
#endif
/* The argument is parenthesized so that an expression argument is tested as
 * a whole rather than being split by the precedence of ?:.
 */
#define JPEG_NBITS(x)          ((x) ? JPEG_NBITS_NONZERO(x) : 0)
#else
#include "jpeg_nbits_table.h"
#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
#endif
74 
75 
76 /* Expanded entropy encoder object for progressive Huffman encoding. */
77 
78 typedef struct {
79   struct jpeg_entropy_encoder pub; /* public fields */
80 
81   /* Pointer to routine to prepare data for encode_mcu_AC_first() */
82   void (*AC_first_prepare) (const JCOEF *block,
83                             const int *jpeg_natural_order_start, int Sl,
84                             int Al, JCOEF *values, size_t *zerobits);
85   /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
86   int (*AC_refine_prepare) (const JCOEF *block,
87                             const int *jpeg_natural_order_start, int Sl,
88                             int Al, JCOEF *absvalues, size_t *bits);
89 
90   /* Mode flag: TRUE for optimization, FALSE for actual data output */
91   boolean gather_statistics;
92 
93   /* Bit-level coding status.
94    * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
95    */
96   JOCTET *next_output_byte;     /* => next byte to write in buffer */
97   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
98   size_t put_buffer;            /* current bit-accumulation buffer */
99   int put_bits;                 /* # of bits now in it */
100   j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
101 
102   /* Coding status for DC components */
103   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
104 
105   /* Coding status for AC components */
106   int ac_tbl_no;                /* the table number of the single component */
107   unsigned int EOBRUN;          /* run length of EOBs */
108   unsigned int BE;              /* # of buffered correction bits before MCU */
109   char *bit_buffer;             /* buffer for correction bits (1 per char) */
110   /* packing correction bits tightly would save some space but cost time... */
111 
112   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
113   int next_restart_num;         /* next restart number to write (0-7) */
114 
115   /* Pointers to derived tables (these workspaces have image lifespan).
116    * Since any one scan codes only DC or only AC, we only need one set
117    * of tables, not one for DC and one for AC.
118    */
119   c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
120 
121   /* Statistics tables for optimization; again, one set is enough */
122   long *count_ptrs[NUM_HUFF_TBLS];
123 } phuff_entropy_encoder;
124 
125 typedef phuff_entropy_encoder *phuff_entropy_ptr;
126 
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
 * buffer can hold.  Larger sizes may slightly improve compression, but
 * 1000 is already well into the realm of overkill.
 * The minimum safe size is 64 bits.
 * start_pass_phuff() allocates the buffer as MAX_CORR_BITS chars, one char
 * per correction bit.
 */

#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
134 
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * In the RIGHT_SHIFT_IS_UNSIGNED variant, a negative operand has ones OR'ed
 * back into bit (16 - shft) and every bit above it after the shift.  The
 * mask is built from the unsigned constant ~0U and then converted to int,
 * because left-shifting the negative int ~0 is undefined behavior.
 * NOTE(review): the constant 16 presumably relies on the operand fitting in
 * 16 bits (the only visible use passes a JCOEF); confirm before reusing the
 * macro on wider values.
 * ISHIFT_TEMPS must appear among the declarations of any function that uses
 * IRIGHT_SHIFT; it declares the temporary the macro assigns to.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | (int)((~0U) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif
150 
/* Round v up to the next multiple of p.  The mask arithmetic is only valid
 * when p is a power of 2.  Both arguments are parenthesized so that
 * expression arguments are not split by operator precedence.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
152 
153 /* Forward declarations */
154 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
155                                        JBLOCKROW *MCU_data);
156 METHODDEF(void) encode_mcu_AC_first_prepare
157   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
158    JCOEF *values, size_t *zerobits);
159 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
160                                        JBLOCKROW *MCU_data);
161 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
162                                         JBLOCKROW *MCU_data);
163 METHODDEF(int) encode_mcu_AC_refine_prepare
164   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
165    JCOEF *absvalues, size_t *bits);
166 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
167                                         JBLOCKROW *MCU_data);
168 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
169 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
170 
171 
/* Count and strip the trailing zero bits of *x.
 *
 * On return *x has been shifted right so that its lowest set bit is bit 0,
 * and the return value is the number of positions it was shifted.
 *
 * *x must be nonzero on entry: the portable fallback loop would never
 * terminate for zero.  The callers in this file only invoke it from inside
 * "while (zerobits)" / "if (zerobits)" guards.
 */
INLINE
METHODDEF(int)
count_zeroes(size_t *x)
{
#if defined(HAVE_BUILTIN_CTZL)
  int result;
  result = __builtin_ctzl(*x);
  *x >>= result;
#elif defined(HAVE_BITSCANFORWARD64)
  unsigned long result;
  _BitScanForward64(&result, *x);
  *x >>= result;
#elif defined(HAVE_BITSCANFORWARD)
  unsigned long result;
  _BitScanForward(&result, *x);
  *x >>= result;
#else
  /* Portable fallback: shift one bit at a time until bit 0 is set. */
  int result = 0;
  while ((*x & 1) == 0) {
    ++result;
    *x >>= 1;
  }
#endif
  return (int)result;
}
198 
199 
200 /*
201  * Initialize for a Huffman-compressed scan using progressive JPEG.
202  */
203 
204 METHODDEF(void)
start_pass_phuff(j_compress_ptr cinfo,boolean gather_statistics)205 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
206 {
207   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
208   boolean is_DC_band;
209   int ci, tbl;
210   jpeg_component_info *compptr;
211 
212   entropy->cinfo = cinfo;
213   entropy->gather_statistics = gather_statistics;
214 
215   is_DC_band = (cinfo->Ss == 0);
216 
217   /* We assume jcmaster.c already validated the scan parameters. */
218 
219   /* Select execution routines */
220   if (cinfo->Ah == 0) {
221     if (is_DC_band)
222       entropy->pub.encode_mcu = encode_mcu_DC_first;
223     else
224       entropy->pub.encode_mcu = encode_mcu_AC_first;
225     if (jsimd_can_encode_mcu_AC_first_prepare())
226       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
227     else
228       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
229   } else {
230     if (is_DC_band)
231       entropy->pub.encode_mcu = encode_mcu_DC_refine;
232     else {
233       entropy->pub.encode_mcu = encode_mcu_AC_refine;
234       if (jsimd_can_encode_mcu_AC_refine_prepare())
235         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
236       else
237         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
238       /* AC refinement needs a correction bit buffer */
239       if (entropy->bit_buffer == NULL)
240         entropy->bit_buffer = (char *)
241           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
242                                       MAX_CORR_BITS * sizeof(char));
243     }
244   }
245   if (gather_statistics)
246     entropy->pub.finish_pass = finish_pass_gather_phuff;
247   else
248     entropy->pub.finish_pass = finish_pass_phuff;
249 
250   /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
251    * for AC coefficients.
252    */
253   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
254     compptr = cinfo->cur_comp_info[ci];
255     /* Initialize DC predictions to 0 */
256     entropy->last_dc_val[ci] = 0;
257     /* Get table index */
258     if (is_DC_band) {
259       if (cinfo->Ah != 0)       /* DC refinement needs no table */
260         continue;
261       tbl = compptr->dc_tbl_no;
262     } else {
263       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
264     }
265     if (gather_statistics) {
266       /* Check for invalid table index */
267       /* (make_c_derived_tbl does this in the other path) */
268       if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
269         ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
270       /* Allocate and zero the statistics tables */
271       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
272       if (entropy->count_ptrs[tbl] == NULL)
273         entropy->count_ptrs[tbl] = (long *)
274           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
275                                       257 * sizeof(long));
276       MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
277     } else {
278       /* Compute derived values for Huffman table */
279       /* We may do this more than once for a table, but it's not expensive */
280       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
281                               &entropy->derived_tbls[tbl]);
282     }
283   }
284 
285   /* Initialize AC stuff */
286   entropy->EOBRUN = 0;
287   entropy->BE = 0;
288 
289   /* Initialize bit buffer to empty */
290   entropy->put_buffer = 0;
291   entropy->put_bits = 0;
292 
293   /* Initialize restart stuff */
294   entropy->restarts_to_go = cinfo->restart_interval;
295   entropy->next_restart_num = 0;
296 }
297 
298 
/* Outputting bytes to the file.
 * NB: these must be called only when actually outputting,
 * that is, entropy->gather_statistics == FALSE.
 */

/* Emit a byte: store val at the current write position and advance it; when
 * that consumes the last free byte, call dump_buffer() to empty the buffer.
 * The macro names `entropy` more than once, so pass an expression without
 * side effects.  It expands to a bare { } block.
 */
#define emit_byte(entropy, val) { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
}
310 
311 
312 LOCAL(void)
dump_buffer(phuff_entropy_ptr entropy)313 dump_buffer(phuff_entropy_ptr entropy)
314 /* Empty the output buffer; we do not support suspension in this module. */
315 {
316   struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
317 
318   if (!(*dest->empty_output_buffer) (entropy->cinfo))
319     ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
320   /* After a successful buffer dump, must reset buffer pointers */
321   entropy->next_output_byte = dest->next_output_byte;
322   entropy->free_in_buffer = dest->free_in_buffer;
323 }
324 
325 
326 /* Outputting bits to the file */
327 
328 /* Only the right 24 bits of put_buffer are used; the valid bits are
329  * left-justified in this part.  At most 16 bits can be passed to emit_bits
330  * in one call, and we never retain more than 7 bits in put_buffer
331  * between calls, so 24 bits are sufficient.
332  */
333 
334 LOCAL(void)
emit_bits(phuff_entropy_ptr entropy,unsigned int code,int size)335 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
336 /* Emit some bits, unless we are in gather mode */
337 {
338   /* This routine is heavily used, so it's worth coding tightly. */
339   register size_t put_buffer = (size_t)code;
340   register int put_bits = entropy->put_bits;
341 
342   /* if size is 0, caller used an invalid Huffman table entry */
343   if (size == 0)
344     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
345 
346   if (entropy->gather_statistics)
347     return;                     /* do nothing if we're only getting stats */
348 
349   put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
350 
351   put_bits += size;             /* new number of bits in buffer */
352 
353   put_buffer <<= 24 - put_bits; /* align incoming bits */
354 
355   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
356 
357   while (put_bits >= 8) {
358     int c = (int)((put_buffer >> 16) & 0xFF);
359 
360     emit_byte(entropy, c);
361     if (c == 0xFF) {            /* need to stuff a zero byte? */
362       emit_byte(entropy, 0);
363     }
364     put_buffer <<= 8;
365     put_bits -= 8;
366   }
367 
368   entropy->put_buffer = put_buffer; /* update variables */
369   entropy->put_bits = put_bits;
370 }
371 
372 
373 LOCAL(void)
flush_bits(phuff_entropy_ptr entropy)374 flush_bits(phuff_entropy_ptr entropy)
375 {
376   emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
377   entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
378   entropy->put_bits = 0;
379 }
380 
381 
382 /*
383  * Emit (or just count) a Huffman symbol.
384  */
385 
386 LOCAL(void)
emit_symbol(phuff_entropy_ptr entropy,int tbl_no,int symbol)387 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
388 {
389   if (entropy->gather_statistics)
390     entropy->count_ptrs[tbl_no][symbol]++;
391   else {
392     c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
393     emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
394   }
395 }
396 
397 
398 /*
399  * Emit bits from a correction bit buffer.
400  */
401 
402 LOCAL(void)
emit_buffered_bits(phuff_entropy_ptr entropy,char * bufstart,unsigned int nbits)403 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
404                    unsigned int nbits)
405 {
406   if (entropy->gather_statistics)
407     return;                     /* no real work */
408 
409   while (nbits > 0) {
410     emit_bits(entropy, (unsigned int)(*bufstart), 1);
411     bufstart++;
412     nbits--;
413   }
414 }
415 
416 
417 /*
418  * Emit any pending EOBRUN symbol.
419  */
420 
421 LOCAL(void)
emit_eobrun(phuff_entropy_ptr entropy)422 emit_eobrun(phuff_entropy_ptr entropy)
423 {
424   register int temp, nbits;
425 
426   if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
427     temp = entropy->EOBRUN;
428     nbits = JPEG_NBITS_NONZERO(temp) - 1;
429     /* safety check: shouldn't happen given limited correction-bit buffer */
430     if (nbits > 14)
431       ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
432 
433     emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
434     if (nbits)
435       emit_bits(entropy, entropy->EOBRUN, nbits);
436 
437     entropy->EOBRUN = 0;
438 
439     /* Emit any buffered correction bits */
440     emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
441     entropy->BE = 0;
442   }
443 }
444 
445 
446 /*
447  * Emit a restart marker & resynchronize predictions.
448  */
449 
450 LOCAL(void)
emit_restart(phuff_entropy_ptr entropy,int restart_num)451 emit_restart(phuff_entropy_ptr entropy, int restart_num)
452 {
453   int ci;
454 
455   emit_eobrun(entropy);
456 
457   if (!entropy->gather_statistics) {
458     flush_bits(entropy);
459     emit_byte(entropy, 0xFF);
460     emit_byte(entropy, JPEG_RST0 + restart_num);
461   }
462 
463   if (entropy->cinfo->Ss == 0) {
464     /* Re-initialize DC predictions to 0 */
465     for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
466       entropy->last_dc_val[ci] = 0;
467   } else {
468     /* Re-initialize all AC-related fields to 0 */
469     entropy->EOBRUN = 0;
470     entropy->BE = 0;
471   }
472 }
473 
474 
475 /*
476  * MCU encoding for DC initial scan (either spectral selection,
477  * or first pass of successive approximation).
478  */
479 
480 METHODDEF(boolean)
encode_mcu_DC_first(j_compress_ptr cinfo,JBLOCKROW * MCU_data)481 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
482 {
483   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
484   register int temp, temp2, temp3;
485   register int nbits;
486   int blkn, ci;
487   int Al = cinfo->Al;
488   JBLOCKROW block;
489   jpeg_component_info *compptr;
490   ISHIFT_TEMPS
491 
492   entropy->next_output_byte = cinfo->dest->next_output_byte;
493   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
494 
495   /* Emit restart marker if needed */
496   if (cinfo->restart_interval)
497     if (entropy->restarts_to_go == 0)
498       emit_restart(entropy, entropy->next_restart_num);
499 
500   /* Encode the MCU data blocks */
501   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
502     block = MCU_data[blkn];
503     ci = cinfo->MCU_membership[blkn];
504     compptr = cinfo->cur_comp_info[ci];
505 
506     /* Compute the DC value after the required point transform by Al.
507      * This is simply an arithmetic right shift.
508      */
509     temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
510 
511     /* DC differences are figured on the point-transformed values. */
512     temp = temp2 - entropy->last_dc_val[ci];
513     entropy->last_dc_val[ci] = temp2;
514 
515     /* Encode the DC coefficient difference per section G.1.2.1 */
516 
517     /* This is a well-known technique for obtaining the absolute value without
518      * a branch.  It is derived from an assembly language technique presented
519      * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
520      * 1997 by Agner Fog.
521      */
522     temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
523     temp ^= temp3;
524     temp -= temp3;              /* temp is abs value of input */
525     /* For a negative input, want temp2 = bitwise complement of abs(input) */
526     temp2 = temp ^ temp3;
527 
528     /* Find the number of bits needed for the magnitude of the coefficient */
529     nbits = JPEG_NBITS(temp);
530     /* Check for out-of-range coefficient values.
531      * Since we're encoding a difference, the range limit is twice as much.
532      */
533     if (nbits > MAX_COEF_BITS + 1)
534       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
535 
536     /* Count/emit the Huffman-coded symbol for the number of bits */
537     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
538 
539     /* Emit that number of bits of the value, if positive, */
540     /* or the complement of its magnitude, if negative. */
541     if (nbits)                  /* emit_bits rejects calls with size 0 */
542       emit_bits(entropy, (unsigned int)temp2, nbits);
543   }
544 
545   cinfo->dest->next_output_byte = entropy->next_output_byte;
546   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
547 
548   /* Update restart-interval state too */
549   if (cinfo->restart_interval) {
550     if (entropy->restarts_to_go == 0) {
551       entropy->restarts_to_go = cinfo->restart_interval;
552       entropy->next_restart_num++;
553       entropy->next_restart_num &= 7;
554     }
555     entropy->restarts_to_go--;
556   }
557 
558   return TRUE;
559 }
560 
561 
/*
 * Data preparation for encode_mcu_AC_first().
 *
 * COMPUTE_ABSVALUES_AC_FIRST(Sl) scans the first Sl coefficients in the
 * order given by jpeg_natural_order_start.  For each coefficient that is
 * still nonzero after the point transform it stores the magnitude in
 * values[k], the bits to emit in values[k + DCTSIZE2], and sets bit k of
 * zerobits.  Entries for other positions are left unwritten.
 * The macro relies on these names in the expanding scope: k, temp, temp2,
 * block, jpeg_natural_order_start, Al, values, and zerobits.
 */

#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < (Sl); k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp == 0) \
      continue; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value; so the code is \
     * interwoven with finding the abs value (temp) and output bits (temp2). \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    /* Watch out for case that nonzero coef is zero after point transform */ \
    if (temp == 0) \
      continue; \
    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    temp2 ^= temp; \
    values[k] = temp; \
    values[k + DCTSIZE2] = temp2; \
    zerobits |= ((size_t)1U) << k; \
  } \
}
590 
591 METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * values,size_t * bits)592 encode_mcu_AC_first_prepare(const JCOEF *block,
593                             const int *jpeg_natural_order_start, int Sl,
594                             int Al, JCOEF *values, size_t *bits)
595 {
596   register int k, temp, temp2;
597   size_t zerobits = 0U;
598   int Sl0 = Sl;
599 
600 #if SIZEOF_SIZE_T == 4
601   if (Sl0 > 32)
602     Sl0 = 32;
603 #endif
604 
605   COMPUTE_ABSVALUES_AC_FIRST(Sl0);
606 
607   bits[0] = zerobits;
608 #if SIZEOF_SIZE_T == 4
609   zerobits = 0U;
610 
611   if (Sl > 32) {
612     Sl -= 32;
613     jpeg_natural_order_start += 32;
614     values += 32;
615 
616     COMPUTE_ABSVALUES_AC_FIRST(Sl);
617   }
618   bits[1] = zerobits;
619 #endif
620 }
621 
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 *
 * ENCODE_COEFS_AC_FIRST(label) consumes the zerobits bitmap: each pass skips
 * the run of zero coefficients reported by count_zeroes(), then emits the
 * run-length/size symbol and value bits for the nonzero coefficient found.
 * `label` is pasted in just after the skip, so a caller can jump into the
 * middle of the first iteration with r and cvalue already set up.
 * The macro relies on these names in the expanding scope: zerobits, r,
 * cvalue, temp, temp2, nbits, entropy, and cinfo.
 */

#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    while (r > 15) { \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
    } \
    \
    /* Find the number of bits needed for the magnitude of the coefficient */ \
    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    /* Check for out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Emit that number of bits of the value, if positive, */ \
    /* or the complement of its magnitude, if negative. */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    cvalue++; \
    zerobits >>= 1; \
  } \
}
658 
659 METHODDEF(boolean)
encode_mcu_AC_first(j_compress_ptr cinfo,JBLOCKROW * MCU_data)660 encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
661 {
662   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
663   register int temp, temp2;
664   register int nbits, r;
665   int Sl = cinfo->Se - cinfo->Ss + 1;
666   int Al = cinfo->Al;
667   JCOEF values_unaligned[2 * DCTSIZE2 + 15];
668   JCOEF *values;
669   const JCOEF *cvalue;
670   size_t zerobits;
671   size_t bits[8 / SIZEOF_SIZE_T];
672 
673   entropy->next_output_byte = cinfo->dest->next_output_byte;
674   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
675 
676   /* Emit restart marker if needed */
677   if (cinfo->restart_interval)
678     if (entropy->restarts_to_go == 0)
679       emit_restart(entropy, entropy->next_restart_num);
680 
681 #ifdef WITH_SIMD
682   cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
683 #else
684   /* Not using SIMD, so alignment is not needed */
685   cvalue = values = values_unaligned;
686 #endif
687 
688   /* Prepare data */
689   entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
690                             Sl, Al, values, bits);
691 
692   zerobits = bits[0];
693 #if SIZEOF_SIZE_T == 4
694   zerobits |= bits[1];
695 #endif
696 
697   /* Emit any pending EOBRUN */
698   if (zerobits && (entropy->EOBRUN > 0))
699     emit_eobrun(entropy);
700 
701 #if SIZEOF_SIZE_T == 4
702   zerobits = bits[0];
703 #endif
704 
705   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
706 
707   ENCODE_COEFS_AC_FIRST((void)0;);
708 
709 #if SIZEOF_SIZE_T == 4
710   zerobits = bits[1];
711   if (zerobits) {
712     int diff = ((values + DCTSIZE2 / 2) - cvalue);
713     r = count_zeroes(&zerobits);
714     r += diff;
715     cvalue += r;
716     goto first_iter_ac_first;
717   }
718 
719   ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
720 #endif
721 
722   if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
723     entropy->EOBRUN++;          /* count an EOB */
724     if (entropy->EOBRUN == 0x7FFF)
725       emit_eobrun(entropy);     /* force it out to avoid overflow */
726   }
727 
728   cinfo->dest->next_output_byte = entropy->next_output_byte;
729   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
730 
731   /* Update restart-interval state too */
732   if (cinfo->restart_interval) {
733     if (entropy->restarts_to_go == 0) {
734       entropy->restarts_to_go = cinfo->restart_interval;
735       entropy->next_restart_num++;
736       entropy->next_restart_num &= 7;
737     }
738     entropy->restarts_to_go--;
739   }
740 
741   return TRUE;
742 }
743 
744 
745 /*
746  * MCU encoding for DC successive approximation refinement scan.
747  * Note: we assume such scans can be multi-component, although the spec
748  * is not very clear on the point.
749  */
750 
751 METHODDEF(boolean)
encode_mcu_DC_refine(j_compress_ptr cinfo,JBLOCKROW * MCU_data)752 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
753 {
754   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
755   register int temp;
756   int blkn;
757   int Al = cinfo->Al;
758   JBLOCKROW block;
759 
760   entropy->next_output_byte = cinfo->dest->next_output_byte;
761   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
762 
763   /* Emit restart marker if needed */
764   if (cinfo->restart_interval)
765     if (entropy->restarts_to_go == 0)
766       emit_restart(entropy, entropy->next_restart_num);
767 
768   /* Encode the MCU data blocks */
769   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
770     block = MCU_data[blkn];
771 
772     /* We simply emit the Al'th bit of the DC coefficient value. */
773     temp = (*block)[0];
774     emit_bits(entropy, (unsigned int)(temp >> Al), 1);
775   }
776 
777   cinfo->dest->next_output_byte = entropy->next_output_byte;
778   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
779 
780   /* Update restart-interval state too */
781   if (cinfo->restart_interval) {
782     if (entropy->restarts_to_go == 0) {
783       entropy->restarts_to_go = cinfo->restart_interval;
784       entropy->next_restart_num++;
785       entropy->next_restart_num &= 7;
786     }
787     entropy->restarts_to_go--;
788   }
789 
790   return TRUE;
791 }
792 
793 
/*
 * Data preparation for encode_mcu_AC_refine().
 *
 * COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) scans the first Sl coefficients
 * in the order given by jpeg_natural_order_start.  For every position it
 * stores the point-transformed magnitude in absvalues[k]; for each nonzero
 * magnitude it sets bit k of zerobits, and bit k of signbits when the
 * coefficient is non-negative.  EOB is set to k + koffset for the last
 * position whose magnitude is exactly 1.
 * The macro relies on these names in the expanding scope: k, temp, temp2,
 * block, jpeg_natural_order_start, Al, absvalues, zerobits, signbits, EOB.
 */

#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  /* It is convenient to make a pre-pass to determine the transformed \
   * coefficients' absolute values and the EOB position. \
   */ \
  for (k = 0; k < (Sl); k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    if (temp != 0) { \
      zerobits |= ((size_t)1U) << k; \
      signbits |= ((size_t)(temp2 + 1)) << k; \
    } \
    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
    if (temp == 1) \
      EOB = k + (koffset);      /* EOB = index of last newly-nonzero coef */ \
  } \
}
821 
822 METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,JCOEF * absvalues,size_t * bits)823 encode_mcu_AC_refine_prepare(const JCOEF *block,
824                              const int *jpeg_natural_order_start, int Sl,
825                              int Al, JCOEF *absvalues, size_t *bits)
826 {
827   register int k, temp, temp2;
828   int EOB = 0;
829   size_t zerobits = 0U, signbits = 0U;
830   int Sl0 = Sl;
831 
832 #if SIZEOF_SIZE_T == 4
833   if (Sl0 > 32)
834     Sl0 = 32;
835 #endif
836 
837   COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);
838 
839   bits[0] = zerobits;
840 #if SIZEOF_SIZE_T == 8
841   bits[1] = signbits;
842 #else
843   bits[2] = signbits;
844 
845   zerobits = 0U;
846   signbits = 0U;
847 
848   if (Sl > 32) {
849     Sl -= 32;
850     jpeg_natural_order_start += 32;
851     absvalues += 32;
852 
853     COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
854   }
855 
856   bits[1] = zerobits;
857   bits[3] = signbits;
858 #endif
859 
860   return EOB;
861 }
862 
863 
864 /*
865  * MCU encoding for AC successive approximation refinement scan.
866  */
867 
/* ENCODE_COEFS_AC_REFINE(label) consumes the zerobits/signbits bitmaps for
 * an AC refinement scan.  Each pass skips the zero run reported by
 * count_zeroes(), emits any ZRL symbols that are needed, and then either
 * buffers a correction bit (magnitude > 1) or emits a newly-nonzero
 * coefficient together with the correction bits buffered so far.
 * `label` is pasted in just after the skip so that a caller can jump into
 * the first iteration.
 * The macro relies on these names in the expanding scope: zerobits,
 * signbits, idx, r, cabsvalue, EOBPTR, temp, BR, BR_buffer, and entropy.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* emit any pending EOBRUN and the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Emit ZRL */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Emit buffered correction bits that must be associated with ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    /* If the coef was previously nonzero, it only needs a correction bit. \
     * NOTE: a straight translation of the spec's figure G.7 would suggest \
     * that we also need to test r > 15.  But if r > 15, we can only get here \
     * if k > EOB, which implies that this coefficient is not 1. \
     */ \
    if (temp > 1) { \
      /* The correction bit is the next bit of the absolute value. */ \
      BR_buffer[BR++] = (char)(temp & 1); \
      signbits >>= 1; \
      zerobits >>= 1; \
      continue; \
    } \
    \
    /* Emit any pending EOBRUN and the BE correction bits */ \
    emit_eobrun(entropy); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    \
    /* Emit output bit for newly-nonzero coef */ \
    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    emit_bits(entropy, (unsigned int)temp, 1); \
    \
    /* Emit buffered correction bits that must be associated with this code */ \
    emit_buffered_bits(entropy, BR_buffer, BR); \
    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    BR = 0; \
    r = 0;                      /* reset zero run length */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
922 
923 METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo,JBLOCKROW * MCU_data)924 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
925 {
926   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
927   register int temp, r, idx;
928   char *BR_buffer;
929   unsigned int BR;
930   int Sl = cinfo->Se - cinfo->Ss + 1;
931   int Al = cinfo->Al;
932   JCOEF absvalues_unaligned[DCTSIZE2 + 15];
933   JCOEF *absvalues;
934   const JCOEF *cabsvalue, *EOBPTR;
935   size_t zerobits, signbits;
936   size_t bits[16 / SIZEOF_SIZE_T];
937 
938   entropy->next_output_byte = cinfo->dest->next_output_byte;
939   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
940 
941   /* Emit restart marker if needed */
942   if (cinfo->restart_interval)
943     if (entropy->restarts_to_go == 0)
944       emit_restart(entropy, entropy->next_restart_num);
945 
946 #ifdef WITH_SIMD
947   cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
948 #else
949   /* Not using SIMD, so alignment is not needed */
950   cabsvalue = absvalues = absvalues_unaligned;
951 #endif
952 
953   /* Prepare data */
954   EOBPTR = absvalues +
955     entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
956                                Sl, Al, absvalues, bits);
957 
958   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
959 
960   r = 0;                        /* r = run length of zeros */
961   BR = 0;                       /* BR = count of buffered bits added now */
962   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
963 
964   zerobits = bits[0];
965 #if SIZEOF_SIZE_T == 8
966   signbits = bits[1];
967 #else
968   signbits = bits[2];
969 #endif
970   ENCODE_COEFS_AC_REFINE((void)0;);
971 
972 #if SIZEOF_SIZE_T == 4
973   zerobits = bits[1];
974   signbits = bits[3];
975 
976   if (zerobits) {
977     int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
978     idx = count_zeroes(&zerobits);
979     signbits >>= idx;
980     idx += diff;
981     r += idx;
982     cabsvalue += idx;
983     goto first_iter_ac_refine;
984   }
985 
986   ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
987 #endif
988 
989   r |= (int)((absvalues + Sl) - cabsvalue);
990 
991   if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
992     entropy->EOBRUN++;          /* count an EOB */
993     entropy->BE += BR;          /* concat my correction bits to older ones */
994     /* We force out the EOB if we risk either:
995      * 1. overflow of the EOB counter;
996      * 2. overflow of the correction bit buffer during the next MCU.
997      */
998     if (entropy->EOBRUN == 0x7FFF ||
999         entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
1000       emit_eobrun(entropy);
1001   }
1002 
1003   cinfo->dest->next_output_byte = entropy->next_output_byte;
1004   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1005 
1006   /* Update restart-interval state too */
1007   if (cinfo->restart_interval) {
1008     if (entropy->restarts_to_go == 0) {
1009       entropy->restarts_to_go = cinfo->restart_interval;
1010       entropy->next_restart_num++;
1011       entropy->next_restart_num &= 7;
1012     }
1013     entropy->restarts_to_go--;
1014   }
1015 
1016   return TRUE;
1017 }
1018 
1019 
1020 /*
1021  * Finish up at the end of a Huffman-compressed progressive scan.
1022  */
1023 
1024 METHODDEF(void)
finish_pass_phuff(j_compress_ptr cinfo)1025 finish_pass_phuff(j_compress_ptr cinfo)
1026 {
1027   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1028 
1029   entropy->next_output_byte = cinfo->dest->next_output_byte;
1030   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1031 
1032   /* Flush out any buffered data */
1033   emit_eobrun(entropy);
1034   flush_bits(entropy);
1035 
1036   cinfo->dest->next_output_byte = entropy->next_output_byte;
1037   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1038 }
1039 
1040 
1041 /*
1042  * Finish up a statistics-gathering pass and create the new Huffman tables.
1043  */
1044 
1045 METHODDEF(void)
finish_pass_gather_phuff(j_compress_ptr cinfo)1046 finish_pass_gather_phuff(j_compress_ptr cinfo)
1047 {
1048   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1049   boolean is_DC_band;
1050   int ci, tbl;
1051   jpeg_component_info *compptr;
1052   JHUFF_TBL **htblptr;
1053   boolean did[NUM_HUFF_TBLS];
1054 
1055   /* Flush out buffered data (all we care about is counting the EOB symbol) */
1056   emit_eobrun(entropy);
1057 
1058   is_DC_band = (cinfo->Ss == 0);
1059 
1060   /* It's important not to apply jpeg_gen_optimal_table more than once
1061    * per table, because it clobbers the input frequency counts!
1062    */
1063   MEMZERO(did, sizeof(did));
1064 
1065   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1066     compptr = cinfo->cur_comp_info[ci];
1067     if (is_DC_band) {
1068       if (cinfo->Ah != 0)       /* DC refinement needs no table */
1069         continue;
1070       tbl = compptr->dc_tbl_no;
1071     } else {
1072       tbl = compptr->ac_tbl_no;
1073     }
1074     if (!did[tbl]) {
1075       if (is_DC_band)
1076         htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1077       else
1078         htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1079       if (*htblptr == NULL)
1080         *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1081       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1082       did[tbl] = TRUE;
1083     }
1084   }
1085 }
1086 
1087 
1088 /*
1089  * Module initialization routine for progressive Huffman entropy encoding.
1090  */
1091 
1092 GLOBAL(void)
jinit_phuff_encoder(j_compress_ptr cinfo)1093 jinit_phuff_encoder(j_compress_ptr cinfo)
1094 {
1095   phuff_entropy_ptr entropy;
1096   int i;
1097 
1098   entropy = (phuff_entropy_ptr)
1099     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1100                                 sizeof(phuff_entropy_encoder));
1101   cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1102   entropy->pub.start_pass = start_pass_phuff;
1103 
1104   /* Mark tables unallocated */
1105   for (i = 0; i < NUM_HUFF_TBLS; i++) {
1106     entropy->derived_tbls[i] = NULL;
1107     entropy->count_ptrs[i] = NULL;
1108   }
1109   entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
1110 }
1111 
1112 #endif /* C_PROGRESSIVE_SUPPORTED */
1113