1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19
20 /*
21 Bounded package merge algorithm, based on the paper
22 "A Fast and Space-Economical Algorithm for Length-Limited Coding
23 Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
24 */
25
26 #include "katajainen.h"
27 #include <assert.h>
28 #include <stdlib.h>
29
30 typedef struct Node Node;
31
32 /*
33 Nodes forming chains. Also used to represent leaves.
34 */
35 struct Node {
36 size_t weight; /* Total weight (symbol count) of this chain. */
37 Node* tail; /* Previous node(s) of this chain, or 0 if none. */
38 int count; /* Leaf symbol index, or number of leaves before this chain. */
39 char inuse; /* Tracking for garbage collection. */
40 };
41
42 /*
43 Memory pool for nodes.
44 */
45 typedef struct NodePool {
46 Node* nodes; /* The pool. */
47 Node* next; /* Pointer to a possibly free node in the pool. */
48 int size; /* Size of the memory pool. */
49 } NodePool;
50
51 /*
52 Initializes a chain node with the given values and marks it as in use.
53 */
InitNode(size_t weight,int count,Node * tail,Node * node)54 static void InitNode(size_t weight, int count, Node* tail, Node* node) {
55 node->weight = weight;
56 node->count = count;
57 node->tail = tail;
58 node->inuse = 1;
59 }
60
61 /*
62 Finds a free location in the memory pool. Performs garbage collection if needed.
63 lists: If given, used to mark in-use nodes during garbage collection.
64 maxbits: Size of lists.
65 pool: Memory pool to get free node from.
66 */
GetFreeNode(Node * (* lists)[2],int maxbits,NodePool * pool)67 static Node* GetFreeNode(Node* (*lists)[2], int maxbits, NodePool* pool) {
68 for (;;) {
69 if (pool->next >= &pool->nodes[pool->size]) {
70 /* Garbage collection. */
71 int i;
72 for (i = 0; i < pool->size; i++) {
73 pool->nodes[i].inuse = 0;
74 }
75 if (lists) {
76 for (i = 0; i < maxbits * 2; i++) {
77 Node* node;
78 for (node = lists[i / 2][i % 2]; node; node = node->tail) {
79 node->inuse = 1;
80 }
81 }
82 }
83 pool->next = &pool->nodes[0];
84 }
85 if (!pool->next->inuse) break; /* Found one. */
86 pool->next++;
87 }
88 return pool->next++;
89 }
90
91
92 /*
93 Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
94 new chain is, depending on the weights, a leaf or a combination of two chains
95 from the previous list.
96 lists: The lists of chains.
97 maxbits: Number of lists.
98 leaves: The leaves, one per symbol.
99 numsymbols: Number of leaves.
100 pool: the node memory pool.
101 index: The index of the list in which a new chain or leaf is required.
102 final: Whether this is the last time this function is called. If it is then it
103 is no more needed to recursively call self.
104 */
BoundaryPM(Node * (* lists)[2],int maxbits,Node * leaves,int numsymbols,NodePool * pool,int index,char final)105 static void BoundaryPM(Node* (*lists)[2], int maxbits,
106 Node* leaves, int numsymbols, NodePool* pool, int index, char final) {
107 Node* newchain;
108 Node* oldchain;
109 int lastcount = lists[index][1]->count; /* Count of last chain of list. */
110
111 if (index == 0 && lastcount >= numsymbols) return;
112
113 newchain = GetFreeNode(lists, maxbits, pool);
114 oldchain = lists[index][1];
115
116 /* These are set up before the recursive calls below, so that there is a list
117 pointing to the new node, to let the garbage collection know it's in use. */
118 lists[index][0] = oldchain;
119 lists[index][1] = newchain;
120
121 if (index == 0) {
122 /* New leaf node in list 0. */
123 InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
124 } else {
125 size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
126 if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
127 /* New leaf inserted in list, so count is incremented. */
128 InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
129 newchain);
130 } else {
131 InitNode(sum, lastcount, lists[index - 1][1], newchain);
132 if (!final) {
133 /* Two lookahead chains of previous list used up, create new ones. */
134 BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
135 BoundaryPM(lists, maxbits, leaves, numsymbols, pool, index - 1, 0);
136 }
137 }
138 }
139 }
140
141 /*
142 Initializes each list with as lookahead chains the two leaves with lowest
143 weights.
144 */
InitLists(NodePool * pool,const Node * leaves,int maxbits,Node * (* lists)[2])145 static void InitLists(
146 NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
147 int i;
148 Node* node0 = GetFreeNode(0, maxbits, pool);
149 Node* node1 = GetFreeNode(0, maxbits, pool);
150 InitNode(leaves[0].weight, 1, 0, node0);
151 InitNode(leaves[1].weight, 2, 0, node1);
152 for (i = 0; i < maxbits; i++) {
153 lists[i][0] = node0;
154 lists[i][1] = node1;
155 }
156 }
157
158 /*
159 Converts result of boundary package-merge to the bitlengths. The result in the
160 last chain of the last list contains the amount of active leaves in each list.
161 chain: Chain to extract the bit length from (last chain from last list).
162 */
ExtractBitLengths(Node * chain,Node * leaves,unsigned * bitlengths)163 static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
164 Node* node;
165 for (node = chain; node; node = node->tail) {
166 int i;
167 for (i = 0; i < node->count; i++) {
168 bitlengths[leaves[i].count]++;
169 }
170 }
171 }
172
173 /*
174 Comparator for sorting the leaves. Has the function signature for qsort.
175 */
LeafComparator(const void * a,const void * b)176 static int LeafComparator(const void* a, const void* b) {
177 return ((const Node*)a)->weight - ((const Node*)b)->weight;
178 }
179
ZopfliLengthLimitedCodeLengths(const size_t * frequencies,int n,int maxbits,unsigned * bitlengths)180 int ZopfliLengthLimitedCodeLengths(
181 const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
182 NodePool pool;
183 int i;
184 int numsymbols = 0; /* Amount of symbols with frequency > 0. */
185 int numBoundaryPMRuns;
186
187 /* Array of lists of chains. Each list requires only two lookahead chains at
188 a time, so each list is a array of two Node*'s. */
189 Node* (*lists)[2];
190
191 /* One leaf per symbol. Only numsymbols leaves will be used. */
192 Node* leaves = (Node*)malloc(n * sizeof(*leaves));
193
194 /* Initialize all bitlengths at 0. */
195 for (i = 0; i < n; i++) {
196 bitlengths[i] = 0;
197 }
198
199 /* Count used symbols and place them in the leaves. */
200 for (i = 0; i < n; i++) {
201 if (frequencies[i]) {
202 leaves[numsymbols].weight = frequencies[i];
203 leaves[numsymbols].count = i; /* Index of symbol this leaf represents. */
204 numsymbols++;
205 }
206 }
207
208 /* Check special cases and error conditions. */
209 if ((1 << maxbits) < numsymbols) {
210 free(leaves);
211 return 1; /* Error, too few maxbits to represent symbols. */
212 }
213 if (numsymbols == 0) {
214 free(leaves);
215 return 0; /* No symbols at all. OK. */
216 }
217 if (numsymbols == 1) {
218 bitlengths[leaves[0].count] = 1;
219 free(leaves);
220 return 0; /* Only one symbol, give it bitlength 1, not 0. OK. */
221 }
222
223 /* Sort the leaves from lightest to heaviest. */
224 qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
225
226 /* Initialize node memory pool. */
227 pool.size = 2 * maxbits * (maxbits + 1);
228 pool.nodes = (Node*)malloc(pool.size * sizeof(*pool.nodes));
229 pool.next = pool.nodes;
230 for (i = 0; i < pool.size; i++) {
231 pool.nodes[i].inuse = 0;
232 }
233
234 lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
235 InitLists(&pool, leaves, maxbits, lists);
236
237 /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
238 are already created in the initialization. Each BoundaryPM run creates one. */
239 numBoundaryPMRuns = 2 * numsymbols - 4;
240 for (i = 0; i < numBoundaryPMRuns; i++) {
241 char final = i == numBoundaryPMRuns - 1;
242 BoundaryPM(lists, maxbits, leaves, numsymbols, &pool, maxbits - 1, final);
243 }
244
245 ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
246
247 free(lists);
248 free(leaves);
249 free(pool.nodes);
250 return 0; /* OK. */
251 }
252