• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Google Inc. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.google.typography.font.tools.conversion.eot;
18 
19 /**
20  * Implement LZCOMP compression algorithm as defined in MicroType Express, part of the EOT
21  * draft spec at {@link "http://www.w3.org/Submission/MTX/"}
22  *
23  * Java implementation based on http://www.w3.org/Submission/MTX/ reference code
24  *
25  * @author Raph Levien
26  */
27 public class LzcompCompress {
28 
29   private static final int MAX_2BYTE_DIST = 512;
30   private static final int DIST_MIN = 1;
31   private static final int DIST_WIDTH = 3;
32   private static final int LEN_MIN = 2;
33   private static final int LEN_MIN3 = 3;
34   private static final int LEN_WIDTH = 3;
35   private static final int BIT_RANGE = LEN_WIDTH - 1;
36   private static final int PRELOAD_SIZE = 2 * 32 * 96 + 4 * 256;
37   private static final int DEFAULT_MAX_COPY_DIST = 0x7fffffff;
38 
39   private BitIOWriter bits;
40   private boolean usingRunLength;
41   private int length1;
42   private int maxCopyDist = DEFAULT_MAX_COPY_DIST;
43   private HuffmanEncoder distEncoder;
44   private HuffmanEncoder lenEncoder;
45   private HuffmanEncoder symEncoder;
46   private int numDistRanges;
47   private int distMax;
48   private int dup2;
49   private int dup4;
50   private int dup6;
51   private int numSyms;
52   private byte[] buf;
53   private HashNode[] hashTable;
54 
55   private static class HashNode {
56     int index;
57     HashNode next;
58   }
59 
LzcompCompress()60   private LzcompCompress() {
61     bits = new BitIOWriter();
62     usingRunLength = false;
63   }
64 
write(byte[] dataIn)65   private void write(byte[] dataIn) {
66     bits.writeBit(usingRunLength);
67     length1 = dataIn.length;
68     // If we want to do runlengths, here's the place, but I'm not convinced it's useful
69     setDistRange(length1);
70     distEncoder = new HuffmanEncoder(bits, 1 << DIST_WIDTH);
71     lenEncoder = new HuffmanEncoder(bits, 1 << LEN_WIDTH);
72     symEncoder = new HuffmanEncoder(bits, numSyms);
73     buf = new byte[PRELOAD_SIZE + length1];
74     System.arraycopy(dataIn, 0, buf, PRELOAD_SIZE, length1);
75     encode();
76     bits.flush();
77   }
78 
setDistRange(int length)79   void setDistRange(int length) {
80     numDistRanges = 1;
81     distMax = DIST_MIN + (1 << (DIST_WIDTH * numDistRanges)) - 1;
82     while (distMax < length1) {
83       numDistRanges++;
84       distMax = DIST_MIN + (1 << (DIST_WIDTH * numDistRanges)) - 1;
85     }
86     dup2 = 256 + (1 << LEN_WIDTH) * numDistRanges;
87     dup4 = dup2 + 1;
88     dup6 = dup4 + 1;
89     numSyms = dup6 + 1;
90   }
91 
encode()92   private void encode() {
93     int maxIndex = length1 + PRELOAD_SIZE;
94     initializeModel();
95     bits.writeValue(length1, 24);
96     int limit = length1 + PRELOAD_SIZE;
97     int[] dist = new int[1];
98     for (int i = PRELOAD_SIZE; i < limit; ) {
99       int here = i;
100       int len = makeCopyDecision(i++, dist);
101       if (len > 0) {
102         int distRanges = getNumDistRanges(dist[0]);
103         encodeLength(len, dist[0], distRanges);
104         encodeDistance2(dist[0], distRanges);
105         for (int j = 1; j < len; j++) {
106           updateModel(i++);
107         }
108       } else {
109         byte c = buf[here];
110         if (here >= 2 && c == buf[here - 2]) {
111           symEncoder.writeSymbol(dup2);
112         } else if (here >= 4 && c == buf[here - 4]) {
113           symEncoder.writeSymbol(dup4);
114         } else if (here >= 6 && c == buf[here - 6]) {
115           symEncoder.writeSymbol(dup6);
116         } else {
117           symEncoder.writeSymbol(buf[here] & 0xff);
118         }
119       }
120     }
121   }
initializeModel()122   void initializeModel() {
123     hashTable = new HashNode[0x10000];
124     int i = 0;
125     for (int k = 0; k < 32; k++) {
126       for (int j = 0; j < 96; j++) {
127         buf[i] = (byte)k;
128         updateModel(i++);
129         buf[i]= (byte)j;
130         updateModel(i++);
131       }
132     }
133     for (int j = 0; i < PRELOAD_SIZE && j < 256; j++) {
134       buf[i] = (byte)j;
135       updateModel(i++);
136       buf[i] = (byte)j;
137       updateModel(i++);
138       buf[i] = (byte)j;
139       updateModel(i++);
140       buf[i] = (byte)j;
141       updateModel(i++);
142     }
143   }
144 
makeCopyDecision(int index, int[] bestDist)145   private int makeCopyDecision(int index, int[] bestDist) {
146     int[] dist1 = new int[1];
147     int[] gain1 = new int[1];
148     int[] costPerByte1 = new int[1];
149     int here = index;
150     int len1 = findMatch(index, dist1, gain1, costPerByte1);
151     updateModel(index++);
152     if (gain1[0] > 0) {
153       int[] dist2 = new int[1];
154       int[] gain2 = new int[1];
155       int[] costPerByte2 = new int[1];
156       int len2 = findMatch(index, dist2, gain2, costPerByte2);
157       int symbolCost = symEncoder.writeSymbolCost(buf[here] & 0xff);
158       if (gain2[0] >= gain1[0] && costPerByte1[0] > (costPerByte2[0] * len2 + symbolCost) /
159           (len2 + 1)) {
160         len1 = 0;
161       } else if (len1 > 3) {
162         len2 = findMatch(here + len1, dist2, gain2, costPerByte2);
163         if (len2 >= 2) {
164           int[] dist3 = new int[1];
165           int[] gain3 = new int[1];
166           int[] costPerByte3 = new int[1];
167           int len3 = findMatch(here + len1 - 1, dist3, gain3, costPerByte3);
168           if (len3 > len2 && costPerByte3[0] < costPerByte2[0]) {
169             int distRanges = getNumDistRanges(dist1[0] + 1);
170             int lenBitCount = encodeLengthCost(len1 - 1, dist1[0] + 1, distRanges);
171             int distBitCount = encodeDistance2Cost(dist1[0] + 1, distRanges);
172             int cost1B = lenBitCount + distBitCount + costPerByte3[0] * len3;
173             int cost1A = costPerByte1[0] * len1 + costPerByte2[0] * len2;
174             if ((cost1A / (len1 + len2)) > (cost1B / (len1 - 1 + len3))) {
175               len1--;
176               dist1[0]++;
177             }
178           }
179         }
180       }
181       if (len1 == 2) {
182         if (here >= 2 && buf[here] == buf[here - 2]) {
183           int dup2Cost = symEncoder.writeSymbolCost(dup2);
184           if (costPerByte1[0] * 2 > dup2Cost + symEncoder.writeSymbolCost(buf[here + 1] & 0xff)) {
185             len1 = 0;
186           }
187         } else if (here >= 1 && here + 1 < buf.length && buf[here + 1] == buf[here - 1]) {
188           int dup2Cost = symEncoder.writeSymbolCost(dup2);
189           if (costPerByte1[0] * 2 > symbolCost + dup2Cost) {
190             len1 = 0;
191           }
192         }
193       }
194     }
195     bestDist[0] = dist1[0];
196     return len1;
197   }
198 
199   // consider refactoring signature to return PotentialMatch object with fields set...
findMatch(int index, int[] distOut, int[] gainOut, int[] costPerByteOut)200   int findMatch(int index, int[] distOut, int[] gainOut, int[] costPerByteOut) {
201     final int maxCostCacheLength = 32;
202     int[] literalCostCache = new int[maxCostCacheLength + 1];
203     int maxIndexMinusIndex = buf.length - index;
204     int bestLength = 0;
205     int bestDist = 0;
206     int bestGain = 0;
207     int bestCopyCost = 0;
208     int maxComputedLength = 0;
209     if (maxIndexMinusIndex > 1) {
210       int pos = ((buf[index] & 0xff) << 8) | (buf[index + 1] & 0xff);
211       HashNode prevNode = null;
212       int hNodeCount = 0;
213       for (HashNode hNode = hashTable[pos]; hNode != null; prevNode = hNode, hNode = hNode.next) {
214         int i = hNode.index;
215         int dist = index - i;
216         hNodeCount++;
217         if (hNodeCount > 256 || dist > maxCopyDist) {
218           if (hashTable[pos] == hNode) {
219             hashTable[pos] = null;
220           } else {
221             prevNode.next = null;
222           }
223           break;
224         }
225         int maxLen = index - i;
226         if (maxIndexMinusIndex < maxLen) {
227           maxLen = maxIndexMinusIndex;
228         }
229         if (maxLen < LEN_MIN) {
230           continue;
231         }
232         i += 2;
233         int length = 2;
234         for (length = 2; length < maxLen && buf[i] == buf[index + length]; length++) {
235           i++;
236         }
237         if (length < LEN_MIN) {
238           continue;
239         }
240         dist = dist - length + 1;
241         if (dist > distMax || (length == 2&& dist >= MAX_2BYTE_DIST)) {
242           continue;
243         }
244         if (length <= bestLength && dist > bestDist) {
245           if (length <= bestLength - 2) {
246             continue;
247           }
248           if (dist > (bestDist << DIST_WIDTH)) {
249             if (length < bestLength || dist > (bestDist << (DIST_WIDTH + 1))) {
250               continue;
251             }
252           }
253         }
254         int literalCost = 0;
255         if (length > maxComputedLength) {
256           int limit = length;
257           if (limit > maxCostCacheLength) limit = maxCostCacheLength;
258           for (i = maxComputedLength; i < limit; i++) {
259             byte c = buf[index + i];
260             literalCostCache[i + 1] = literalCostCache[i] + symEncoder.writeSymbolCost(c & 0xff);
261           }
262           maxComputedLength = limit;
263           if (length > maxCostCacheLength) {
264             literalCost = literalCostCache[maxCostCacheLength];
265             literalCost += literalCost / maxCostCacheLength * (length - maxCostCacheLength);
266           } else {
267             literalCost = literalCostCache[length];
268           }
269         } else {
270           literalCost = literalCostCache[length];
271         }
272         if (literalCost > bestGain) {
273           int distRanges = getNumDistRanges(dist);
274           int copyCost = encodeLengthCost(length, dist, distRanges);
275           if (literalCost - copyCost - (distRanges << 16) > bestGain) {
276             copyCost += encodeDistance2Cost(dist, distRanges);
277             int gain = literalCost - copyCost;
278             if (gain > bestGain) {
279               bestGain = gain;
280               bestLength = length;
281               bestDist = dist;
282               bestCopyCost = copyCost;
283             }
284           }
285         }
286       }
287     }
288     costPerByteOut[0] = bestLength > 0 ? bestCopyCost / bestLength : 0;
289     distOut[0] = bestDist;
290     gainOut[0] = bestGain;
291     return bestLength;
292   }
293 
getNumDistRanges(int dist)294   private int getNumDistRanges(int dist) {
295     int bitsNeeded = HuffmanEncoder.bitsUsed(dist - DIST_MIN);
296     int distRanges = (bitsNeeded + DIST_WIDTH - 1) / DIST_WIDTH;
297     return distRanges;
298   }
299 
encodeLength(int value, int dist, int numDistRanges)300   private void encodeLength(int value, int dist, int numDistRanges) {
301     if (dist >= MAX_2BYTE_DIST) {
302       value -= LEN_MIN3;
303     } else {
304       value -= LEN_MIN;
305     }
306     int bitsUsed = HuffmanEncoder.bitsUsed(value);
307     int i = BIT_RANGE;
308     while (i < bitsUsed) {
309       i += BIT_RANGE;
310     }
311     int mask = 1 << (i - 1);
312     int symbol = bitsUsed > BIT_RANGE ? 2 : 0;
313     if ((value & mask) != 0) {
314       symbol |= 1;
315     }
316     mask >>= 1;
317     symbol <<= 1;
318     if ((value & mask) != 0) {
319       symbol |= 1;
320     }
321     mask >>= 1;
322     symEncoder.writeSymbol(256 + symbol + (numDistRanges - 1) * (1 << LEN_WIDTH));
323     for (i = bitsUsed - BIT_RANGE; i >= 1; i -= BIT_RANGE) {
324       symbol = i > BIT_RANGE ? 2 : 0;
325       if ((value & mask) != 0) {
326         symbol |= 1;
327       }
328       mask >>= 1;
329       symbol <<= 1;
330       if ((value & mask) != 0) {
331         symbol |= 1;
332       }
333       mask >>= 1;
334       lenEncoder.writeSymbol(symbol);
335     }
336   }
337 
encodeLengthCost(int value, int dist, int numDistRanges)338   private int encodeLengthCost(int value, int dist, int numDistRanges) {
339     if (dist >= MAX_2BYTE_DIST) {
340       value -= LEN_MIN3;
341     } else {
342       value -= LEN_MIN;
343     }
344     int bitsUsed = HuffmanEncoder.bitsUsed(value);
345     int i = BIT_RANGE;
346     while (i < bitsUsed) {
347       i += BIT_RANGE;
348     }
349     int mask = 1 << (i - 1);
350     int symbol = bitsUsed > BIT_RANGE ? 2 : 0;
351     if ((value & mask) != 0) {
352       symbol |= 1;
353     }
354     mask >>= 1;
355     symbol <<= 1;
356     if ((value & mask) != 0) {
357       symbol |= 1;
358     }
359     mask >>= 1;
360     int cost = symEncoder.writeSymbolCost(256 + symbol + (numDistRanges - 1) * (1 << LEN_WIDTH));
361     for (i = bitsUsed - BIT_RANGE; i >= 1; i -= BIT_RANGE) {
362       symbol = i > BIT_RANGE ? 2 : 0;
363       if ((value & mask) != 0) {
364         symbol |= 1;
365       }
366       mask >>= 1;
367       symbol <<= 1;
368       if ((value & mask) != 0) {
369         symbol |= 1;
370       }
371       mask >>= 1;
372       cost += lenEncoder.writeSymbolCost(symbol);
373     }
374     return cost;
375   }
376 
encodeDistance2(int value, int distRanges)377   private void encodeDistance2(int value, int distRanges) {
378     value -= DIST_MIN;
379     final int mask = (1 << DIST_WIDTH) - 1;
380     for (int i = (distRanges - 1) * DIST_WIDTH; i >= 0; i -= DIST_WIDTH) {
381       distEncoder.writeSymbol((value >> i) & mask);
382     }
383   }
384 
encodeDistance2Cost(int value, int distRanges)385   private int encodeDistance2Cost(int value, int distRanges) {
386     int cost = 0;
387     value -= DIST_MIN;
388     final int mask = (1 << DIST_WIDTH) - 1;
389     for (int i = (distRanges - 1) * DIST_WIDTH; i >= 0; i -= DIST_WIDTH) {
390       cost += distEncoder.writeSymbolCost((value >> i) & mask);
391     }
392     return cost;
393   }
394 
updateModel(int index)395   private void updateModel(int index) {
396     byte c = buf[index];
397     if (index > 0) {
398       HashNode hNode = new HashNode();
399       byte prevC = buf[index - 1];
400       int pos = ((prevC & 0xff) << 8) | (c & 0xff);
401       hNode.index = index - 1;
402       hNode.next = hashTable[pos];
403       hashTable[pos] = hNode;
404     }
405   }
406 
toByteArray()407   private byte[] toByteArray() {
408     return bits.toByteArray();
409   }
410 
compress(byte[] dataIn)411   public static byte[] compress(byte[] dataIn) {
412     LzcompCompress compressor = new LzcompCompress();
413     compressor.write(dataIn);
414     return compressor.toByteArray();
415   }
416 
getPreloadSize()417   public static int getPreloadSize() {
418     return PRELOAD_SIZE;
419   }
420 }
421