1 /* 2 * Copyright 2011 Google Inc. All Rights Reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.google.typography.font.tools.conversion.eot; 18 19 /** 20 * Implement LZCOMP compression algorithm as defined in MicroType Express, part of the EOT 21 * draft spec at {@link "http://www.w3.org/Submission/MTX/"} 22 * 23 * Java implementation based on http://www.w3.org/Submission/MTX/ reference code 24 * 25 * @author Raph Levien 26 */ 27 public class LzcompCompress { 28 29 private static final int MAX_2BYTE_DIST = 512; 30 private static final int DIST_MIN = 1; 31 private static final int DIST_WIDTH = 3; 32 private static final int LEN_MIN = 2; 33 private static final int LEN_MIN3 = 3; 34 private static final int LEN_WIDTH = 3; 35 private static final int BIT_RANGE = LEN_WIDTH - 1; 36 private static final int PRELOAD_SIZE = 2 * 32 * 96 + 4 * 256; 37 private static final int DEFAULT_MAX_COPY_DIST = 0x7fffffff; 38 39 private BitIOWriter bits; 40 private boolean usingRunLength; 41 private int length1; 42 private int maxCopyDist = DEFAULT_MAX_COPY_DIST; 43 private HuffmanEncoder distEncoder; 44 private HuffmanEncoder lenEncoder; 45 private HuffmanEncoder symEncoder; 46 private int numDistRanges; 47 private int distMax; 48 private int dup2; 49 private int dup4; 50 private int dup6; 51 private int numSyms; 52 private byte[] buf; 53 private HashNode[] hashTable; 54 55 private static class HashNode { 56 int index; 57 HashNode next; 58 } 59 LzcompCompress()60 private LzcompCompress() { 61 bits = new BitIOWriter(); 62 usingRunLength = false; 63 } 64 write(byte[] dataIn)65 private void write(byte[] dataIn) { 66 bits.writeBit(usingRunLength); 67 length1 = dataIn.length; 68 // If we want to do runlengths, here's the place, but I'm not convinced it's useful 69 setDistRange(length1); 70 distEncoder = new HuffmanEncoder(bits, 1 << DIST_WIDTH); 71 lenEncoder = new HuffmanEncoder(bits, 1 << LEN_WIDTH); 72 symEncoder = new HuffmanEncoder(bits, numSyms); 73 buf = new byte[PRELOAD_SIZE + length1]; 74 System.arraycopy(dataIn, 0, buf, PRELOAD_SIZE, length1); 75 encode(); 76 bits.flush(); 77 } 78 setDistRange(int length)79 void setDistRange(int length) { 80 numDistRanges = 1; 81 distMax = DIST_MIN + (1 << (DIST_WIDTH * numDistRanges)) - 1; 82 while (distMax < length1) { 83 numDistRanges++; 84 distMax = DIST_MIN + (1 << (DIST_WIDTH * numDistRanges)) - 1; 85 } 86 dup2 = 256 + (1 << LEN_WIDTH) * numDistRanges; 87 dup4 = dup2 + 1; 88 dup6 = dup4 + 1; 89 numSyms = dup6 + 1; 90 } 91 encode()92 private void encode() { 93 int maxIndex = length1 + PRELOAD_SIZE; 94 initializeModel(); 95 bits.writeValue(length1, 24); 96 int limit = length1 + PRELOAD_SIZE; 97 int[] dist = new int[1]; 98 for (int i = PRELOAD_SIZE; i < limit; ) { 99 int here = i; 100 int len = makeCopyDecision(i++, dist); 101 if (len > 0) { 102 int distRanges = getNumDistRanges(dist[0]); 103 encodeLength(len, dist[0], distRanges); 104 encodeDistance2(dist[0], distRanges); 105 for (int j = 1; j < len; j++) { 106 updateModel(i++); 107 } 108 } else { 109 byte c = buf[here]; 110 if (here >= 2 && c == buf[here - 2]) { 111 symEncoder.writeSymbol(dup2); 112 } else if (here >= 4 && c == buf[here - 4]) { 113 symEncoder.writeSymbol(dup4); 114 } else if (here >= 6 && c == buf[here - 6]) { 115 symEncoder.writeSymbol(dup6); 116 } else { 117 symEncoder.writeSymbol(buf[here] & 0xff); 118 } 119 } 120 } 121 } initializeModel()122 void initializeModel() { 123 hashTable = new HashNode[0x10000]; 124 int i = 0; 125 for (int k = 0; k < 32; k++) { 126 for (int j = 0; j < 96; j++) { 127 buf[i] = (byte)k; 128 updateModel(i++); 129 buf[i]= (byte)j; 130 updateModel(i++); 131 } 132 } 133 for (int j = 0; i < PRELOAD_SIZE && j < 256; j++) { 134 buf[i] = (byte)j; 135 updateModel(i++); 136 buf[i] = (byte)j; 137 updateModel(i++); 138 buf[i] = (byte)j; 139 updateModel(i++); 140 buf[i] = (byte)j; 141 updateModel(i++); 142 } 143 } 144 makeCopyDecision(int index, int[] bestDist)145 private int makeCopyDecision(int index, int[] bestDist) { 146 int[] dist1 = new int[1]; 147 int[] gain1 = new int[1]; 148 int[] costPerByte1 = new int[1]; 149 int here = index; 150 int len1 = findMatch(index, dist1, gain1, costPerByte1); 151 updateModel(index++); 152 if (gain1[0] > 0) { 153 int[] dist2 = new int[1]; 154 int[] gain2 = new int[1]; 155 int[] costPerByte2 = new int[1]; 156 int len2 = findMatch(index, dist2, gain2, costPerByte2); 157 int symbolCost = symEncoder.writeSymbolCost(buf[here] & 0xff); 158 if (gain2[0] >= gain1[0] && costPerByte1[0] > (costPerByte2[0] * len2 + symbolCost) / 159 (len2 + 1)) { 160 len1 = 0; 161 } else if (len1 > 3) { 162 len2 = findMatch(here + len1, dist2, gain2, costPerByte2); 163 if (len2 >= 2) { 164 int[] dist3 = new int[1]; 165 int[] gain3 = new int[1]; 166 int[] costPerByte3 = new int[1]; 167 int len3 = findMatch(here + len1 - 1, dist3, gain3, costPerByte3); 168 if (len3 > len2 && costPerByte3[0] < costPerByte2[0]) { 169 int distRanges = getNumDistRanges(dist1[0] + 1); 170 int lenBitCount = encodeLengthCost(len1 - 1, dist1[0] + 1, distRanges); 171 int distBitCount = encodeDistance2Cost(dist1[0] + 1, distRanges); 172 int cost1B = lenBitCount + distBitCount + costPerByte3[0] * len3; 173 int cost1A = costPerByte1[0] * len1 + costPerByte2[0] * len2; 174 if ((cost1A / (len1 + len2)) > (cost1B / (len1 - 1 + len3))) { 175 len1--; 176 dist1[0]++; 177 } 178 } 179 } 180 } 181 if (len1 == 2) { 182 if (here >= 2 && buf[here] == buf[here - 2]) { 183 int dup2Cost = symEncoder.writeSymbolCost(dup2); 184 if (costPerByte1[0] * 2 > dup2Cost + symEncoder.writeSymbolCost(buf[here + 1] & 0xff)) { 185 len1 = 0; 186 } 187 } else if (here >= 1 && here + 1 < buf.length && buf[here + 1] == buf[here - 1]) { 188 int dup2Cost = symEncoder.writeSymbolCost(dup2); 189 if (costPerByte1[0] * 2 > symbolCost + dup2Cost) { 190 len1 = 0; 191 } 192 } 193 } 194 } 195 bestDist[0] = dist1[0]; 196 return len1; 197 } 198 199 // consider refactoring signature to return PotentialMatch object with fields set... findMatch(int index, int[] distOut, int[] gainOut, int[] costPerByteOut)200 int findMatch(int index, int[] distOut, int[] gainOut, int[] costPerByteOut) { 201 final int maxCostCacheLength = 32; 202 int[] literalCostCache = new int[maxCostCacheLength + 1]; 203 int maxIndexMinusIndex = buf.length - index; 204 int bestLength = 0; 205 int bestDist = 0; 206 int bestGain = 0; 207 int bestCopyCost = 0; 208 int maxComputedLength = 0; 209 if (maxIndexMinusIndex > 1) { 210 int pos = ((buf[index] & 0xff) << 8) | (buf[index + 1] & 0xff); 211 HashNode prevNode = null; 212 int hNodeCount = 0; 213 for (HashNode hNode = hashTable[pos]; hNode != null; prevNode = hNode, hNode = hNode.next) { 214 int i = hNode.index; 215 int dist = index - i; 216 hNodeCount++; 217 if (hNodeCount > 256 || dist > maxCopyDist) { 218 if (hashTable[pos] == hNode) { 219 hashTable[pos] = null; 220 } else { 221 prevNode.next = null; 222 } 223 break; 224 } 225 int maxLen = index - i; 226 if (maxIndexMinusIndex < maxLen) { 227 maxLen = maxIndexMinusIndex; 228 } 229 if (maxLen < LEN_MIN) { 230 continue; 231 } 232 i += 2; 233 int length = 2; 234 for (length = 2; length < maxLen && buf[i] == buf[index + length]; length++) { 235 i++; 236 } 237 if (length < LEN_MIN) { 238 continue; 239 } 240 dist = dist - length + 1; 241 if (dist > distMax || (length == 2&& dist >= MAX_2BYTE_DIST)) { 242 continue; 243 } 244 if (length <= bestLength && dist > bestDist) { 245 if (length <= bestLength - 2) { 246 continue; 247 } 248 if (dist > (bestDist << DIST_WIDTH)) { 249 if (length < bestLength || dist > (bestDist << (DIST_WIDTH + 1))) { 250 continue; 251 } 252 } 253 } 254 int literalCost = 0; 255 if (length > maxComputedLength) { 256 int limit = length; 257 if (limit > maxCostCacheLength) limit = maxCostCacheLength; 258 for (i = maxComputedLength; i < limit; i++) { 259 byte c = buf[index + i]; 260 literalCostCache[i + 1] = literalCostCache[i] + symEncoder.writeSymbolCost(c & 0xff); 261 } 262 maxComputedLength = limit; 263 if (length > maxCostCacheLength) { 264 literalCost = literalCostCache[maxCostCacheLength]; 265 literalCost += literalCost / maxCostCacheLength * (length - maxCostCacheLength); 266 } else { 267 literalCost = literalCostCache[length]; 268 } 269 } else { 270 literalCost = literalCostCache[length]; 271 } 272 if (literalCost > bestGain) { 273 int distRanges = getNumDistRanges(dist); 274 int copyCost = encodeLengthCost(length, dist, distRanges); 275 if (literalCost - copyCost - (distRanges << 16) > bestGain) { 276 copyCost += encodeDistance2Cost(dist, distRanges); 277 int gain = literalCost - copyCost; 278 if (gain > bestGain) { 279 bestGain = gain; 280 bestLength = length; 281 bestDist = dist; 282 bestCopyCost = copyCost; 283 } 284 } 285 } 286 } 287 } 288 costPerByteOut[0] = bestLength > 0 ? bestCopyCost / bestLength : 0; 289 distOut[0] = bestDist; 290 gainOut[0] = bestGain; 291 return bestLength; 292 } 293 getNumDistRanges(int dist)294 private int getNumDistRanges(int dist) { 295 int bitsNeeded = HuffmanEncoder.bitsUsed(dist - DIST_MIN); 296 int distRanges = (bitsNeeded + DIST_WIDTH - 1) / DIST_WIDTH; 297 return distRanges; 298 } 299 encodeLength(int value, int dist, int numDistRanges)300 private void encodeLength(int value, int dist, int numDistRanges) { 301 if (dist >= MAX_2BYTE_DIST) { 302 value -= LEN_MIN3; 303 } else { 304 value -= LEN_MIN; 305 } 306 int bitsUsed = HuffmanEncoder.bitsUsed(value); 307 int i = BIT_RANGE; 308 while (i < bitsUsed) { 309 i += BIT_RANGE; 310 } 311 int mask = 1 << (i - 1); 312 int symbol = bitsUsed > BIT_RANGE ? 2 : 0; 313 if ((value & mask) != 0) { 314 symbol |= 1; 315 } 316 mask >>= 1; 317 symbol <<= 1; 318 if ((value & mask) != 0) { 319 symbol |= 1; 320 } 321 mask >>= 1; 322 symEncoder.writeSymbol(256 + symbol + (numDistRanges - 1) * (1 << LEN_WIDTH)); 323 for (i = bitsUsed - BIT_RANGE; i >= 1; i -= BIT_RANGE) { 324 symbol = i > BIT_RANGE ? 2 : 0; 325 if ((value & mask) != 0) { 326 symbol |= 1; 327 } 328 mask >>= 1; 329 symbol <<= 1; 330 if ((value & mask) != 0) { 331 symbol |= 1; 332 } 333 mask >>= 1; 334 lenEncoder.writeSymbol(symbol); 335 } 336 } 337 encodeLengthCost(int value, int dist, int numDistRanges)338 private int encodeLengthCost(int value, int dist, int numDistRanges) { 339 if (dist >= MAX_2BYTE_DIST) { 340 value -= LEN_MIN3; 341 } else { 342 value -= LEN_MIN; 343 } 344 int bitsUsed = HuffmanEncoder.bitsUsed(value); 345 int i = BIT_RANGE; 346 while (i < bitsUsed) { 347 i += BIT_RANGE; 348 } 349 int mask = 1 << (i - 1); 350 int symbol = bitsUsed > BIT_RANGE ? 2 : 0; 351 if ((value & mask) != 0) { 352 symbol |= 1; 353 } 354 mask >>= 1; 355 symbol <<= 1; 356 if ((value & mask) != 0) { 357 symbol |= 1; 358 } 359 mask >>= 1; 360 int cost = symEncoder.writeSymbolCost(256 + symbol + (numDistRanges - 1) * (1 << LEN_WIDTH)); 361 for (i = bitsUsed - BIT_RANGE; i >= 1; i -= BIT_RANGE) { 362 symbol = i > BIT_RANGE ? 2 : 0; 363 if ((value & mask) != 0) { 364 symbol |= 1; 365 } 366 mask >>= 1; 367 symbol <<= 1; 368 if ((value & mask) != 0) { 369 symbol |= 1; 370 } 371 mask >>= 1; 372 cost += lenEncoder.writeSymbolCost(symbol); 373 } 374 return cost; 375 } 376 encodeDistance2(int value, int distRanges)377 private void encodeDistance2(int value, int distRanges) { 378 value -= DIST_MIN; 379 final int mask = (1 << DIST_WIDTH) - 1; 380 for (int i = (distRanges - 1) * DIST_WIDTH; i >= 0; i -= DIST_WIDTH) { 381 distEncoder.writeSymbol((value >> i) & mask); 382 } 383 } 384 encodeDistance2Cost(int value, int distRanges)385 private int encodeDistance2Cost(int value, int distRanges) { 386 int cost = 0; 387 value -= DIST_MIN; 388 final int mask = (1 << DIST_WIDTH) - 1; 389 for (int i = (distRanges - 1) * DIST_WIDTH; i >= 0; i -= DIST_WIDTH) { 390 cost += distEncoder.writeSymbolCost((value >> i) & mask); 391 } 392 return cost; 393 } 394 updateModel(int index)395 private void updateModel(int index) { 396 byte c = buf[index]; 397 if (index > 0) { 398 HashNode hNode = new HashNode(); 399 byte prevC = buf[index - 1]; 400 int pos = ((prevC & 0xff) << 8) | (c & 0xff); 401 hNode.index = index - 1; 402 hNode.next = hashTable[pos]; 403 hashTable[pos] = hNode; 404 } 405 } 406 toByteArray()407 private byte[] toByteArray() { 408 return bits.toByteArray(); 409 } 410 compress(byte[] dataIn)411 public static byte[] compress(byte[] dataIn) { 412 LzcompCompress compressor = new LzcompCompress(); 413 compressor.write(dataIn); 414 return compressor.toByteArray(); 415 } 416 getPreloadSize()417 public static int getPreloadSize() { 418 return PRELOAD_SIZE; 419 } 420 } 421