1 /* Copyright 2017 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5 */ 6 7 package org.brotli.enc; 8 9 import java.nio.Buffer; 10 import java.nio.ByteBuffer; 11 import java.nio.ByteOrder; 12 import java.nio.IntBuffer; 13 import java.nio.ShortBuffer; 14 15 /** 16 * Java prepared (raw) dictionary producer. 17 */ 18 public class PreparedDictionaryGenerator { 19 20 private static final int MAGIC = 0xDEBCEDE0; 21 private static final long HASH_MULTIPLIER = 0x1fe35a7bd3579bd3L; 22 23 private static class PreparedDictionaryImpl implements PreparedDictionary { 24 private final ByteBuffer data; 25 PreparedDictionaryImpl(ByteBuffer data)26 private PreparedDictionaryImpl(ByteBuffer data) { 27 this.data = data; 28 } 29 30 @Override getData()31 public ByteBuffer getData() { 32 return data; 33 } 34 } 35 36 // Disallow instantiation. PreparedDictionaryGenerator()37 private PreparedDictionaryGenerator() { } 38 generate(ByteBuffer src)39 public static PreparedDictionary generate(ByteBuffer src) { 40 return generate(src, 17, 3, 40, 5); 41 } 42 generate(ByteBuffer src, int bucketBits, int slotBits, int hashBits, int blockBits)43 public static PreparedDictionary generate(ByteBuffer src, 44 int bucketBits, int slotBits, int hashBits, int blockBits) { 45 ((Buffer) src).clear(); // Just in case... 46 if (blockBits > 12) { 47 throw new IllegalArgumentException("blockBits is too big"); 48 } 49 if (bucketBits >= 24) { 50 throw new IllegalArgumentException("bucketBits is too big"); 51 } 52 if (bucketBits - slotBits >= 16) { 53 throw new IllegalArgumentException("slotBits is too small"); 54 } 55 int bucketLimit = 1 << blockBits; 56 int numBuckets = 1 << bucketBits; 57 int numSlots = 1 << slotBits; 58 int slotMask = numSlots - 1; 59 int hashShift = 64 - bucketBits; 60 long hashMask = (~0L) >>> (64 - hashBits); 61 int sourceSize = src.capacity(); 62 if (sourceSize < 8) { 63 throw new IllegalArgumentException("src is too short"); 64 } 65 66 /* Step 1: create "bloated" hasher. */ 67 short[] num = new short[numBuckets]; 68 int[] bucketHeads = new int[numBuckets]; 69 int[] nextBucket = new int[sourceSize]; 70 71 long accumulator = 0; 72 for (int i = 0; i < 7; ++i) { 73 accumulator |= (src.get(i) & 0xFFL) << (8 * i); 74 } 75 accumulator <<= 8; 76 /* TODO: apply custom "store" order. */ 77 for (int i = 0; i + 7 < sourceSize; ++i) { 78 accumulator = (accumulator >>> 8) | ((src.get(i + 7) & 0xFFL) << 56); 79 long h = (accumulator & hashMask) * HASH_MULTIPLIER; 80 int key = (int) (h >>> hashShift); 81 int count = num[key]; 82 nextBucket[i] = (count == 0) ? -1 : bucketHeads[key]; 83 bucketHeads[key] = i; 84 count++; 85 if (count > bucketLimit) { 86 count = bucketLimit; 87 } 88 num[key] = (short) count; 89 } 90 91 /* Step 2: find slot limits. */ 92 int[] slotLimit = new int[numSlots]; 93 int[] slotSize = new int[numSlots]; 94 int totalItems = 0; 95 for (int i = 0; i < numSlots; ++i) { 96 boolean overflow = false; 97 slotLimit[i] = bucketLimit; 98 while (true) { 99 overflow = false; 100 int limit = slotLimit[i]; 101 int count = 0; 102 for (int j = i; j < numBuckets; j += numSlots) { 103 int size = num[j]; 104 /* Last chain may span behind 64K limit; overflow happens only if 105 we are about to use 0xFFFF+ as item offset. */ 106 if (count >= 0xFFFF) { 107 overflow = true; 108 break; 109 } 110 if (size > limit) { 111 size = limit; 112 } 113 count += size; 114 } 115 if (!overflow) { 116 slotSize[i] = count; 117 totalItems += count; 118 break; 119 } 120 slotLimit[i]--; 121 } 122 } 123 124 /* Step 3: transfer data to "slim" hasher. */ 125 int part0 = 6 * 4; 126 int part1 = numSlots * 4; 127 int part2 = numBuckets * 2; 128 int part3 = totalItems * 4; 129 int allocSize = part0 + part1 + part2 + part3 + sourceSize; 130 ByteBuffer flat = ByteBuffer.allocateDirect(allocSize); 131 ByteBuffer pointer = flat.slice(); 132 pointer.order(ByteOrder.nativeOrder()); 133 134 IntBuffer struct = pointer.asIntBuffer(); 135 pointer.position(pointer.position() + part0); 136 IntBuffer slotOffsets = pointer.asIntBuffer(); 137 pointer.position(pointer.position() + part1); 138 ShortBuffer heads = pointer.asShortBuffer(); 139 pointer.position(pointer.position() + part2); 140 IntBuffer items = pointer.asIntBuffer(); 141 pointer.position(pointer.position() + part3); 142 ByteBuffer sourceCopy = pointer.slice(); 143 144 /* magic */ struct.put(0, MAGIC); 145 /* source_offset */ struct.put(1, totalItems); 146 /* source_size */ struct.put(2, sourceSize); 147 /* hash_bits */ struct.put(3, hashBits); 148 /* bucket_bits */ struct.put(4, bucketBits); 149 /* slot_bits */ struct.put(5, slotBits); 150 151 totalItems = 0; 152 for (int i = 0; i < numSlots; ++i) { 153 slotOffsets.put(i, totalItems); 154 totalItems += slotSize[i]; 155 slotSize[i] = 0; 156 } 157 158 for (int i = 0; i < numBuckets; ++i) { 159 int slot = i & slotMask; 160 int count = num[i]; 161 if (count > slotLimit[slot]) { 162 count = slotLimit[slot]; 163 } 164 if (count == 0) { 165 heads.put(i, (short) 0xFFFF); 166 continue; 167 } 168 int cursor = slotSize[slot]; 169 heads.put(i, (short) cursor); 170 cursor += slotOffsets.get(slot); 171 slotSize[slot] += count; 172 int pos = bucketHeads[i]; 173 for (int j = 0; j < count; j++) { 174 items.put(cursor++, pos); 175 pos = nextBucket[pos]; 176 } 177 cursor--; 178 items.put(cursor, items.get(cursor) | 0x80000000); 179 } 180 181 sourceCopy.put(src); 182 183 return new PreparedDictionaryImpl(flat); 184 } 185 } 186