1 /* Copyright 2017 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6
7 #include "compound_dictionary.h"
8
9 #include <brotli/types.h>
10
11 #include "../common/platform.h"
12 #include "memory.h"
13 #include "quality.h"
14
CreatePreparedDictionaryWithParams(MemoryManager * m,const uint8_t * source,size_t source_size,uint32_t bucket_bits,uint32_t slot_bits,uint32_t hash_bits,uint16_t bucket_limit)15 static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
16 const uint8_t* source, size_t source_size, uint32_t bucket_bits,
17 uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
18 /* Step 1: create "bloated" hasher. */
19 uint32_t num_slots = 1u << slot_bits;
20 uint32_t num_buckets = 1u << bucket_bits;
21 uint32_t hash_shift = 64u - bucket_bits;
22 uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
23 uint32_t slot_mask = num_slots - 1;
24 size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
25 (sizeof(uint32_t) << slot_bits) +
26 (sizeof(uint16_t) << bucket_bits) +
27 (sizeof(uint32_t) << bucket_bits) +
28 (sizeof(uint32_t) * source_size);
29 uint8_t* flat = NULL;
30 PreparedDictionary* result = NULL;
31 uint16_t* num = NULL;
32 uint32_t* bucket_heads = NULL;
33 uint32_t* next_bucket = NULL;
34 uint32_t* slot_offsets = NULL;
35 uint16_t* heads = NULL;
36 uint32_t* items = NULL;
37 uint8_t** source_ref = NULL;
38 uint32_t i;
39 uint32_t* slot_size = NULL;
40 uint32_t* slot_limit = NULL;
41 uint32_t total_items = 0;
42 if (slot_bits > 16) return NULL;
43 if (slot_bits > bucket_bits) return NULL;
44 if (bucket_bits - slot_bits >= 16) return NULL;
45
46 flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
47 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
48
49 slot_size = (uint32_t*)flat;
50 slot_limit = (uint32_t*)(&slot_size[num_slots]);
51 num = (uint16_t*)(&slot_limit[num_slots]);
52 bucket_heads = (uint32_t*)(&num[num_buckets]);
53 next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
54 memset(num, 0, num_buckets * sizeof(num[0]));
55
56 /* TODO(eustas): apply custom "store" order. */
57 for (i = 0; i + 7 < source_size; ++i) {
58 const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
59 kPreparedDictionaryHashMul64Long;
60 const uint32_t key = (uint32_t)(h >> hash_shift);
61 uint16_t count = num[key];
62 next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
63 bucket_heads[key] = i;
64 count++;
65 if (count > bucket_limit) count = bucket_limit;
66 num[key] = count;
67 }
68
69 /* Step 2: find slot limits. */
70 for (i = 0; i < num_slots; ++i) {
71 BROTLI_BOOL overflow = BROTLI_FALSE;
72 slot_limit[i] = bucket_limit;
73 while (BROTLI_TRUE) {
74 uint32_t limit = slot_limit[i];
75 size_t j;
76 uint32_t count = 0;
77 overflow = BROTLI_FALSE;
78 for (j = i; j < num_buckets; j += num_slots) {
79 uint32_t size = num[j];
80 /* Last chain may span behind 64K limit; overflow happens only if
81 we are about to use 0xFFFF+ as item offset. */
82 if (count >= 0xFFFF) {
83 overflow = BROTLI_TRUE;
84 break;
85 }
86 if (size > limit) size = limit;
87 count += size;
88 }
89 if (!overflow) {
90 slot_size[i] = count;
91 total_items += count;
92 break;
93 }
94 slot_limit[i]--;
95 }
96 }
97
98 /* Step 3: transfer data to "slim" hasher. */
99 alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
100 (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
101 sizeof(uint8_t*);
102
103 result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
104 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
105 BROTLI_FREE(m, flat);
106 return NULL;
107 }
108 slot_offsets = (uint32_t*)(&result[1]);
109 heads = (uint16_t*)(&slot_offsets[num_slots]);
110 items = (uint32_t*)(&heads[num_buckets]);
111 source_ref = (uint8_t**)(&items[total_items]);
112
113 result->magic = kLeanPreparedDictionaryMagic;
114 result->num_items = total_items;
115 result->source_size = (uint32_t)source_size;
116 result->hash_bits = hash_bits;
117 result->bucket_bits = bucket_bits;
118 result->slot_bits = slot_bits;
119 BROTLI_UNALIGNED_STORE_PTR(source_ref, source);
120
121 total_items = 0;
122 for (i = 0; i < num_slots; ++i) {
123 slot_offsets[i] = total_items;
124 total_items += slot_size[i];
125 slot_size[i] = 0;
126 }
127 for (i = 0; i < num_buckets; ++i) {
128 uint32_t slot = i & slot_mask;
129 uint32_t count = num[i];
130 uint32_t pos;
131 size_t j;
132 size_t cursor = slot_size[slot];
133 if (count > slot_limit[slot]) count = slot_limit[slot];
134 if (count == 0) {
135 heads[i] = 0xFFFF;
136 continue;
137 }
138 heads[i] = (uint16_t)cursor;
139 cursor += slot_offsets[slot];
140 slot_size[slot] += count;
141 pos = bucket_heads[i];
142 for (j = 0; j < count; j++) {
143 items[cursor++] = pos;
144 pos = next_bucket[pos];
145 }
146 items[cursor - 1] |= 0x80000000;
147 }
148
149 BROTLI_FREE(m, flat);
150 return result;
151 }
152
CreatePreparedDictionary(MemoryManager * m,const uint8_t * source,size_t source_size)153 PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
154 const uint8_t* source, size_t source_size) {
155 uint32_t bucket_bits = 17;
156 uint32_t slot_bits = 7;
157 uint32_t hash_bits = 40;
158 uint16_t bucket_limit = 32;
159 size_t volume = 16u << bucket_bits;
160 /* Tune parameters to fit dictionary size. */
161 while (volume < source_size && bucket_bits < 22) {
162 bucket_bits++;
163 slot_bits++;
164 volume <<= 1;
165 }
166 return CreatePreparedDictionaryWithParams(m,
167 source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
168 }
169
/* Releases |dictionary| through memory manager |m|.
   Passing a NULL |dictionary| is allowed and does nothing. */
void DestroyPreparedDictionary(MemoryManager* m,
    PreparedDictionary* dictionary) {
  if (dictionary != NULL) {
    BROTLI_FREE(m, dictionary);
  }
}
175
AttachPreparedDictionary(CompoundDictionary * compound,const PreparedDictionary * dictionary)176 BROTLI_BOOL AttachPreparedDictionary(
177 CompoundDictionary* compound, const PreparedDictionary* dictionary) {
178 size_t length = 0;
179 size_t index = 0;
180
181 if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
182 return BROTLI_FALSE;
183 }
184
185 if (!dictionary) return BROTLI_FALSE;
186
187 length = dictionary->source_size;
188 index = compound->num_chunks;
189 compound->total_size += length;
190 compound->chunks[index] = dictionary;
191 compound->chunk_offsets[index + 1] = compound->total_size;
192 {
193 uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
194 uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
195 uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
196 const void* tail = (void*)&items[dictionary->num_items];
197 if (dictionary->magic == kPreparedDictionaryMagic) {
198 compound->chunk_source[index] = (const uint8_t*)tail;
199 } else {
200 /* dictionary->magic == kLeanPreparedDictionaryMagic */
201 compound->chunk_source[index] =
202 (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
203 }
204 }
205 compound->num_chunks++;
206 return BROTLI_TRUE;
207 }
208