• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2017 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 #include "compound_dictionary.h"
8 
9 #include <brotli/types.h>
10 
11 #include "../common/platform.h"
12 #include "memory.h"
13 #include "quality.h"
14 
CreatePreparedDictionaryWithParams(MemoryManager * m,const uint8_t * source,size_t source_size,uint32_t bucket_bits,uint32_t slot_bits,uint32_t hash_bits,uint16_t bucket_limit)15 static PreparedDictionary* CreatePreparedDictionaryWithParams(MemoryManager* m,
16     const uint8_t* source, size_t source_size, uint32_t bucket_bits,
17     uint32_t slot_bits, uint32_t hash_bits, uint16_t bucket_limit) {
18   /* Step 1: create "bloated" hasher. */
19   uint32_t num_slots = 1u << slot_bits;
20   uint32_t num_buckets = 1u << bucket_bits;
21   uint32_t hash_shift = 64u - bucket_bits;
22   uint64_t hash_mask = (~((uint64_t)0U)) >> (64 - hash_bits);
23   uint32_t slot_mask = num_slots - 1;
24   size_t alloc_size = (sizeof(uint32_t) << slot_bits) +
25       (sizeof(uint32_t) << slot_bits) +
26       (sizeof(uint16_t) << bucket_bits) +
27       (sizeof(uint32_t) << bucket_bits) +
28       (sizeof(uint32_t) * source_size);
29   uint8_t* flat = NULL;
30   PreparedDictionary* result = NULL;
31   uint16_t* num = NULL;
32   uint32_t* bucket_heads = NULL;
33   uint32_t* next_bucket = NULL;
34   uint32_t* slot_offsets = NULL;
35   uint16_t* heads = NULL;
36   uint32_t* items = NULL;
37   uint8_t** source_ref = NULL;
38   uint32_t i;
39   uint32_t* slot_size = NULL;
40   uint32_t* slot_limit = NULL;
41   uint32_t total_items = 0;
42   if (slot_bits > 16) return NULL;
43   if (slot_bits > bucket_bits) return NULL;
44   if (bucket_bits - slot_bits >= 16) return NULL;
45 
46   flat = BROTLI_ALLOC(m, uint8_t, alloc_size);
47   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(flat)) return NULL;
48 
49   slot_size = (uint32_t*)flat;
50   slot_limit = (uint32_t*)(&slot_size[num_slots]);
51   num = (uint16_t*)(&slot_limit[num_slots]);
52   bucket_heads = (uint32_t*)(&num[num_buckets]);
53   next_bucket = (uint32_t*)(&bucket_heads[num_buckets]);
54   memset(num, 0, num_buckets * sizeof(num[0]));
55 
56   /* TODO(eustas): apply custom "store" order. */
57   for (i = 0; i + 7 < source_size; ++i) {
58     const uint64_t h = (BROTLI_UNALIGNED_LOAD64LE(&source[i]) & hash_mask) *
59         kPreparedDictionaryHashMul64Long;
60     const uint32_t key = (uint32_t)(h >> hash_shift);
61     uint16_t count = num[key];
62     next_bucket[i] = (count == 0) ? ((uint32_t)(-1)) : bucket_heads[key];
63     bucket_heads[key] = i;
64     count++;
65     if (count > bucket_limit) count = bucket_limit;
66     num[key] = count;
67   }
68 
69   /* Step 2: find slot limits. */
70   for (i = 0; i < num_slots; ++i) {
71     BROTLI_BOOL overflow = BROTLI_FALSE;
72     slot_limit[i] = bucket_limit;
73     while (BROTLI_TRUE) {
74       uint32_t limit = slot_limit[i];
75       size_t j;
76       uint32_t count = 0;
77       overflow = BROTLI_FALSE;
78       for (j = i; j < num_buckets; j += num_slots) {
79         uint32_t size = num[j];
80         /* Last chain may span behind 64K limit; overflow happens only if
81            we are about to use 0xFFFF+ as item offset. */
82         if (count >= 0xFFFF) {
83           overflow = BROTLI_TRUE;
84           break;
85         }
86         if (size > limit) size = limit;
87         count += size;
88       }
89       if (!overflow) {
90         slot_size[i] = count;
91         total_items += count;
92         break;
93       }
94       slot_limit[i]--;
95     }
96   }
97 
98   /* Step 3: transfer data to "slim" hasher. */
99   alloc_size = sizeof(PreparedDictionary) + (sizeof(uint32_t) << slot_bits) +
100       (sizeof(uint16_t) << bucket_bits) + (sizeof(uint32_t) * total_items) +
101       sizeof(uint8_t*);
102 
103   result = (PreparedDictionary*)BROTLI_ALLOC(m, uint8_t, alloc_size);
104   if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(result)) {
105     BROTLI_FREE(m, flat);
106     return NULL;
107   }
108   slot_offsets = (uint32_t*)(&result[1]);
109   heads = (uint16_t*)(&slot_offsets[num_slots]);
110   items = (uint32_t*)(&heads[num_buckets]);
111   source_ref = (uint8_t**)(&items[total_items]);
112 
113   result->magic = kLeanPreparedDictionaryMagic;
114   result->num_items = total_items;
115   result->source_size = (uint32_t)source_size;
116   result->hash_bits = hash_bits;
117   result->bucket_bits = bucket_bits;
118   result->slot_bits = slot_bits;
119   BROTLI_UNALIGNED_STORE_PTR(source_ref, source);
120 
121   total_items = 0;
122   for (i = 0; i < num_slots; ++i) {
123     slot_offsets[i] = total_items;
124     total_items += slot_size[i];
125     slot_size[i] = 0;
126   }
127   for (i = 0; i < num_buckets; ++i) {
128     uint32_t slot = i & slot_mask;
129     uint32_t count = num[i];
130     uint32_t pos;
131     size_t j;
132     size_t cursor = slot_size[slot];
133     if (count > slot_limit[slot]) count = slot_limit[slot];
134     if (count == 0) {
135       heads[i] = 0xFFFF;
136       continue;
137     }
138     heads[i] = (uint16_t)cursor;
139     cursor += slot_offsets[slot];
140     slot_size[slot] += count;
141     pos = bucket_heads[i];
142     for (j = 0; j < count; j++) {
143       items[cursor++] = pos;
144       pos = next_bucket[pos];
145     }
146     items[cursor - 1] |= 0x80000000;
147   }
148 
149   BROTLI_FREE(m, flat);
150   return result;
151 }
152 
CreatePreparedDictionary(MemoryManager * m,const uint8_t * source,size_t source_size)153 PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
154     const uint8_t* source, size_t source_size) {
155   uint32_t bucket_bits = 17;
156   uint32_t slot_bits = 7;
157   uint32_t hash_bits = 40;
158   uint16_t bucket_limit = 32;
159   size_t volume = 16u << bucket_bits;
160   /* Tune parameters to fit dictionary size. */
161   while (volume < source_size && bucket_bits < 22) {
162     bucket_bits++;
163     slot_bits++;
164     volume <<= 1;
165   }
166   return CreatePreparedDictionaryWithParams(m,
167       source, source_size, bucket_bits, slot_bits, hash_bits, bucket_limit);
168 }
169 
/* Releases |dictionary| through memory manager |m|.
   Passing a NULL |dictionary| is allowed and does nothing. */
void DestroyPreparedDictionary(MemoryManager* m,
    PreparedDictionary* dictionary) {
  if (dictionary != NULL) {
    BROTLI_FREE(m, dictionary);
  }
}
175 
AttachPreparedDictionary(CompoundDictionary * compound,const PreparedDictionary * dictionary)176 BROTLI_BOOL AttachPreparedDictionary(
177     CompoundDictionary* compound, const PreparedDictionary* dictionary) {
178   size_t length = 0;
179   size_t index = 0;
180 
181   if (compound->num_chunks == SHARED_BROTLI_MAX_COMPOUND_DICTS) {
182     return BROTLI_FALSE;
183   }
184 
185   if (!dictionary) return BROTLI_FALSE;
186 
187   length = dictionary->source_size;
188   index = compound->num_chunks;
189   compound->total_size += length;
190   compound->chunks[index] = dictionary;
191   compound->chunk_offsets[index + 1] = compound->total_size;
192   {
193     uint32_t* slot_offsets = (uint32_t*)(&dictionary[1]);
194     uint16_t* heads = (uint16_t*)(&slot_offsets[1u << dictionary->slot_bits]);
195     uint32_t* items = (uint32_t*)(&heads[1u << dictionary->bucket_bits]);
196     const void* tail = (void*)&items[dictionary->num_items];
197     if (dictionary->magic == kPreparedDictionaryMagic) {
198       compound->chunk_source[index] = (const uint8_t*)tail;
199     } else {
200       /* dictionary->magic == kLeanPreparedDictionaryMagic */
201       compound->chunk_source[index] =
202           (const uint8_t*)BROTLI_UNALIGNED_LOAD_PTR((const uint8_t**)tail);
203     }
204   }
205   compound->num_chunks++;
206   return BROTLI_TRUE;
207 }
208