• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2013 Google Inc. All Rights Reserved.
2 
3    Distributed under MIT license.
4    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 */
6 
7 /* Block split point selection utilities. */
8 
9 #include "block_splitter.h"
10 
11 #include <string.h>  /* memcpy, memset */
12 
13 #include "../common/platform.h"
14 #include "bit_cost.h"
15 #include "cluster.h"
16 #include "command.h"
17 #include "fast_log.h"
18 #include "histogram.h"
19 #include "memory.h"
20 #include "quality.h"
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
/* Tuning constants for block splitting, grouped by symbol kind.
   Within each group the values are the ones BrotliSplitBlock passes to the
   matching SplitByteVector* routine; their exact interpretation lives in
   block_splitter_inc.h. */

/* Literals. */
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kMaxLiteralHistograms = 100;
static const size_t kLiteralStrideLength = 70;
static const double kLiteralBlockSwitchCost = 28.1;

/* Insert-and-copy command codes. */
static const size_t kSymbolsPerCommandHistogram = 530;
/* NOTE: BrotliSplitBlock also passes this limit for the distance split. */
static const size_t kMaxCommandHistograms = 50;
static const size_t kCommandStrideLength = 40;
static const double kCommandBlockSwitchCost = 13.5;

/* Distance codes. */
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kDistanceStrideLength = 40;
static const double kDistanceBlockSwitchCost = 14.6;

/* Not referenced directly in this file; presumably consumed by the code
   generated from block_splitter_inc.h -- confirm there. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
40 
CountLiterals(const Command * cmds,const size_t num_commands)41 static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
42   /* Count how many we have. */
43   size_t total_length = 0;
44   size_t i;
45   for (i = 0; i < num_commands; ++i) {
46     total_length += cmds[i].insert_len_;
47   }
48   return total_length;
49 }
50 
CopyLiteralsToByteArray(const Command * cmds,const size_t num_commands,const uint8_t * data,const size_t offset,const size_t mask,uint8_t * literals)51 static void CopyLiteralsToByteArray(const Command* cmds,
52                                     const size_t num_commands,
53                                     const uint8_t* data,
54                                     const size_t offset,
55                                     const size_t mask,
56                                     uint8_t* literals) {
57   size_t pos = 0;
58   size_t from_pos = offset & mask;
59   size_t i;
60   for (i = 0; i < num_commands; ++i) {
61     size_t insert_len = cmds[i].insert_len_;
62     if (from_pos + insert_len > mask) {
63       size_t head_size = mask + 1 - from_pos;
64       memcpy(literals + pos, data + from_pos, head_size);
65       from_pos = 0;
66       pos += head_size;
67       insert_len -= head_size;
68     }
69     if (insert_len > 0) {
70       memcpy(literals + pos, data + from_pos, insert_len);
71       pos += insert_len;
72     }
73     from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
74   }
75 }
76 
MyRand(uint32_t * seed)77 static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
78   /* Initial seed should be 7. In this case, loop length is (1 << 29). */
79   *seed *= 16807U;
80   return *seed;
81 }
82 
BitCost(size_t count)83 static BROTLI_INLINE double BitCost(size_t count) {
84   return count == 0 ? -2.0 : FastLog2(count);
85 }
86 
87 #define HISTOGRAMS_PER_BATCH 64
88 #define CLUSTERS_PER_BATCH 16
89 
90 #define FN(X) X ## Literal
91 #define DataType uint8_t
92 /* NOLINTNEXTLINE(build/include) */
93 #include "block_splitter_inc.h"
94 #undef DataType
95 #undef FN
96 
97 #define FN(X) X ## Command
98 #define DataType uint16_t
99 /* NOLINTNEXTLINE(build/include) */
100 #include "block_splitter_inc.h"
101 #undef FN
102 
103 #define FN(X) X ## Distance
104 /* NOLINTNEXTLINE(build/include) */
105 #include "block_splitter_inc.h"
106 #undef DataType
107 #undef FN
108 
/* Puts `self` into the empty state: zero types, zero blocks, and both arrays
   (types, lengths) cleared together with their recorded allocation sizes.
   Previous field values are overwritten without being read or released. */
void BrotliInitBlockSplit(BlockSplit* self) {
  /* Block type array. */
  self->types = 0;
  self->types_alloc_size = 0;
  /* Block length array. */
  self->lengths = 0;
  self->lengths_alloc_size = 0;
  /* Logical contents. */
  self->num_types = 0;
  self->num_blocks = 0;
}
117 
/* Releases the two arrays held by `self` (types, then lengths) through
   BROTLI_FREE using memory manager `m`. The counters and *_alloc_size fields
   are not modified here. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}
122 
123 /* Extracts literals, command distance and prefix codes, then applies
124  * SplitByteVector to create partitioning. */
BrotliSplitBlock(MemoryManager * m,const Command * cmds,const size_t num_commands,const uint8_t * data,const size_t pos,const size_t mask,const BrotliEncoderParams * params,BlockSplit * literal_split,BlockSplit * insert_and_copy_split,BlockSplit * dist_split)125 void BrotliSplitBlock(MemoryManager* m,
126                       const Command* cmds,
127                       const size_t num_commands,
128                       const uint8_t* data,
129                       const size_t pos,
130                       const size_t mask,
131                       const BrotliEncoderParams* params,
132                       BlockSplit* literal_split,
133                       BlockSplit* insert_and_copy_split,
134                       BlockSplit* dist_split) {
135   {
136     size_t literals_count = CountLiterals(cmds, num_commands);
137     uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
138     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(literals)) return;
139     /* Create a continuous array of literals. */
140     CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
141     /* Create the block split on the array of literals.
142      * Literal histograms can have alphabet size up to 256.
143      * Though, to accomodate context modeling, less than half of maximum size
144      * is allowed. */
145     SplitByteVectorLiteral(
146         m, literals, literals_count,
147         kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
148         kLiteralStrideLength, kLiteralBlockSwitchCost, params,
149         literal_split);
150     if (BROTLI_IS_OOM(m)) return;
151     BROTLI_FREE(m, literals);
152     /* NB: this might be a good place for injecting extra splitting without
153      *     increasing encoder complexity; however, output parition would be less
154      *     optimal than one produced with forced splitting inside
155      *     SplitByteVector (FindBlocks / ClusterBlocks). */
156   }
157 
158   {
159     /* Compute prefix codes for commands. */
160     uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
161     size_t i;
162     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(insert_and_copy_codes)) return;
163     for (i = 0; i < num_commands; ++i) {
164       insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
165     }
166     /* Create the block split on the array of command prefixes. */
167     SplitByteVectorCommand(
168         m, insert_and_copy_codes, num_commands,
169         kSymbolsPerCommandHistogram, kMaxCommandHistograms,
170         kCommandStrideLength, kCommandBlockSwitchCost, params,
171         insert_and_copy_split);
172     if (BROTLI_IS_OOM(m)) return;
173     /* TODO(eustas): reuse for distances? */
174     BROTLI_FREE(m, insert_and_copy_codes);
175   }
176 
177   {
178     /* Create a continuous array of distance prefixes. */
179     uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
180     size_t j = 0;
181     size_t i;
182     if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(distance_prefixes)) return;
183     for (i = 0; i < num_commands; ++i) {
184       const Command* cmd = &cmds[i];
185       if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
186         distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF;
187       }
188     }
189     /* Create the block split on the array of distance prefixes. */
190     SplitByteVectorDistance(
191         m, distance_prefixes, j,
192         kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
193         kDistanceStrideLength, kDistanceBlockSwitchCost, params,
194         dist_split);
195     if (BROTLI_IS_OOM(m)) return;
196     BROTLI_FREE(m, distance_prefixes);
197   }
198 }
199 
#if defined(BROTLI_TEST)
/* Externally visible forwarders to the file-local helpers above; compiled
   only when BROTLI_TEST is defined. Each definition is preceded by its own
   prototype. */

/* Forwards to CountLiterals and returns its result. */
size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands);
size_t CountLiteralsForTest(const Command* cmds, const size_t num_commands) {
  return CountLiterals(cmds, num_commands);
}

/* Forwards to CopyLiteralsToByteArray with the arguments unchanged. */
void CopyLiteralsToByteArrayForTest(const Command* cmds,
                                    const size_t num_commands,
                                    const uint8_t* data,
                                    const size_t offset,
                                    const size_t mask,
                                    uint8_t* literals);
void CopyLiteralsToByteArrayForTest(const Command* cmds,
                                    const size_t num_commands,
                                    const uint8_t* data,
                                    const size_t offset,
                                    const size_t mask,
                                    uint8_t* literals) {
  CopyLiteralsToByteArray(cmds, num_commands, data, offset, mask, literals);
}
#endif
214 
215 #if defined(__cplusplus) || defined(c_plusplus)
216 }  /* extern "C" */
217 #endif
218