1 /*
2 * Copyright (c) 2017-2020, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 #include "seqgen.h"
12 #include "mem.h"
13 #include <string.h>
14
15 #define MIN(a, b) ((a) < (b) ? (a) : (b))
16
17 static const size_t kMatchBytes = 128;
18
19 #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
SEQ_randByte(unsigned * src)20 static BYTE SEQ_randByte(unsigned* src)
21 {
22 static const U32 prime1 = 2654435761U;
23 static const U32 prime2 = 2246822519U;
24 U32 rand32 = *src;
25 rand32 *= prime1;
26 rand32 ^= prime2;
27 rand32 = SEQ_rotl32(rand32, 13);
28 *src = rand32;
29 return (BYTE)(rand32 >> 5);
30 }
31
SEQ_initStream(unsigned seed)32 SEQ_stream SEQ_initStream(unsigned seed)
33 {
34 SEQ_stream stream;
35 stream.state = 0;
36 XXH64_reset(&stream.xxh, 0);
37 stream.seed = seed;
38 return stream;
39 }
40
41 /* Generates a single guard byte, then match length + 1 of a different byte,
42 * then another guard byte.
43 */
SEQ_gen_matchLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)44 static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
45 SEQ_outBuffer* out)
46 {
47 typedef enum {
48 ml_first_byte = 0,
49 ml_match_bytes,
50 ml_last_byte,
51 } ml_state;
52 BYTE* const ostart = (BYTE*)out->dst;
53 BYTE* const oend = ostart + out->size;
54 BYTE* op = ostart + out->pos;
55
56 switch ((ml_state)stream->state) {
57 case ml_first_byte:
58 /* Generate a single byte and pick a different byte for the match */
59 if (op >= oend) {
60 stream->bytesLeft = 1;
61 break;
62 }
63 *op = SEQ_randByte(&stream->seed) & 0xFF;
64 do {
65 stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66 } while (*op == stream->saved);
67 ++op;
68 /* State transition */
69 stream->state = ml_match_bytes;
70 stream->bytesLeft = value + 1;
71 /* fall-through */
72 case ml_match_bytes: {
73 /* Copy matchLength + 1 bytes to the output buffer */
74 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
75 if (setLength > 0) {
76 memset(op, stream->saved, setLength);
77 op += setLength;
78 stream->bytesLeft -= setLength;
79 }
80 if (stream->bytesLeft > 0)
81 break;
82 /* State transition */
83 stream->state = ml_last_byte;
84 }
85 /* fall-through */
86 case ml_last_byte:
87 /* Generate a single byte and pick a different byte for the match */
88 if (op >= oend) {
89 stream->bytesLeft = 1;
90 break;
91 }
92 do {
93 *op = SEQ_randByte(&stream->seed) & 0xFF;
94 } while (*op == stream->saved);
95 ++op;
96 /* State transition */
97 /* fall-through */
98 default:
99 stream->state = 0;
100 stream->bytesLeft = 0;
101 break;
102 }
103 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104 out->pos = op - ostart;
105 return stream->bytesLeft;
106 }
107
108 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
109 * Generates literal length - kMatchBytes random bytes < 128.
110 * Generates another kMatchBytes using the saved seed to generate a match.
111 * This way the match is easy to find for the compressors.
112 */
SEQ_gen_litLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)113 static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
114 {
115 typedef enum {
116 ll_start = 0,
117 ll_run_bytes,
118 ll_literals,
119 ll_run_match,
120 } ll_state;
121 BYTE* const ostart = (BYTE*)out->dst;
122 BYTE* const oend = ostart + out->size;
123 BYTE* op = ostart + out->pos;
124
125 switch ((ll_state)stream->state) {
126 case ll_start:
127 stream->state = ll_run_bytes;
128 stream->saved = stream->seed;
129 stream->bytesLeft = MIN(kMatchBytes, value);
130 /* fall-through */
131 case ll_run_bytes:
132 while (stream->bytesLeft > 0 && op < oend) {
133 *op++ = SEQ_randByte(&stream->seed) | 0x80;
134 --stream->bytesLeft;
135 }
136 if (stream->bytesLeft > 0)
137 break;
138 /* State transition */
139 stream->state = ll_literals;
140 stream->bytesLeft = value - MIN(kMatchBytes, value);
141 /* fall-through */
142 case ll_literals:
143 while (stream->bytesLeft > 0 && op < oend) {
144 *op++ = SEQ_randByte(&stream->seed) & 0x7F;
145 --stream->bytesLeft;
146 }
147 if (stream->bytesLeft > 0)
148 break;
149 /* State transition */
150 stream->state = ll_run_match;
151 stream->bytesLeft = MIN(kMatchBytes, value);
152 /* fall-through */
153 case ll_run_match: {
154 while (stream->bytesLeft > 0 && op < oend) {
155 *op++ = SEQ_randByte(&stream->saved) | 0x80;
156 --stream->bytesLeft;
157 }
158 if (stream->bytesLeft > 0)
159 break;
160 }
161 /* fall-through */
162 default:
163 stream->state = 0;
164 stream->bytesLeft = 0;
165 break;
166 }
167 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168 out->pos = op - ostart;
169 return stream->bytesLeft;
170 }
171
172 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
173 * Generates offset - kMatchBytes of zeros to get a large offset without
174 * polluting the hash tables.
175 * Generates another kMatchBytes using the saved seed to generate a with the
176 * required offset.
177 */
SEQ_gen_offset(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)178 static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
179 {
180 typedef enum {
181 of_start = 0,
182 of_run_bytes,
183 of_offset,
184 of_run_match,
185 } of_state;
186 BYTE* const ostart = (BYTE*)out->dst;
187 BYTE* const oend = ostart + out->size;
188 BYTE* op = ostart + out->pos;
189
190 switch ((of_state)stream->state) {
191 case of_start:
192 stream->state = of_run_bytes;
193 stream->saved = stream->seed;
194 stream->bytesLeft = MIN(value, kMatchBytes);
195 /* fall-through */
196 case of_run_bytes: {
197 while (stream->bytesLeft > 0 && op < oend) {
198 *op++ = SEQ_randByte(&stream->seed) | 0x80;
199 --stream->bytesLeft;
200 }
201 if (stream->bytesLeft > 0)
202 break;
203 /* State transition */
204 stream->state = of_offset;
205 stream->bytesLeft = value - MIN(value, kMatchBytes);
206 }
207 /* fall-through */
208 case of_offset: {
209 /* Copy matchLength + 1 bytes to the output buffer */
210 size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
211 if (setLength > 0) {
212 memset(op, 0, setLength);
213 op += setLength;
214 stream->bytesLeft -= setLength;
215 }
216 if (stream->bytesLeft > 0)
217 break;
218 /* State transition */
219 stream->state = of_run_match;
220 stream->bytesLeft = MIN(value, kMatchBytes);
221 }
222 /* fall-through */
223 case of_run_match: {
224 while (stream->bytesLeft > 0 && op < oend) {
225 *op++ = SEQ_randByte(&stream->saved) | 0x80;
226 --stream->bytesLeft;
227 }
228 if (stream->bytesLeft > 0)
229 break;
230 }
231 /* fall-through */
232 default:
233 stream->state = 0;
234 stream->bytesLeft = 0;
235 break;
236 }
237 XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238 out->pos = op - ostart;
239 return stream->bytesLeft;
240 }
241
242 /* Returns the number of bytes left to generate.
243 * Must pass the same type/value until it returns 0.
244 */
SEQ_gen(SEQ_stream * stream,SEQ_gen_type type,unsigned value,SEQ_outBuffer * out)245 size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
246 {
247 switch (type) {
248 case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249 case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250 case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251 case SEQ_gen_max: /* fall-through */
252 default: return 0;
253 }
254 }
255
256 /* Returns the xxhash of the data produced so far */
SEQ_digest(SEQ_stream const * stream)257 XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
258 {
259 return XXH64_digest(&stream->xxh);
260 }
261