• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2017-2020, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 #include "seqgen.h"
12 #include "mem.h"
13 #include <string.h>
14 
15 #define MIN(a, b)  ((a) < (b) ? (a) : (b))
16 
17 static const size_t kMatchBytes = 128;
18 
19 #define SEQ_rotl32(x,r) ((x << r) | (x >> (32 - r)))
SEQ_randByte(unsigned * src)20 static BYTE SEQ_randByte(unsigned* src)
21 {
22     static const U32 prime1 = 2654435761U;
23     static const U32 prime2 = 2246822519U;
24     U32 rand32 = *src;
25     rand32 *= prime1;
26     rand32 ^= prime2;
27     rand32  = SEQ_rotl32(rand32, 13);
28     *src = rand32;
29     return (BYTE)(rand32 >> 5);
30 }
31 
SEQ_initStream(unsigned seed)32 SEQ_stream SEQ_initStream(unsigned seed)
33 {
34     SEQ_stream stream;
35     stream.state = 0;
36     XXH64_reset(&stream.xxh, 0);
37     stream.seed = seed;
38     return stream;
39 }
40 
41 /* Generates a single guard byte, then match length + 1 of a different byte,
42  * then another guard byte.
43  */
SEQ_gen_matchLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)44 static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
45                                   SEQ_outBuffer* out)
46 {
47     typedef enum {
48         ml_first_byte = 0,
49         ml_match_bytes,
50         ml_last_byte,
51     } ml_state;
52     BYTE* const ostart = (BYTE*)out->dst;
53     BYTE* const oend = ostart + out->size;
54     BYTE* op = ostart + out->pos;
55 
56     switch ((ml_state)stream->state) {
57     case ml_first_byte:
58         /* Generate a single byte and pick a different byte for the match */
59         if (op >= oend) {
60             stream->bytesLeft = 1;
61             break;
62         }
63         *op = SEQ_randByte(&stream->seed) & 0xFF;
64         do {
65             stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
66         } while (*op == stream->saved);
67         ++op;
68         /* State transition */
69         stream->state = ml_match_bytes;
70         stream->bytesLeft = value + 1;
71     /* fall-through */
72     case ml_match_bytes: {
73         /* Copy matchLength + 1 bytes to the output buffer */
74         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
75         if (setLength > 0) {
76             memset(op, stream->saved, setLength);
77             op += setLength;
78             stream->bytesLeft -= setLength;
79         }
80         if (stream->bytesLeft > 0)
81             break;
82         /* State transition */
83         stream->state = ml_last_byte;
84     }
85     /* fall-through */
86     case ml_last_byte:
87         /* Generate a single byte and pick a different byte for the match */
88         if (op >= oend) {
89             stream->bytesLeft = 1;
90             break;
91         }
92         do {
93             *op = SEQ_randByte(&stream->seed) & 0xFF;
94         } while (*op == stream->saved);
95         ++op;
96         /* State transition */
97     /* fall-through */
98     default:
99         stream->state = 0;
100         stream->bytesLeft = 0;
101         break;
102     }
103     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
104     out->pos = op - ostart;
105     return stream->bytesLeft;
106 }
107 
108 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
109  * Generates literal length - kMatchBytes random bytes < 128.
110  * Generates another kMatchBytes using the saved seed to generate a match.
111  * This way the match is easy to find for the compressors.
112  */
SEQ_gen_litLength(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)113 static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
114 {
115     typedef enum {
116         ll_start = 0,
117         ll_run_bytes,
118         ll_literals,
119         ll_run_match,
120     } ll_state;
121     BYTE* const ostart = (BYTE*)out->dst;
122     BYTE* const oend = ostart + out->size;
123     BYTE* op = ostart + out->pos;
124 
125     switch ((ll_state)stream->state) {
126     case ll_start:
127         stream->state = ll_run_bytes;
128         stream->saved = stream->seed;
129         stream->bytesLeft = MIN(kMatchBytes, value);
130     /* fall-through */
131     case ll_run_bytes:
132         while (stream->bytesLeft > 0 && op < oend) {
133             *op++ = SEQ_randByte(&stream->seed) | 0x80;
134             --stream->bytesLeft;
135         }
136         if (stream->bytesLeft > 0)
137             break;
138         /* State transition */
139         stream->state = ll_literals;
140         stream->bytesLeft = value - MIN(kMatchBytes, value);
141     /* fall-through */
142     case ll_literals:
143         while (stream->bytesLeft > 0 && op < oend) {
144             *op++ = SEQ_randByte(&stream->seed) & 0x7F;
145             --stream->bytesLeft;
146         }
147         if (stream->bytesLeft > 0)
148             break;
149         /* State transition */
150         stream->state = ll_run_match;
151         stream->bytesLeft = MIN(kMatchBytes, value);
152     /* fall-through */
153     case ll_run_match: {
154         while (stream->bytesLeft > 0 && op < oend) {
155             *op++ = SEQ_randByte(&stream->saved) | 0x80;
156             --stream->bytesLeft;
157         }
158         if (stream->bytesLeft > 0)
159             break;
160     }
161     /* fall-through */
162     default:
163         stream->state = 0;
164         stream->bytesLeft = 0;
165         break;
166     }
167     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
168     out->pos = op - ostart;
169     return stream->bytesLeft;
170 }
171 
172 /* Saves the current seed then generates kMatchBytes random bytes >= 128.
173  * Generates offset - kMatchBytes of zeros to get a large offset without
174  * polluting the hash tables.
175  * Generates another kMatchBytes using the saved seed to generate a with the
176  * required offset.
177  */
SEQ_gen_offset(SEQ_stream * stream,unsigned value,SEQ_outBuffer * out)178 static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
179 {
180     typedef enum {
181         of_start = 0,
182         of_run_bytes,
183         of_offset,
184         of_run_match,
185     } of_state;
186     BYTE* const ostart = (BYTE*)out->dst;
187     BYTE* const oend = ostart + out->size;
188     BYTE* op = ostart + out->pos;
189 
190     switch ((of_state)stream->state) {
191     case of_start:
192         stream->state = of_run_bytes;
193         stream->saved = stream->seed;
194         stream->bytesLeft = MIN(value, kMatchBytes);
195     /* fall-through */
196     case of_run_bytes: {
197         while (stream->bytesLeft > 0 && op < oend) {
198             *op++ = SEQ_randByte(&stream->seed) | 0x80;
199             --stream->bytesLeft;
200         }
201         if (stream->bytesLeft > 0)
202             break;
203         /* State transition */
204         stream->state = of_offset;
205         stream->bytesLeft = value - MIN(value, kMatchBytes);
206     }
207     /* fall-through */
208     case of_offset: {
209         /* Copy matchLength + 1 bytes to the output buffer */
210         size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
211         if (setLength > 0) {
212             memset(op, 0, setLength);
213             op += setLength;
214             stream->bytesLeft -= setLength;
215         }
216         if (stream->bytesLeft > 0)
217             break;
218         /* State transition */
219         stream->state = of_run_match;
220         stream->bytesLeft = MIN(value, kMatchBytes);
221     }
222     /* fall-through */
223     case of_run_match: {
224         while (stream->bytesLeft > 0 && op < oend) {
225             *op++ = SEQ_randByte(&stream->saved) | 0x80;
226             --stream->bytesLeft;
227         }
228         if (stream->bytesLeft > 0)
229             break;
230     }
231     /* fall-through */
232     default:
233         stream->state = 0;
234         stream->bytesLeft = 0;
235         break;
236     }
237     XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
238     out->pos = op - ostart;
239     return stream->bytesLeft;
240 }
241 
242 /* Returns the number of bytes left to generate.
243  * Must pass the same type/value until it returns 0.
244  */
SEQ_gen(SEQ_stream * stream,SEQ_gen_type type,unsigned value,SEQ_outBuffer * out)245 size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
246 {
247     switch (type) {
248         case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
249         case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
250         case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
251         case SEQ_gen_max: /* fall-through */
252         default: return 0;
253     }
254 }
255 
256 /* Returns the xxhash of the data produced so far */
SEQ_digest(SEQ_stream const * stream)257 XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
258 {
259     return XXH64_digest(&stream->xxh);
260 }
261