1 #include "fuzz_test.h"
2
3 #include <algorithm>
4
5 #include "flatbuffers/flatbuffers.h"
6 #include "flatbuffers/idl.h"
7 #include "test_assert.h"
8
9 namespace flatbuffers {
10 namespace tests {
11 namespace {
12
// Small self-contained pseudo-random number generator. It is used in place of
// the standard library facilities so that the generated sequence is the same
// on every platform.
// http://en.wikipedia.org/wiki/Park%E2%80%93Miller_random_number_generator
uint32_t lcg_seed = 48271;

// Advances the generator by one step and returns the new state. The
// multiplication is carried out in 64 bits so it cannot wrap before the
// modulus is applied.
uint32_t lcg_rand() {
  const uint64_t multiplier = 279470273UL;
  const uint64_t modulus = 4294967291UL;
  const uint64_t next = (static_cast<uint64_t>(lcg_seed) * multiplier) % modulus;
  // `next` is strictly below the modulus, which itself fits in 32 bits.
  lcg_seed = static_cast<uint32_t>(next);
  return lcg_seed;
}

// Puts the generator back into its initial state so that the same sequence
// can be replayed.
void lcg_reset() {
  lcg_seed = 48271;
}
22
23 template<typename T>
CompareTableFieldValue(flatbuffers::Table * table,flatbuffers::voffset_t voffset,T val)24 static void CompareTableFieldValue(flatbuffers::Table *table,
25 flatbuffers::voffset_t voffset, T val) {
26 T read = table->GetField(voffset, static_cast<T>(0));
27 TEST_EQ(read, val);
28 }
29
30 } // namespace
31
32 // Low level stress/fuzz test: serialize/deserialize a variety of
33 // different kinds of data in different combinations
FuzzTest1()34 void FuzzTest1() {
35 // Values we're testing against: chosen to ensure no bits get chopped
36 // off anywhere, and also be different from eachother.
37 const uint8_t bool_val = true;
38 const int8_t char_val = -127; // 0x81
39 const uint8_t uchar_val = 0xFF;
40 const int16_t short_val = -32222; // 0x8222;
41 const uint16_t ushort_val = 0xFEEE;
42 const int32_t int_val = 0x83333333;
43 const uint32_t uint_val = 0xFDDDDDDD;
44 const int64_t long_val = 0x8444444444444444LL;
45 const uint64_t ulong_val = 0xFCCCCCCCCCCCCCCCULL;
46 const float float_val = 3.14159f;
47 const double double_val = 3.14159265359;
48
49 const int test_values_max = 11;
50 const flatbuffers::voffset_t fields_per_object = 4;
51 const int num_fuzz_objects = 10000; // The higher, the more thorough :)
52
53 flatbuffers::FlatBufferBuilder builder;
54
55 lcg_reset(); // Keep it deterministic.
56
57 flatbuffers::uoffset_t objects[num_fuzz_objects];
58
59 // Generate num_fuzz_objects random objects each consisting of
60 // fields_per_object fields, each of a random type.
61 for (int i = 0; i < num_fuzz_objects; i++) {
62 auto start = builder.StartTable();
63 for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
64 int choice = lcg_rand() % test_values_max;
65 auto off = flatbuffers::FieldIndexToOffset(f);
66 switch (choice) {
67 case 0: builder.AddElement<uint8_t>(off, bool_val, 0); break;
68 case 1: builder.AddElement<int8_t>(off, char_val, 0); break;
69 case 2: builder.AddElement<uint8_t>(off, uchar_val, 0); break;
70 case 3: builder.AddElement<int16_t>(off, short_val, 0); break;
71 case 4: builder.AddElement<uint16_t>(off, ushort_val, 0); break;
72 case 5: builder.AddElement<int32_t>(off, int_val, 0); break;
73 case 6: builder.AddElement<uint32_t>(off, uint_val, 0); break;
74 case 7: builder.AddElement<int64_t>(off, long_val, 0); break;
75 case 8: builder.AddElement<uint64_t>(off, ulong_val, 0); break;
76 case 9: builder.AddElement<float>(off, float_val, 0); break;
77 case 10: builder.AddElement<double>(off, double_val, 0); break;
78 }
79 }
80 objects[i] = builder.EndTable(start);
81 }
82 builder.PreAlign<flatbuffers::largest_scalar_t>(0); // Align whole buffer.
83
84 lcg_reset(); // Reset.
85
86 uint8_t *eob = builder.GetCurrentBufferPointer() + builder.GetSize();
87
88 // Test that all objects we generated are readable and return the
89 // expected values. We generate random objects in the same order
90 // so this is deterministic.
91 for (int i = 0; i < num_fuzz_objects; i++) {
92 auto table = reinterpret_cast<flatbuffers::Table *>(eob - objects[i]);
93 for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
94 int choice = lcg_rand() % test_values_max;
95 flatbuffers::voffset_t off = flatbuffers::FieldIndexToOffset(f);
96 switch (choice) {
97 case 0: CompareTableFieldValue(table, off, bool_val); break;
98 case 1: CompareTableFieldValue(table, off, char_val); break;
99 case 2: CompareTableFieldValue(table, off, uchar_val); break;
100 case 3: CompareTableFieldValue(table, off, short_val); break;
101 case 4: CompareTableFieldValue(table, off, ushort_val); break;
102 case 5: CompareTableFieldValue(table, off, int_val); break;
103 case 6: CompareTableFieldValue(table, off, uint_val); break;
104 case 7: CompareTableFieldValue(table, off, long_val); break;
105 case 8: CompareTableFieldValue(table, off, ulong_val); break;
106 case 9: CompareTableFieldValue(table, off, float_val); break;
107 case 10: CompareTableFieldValue(table, off, double_val); break;
108 }
109 }
110 }
111 }
112
// High level stress/fuzz test: generate a big schema and
// matching json data in random combinations, then parse both,
// generate json back from the binary, and compare with the original.
//
// The schema text and the JSON text are built side by side, field by field,
// so the two always describe the same structure. All random choices come
// from lcg_rand(), so the generated test case is the same on every run.
void FuzzTest2() {
  lcg_reset();  // Keep it deterministic.

  const int num_definitions = 30;
  const int num_struct_definitions = 5;  // Subset of num_definitions.
  const int fields_per_definition = 15;
  const int instances_per_definition = 5;
  const int deprecation_rate = 10;  // 1 in deprecation_rate fields will
                                    // be deprecated.

  // Schema source text, extended as definitions are generated.
  std::string schema = "namespace test;\n\n";

  // One generated definition: the JSON text of each of its
  // instances_per_definition data instances.
  struct RndDef {
    std::string instances[instances_per_definition];

    // Since we're generating schema and corresponding data in tandem,
    // this convenience function adds strings to both at once:
    // `schema_add` is appended to the schema, and `instance_add` is appended
    // to every instance of definition number `definition`.
    static void Add(RndDef (&definitions_l)[num_definitions],
                    std::string &schema_l, const int instances_per_definition_l,
                    const char *schema_add, const char *instance_add,
                    int definition) {
      schema_l += schema_add;
      for (int i = 0; i < instances_per_definition_l; i++)
        definitions_l[definition].instances[i] += instance_add;
    }
  };

  // The two macros below forward to RndDef::Add and pick up the locals
  // `definitions`, `schema`, `instances_per_definition` and `definition` by
  // name, so they can only be used where all four are in scope.
  // Dummy() emits a `byte` field with value `1`; it is used wherever the
  // randomly chosen type cannot be used in the current context.
  // clang-format off
  #define AddToSchemaAndInstances(schema_add, instance_add) \
    RndDef::Add(definitions, schema, instances_per_definition, \
                schema_add, instance_add, definition)

  #define Dummy() \
    RndDef::Add(definitions, schema, instances_per_definition, \
                "byte", "1", definition)
  // clang-format on

  RndDef definitions[num_definitions];

  // We are going to generate num_definitions, the first
  // num_struct_definitions will be structs, the rest tables. For each
  // generate random fields, some of which may be struct/table types
  // referring to previously generated structs/tables.
  // Simultaneously, we generate instances_per_definition JSON data
  // definitions, which will have identical structure to the schema
  // being generated. We generate multiple instances such that when creating
  // hierarchy, we get some variety by picking one randomly.
  for (int definition = 0; definition < num_definitions; definition++) {
    std::string definition_name = "D" + flatbuffers::NumToString(definition);

    bool is_struct = definition < num_struct_definitions;

    // Open the definition in the schema and the object in every instance.
    AddToSchemaAndInstances(
        ((is_struct ? "struct " : "table ") + definition_name + " {\n").c_str(),
        "{\n");

    for (int field = 0; field < fields_per_definition; field++) {
      const bool is_last_field = field == fields_per_definition - 1;

      // Deprecate 1 in deprecation_rate fields. Only table fields can be
      // deprecated.
      // Don't deprecate the last field to avoid dangling commas in JSON.
      // Because of short-circuit evaluation, a random number is only drawn
      // here for table fields that are not the last field.
      const bool deprecated =
          !is_struct && !is_last_field && (lcg_rand() % deprecation_rate == 0);

      // The field name always goes into the schema; a deprecated field
      // contributes nothing at all to the JSON instances.
      std::string field_name = "f" + flatbuffers::NumToString(field);
      AddToSchemaAndInstances((" " + field_name + ":").c_str(),
                              deprecated ? "" : (field_name + ": ").c_str());
      // Pick random type:
      auto base_type = static_cast<flatbuffers::BaseType>(
          lcg_rand() % (flatbuffers::BASE_TYPE_UNION + 1));
      switch (base_type) {
        case flatbuffers::BASE_TYPE_STRING:
          if (is_struct) {
            Dummy();  // No strings in structs.
          } else {
            AddToSchemaAndInstances("string", deprecated ? "" : "\"hi\"");
          }
          break;
        case flatbuffers::BASE_TYPE_VECTOR:
          if (is_struct) {
            Dummy();  // No vectors in structs.
          } else {
            AddToSchemaAndInstances("[ubyte]",
                                    deprecated ? "" : "[\n0,\n1,\n255\n]");
          }
          break;
        // These four base types are all generated as a field whose type is
        // one of the previously generated definitions.
        case flatbuffers::BASE_TYPE_NONE:
        case flatbuffers::BASE_TYPE_UTYPE:
        case flatbuffers::BASE_TYPE_STRUCT:
        case flatbuffers::BASE_TYPE_UNION:
          if (definition) {
            // Pick a random previous definition and random data instance of
            // that definition.
            // defref is always smaller than definition, and the structs are
            // the lowest-numbered definitions, so a struct can only ever
            // refer to another struct.
            int defref = lcg_rand() % definition;
            int instance = lcg_rand() % instances_per_definition;
            AddToSchemaAndInstances(
                ("D" + flatbuffers::NumToString(defref)).c_str(),
                deprecated ? ""
                           : definitions[defref].instances[instance].c_str());
          } else {
            // If this is the first definition, we have no definition we can
            // refer to.
            Dummy();
          }
          break;
        case flatbuffers::BASE_TYPE_BOOL:
          AddToSchemaAndInstances(
              "bool", deprecated ? "" : (lcg_rand() % 2 ? "true" : "false"));
          break;
        case flatbuffers::BASE_TYPE_ARRAY:
          if (!is_struct) {
            AddToSchemaAndInstances(
                "ubyte",
                deprecated ? "" : "255");  // No fixed-length arrays in tables.
          } else {
            AddToSchemaAndInstances("[int:3]", deprecated ? "" : "[\n,\n,\n]");
          }
          break;
        default:
          // All the scalar types.
          // The schema and the instances are written separately here (not
          // through AddToSchemaAndInstances) because the instances do not
          // all receive the same text.
          schema += flatbuffers::TypeName(base_type);

          if (!deprecated) {
            // We want each instance to use its own random value.
            // Exactly one random number is drawn per instance; values are in
            // the range 0..127.
            for (int inst = 0; inst < instances_per_definition; inst++)
              definitions[definition].instances[inst] +=
                  flatbuffers::IsFloat(base_type)
                      ? flatbuffers::NumToString<double>(lcg_rand() % 128)
                            .c_str()
                      : flatbuffers::NumToString<int>(lcg_rand() % 128).c_str();
          }
      }
      // Terminate the field: the schema line gets the optional deprecation
      // attribute, and non-deprecated JSON fields get a separator (no comma
      // after the last field).
      AddToSchemaAndInstances(deprecated ? "(deprecated);\n" : ";\n",
                              deprecated      ? ""
                              : is_last_field ? "\n"
                                              : ",\n");
    }
    // Close the definition in the schema and the object in every instance.
    AddToSchemaAndInstances("}\n\n", "}");
  }

  // The last generated definition is used as the root type.
  schema += "root_type D" + flatbuffers::NumToString(num_definitions - 1);
  schema += ";\n";

  flatbuffers::Parser parser;

  // Will not compare against the original if we don't write defaults
  parser.builder_.ForceDefaults(true);

  // Parse the schema, parse the generated data, then generate text back
  // from the binary and compare against the original.
  TEST_EQ(parser.Parse(schema.c_str()), true);

  // The data under test is the first instance of the root definition. The
  // const reference keeps the temporary string alive until the end of this
  // function.
  const std::string &json =
      definitions[num_definitions - 1].instances[0] + "\n";

  TEST_EQ(parser.Parse(json.c_str()), true);

  // Regenerate text from the binary that the parser just built.
  // NOTE(review): indent_step = 0 presumably makes the generated text use the
  // same layout as the hand-built JSON above — confirm against the GenText
  // documentation.
  std::string jsongen;
  parser.opts.indent_step = 0;
  auto result = GenText(parser, parser.builder_.GetBufferPointer(), &jsongen);
  // The test requires GenText to return null here.
  TEST_NULL(result);

  if (jsongen != json) {
    // These strings are larger than a megabyte, so we show the bytes around
    // the first bytes that are different rather than the whole string.
    // Only the common prefix length is scanned, so if the two strings differ
    // in length only, nothing is printed by this loop.
    size_t len = std::min(json.length(), jsongen.length());
    for (size_t i = 0; i < len; i++) {
      if (json[i] != jsongen[i]) {
        // Back up by up to 10 characters (clamped at the start of the
        // string), then print up to 20 characters from there.
        i -= std::min(static_cast<size_t>(10), i);  // show some context;
        size_t end = std::min(len, i + 20);
        for (; i < end; i++)
          TEST_OUTPUT_LINE("at %d: found \"%c\", expected \"%c\"\n",
                           static_cast<int>(i), jsongen[i], json[i]);
        break;
      }
    }
    // Reached whenever the two strings differ in any way; presumably this
    // check always fails and so marks the test as failed.
    TEST_NOTNULL(nullptr);  //-V501 (this comment suppresses CWE-570 warning)
  }

  // Optional summary of how much schema and JSON text was exercised.
  // clang-format off
  #ifdef FLATBUFFERS_TEST_VERBOSE
    TEST_OUTPUT_LINE("%dk schema tested with %dk of json\n",
                     static_cast<int>(schema.length() / 1024),
                     static_cast<int>(json.length() / 1024));
  #endif
  // clang-format on
}
304
305 } // namespace tests
306 } // namespace flatbuffers
307