• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "fuzz_test.h"
2 
3 #include <algorithm>
4 
5 #include "flatbuffers/flatbuffers.h"
6 #include "flatbuffers/idl.h"
7 #include "test_assert.h"
8 
9 namespace flatbuffers {
10 namespace tests {
11 namespace {
12 
// Include simple random number generator to ensure results will be the
// same cross platform.
// http://en.wikipedia.org/wiki/Park%E2%80%93Miller_random_number_generator
uint32_t lcg_seed = 48271;

// Advances the generator by one step and returns the new state.
// The product is formed in 64 bits so it cannot wrap before the modulo is
// taken; the modulus is below 2^32, so the result always fits in uint32_t.
uint32_t lcg_rand() {
  const uint64_t product = static_cast<uint64_t>(lcg_seed) * 279470273ULL;
  lcg_seed = static_cast<uint32_t>(product % 4294967291ULL);
  return lcg_seed;
}

// Restores the initial seed so the same sequence can be replayed.
void lcg_reset() { lcg_seed = 48271; }
22 
23 template<typename T>
CompareTableFieldValue(flatbuffers::Table * table,flatbuffers::voffset_t voffset,T val)24 static void CompareTableFieldValue(flatbuffers::Table *table,
25                                    flatbuffers::voffset_t voffset, T val) {
26   T read = table->GetField(voffset, static_cast<T>(0));
27   TEST_EQ(read, val);
28 }
29 
30 }  // namespace
31 
32 // Low level stress/fuzz test: serialize/deserialize a variety of
33 // different kinds of data in different combinations
FuzzTest1()34 void FuzzTest1() {
35   // Values we're testing against: chosen to ensure no bits get chopped
36   // off anywhere, and also be different from eachother.
37   const uint8_t bool_val = true;
38   const int8_t char_val = -127;  // 0x81
39   const uint8_t uchar_val = 0xFF;
40   const int16_t short_val = -32222;  // 0x8222;
41   const uint16_t ushort_val = 0xFEEE;
42   const int32_t int_val = 0x83333333;
43   const uint32_t uint_val = 0xFDDDDDDD;
44   const int64_t long_val = 0x8444444444444444LL;
45   const uint64_t ulong_val = 0xFCCCCCCCCCCCCCCCULL;
46   const float float_val = 3.14159f;
47   const double double_val = 3.14159265359;
48 
49   const int test_values_max = 11;
50   const flatbuffers::voffset_t fields_per_object = 4;
51   const int num_fuzz_objects = 10000;  // The higher, the more thorough :)
52 
53   flatbuffers::FlatBufferBuilder builder;
54 
55   lcg_reset();  // Keep it deterministic.
56 
57   flatbuffers::uoffset_t objects[num_fuzz_objects];
58 
59   // Generate num_fuzz_objects random objects each consisting of
60   // fields_per_object fields, each of a random type.
61   for (int i = 0; i < num_fuzz_objects; i++) {
62     auto start = builder.StartTable();
63     for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
64       int choice = lcg_rand() % test_values_max;
65       auto off = flatbuffers::FieldIndexToOffset(f);
66       switch (choice) {
67         case 0: builder.AddElement<uint8_t>(off, bool_val, 0); break;
68         case 1: builder.AddElement<int8_t>(off, char_val, 0); break;
69         case 2: builder.AddElement<uint8_t>(off, uchar_val, 0); break;
70         case 3: builder.AddElement<int16_t>(off, short_val, 0); break;
71         case 4: builder.AddElement<uint16_t>(off, ushort_val, 0); break;
72         case 5: builder.AddElement<int32_t>(off, int_val, 0); break;
73         case 6: builder.AddElement<uint32_t>(off, uint_val, 0); break;
74         case 7: builder.AddElement<int64_t>(off, long_val, 0); break;
75         case 8: builder.AddElement<uint64_t>(off, ulong_val, 0); break;
76         case 9: builder.AddElement<float>(off, float_val, 0); break;
77         case 10: builder.AddElement<double>(off, double_val, 0); break;
78       }
79     }
80     objects[i] = builder.EndTable(start);
81   }
82   builder.PreAlign<flatbuffers::largest_scalar_t>(0);  // Align whole buffer.
83 
84   lcg_reset();  // Reset.
85 
86   uint8_t *eob = builder.GetCurrentBufferPointer() + builder.GetSize();
87 
88   // Test that all objects we generated are readable and return the
89   // expected values. We generate random objects in the same order
90   // so this is deterministic.
91   for (int i = 0; i < num_fuzz_objects; i++) {
92     auto table = reinterpret_cast<flatbuffers::Table *>(eob - objects[i]);
93     for (flatbuffers::voffset_t f = 0; f < fields_per_object; f++) {
94       int choice = lcg_rand() % test_values_max;
95       flatbuffers::voffset_t off = flatbuffers::FieldIndexToOffset(f);
96       switch (choice) {
97         case 0: CompareTableFieldValue(table, off, bool_val); break;
98         case 1: CompareTableFieldValue(table, off, char_val); break;
99         case 2: CompareTableFieldValue(table, off, uchar_val); break;
100         case 3: CompareTableFieldValue(table, off, short_val); break;
101         case 4: CompareTableFieldValue(table, off, ushort_val); break;
102         case 5: CompareTableFieldValue(table, off, int_val); break;
103         case 6: CompareTableFieldValue(table, off, uint_val); break;
104         case 7: CompareTableFieldValue(table, off, long_val); break;
105         case 8: CompareTableFieldValue(table, off, ulong_val); break;
106         case 9: CompareTableFieldValue(table, off, float_val); break;
107         case 10: CompareTableFieldValue(table, off, double_val); break;
108       }
109     }
110   }
111 }
112 
113 // High level stress/fuzz test: generate a big schema and
114 // matching json data in random combinations, then parse both,
115 // generate json back from the binary, and compare with the original.
FuzzTest2()116 void FuzzTest2() {
117   lcg_reset();  // Keep it deterministic.
118 
119   const int num_definitions = 30;
120   const int num_struct_definitions = 5;  // Subset of num_definitions.
121   const int fields_per_definition = 15;
122   const int instances_per_definition = 5;
123   const int deprecation_rate = 10;  // 1 in deprecation_rate fields will
124                                     // be deprecated.
125 
126   std::string schema = "namespace test;\n\n";
127 
128   struct RndDef {
129     std::string instances[instances_per_definition];
130 
131     // Since we're generating schema and corresponding data in tandem,
132     // this convenience function adds strings to both at once.
133     static void Add(RndDef (&definitions_l)[num_definitions],
134                     std::string &schema_l, const int instances_per_definition_l,
135                     const char *schema_add, const char *instance_add,
136                     int definition) {
137       schema_l += schema_add;
138       for (int i = 0; i < instances_per_definition_l; i++)
139         definitions_l[definition].instances[i] += instance_add;
140     }
141   };
142 
143   // clang-format off
144   #define AddToSchemaAndInstances(schema_add, instance_add) \
145     RndDef::Add(definitions, schema, instances_per_definition, \
146                 schema_add, instance_add, definition)
147 
148   #define Dummy() \
149     RndDef::Add(definitions, schema, instances_per_definition, \
150                 "byte", "1", definition)
151   // clang-format on
152 
153   RndDef definitions[num_definitions];
154 
155   // We are going to generate num_definitions, the first
156   // num_struct_definitions will be structs, the rest tables. For each
157   // generate random fields, some of which may be struct/table types
158   // referring to previously generated structs/tables.
159   // Simultanenously, we generate instances_per_definition JSON data
160   // definitions, which will have identical structure to the schema
161   // being generated. We generate multiple instances such that when creating
162   // hierarchy, we get some variety by picking one randomly.
163   for (int definition = 0; definition < num_definitions; definition++) {
164     std::string definition_name = "D" + flatbuffers::NumToString(definition);
165 
166     bool is_struct = definition < num_struct_definitions;
167 
168     AddToSchemaAndInstances(
169         ((is_struct ? "struct " : "table ") + definition_name + " {\n").c_str(),
170         "{\n");
171 
172     for (int field = 0; field < fields_per_definition; field++) {
173       const bool is_last_field = field == fields_per_definition - 1;
174 
175       // Deprecate 1 in deprecation_rate fields. Only table fields can be
176       // deprecated.
177       // Don't deprecate the last field to avoid dangling commas in JSON.
178       const bool deprecated =
179           !is_struct && !is_last_field && (lcg_rand() % deprecation_rate == 0);
180 
181       std::string field_name = "f" + flatbuffers::NumToString(field);
182       AddToSchemaAndInstances(("  " + field_name + ":").c_str(),
183                               deprecated ? "" : (field_name + ": ").c_str());
184       // Pick random type:
185       auto base_type = static_cast<flatbuffers::BaseType>(
186           lcg_rand() % (flatbuffers::BASE_TYPE_UNION + 1));
187       switch (base_type) {
188         case flatbuffers::BASE_TYPE_STRING:
189           if (is_struct) {
190             Dummy();  // No strings in structs.
191           } else {
192             AddToSchemaAndInstances("string", deprecated ? "" : "\"hi\"");
193           }
194           break;
195         case flatbuffers::BASE_TYPE_VECTOR:
196           if (is_struct) {
197             Dummy();  // No vectors in structs.
198           } else {
199             AddToSchemaAndInstances("[ubyte]",
200                                     deprecated ? "" : "[\n0,\n1,\n255\n]");
201           }
202           break;
203         case flatbuffers::BASE_TYPE_NONE:
204         case flatbuffers::BASE_TYPE_UTYPE:
205         case flatbuffers::BASE_TYPE_STRUCT:
206         case flatbuffers::BASE_TYPE_UNION:
207           if (definition) {
208             // Pick a random previous definition and random data instance of
209             // that definition.
210             int defref = lcg_rand() % definition;
211             int instance = lcg_rand() % instances_per_definition;
212             AddToSchemaAndInstances(
213                 ("D" + flatbuffers::NumToString(defref)).c_str(),
214                 deprecated ? ""
215                            : definitions[defref].instances[instance].c_str());
216           } else {
217             // If this is the first definition, we have no definition we can
218             // refer to.
219             Dummy();
220           }
221           break;
222         case flatbuffers::BASE_TYPE_BOOL:
223           AddToSchemaAndInstances(
224               "bool", deprecated ? "" : (lcg_rand() % 2 ? "true" : "false"));
225           break;
226         case flatbuffers::BASE_TYPE_ARRAY:
227           if (!is_struct) {
228             AddToSchemaAndInstances(
229                 "ubyte",
230                 deprecated ? "" : "255");  // No fixed-length arrays in tables.
231           } else {
232             AddToSchemaAndInstances("[int:3]", deprecated ? "" : "[\n,\n,\n]");
233           }
234           break;
235         default:
236           // All the scalar types.
237           schema += flatbuffers::TypeName(base_type);
238 
239           if (!deprecated) {
240             // We want each instance to use its own random value.
241             for (int inst = 0; inst < instances_per_definition; inst++)
242               definitions[definition].instances[inst] +=
243                   flatbuffers::IsFloat(base_type)
244                       ? flatbuffers::NumToString<double>(lcg_rand() % 128)
245                             .c_str()
246                       : flatbuffers::NumToString<int>(lcg_rand() % 128).c_str();
247           }
248       }
249       AddToSchemaAndInstances(deprecated ? "(deprecated);\n" : ";\n",
250                               deprecated      ? ""
251                               : is_last_field ? "\n"
252                                               : ",\n");
253     }
254     AddToSchemaAndInstances("}\n\n", "}");
255   }
256 
257   schema += "root_type D" + flatbuffers::NumToString(num_definitions - 1);
258   schema += ";\n";
259 
260   flatbuffers::Parser parser;
261 
262   // Will not compare against the original if we don't write defaults
263   parser.builder_.ForceDefaults(true);
264 
265   // Parse the schema, parse the generated data, then generate text back
266   // from the binary and compare against the original.
267   TEST_EQ(parser.Parse(schema.c_str()), true);
268 
269   const std::string &json =
270       definitions[num_definitions - 1].instances[0] + "\n";
271 
272   TEST_EQ(parser.Parse(json.c_str()), true);
273 
274   std::string jsongen;
275   parser.opts.indent_step = 0;
276   auto result = GenText(parser, parser.builder_.GetBufferPointer(), &jsongen);
277   TEST_NULL(result);
278 
279   if (jsongen != json) {
280     // These strings are larger than a megabyte, so we show the bytes around
281     // the first bytes that are different rather than the whole string.
282     size_t len = std::min(json.length(), jsongen.length());
283     for (size_t i = 0; i < len; i++) {
284       if (json[i] != jsongen[i]) {
285         i -= std::min(static_cast<size_t>(10), i);  // show some context;
286         size_t end = std::min(len, i + 20);
287         for (; i < end; i++)
288           TEST_OUTPUT_LINE("at %d: found \"%c\", expected \"%c\"\n",
289                            static_cast<int>(i), jsongen[i], json[i]);
290         break;
291       }
292     }
293     TEST_NOTNULL(nullptr);  //-V501 (this comment suppresses CWE-570 warning)
294   }
295 
296   // clang-format off
297   #ifdef FLATBUFFERS_TEST_VERBOSE
298     TEST_OUTPUT_LINE("%dk schema tested with %dk of json\n",
299                      static_cast<int>(schema.length() / 1024),
300                      static_cast<int>(json.length() / 1024));
301   #endif
302   // clang-format on
303 }
304 
305 }  // namespace tests
306 }  // namespace flatbuffers
307