• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <assert.h>
18 #include <stddef.h>
19 #include <stdint.h>
20 
21 #include <algorithm>
22 #include <clocale>
23 #include <memory>
24 #include <regex>
25 #include <string>
26 
27 #include "flatbuffers/idl.h"
28 #include "test_init.h"
29 
// Accepted length range of the scalar text extracted from the fuzzer input;
// inputs outside this range are skipped by the fuzzer entry point.
static constexpr size_t kMinInputLength{ 1 };
static constexpr size_t kMaxInputLength{ 3000 };

// Bit layout of the first fuzzer byte ("flags").
static constexpr uint8_t flags_scalar_type{ 0x0F };  // scalar type code
static constexpr uint8_t flags_quotes_kind{ 0x10 };  // set: quote ", clear: '
// Reserved for future use: json {named} or [unnamed].
// static constexpr uint8_t flags_json_bracer = 0x20;
37 
// Find every occurrence of 'subj' in 's' (overlapping occurrences included)
// and overwrite the first character of each occurrence with 'repl'.
// BreakSequence("testest","tes", 'X') -> "XesXest".
// BreakSequence("xxx","xx", 'Y') -> "YYx".
// A null or empty 'subj' matches nothing and leaves 's' untouched.
static void BreakSequence(std::string &s, const char *subj, char repl) {
  // An empty needle is "found" at every offset, including s.size(), where
  // writing the replacement would throw std::out_of_range. Treat it (and a
  // null pointer) as "nothing to break".
  if (!subj || '\0' == *subj) return;
  // Resume the search one character past the previous hit so that
  // overlapping occurrences are broken as well.
  for (auto pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    s[pos] = repl;
  }
}
48 
// Trim from both ends of 's' every character that belongs to the 'pattern'
// set and return what is left.
// StripString("xy{xy}y", "xy") -> "{xy}"
// If 'pos' is not null it receives the offset inside 's' where the returned
// sub-string starts (0 when nothing is left after trimming).
static std::string StripString(const std::string &s, const char *pattern,
                               size_t *pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  const bool all_stripped = (std::string::npos == begin);
  if (pos) *pos = all_stripped ? 0 : begin;
  if (all_stripped) return "";
  // At least one character survives, so the backward search must succeed.
  const auto end = s.find_last_not_of(pattern);
  assert(end < s.length());
  assert(begin <= end);
  return s.substr(begin, end - begin + 1);
}
64 
65 class RegexMatcher {
66  protected:
67   virtual bool MatchNumber(const std::string &input) const = 0;
68 
69  public:
70   virtual ~RegexMatcher() = default;
71 
72   struct MatchResult {
73     size_t pos{ 0 };
74     size_t len{ 0 };
75     bool res{ false };
76     bool quoted{ false };
77   };
78 
Match(const std::string & input) const79   MatchResult Match(const std::string &input) const {
80     MatchResult r;
81     // strip leading and trailing "spaces" accepted by flatbuffer
82     auto test = StripString(input, "\t\r\n ", &r.pos);
83     r.len = test.size();
84     // check quotes
85     if (test.size() >= 2) {
86       auto fch = test.front();
87       auto lch = test.back();
88       r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
89       if (r.quoted) {
90         // remove quotes for regex test
91         test = test.substr(1, test.size() - 2);
92       }
93     }
94     // Fast check:
95     if (test.empty()) return r;
96     // A string with a valid scalar shouldn't have non-ascii or non-printable
97     // symbols.
98     for (auto c : test) {
99       if ((c < ' ') || (c > '~')) return r;
100     }
101     // Check with regex
102     r.res = MatchNumber(test);
103     return r;
104   }
105 
MatchRegexList(const std::string & input,const std::vector<std::regex> & re_list) const106   bool MatchRegexList(const std::string &input,
107                       const std::vector<std::regex> &re_list) const {
108     auto str = StripString(input, " ");
109     if (str.empty()) return false;
110     for (auto &re : re_list) {
111       std::smatch match;
112       if (std::regex_match(str, match, re)) return true;
113     }
114     return false;
115   }
116 };
117 
118 class IntegerRegex : public RegexMatcher {
119  protected:
MatchNumber(const std::string & input) const120   bool MatchNumber(const std::string &input) const override {
121     static const std::vector<std::regex> re_list = {
122       std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
123 
124       std::regex{ R"(^[-+]?0[xX][0-9a-fA-F]+$)",
125                   std::regex_constants::optimize }
126     };
127     return MatchRegexList(input, re_list);
128   }
129 
130  public:
131   IntegerRegex() = default;
132   virtual ~IntegerRegex() = default;
133 };
134 
135 class UIntegerRegex : public RegexMatcher {
136  protected:
MatchNumber(const std::string & input) const137   bool MatchNumber(const std::string &input) const override {
138     static const std::vector<std::regex> re_list = {
139       std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
140       std::regex{ R"(^[+]?0[xX][0-9a-fA-F]+$)",
141                   std::regex_constants::optimize },
142       // accept -0 number
143       std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
144     };
145     return MatchRegexList(input, re_list);
146   }
147 
148  public:
149   UIntegerRegex() = default;
150   virtual ~UIntegerRegex() = default;
151 };
152 
153 class BooleanRegex : public IntegerRegex {
154  protected:
MatchNumber(const std::string & input) const155   bool MatchNumber(const std::string &input) const override {
156     if (input == "true" || input == "false") return true;
157     return IntegerRegex::MatchNumber(input);
158   }
159 
160  public:
161   BooleanRegex() = default;
162   virtual ~BooleanRegex() = default;
163 };
164 
165 class FloatRegex : public RegexMatcher {
166  protected:
MatchNumber(const std::string & input) const167   bool MatchNumber(const std::string &input) const override {
168     static const std::vector<std::regex> re_list = {
169       // hex-float
170       std::regex{
171           R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
172           std::regex_constants::optimize },
173       // dec-float
174       std::regex{
175           R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
176           std::regex_constants::optimize },
177 
178       std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
179                   std::regex_constants::optimize | std::regex_constants::icase }
180     };
181     return MatchRegexList(input, re_list);
182   }
183 
184  public:
185   FloatRegex() = default;
186   virtual ~FloatRegex() = default;
187 };
188 
189 class ScalarReferenceResult {
190  private:
ScalarReferenceResult(const char * _type,RegexMatcher::MatchResult _matched)191   ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
192       : type(_type), matched(_matched) {}
193 
194  public:
195   // Decode scalar type and check if the input string satisfies the scalar type.
Check(uint8_t code,const std::string & input)196   static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
197     switch (code) {
198       case 0x0: return { "double", FloatRegex().Match(input) };
199       case 0x1: return { "float", FloatRegex().Match(input) };
200       case 0x2: return { "int8", IntegerRegex().Match(input) };
201       case 0x3: return { "int16", IntegerRegex().Match(input) };
202       case 0x4: return { "int32", IntegerRegex().Match(input) };
203       case 0x5: return { "int64", IntegerRegex().Match(input) };
204       case 0x6: return { "uint8", UIntegerRegex().Match(input) };
205       case 0x7: return { "uint16", UIntegerRegex().Match(input) };
206       case 0x8: return { "uint32", UIntegerRegex().Match(input) };
207       case 0x9: return { "uint64", UIntegerRegex().Match(input) };
208       case 0xA: return { "bool", BooleanRegex().Match(input) };
209       default: return { "float", FloatRegex().Match(input) };
210     };
211   }
212 
213   const char *type;
214   const RegexMatcher::MatchResult matched;
215 };
216 
Parse(flatbuffers::Parser & parser,const std::string & json,std::string * _text)217 bool Parse(flatbuffers::Parser &parser, const std::string &json,
218            std::string *_text) {
219   auto done = parser.ParseJson(json.c_str());
220   if (done) {
221     TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
222             true);
223   } else {
224     *_text = parser.error_;
225   }
226   return done;
227 }
228 
229 // Utility for test run.
230 OneTimeTestInit OneTimeTestInit::one_time_init_;
231 
232 // llvm std::regex have problem with stack overflow, limit maximum length.
233 // ./scalar_fuzzer -max_len=3000
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)234 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
235   // Reserve one byte for Parser flags and one byte for repetition counter.
236   if (size < 3) return 0;
237   const uint8_t flags = data[0];
238   // normalize to ascii alphabet
239   const int extra_rep_number =
240       std::max(5, (data[1] > '0' ? (data[1] - '0') : 0));
241   data += 2;
242   size -= 2;  // bypass
243 
244   // Guarantee 0-termination.
245   const std::string original(reinterpret_cast<const char *>(data), size);
246   auto input = std::string(original.c_str());  // until '\0'
247   if (input.size() < kMinInputLength || input.size() > kMaxInputLength)
248     return 0;
249 
250   // Break comments in json to avoid complexity with regex matcher.
251   // The string " 12345 /* text */" will be accepted if insert it to string
252   // expression: "table X { Y: " + " 12345 /* text */" + "; }.
253   // But strings like this will complicate regex matcher.
254   // We reject this by transform "/* text */ 12345" to "@* text */ 12345".
255   BreakSequence(input, "//", '@');  // "//" -> "@/"
256   BreakSequence(input, "/*", '@');  // "/*" -> "@*"
257   // { "$schema: "text" } is exceptional case.
258   // This key:value ignored by the parser. Numbers can not have $.
259   BreakSequence(input, "$schema", '@');  // "$schema" -> "@schema"
260   // Break all known scalar functions (todo: add them to regex?):
261   for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
262     BreakSequence(input, f, '_');  // ident -> ident
263   }
264 
265   // Extract type of scalar from 'flags' and check if the input string satisfies
266   // the scalar type.
267   const auto ref_res =
268       ScalarReferenceResult::Check(flags & flags_scalar_type, input);
269   auto &recheck = ref_res.matched;
270 
271   // Create parser
272   flatbuffers::IDLOptions opts;
273   opts.force_defaults = true;
274   opts.output_default_scalars_in_json = true;
275   opts.indent_step = -1;
276   opts.strict_json = true;
277 
278   flatbuffers::Parser parser(opts);
279   auto schema =
280       "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
281   TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
282 
283   // The fuzzer can adjust the number repetition if a side-effects have found.
284   // Each test should pass at least two times to ensure that the parser doesn't
285   // have any hidden-states or locale-depended effects.
286   for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
287     // Each even run (0,2,4..) will test locale independed code.
288     auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
289     // Set new locale.
290     if (use_locale) {
291       FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
292     }
293 
294     // Parse original input as-is.
295     auto orig_scalar = "{\"Y\" : " + input + "}";
296     std::string orig_back;
297     auto orig_done = Parse(parser, orig_scalar, &orig_back);
298 
299     if (recheck.res != orig_done) {
300       // look for "does not fit" or "doesn't fit" or "out of range"
301       auto not_fit =
302           (true == recheck.res)
303               ? ((orig_back.find("does not fit") != std::string::npos) ||
304                  (orig_back.find("out of range") != std::string::npos))
305               : false;
306 
307       if (false == not_fit) {
308         TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
309                          recheck.res);
310         TEST_EQ_STR(orig_back.c_str(),
311                     input.substr(recheck.pos, recheck.len).c_str());
312         TEST_EQ_FUNC(orig_done, recheck.res);
313       }
314     }
315 
316     // Try to make quoted string and test it.
317     std::string qouted_input;
318     if (true == recheck.quoted) {
319       // we can't simply remove quotes, they may be nested "'12'".
320       // Original string "\'12\'" converted to "'12'".
321       // The string can be an invalid string by JSON rules, but after quotes
322       // removed can transform to valid.
323       assert(recheck.len >= 2);
324     } else {
325       const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
326       qouted_input = input;  // copy
327       qouted_input.insert(recheck.pos + recheck.len, 1, quote);
328       qouted_input.insert(recheck.pos, 1, quote);
329     }
330 
331     // Test quoted version of the string
332     if (!qouted_input.empty()) {
333       auto fix_scalar = "{\"Y\" : " + qouted_input + "}";
334       std::string fix_back;
335       auto fix_done = Parse(parser, fix_scalar, &fix_back);
336 
337       if (orig_done != fix_done) {
338         TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
339                          orig_done);
340         TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
341       }
342       if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
343       TEST_EQ_FUNC(fix_done, orig_done);
344     }
345 
346     // Create new parser and test default value
347     if (true == orig_done) {
348       flatbuffers::Parser def_parser(opts);  // re-use options
349       auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
350                         input + "; } root_type X;" +
351                         "{}";  // <- with empty json {}!
352 
353       auto def_done = def_parser.Parse(def_schema.c_str());
354       if (false == def_done) {
355         TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
356                          def_parser.error_.c_str());
357         FLATBUFFERS_ASSERT(false);
358       }
359       // Compare with print.
360       std::string ref_string, def_string;
361       FLATBUFFERS_ASSERT(GenerateText(
362           parser, parser.builder_.GetBufferPointer(), &ref_string));
363       FLATBUFFERS_ASSERT(GenerateText(
364           def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
365       if (ref_string != def_string) {
366         TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
367                          ref_string.c_str());
368         FLATBUFFERS_ASSERT(false);
369       }
370     }
371 
372     // Restore locale.
373     if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
374   }
375   return 0;
376 }
377