• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *
3  * An exhaustive set of tests for parsing both valid and invalid protobuf
4  * input, with buffer breaks in arbitrary places.
5  *
6  * Tests to add:
7  * - string/bytes
8  * - unknown field handler called appropriately
9  * - unknown fields can be inserted in random places
10  * - fuzzing of valid input
11  * - resource limits (max stack depth, max string len)
12  * - testing of groups
 * - more thorough testing of sequences
14  * - test skipping of submessages
15  * - test suspending the decoder
16  * - buffers that are close enough to the end of the address space that
17  *   pointers overflow (this might be difficult).
18  * - a few "kitchen sink" examples (one proto that uses all types, lots
 *   of submsg/sequences, etc.)
20  * - test different handlers at every level and whether handlers fire at
21  *   the correct field path.
22  * - test skips that extend past the end of current buffer (where decoder
23  *   returns value greater than the size param).
24  */
25 
26 #ifndef __STDC_FORMAT_MACROS
27 #define __STDC_FORMAT_MACROS  // For PRIuS, etc.
28 #endif
29 
30 #include <inttypes.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sstream>
36 
37 #include "tests/test_util.h"
38 #include "tests/upb_test.h"
39 #include "tests/pb/test_decoder.upbdefs.h"
40 
41 #ifdef AMALGAMATED
42 #include "upb.h"
43 #else  // AMALGAMATED
44 #include "upb/handlers.h"
45 #include "upb/pb/decoder.h"
46 #include "upb/pb/varint.int.h"
47 #include "upb/upb.h"
48 #endif  // !AMALGAMATED
49 
50 #include "upb/port_def.inc"
51 
// Replaces any earlier PRINT_FAILURE definition (hence the #undef; presumably
// the one from tests/upb_test.h -- confirm) with a version that also reports
// which test case was running.
//
// Always prints the file/line and the stringified failing expression.  When
// `testhash` is nonzero it additionally prints the hash of the running test
// case, a hint about re-running with that hash as the argument (only when no
// filter is active already), and how far through the run the failure happened
// (completed / total, as a percentage).
//
// Note: this expands to several statements followed by an `if` block; it is
// not wrapped in do { } while (0).
#undef PRINT_FAILURE
#define PRINT_FAILURE(expr)                                           \
  fprintf(stderr, "Assertion failed: %s:%d\n", __FILE__, __LINE__);   \
  fprintf(stderr, "expr: %s\n", #expr);                               \
  if (testhash) {                                                     \
    fprintf(stderr, "assertion failed running test %x.\n", testhash); \
    if (!filter_hash) {                                               \
      fprintf(stderr,                                                 \
              "Run with the arg %x to run only this test. "           \
              "(This will also turn on extra debugging output)\n",    \
              testhash);                                              \
    }                                                                 \
    fprintf(stderr, "Failed at %02.2f%% through tests.\n",            \
            (float)completed * 100 / total);                          \
  }
67 
// Size of the closures[] array and the nesting limit given to every decoder
// built by CreateDecoder().
#define MAX_NESTING 64

// Appends a newline to a string literal via literal concatenation; used to
// spell out expected handler output one line at a time.
#define LINE(x) x "\n"

// When nonzero, do_run_decoder() skips every test case whose hash differs from
// this value, and run_decoder() constructs its parser environment in verbose
// mode.
uint32_t filter_hash = 0;
// Progress counters; PRINT_FAILURE reports completed / total as a percentage.
double completed;
double total;
// Counter that do_run_decoder() increments once per test case it handles.
// NOTE(review): presumably pointed at `total` or `completed` depending on the
// pass -- the assignment is not visible in this part of the file.
double *count;
76 
// Selects what a pass over the test cases does.
enum TestMode {
  // do_run_decoder() only bumps *count; nothing is parsed.
  COUNT_ONLY = 1,
  // Input is parsed, but callback() registers no handlers and the handler
  // output is not compared against the expectation.
  NO_HANDLERS = 2,
  // callback() registers handlers for every field, and both the parse result
  // and the recorded output are checked.
  ALL_HANDLERS = 3
} test_mode;
82 
// Copied from decoder.c, since this is not a public interface.
typedef struct {
  uint8_t native_wire_type;  // Wire type used to encode this descriptor type.
  bool is_numeric;           // True for the scalar numeric/bool/enum types.
} upb_decoder_typeinfo;

// Indexed by upb_descriptortype_t value (the tests look up
// upb_decoder_types[type].native_wire_type).  Each entry is labelled with the
// descriptor type it describes; entry 0 is labelled ENDGROUP.
static const upb_decoder_typeinfo upb_decoder_types[] = {
  {UPB_WIRE_TYPE_END_GROUP,   false},  // ENDGROUP
  {UPB_WIRE_TYPE_64BIT,       true},   // DOUBLE
  {UPB_WIRE_TYPE_32BIT,       true},   // FLOAT
  {UPB_WIRE_TYPE_VARINT,      true},   // INT64
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT64
  {UPB_WIRE_TYPE_VARINT,      true},   // INT32
  {UPB_WIRE_TYPE_64BIT,       true},   // FIXED64
  {UPB_WIRE_TYPE_32BIT,       true},   // FIXED32
  {UPB_WIRE_TYPE_VARINT,      true},   // BOOL
  {UPB_WIRE_TYPE_DELIMITED,   false},  // STRING
  {UPB_WIRE_TYPE_START_GROUP, false},  // GROUP
  {UPB_WIRE_TYPE_DELIMITED,   false},  // MESSAGE
  {UPB_WIRE_TYPE_DELIMITED,   false},  // BYTES
  {UPB_WIRE_TYPE_VARINT,      true},   // UINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // ENUM
  {UPB_WIRE_TYPE_32BIT,       true},   // SFIXED32
  {UPB_WIRE_TYPE_64BIT,       true},   // SFIXED64
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT32
  {UPB_WIRE_TYPE_VARINT,      true},   // SINT64
};
110 
111 #ifndef USE_GOOGLE
112 using std::string;
113 #endif
114 
vappendf(string * str,const char * format,va_list args)115 void vappendf(string* str, const char *format, va_list args) {
116   va_list copy;
117   _upb_va_copy(copy, args);
118 
119   int count = vsnprintf(NULL, 0, format, args);
120   if (count >= 0)
121   {
122     UPB_ASSERT(count < 32768);
123     char *buffer = new char[count + 1];
124     UPB_ASSERT(buffer);
125     count = vsnprintf(buffer, count + 1, format, copy);
126     UPB_ASSERT(count >= 0);
127     str->append(buffer, count);
128     delete [] buffer;
129   }
130   va_end(copy);
131 }
132 
appendf(string * str,const char * fmt,...)133 void appendf(string* str, const char *fmt, ...) {
134   va_list args;
135   va_start(args, fmt);
136   vappendf(str, fmt, args);
137   va_end(args);
138 }
139 
// Writes `str` to stderr, printing printable bytes as-is and every other byte
// as a \xNN hex escape.
void PrintBinary(const string& str) {
  for (size_t i = 0; i < str.size(); i++) {
    // isprint() is only defined for values representable as unsigned char (or
    // EOF); a plain char holding a byte >= 0x80 may be negative, so convert
    // before classifying.
    const unsigned char byte = static_cast<unsigned char>(str[i]);
    if (isprint(byte)) {
      fprintf(stderr, "%c", byte);
    } else {
      fprintf(stderr, "\\x%02x", (int)byte);
    }
  }
}
149 
150 /* Routines for building arbitrary protos *************************************/
151 
// Default value for the optional arguments of cat().
const string empty;

// Concatenates between two and twelve strings, in argument order, and returns
// the result.  Arguments that are not supplied default to the empty string.
string cat(const string& a, const string& b,
           const string& c = empty,
           const string& d = empty,
           const string& e = empty,
           const string& f = empty,
           const string& g = empty,
           const string& h = empty,
           const string& i = empty,
           const string& j = empty,
           const string& k = empty,
           const string& l = empty) {
  const string* const parts[] = {&a, &b, &c, &d, &e, &f,
                                 &g, &h, &i, &j, &k, &l};
  const size_t num_parts = sizeof(parts) / sizeof(parts[0]);

  // Size the result up front so the appends below do not reallocate.
  size_t joined_size = 0;
  for (size_t n = 0; n < num_parts; n++) {
    joined_size += parts[n]->size();
  }

  string joined;
  joined.reserve(joined_size);
  for (size_t n = 0; n < num_parts; n++) {
    joined.append(*parts[n]);
  }
  return joined;
}
182 
// Returns the text produced by streaming `num` into a std::ostringstream.
template <typename T>
string num2string(T num) {
  std::ostringstream formatter;
  formatter << num;
  const string text = formatter.str();
  return text;
}
189 
varint(uint64_t x)190 string varint(uint64_t x) {
191   char buf[UPB_PB_VARINT_MAX_LEN];
192   size_t len = upb_vencode64(x, buf);
193   return string(buf, len);
194 }
195 
// TODO: proper byte-swapping for big-endian machines.

// Returns the first 4 bytes at `data`, copied verbatim in memory order.
string fixed32(void *data) {
  const char* bytes = static_cast<const char*>(data);
  return string(bytes, bytes + 4);
}

// Returns the first 8 bytes at `data`, copied verbatim in memory order.
string fixed64(void *data) {
  const char* bytes = static_cast<const char*>(data);
  return string(bytes, bytes + 8);
}
199 
delim(const string & buf)200 string delim(const string& buf) { return cat(varint(buf.size()), buf); }
uint32(uint32_t u32)201 string uint32(uint32_t u32) { return fixed32(&u32); }
uint64(uint64_t u64)202 string uint64(uint64_t u64) { return fixed64(&u64); }
flt(float f)203 string flt(float f) { return fixed32(&f); }
dbl(double d)204 string dbl(double d) { return fixed64(&d); }
zz32(int32_t x)205 string zz32(int32_t x) { return varint(upb_zzenc_32(x)); }
zz64(int64_t x)206 string zz64(int64_t x) { return varint(upb_zzenc_64(x)); }
207 
tag(uint32_t fieldnum,char wire_type)208 string tag(uint32_t fieldnum, char wire_type) {
209   return varint((fieldnum << 3) | wire_type);
210 }
211 
submsg(uint32_t fn,const string & buf)212 string submsg(uint32_t fn, const string& buf) {
213   return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
214 }
215 
group(uint32_t fn,const string & buf)216 string group(uint32_t fn, const string& buf) {
217   return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf,
218              tag(fn, UPB_WIRE_TYPE_END_GROUP));
219 }
220 
221 // Like delim()/submsg(), but intentionally encodes an incorrect length.
222 // These help test when a delimited boundary doesn't land in the right place.
badlen_delim(int err,const string & buf)223 string badlen_delim(int err, const string& buf) {
224   return cat(varint(buf.size() + err), buf);
225 }
226 
badlen_submsg(int err,uint32_t fn,const string & buf)227 string badlen_submsg(int err, uint32_t fn, const string& buf) {
228   return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), badlen_delim(err, buf) );
229 }
230 
231 
232 /* A set of handlers that covers all .proto types *****************************/
233 
234 // The handlers simply append to a string indicating what handlers were called.
235 // This string is similar to protobuf text format but fields are referred to by
236 // number instead of name and sequences are explicitly delimited.  We indent
237 // using the closure depth to test that the stack of closures is properly
238 // handled.
239 
// Closure values handed to the handlers.  run_decoder() passes &closures[0] to
// the top-level sink, and each start* handler returns `depth + 1`, i.e. the
// next slot.  The pointed-to value is used as the indentation depth.
// NOTE(review): presumably filled so that closures[i] == i; the initialization
// is not visible in this part of the file.
int closures[MAX_NESTING];
// Text accumulated by the handlers during one parse; cleared by
// do_run_decoder() before each run and compared with the expected output.
string output;
242 
// Appends two spaces per level of `depth` to *buf.
void indentbuf(string *buf, int depth) {
  const size_t num_spaces = 2 * depth;
  buf->insert(buf->end(), num_spaces, ' ');
}
246 
// Defines a value handler named value_<member> taking a `ctype` value.  The
// handler indents `output` by *depth, appends "<field number>:<value>\n"
// (the value formatted with printf conversion `fmt`) and returns true.
#define NUMERIC_VALUE_HANDLER(member, ctype, fmt)                   \
  bool value_##member(int* depth, const uint32_t* num, ctype val) { \
    indentbuf(&output, *depth);                                     \
    appendf(&output, "%" PRIu32 ":%" fmt "\n", *num, val);          \
    return true;                                                    \
  }

// One handler per C type the numeric fields are delivered as.
NUMERIC_VALUE_HANDLER(uint32, uint32_t, PRIu32)
NUMERIC_VALUE_HANDLER(uint64, uint64_t, PRIu64)
NUMERIC_VALUE_HANDLER(int32,  int32_t,  PRId32)
NUMERIC_VALUE_HANDLER(int64,  int64_t,  PRId64)
NUMERIC_VALUE_HANDLER(float,  float,    "g")
NUMERIC_VALUE_HANDLER(double, double,   "g")
260 
261 bool value_bool(int* depth, const uint32_t* num, bool val) {
262   indentbuf(&output, *depth);
263   appendf(&output, "%" PRIu32 ":%s\n", *num, val ? "true" : "false");
264   return true;
265 }
266 
startstr(int * depth,const uint32_t * num,size_t size_hint)267 int* startstr(int* depth, const uint32_t* num, size_t size_hint) {
268   indentbuf(&output, *depth);
269   appendf(&output, "%" PRIu32 ":(%zu)\"", *num, size_hint);
270   return depth + 1;
271 }
272 
value_string(int * depth,const uint32_t * num,const char * buf,size_t n,const upb_bufhandle * handle)273 size_t value_string(int* depth, const uint32_t* num, const char* buf,
274                     size_t n, const upb_bufhandle* handle) {
275   UPB_UNUSED(num);
276   UPB_UNUSED(depth);
277   output.append(buf, n);
278   ASSERT(handle == &global_handle);
279   return n;
280 }
281 
endstr(int * depth,const uint32_t * num)282 bool endstr(int* depth, const uint32_t* num) {
283   UPB_UNUSED(num);
284   output.append("\n");
285   indentbuf(&output, *depth);
286   appendf(&output, "%" PRIu32 ":\"\n", *num);
287   return true;
288 }
289 
startsubmsg(int * depth,const uint32_t * num)290 int* startsubmsg(int* depth, const uint32_t* num) {
291   indentbuf(&output, *depth);
292   appendf(&output, "%" PRIu32 ":{\n", *num);
293   return depth + 1;
294 }
295 
endsubmsg(int * depth,const uint32_t * num)296 bool endsubmsg(int* depth, const uint32_t* num) {
297   UPB_UNUSED(num);
298   indentbuf(&output, *depth);
299   output.append("}\n");
300   return true;
301 }
302 
startseq(int * depth,const uint32_t * num)303 int* startseq(int* depth, const uint32_t* num) {
304   indentbuf(&output, *depth);
305   appendf(&output, "%" PRIu32 ":[\n", *num);
306   return depth + 1;
307 }
308 
endseq(int * depth,const uint32_t * num)309 bool endseq(int* depth, const uint32_t* num) {
310   UPB_UNUSED(num);
311   indentbuf(&output, *depth);
312   output.append("]\n");
313   return true;
314 }
315 
startmsg(int * depth)316 bool startmsg(int* depth) {
317   indentbuf(&output, *depth);
318   output.append("<\n");
319   return true;
320 }
321 
endmsg(int * depth,upb_status * status)322 bool endmsg(int* depth, upb_status* status) {
323   UPB_UNUSED(status);
324   indentbuf(&output, *depth);
325   output.append(">\n");
326   return true;
327 }
328 
// Deletes the heap-allocated uint32_t that `val` points to.
void free_uint32(void *val) {
  delete static_cast<uint32_t*>(val);
}
333 
334 template<class T, bool F(int*, const uint32_t*, T)>
doreg(upb::HandlersPtr h,uint32_t num)335 void doreg(upb::HandlersPtr h, uint32_t num) {
336   upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
337   ASSERT(f);
338   ASSERT(h.SetValueHandler<T>(f, UpbBind(F, new uint32_t(num))));
339   if (f.IsSequence()) {
340     ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num))));
341     ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num))));
342   }
343 }
344 
345 // The repeated field number to correspond to the given non-repeated field
346 // number.
rep_fn(uint32_t fn)347 uint32_t rep_fn(uint32_t fn) {
348   return (UPB_MAX_FIELDNUMBER - 1000) + fn;
349 }
350 
// Field number used for padding data (see thirty_byte_nop); callback()
// registers no handlers for it.
#define NOP_FIELD 40
// Field number the tests use to stand for an unknown field.
#define UNKNOWN_FIELD 666
353 
354 template <class T, bool F(int*, const uint32_t*, T)>
reg(upb::HandlersPtr h,upb_descriptortype_t type)355 void reg(upb::HandlersPtr h, upb_descriptortype_t type) {
356   // We register both a repeated and a non-repeated field for every type.
357   // For the non-repeated field we make the field number the same as the
358   // type.  For the repeated field we make it a function of the type.
359   doreg<T, F>(h, type);
360   doreg<T, F>(h, rep_fn(type));
361 }
362 
// Registers the start/end sequence handlers for field `f` on `h`, each bound
// to its own freshly allocated copy of `num`.  Both registrations are asserted
// to succeed.
void regseq(upb::HandlersPtr h, upb::FieldDefPtr f, uint32_t num) {
  ASSERT(h.SetStartSequenceHandler(f, UpbBind(startseq, new uint32_t(num))));
  ASSERT(h.SetEndSequenceHandler(f, UpbBind(endseq, new uint32_t(num))));
}
367 
reg_subm(upb::HandlersPtr h,uint32_t num)368 void reg_subm(upb::HandlersPtr h, uint32_t num) {
369   upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
370   ASSERT(f);
371   if (f.IsSequence()) regseq(h, f, num);
372   ASSERT(
373       h.SetStartSubMessageHandler(f, UpbBind(startsubmsg, new uint32_t(num))));
374   ASSERT(h.SetEndSubMessageHandler(f, UpbBind(endsubmsg, new uint32_t(num))));
375 }
376 
reg_str(upb::HandlersPtr h,uint32_t num)377 void reg_str(upb::HandlersPtr h, uint32_t num) {
378   upb::FieldDefPtr f = h.message_def().FindFieldByNumber(num);
379   ASSERT(f);
380   if (f.IsSequence()) regseq(h, f, num);
381   ASSERT(h.SetStartStringHandler(f, UpbBind(startstr, new uint32_t(num))));
382   ASSERT(h.SetEndStringHandler(f, UpbBind(endstr, new uint32_t(num))));
383   ASSERT(h.SetStringHandler(f, UpbBind(value_string, new uint32_t(num))));
384 }
385 
// Closure data read by callback(): the test mode that decides whether any
// handlers get registered.
struct HandlerRegisterData {
  TestMode mode;
};
389 
callback(const void * closure,upb::Handlers * h_ptr)390 void callback(const void *closure, upb::Handlers* h_ptr) {
391   upb::HandlersPtr h(h_ptr);
392   const HandlerRegisterData* data =
393       static_cast<const HandlerRegisterData*>(closure);
394   if (data->mode == ALL_HANDLERS) {
395     h.SetStartMessageHandler(UpbMakeHandler(startmsg));
396     h.SetEndMessageHandler(UpbMakeHandler(endmsg));
397 
398     // Register handlers for each type.
399     reg<double,   value_double>(h, UPB_DESCRIPTOR_TYPE_DOUBLE);
400     reg<float,    value_float> (h, UPB_DESCRIPTOR_TYPE_FLOAT);
401     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_INT64);
402     reg<uint64_t, value_uint64>(h, UPB_DESCRIPTOR_TYPE_UINT64);
403     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_INT32);
404     reg<uint64_t, value_uint64>(h, UPB_DESCRIPTOR_TYPE_FIXED64);
405     reg<uint32_t, value_uint32>(h, UPB_DESCRIPTOR_TYPE_FIXED32);
406     reg<bool,     value_bool>  (h, UPB_DESCRIPTOR_TYPE_BOOL);
407     reg<uint32_t, value_uint32>(h, UPB_DESCRIPTOR_TYPE_UINT32);
408     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_ENUM);
409     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_SFIXED32);
410     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_SFIXED64);
411     reg<int32_t,  value_int32> (h, UPB_DESCRIPTOR_TYPE_SINT32);
412     reg<int64_t,  value_int64> (h, UPB_DESCRIPTOR_TYPE_SINT64);
413 
414     reg_str(h, UPB_DESCRIPTOR_TYPE_STRING);
415     reg_str(h, UPB_DESCRIPTOR_TYPE_BYTES);
416     reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_STRING));
417     reg_str(h, rep_fn(UPB_DESCRIPTOR_TYPE_BYTES));
418 
419     // Register submessage/group handlers that are self-recursive
420     // to this type, eg: message M { optional M m = 1; }
421     reg_subm(h, UPB_DESCRIPTOR_TYPE_MESSAGE);
422     reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE));
423 
424     if (h.message_def().full_name() == std::string("DecoderTest")) {
425       reg_subm(h, UPB_DESCRIPTOR_TYPE_GROUP);
426       reg_subm(h, rep_fn(UPB_DESCRIPTOR_TYPE_GROUP));
427     }
428 
429     // For NOP_FIELD we register no handlers, so we can pad a proto freely without
430     // changing the output.
431   }
432 }
433 
434 /* Running of test cases ******************************************************/
435 
// Handlers and decoder method shared by every test case; run_decoder() uses
// them to build its sink and decoder.
// NOTE(review): both are presumably set up during test start-up -- the
// assignment is not visible in this part of the file.
const upb::Handlers *global_handlers;
upb::pb::DecoderMethodPtr global_method;
438 
CreateDecoder(upb::Arena * arena,upb::pb::DecoderMethodPtr method,upb::Sink sink,upb::Status * status)439 upb::pb::DecoderPtr CreateDecoder(upb::Arena* arena,
440                                   upb::pb::DecoderMethodPtr method,
441                                   upb::Sink sink, upb::Status* status) {
442   upb::pb::DecoderPtr ret =
443       upb::pb::DecoderPtr::Create(arena, method, sink, status);
444   ret.set_max_nesting(MAX_NESTING);
445   return ret;
446 }
447 
Hash(const string & proto,const string * expected_output,size_t seam1,size_t seam2,bool may_skip)448 uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
449               size_t seam2, bool may_skip) {
450   uint32_t hash = upb_murmur_hash2(proto.c_str(), proto.size(), 0);
451   if (expected_output)
452     hash = upb_murmur_hash2(expected_output->c_str(), expected_output->size(), hash);
453   hash = upb_murmur_hash2(&seam1, sizeof(seam1), hash);
454   hash = upb_murmur_hash2(&seam2, sizeof(seam2), hash);
455   hash = upb_murmur_hash2(&may_skip, sizeof(may_skip), hash);
456   return hash;
457 }
458 
// Asserts that the decoder's BytesParsed() count is consistent with `ofs`, the
// offset the caller has fed the decoder so far: it may lag behind `ofs`, but
// by no more than UPB_DECODER_MAX_RESIDUAL_BYTES.
void CheckBytesParsed(upb::pb::DecoderPtr decoder, size_t ofs) {
  // We can't have parsed more data than the decoder callback is telling us it
  // parsed.
  ASSERT(decoder.BytesParsed() <= ofs);

  // The difference between what we've decoded and what the decoder has accepted
  // represents the internally buffered amount.  This amount should not exceed
  // this value which comes from decoder.int.h.
  ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES));
}
469 
parse(VerboseParserEnvironment * env,upb::pb::DecoderPtr decoder,int bytes)470 static bool parse(VerboseParserEnvironment* env,
471                   upb::pb::DecoderPtr decoder, int bytes) {
472   CheckBytesParsed(decoder, env->ofs());
473   bool ret = env->ParseBuffer(bytes);
474   if (ret) {
475     CheckBytesParsed(decoder, env->ofs());
476   }
477 
478   return ret;
479 }
480 
do_run_decoder(VerboseParserEnvironment * env,upb::pb::DecoderPtr decoder,const string & proto,const string * expected_output,size_t i,size_t j,bool may_skip)481 void do_run_decoder(VerboseParserEnvironment* env, upb::pb::DecoderPtr decoder,
482                     const string& proto, const string* expected_output,
483                     size_t i, size_t j, bool may_skip) {
484   env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
485   decoder.Reset();
486 
487   testhash = Hash(proto, expected_output, i, j, may_skip);
488   if (filter_hash && testhash != filter_hash) return;
489   if (test_mode != COUNT_ONLY) {
490     output.clear();
491 
492     if (filter_hash) {
493       fprintf(stderr, "RUNNING TEST CASE, hash=%x\n", testhash);
494       fprintf(stderr, "Input (len=%u): ", (unsigned)proto.size());
495       PrintBinary(proto);
496       fprintf(stderr, "\n");
497       if (expected_output) {
498         if (test_mode == ALL_HANDLERS) {
499           fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
500         } else if (test_mode == NO_HANDLERS) {
501           fprintf(stderr,
502                   "No handlers are registered, BUT if they were "
503                   "the expected output would be: %s\n",
504                   expected_output->c_str());
505         }
506       } else {
507         fprintf(stderr, "Expected to FAIL\n");
508       }
509     }
510 
511     bool ok = env->Start() &&
512               parse(env, decoder, (int)i) &&
513               parse(env, decoder, (int)(j - i)) &&
514               parse(env, decoder, -1) &&
515               env->End();
516 
517     ASSERT(env->CheckConsistency());
518 
519     if (test_mode == ALL_HANDLERS) {
520       if (expected_output) {
521         if (output != *expected_output) {
522           fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
523                   output.c_str(), expected_output->c_str());
524         }
525         ASSERT(ok);
526         ASSERT(output == *expected_output);
527       } else {
528         if (ok) {
529           fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
530                   output.c_str());
531         }
532         ASSERT(!ok);
533       }
534     }
535   }
536   (*count)++;
537 }
538 
run_decoder(const string & proto,const string * expected_output)539 void run_decoder(const string& proto, const string* expected_output) {
540   VerboseParserEnvironment env(filter_hash != 0);
541   upb::Sink sink(global_handlers, &closures[0]);
542   upb::pb::DecoderPtr decoder = CreateDecoder(env.arena(), global_method, sink, env.status());
543   env.ResetBytesSink(decoder.input());
544   for (size_t i = 0; i < proto.size(); i++) {
545     for (size_t j = i; j < UPB_MIN(proto.size(), i + 5); j++) {
546       do_run_decoder(&env, decoder, proto, expected_output, i, j, true);
547       if (env.SkippedWithNull()) {
548         do_run_decoder(&env, decoder, proto, expected_output, i, j, false);
549       }
550     }
551   }
552   testhash = 0;
553 }
554 
// Padding data: a delimited NOP_FIELD whose payload is thirty 'X' bytes.  The
// tests append it to a proto to move the interesting bytes away from the end
// of the buffer.
const static string thirty_byte_nop = cat(
    tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );
557 
558 // Indents and wraps text as if it were a submessage with this field number
wrap_text(int32_t fn,const string & text)559 string wrap_text(int32_t fn, const string& text) {
560   string wrapped_text = text;
561   size_t pos = 0;
562   string replace_with = "\n  ";
563   while ((pos = wrapped_text.find("\n", pos)) != string::npos &&
564          pos != wrapped_text.size() - 1) {
565     wrapped_text.replace(pos, 1, replace_with);
566     pos += replace_with.size();
567   }
568   wrapped_text = cat(
569       LINE("<"),
570       num2string(fn), LINE(":{")
571       "  ", wrapped_text,
572       LINE("  }")
573       LINE(">"));
574   return wrapped_text;
575 }
576 
assert_successful_parse(const string & proto,const char * expected_fmt,...)577 void assert_successful_parse(const string& proto,
578                              const char *expected_fmt, ...) {
579   string expected_text;
580   va_list args;
581   va_start(args, expected_fmt);
582   vappendf(&expected_text, expected_fmt, args);
583   va_end(args);
584   // To test both middle-of-buffer and end-of-buffer code paths,
585   // repeat once with no-op padding data at the end of buffer.
586   run_decoder(proto, &expected_text);
587   run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
588 
589   // Test that this also works when wrapped in a submessage or group.
590   // Indent the expected text one level and wrap it.
591   string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
592   string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);
593 
594   run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
595   run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
596 }
597 
assert_does_not_parse_at_eof(const string & proto)598 void assert_does_not_parse_at_eof(const string& proto) {
599   run_decoder(proto, NULL);
600 
601   // Also test that we fail to parse at end-of-submessage, not just
602   // end-of-message.  But skip this if we have no handlers, because in that
603   // case we won't descend into the submessage.
604   if (test_mode != NO_HANDLERS) {
605     run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
606     run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
607                     thirty_byte_nop), NULL);
608   }
609 }
610 
assert_does_not_parse(const string & proto)611 void assert_does_not_parse(const string& proto) {
612   // Test that the error is caught both at end-of-buffer and middle-of-buffer.
613   assert_does_not_parse_at_eof(proto);
614   assert_does_not_parse_at_eof(cat( proto, thirty_byte_nop ));
615 }
616 
617 
618 /* The actual tests ***********************************************************/
619 
test_premature_eof_for_type(upb_descriptortype_t type)620 void test_premature_eof_for_type(upb_descriptortype_t type) {
621   // Incomplete values for each wire type.
622   static const string incompletes[6] = {
623     string("\x80"),     // UPB_WIRE_TYPE_VARINT
624     string("abcdefg"),  // UPB_WIRE_TYPE_64BIT
625     string("\x80"),     // UPB_WIRE_TYPE_DELIMITED (partial length)
626     string(),           // UPB_WIRE_TYPE_START_GROUP (no value required)
627     string(),           // UPB_WIRE_TYPE_END_GROUP (no value required)
628     string("abc")       // UPB_WIRE_TYPE_32BIT
629   };
630 
631   uint32_t fieldnum = type;
632   uint32_t rep_fieldnum = rep_fn(type);
633   int wire_type = upb_decoder_types[type].native_wire_type;
634   const string& incomplete = incompletes[wire_type];
635 
636   // EOF before a known non-repeated value.
637   assert_does_not_parse_at_eof(tag(fieldnum, wire_type));
638 
639   // EOF before a known repeated value.
640   assert_does_not_parse_at_eof(tag(rep_fieldnum, wire_type));
641 
642   // EOF before an unknown value.
643   assert_does_not_parse_at_eof(tag(UNKNOWN_FIELD, wire_type));
644 
645   // EOF inside a known non-repeated value.
646   assert_does_not_parse_at_eof(
647       cat( tag(fieldnum, wire_type), incomplete ));
648 
649   // EOF inside a known repeated value.
650   assert_does_not_parse_at_eof(
651       cat( tag(rep_fieldnum, wire_type), incomplete ));
652 
653   // EOF inside an unknown value.
654   assert_does_not_parse_at_eof(
655       cat( tag(UNKNOWN_FIELD, wire_type), incomplete ));
656 
657   if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
658     // EOF in the middle of delimited data for known non-repeated value.
659     assert_does_not_parse_at_eof(
660         cat( tag(fieldnum, wire_type), varint(1) ));
661 
662     // EOF in the middle of delimited data for known repeated value.
663     assert_does_not_parse_at_eof(
664         cat( tag(rep_fieldnum, wire_type), varint(1) ));
665 
666     // EOF in the middle of delimited data for unknown value.
667     assert_does_not_parse_at_eof(
668         cat( tag(UNKNOWN_FIELD, wire_type), varint(1) ));
669 
670     if (type == UPB_DESCRIPTOR_TYPE_MESSAGE) {
671       // Submessage ends in the middle of a value.
672       string incomplete_submsg =
673           cat ( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
674                 incompletes[UPB_WIRE_TYPE_VARINT] );
675       assert_does_not_parse(
676           cat( tag(fieldnum, UPB_WIRE_TYPE_DELIMITED),
677                varint(incomplete_submsg.size()),
678                incomplete_submsg ));
679     }
680   } else {
681     // Packed region ends in the middle of a value.
682     assert_does_not_parse(
683         cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
684              varint(incomplete.size()),
685              incomplete ));
686 
687     // EOF in the middle of packed region.
688     assert_does_not_parse_at_eof(
689         cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED), varint(1) ));
690   }
691 }
692 
693 // "33" and "66" are just two random values that all numeric types can
694 // represent.
test_valid_data_for_type(upb_descriptortype_t type,const string & enc33,const string & enc66)695 void test_valid_data_for_type(upb_descriptortype_t type,
696                               const string& enc33, const string& enc66) {
697   uint32_t fieldnum = type;
698   uint32_t rep_fieldnum = rep_fn(type);
699   int wire_type = upb_decoder_types[type].native_wire_type;
700 
701   // Non-repeated
702   assert_successful_parse(
703       cat( tag(fieldnum, wire_type), enc33,
704            tag(fieldnum, wire_type), enc66 ),
705       LINE("<")
706       LINE("%u:33")
707       LINE("%u:66")
708       LINE(">"), fieldnum, fieldnum);
709 
710   // Non-packed repeated.
711   assert_successful_parse(
712       cat( tag(rep_fieldnum, wire_type), enc33,
713            tag(rep_fieldnum, wire_type), enc66 ),
714       LINE("<")
715       LINE("%u:[")
716       LINE("  %u:33")
717       LINE("  %u:66")
718       LINE("]")
719       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
720 
721   // Packed repeated.
722   assert_successful_parse(
723       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
724            delim(cat( enc33, enc66 )) ),
725       LINE("<")
726       LINE("%u:[")
727       LINE("  %u:33")
728       LINE("  %u:66")
729       LINE("]")
730       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
731 }
732 
test_valid_data_for_signed_type(upb_descriptortype_t type,const string & enc33,const string & enc66)733 void test_valid_data_for_signed_type(upb_descriptortype_t type,
734                                      const string& enc33, const string& enc66) {
735   uint32_t fieldnum = type;
736   uint32_t rep_fieldnum = rep_fn(type);
737   int wire_type = upb_decoder_types[type].native_wire_type;
738 
739   // Non-repeated
740   assert_successful_parse(
741       cat( tag(fieldnum, wire_type), enc33,
742            tag(fieldnum, wire_type), enc66 ),
743       LINE("<")
744       LINE("%u:33")
745       LINE("%u:-66")
746       LINE(">"), fieldnum, fieldnum);
747 
748   // Non-packed repeated.
749   assert_successful_parse(
750       cat( tag(rep_fieldnum, wire_type), enc33,
751            tag(rep_fieldnum, wire_type), enc66 ),
752       LINE("<")
753       LINE("%u:[")
754       LINE("  %u:33")
755       LINE("  %u:-66")
756       LINE("]")
757       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
758 
759   // Packed repeated.
760   assert_successful_parse(
761       cat( tag(rep_fieldnum, UPB_WIRE_TYPE_DELIMITED),
762            delim(cat( enc33, enc66 )) ),
763       LINE("<")
764       LINE("%u:[")
765       LINE("  %u:33")
766       LINE("  %u:-66")
767       LINE("]")
768       LINE(">"), rep_fieldnum, rep_fieldnum, rep_fieldnum);
769 }
770 
771 // Test that invalid protobufs are properly detected (without crashing) and
772 // have an error reported.  Field numbers match registered handlers above.
test_invalid()773 void test_invalid() {
774   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_DOUBLE);
775   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FLOAT);
776   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT64);
777   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT64);
778   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_INT32);
779   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED64);
780   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_FIXED32);
781   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BOOL);
782   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_STRING);
783   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_BYTES);
784   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_UINT32);
785   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_ENUM);
786   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED32);
787   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SFIXED64);
788   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT32);
789   test_premature_eof_for_type(UPB_DESCRIPTOR_TYPE_SINT64);
790 
791   // EOF inside a tag's varint.
792   assert_does_not_parse_at_eof( string("\x80") );
793 
794   // EOF inside a known group.
795   // TODO(haberman): add group to decoder test schema.
796   //assert_does_not_parse_at_eof( tag(4, UPB_WIRE_TYPE_START_GROUP) );
797 
798   // EOF inside an unknown group.
799   assert_does_not_parse_at_eof( tag(UNKNOWN_FIELD, UPB_WIRE_TYPE_START_GROUP) );
800 
801   // End group that we are not currently in.
802   assert_does_not_parse( tag(4, UPB_WIRE_TYPE_END_GROUP) );
803 
804   // Field number is 0.
805   assert_does_not_parse(
806       cat( tag(0, UPB_WIRE_TYPE_DELIMITED), varint(0) ));
807   // The previous test alone did not catch this particular pattern which could
808   // corrupt the internal state.
809   assert_does_not_parse(
810       cat( tag(0, UPB_WIRE_TYPE_64BIT), uint64(0) ));
811 
812   // Field number is too large.
813   assert_does_not_parse(
814       cat( tag(UPB_MAX_FIELDNUMBER + 1, UPB_WIRE_TYPE_DELIMITED),
815            varint(0) ));
816 
817   // Known group inside a submessage has ENDGROUP tag AFTER submessage end.
818   assert_does_not_parse(
819       cat ( submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
820                    tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_START_GROUP)),
821             tag(UPB_DESCRIPTOR_TYPE_GROUP, UPB_WIRE_TYPE_END_GROUP)));
822 
823   // Unknown string extends past enclosing submessage.
824   assert_does_not_parse(
825       cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
826                          submsg(12345, string("   "))),
827            submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string("     "))));
828 
829   // Unknown fixed-length field extends past enclosing submessage.
830   assert_does_not_parse(
831       cat (badlen_submsg(-1, UPB_DESCRIPTOR_TYPE_MESSAGE,
832                          cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(0))),
833            submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string("     "))));
834 
835   // Test exceeding the resource limit of stack depth.
836   if (test_mode != NO_HANDLERS) {
837     string buf;
838     for (int i = 0; i <= MAX_NESTING; i++) {
839       buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
840     }
841     assert_does_not_parse(buf);
842   }
843 }
844 
test_valid()845 void test_valid() {
846   // Empty protobuf.
847   assert_successful_parse(string(""), "<\n>\n");
848 
849   // Empty protobuf where we never call PutString between
850   // StartString/EndString.
851 
852   // Randomly generated hash for this test, hope it doesn't conflict with others
853   // by chance.
854   const uint32_t emptyhash = 0x5709be8e;
855   if (!filter_hash || filter_hash == testhash) {
856     testhash = emptyhash;
857     upb::Status status;
858     upb::Arena arena;
859     upb::Sink sink(global_handlers, &closures[0]);
860     upb::pb::DecoderPtr decoder =
861         CreateDecoder(&arena, global_method, sink, &status);
862     output.clear();
863     bool ok = upb::PutBuffer(std::string(), decoder.input());
864     ASSERT(ok);
865     ASSERT(status.ok());
866     if (test_mode == ALL_HANDLERS) {
867       ASSERT(output == string("<\n>\n"));
868     }
869   }
870 
871   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_DOUBLE,
872                                   dbl(33),
873                                   dbl(-66));
874   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_FLOAT, flt(33), flt(-66));
875   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT64,
876                                   varint(33),
877                                   varint(-66));
878   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_INT32,
879                                   varint(33),
880                                   varint(-66));
881   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_ENUM,
882                                   varint(33),
883                                   varint(-66));
884   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED32,
885                                   uint32(33),
886                                   uint32(-66));
887   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SFIXED64,
888                                   uint64(33),
889                                   uint64(-66));
890   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT32,
891                                   zz32(33),
892                                   zz32(-66));
893   test_valid_data_for_signed_type(UPB_DESCRIPTOR_TYPE_SINT64,
894                                   zz64(33),
895                                   zz64(-66));
896 
897   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT64, varint(33), varint(66));
898   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_UINT32, varint(33), varint(66));
899   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED64, uint64(33), uint64(66));
900   test_valid_data_for_type(UPB_DESCRIPTOR_TYPE_FIXED32, uint32(33), uint32(66));
901 
902   // Unknown fields.
903   int int32_type = UPB_DESCRIPTOR_TYPE_INT32;
904   int msg_type = UPB_DESCRIPTOR_TYPE_MESSAGE;
905   assert_successful_parse(
906       cat( tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ),
907       "<\n>\n");
908   assert_successful_parse(
909       cat( tag(12345, UPB_WIRE_TYPE_32BIT), uint32(2345678) ),
910       "<\n>\n");
911   assert_successful_parse(
912       cat( tag(12345, UPB_WIRE_TYPE_64BIT), uint64(2345678) ),
913       "<\n>\n");
914   assert_successful_parse(
915       submsg(12345, string("                ")),
916       "<\n>\n");
917 
918   // Unknown field inside a known submessage.
919   assert_successful_parse(
920       submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
921       LINE("<")
922       LINE("%u:{")
923       LINE("  <")
924       LINE("  >")
925       LINE("  }")
926       LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);
927 
928   assert_successful_parse(
929       cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
930            tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
931            varint(5)),
932       LINE("<")
933       LINE("%u:{")
934       LINE("  <")
935       LINE("  >")
936       LINE("  }")
937       LINE("%u:5")
938       LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE, UPB_DESCRIPTOR_TYPE_INT32);
939 
940   // This triggered a previous bug in the decoder.
941   assert_successful_parse(
942       cat( tag(UPB_DESCRIPTOR_TYPE_SFIXED32, UPB_WIRE_TYPE_VARINT),
943            varint(0) ),
944       "<\n>\n");
945 
946   assert_successful_parse(
947       cat(
948         submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
949           submsg(UPB_DESCRIPTOR_TYPE_MESSAGE,
950             cat( tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(2345678),
951                  tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ))),
952         tag(int32_type, UPB_WIRE_TYPE_VARINT), varint(22222)),
953       LINE("<")
954       LINE("%u:{")
955       LINE("  <")
956       LINE("  %u:{")
957       LINE("    <")
958       LINE("    %u:2345678")
959       LINE("    >")
960       LINE("    }")
961       LINE("  >")
962       LINE("  }")
963       LINE("%u:22222")
964       LINE(">"), msg_type, msg_type, int32_type, int32_type);
965 
966   assert_successful_parse(
967       cat( tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1),
968            tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678) ),
969       LINE("<")
970       LINE("%u:1")
971       LINE(">"), UPB_DESCRIPTOR_TYPE_INT32);
972 
973   // String inside submsg.
974   uint32_t msg_fn = UPB_DESCRIPTOR_TYPE_MESSAGE;
975   assert_successful_parse(
976       submsg(msg_fn,
977              cat ( tag(UPB_DESCRIPTOR_TYPE_STRING, UPB_WIRE_TYPE_DELIMITED),
978                    delim(string("abcde"))
979                  )
980              ),
981       LINE("<")
982       LINE("%u:{")
983       LINE("  <")
984       LINE("  %u:(5)\"abcde")
985       LINE("    %u:\"")
986       LINE("  >")
987       LINE("  }")
988       LINE(">"), msg_fn, UPB_DESCRIPTOR_TYPE_STRING,
989                  UPB_DESCRIPTOR_TYPE_STRING);
990 
991   // Test implicit startseq/endseq.
992   uint32_t repfl_fn = rep_fn(UPB_DESCRIPTOR_TYPE_FLOAT);
993   uint32_t repdb_fn = rep_fn(UPB_DESCRIPTOR_TYPE_DOUBLE);
994   assert_successful_parse(
995       cat( tag(repfl_fn, UPB_WIRE_TYPE_32BIT), flt(33),
996            tag(repdb_fn, UPB_WIRE_TYPE_64BIT), dbl(66) ),
997       LINE("<")
998       LINE("%u:[")
999       LINE("  %u:33")
1000       LINE("]")
1001       LINE("%u:[")
1002       LINE("  %u:66")
1003       LINE("]")
1004       LINE(">"), repfl_fn, repfl_fn, repdb_fn, repdb_fn);
1005 
1006   // Submessage tests.
1007   assert_successful_parse(
1008       submsg(msg_fn, submsg(msg_fn, submsg(msg_fn, string()))),
1009       LINE("<")
1010       LINE("%u:{")
1011       LINE("  <")
1012       LINE("  %u:{")
1013       LINE("    <")
1014       LINE("    %u:{")
1015       LINE("      <")
1016       LINE("      >")
1017       LINE("      }")
1018       LINE("    >")
1019       LINE("    }")
1020       LINE("  >")
1021       LINE("  }")
1022       LINE(">"), msg_fn, msg_fn, msg_fn);
1023 
1024   uint32_t repm_fn = rep_fn(UPB_DESCRIPTOR_TYPE_MESSAGE);
1025   assert_successful_parse(
1026       submsg(repm_fn, submsg(repm_fn, string())),
1027       LINE("<")
1028       LINE("%u:[")
1029       LINE("  %u:{")
1030       LINE("    <")
1031       LINE("    %u:[")
1032       LINE("      %u:{")
1033       LINE("        <")
1034       LINE("        >")
1035       LINE("        }")
1036       LINE("    ]")
1037       LINE("    >")
1038       LINE("    }")
1039       LINE("]")
1040       LINE(">"), repm_fn, repm_fn, repm_fn, repm_fn);
1041 
1042   // Test unknown group.
1043   uint32_t unknown_group_fn = 12321;
1044   assert_successful_parse(
1045       cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1046            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ),
1047       LINE("<")
1048       LINE(">")
1049   );
1050 
1051   // Test some unknown fields inside an unknown group.
1052   const string unknown_group_with_data =
1053       cat(
1054           tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1055           tag(12345, UPB_WIRE_TYPE_VARINT), varint(2345678),
1056           tag(123456789, UPB_WIRE_TYPE_32BIT), uint32(2345678),
1057           tag(123477, UPB_WIRE_TYPE_64BIT), uint64(2345678),
1058           tag(123, UPB_WIRE_TYPE_DELIMITED), varint(0),
1059           tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP)
1060          );
1061 
1062   // Nested unknown group with data.
1063   assert_successful_parse(
1064       cat(
1065            tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1066            unknown_group_with_data,
1067            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP),
1068            tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), varint(1)
1069          ),
1070       LINE("<")
1071       LINE("%u:1")
1072       LINE(">"),
1073       UPB_DESCRIPTOR_TYPE_INT32
1074   );
1075 
1076   assert_successful_parse(
1077       cat( tag(unknown_group_fn, UPB_WIRE_TYPE_START_GROUP),
1078            tag(unknown_group_fn + 1, UPB_WIRE_TYPE_START_GROUP),
1079            tag(unknown_group_fn + 1, UPB_WIRE_TYPE_END_GROUP),
1080            tag(unknown_group_fn, UPB_WIRE_TYPE_END_GROUP) ),
1081       LINE("<")
1082       LINE(">")
1083   );
1084 
1085   // Staying within the stack limit should work properly.
1086   string buf;
1087   string textbuf;
1088   int total = MAX_NESTING - 1;
1089   for (int i = 0; i < total; i++) {
1090     buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
1091     indentbuf(&textbuf, i);
1092     textbuf.append("<\n");
1093     indentbuf(&textbuf, i);
1094     appendf(&textbuf, "%u:{\n", UPB_DESCRIPTOR_TYPE_MESSAGE);
1095   }
1096   indentbuf(&textbuf, total);
1097   textbuf.append("<\n");
1098   indentbuf(&textbuf, total);
1099   textbuf.append(">\n");
1100   for (int i = 0; i < total; i++) {
1101     indentbuf(&textbuf, total - i - 1);
1102     textbuf.append("  }\n");
1103     indentbuf(&textbuf, total - i - 1);
1104     textbuf.append(">\n");
1105   }
1106   // Have to use run_decoder directly, because we are at max nesting and can't
1107   // afford the extra nesting that assert_successful_parse() will do.
1108   run_decoder(buf, &textbuf);
1109 }
1110 
empty_callback(const void *,upb::Handlers *)1111 void empty_callback(const void* /* closure */, upb::Handlers* /* h_ptr */) {}
1112 
test_emptyhandlers(upb::SymbolTable * symtab)1113 void test_emptyhandlers(upb::SymbolTable* symtab) {
1114   // Create an empty handlers to make sure that the decoder can handle empty
1115   // messages.
1116   HandlerRegisterData handlerdata;
1117   handlerdata.mode = test_mode;
1118 
1119   upb::HandlerCache handler_cache(empty_callback, &handlerdata);
1120   upb::pb::CodeCache pb_code_cache(&handler_cache);
1121 
1122   upb::MessageDefPtr md = upb::MessageDefPtr(Empty_getmsgdef(symtab->ptr()));
1123   global_handlers = handler_cache.Get(md);
1124   global_method = pb_code_cache.Get(md);
1125 
1126   // TODO: also test the case where a message has fields, but the fields are
1127   // submessage fields and have no handlers. This also results in a decoder
1128   // method with no field-handling code.
1129 
1130   // Ensure that the method can run with empty and non-empty input.
1131   string test_unknown_field_msg =
1132     cat(tag(1, UPB_WIRE_TYPE_VARINT), varint(42),
1133         tag(2, UPB_WIRE_TYPE_DELIMITED), delim("My test data"));
1134   const struct {
1135     const char* data;
1136     size_t length;
1137   } testdata[] = {
1138     { "", 0 },
1139     { test_unknown_field_msg.data(), test_unknown_field_msg.size() },
1140     { NULL, 0 },
1141   };
1142   for (int i = 0; testdata[i].data; i++) {
1143     VerboseParserEnvironment env(filter_hash != 0);
1144     upb::Sink sink(global_method.dest_handlers(), &closures[0]);
1145     upb::pb::DecoderPtr decoder =
1146         CreateDecoder(env.arena(), global_method, sink, env.status());
1147     env.ResetBytesSink(decoder.input());
1148     env.Reset(testdata[i].data, testdata[i].length, true, false);
1149     ASSERT(env.Start());
1150     ASSERT(env.ParseBuffer(-1));
1151     ASSERT(env.End());
1152     ASSERT(env.CheckConsistency());
1153   }
1154 }
1155 
run_tests()1156 void run_tests() {
1157   HandlerRegisterData handlerdata;
1158   handlerdata.mode = test_mode;
1159 
1160   upb::SymbolTable symtab;
1161   upb::HandlerCache handler_cache(callback, &handlerdata);
1162   upb::pb::CodeCache pb_code_cache(&handler_cache);
1163 
1164   upb::MessageDefPtr md(DecoderTest_getmsgdef(symtab.ptr()));
1165   global_handlers = handler_cache.Get(md);
1166   global_method = pb_code_cache.Get(md);
1167   completed = 0;
1168 
1169   test_invalid();
1170   test_valid();
1171 
1172   test_emptyhandlers(&symtab);
1173 }
1174 
1175 extern "C" {
1176 
run_tests(int argc,char * argv[])1177 int run_tests(int argc, char *argv[]) {
1178   if (argc > 1)
1179     filter_hash = (uint32_t)strtol(argv[1], NULL, 16);
1180   for (int i = 0; i < MAX_NESTING; i++) {
1181     closures[i] = i;
1182   }
1183 
1184   // Count tests.
1185   count = &total;
1186   total = 0;
1187   test_mode = COUNT_ONLY;
1188   run_tests();
1189   count = &completed;
1190 
1191   total *= 2;  // NO_HANDLERS, ALL_HANDLERS.
1192 
1193   test_mode = NO_HANDLERS;
1194   run_tests();
1195 
1196   test_mode = ALL_HANDLERS;
1197   run_tests();
1198 
1199   printf("All tests passed, %d assertions.\n", num_assertions);
1200   return 0;
1201 }
1202 
1203 }
1204