• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "binary_annotator.h"
2 
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>
9 
10 #include "flatbuffers/base.h"
11 #include "flatbuffers/reflection.h"
12 #include "flatbuffers/util.h"
13 #include "flatbuffers/verifier.h"
14 
15 namespace flatbuffers {
16 namespace {
17 
BinaryRegionSort(const BinaryRegion & a,const BinaryRegion & b)18 static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
19   return a.offset < b.offset;
20 }
21 
SetError(BinaryRegionComment & comment,BinaryRegionStatus status,std::string message="")22 static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
23                      std::string message = "") {
24   comment.status = status;
25   comment.status_message = message;
26 }
27 
MakeBinaryRegion(const uint64_t offset=0,const uint64_t length=0,const BinaryRegionType type=BinaryRegionType::Unknown,const uint64_t array_length=0,const uint64_t points_to_offset=0,BinaryRegionComment comment={})28 static BinaryRegion MakeBinaryRegion(
29     const uint64_t offset = 0, const uint64_t length = 0,
30     const BinaryRegionType type = BinaryRegionType::Unknown,
31     const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
32     BinaryRegionComment comment = {}) {
33   BinaryRegion region;
34   region.offset = offset;
35   region.length = length;
36   region.type = type;
37   region.array_length = array_length;
38   region.points_to_offset = points_to_offset;
39   region.comment = std::move(comment);
40   return region;
41 }
42 
MakeBinarySection(const std::string & name,const BinarySectionType type,std::vector<BinaryRegion> regions)43 static BinarySection MakeBinarySection(const std::string &name,
44                                        const BinarySectionType type,
45                                        std::vector<BinaryRegion> regions) {
46   BinarySection section;
47   section.name = name;
48   section.type = type;
49   section.regions = std::move(regions);
50   return section;
51 }
52 
MakeSingleRegionBinarySection(const std::string & name,const BinarySectionType type,const BinaryRegion & region)53 static BinarySection MakeSingleRegionBinarySection(const std::string &name,
54                                                    const BinarySectionType type,
55                                                    const BinaryRegion &region) {
56   std::vector<BinaryRegion> regions;
57   regions.push_back(region);
58   return MakeBinarySection(name, type, std::move(regions));
59 }
60 
// Returns true if at least one of the `length` bytes of `binary` starting at
// `offset` is non-zero. An empty range yields false. The caller is responsible
// for making sure the range lies inside `binary`.
static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                            const uint8_t *const binary) {
  const uint8_t *const first = binary + offset;
  const uint8_t *const last = first + length;
  return std::any_of(first, last,
                     [](const uint8_t byte) { return byte != 0; });
}
68 
// Returns true if every one of the `length` bytes of `binary` starting at
// `offset` is a printable character according to std::isprint (which depends
// on the current C locale). An empty range yields true. The caller is
// responsible for making sure the range lies inside `binary`.
static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
  const uint8_t *const first = binary + offset;
  const uint8_t *const last = first + length;
  // uint8_t promotes to a non-negative int, which is the value range
  // std::isprint requires.
  return std::all_of(first, last, [](const uint8_t byte) {
    return std::isprint(byte) != 0;
  });
}
76 
GenerateMissingSection(const uint64_t offset,const uint64_t length,const uint8_t * const binary)77 static BinarySection GenerateMissingSection(const uint64_t offset,
78                                             const uint64_t length,
79                                             const uint8_t *const binary) {
80   std::vector<BinaryRegion> regions;
81 
82   // Check if the region is all zeros or not, as that can tell us if it is
83   // padding or not.
84   if (IsNonZeroRegion(offset, length, binary)) {
85     // Some of the padding bytes are non-zero, so this might be an unknown
86     // section of the binary.
87     // TODO(dbaileychess): We could be a bit smarter with different sized
88     // alignments. For now, the 8 byte check encompasses all the smaller
89     // alignments.
90     BinaryRegionComment comment;
91     comment.type = BinaryRegionCommentType::Unknown;
92     if (length >= 8) {
93       SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
94     } else {
95       SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
96     }
97 
98     regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
99                                        BinaryRegionType::Unknown, length, 0,
100                                        comment));
101 
102     return MakeBinarySection("no known references", BinarySectionType::Unknown,
103                              std::move(regions));
104   }
105 
106   BinaryRegionComment comment;
107   comment.type = BinaryRegionCommentType::Padding;
108   if (length >= 8) {
109     SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
110   }
111 
112   // This region is most likely padding.
113   regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
114                                      BinaryRegionType::Uint8, length, 0,
115                                      comment));
116 
117   return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
118 }
119 
120 }  // namespace
121 
Annotate()122 std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
123   if (bfbs_ != nullptr && bfbs_length_ != 0) {
124     flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
125     if ((is_size_prefixed_ &&
126          !reflection::VerifySizePrefixedSchemaBuffer(verifier)) ||
127         !reflection::VerifySchemaBuffer(verifier)) {
128       return {};
129     }
130   }
131 
132   // The binary is too short to read as a flatbuffers.
133   if (binary_length_ < FLATBUFFERS_MIN_BUFFER_SIZE) { return {}; }
134 
135   // Make sure we start with a clean slate.
136   vtables_.clear();
137   sections_.clear();
138 
139   // First parse the header region which always start at offset 0.
140   // The returned offset will point to the root_table location.
141   const uint64_t root_table_offset = BuildHeader(0);
142 
143   if (IsValidOffset(root_table_offset)) {
144     // Build the root table, and all else will be referenced from it.
145     BuildTable(root_table_offset, BinarySectionType::RootTable, RootTable());
146   }
147 
148   // Now that all the sections are built, make sure the binary sections are
149   // contiguous.
150   FixMissingRegions();
151 
152   // Then scan the area between BinarySections insert padding sections that are
153   // implied.
154   FixMissingSections();
155 
156   return sections_;
157 }
158 
BuildHeader(const uint64_t header_offset)159 uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
160   uint64_t offset = header_offset;
161   std::vector<BinaryRegion> regions;
162 
163   // If this binary is a size prefixed one, attempt to parse the size.
164   if (is_size_prefixed_) {
165     BinaryRegionComment prefix_length_comment;
166     prefix_length_comment.type = BinaryRegionCommentType::SizePrefix;
167 
168     bool has_prefix_value = false;
169     const auto prefix_length = ReadScalar<uoffset64_t>(offset);
170     if (*prefix_length <= binary_length_) {
171       regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset64_t),
172                                          BinaryRegionType::Uint64, 0, 0,
173                                          prefix_length_comment));
174       offset += sizeof(uoffset64_t);
175       has_prefix_value = true;
176     }
177 
178     if (!has_prefix_value) {
179       const auto prefix_length = ReadScalar<uoffset_t>(offset);
180       if (*prefix_length <= binary_length_) {
181         regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset_t),
182                                            BinaryRegionType::Uint32, 0, 0,
183                                            prefix_length_comment));
184         offset += sizeof(uoffset_t);
185         has_prefix_value = true;
186       }
187     }
188 
189     if (!has_prefix_value) {
190       SetError(prefix_length_comment, BinaryRegionStatus::ERROR);
191     }
192   }
193 
194   const auto root_table_offset = ReadScalar<uint32_t>(offset);
195 
196   if (!root_table_offset.has_value()) {
197     // This shouldn't occur, since we validate the min size of the buffer
198     // before. But for completion sake, we shouldn't read passed the binary end.
199     return std::numeric_limits<uint64_t>::max();
200   }
201 
202   const auto root_table_loc = offset + *root_table_offset;
203 
204   BinaryRegionComment root_offset_comment;
205   root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
206   root_offset_comment.name = RootTable()->name()->str();
207 
208   if (!IsValidOffset(root_table_loc)) {
209     SetError(root_offset_comment,
210              BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
211   }
212 
213   regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
214                                      BinaryRegionType::UOffset, 0,
215                                      root_table_loc, root_offset_comment));
216   offset += sizeof(uint32_t);
217 
218   if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
219       IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
220     BinaryRegionComment comment;
221     comment.type = BinaryRegionCommentType::FileIdentifier;
222     // Check if the file identifier region has non-zero data, and assume its
223     // the file identifier. Otherwise, it will get filled in with padding
224     // later.
225     regions.push_back(MakeBinaryRegion(
226         offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
227         BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
228         comment));
229   }
230 
231   AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
232                                               std::move(regions)));
233 
234   return root_table_loc;
235 }
236 
GetOrBuildVTable(const uint64_t vtable_offset,const reflection::Object * const table,const uint64_t offset_of_referring_table)237 BinaryAnnotator::VTable *BinaryAnnotator::GetOrBuildVTable(
238     const uint64_t vtable_offset, const reflection::Object *const table,
239     const uint64_t offset_of_referring_table) {
240   // Get a list of vtables (if any) already defined at this offset.
241   std::list<VTable> &vtables = vtables_[vtable_offset];
242 
243   // See if this vtable for the table type has been generated before.
244   for (VTable &vtable : vtables) {
245     if (vtable.referring_table == table) { return &vtable; }
246   }
247 
248   // If we are trying to make a new vtable and it is already encompassed by
249   // another binary section, something is corrupted.
250   if (vtables.empty() && ContainsSection(vtable_offset)) { return nullptr; }
251 
252   const std::string referring_table_name = table->name()->str();
253 
254   BinaryRegionComment vtable_size_comment;
255   vtable_size_comment.type = BinaryRegionCommentType::VTableSize;
256 
257   const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
258   if (!vtable_length.has_value()) {
259     const uint64_t remaining = RemainingBytes(vtable_offset);
260 
261     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
262              "2");
263 
264     AddSection(vtable_offset,
265                MakeSingleRegionBinarySection(
266                    referring_table_name, BinarySectionType::VTable,
267                    MakeBinaryRegion(vtable_offset, remaining,
268                                     BinaryRegionType::Unknown, remaining, 0,
269                                     vtable_size_comment)));
270     return nullptr;
271   }
272 
273   // Vtables start with the size of the vtable
274   const uint16_t vtable_size = vtable_length.value();
275 
276   if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
277     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
278     // The vtable_size points to off the end of the binary.
279     AddSection(vtable_offset,
280                MakeSingleRegionBinarySection(
281                    referring_table_name, BinarySectionType::VTable,
282                    MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
283                                     BinaryRegionType::Uint16, 0, 0,
284                                     vtable_size_comment)));
285 
286     return nullptr;
287   } else if (vtable_size < 2 * sizeof(uint16_t)) {
288     SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
289              "4");
290     // The size includes itself and the table size which are both uint16_t.
291     AddSection(vtable_offset,
292                MakeSingleRegionBinarySection(
293                    referring_table_name, BinarySectionType::VTable,
294                    MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
295                                     BinaryRegionType::Uint16, 0, 0,
296                                     vtable_size_comment)));
297     return nullptr;
298   }
299 
300   std::vector<BinaryRegion> regions;
301 
302   regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
303                                      BinaryRegionType::Uint16, 0, 0,
304                                      vtable_size_comment));
305   uint64_t offset = vtable_offset + sizeof(uint16_t);
306 
307   BinaryRegionComment ref_table_len_comment;
308   ref_table_len_comment.type =
309       BinaryRegionCommentType::VTableRefferingTableLength;
310 
311   // Ensure we can read the next uint16_t field, which is the size of the
312   // referring table.
313   const auto table_length = ReadScalar<uint16_t>(offset);
314 
315   if (!table_length.has_value()) {
316     const uint64_t remaining = RemainingBytes(offset);
317     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
318              "2");
319 
320     AddSection(offset, MakeSingleRegionBinarySection(
321                            referring_table_name, BinarySectionType::VTable,
322                            MakeBinaryRegion(
323                                offset, remaining, BinaryRegionType::Unknown,
324                                remaining, 0, ref_table_len_comment)));
325     return nullptr;
326   }
327 
328   // Then they have the size of the table they reference.
329   const uint16_t table_size = table_length.value();
330 
331   if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
332     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
333   } else if (table_size < 4) {
334     SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
335              "4");
336   }
337 
338   regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
339                                      BinaryRegionType::Uint16, 0, 0,
340                                      ref_table_len_comment));
341   offset += sizeof(uint16_t);
342 
343   const uint64_t offset_start = offset;
344 
345   // A mapping between field (and its id) to the relative offset (uin16_t) from
346   // the start of the table.
347   std::map<uint16_t, VTable::Entry> fields;
348 
349   // Counter for determining if the binary has more vtable entries than the
350   // schema provided. This can occur if the binary was created at a newer schema
351   // version and is being processed with an older one.
352   uint16_t fields_processed = 0;
353 
354   // Loop over all the fields.
355   ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
356     const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
357 
358     if (field_offset >= vtable_offset + vtable_size) {
359       // This field_offset is too large for this vtable, so it must come from a
360       // newer schema than the binary was create with or the binary writer did
361       // not write it. For either case, it is safe to ignore.
362 
363       // TODO(dbaileychess): We could show which fields are not set an their
364       // default values if we want. We just need a way to make it obvious that
365       // it isn't part of the buffer.
366       return;
367     }
368 
369     BinaryRegionComment field_comment;
370     field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
371     field_comment.name = std::string(field->name()->c_str()) +
372                          "` (id: " + std::to_string(field->id()) + ")";
373 
374     const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
375 
376     if (!offset_from_table.has_value()) {
377       const uint64_t remaining = RemainingBytes(field_offset);
378 
379       SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
380       regions.push_back(MakeBinaryRegion(field_offset, remaining,
381                                          BinaryRegionType::Unknown, remaining,
382                                          0, field_comment));
383 
384       return;
385     }
386 
387     if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
388                        1)) {
389       SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
390       regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
391                                          BinaryRegionType::VOffset, 0, 0,
392                                          field_comment));
393       return;
394     }
395 
396     VTable::Entry entry;
397     entry.field = field;
398     entry.offset_from_table = offset_from_table.value();
399     fields.insert(std::make_pair(field->id(), entry));
400 
401     std::string default_label;
402     if (offset_from_table.value() == 0) {
403       // Not present, so could be default or be optional.
404       if (field->required()) {
405         SetError(field_comment,
406                  BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
407         // If this is a required field, make it known this is an error.
408         regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
409                                            BinaryRegionType::VOffset, 0, 0,
410                                            field_comment));
411         return;
412       } else {
413         // Its an optional field, so get the default value and interpret and
414         // provided an annotation for it.
415         if (IsScalar(field->type()->base_type())) {
416           default_label += "<defaults to ";
417           default_label += IsFloat(field->type()->base_type())
418                                ? std::to_string(field->default_real())
419                                : std::to_string(field->default_integer());
420           default_label += "> (";
421         } else {
422           default_label += "<null> (";
423         }
424         default_label +=
425             reflection::EnumNameBaseType(field->type()->base_type());
426         default_label += ")";
427       }
428     }
429     field_comment.default_value = default_label;
430 
431     regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
432                                        BinaryRegionType::VOffset, 0, 0,
433                                        field_comment));
434 
435     fields_processed++;
436   });
437 
438   // Check if we covered all the expectant fields. If not, we need to add them
439   // as unknown fields.
440   uint16_t expectant_vtable_fields =
441       (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
442 
443   // Prevent a bad binary from declaring a really large vtable_size, that we can
444   // not independently verify.
445   expectant_vtable_fields = std::min(
446       static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);
447 
448   for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
449     const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
450 
451     const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
452 
453     BinaryRegionComment field_comment;
454     field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
455     field_comment.index = id;
456 
457     if (!offset_from_table.has_value()) {
458       const uint64_t remaining = RemainingBytes(field_offset);
459       SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
460       regions.push_back(MakeBinaryRegion(field_offset, remaining,
461                                          BinaryRegionType::Unknown, remaining,
462                                          0, field_comment));
463       continue;
464     }
465 
466     VTable::Entry entry;
467     entry.field = nullptr;  // No field to reference.
468     entry.offset_from_table = offset_from_table.value();
469     fields.insert(std::make_pair(id, entry));
470 
471     regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
472                                        BinaryRegionType::VOffset, 0, 0,
473                                        field_comment));
474   }
475 
476   // If we have never added this vtable before record the Binary section.
477   if (vtables.empty()) {
478     sections_[vtable_offset] = MakeBinarySection(
479         referring_table_name, BinarySectionType::VTable, std::move(regions));
480   } else {
481     // Add the current table name to the name of the section.
482     sections_[vtable_offset].name += ", " + referring_table_name;
483   }
484 
485   VTable vtable;
486   vtable.referring_table = table;
487   vtable.fields = std::move(fields);
488   vtable.table_size = table_size;
489   vtable.vtable_size = vtable_size;
490 
491   // Add this vtable to the collection of vtables at this offset.
492   vtables.push_back(std::move(vtable));
493 
494   // Return the vtable we just added.
495   return &vtables.back();
496 }
497 
BuildTable(const uint64_t table_offset,const BinarySectionType type,const reflection::Object * const table)498 void BinaryAnnotator::BuildTable(const uint64_t table_offset,
499                                  const BinarySectionType type,
500                                  const reflection::Object *const table) {
501   if (ContainsSection(table_offset)) { return; }
502 
503   BinaryRegionComment vtable_offset_comment;
504   vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;
505 
506   const auto vtable_soffset = ReadScalar<int32_t>(table_offset);
507 
508   if (!vtable_soffset.has_value()) {
509     const uint64_t remaining = RemainingBytes(table_offset);
510     SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
511              "4");
512 
513     AddSection(
514         table_offset,
515         MakeSingleRegionBinarySection(
516             table->name()->str(), type,
517             MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
518                              remaining, 0, vtable_offset_comment)));
519 
520     // If there aren't enough bytes left to read the vtable offset, there is
521     // nothing we can do.
522     return;
523   }
524 
525   // Tables start with the vtable
526   const uint64_t vtable_offset = table_offset - vtable_soffset.value();
527 
528   if (!IsValidOffset(vtable_offset)) {
529     SetError(vtable_offset_comment,
530              BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
531 
532     AddSection(table_offset,
533                MakeSingleRegionBinarySection(
534                    table->name()->str(), type,
535                    MakeBinaryRegion(table_offset, sizeof(int32_t),
536                                     BinaryRegionType::SOffset, 0, vtable_offset,
537                                     vtable_offset_comment)));
538 
539     // There isn't much to do with an invalid vtable offset, as we won't be able
540     // to intepret the rest of the table fields.
541     return;
542   }
543 
544   std::vector<BinaryRegion> regions;
545   regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
546                                      BinaryRegionType::SOffset, 0,
547                                      vtable_offset, vtable_offset_comment));
548 
549   // Parse the vtable first so we know what the rest of the fields in the table
550   // are.
551   const VTable *const vtable =
552       GetOrBuildVTable(vtable_offset, table, table_offset);
553 
554   if (vtable == nullptr) {
555     // There is no valid vtable for this table, so we cannot process the rest of
556     // the table entries.
557     return;
558   }
559 
560   // This is the size and length of this table.
561   const uint16_t table_size = vtable->table_size;
562   uint64_t table_end_offset = table_offset + table_size;
563 
564   if (!IsValidOffset(table_end_offset - 1)) {
565     // We already validated the table size in BuildVTable, but we have to make
566     // sure we don't use a bad value here.
567     table_end_offset = binary_length_;
568   }
569 
570   // We need to iterate over the vtable fields by their offset in the binary,
571   // not by their IDs. So copy them over to another vector that we can sort on
572   // the offset_from_table property.
573   std::vector<VTable::Entry> fields;
574   for (const auto &vtable_field : vtable->fields) {
575     fields.push_back(vtable_field.second);
576   }
577 
578   std::stable_sort(fields.begin(), fields.end(),
579                    [](const VTable::Entry &a, const VTable::Entry &b) {
580                      return a.offset_from_table < b.offset_from_table;
581                    });
582 
583   // Iterate over all the fields by order of their offset.
584   for (size_t i = 0; i < fields.size(); ++i) {
585     const reflection::Field *field = fields[i].field;
586     const uint16_t offset_from_table = fields[i].offset_from_table;
587 
588     if (offset_from_table == 0) {
589       // Skip non-present fields.
590       continue;
591     }
592 
593     // The field offsets are relative to the start of the table.
594     const uint64_t field_offset = table_offset + offset_from_table;
595 
596     if (!IsValidOffset(field_offset)) {
597       // The field offset is larger than the binary, nothing we can do.
598       continue;
599     }
600 
601     // We have a vtable entry for a non-existant field, that means its a binary
602     // generated by a newer schema than we are currently processing.
603     if (field == nullptr) {
604       // Calculate the length of this unknown field.
605       const uint64_t unknown_field_length =
606           // Check if there is another unknown field after this one.
607           ((i + 1 < fields.size())
608                ? table_offset + fields[i + 1].offset_from_table
609                // Otherwise use the known end of the table.
610                : table_end_offset) -
611           field_offset;
612 
613       if (unknown_field_length == 0) { continue; }
614 
615       std::string hint;
616 
617       if (unknown_field_length == 4) {
618         const auto relative_offset = ReadScalar<uint32_t>(field_offset);
619         if (relative_offset.has_value()) {
620           // The field is 4 in length, so it could be an offset? Provide a hint.
621           hint += "<possibly an offset? Check Loc: +0x";
622           hint += ToHex(field_offset + relative_offset.value());
623           hint += ">";
624         }
625       }
626 
627       BinaryRegionComment unknown_field_comment;
628       unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;
629 
630       if (!IsValidRead(field_offset, unknown_field_length)) {
631         const uint64_t remaining = RemainingBytes(field_offset);
632 
633         SetError(unknown_field_comment,
634                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
635                  std::to_string(unknown_field_length));
636 
637         regions.push_back(MakeBinaryRegion(field_offset, remaining,
638                                            BinaryRegionType::Unknown, remaining,
639                                            0, unknown_field_comment));
640         continue;
641       }
642 
643       unknown_field_comment.default_value = hint;
644 
645       regions.push_back(MakeBinaryRegion(
646           field_offset, unknown_field_length, BinaryRegionType::Unknown,
647           unknown_field_length, 0, unknown_field_comment));
648       continue;
649     }
650 
651     if (IsScalar(field->type()->base_type())) {
652       // These are the raw values store in the table.
653       const uint64_t type_size = GetTypeSize(field->type()->base_type());
654       const BinaryRegionType region_type =
655           GetRegionType(field->type()->base_type());
656 
657       BinaryRegionComment scalar_field_comment;
658       scalar_field_comment.type = BinaryRegionCommentType::TableField;
659       scalar_field_comment.name =
660           std::string(field->name()->c_str()) + "` (" +
661           reflection::EnumNameBaseType(field->type()->base_type()) + ")";
662 
663       if (!IsValidRead(field_offset, type_size)) {
664         const uint64_t remaining = RemainingBytes(field_offset);
665         SetError(scalar_field_comment,
666                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
667                  std::to_string(type_size));
668 
669         regions.push_back(MakeBinaryRegion(field_offset, remaining,
670                                            BinaryRegionType::Unknown, remaining,
671                                            0, scalar_field_comment));
672         continue;
673       }
674 
675       if (IsUnionType(field)) {
676         // This is a type for a union. Validate the value
677         const auto enum_value = ReadScalar<uint8_t>(field_offset);
678 
679         // This should always have a value, due to the IsValidRead check above.
680         if (!IsValidUnionValue(field, enum_value.value())) {
681           SetError(scalar_field_comment,
682                    BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
683 
684           regions.push_back(MakeBinaryRegion(field_offset, type_size,
685                                              region_type, 0, 0,
686                                              scalar_field_comment));
687           continue;
688         }
689       }
690 
691       regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
692                                          0, 0, scalar_field_comment));
693       continue;
694     }
695 
696     // Read the offset
697     uint64_t offset = 0;
698     uint64_t length = sizeof(uint32_t);
699     BinaryRegionType region_type = BinaryRegionType::UOffset;
700 
701     if (field->offset64()) {
702       length = sizeof(uint64_t);
703       region_type = BinaryRegionType::UOffset64;
704       offset = ReadScalar<uint64_t>(field_offset).value_or(0);
705     } else {
706       offset = ReadScalar<uint32_t>(field_offset).value_or(0);
707     }
708     // const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
709     uint64_t offset_of_next_item = 0;
710     BinaryRegionComment offset_field_comment;
711     offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
712     offset_field_comment.name = field->name()->c_str();
713     const std::string offset_prefix =
714         "offset to field `" + std::string(field->name()->c_str()) + "`";
715 
716     // Validate any field that isn't inline (i.e., non-structs).
717     if (!IsInlineField(field)) {
718       if (offset == 0) {
719         const uint64_t remaining = RemainingBytes(field_offset);
720 
721         SetError(offset_field_comment,
722                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
723 
724         regions.push_back(MakeBinaryRegion(field_offset, remaining,
725                                            BinaryRegionType::Unknown, remaining,
726                                            0, offset_field_comment));
727         continue;
728       }
729 
730       offset_of_next_item = field_offset + offset;
731 
732       if (!IsValidOffset(offset_of_next_item)) {
733         SetError(offset_field_comment,
734                  BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
735         regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
736                                            offset_of_next_item,
737                                            offset_field_comment));
738         continue;
739       }
740     }
741 
742     switch (field->type()->base_type()) {
743       case reflection::BaseType::Obj: {
744         const reflection::Object *next_object =
745             schema_->objects()->Get(field->type()->index());
746 
747         if (next_object->is_struct()) {
748           // Structs are stored inline.
749           BuildStruct(field_offset, regions, field->name()->c_str(),
750                       next_object);
751         } else {
752           offset_field_comment.default_value = "(table)";
753 
754           regions.push_back(MakeBinaryRegion(field_offset, length, region_type,
755                                              0, offset_of_next_item,
756                                              offset_field_comment));
757 
758           BuildTable(offset_of_next_item, BinarySectionType::Table,
759                      next_object);
760         }
761       } break;
762 
763       case reflection::BaseType::String: {
764         offset_field_comment.default_value = "(string)";
765         regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
766                                            offset_of_next_item,
767                                            offset_field_comment));
768         BuildString(offset_of_next_item, table, field);
769       } break;
770 
771       case reflection::BaseType::Vector: {
772         offset_field_comment.default_value = "(vector)";
773         regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
774                                            offset_of_next_item,
775                                            offset_field_comment));
776         BuildVector(offset_of_next_item, table, field, table_offset,
777                     vtable->fields);
778       } break;
779       case reflection::BaseType::Vector64: {
780         offset_field_comment.default_value = "(vector64)";
781         regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
782                                            offset_of_next_item,
783                                            offset_field_comment));
784         BuildVector(offset_of_next_item, table, field, table_offset,
785                     vtable->fields);
786       } break;
787 
788       case reflection::BaseType::Union: {
789         const uint64_t union_offset = offset_of_next_item;
790 
791         // The union type field is always one less than the union itself.
792         const uint16_t union_type_id = field->id() - 1;
793 
794         auto vtable_field = vtable->fields.find(union_type_id);
795         if (vtable_field == vtable->fields.end()) {
796           // TODO(dbaileychess): need to capture this error condition.
797           break;
798         }
799         offset_field_comment.default_value = "(union)";
800 
801         const uint64_t type_offset =
802             table_offset + vtable_field->second.offset_from_table;
803 
804         const auto realized_type = ReadScalar<uint8_t>(type_offset);
805         if (!realized_type.has_value()) {
806           const uint64_t remaining = RemainingBytes(type_offset);
807           SetError(offset_field_comment,
808                    BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
809           regions.push_back(MakeBinaryRegion(
810               type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
811               offset_field_comment));
812           continue;
813         }
814 
815         if (!IsValidUnionValue(field, realized_type.value())) {
816           // We already export an error in the union type field, so just skip
817           // building the union itself and it will default to an unreference
818           // Binary section.
819           continue;
820         }
821 
822         const std::string enum_type =
823             BuildUnion(union_offset, realized_type.value(), field);
824 
825         offset_field_comment.default_value =
826             "(union of type `" + enum_type + "`)";
827 
828         regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
829                                            union_offset, offset_field_comment));
830 
831       } break;
832 
833       default: break;
834     }
835   }
836 
837   // Handle the case where there is padding after the last known binary
838   // region. Calculate where we left off towards the expected end of the
839   // table.
840   const uint64_t i = regions.back().offset + regions.back().length + 1;
841 
842   if (i < table_end_offset) {
843     const uint64_t pad_bytes = table_end_offset - i + 1;
844 
845     BinaryRegionComment padding_comment;
846     padding_comment.type = BinaryRegionCommentType::Padding;
847 
848     regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
849                                        BinaryRegionType::Uint8, pad_bytes, 0,
850                                        padding_comment));
851   }
852 
853   AddSection(table_offset,
854              MakeBinarySection(table->name()->str(), type, std::move(regions)));
855 }
856 
BuildStruct(const uint64_t struct_offset,std::vector<BinaryRegion> & regions,const std::string referring_field_name,const reflection::Object * const object)857 uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
858                                       std::vector<BinaryRegion> &regions,
859                                       const std::string referring_field_name,
860                                       const reflection::Object *const object) {
861   if (!object->is_struct()) { return struct_offset; }
862   uint64_t offset = struct_offset;
863 
864   // Loop over all the fields in increasing order
865   ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
866     if (IsScalar(field->type()->base_type())) {
867       // Structure Field value
868       const uint64_t type_size = GetTypeSize(field->type()->base_type());
869       const BinaryRegionType region_type =
870           GetRegionType(field->type()->base_type());
871 
872       BinaryRegionComment comment;
873       comment.type = BinaryRegionCommentType::StructField;
874       comment.name = referring_field_name + "." + field->name()->str();
875       comment.default_value = "of '" + object->name()->str() + "' (" +
876                               std::string(reflection::EnumNameBaseType(
877                                   field->type()->base_type())) +
878                               ")";
879 
880       if (!IsValidRead(offset, type_size)) {
881         const uint64_t remaining = RemainingBytes(offset);
882         SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
883                  std::to_string(type_size));
884         regions.push_back(MakeBinaryRegion(offset, remaining,
885                                            BinaryRegionType::Unknown, remaining,
886                                            0, comment));
887 
888         // TODO(dbaileychess): Should I bail out here? This sets offset to the
889         // end of the binary. So all other reads in the loop should fail.
890         offset += remaining;
891         return;
892       }
893 
894       regions.push_back(
895           MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
896       offset += type_size;
897     } else if (field->type()->base_type() == reflection::BaseType::Obj) {
898       // Structs are stored inline, even when nested.
899       offset = BuildStruct(offset, regions,
900                            referring_field_name + "." + field->name()->str(),
901                            schema_->objects()->Get(field->type()->index()));
902     } else if (field->type()->base_type() == reflection::BaseType::Array) {
903       const bool is_scalar = IsScalar(field->type()->element());
904       const uint64_t type_size = GetTypeSize(field->type()->element());
905       const BinaryRegionType region_type =
906           GetRegionType(field->type()->element());
907 
908       // Arrays are just repeated structures.
909       for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
910         if (is_scalar) {
911           BinaryRegionComment array_comment;
912           array_comment.type = BinaryRegionCommentType::ArrayField;
913           array_comment.name =
914               referring_field_name + "." + field->name()->str();
915           array_comment.index = i;
916           array_comment.default_value =
917               "of '" + object->name()->str() + "' (" +
918               std::string(
919                   reflection::EnumNameBaseType(field->type()->element())) +
920               ")";
921 
922           if (!IsValidRead(offset, type_size)) {
923             const uint64_t remaining = RemainingBytes(offset);
924 
925             SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
926                      std::to_string(type_size));
927 
928             regions.push_back(MakeBinaryRegion(offset, remaining,
929                                                BinaryRegionType::Unknown,
930                                                remaining, 0, array_comment));
931 
932             // TODO(dbaileychess): Should I bail out here? This sets offset to
933             // the end of the binary. So all other reads in the loop should
934             // fail.
935             offset += remaining;
936             break;
937           }
938 
939           regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
940                                              0, array_comment));
941 
942           offset += type_size;
943         } else {
944           // Array of Structs.
945           //
946           // TODO(dbaileychess): This works, but the comments on the fields lose
947           // some context. Need to figure a way how to plumb the nested arrays
948           // comments together that isn't too confusing.
949           offset =
950               BuildStruct(offset, regions,
951                           referring_field_name + "." + field->name()->str(),
952                           schema_->objects()->Get(field->type()->index()));
953         }
954       }
955     }
956 
957     // Insert any padding after this field.
958     const uint16_t padding = field->padding();
959     if (padding > 0 && IsValidOffset(offset + padding)) {
960       BinaryRegionComment padding_comment;
961       padding_comment.type = BinaryRegionCommentType::Padding;
962 
963       regions.push_back(MakeBinaryRegion(offset, padding,
964                                          BinaryRegionType::Uint8, padding, 0,
965                                          padding_comment));
966       offset += padding;
967     }
968   });
969 
970   return offset;
971 }
972 
BuildString(const uint64_t string_offset,const reflection::Object * const table,const reflection::Field * const field)973 void BinaryAnnotator::BuildString(const uint64_t string_offset,
974                                   const reflection::Object *const table,
975                                   const reflection::Field *const field) {
976   // Check if we have already generated this string section, and this is a
977   // shared string instance.
978   if (ContainsSection(string_offset)) { return; }
979 
980   std::vector<BinaryRegion> regions;
981   const auto string_length = ReadScalar<uint32_t>(string_offset);
982 
983   BinaryRegionComment string_length_comment;
984   string_length_comment.type = BinaryRegionCommentType::StringLength;
985 
986   if (!string_length.has_value()) {
987     const uint64_t remaining = RemainingBytes(string_offset);
988 
989     SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
990              "4");
991 
992     regions.push_back(MakeBinaryRegion(string_offset, remaining,
993                                        BinaryRegionType::Unknown, remaining, 0,
994                                        string_length_comment));
995 
996   } else {
997     const uint32_t string_size = string_length.value();
998     const uint64_t string_end =
999         string_offset + sizeof(uint32_t) + string_size + sizeof(char);
1000 
1001     if (!IsValidOffset(string_end - 1)) {
1002       SetError(string_length_comment,
1003                BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
1004 
1005       regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
1006                                          BinaryRegionType::Uint32, 0, 0,
1007                                          string_length_comment));
1008     } else {
1009       regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
1010                                          BinaryRegionType::Uint32, 0, 0,
1011                                          string_length_comment));
1012 
1013       BinaryRegionComment string_comment;
1014       string_comment.type = BinaryRegionCommentType::StringValue;
1015 
1016       regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
1017                                          string_size, BinaryRegionType::Char,
1018                                          string_size, 0, string_comment));
1019 
1020       BinaryRegionComment string_terminator_comment;
1021       string_terminator_comment.type =
1022           BinaryRegionCommentType::StringTerminator;
1023 
1024       regions.push_back(MakeBinaryRegion(
1025           string_offset + sizeof(uint32_t) + string_size, sizeof(char),
1026           BinaryRegionType::Char, 0, 0, string_terminator_comment));
1027     }
1028   }
1029 
1030   AddSection(string_offset,
1031              MakeBinarySection(std::string(table->name()->c_str()) + "." +
1032                                    field->name()->c_str(),
1033                                BinarySectionType::String, std::move(regions)));
1034 }
1035 
BuildVector(const uint64_t vector_offset,const reflection::Object * const table,const reflection::Field * const field,const uint64_t parent_table_offset,const std::map<uint16_t,VTable::Entry> vtable_fields)1036 void BinaryAnnotator::BuildVector(
1037     const uint64_t vector_offset, const reflection::Object *const table,
1038     const reflection::Field *const field, const uint64_t parent_table_offset,
1039     const std::map<uint16_t, VTable::Entry> vtable_fields) {
1040   if (ContainsSection(vector_offset)) { return; }
1041 
1042   BinaryRegionComment vector_length_comment;
1043   vector_length_comment.type = BinaryRegionCommentType::VectorLength;
1044 
1045   const bool is_64_bit_vector =
1046       field->type()->base_type() == reflection::BaseType::Vector64;
1047 
1048   flatbuffers::Optional<uint64_t> vector_length;
1049   uint32_t vector_length_size_type = 0;
1050   BinaryRegionType region_type = BinaryRegionType::Uint32;
1051   BinarySectionType section_type = BinarySectionType::Vector;
1052 
1053   if (is_64_bit_vector) {
1054     auto v = ReadScalar<uint64_t>(vector_offset);
1055     if (v.has_value()) { vector_length = v.value(); }
1056     vector_length_size_type = sizeof(uint64_t);
1057     region_type = BinaryRegionType::Uint64;
1058     section_type = BinarySectionType::Vector64;
1059   } else {
1060     auto v = ReadScalar<uint32_t>(vector_offset);
1061     if (v.has_value()) { vector_length = v.value(); }
1062     vector_length_size_type = sizeof(uint32_t);
1063     region_type = BinaryRegionType::Uint32;
1064     section_type = BinarySectionType::Vector;
1065   }
1066 
1067   if (!vector_length.has_value()) {
1068     const uint64_t remaining = RemainingBytes(vector_offset);
1069     SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1070              "4");
1071 
1072     AddSection(
1073         vector_offset,
1074         MakeSingleRegionBinarySection(
1075             std::string(table->name()->c_str()) + "." + field->name()->c_str(),
1076             BinarySectionType::Vector,
1077             MakeBinaryRegion(vector_offset, remaining,
1078                              BinaryRegionType::Unknown, remaining, 0,
1079                              vector_length_comment)));
1080     return;
1081   }
1082 
1083   // Validate there are enough bytes left in the binary to process all the
1084   // items.
1085   const uint64_t last_item_offset =
1086       vector_offset + vector_length_size_type +
1087       vector_length.value() * GetElementSize(field);
1088 
1089   if (!IsValidOffset(last_item_offset - 1)) {
1090     SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
1091     AddSection(
1092         vector_offset,
1093         MakeSingleRegionBinarySection(
1094             std::string(table->name()->c_str()) + "." + field->name()->c_str(),
1095             BinarySectionType::Vector,
1096             MakeBinaryRegion(vector_offset, vector_length_size_type,
1097                              region_type, 0, 0, vector_length_comment)));
1098 
1099     return;
1100   }
1101 
1102   std::vector<BinaryRegion> regions;
1103 
1104   regions.push_back(MakeBinaryRegion(vector_offset, vector_length_size_type,
1105                                      region_type, 0, 0, vector_length_comment));
1106   // Consume the vector length offset.
1107   uint64_t offset = vector_offset + vector_length_size_type;
1108 
1109   switch (field->type()->element()) {
1110     case reflection::BaseType::Obj: {
1111       const reflection::Object *object =
1112           schema_->objects()->Get(field->type()->index());
1113 
1114       if (object->is_struct()) {
1115         // Vector of structs
1116         for (size_t i = 0; i < vector_length.value(); ++i) {
1117           // Structs are inline to the vector.
1118           const uint64_t next_offset =
1119               BuildStruct(offset, regions, "[" + NumToString(i) + "]", object);
1120           if (next_offset == offset) { break; }
1121           offset = next_offset;
1122         }
1123       } else {
1124         // Vector of objects
1125         for (size_t i = 0; i < vector_length.value(); ++i) {
1126           BinaryRegionComment vector_object_comment;
1127           vector_object_comment.type =
1128               BinaryRegionCommentType::VectorTableValue;
1129           vector_object_comment.index = i;
1130 
1131           const auto table_relative_offset = ReadScalar<uint32_t>(offset);
1132           if (!table_relative_offset.has_value()) {
1133             const uint64_t remaining = RemainingBytes(offset);
1134             SetError(vector_object_comment,
1135                      BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1136 
1137             regions.push_back(
1138                 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1139                                  remaining, 0, vector_object_comment));
1140             break;
1141           }
1142 
1143           // The table offset is relative from the offset location itself.
1144           const uint64_t table_offset = offset + table_relative_offset.value();
1145 
1146           if (!IsValidOffset(table_offset)) {
1147             SetError(vector_object_comment,
1148                      BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1149             regions.push_back(MakeBinaryRegion(
1150                 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1151                 table_offset, vector_object_comment));
1152 
1153             offset += sizeof(uint32_t);
1154             continue;
1155           }
1156 
1157           if (table_offset == parent_table_offset) {
1158             SetError(vector_object_comment,
1159                      BinaryRegionStatus::ERROR_CYCLE_DETECTED);
1160             // A cycle detected where a table vector field is pointing to
1161             // itself. This should only happen in corrupted files.
1162             regions.push_back(MakeBinaryRegion(
1163                 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1164                 table_offset, vector_object_comment));
1165 
1166             offset += sizeof(uint32_t);
1167             continue;
1168           }
1169 
1170           regions.push_back(MakeBinaryRegion(
1171               offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1172               table_offset, vector_object_comment));
1173 
1174           // Consume the offset to the table.
1175           offset += sizeof(uint32_t);
1176 
1177           BuildTable(table_offset, BinarySectionType::Table, object);
1178         }
1179       }
1180     } break;
1181     case reflection::BaseType::String: {
1182       // Vector of strings
1183       for (size_t i = 0; i < vector_length.value(); ++i) {
1184         BinaryRegionComment vector_object_comment;
1185         vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
1186         vector_object_comment.index = i;
1187 
1188         const auto string_relative_offset = ReadScalar<uint32_t>(offset);
1189         if (!string_relative_offset.has_value()) {
1190           const uint64_t remaining = RemainingBytes(offset);
1191 
1192           SetError(vector_object_comment,
1193                    BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1194 
1195           regions.push_back(
1196               MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1197                                remaining, 0, vector_object_comment));
1198           break;
1199         }
1200 
1201         // The string offset is relative from the offset location itself.
1202         const uint64_t string_offset = offset + string_relative_offset.value();
1203 
1204         if (!IsValidOffset(string_offset)) {
1205           SetError(vector_object_comment,
1206                    BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1207           regions.push_back(MakeBinaryRegion(
1208               offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1209               string_offset, vector_object_comment));
1210 
1211           offset += sizeof(uint32_t);
1212           continue;
1213         }
1214 
1215         regions.push_back(MakeBinaryRegion(
1216             offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1217             string_offset, vector_object_comment));
1218 
1219         BuildString(string_offset, table, field);
1220 
1221         offset += sizeof(uint32_t);
1222       }
1223     } break;
1224     case reflection::BaseType::Union: {
1225       // Vector of unions
1226       // Unions have both their realized type (uint8_t for now) that are
1227       // stored separately. These are stored in the field->index() - 1
1228       // location.
1229       const uint16_t union_type_vector_id = field->id() - 1;
1230 
1231       auto vtable_entry = vtable_fields.find(union_type_vector_id);
1232       if (vtable_entry == vtable_fields.end()) {
1233         // TODO(dbaileychess): need to capture this error condition.
1234         break;
1235       }
1236 
1237       const uint64_t union_type_vector_field_offset =
1238           parent_table_offset + vtable_entry->second.offset_from_table;
1239 
1240       const auto union_type_vector_field_relative_offset =
1241           ReadScalar<uint16_t>(union_type_vector_field_offset);
1242 
1243       if (!union_type_vector_field_relative_offset.has_value()) {
1244         const uint64_t remaining = RemainingBytes(offset);
1245         BinaryRegionComment vector_union_comment;
1246         vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
1247         SetError(vector_union_comment,
1248                  BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
1249 
1250         regions.push_back(MakeBinaryRegion(offset, remaining,
1251                                            BinaryRegionType::Unknown, remaining,
1252                                            0, vector_union_comment));
1253 
1254         break;
1255       }
1256 
1257       // Get the offset to the first type (the + sizeof(uint32_t) is to skip
1258       // over the vector length which we already know). Validation happens
1259       // within the loop below.
1260       const uint64_t union_type_vector_data_offset =
1261           union_type_vector_field_offset +
1262           union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
1263 
1264       for (size_t i = 0; i < vector_length.value(); ++i) {
1265         BinaryRegionComment comment;
1266         comment.type = BinaryRegionCommentType::VectorUnionValue;
1267         comment.index = i;
1268 
1269         const auto union_relative_offset = ReadScalar<uint32_t>(offset);
1270         if (!union_relative_offset.has_value()) {
1271           const uint64_t remaining = RemainingBytes(offset);
1272 
1273           SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1274 
1275           regions.push_back(MakeBinaryRegion(offset, remaining,
1276                                              BinaryRegionType::Unknown,
1277                                              remaining, 0, comment));
1278 
1279           break;
1280         }
1281 
1282         // The union offset is relative from the offset location itself.
1283         const uint64_t union_offset = offset + union_relative_offset.value();
1284 
1285         if (!IsValidOffset(union_offset)) {
1286           SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1287 
1288           regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1289                                              BinaryRegionType::UOffset, 0,
1290                                              union_offset, comment));
1291           continue;
1292         }
1293 
1294         const auto realized_type =
1295             ReadScalar<uint8_t>(union_type_vector_data_offset + i);
1296 
1297         if (!realized_type.has_value()) {
1298           SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
1299           regions.push_back(MakeBinaryRegion(
1300               offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
1301           continue;
1302         }
1303 
1304         if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
1305                                realized_type.value())) {
1306           // We already export an error in the union type field, so just skip
1307           // building the union itself and it will default to an unreference
1308           // Binary section.
1309           offset += sizeof(uint32_t);
1310           continue;
1311         }
1312 
1313         const std::string enum_type =
1314             BuildUnion(union_offset, realized_type.value(), field);
1315 
1316         comment.default_value = "(`" + enum_type + "`)";
1317         regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1318                                            BinaryRegionType::UOffset, 0,
1319                                            union_offset, comment));
1320 
1321         offset += sizeof(uint32_t);
1322       }
1323     } break;
1324     default: {
1325       if (IsScalar(field->type()->element())) {
1326         const BinaryRegionType binary_region_type =
1327             GetRegionType(field->type()->element());
1328 
1329         const uint64_t type_size = GetTypeSize(field->type()->element());
1330 
1331         // TODO(dbaileychess): It might be nicer to user the
1332         // BinaryRegion.array_length field to indicate this.
1333         for (size_t i = 0; i < vector_length.value(); ++i) {
1334           BinaryRegionComment vector_scalar_comment;
1335           vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
1336           vector_scalar_comment.index = i;
1337 
1338           if (!IsValidRead(offset, type_size)) {
1339             const uint64_t remaining = RemainingBytes(offset);
1340 
1341             SetError(vector_scalar_comment,
1342                      BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1343                      std::to_string(type_size));
1344 
1345             regions.push_back(
1346                 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1347                                  remaining, 0, vector_scalar_comment));
1348             break;
1349           }
1350 
1351           if (IsUnionType(field->type()->element())) {
1352             // This is a type for a union. Validate the value
1353             const auto enum_value = ReadScalar<uint8_t>(offset);
1354 
1355             // This should always have a value, due to the IsValidRead check
1356             // above.
1357             if (!IsValidUnionValue(field->type()->index(),
1358                                    enum_value.value())) {
1359               SetError(vector_scalar_comment,
1360                        BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
1361               regions.push_back(MakeBinaryRegion(offset, type_size,
1362                                                  binary_region_type, 0, 0,
1363                                                  vector_scalar_comment));
1364               offset += type_size;
1365               continue;
1366             }
1367           }
1368 
1369           regions.push_back(MakeBinaryRegion(offset, type_size,
1370                                              binary_region_type, 0, 0,
1371                                              vector_scalar_comment));
1372           offset += type_size;
1373         }
1374       }
1375     } break;
1376   }
1377   AddSection(vector_offset,
1378              MakeBinarySection(std::string(table->name()->c_str()) + "." +
1379                                    field->name()->c_str(),
1380                                section_type, std::move(regions)));
1381 }
1382 
BuildUnion(const uint64_t union_offset,const uint8_t realized_type,const reflection::Field * const field)1383 std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
1384                                         const uint8_t realized_type,
1385                                         const reflection::Field *const field) {
1386   const reflection::Enum *next_enum =
1387       schema_->enums()->Get(field->type()->index());
1388 
1389   const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
1390 
1391   if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
1392 
1393   const reflection::Type *union_type = enum_val->union_type();
1394 
1395   if (union_type->base_type() == reflection::BaseType::Obj) {
1396     const reflection::Object *object =
1397         schema_->objects()->Get(union_type->index());
1398 
1399     if (object->is_struct()) {
1400       // Union of vectors point to a new Binary section
1401       std::vector<BinaryRegion> regions;
1402 
1403       BuildStruct(union_offset, regions, field->name()->c_str(), object);
1404 
1405       AddSection(
1406           union_offset,
1407           MakeBinarySection(std::string(object->name()->c_str()) + "." +
1408                                 field->name()->c_str(),
1409                             BinarySectionType::Union, std::move(regions)));
1410     } else {
1411       BuildTable(union_offset, BinarySectionType::Table, object);
1412     }
1413   }
1414   // TODO(dbaileychess): handle the other union types.
1415 
1416   return enum_val->name()->c_str();
1417 }
1418 
FixMissingRegions()1419 void BinaryAnnotator::FixMissingRegions() {
1420   std::vector<BinaryRegion> regions_to_insert;
1421   for (auto &current_section : sections_) {
1422     BinarySection &section = current_section.second;
1423     if (section.regions.empty()) {
1424       // TODO(dbaileychess): is this possible?
1425       continue;
1426     }
1427 
1428     uint64_t offset = section.regions[0].offset + section.regions[0].length;
1429     for (size_t i = 1; i < section.regions.size(); ++i) {
1430       BinaryRegion &region = section.regions[i];
1431 
1432       const uint64_t next_offset = region.offset;
1433       if (!IsValidOffset(next_offset)) {
1434         // TODO(dbaileychess): figure out how we get into this situation.
1435         continue;
1436       }
1437 
1438       if (offset < next_offset) {
1439         const uint64_t padding_bytes = next_offset - offset;
1440 
1441         BinaryRegionComment comment;
1442         comment.type = BinaryRegionCommentType::Padding;
1443 
1444         if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
1445           SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
1446           regions_to_insert.push_back(
1447               MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
1448                                padding_bytes, 0, comment));
1449         } else {
1450           regions_to_insert.push_back(
1451               MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
1452                                padding_bytes, 0, comment));
1453         }
1454       }
1455       offset = next_offset + region.length;
1456     }
1457 
1458     if (!regions_to_insert.empty()) {
1459       section.regions.insert(section.regions.end(), regions_to_insert.begin(),
1460                              regions_to_insert.end());
1461       std::stable_sort(section.regions.begin(), section.regions.end(),
1462                        BinaryRegionSort);
1463       regions_to_insert.clear();
1464     }
1465   }
1466 }
1467 
FixMissingSections()1468 void BinaryAnnotator::FixMissingSections() {
1469   uint64_t offset = 0;
1470 
1471   std::vector<BinarySection> sections_to_insert;
1472 
1473   for (auto &current_section : sections_) {
1474     BinarySection &section = current_section.second;
1475     const uint64_t section_start_offset = current_section.first;
1476     const uint64_t section_end_offset =
1477         section.regions.back().offset + section.regions.back().length;
1478 
1479     if (offset < section_start_offset) {
1480       // We are at an offset that is less then the current section.
1481       const uint64_t pad_bytes = section_start_offset - offset + 1;
1482 
1483       sections_to_insert.push_back(
1484           GenerateMissingSection(offset - 1, pad_bytes, binary_));
1485     }
1486     offset = section_end_offset + 1;
1487   }
1488 
1489   // Handle the case where there are still bytes left in the binary that are
1490   // unaccounted for.
1491   if (offset < binary_length_) {
1492     const uint64_t pad_bytes = binary_length_ - offset + 1;
1493     sections_to_insert.push_back(
1494         GenerateMissingSection(offset - 1, pad_bytes, binary_));
1495   }
1496 
1497   for (const BinarySection &section_to_insert : sections_to_insert) {
1498     AddSection(section_to_insert.regions[0].offset, section_to_insert);
1499   }
1500 }
1501 
ContainsSection(const uint64_t offset)1502 bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
1503   auto it = sections_.lower_bound(offset);
1504   // If the section is found, check that it is exactly equal its offset.
1505   if (it != sections_.end() && it->first == offset) { return true; }
1506 
1507   // If this was the first section, there are no other previous sections to
1508   // check.
1509   if (it == sections_.begin()) { return false; }
1510 
1511   // Go back one section.
1512   --it;
1513 
1514   // And check that if the offset is covered by the section.
1515   return offset >= it->first && offset < it->second.regions.back().offset +
1516                                              it->second.regions.back().length;
1517 }
1518 
RootTable() const1519 const reflection::Object *BinaryAnnotator::RootTable() const {
1520   if (!root_table_.empty()) {
1521     return schema_->objects()->LookupByKey(root_table_);
1522   }
1523   return schema_->root_table();
1524 }
1525 
1526 }  // namespace flatbuffers
1527