1 #include "binary_annotator.h"
2
3 #include <algorithm>
4 #include <cstdint>
5 #include <iostream>
6 #include <limits>
7 #include <string>
8 #include <vector>
9
10 #include "flatbuffers/base.h"
11 #include "flatbuffers/reflection.h"
12 #include "flatbuffers/util.h"
13 #include "flatbuffers/verifier.h"
14
15 namespace flatbuffers {
16 namespace {
17
BinaryRegionSort(const BinaryRegion & a,const BinaryRegion & b)18 static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
19 return a.offset < b.offset;
20 }
21
SetError(BinaryRegionComment & comment,BinaryRegionStatus status,std::string message="")22 static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
23 std::string message = "") {
24 comment.status = status;
25 comment.status_message = message;
26 }
27
MakeBinaryRegion(const uint64_t offset=0,const uint64_t length=0,const BinaryRegionType type=BinaryRegionType::Unknown,const uint64_t array_length=0,const uint64_t points_to_offset=0,BinaryRegionComment comment={})28 static BinaryRegion MakeBinaryRegion(
29 const uint64_t offset = 0, const uint64_t length = 0,
30 const BinaryRegionType type = BinaryRegionType::Unknown,
31 const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
32 BinaryRegionComment comment = {}) {
33 BinaryRegion region;
34 region.offset = offset;
35 region.length = length;
36 region.type = type;
37 region.array_length = array_length;
38 region.points_to_offset = points_to_offset;
39 region.comment = std::move(comment);
40 return region;
41 }
42
MakeBinarySection(const std::string & name,const BinarySectionType type,std::vector<BinaryRegion> regions)43 static BinarySection MakeBinarySection(const std::string &name,
44 const BinarySectionType type,
45 std::vector<BinaryRegion> regions) {
46 BinarySection section;
47 section.name = name;
48 section.type = type;
49 section.regions = std::move(regions);
50 return section;
51 }
52
MakeSingleRegionBinarySection(const std::string & name,const BinarySectionType type,const BinaryRegion & region)53 static BinarySection MakeSingleRegionBinarySection(const std::string &name,
54 const BinarySectionType type,
55 const BinaryRegion ®ion) {
56 std::vector<BinaryRegion> regions;
57 regions.push_back(region);
58 return MakeBinarySection(name, type, std::move(regions));
59 }
60
// Returns true if at least one byte of `binary` in the half-open index range
// [offset, offset + length) is non-zero. An empty range yields false.
//
// No bounds checking is performed here; the caller must make sure the range
// lies inside the buffer `binary` points to.
static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                            const uint8_t *const binary) {
  const uint64_t end = offset + length;
  uint64_t index = offset;
  while (index < end) {
    if (binary[index] != 0) { return true; }
    ++index;
  }
  return false;
}
68
// Returns true if every byte of `binary` in the half-open index range
// [offset, offset + length) is printable according to isprint(). An empty
// range yields true.
//
// No bounds checking is performed here; the caller must make sure the range
// lies inside the buffer `binary` points to.
static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
  const uint64_t end = offset + length;
  uint64_t index = offset;
  while (index < end) {
    if (isprint(binary[index]) == 0) { return false; }
    ++index;
  }
  return true;
}
76
GenerateMissingSection(const uint64_t offset,const uint64_t length,const uint8_t * const binary)77 static BinarySection GenerateMissingSection(const uint64_t offset,
78 const uint64_t length,
79 const uint8_t *const binary) {
80 std::vector<BinaryRegion> regions;
81
82 // Check if the region is all zeros or not, as that can tell us if it is
83 // padding or not.
84 if (IsNonZeroRegion(offset, length, binary)) {
85 // Some of the padding bytes are non-zero, so this might be an unknown
86 // section of the binary.
87 // TODO(dbaileychess): We could be a bit smarter with different sized
88 // alignments. For now, the 8 byte check encompasses all the smaller
89 // alignments.
90 BinaryRegionComment comment;
91 comment.type = BinaryRegionCommentType::Unknown;
92 if (length >= 8) {
93 SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
94 } else {
95 SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
96 }
97
98 regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
99 BinaryRegionType::Unknown, length, 0,
100 comment));
101
102 return MakeBinarySection("no known references", BinarySectionType::Unknown,
103 std::move(regions));
104 }
105
106 BinaryRegionComment comment;
107 comment.type = BinaryRegionCommentType::Padding;
108 if (length >= 8) {
109 SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
110 }
111
112 // This region is most likely padding.
113 regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
114 BinaryRegionType::Uint8, length, 0,
115 comment));
116
117 return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
118 }
119
120 } // namespace
121
Annotate()122 std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
123 if (bfbs_ != nullptr && bfbs_length_ != 0) {
124 flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
125 if ((is_size_prefixed_ &&
126 !reflection::VerifySizePrefixedSchemaBuffer(verifier)) ||
127 !reflection::VerifySchemaBuffer(verifier)) {
128 return {};
129 }
130 }
131
132 // The binary is too short to read as a flatbuffers.
133 if (binary_length_ < FLATBUFFERS_MIN_BUFFER_SIZE) { return {}; }
134
135 // Make sure we start with a clean slate.
136 vtables_.clear();
137 sections_.clear();
138
139 // First parse the header region which always start at offset 0.
140 // The returned offset will point to the root_table location.
141 const uint64_t root_table_offset = BuildHeader(0);
142
143 if (IsValidOffset(root_table_offset)) {
144 // Build the root table, and all else will be referenced from it.
145 BuildTable(root_table_offset, BinarySectionType::RootTable, RootTable());
146 }
147
148 // Now that all the sections are built, make sure the binary sections are
149 // contiguous.
150 FixMissingRegions();
151
152 // Then scan the area between BinarySections insert padding sections that are
153 // implied.
154 FixMissingSections();
155
156 return sections_;
157 }
158
BuildHeader(const uint64_t header_offset)159 uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
160 uint64_t offset = header_offset;
161 std::vector<BinaryRegion> regions;
162
163 // If this binary is a size prefixed one, attempt to parse the size.
164 if (is_size_prefixed_) {
165 BinaryRegionComment prefix_length_comment;
166 prefix_length_comment.type = BinaryRegionCommentType::SizePrefix;
167
168 bool has_prefix_value = false;
169 const auto prefix_length = ReadScalar<uoffset64_t>(offset);
170 if (*prefix_length <= binary_length_) {
171 regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset64_t),
172 BinaryRegionType::Uint64, 0, 0,
173 prefix_length_comment));
174 offset += sizeof(uoffset64_t);
175 has_prefix_value = true;
176 }
177
178 if (!has_prefix_value) {
179 const auto prefix_length = ReadScalar<uoffset_t>(offset);
180 if (*prefix_length <= binary_length_) {
181 regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset_t),
182 BinaryRegionType::Uint32, 0, 0,
183 prefix_length_comment));
184 offset += sizeof(uoffset_t);
185 has_prefix_value = true;
186 }
187 }
188
189 if (!has_prefix_value) {
190 SetError(prefix_length_comment, BinaryRegionStatus::ERROR);
191 }
192 }
193
194 const auto root_table_offset = ReadScalar<uint32_t>(offset);
195
196 if (!root_table_offset.has_value()) {
197 // This shouldn't occur, since we validate the min size of the buffer
198 // before. But for completion sake, we shouldn't read passed the binary end.
199 return std::numeric_limits<uint64_t>::max();
200 }
201
202 const auto root_table_loc = offset + *root_table_offset;
203
204 BinaryRegionComment root_offset_comment;
205 root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
206 root_offset_comment.name = RootTable()->name()->str();
207
208 if (!IsValidOffset(root_table_loc)) {
209 SetError(root_offset_comment,
210 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
211 }
212
213 regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
214 BinaryRegionType::UOffset, 0,
215 root_table_loc, root_offset_comment));
216 offset += sizeof(uint32_t);
217
218 if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
219 IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
220 BinaryRegionComment comment;
221 comment.type = BinaryRegionCommentType::FileIdentifier;
222 // Check if the file identifier region has non-zero data, and assume its
223 // the file identifier. Otherwise, it will get filled in with padding
224 // later.
225 regions.push_back(MakeBinaryRegion(
226 offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
227 BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
228 comment));
229 }
230
231 AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
232 std::move(regions)));
233
234 return root_table_loc;
235 }
236
// Returns the VTable located at `vtable_offset` as interpreted for the table
// type `table`, parsing it and recording it in `vtables_` on first use.
//
// Parameters:
//   vtable_offset             - location of the vtable in the binary.
//   table                     - schema object of the table referring to the
//                               vtable; also the key used to find an already
//                               parsed vtable at this offset.
//   offset_of_referring_table - location of the referring table, used to
//                               validate the table length and field offsets
//                               stored in the vtable.
//
// Returns nullptr (in most cases after adding a VTable section that carries
// the error) when:
//   - no vtable has been recorded at this offset yet and the offset is
//     already covered by another section (no section is added in this case);
//   - the vtable size cannot be read;
//   - `vtable_offset + vtable_size - 1` fails IsValidOffset, or the size is
//     smaller than the two mandatory uint16_t entries;
//   - the referring table length cannot be read.
// Otherwise returns a pointer to the entry stored in `vtables_`.
BinaryAnnotator::VTable *BinaryAnnotator::GetOrBuildVTable(
    const uint64_t vtable_offset, const reflection::Object *const table,
    const uint64_t offset_of_referring_table) {
  // Get a list of vtables (if any) already defined at this offset.
  std::list<VTable> &vtables = vtables_[vtable_offset];

  // See if this vtable for the table type has been generated before.
  for (VTable &vtable : vtables) {
    if (vtable.referring_table == table) { return &vtable; }
  }

  // If we are trying to make a new vtable and it is already encompassed by
  // another binary section, something is corrupted.
  if (vtables.empty() && ContainsSection(vtable_offset)) { return nullptr; }

  const std::string referring_table_name = table->name()->str();

  BinaryRegionComment vtable_size_comment;
  vtable_size_comment.type = BinaryRegionCommentType::VTableSize;

  const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
  if (!vtable_length.has_value()) {
    // Not enough bytes left for the uint16_t size: report whatever remains as
    // a single unknown region.
    const uint64_t remaining = RemainingBytes(vtable_offset);

    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "2");

    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, remaining,
                                    BinaryRegionType::Unknown, remaining, 0,
                                    vtable_size_comment)));
    return nullptr;
  }

  // Vtables start with the size of the vtable
  const uint16_t vtable_size = vtable_length.value();

  if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
    // The vtable_size points to off the end of the binary.
    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                    BinaryRegionType::Uint16, 0, 0,
                                    vtable_size_comment)));

    return nullptr;
  } else if (vtable_size < 2 * sizeof(uint16_t)) {
    SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
             "4");
    // The size includes itself and the table size which are both uint16_t.
    AddSection(vtable_offset,
               MakeSingleRegionBinarySection(
                   referring_table_name, BinarySectionType::VTable,
                   MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                    BinaryRegionType::Uint16, 0, 0,
                                    vtable_size_comment)));
    return nullptr;
  }

  // Regions of the vtable section, accumulated in the order they are parsed.
  std::vector<BinaryRegion> regions;

  regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
                                     BinaryRegionType::Uint16, 0, 0,
                                     vtable_size_comment));
  uint64_t offset = vtable_offset + sizeof(uint16_t);

  BinaryRegionComment ref_table_len_comment;
  ref_table_len_comment.type =
      BinaryRegionCommentType::VTableRefferingTableLength;

  // Ensure we can read the next uint16_t field, which is the size of the
  // referring table.
  const auto table_length = ReadScalar<uint16_t>(offset);

  if (!table_length.has_value()) {
    const uint64_t remaining = RemainingBytes(offset);
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "2");

    // Note: this section is added at `offset` (just past the size field), and
    // the regions accumulated so far are not part of it.
    AddSection(offset, MakeSingleRegionBinarySection(
                           referring_table_name, BinarySectionType::VTable,
                           MakeBinaryRegion(
                               offset, remaining, BinaryRegionType::Unknown,
                               remaining, 0, ref_table_len_comment)));
    return nullptr;
  }

  // Then they have the size of the table they reference.
  const uint16_t table_size = table_length.value();

  // A bad table size is only flagged on the region; parsing continues.
  if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  } else if (table_size < 4) {
    SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
             "4");
  }

  regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
                                     BinaryRegionType::Uint16, 0, 0,
                                     ref_table_len_comment));
  offset += sizeof(uint16_t);

  // Location of the first field entry (the entry for field id 0).
  const uint64_t offset_start = offset;

  // A mapping between field (and its id) to the relative offset (uint16_t)
  // from the start of the table.
  std::map<uint16_t, VTable::Entry> fields;

  // Counter for determining if the binary has more vtable entries than the
  // schema provided. This can occur if the binary was created at a newer schema
  // version and is being processed with an older one.
  // Only fields that reach the end of the callback below are counted; every
  // early `return` inside it skips the increment.
  uint16_t fields_processed = 0;

  // Loop over all the fields.
  ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
    // Each entry is a uint16_t, located by the field's id.
    const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);

    if (field_offset >= vtable_offset + vtable_size) {
      // This field_offset is too large for this vtable, so it must come from a
      // newer schema than the binary was created with or the binary writer did
      // not write it. For either case, it is safe to ignore.

      // TODO(dbaileychess): We could show which fields are not set and their
      // default values if we want. We just need a way to make it obvious that
      // it isn't part of the buffer.
      return;
    }

    BinaryRegionComment field_comment;
    field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
    field_comment.name = std::string(field->name()->c_str()) +
                         "` (id: " + std::to_string(field->id()) + ")";

    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);

    if (!offset_from_table.has_value()) {
      const uint64_t remaining = RemainingBytes(field_offset);

      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
      regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                         BinaryRegionType::Unknown, remaining,
                                         0, field_comment));

      return;
    }

    if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
                       1)) {
      SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
      regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                         BinaryRegionType::VOffset, 0, 0,
                                         field_comment));
      return;
    }

    // The entry is recorded before the "required" check below, so an absent
    // required field still ends up in `fields` with offset 0.
    VTable::Entry entry;
    entry.field = field;
    entry.offset_from_table = offset_from_table.value();
    fields.insert(std::make_pair(field->id(), entry));

    std::string default_label;
    if (offset_from_table.value() == 0) {
      // Not present, so could be default or be optional.
      if (field->required()) {
        SetError(field_comment,
                 BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
        // If this is a required field, make it known this is an error.
        regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                           BinaryRegionType::VOffset, 0, 0,
                                           field_comment));
        return;
      } else {
        // It's an optional field, so get the default value, interpret it and
        // provide an annotation for it.
        if (IsScalar(field->type()->base_type())) {
          default_label += "<defaults to ";
          default_label += IsFloat(field->type()->base_type())
                               ? std::to_string(field->default_real())
                               : std::to_string(field->default_integer());
          default_label += "> (";
        } else {
          default_label += "<null> (";
        }
        default_label +=
            reflection::EnumNameBaseType(field->type()->base_type());
        default_label += ")";
      }
    }
    // Left empty for fields that are present in the table.
    field_comment.default_value = default_label;

    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                       BinaryRegionType::VOffset, 0, 0,
                                       field_comment));

    fields_processed++;
  });

  // Check if we covered all the expectant fields. If not, we need to add them
  // as unknown fields.
  // Number of uint16_t entries that follow the two leading size fields.
  uint16_t expectant_vtable_fields =
      (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);

  // Prevent a bad binary from declaring a really large vtable_size, that we can
  // not independently verify.
  expectant_vtable_fields = std::min(
      static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);

  // Entries with an id at or beyond the number of processed fields are treated
  // as unknown to the schema.
  for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
    const uint64_t field_offset = offset_start + id * sizeof(uint16_t);

    const auto offset_from_table = ReadScalar<uint16_t>(field_offset);

    BinaryRegionComment field_comment;
    field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
    field_comment.index = id;

    if (!offset_from_table.has_value()) {
      const uint64_t remaining = RemainingBytes(field_offset);
      SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
      regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                         BinaryRegionType::Unknown, remaining,
                                         0, field_comment));
      continue;
    }

    // std::map::insert keeps an existing entry, so an id that was already
    // recorded by the schema pass above is not overwritten here.
    VTable::Entry entry;
    entry.field = nullptr;  // No field to reference.
    entry.offset_from_table = offset_from_table.value();
    fields.insert(std::make_pair(id, entry));

    regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
                                       BinaryRegionType::VOffset, 0, 0,
                                       field_comment));
  }

  // If we have never added this vtable before record the Binary section.
  if (vtables.empty()) {
    sections_[vtable_offset] = MakeBinarySection(
        referring_table_name, BinarySectionType::VTable, std::move(regions));
  } else {
    // Add the current table name to the name of the section.
    // The regions built above are discarded in this case; the section keeps
    // the regions recorded when the vtable was first seen.
    sections_[vtable_offset].name += ", " + referring_table_name;
  }

  VTable vtable;
  vtable.referring_table = table;
  vtable.fields = std::move(fields);
  vtable.table_size = table_size;
  vtable.vtable_size = vtable_size;

  // Add this vtable to the collection of vtables at this offset.
  vtables.push_back(std::move(vtable));

  // Return the vtable we just added.
  return &vtables.back();
}
497
// Annotates the table located at `table_offset`, interpreting it with the
// schema object `table`, and records it as a section of the given `type`.
//
// The table's vtable is resolved first (see GetOrBuildVTable). The fields are
// then visited in order of their offset inside the table, and everything a
// field refers to (sub-tables, strings, vectors, unions) is built as well.
// Sub-tables are handled by calling this function recursively.
//
// Nothing is recorded when `table_offset` is already covered by a section.
// When the vtable offset cannot be read, or the computed vtable location is
// invalid, a section with a single error region is recorded instead.
void BinaryAnnotator::BuildTable(const uint64_t table_offset,
                                 const BinarySectionType type,
                                 const reflection::Object *const table) {
  if (ContainsSection(table_offset)) { return; }

  BinaryRegionComment vtable_offset_comment;
  vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;

  const auto vtable_soffset = ReadScalar<int32_t>(table_offset);

  if (!vtable_soffset.has_value()) {
    const uint64_t remaining = RemainingBytes(table_offset);
    SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
             "4");

    AddSection(
        table_offset,
        MakeSingleRegionBinarySection(
            table->name()->str(), type,
            MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
                             remaining, 0, vtable_offset_comment)));

    // If there aren't enough bytes left to read the vtable offset, there is
    // nothing we can do.
    return;
  }

  // Tables start with the vtable
  // The stored value is signed and is subtracted from the table location.
  const uint64_t vtable_offset = table_offset - vtable_soffset.value();

  if (!IsValidOffset(vtable_offset)) {
    SetError(vtable_offset_comment,
             BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);

    AddSection(table_offset,
               MakeSingleRegionBinarySection(
                   table->name()->str(), type,
                   MakeBinaryRegion(table_offset, sizeof(int32_t),
                                    BinaryRegionType::SOffset, 0, vtable_offset,
                                    vtable_offset_comment)));

    // There isn't much to do with an invalid vtable offset, as we won't be able
    // to interpret the rest of the table fields.
    return;
  }

  std::vector<BinaryRegion> regions;
  regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
                                     BinaryRegionType::SOffset, 0,
                                     vtable_offset, vtable_offset_comment));

  // Parse the vtable first so we know what the rest of the fields in the table
  // are.
  const VTable *const vtable =
      GetOrBuildVTable(vtable_offset, table, table_offset);

  if (vtable == nullptr) {
    // There is no valid vtable for this table, so we cannot process the rest of
    // the table entries.
    // Note: no section is added for the table in this case, so the region
    // pushed above is dropped.
    return;
  }

  // This is the size and length of this table.
  const uint16_t table_size = vtable->table_size;
  uint64_t table_end_offset = table_offset + table_size;

  if (!IsValidOffset(table_end_offset - 1)) {
    // We already validated the table size in GetOrBuildVTable, but we have to
    // make sure we don't use a bad value here.
    table_end_offset = binary_length_;
  }

  // We need to iterate over the vtable fields by their offset in the binary,
  // not by their IDs. So copy them over to another vector that we can sort on
  // the offset_from_table property.
  std::vector<VTable::Entry> fields;
  for (const auto &vtable_field : vtable->fields) {
    fields.push_back(vtable_field.second);
  }

  std::stable_sort(fields.begin(), fields.end(),
                   [](const VTable::Entry &a, const VTable::Entry &b) {
                     return a.offset_from_table < b.offset_from_table;
                   });

  // Iterate over all the fields by order of their offset.
  for (size_t i = 0; i < fields.size(); ++i) {
    const reflection::Field *field = fields[i].field;
    const uint16_t offset_from_table = fields[i].offset_from_table;

    if (offset_from_table == 0) {
      // Skip non-present fields.
      continue;
    }

    // The field offsets are relative to the start of the table.
    const uint64_t field_offset = table_offset + offset_from_table;

    if (!IsValidOffset(field_offset)) {
      // The field offset is larger than the binary, nothing we can do.
      continue;
    }

    // We have a vtable entry for a non-existent field, that means it's a binary
    // generated by a newer schema than we are currently processing.
    if (field == nullptr) {
      // Calculate the length of this unknown field.
      const uint64_t unknown_field_length =
          // If another field follows this one (in offset order), the unknown
          // field ends where that one starts.
          ((i + 1 < fields.size())
               ? table_offset + fields[i + 1].offset_from_table
               // Otherwise use the known end of the table.
               : table_end_offset) -
          field_offset;

      if (unknown_field_length == 0) { continue; }

      std::string hint;

      if (unknown_field_length == 4) {
        const auto relative_offset = ReadScalar<uint32_t>(field_offset);
        if (relative_offset.has_value()) {
          // The field is 4 in length, so it could be an offset? Provide a hint.
          hint += "<possibly an offset? Check Loc: +0x";
          hint += ToHex(field_offset + relative_offset.value());
          hint += ">";
        }
      }

      BinaryRegionComment unknown_field_comment;
      unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;

      if (!IsValidRead(field_offset, unknown_field_length)) {
        const uint64_t remaining = RemainingBytes(field_offset);

        SetError(unknown_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(unknown_field_length));

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, unknown_field_comment));
        continue;
      }

      unknown_field_comment.default_value = hint;

      regions.push_back(MakeBinaryRegion(
          field_offset, unknown_field_length, BinaryRegionType::Unknown,
          unknown_field_length, 0, unknown_field_comment));
      continue;
    }

    if (IsScalar(field->type()->base_type())) {
      // These are the raw values stored in the table.
      const uint64_t type_size = GetTypeSize(field->type()->base_type());
      const BinaryRegionType region_type =
          GetRegionType(field->type()->base_type());

      BinaryRegionComment scalar_field_comment;
      scalar_field_comment.type = BinaryRegionCommentType::TableField;
      scalar_field_comment.name =
          std::string(field->name()->c_str()) + "` (" +
          reflection::EnumNameBaseType(field->type()->base_type()) + ")";

      if (!IsValidRead(field_offset, type_size)) {
        const uint64_t remaining = RemainingBytes(field_offset);
        SetError(scalar_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
                 std::to_string(type_size));

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, scalar_field_comment));
        continue;
      }

      if (IsUnionType(field)) {
        // This is a type for a union. Validate the value
        const auto enum_value = ReadScalar<uint8_t>(field_offset);

        // This should always have a value, due to the IsValidRead check above.
        if (!IsValidUnionValue(field, enum_value.value())) {
          SetError(scalar_field_comment,
                   BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);

          regions.push_back(MakeBinaryRegion(field_offset, type_size,
                                             region_type, 0, 0,
                                             scalar_field_comment));
          continue;
        }
      }

      regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
                                         0, 0, scalar_field_comment));
      continue;
    }

    // Read the offset
    // The stored value is 64 bits wide for offset64 fields and 32 bits wide
    // otherwise. A failed read is turned into 0.
    uint64_t offset = 0;
    uint64_t length = sizeof(uint32_t);
    BinaryRegionType region_type = BinaryRegionType::UOffset;

    if (field->offset64()) {
      length = sizeof(uint64_t);
      region_type = BinaryRegionType::UOffset64;
      offset = ReadScalar<uint64_t>(field_offset).value_or(0);
    } else {
      offset = ReadScalar<uint32_t>(field_offset).value_or(0);
    }
    // const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
    uint64_t offset_of_next_item = 0;
    BinaryRegionComment offset_field_comment;
    offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
    offset_field_comment.name = field->name()->c_str();
    // NOTE(review): `offset_prefix` is not used anywhere in this function.
    const std::string offset_prefix =
        "offset to field `" + std::string(field->name()->c_str()) + "`";

    // Validate any field that isn't inline (i.e., non-structs).
    if (!IsInlineField(field)) {
      if (offset == 0) {
        // An offset of 0 (a failed read above also yields 0) is reported as
        // an incomplete binary. The message is "4" even for 64-bit offsets.
        const uint64_t remaining = RemainingBytes(field_offset);

        SetError(offset_field_comment,
                 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");

        regions.push_back(MakeBinaryRegion(field_offset, remaining,
                                           BinaryRegionType::Unknown, remaining,
                                           0, offset_field_comment));
        continue;
      }

      // Offsets are relative to the location of the offset field itself.
      offset_of_next_item = field_offset + offset;

      if (!IsValidOffset(offset_of_next_item)) {
        SetError(offset_field_comment,
                 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        continue;
      }
    }

    switch (field->type()->base_type()) {
      case reflection::BaseType::Obj: {
        const reflection::Object *next_object =
            schema_->objects()->Get(field->type()->index());

        if (next_object->is_struct()) {
          // Structs are stored inline.
          BuildStruct(field_offset, regions, field->name()->c_str(),
                      next_object);
        } else {
          offset_field_comment.default_value = "(table)";

          regions.push_back(MakeBinaryRegion(field_offset, length, region_type,
                                             0, offset_of_next_item,
                                             offset_field_comment));

          // Recurse into the referenced table.
          BuildTable(offset_of_next_item, BinarySectionType::Table,
                     next_object);
        }
      } break;

      case reflection::BaseType::String: {
        offset_field_comment.default_value = "(string)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildString(offset_of_next_item, table, field);
      } break;

      case reflection::BaseType::Vector: {
        offset_field_comment.default_value = "(vector)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildVector(offset_of_next_item, table, field, table_offset,
                    vtable->fields);
      } break;
      case reflection::BaseType::Vector64: {
        offset_field_comment.default_value = "(vector64)";
        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           offset_of_next_item,
                                           offset_field_comment));
        BuildVector(offset_of_next_item, table, field, table_offset,
                    vtable->fields);
      } break;

      case reflection::BaseType::Union: {
        const uint64_t union_offset = offset_of_next_item;

        // The union type field is always one less than the union itself.
        const uint16_t union_type_id = field->id() - 1;

        auto vtable_field = vtable->fields.find(union_type_id);
        if (vtable_field == vtable->fields.end()) {
          // TODO(dbaileychess): need to capture this error condition.
          break;
        }
        offset_field_comment.default_value = "(union)";

        const uint64_t type_offset =
            table_offset + vtable_field->second.offset_from_table;

        const auto realized_type = ReadScalar<uint8_t>(type_offset);
        if (!realized_type.has_value()) {
          const uint64_t remaining = RemainingBytes(type_offset);
          SetError(offset_field_comment,
                   BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
          // Note: the error region is placed at `type_offset` (the union type
          // field), not at `field_offset`.
          regions.push_back(MakeBinaryRegion(
              type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
              offset_field_comment));
          // `continue` here (and below) advances the enclosing for loop; the
          // switch statement does not capture it.
          continue;
        }

        if (!IsValidUnionValue(field, realized_type.value())) {
          // We already export an error in the union type field, so just skip
          // building the union itself and it will default to an unreferenced
          // Binary section.
          continue;
        }

        const std::string enum_type =
            BuildUnion(union_offset, realized_type.value(), field);

        offset_field_comment.default_value =
            "(union of type `" + enum_type + "`)";

        regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
                                           union_offset, offset_field_comment));

      } break;

      default: break;
    }
  }

  // Handle the case where there is padding after the last known binary
  // region. Calculate where we left off towards the expected end of the
  // table.
  // `regions` always holds at least the vtable offset region at this point.
  // `regions.back()` is the most recently added region. With `end` being the
  // offset just past that region, `i` is `end + 1`, so padding is only emitted
  // here when at least two bytes remain; it then starts at `end` (`i - 1`) and
  // covers `table_end_offset - end` bytes.
  // NOTE(review): a single trailing byte is not covered here; presumably it is
  // picked up by FixMissingRegions - confirm.
  const uint64_t i = regions.back().offset + regions.back().length + 1;

  if (i < table_end_offset) {
    const uint64_t pad_bytes = table_end_offset - i + 1;

    BinaryRegionComment padding_comment;
    padding_comment.type = BinaryRegionCommentType::Padding;

    regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
                                       BinaryRegionType::Uint8, pad_bytes, 0,
                                       padding_comment));
  }

  AddSection(table_offset,
             MakeBinarySection(table->name()->str(), type, std::move(regions)));
}
856
BuildStruct(const uint64_t struct_offset,std::vector<BinaryRegion> & regions,const std::string referring_field_name,const reflection::Object * const object)857 uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
858 std::vector<BinaryRegion> ®ions,
859 const std::string referring_field_name,
860 const reflection::Object *const object) {
861 if (!object->is_struct()) { return struct_offset; }
862 uint64_t offset = struct_offset;
863
864 // Loop over all the fields in increasing order
865 ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
866 if (IsScalar(field->type()->base_type())) {
867 // Structure Field value
868 const uint64_t type_size = GetTypeSize(field->type()->base_type());
869 const BinaryRegionType region_type =
870 GetRegionType(field->type()->base_type());
871
872 BinaryRegionComment comment;
873 comment.type = BinaryRegionCommentType::StructField;
874 comment.name = referring_field_name + "." + field->name()->str();
875 comment.default_value = "of '" + object->name()->str() + "' (" +
876 std::string(reflection::EnumNameBaseType(
877 field->type()->base_type())) +
878 ")";
879
880 if (!IsValidRead(offset, type_size)) {
881 const uint64_t remaining = RemainingBytes(offset);
882 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
883 std::to_string(type_size));
884 regions.push_back(MakeBinaryRegion(offset, remaining,
885 BinaryRegionType::Unknown, remaining,
886 0, comment));
887
888 // TODO(dbaileychess): Should I bail out here? This sets offset to the
889 // end of the binary. So all other reads in the loop should fail.
890 offset += remaining;
891 return;
892 }
893
894 regions.push_back(
895 MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
896 offset += type_size;
897 } else if (field->type()->base_type() == reflection::BaseType::Obj) {
898 // Structs are stored inline, even when nested.
899 offset = BuildStruct(offset, regions,
900 referring_field_name + "." + field->name()->str(),
901 schema_->objects()->Get(field->type()->index()));
902 } else if (field->type()->base_type() == reflection::BaseType::Array) {
903 const bool is_scalar = IsScalar(field->type()->element());
904 const uint64_t type_size = GetTypeSize(field->type()->element());
905 const BinaryRegionType region_type =
906 GetRegionType(field->type()->element());
907
908 // Arrays are just repeated structures.
909 for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
910 if (is_scalar) {
911 BinaryRegionComment array_comment;
912 array_comment.type = BinaryRegionCommentType::ArrayField;
913 array_comment.name =
914 referring_field_name + "." + field->name()->str();
915 array_comment.index = i;
916 array_comment.default_value =
917 "of '" + object->name()->str() + "' (" +
918 std::string(
919 reflection::EnumNameBaseType(field->type()->element())) +
920 ")";
921
922 if (!IsValidRead(offset, type_size)) {
923 const uint64_t remaining = RemainingBytes(offset);
924
925 SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
926 std::to_string(type_size));
927
928 regions.push_back(MakeBinaryRegion(offset, remaining,
929 BinaryRegionType::Unknown,
930 remaining, 0, array_comment));
931
932 // TODO(dbaileychess): Should I bail out here? This sets offset to
933 // the end of the binary. So all other reads in the loop should
934 // fail.
935 offset += remaining;
936 break;
937 }
938
939 regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
940 0, array_comment));
941
942 offset += type_size;
943 } else {
944 // Array of Structs.
945 //
946 // TODO(dbaileychess): This works, but the comments on the fields lose
947 // some context. Need to figure a way how to plumb the nested arrays
948 // comments together that isn't too confusing.
949 offset =
950 BuildStruct(offset, regions,
951 referring_field_name + "." + field->name()->str(),
952 schema_->objects()->Get(field->type()->index()));
953 }
954 }
955 }
956
957 // Insert any padding after this field.
958 const uint16_t padding = field->padding();
959 if (padding > 0 && IsValidOffset(offset + padding)) {
960 BinaryRegionComment padding_comment;
961 padding_comment.type = BinaryRegionCommentType::Padding;
962
963 regions.push_back(MakeBinaryRegion(offset, padding,
964 BinaryRegionType::Uint8, padding, 0,
965 padding_comment));
966 offset += padding;
967 }
968 });
969
970 return offset;
971 }
972
BuildString(const uint64_t string_offset,const reflection::Object * const table,const reflection::Field * const field)973 void BinaryAnnotator::BuildString(const uint64_t string_offset,
974 const reflection::Object *const table,
975 const reflection::Field *const field) {
976 // Check if we have already generated this string section, and this is a
977 // shared string instance.
978 if (ContainsSection(string_offset)) { return; }
979
980 std::vector<BinaryRegion> regions;
981 const auto string_length = ReadScalar<uint32_t>(string_offset);
982
983 BinaryRegionComment string_length_comment;
984 string_length_comment.type = BinaryRegionCommentType::StringLength;
985
986 if (!string_length.has_value()) {
987 const uint64_t remaining = RemainingBytes(string_offset);
988
989 SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
990 "4");
991
992 regions.push_back(MakeBinaryRegion(string_offset, remaining,
993 BinaryRegionType::Unknown, remaining, 0,
994 string_length_comment));
995
996 } else {
997 const uint32_t string_size = string_length.value();
998 const uint64_t string_end =
999 string_offset + sizeof(uint32_t) + string_size + sizeof(char);
1000
1001 if (!IsValidOffset(string_end - 1)) {
1002 SetError(string_length_comment,
1003 BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
1004
1005 regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
1006 BinaryRegionType::Uint32, 0, 0,
1007 string_length_comment));
1008 } else {
1009 regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
1010 BinaryRegionType::Uint32, 0, 0,
1011 string_length_comment));
1012
1013 BinaryRegionComment string_comment;
1014 string_comment.type = BinaryRegionCommentType::StringValue;
1015
1016 regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
1017 string_size, BinaryRegionType::Char,
1018 string_size, 0, string_comment));
1019
1020 BinaryRegionComment string_terminator_comment;
1021 string_terminator_comment.type =
1022 BinaryRegionCommentType::StringTerminator;
1023
1024 regions.push_back(MakeBinaryRegion(
1025 string_offset + sizeof(uint32_t) + string_size, sizeof(char),
1026 BinaryRegionType::Char, 0, 0, string_terminator_comment));
1027 }
1028 }
1029
1030 AddSection(string_offset,
1031 MakeBinarySection(std::string(table->name()->c_str()) + "." +
1032 field->name()->c_str(),
1033 BinarySectionType::String, std::move(regions)));
1034 }
1035
BuildVector(const uint64_t vector_offset,const reflection::Object * const table,const reflection::Field * const field,const uint64_t parent_table_offset,const std::map<uint16_t,VTable::Entry> vtable_fields)1036 void BinaryAnnotator::BuildVector(
1037 const uint64_t vector_offset, const reflection::Object *const table,
1038 const reflection::Field *const field, const uint64_t parent_table_offset,
1039 const std::map<uint16_t, VTable::Entry> vtable_fields) {
1040 if (ContainsSection(vector_offset)) { return; }
1041
1042 BinaryRegionComment vector_length_comment;
1043 vector_length_comment.type = BinaryRegionCommentType::VectorLength;
1044
1045 const bool is_64_bit_vector =
1046 field->type()->base_type() == reflection::BaseType::Vector64;
1047
1048 flatbuffers::Optional<uint64_t> vector_length;
1049 uint32_t vector_length_size_type = 0;
1050 BinaryRegionType region_type = BinaryRegionType::Uint32;
1051 BinarySectionType section_type = BinarySectionType::Vector;
1052
1053 if (is_64_bit_vector) {
1054 auto v = ReadScalar<uint64_t>(vector_offset);
1055 if (v.has_value()) { vector_length = v.value(); }
1056 vector_length_size_type = sizeof(uint64_t);
1057 region_type = BinaryRegionType::Uint64;
1058 section_type = BinarySectionType::Vector64;
1059 } else {
1060 auto v = ReadScalar<uint32_t>(vector_offset);
1061 if (v.has_value()) { vector_length = v.value(); }
1062 vector_length_size_type = sizeof(uint32_t);
1063 region_type = BinaryRegionType::Uint32;
1064 section_type = BinarySectionType::Vector;
1065 }
1066
1067 if (!vector_length.has_value()) {
1068 const uint64_t remaining = RemainingBytes(vector_offset);
1069 SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1070 "4");
1071
1072 AddSection(
1073 vector_offset,
1074 MakeSingleRegionBinarySection(
1075 std::string(table->name()->c_str()) + "." + field->name()->c_str(),
1076 BinarySectionType::Vector,
1077 MakeBinaryRegion(vector_offset, remaining,
1078 BinaryRegionType::Unknown, remaining, 0,
1079 vector_length_comment)));
1080 return;
1081 }
1082
1083 // Validate there are enough bytes left in the binary to process all the
1084 // items.
1085 const uint64_t last_item_offset =
1086 vector_offset + vector_length_size_type +
1087 vector_length.value() * GetElementSize(field);
1088
1089 if (!IsValidOffset(last_item_offset - 1)) {
1090 SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
1091 AddSection(
1092 vector_offset,
1093 MakeSingleRegionBinarySection(
1094 std::string(table->name()->c_str()) + "." + field->name()->c_str(),
1095 BinarySectionType::Vector,
1096 MakeBinaryRegion(vector_offset, vector_length_size_type,
1097 region_type, 0, 0, vector_length_comment)));
1098
1099 return;
1100 }
1101
1102 std::vector<BinaryRegion> regions;
1103
1104 regions.push_back(MakeBinaryRegion(vector_offset, vector_length_size_type,
1105 region_type, 0, 0, vector_length_comment));
1106 // Consume the vector length offset.
1107 uint64_t offset = vector_offset + vector_length_size_type;
1108
1109 switch (field->type()->element()) {
1110 case reflection::BaseType::Obj: {
1111 const reflection::Object *object =
1112 schema_->objects()->Get(field->type()->index());
1113
1114 if (object->is_struct()) {
1115 // Vector of structs
1116 for (size_t i = 0; i < vector_length.value(); ++i) {
1117 // Structs are inline to the vector.
1118 const uint64_t next_offset =
1119 BuildStruct(offset, regions, "[" + NumToString(i) + "]", object);
1120 if (next_offset == offset) { break; }
1121 offset = next_offset;
1122 }
1123 } else {
1124 // Vector of objects
1125 for (size_t i = 0; i < vector_length.value(); ++i) {
1126 BinaryRegionComment vector_object_comment;
1127 vector_object_comment.type =
1128 BinaryRegionCommentType::VectorTableValue;
1129 vector_object_comment.index = i;
1130
1131 const auto table_relative_offset = ReadScalar<uint32_t>(offset);
1132 if (!table_relative_offset.has_value()) {
1133 const uint64_t remaining = RemainingBytes(offset);
1134 SetError(vector_object_comment,
1135 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1136
1137 regions.push_back(
1138 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1139 remaining, 0, vector_object_comment));
1140 break;
1141 }
1142
1143 // The table offset is relative from the offset location itself.
1144 const uint64_t table_offset = offset + table_relative_offset.value();
1145
1146 if (!IsValidOffset(table_offset)) {
1147 SetError(vector_object_comment,
1148 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1149 regions.push_back(MakeBinaryRegion(
1150 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1151 table_offset, vector_object_comment));
1152
1153 offset += sizeof(uint32_t);
1154 continue;
1155 }
1156
1157 if (table_offset == parent_table_offset) {
1158 SetError(vector_object_comment,
1159 BinaryRegionStatus::ERROR_CYCLE_DETECTED);
1160 // A cycle detected where a table vector field is pointing to
1161 // itself. This should only happen in corrupted files.
1162 regions.push_back(MakeBinaryRegion(
1163 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1164 table_offset, vector_object_comment));
1165
1166 offset += sizeof(uint32_t);
1167 continue;
1168 }
1169
1170 regions.push_back(MakeBinaryRegion(
1171 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1172 table_offset, vector_object_comment));
1173
1174 // Consume the offset to the table.
1175 offset += sizeof(uint32_t);
1176
1177 BuildTable(table_offset, BinarySectionType::Table, object);
1178 }
1179 }
1180 } break;
1181 case reflection::BaseType::String: {
1182 // Vector of strings
1183 for (size_t i = 0; i < vector_length.value(); ++i) {
1184 BinaryRegionComment vector_object_comment;
1185 vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
1186 vector_object_comment.index = i;
1187
1188 const auto string_relative_offset = ReadScalar<uint32_t>(offset);
1189 if (!string_relative_offset.has_value()) {
1190 const uint64_t remaining = RemainingBytes(offset);
1191
1192 SetError(vector_object_comment,
1193 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1194
1195 regions.push_back(
1196 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1197 remaining, 0, vector_object_comment));
1198 break;
1199 }
1200
1201 // The string offset is relative from the offset location itself.
1202 const uint64_t string_offset = offset + string_relative_offset.value();
1203
1204 if (!IsValidOffset(string_offset)) {
1205 SetError(vector_object_comment,
1206 BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1207 regions.push_back(MakeBinaryRegion(
1208 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1209 string_offset, vector_object_comment));
1210
1211 offset += sizeof(uint32_t);
1212 continue;
1213 }
1214
1215 regions.push_back(MakeBinaryRegion(
1216 offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
1217 string_offset, vector_object_comment));
1218
1219 BuildString(string_offset, table, field);
1220
1221 offset += sizeof(uint32_t);
1222 }
1223 } break;
1224 case reflection::BaseType::Union: {
1225 // Vector of unions
1226 // Unions have both their realized type (uint8_t for now) that are
1227 // stored separately. These are stored in the field->index() - 1
1228 // location.
1229 const uint16_t union_type_vector_id = field->id() - 1;
1230
1231 auto vtable_entry = vtable_fields.find(union_type_vector_id);
1232 if (vtable_entry == vtable_fields.end()) {
1233 // TODO(dbaileychess): need to capture this error condition.
1234 break;
1235 }
1236
1237 const uint64_t union_type_vector_field_offset =
1238 parent_table_offset + vtable_entry->second.offset_from_table;
1239
1240 const auto union_type_vector_field_relative_offset =
1241 ReadScalar<uint16_t>(union_type_vector_field_offset);
1242
1243 if (!union_type_vector_field_relative_offset.has_value()) {
1244 const uint64_t remaining = RemainingBytes(offset);
1245 BinaryRegionComment vector_union_comment;
1246 vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
1247 SetError(vector_union_comment,
1248 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
1249
1250 regions.push_back(MakeBinaryRegion(offset, remaining,
1251 BinaryRegionType::Unknown, remaining,
1252 0, vector_union_comment));
1253
1254 break;
1255 }
1256
1257 // Get the offset to the first type (the + sizeof(uint32_t) is to skip
1258 // over the vector length which we already know). Validation happens
1259 // within the loop below.
1260 const uint64_t union_type_vector_data_offset =
1261 union_type_vector_field_offset +
1262 union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
1263
1264 for (size_t i = 0; i < vector_length.value(); ++i) {
1265 BinaryRegionComment comment;
1266 comment.type = BinaryRegionCommentType::VectorUnionValue;
1267 comment.index = i;
1268
1269 const auto union_relative_offset = ReadScalar<uint32_t>(offset);
1270 if (!union_relative_offset.has_value()) {
1271 const uint64_t remaining = RemainingBytes(offset);
1272
1273 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
1274
1275 regions.push_back(MakeBinaryRegion(offset, remaining,
1276 BinaryRegionType::Unknown,
1277 remaining, 0, comment));
1278
1279 break;
1280 }
1281
1282 // The union offset is relative from the offset location itself.
1283 const uint64_t union_offset = offset + union_relative_offset.value();
1284
1285 if (!IsValidOffset(union_offset)) {
1286 SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
1287
1288 regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1289 BinaryRegionType::UOffset, 0,
1290 union_offset, comment));
1291 continue;
1292 }
1293
1294 const auto realized_type =
1295 ReadScalar<uint8_t>(union_type_vector_data_offset + i);
1296
1297 if (!realized_type.has_value()) {
1298 SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
1299 regions.push_back(MakeBinaryRegion(
1300 offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
1301 continue;
1302 }
1303
1304 if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
1305 realized_type.value())) {
1306 // We already export an error in the union type field, so just skip
1307 // building the union itself and it will default to an unreference
1308 // Binary section.
1309 offset += sizeof(uint32_t);
1310 continue;
1311 }
1312
1313 const std::string enum_type =
1314 BuildUnion(union_offset, realized_type.value(), field);
1315
1316 comment.default_value = "(`" + enum_type + "`)";
1317 regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
1318 BinaryRegionType::UOffset, 0,
1319 union_offset, comment));
1320
1321 offset += sizeof(uint32_t);
1322 }
1323 } break;
1324 default: {
1325 if (IsScalar(field->type()->element())) {
1326 const BinaryRegionType binary_region_type =
1327 GetRegionType(field->type()->element());
1328
1329 const uint64_t type_size = GetTypeSize(field->type()->element());
1330
1331 // TODO(dbaileychess): It might be nicer to user the
1332 // BinaryRegion.array_length field to indicate this.
1333 for (size_t i = 0; i < vector_length.value(); ++i) {
1334 BinaryRegionComment vector_scalar_comment;
1335 vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
1336 vector_scalar_comment.index = i;
1337
1338 if (!IsValidRead(offset, type_size)) {
1339 const uint64_t remaining = RemainingBytes(offset);
1340
1341 SetError(vector_scalar_comment,
1342 BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
1343 std::to_string(type_size));
1344
1345 regions.push_back(
1346 MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
1347 remaining, 0, vector_scalar_comment));
1348 break;
1349 }
1350
1351 if (IsUnionType(field->type()->element())) {
1352 // This is a type for a union. Validate the value
1353 const auto enum_value = ReadScalar<uint8_t>(offset);
1354
1355 // This should always have a value, due to the IsValidRead check
1356 // above.
1357 if (!IsValidUnionValue(field->type()->index(),
1358 enum_value.value())) {
1359 SetError(vector_scalar_comment,
1360 BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
1361 regions.push_back(MakeBinaryRegion(offset, type_size,
1362 binary_region_type, 0, 0,
1363 vector_scalar_comment));
1364 offset += type_size;
1365 continue;
1366 }
1367 }
1368
1369 regions.push_back(MakeBinaryRegion(offset, type_size,
1370 binary_region_type, 0, 0,
1371 vector_scalar_comment));
1372 offset += type_size;
1373 }
1374 }
1375 } break;
1376 }
1377 AddSection(vector_offset,
1378 MakeBinarySection(std::string(table->name()->c_str()) + "." +
1379 field->name()->c_str(),
1380 section_type, std::move(regions)));
1381 }
1382
BuildUnion(const uint64_t union_offset,const uint8_t realized_type,const reflection::Field * const field)1383 std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
1384 const uint8_t realized_type,
1385 const reflection::Field *const field) {
1386 const reflection::Enum *next_enum =
1387 schema_->enums()->Get(field->type()->index());
1388
1389 const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
1390
1391 if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
1392
1393 const reflection::Type *union_type = enum_val->union_type();
1394
1395 if (union_type->base_type() == reflection::BaseType::Obj) {
1396 const reflection::Object *object =
1397 schema_->objects()->Get(union_type->index());
1398
1399 if (object->is_struct()) {
1400 // Union of vectors point to a new Binary section
1401 std::vector<BinaryRegion> regions;
1402
1403 BuildStruct(union_offset, regions, field->name()->c_str(), object);
1404
1405 AddSection(
1406 union_offset,
1407 MakeBinarySection(std::string(object->name()->c_str()) + "." +
1408 field->name()->c_str(),
1409 BinarySectionType::Union, std::move(regions)));
1410 } else {
1411 BuildTable(union_offset, BinarySectionType::Table, object);
1412 }
1413 }
1414 // TODO(dbaileychess): handle the other union types.
1415
1416 return enum_val->name()->c_str();
1417 }
1418
FixMissingRegions()1419 void BinaryAnnotator::FixMissingRegions() {
1420 std::vector<BinaryRegion> regions_to_insert;
1421 for (auto ¤t_section : sections_) {
1422 BinarySection §ion = current_section.second;
1423 if (section.regions.empty()) {
1424 // TODO(dbaileychess): is this possible?
1425 continue;
1426 }
1427
1428 uint64_t offset = section.regions[0].offset + section.regions[0].length;
1429 for (size_t i = 1; i < section.regions.size(); ++i) {
1430 BinaryRegion ®ion = section.regions[i];
1431
1432 const uint64_t next_offset = region.offset;
1433 if (!IsValidOffset(next_offset)) {
1434 // TODO(dbaileychess): figure out how we get into this situation.
1435 continue;
1436 }
1437
1438 if (offset < next_offset) {
1439 const uint64_t padding_bytes = next_offset - offset;
1440
1441 BinaryRegionComment comment;
1442 comment.type = BinaryRegionCommentType::Padding;
1443
1444 if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
1445 SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
1446 regions_to_insert.push_back(
1447 MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
1448 padding_bytes, 0, comment));
1449 } else {
1450 regions_to_insert.push_back(
1451 MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
1452 padding_bytes, 0, comment));
1453 }
1454 }
1455 offset = next_offset + region.length;
1456 }
1457
1458 if (!regions_to_insert.empty()) {
1459 section.regions.insert(section.regions.end(), regions_to_insert.begin(),
1460 regions_to_insert.end());
1461 std::stable_sort(section.regions.begin(), section.regions.end(),
1462 BinaryRegionSort);
1463 regions_to_insert.clear();
1464 }
1465 }
1466 }
1467
FixMissingSections()1468 void BinaryAnnotator::FixMissingSections() {
1469 uint64_t offset = 0;
1470
1471 std::vector<BinarySection> sections_to_insert;
1472
1473 for (auto ¤t_section : sections_) {
1474 BinarySection §ion = current_section.second;
1475 const uint64_t section_start_offset = current_section.first;
1476 const uint64_t section_end_offset =
1477 section.regions.back().offset + section.regions.back().length;
1478
1479 if (offset < section_start_offset) {
1480 // We are at an offset that is less then the current section.
1481 const uint64_t pad_bytes = section_start_offset - offset + 1;
1482
1483 sections_to_insert.push_back(
1484 GenerateMissingSection(offset - 1, pad_bytes, binary_));
1485 }
1486 offset = section_end_offset + 1;
1487 }
1488
1489 // Handle the case where there are still bytes left in the binary that are
1490 // unaccounted for.
1491 if (offset < binary_length_) {
1492 const uint64_t pad_bytes = binary_length_ - offset + 1;
1493 sections_to_insert.push_back(
1494 GenerateMissingSection(offset - 1, pad_bytes, binary_));
1495 }
1496
1497 for (const BinarySection §ion_to_insert : sections_to_insert) {
1498 AddSection(section_to_insert.regions[0].offset, section_to_insert);
1499 }
1500 }
1501
ContainsSection(const uint64_t offset)1502 bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
1503 auto it = sections_.lower_bound(offset);
1504 // If the section is found, check that it is exactly equal its offset.
1505 if (it != sections_.end() && it->first == offset) { return true; }
1506
1507 // If this was the first section, there are no other previous sections to
1508 // check.
1509 if (it == sections_.begin()) { return false; }
1510
1511 // Go back one section.
1512 --it;
1513
1514 // And check that if the offset is covered by the section.
1515 return offset >= it->first && offset < it->second.regions.back().offset +
1516 it->second.regions.back().length;
1517 }
1518
RootTable() const1519 const reflection::Object *BinaryAnnotator::RootTable() const {
1520 if (!root_table_.empty()) {
1521 return schema_->objects()->LookupByKey(root_table_);
1522 }
1523 return schema_->root_table();
1524 }
1525
1526 } // namespace flatbuffers
1527