1 /*
2 * Copyright 2021 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18 #define FLATBUFFERS_BINARY_ANNOTATOR_H_
19
20 #include <cstddef>
21 #include <cstdint>
22 #include <iomanip>
23 #include <ios>
24 #include <list>
25 #include <map>
26 #include <sstream>
27 #include <string>
28 #include <utility>
29 #include <vector>
30
31 #include "flatbuffers/base.h"
32 #include "flatbuffers/reflection.h"
33 #include "flatbuffers/reflection_generated.h"
34 #include "flatbuffers/stl_emulation.h"
35
36 namespace flatbuffers {
37
// The datatype used to interpret the bytes of a BinaryRegion.
//
// Enumerators are grouped by category below; every explicit numeric value is
// pinned, so the declaration order carries no meaning.
enum class BinaryRegionType {
  Unknown = 0,

  // Offsets.
  UOffset = 1,
  UOffset64 = 18,
  SOffset = 2,
  VOffset = 3,

  // Booleans, bytes and characters.
  Bool = 4,
  Byte = 5,
  Char = 6,

  // Fixed-width integers.
  Uint8 = 7,
  Int8 = 8,
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,

  // Floating point.
  Float = 15,
  Double = 16,

  // Union type discriminator.
  UType = 17,
};
59
// Formats `i` as upper-case hexadecimal, left-padded with '0' up to `width`
// characters. Values needing more than `width` digits are not truncated.
//
// Note that the default `width` is sizeof(T), i.e. the number of *bytes* in T,
// not the number of hex digits required to print every bit of T.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::ostringstream out;
  out << std::uppercase << std::hex;
  out << std::setfill('0') << std::setw(static_cast<int>(width));
  out << i;
  return out.str();
}
67
68 // Specialized version for uint8_t that don't work well with std::hex.
ToHex(uint8_t i)69 static inline std::string ToHex(uint8_t i) {
70 return ToHex<int>(static_cast<int>(i), 2);
71 }
72
// Outcome attached to a region comment. Values are grouped into bands: OK is
// 0, warnings start at WARN (100) and errors start at ERROR (200).
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES = 101,
  WARN_CORRUPTED_PADDING = 102,
  WARN_PADDING_LENGTH = 103,

  // Errors.
  ERROR = 200,
  // An offset is pointing outside the binary bounds.
  ERROR_OFFSET_OUT_OF_BINARY = 201,
  // Expecting to read N bytes but not enough remain in the binary.
  ERROR_INCOMPLETE_BINARY = 202,
  // The length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG = 203,
  // The length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT = 204,
  // A field marked required is not present in the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT = 205,
  // A realized union type is not within the enum bounds.
  ERROR_INVALID_UNION_TYPE = 206,
  // There is a cycle in offsets.
  ERROR_CYCLE_DETECTED = 207,
};
95
// Identifies what a BinaryRegion represents within the binary, so that a
// comment can be attached to it. Values are consecutive starting at 0.
enum class BinaryRegionCommentType {
  Unknown = 0,
  SizePrefix = 1,
  // The offset to the root table.
  RootTableOffset = 2,
  // The optional 4-char file identifier.
  FileIdentifier = 3,
  // Generic 0-filled padding.
  Padding = 4,

  // The size of the vtable.
  VTableSize = 5,
  // The size of the referring table.
  VTableRefferingTableLength = 6,
  // Offsets to vtable fields.
  VTableFieldOffset = 7,
  // Offsets to unknown vtable fields.
  VTableUnknownFieldOffset = 8,

  // The vtable offset of a table.
  TableVTableOffset = 9,
  // An "inline" table field value.
  TableField = 10,
  // A table field that is unknown.
  TableUnknownField = 11,
  // A table field value that points to another section.
  TableOffsetField = 12,

  // A struct field value.
  StructField = 13,
  // An array field value.
  ArrayField = 14,

  // The length of the string.
  StringLength = 15,
  // The string contents.
  StringValue = 16,
  // The explicit string terminator.
  StringTerminator = 17,

  // The length of the vector (# of items).
  VectorLength = 18,
  // An "inline" value of a vector.
  VectorValue = 19,
  // A vector value that points to another section.
  VectorTableValue = 20,
  VectorStringValue = 21,
  VectorUnionValue = 22,
};
140
141 struct BinaryRegionComment {
142 BinaryRegionStatus status = BinaryRegionStatus::OK;
143
144 // If status is non OK, this may be filled in with additional details.
145 std::string status_message;
146
147 BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
148
149 std::string name;
150
151 std::string default_value;
152
153 size_t index = 0;
154 };
155
156 struct BinaryRegion {
157 // Offset into the binary where this region begins.
158 uint64_t offset = 0;
159
160 // The length of this region in bytes.
161 uint64_t length = 0;
162
163 // The underlying datatype of this region
164 BinaryRegionType type = BinaryRegionType::Unknown;
165
166 // If `type` is an array/vector, this is the number of those types this region
167 // encompasses.
168 uint64_t array_length = 0;
169
170 // If the is an offset to some other region, this is what it points to. The
171 // offset is relative to overall binary, not to this region.
172 uint64_t points_to_offset = 0;
173
174 // The comment on the region.
175 BinaryRegionComment comment;
176 };
177
// The kind of logical section a group of regions forms.
//
// Related enumerators are listed together; every explicit numeric value is
// pinned, so the declaration order carries no meaning.
enum class BinarySectionType {
  Unknown = 0,
  Header = 1,

  // Tables and their vtables.
  RootTable = 3,
  Table = 2,
  VTable = 4,

  Struct = 5,
  String = 6,

  // Vectors.
  Vector = 7,
  Vector64 = 10,

  Union = 8,
  Padding = 9,
};
191
192 // A section of the binary that is grouped together in some logical manner, and
193 // often is pointed too by some other offset BinaryRegion. Sections include
194 // `tables`, `vtables`, `strings`, `vectors`, etc..
195 struct BinarySection {
196 // User-specified name of the section, if applicable.
197 std::string name;
198
199 // The type of this section.
200 BinarySectionType type = BinarySectionType::Unknown;
201
202 // The binary regions that make up this section, in order of their offsets.
203 std::vector<BinaryRegion> regions;
204 };
205
GetRegionType(reflection::BaseType base_type)206 inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
207 switch (base_type) {
208 case reflection::UType: return BinaryRegionType::UType;
209 case reflection::Bool: return BinaryRegionType::Uint8;
210 case reflection::Byte: return BinaryRegionType::Uint8;
211 case reflection::UByte: return BinaryRegionType::Uint8;
212 case reflection::Short: return BinaryRegionType::Int16;
213 case reflection::UShort: return BinaryRegionType::Uint16;
214 case reflection::Int: return BinaryRegionType::Uint32;
215 case reflection::UInt: return BinaryRegionType::Uint32;
216 case reflection::Long: return BinaryRegionType::Int64;
217 case reflection::ULong: return BinaryRegionType::Uint64;
218 case reflection::Float: return BinaryRegionType::Float;
219 case reflection::Double: return BinaryRegionType::Double;
220 default: return BinaryRegionType::Unknown;
221 }
222 }
223
ToString(const BinaryRegionType type)224 inline static std::string ToString(const BinaryRegionType type) {
225 switch (type) {
226 case BinaryRegionType::UOffset: return "UOffset32";
227 case BinaryRegionType::UOffset64: return "UOffset64";
228 case BinaryRegionType::SOffset: return "SOffset32";
229 case BinaryRegionType::VOffset: return "VOffset16";
230 case BinaryRegionType::Bool: return "bool";
231 case BinaryRegionType::Char: return "char";
232 case BinaryRegionType::Byte: return "int8_t";
233 case BinaryRegionType::Uint8: return "uint8_t";
234 case BinaryRegionType::Uint16: return "uint16_t";
235 case BinaryRegionType::Uint32: return "uint32_t";
236 case BinaryRegionType::Uint64: return "uint64_t";
237 case BinaryRegionType::Int8: return "int8_t";
238 case BinaryRegionType::Int16: return "int16_t";
239 case BinaryRegionType::Int32: return "int32_t";
240 case BinaryRegionType::Int64: return "int64_t";
241 case BinaryRegionType::Double: return "double";
242 case BinaryRegionType::Float: return "float";
243 case BinaryRegionType::UType: return "UType8";
244 case BinaryRegionType::Unknown: return "?uint8_t";
245 default: return "todo";
246 }
247 }
248
249 class BinaryAnnotator {
250 public:
BinaryAnnotator(const uint8_t * const bfbs,const uint64_t bfbs_length,const uint8_t * const binary,const uint64_t binary_length,const bool is_size_prefixed)251 explicit BinaryAnnotator(const uint8_t *const bfbs,
252 const uint64_t bfbs_length,
253 const uint8_t *const binary,
254 const uint64_t binary_length,
255 const bool is_size_prefixed)
256 : bfbs_(bfbs),
257 bfbs_length_(bfbs_length),
258 schema_(reflection::GetSchema(bfbs)),
259 root_table_(""),
260 binary_(binary),
261 binary_length_(binary_length),
262 is_size_prefixed_(is_size_prefixed) {}
263
BinaryAnnotator(const reflection::Schema * schema,const std::string & root_table,const uint8_t * binary,uint64_t binary_length,bool is_size_prefixed)264 BinaryAnnotator(const reflection::Schema *schema,
265 const std::string &root_table, const uint8_t *binary,
266 uint64_t binary_length, bool is_size_prefixed)
267 : bfbs_(nullptr),
268 bfbs_length_(0),
269 schema_(schema),
270 root_table_(root_table),
271 binary_(binary),
272 binary_length_(binary_length),
273 is_size_prefixed_(is_size_prefixed) {}
274
275 std::map<uint64_t, BinarySection> Annotate();
276
277 private:
278 struct VTable {
279 struct Entry {
280 const reflection::Field *field = nullptr;
281 uint16_t offset_from_table = 0;
282 };
283
284 const reflection::Object *referring_table = nullptr;
285
286 // Field ID -> {field def, offset from table}
287 std::map<uint16_t, Entry> fields;
288
289 uint16_t vtable_size = 0;
290 uint16_t table_size = 0;
291 };
292
293 uint64_t BuildHeader(uint64_t offset);
294
295 // VTables can be shared across instances or even across objects. This
296 // attempts to get an existing vtable given the offset and table type,
297 // otherwise it will built the vtable, memorize it, and return the built
298 // VTable. Returns nullptr if building the VTable fails.
299 VTable *GetOrBuildVTable(uint64_t offset, const reflection::Object *table,
300 uint64_t offset_of_referring_table);
301
302 void BuildTable(uint64_t offset, const BinarySectionType type,
303 const reflection::Object *table);
304
305 uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> ®ions,
306 const std::string referring_field_name,
307 const reflection::Object *structure);
308
309 void BuildString(uint64_t offset, const reflection::Object *table,
310 const reflection::Field *field);
311
312 void BuildVector(uint64_t offset, const reflection::Object *table,
313 const reflection::Field *field, uint64_t parent_table_offset,
314 const std::map<uint16_t, VTable::Entry> vtable_fields);
315
316 std::string BuildUnion(uint64_t offset, uint8_t realized_type,
317 const reflection::Field *field);
318
319 void FixMissingRegions();
320 void FixMissingSections();
321
IsValidOffset(const uint64_t offset)322 inline bool IsValidOffset(const uint64_t offset) const {
323 return offset < binary_length_;
324 }
325
326 // Determines if performing a GetScalar request for `T` at `offset` would read
327 // passed the end of the binary.
IsValidRead(const uint64_t offset)328 template<typename T> inline bool IsValidRead(const uint64_t offset) const {
329 return IsValidRead(offset, sizeof(T));
330 }
331
IsValidRead(const uint64_t offset,const uint64_t length)332 inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
333 return length < binary_length_ && IsValidOffset(offset + length - 1);
334 }
335
336 // Calculate the number of bytes remaining from the given offset. If offset is
337 // > binary_length, 0 is returned.
RemainingBytes(const uint64_t offset)338 uint64_t RemainingBytes(const uint64_t offset) const {
339 return IsValidOffset(offset) ? binary_length_ - offset : 0;
340 }
341
342 template<typename T>
ReadScalar(const uint64_t offset)343 flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
344 if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
345
346 return flatbuffers::ReadScalar<T>(binary_ + offset);
347 }
348
349 // Adds the provided `section` keyed by the `offset` it occurs at. If a
350 // section is already added at that offset, it doesn't replace the existing
351 // one.
AddSection(const uint64_t offset,const BinarySection & section)352 void AddSection(const uint64_t offset, const BinarySection §ion) {
353 sections_.insert(std::make_pair(offset, section));
354 }
355
IsInlineField(const reflection::Field * const field)356 bool IsInlineField(const reflection::Field *const field) {
357 if (field->type()->base_type() == reflection::BaseType::Obj) {
358 return schema_->objects()->Get(field->type()->index())->is_struct();
359 }
360 return IsScalar(field->type()->base_type());
361 }
362
IsUnionType(const reflection::BaseType type)363 bool IsUnionType(const reflection::BaseType type) {
364 return (type == reflection::BaseType::UType ||
365 type == reflection::BaseType::Union);
366 }
367
IsUnionType(const reflection::Field * const field)368 bool IsUnionType(const reflection::Field *const field) {
369 return IsUnionType(field->type()->base_type()) &&
370 field->type()->index() >= 0;
371 }
372
IsValidUnionValue(const reflection::Field * const field,const uint8_t value)373 bool IsValidUnionValue(const reflection::Field *const field,
374 const uint8_t value) {
375 return IsUnionType(field) &&
376 IsValidUnionValue(field->type()->index(), value);
377 }
378
IsValidUnionValue(const uint32_t enum_id,const uint8_t value)379 bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
380 if (enum_id >= schema_->enums()->size()) { return false; }
381
382 const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
383
384 if (enum_def == nullptr) { return false; }
385
386 return value < enum_def->values()->size();
387 }
388
GetElementSize(const reflection::Field * const field)389 uint64_t GetElementSize(const reflection::Field *const field) {
390 if (IsScalar(field->type()->element())) {
391 return GetTypeSize(field->type()->element());
392 }
393
394 switch (field->type()->element()) {
395 case reflection::BaseType::Obj: {
396 auto obj = schema_->objects()->Get(field->type()->index());
397 return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
398 }
399 default: return sizeof(uint32_t);
400 }
401 }
402
403 bool ContainsSection(const uint64_t offset);
404
405 const reflection::Object *RootTable() const;
406
407 // The schema for the binary file
408 const uint8_t *bfbs_;
409 const uint64_t bfbs_length_;
410 const reflection::Schema *schema_;
411 const std::string root_table_;
412
413 // The binary data itself.
414 const uint8_t *binary_;
415 const uint64_t binary_length_;
416 const bool is_size_prefixed_;
417
418 // Map of binary offset to vtables, to dedupe vtables.
419 std::map<uint64_t, std::list<VTable>> vtables_;
420
421 // The annotated binary sections, index by their absolute offset.
422 std::map<uint64_t, BinarySection> sections_;
423 };
424
425 } // namespace flatbuffers
426
427 #endif // FLATBUFFERS_BINARY_ANNOTATOR_H_
428