1 /*
2 * Copyright 2021 Google Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18 #define FLATBUFFERS_BINARY_ANNOTATOR_H_
19
20 #include <list>
21 #include <map>
22 #include <string>
23 #include <vector>
24
25 #include "flatbuffers/base.h"
26 #include "flatbuffers/reflection.h"
27 #include "flatbuffers/stl_emulation.h"
28 #include "flatbuffers/util.h"
29
30 namespace flatbuffers {
31
// The primitive kind of data held by a single BinaryRegion. Every enumerator
// carries an explicit numeric value.
enum class BinaryRegionType {
  // Fallback kind; also what GetRegionType() returns for unmapped base types.
  Unknown = 0,

  // Offset-like kinds.
  UOffset = 1,
  SOffset = 2,
  VOffset = 3,

  // Single-byte kinds.
  Bool = 4,
  Byte = 5,
  Char = 6,
  Uint8 = 7,
  Int8 = 8,

  // Wider integer kinds.
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,

  // Floating point kinds.
  Float = 15,
  Double = 16,

  // Union type tag.
  UType = 17,

  // Offset kind labelled "UOffset64" by ToString().
  UOffset64 = 18,
};
53
// Formats `i` as upper-case hexadecimal text, padded on the left with '0' so
// that the result is at least `width` characters long (longer values are not
// truncated). `width` defaults to sizeof(T).
//
// The value is written with operator<<, so the output follows the stream's
// formatting rules for T.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::ostringstream out;
  out.setf(std::ios_base::hex, std::ios_base::basefield);
  out.setf(std::ios_base::uppercase);
  out.fill('0');
  out.width(static_cast<int>(width));
  out << i;
  return out.str();
}
61
62 // Specialized version for uint8_t that don't work well with std::hex.
ToHex(uint8_t i)63 static inline std::string ToHex(uint8_t i) {
64 return ToHex<int>(static_cast<int>(i), 2);
65 }
66
// Outcome attached to an annotated region. Warning values start at 100 and
// error values start at 200.
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES = 101,
  WARN_CORRUPTED_PADDING = 102,
  WARN_PADDING_LENGTH = 103,

  // Errors.
  ERROR = 200,
  // An offset points outside the bounds of the binary.
  ERROR_OFFSET_OUT_OF_BINARY = 201,
  // N bytes were expected but fewer remain in the binary.
  ERROR_INCOMPLETE_BINARY = 202,
  // The length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG = 203,
  // The length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT = 204,
  // A field marked as required is missing from the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT = 205,
  // A realized union type is outside the bounds of its enum.
  ERROR_INVALID_UNION_TYPE = 206,
  // The offsets form a cycle.
  ERROR_CYCLE_DETECTED = 207,
};
89
// Identifies what a region represents, so that a comment can be produced for
// it. Values are assigned sequentially starting at 0.
enum class BinaryRegionCommentType {
  Unknown = 0,

  // --- Header ---
  SizePrefix,
  // Offset to the root table.
  RootTableOffset,
  // Optional 4-character file identifier.
  FileIdentifier,

  // Generic zero-filled padding.
  Padding,

  // --- VTable ---
  // Size of the vtable.
  VTableSize,
  // Size of the table that refers to the vtable.
  VTableRefferingTableLength,
  // Offset to a vtable field.
  VTableFieldOffset,
  // Offset to a vtable field that is unknown.
  VTableUnknownFieldOffset,

  // --- Table ---
  // The table's offset to its vtable.
  TableVTableOffset,
  // An "inline" table field value.
  TableField,
  // A table field that is unknown.
  TableUnknownField,
  // A table field value that points to another section.
  TableOffsetField,

  // --- Struct / array ---
  // A struct field value.
  StructField,
  // An array field value.
  ArrayField,

  // --- String ---
  // Length of the string.
  StringLength,
  // Contents of the string.
  StringValue,
  // Explicit string terminator.
  StringTerminator,

  // --- Vector ---
  // Length of the vector (number of items).
  VectorLength,
  // An "inline" value of a vector.
  VectorValue,
  // A vector value that points to another section.
  VectorTableValue,
  VectorStringValue,
  VectorUnionValue,
};
134
135 struct BinaryRegionComment {
136 BinaryRegionStatus status = BinaryRegionStatus::OK;
137
138 // If status is non OK, this may be filled in with additional details.
139 std::string status_message;
140
141 BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
142
143 std::string name;
144
145 std::string default_value;
146
147 size_t index = 0;
148 };
149
150 struct BinaryRegion {
151 // Offset into the binary where this region begins.
152 uint64_t offset = 0;
153
154 // The length of this region in bytes.
155 uint64_t length = 0;
156
157 // The underlying datatype of this region
158 BinaryRegionType type = BinaryRegionType::Unknown;
159
160 // If `type` is an array/vector, this is the number of those types this region
161 // encompasses.
162 uint64_t array_length = 0;
163
164 // If the is an offset to some other region, this is what it points to. The
165 // offset is relative to overall binary, not to this region.
166 uint64_t points_to_offset = 0;
167
168 // The comment on the region.
169 BinaryRegionComment comment;
170 };
171
// The logical kind of a BinarySection. Every enumerator carries an explicit
// numeric value.
enum class BinarySectionType {
  Unknown = 0,
  Header = 1,

  // Table-related sections.
  Table = 2,
  RootTable = 3,
  VTable = 4,

  // Other section kinds.
  Struct = 5,
  String = 6,
  Vector = 7,
  Union = 8,
  Padding = 9,
  Vector64 = 10,
};
185
186 // A section of the binary that is grouped together in some logical manner, and
187 // often is pointed too by some other offset BinaryRegion. Sections include
188 // `tables`, `vtables`, `strings`, `vectors`, etc..
189 struct BinarySection {
190 // User-specified name of the section, if applicable.
191 std::string name;
192
193 // The type of this section.
194 BinarySectionType type = BinarySectionType::Unknown;
195
196 // The binary regions that make up this section, in order of their offsets.
197 std::vector<BinaryRegion> regions;
198 };
199
GetRegionType(reflection::BaseType base_type)200 inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
201 switch (base_type) {
202 case reflection::UType: return BinaryRegionType::UType;
203 case reflection::Bool: return BinaryRegionType::Uint8;
204 case reflection::Byte: return BinaryRegionType::Uint8;
205 case reflection::UByte: return BinaryRegionType::Uint8;
206 case reflection::Short: return BinaryRegionType::Int16;
207 case reflection::UShort: return BinaryRegionType::Uint16;
208 case reflection::Int: return BinaryRegionType::Uint32;
209 case reflection::UInt: return BinaryRegionType::Uint32;
210 case reflection::Long: return BinaryRegionType::Int64;
211 case reflection::ULong: return BinaryRegionType::Uint64;
212 case reflection::Float: return BinaryRegionType::Float;
213 case reflection::Double: return BinaryRegionType::Double;
214 default: return BinaryRegionType::Unknown;
215 }
216 }
217
ToString(const BinaryRegionType type)218 inline static std::string ToString(const BinaryRegionType type) {
219 switch (type) {
220 case BinaryRegionType::UOffset: return "UOffset32";
221 case BinaryRegionType::UOffset64: return "UOffset64";
222 case BinaryRegionType::SOffset: return "SOffset32";
223 case BinaryRegionType::VOffset: return "VOffset16";
224 case BinaryRegionType::Bool: return "bool";
225 case BinaryRegionType::Char: return "char";
226 case BinaryRegionType::Byte: return "int8_t";
227 case BinaryRegionType::Uint8: return "uint8_t";
228 case BinaryRegionType::Uint16: return "uint16_t";
229 case BinaryRegionType::Uint32: return "uint32_t";
230 case BinaryRegionType::Uint64: return "uint64_t"; ;
231 case BinaryRegionType::Int8: return "int8_t";
232 case BinaryRegionType::Int16: return "int16_t";
233 case BinaryRegionType::Int32: return "int32_t";
234 case BinaryRegionType::Int64: return "int64_t";
235 case BinaryRegionType::Double: return "double";
236 case BinaryRegionType::Float: return "float";
237 case BinaryRegionType::UType: return "UType8";
238 case BinaryRegionType::Unknown: return "?uint8_t";
239 default: return "todo";
240 }
241 }
242
243 class BinaryAnnotator {
244 public:
BinaryAnnotator(const uint8_t * const bfbs,const uint64_t bfbs_length,const uint8_t * const binary,const uint64_t binary_length,const bool is_size_prefixed)245 explicit BinaryAnnotator(const uint8_t *const bfbs,
246 const uint64_t bfbs_length,
247 const uint8_t *const binary,
248 const uint64_t binary_length,
249 const bool is_size_prefixed)
250 : bfbs_(bfbs),
251 bfbs_length_(bfbs_length),
252 schema_(reflection::GetSchema(bfbs)),
253 binary_(binary),
254 binary_length_(binary_length),
255 is_size_prefixed_(is_size_prefixed) {}
256
257 std::map<uint64_t, BinarySection> Annotate();
258
259 private:
260 struct VTable {
261 struct Entry {
262 const reflection::Field *field = nullptr;
263 uint16_t offset_from_table = 0;
264 };
265
266 const reflection::Object *referring_table = nullptr;
267
268 // Field ID -> {field def, offset from table}
269 std::map<uint16_t, Entry> fields;
270
271 uint16_t vtable_size = 0;
272 uint16_t table_size = 0;
273 };
274
275 uint64_t BuildHeader(uint64_t offset);
276
277 // VTables can be shared across instances or even across objects. This
278 // attempts to get an existing vtable given the offset and table type,
279 // otherwise it will built the vtable, memorize it, and return the built
280 // VTable. Returns nullptr if building the VTable fails.
281 VTable *GetOrBuildVTable(uint64_t offset, const reflection::Object *table,
282 uint64_t offset_of_referring_table);
283
284 void BuildTable(uint64_t offset, const BinarySectionType type,
285 const reflection::Object *table);
286
287 uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> ®ions,
288 const std::string referring_field_name,
289 const reflection::Object *structure);
290
291 void BuildString(uint64_t offset, const reflection::Object *table,
292 const reflection::Field *field);
293
294 void BuildVector(uint64_t offset, const reflection::Object *table,
295 const reflection::Field *field, uint64_t parent_table_offset,
296 const std::map<uint16_t, VTable::Entry> vtable_fields);
297
298 std::string BuildUnion(uint64_t offset, uint8_t realized_type,
299 const reflection::Field *field);
300
301 void FixMissingRegions();
302 void FixMissingSections();
303
IsValidOffset(const uint64_t offset)304 inline bool IsValidOffset(const uint64_t offset) const {
305 return offset < binary_length_;
306 }
307
308 // Determines if performing a GetScalar request for `T` at `offset` would read
309 // passed the end of the binary.
IsValidRead(const uint64_t offset)310 template<typename T> inline bool IsValidRead(const uint64_t offset) const {
311 return IsValidRead(offset, sizeof(T));
312 }
313
IsValidRead(const uint64_t offset,const uint64_t length)314 inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
315 return length < binary_length_ && IsValidOffset(offset + length - 1);
316 }
317
318 // Calculate the number of bytes remaining from the given offset. If offset is
319 // > binary_length, 0 is returned.
RemainingBytes(const uint64_t offset)320 uint64_t RemainingBytes(const uint64_t offset) const {
321 return IsValidOffset(offset) ? binary_length_ - offset : 0;
322 }
323
324 template<typename T>
ReadScalar(const uint64_t offset)325 flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
326 if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
327
328 return flatbuffers::ReadScalar<T>(binary_ + offset);
329 }
330
331 // Adds the provided `section` keyed by the `offset` it occurs at. If a
332 // section is already added at that offset, it doesn't replace the exisiting
333 // one.
AddSection(const uint64_t offset,const BinarySection & section)334 void AddSection(const uint64_t offset, const BinarySection §ion) {
335 sections_.insert(std::make_pair(offset, section));
336 }
337
IsInlineField(const reflection::Field * const field)338 bool IsInlineField(const reflection::Field *const field) {
339 if (field->type()->base_type() == reflection::BaseType::Obj) {
340 return schema_->objects()->Get(field->type()->index())->is_struct();
341 }
342 return IsScalar(field->type()->base_type());
343 }
344
IsUnionType(const reflection::BaseType type)345 bool IsUnionType(const reflection::BaseType type) {
346 return (type == reflection::BaseType::UType ||
347 type == reflection::BaseType::Union);
348 }
349
IsUnionType(const reflection::Field * const field)350 bool IsUnionType(const reflection::Field *const field) {
351 return IsUnionType(field->type()->base_type()) &&
352 field->type()->index() >= 0;
353 }
354
IsValidUnionValue(const reflection::Field * const field,const uint8_t value)355 bool IsValidUnionValue(const reflection::Field *const field,
356 const uint8_t value) {
357 return IsUnionType(field) &&
358 IsValidUnionValue(field->type()->index(), value);
359 }
360
IsValidUnionValue(const uint32_t enum_id,const uint8_t value)361 bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
362 if (enum_id >= schema_->enums()->size()) { return false; }
363
364 const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
365
366 if (enum_def == nullptr) { return false; }
367
368 return value < enum_def->values()->size();
369 }
370
GetElementSize(const reflection::Field * const field)371 uint64_t GetElementSize(const reflection::Field *const field) {
372 if (IsScalar(field->type()->element())) {
373 return GetTypeSize(field->type()->element());
374 }
375
376 switch (field->type()->element()) {
377 case reflection::BaseType::Obj: {
378 auto obj = schema_->objects()->Get(field->type()->index());
379 return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
380 }
381 default: return sizeof(uint32_t);
382 }
383 }
384
385 bool ContainsSection(const uint64_t offset);
386
387 // The schema for the binary file
388 const uint8_t *bfbs_;
389 const uint64_t bfbs_length_;
390 const reflection::Schema *schema_;
391
392 // The binary data itself.
393 const uint8_t *binary_;
394 const uint64_t binary_length_;
395 const bool is_size_prefixed_;
396
397 // Map of binary offset to vtables, to dedupe vtables.
398 std::map<uint64_t, std::list<VTable>> vtables_;
399
400 // The annotated binary sections, index by their absolute offset.
401 std::map<uint64_t, BinarySection> sections_;
402 };
403
404 } // namespace flatbuffers
405
406 #endif // FLATBUFFERS_BINARY_ANNOTATOR_H_
407