/*
 * Copyright 2021 Google Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18 #define FLATBUFFERS_BINARY_ANNOTATOR_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <iomanip>
23 #include <ios>
24 #include <list>
25 #include <map>
26 #include <sstream>
27 #include <string>
28 #include <utility>
29 #include <vector>
30 
31 #include "flatbuffers/base.h"
32 #include "flatbuffers/reflection.h"
33 #include "flatbuffers/reflection_generated.h"
34 #include "flatbuffers/stl_emulation.h"
35 
36 namespace flatbuffers {
37 
// The datatype that the bytes of a BinaryRegion are interpreted as.
//
// The numeric values are spelled out explicitly so that they stay stable if
// enumerators are ever reordered.
enum class BinaryRegionType {
  Unknown = 0,

  // Offset types.
  UOffset = 1,
  SOffset = 2,
  VOffset = 3,

  // Scalar types.
  Bool = 4,
  Byte = 5,
  Char = 6,
  Uint8 = 7,
  Int8 = 8,
  Uint16 = 9,
  Int16 = 10,
  Uint32 = 11,
  Int32 = 12,
  Uint64 = 13,
  Int64 = 14,
  Float = 15,
  Double = 16,

  // The type discriminator of a union.
  UType = 17,

  // 64-bit variant of UOffset.
  UOffset64 = 18,
};
59 
// Formats `i` as an uppercase hexadecimal string, left-padded with '0' to at
// least `width` characters. No "0x" prefix is added.
//
// Note that `width` counts output characters, not bytes, and defaults to
// sizeof(T). Character-like types are streamed as characters rather than as
// numbers; use the uint8_t overload below for single bytes.
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream stream;
  stream << std::hex << std::uppercase << std::setfill('0')
         << std::setw(static_cast<int>(width)) << i;
  return stream.str();
}

// Overload for uint8_t, which does not work well with std::hex because the
// stream would print it as a character. The byte is widened to int and
// rendered as exactly two hex digits.
static inline std::string ToHex(uint8_t i) {
  return ToHex<int>(static_cast<int>(i), 2);
}
72 
// The health of an annotated region. Values are grouped in numeric bands:
// OK is 0, warnings start at 100 and errors start at 200. Enumerators without
// an explicit value continue counting from the previous one.
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES,
  WARN_CORRUPTED_PADDING,
  WARN_PADDING_LENGTH,

  // Errors.
  ERROR = 200,
  // An offset is pointing outside the binary bounds.
  ERROR_OFFSET_OUT_OF_BINARY,
  // Expecting to read N bytes but not enough remain in the binary.
  ERROR_INCOMPLETE_BINARY,
  // When a length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG,
  // When a length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT,
  // A field marked required is not present in the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT,
  // A realized union type is not within the enum bounds.
  ERROR_INVALID_UNION_TYPE,
  // Occurs when there is a cycle in offsets.
  ERROR_CYCLE_DETECTED,
};
95 
// Identifies what a region represents, so that a comment can be attached to
// it. Values are assigned sequentially starting from 0.
enum class BinaryRegionCommentType {
  Unknown = 0,
  SizePrefix,
  // The offset to the root table.
  RootTableOffset,
  // The optional 4-char file identifier.
  FileIdentifier,
  // Generic 0-filled padding.
  Padding,
  // The size of the vtable.
  VTableSize,
  // The size of the referring table.
  VTableRefferingTableLength,
  // Offsets to vtable fields.
  VTableFieldOffset,
  // Offsets to unknown vtable fields.
  VTableUnknownFieldOffset,
  // The vtable offset of a table.
  TableVTableOffset,
  // An "inline" table field value.
  TableField,
  // A table field that is unknown.
  TableUnknownField,
  // A table field value that points to another section.
  TableOffsetField,
  // A struct field value.
  StructField,
  // An array field value.
  ArrayField,
  // The length of the string.
  StringLength,
  // The string contents.
  StringValue,
  // The explicit string terminator.
  StringTerminator,
  // The length of the vector (# of items).
  VectorLength,
  // An "inline" value of a vector.
  VectorValue,
  // A vector value that points to another section.
  VectorTableValue,
  VectorStringValue,
  VectorUnionValue,
};
140 
141 struct BinaryRegionComment {
142   BinaryRegionStatus status = BinaryRegionStatus::OK;
143 
144   // If status is non OK, this may be filled in with additional details.
145   std::string status_message;
146 
147   BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
148 
149   std::string name;
150 
151   std::string default_value;
152 
153   size_t index = 0;
154 };
155 
156 struct BinaryRegion {
157   // Offset into the binary where this region begins.
158   uint64_t offset = 0;
159 
160   // The length of this region in bytes.
161   uint64_t length = 0;
162 
163   // The underlying datatype of this region
164   BinaryRegionType type = BinaryRegionType::Unknown;
165 
166   // If `type` is an array/vector, this is the number of those types this region
167   // encompasses.
168   uint64_t array_length = 0;
169 
170   // If the is an offset to some other region, this is what it points to. The
171   // offset is relative to overall binary, not to this region.
172   uint64_t points_to_offset = 0;
173 
174   // The comment on the region.
175   BinaryRegionComment comment;
176 };
177 
// The kind of logical grouping that a BinarySection represents. The numeric
// values are spelled out explicitly so that they stay stable.
enum class BinarySectionType {
  Unknown = 0,
  Header = 1,
  Table = 2,
  RootTable = 3,
  VTable = 4,
  Struct = 5,
  String = 6,
  Vector = 7,
  Union = 8,
  Padding = 9,
  Vector64 = 10,
};
191 
192 // A section of the binary that is grouped together in some logical manner, and
193 // often is pointed too by some other offset BinaryRegion. Sections include
194 // `tables`, `vtables`, `strings`, `vectors`, etc..
195 struct BinarySection {
196   // User-specified name of the section, if applicable.
197   std::string name;
198 
199   // The type of this section.
200   BinarySectionType type = BinarySectionType::Unknown;
201 
202   // The binary regions that make up this section, in order of their offsets.
203   std::vector<BinaryRegion> regions;
204 };
205 
GetRegionType(reflection::BaseType base_type)206 inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
207   switch (base_type) {
208     case reflection::UType: return BinaryRegionType::UType;
209     case reflection::Bool: return BinaryRegionType::Uint8;
210     case reflection::Byte: return BinaryRegionType::Uint8;
211     case reflection::UByte: return BinaryRegionType::Uint8;
212     case reflection::Short: return BinaryRegionType::Int16;
213     case reflection::UShort: return BinaryRegionType::Uint16;
214     case reflection::Int: return BinaryRegionType::Uint32;
215     case reflection::UInt: return BinaryRegionType::Uint32;
216     case reflection::Long: return BinaryRegionType::Int64;
217     case reflection::ULong: return BinaryRegionType::Uint64;
218     case reflection::Float: return BinaryRegionType::Float;
219     case reflection::Double: return BinaryRegionType::Double;
220     default: return BinaryRegionType::Unknown;
221   }
222 }
223 
ToString(const BinaryRegionType type)224 inline static std::string ToString(const BinaryRegionType type) {
225   switch (type) {
226     case BinaryRegionType::UOffset: return "UOffset32";
227     case BinaryRegionType::UOffset64: return "UOffset64";
228     case BinaryRegionType::SOffset: return "SOffset32";
229     case BinaryRegionType::VOffset: return "VOffset16";
230     case BinaryRegionType::Bool: return "bool";
231     case BinaryRegionType::Char: return "char";
232     case BinaryRegionType::Byte: return "int8_t";
233     case BinaryRegionType::Uint8: return "uint8_t";
234     case BinaryRegionType::Uint16: return "uint16_t";
235     case BinaryRegionType::Uint32: return "uint32_t";
236     case BinaryRegionType::Uint64: return "uint64_t";
237     case BinaryRegionType::Int8: return "int8_t";
238     case BinaryRegionType::Int16: return "int16_t";
239     case BinaryRegionType::Int32: return "int32_t";
240     case BinaryRegionType::Int64: return "int64_t";
241     case BinaryRegionType::Double: return "double";
242     case BinaryRegionType::Float: return "float";
243     case BinaryRegionType::UType: return "UType8";
244     case BinaryRegionType::Unknown: return "?uint8_t";
245     default: return "todo";
246   }
247 }
248 
249 class BinaryAnnotator {
250  public:
BinaryAnnotator(const uint8_t * const bfbs,const uint64_t bfbs_length,const uint8_t * const binary,const uint64_t binary_length,const bool is_size_prefixed)251   explicit BinaryAnnotator(const uint8_t *const bfbs,
252                            const uint64_t bfbs_length,
253                            const uint8_t *const binary,
254                            const uint64_t binary_length,
255                            const bool is_size_prefixed)
256       : bfbs_(bfbs),
257         bfbs_length_(bfbs_length),
258         schema_(reflection::GetSchema(bfbs)),
259         root_table_(""),
260         binary_(binary),
261         binary_length_(binary_length),
262         is_size_prefixed_(is_size_prefixed) {}
263 
BinaryAnnotator(const reflection::Schema * schema,const std::string & root_table,const uint8_t * binary,uint64_t binary_length,bool is_size_prefixed)264   BinaryAnnotator(const reflection::Schema *schema,
265                   const std::string &root_table, const uint8_t *binary,
266                   uint64_t binary_length, bool is_size_prefixed)
267       : bfbs_(nullptr),
268         bfbs_length_(0),
269         schema_(schema),
270         root_table_(root_table),
271         binary_(binary),
272         binary_length_(binary_length),
273         is_size_prefixed_(is_size_prefixed) {}
274 
275   std::map<uint64_t, BinarySection> Annotate();
276 
277  private:
278   struct VTable {
279     struct Entry {
280       const reflection::Field *field = nullptr;
281       uint16_t offset_from_table = 0;
282     };
283 
284     const reflection::Object *referring_table = nullptr;
285 
286     // Field ID -> {field def, offset from table}
287     std::map<uint16_t, Entry> fields;
288 
289     uint16_t vtable_size = 0;
290     uint16_t table_size = 0;
291   };
292 
293   uint64_t BuildHeader(uint64_t offset);
294 
295   // VTables can be shared across instances or even across objects. This
296   // attempts to get an existing vtable given the offset and table type,
297   // otherwise it will built the vtable, memorize it, and return the built
298   // VTable. Returns nullptr if building the VTable fails.
299   VTable *GetOrBuildVTable(uint64_t offset, const reflection::Object *table,
300                            uint64_t offset_of_referring_table);
301 
302   void BuildTable(uint64_t offset, const BinarySectionType type,
303                   const reflection::Object *table);
304 
305   uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> &regions,
306                        const std::string referring_field_name,
307                        const reflection::Object *structure);
308 
309   void BuildString(uint64_t offset, const reflection::Object *table,
310                    const reflection::Field *field);
311 
312   void BuildVector(uint64_t offset, const reflection::Object *table,
313                    const reflection::Field *field, uint64_t parent_table_offset,
314                    const std::map<uint16_t, VTable::Entry> vtable_fields);
315 
316   std::string BuildUnion(uint64_t offset, uint8_t realized_type,
317                          const reflection::Field *field);
318 
319   void FixMissingRegions();
320   void FixMissingSections();
321 
IsValidOffset(const uint64_t offset)322   inline bool IsValidOffset(const uint64_t offset) const {
323     return offset < binary_length_;
324   }
325 
326   // Determines if performing a GetScalar request for `T` at `offset` would read
327   // passed the end of the binary.
IsValidRead(const uint64_t offset)328   template<typename T> inline bool IsValidRead(const uint64_t offset) const {
329     return IsValidRead(offset, sizeof(T));
330   }
331 
IsValidRead(const uint64_t offset,const uint64_t length)332   inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
333     return length < binary_length_ && IsValidOffset(offset + length - 1);
334   }
335 
336   // Calculate the number of bytes remaining from the given offset. If offset is
337   // > binary_length, 0 is returned.
RemainingBytes(const uint64_t offset)338   uint64_t RemainingBytes(const uint64_t offset) const {
339     return IsValidOffset(offset) ? binary_length_ - offset : 0;
340   }
341 
342   template<typename T>
ReadScalar(const uint64_t offset)343   flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
344     if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
345 
346     return flatbuffers::ReadScalar<T>(binary_ + offset);
347   }
348 
349   // Adds the provided `section` keyed by the `offset` it occurs at. If a
350   // section is already added at that offset, it doesn't replace the existing
351   // one.
AddSection(const uint64_t offset,const BinarySection & section)352   void AddSection(const uint64_t offset, const BinarySection &section) {
353     sections_.insert(std::make_pair(offset, section));
354   }
355 
IsInlineField(const reflection::Field * const field)356   bool IsInlineField(const reflection::Field *const field) {
357     if (field->type()->base_type() == reflection::BaseType::Obj) {
358       return schema_->objects()->Get(field->type()->index())->is_struct();
359     }
360     return IsScalar(field->type()->base_type());
361   }
362 
IsUnionType(const reflection::BaseType type)363   bool IsUnionType(const reflection::BaseType type) {
364     return (type == reflection::BaseType::UType ||
365             type == reflection::BaseType::Union);
366   }
367 
IsUnionType(const reflection::Field * const field)368   bool IsUnionType(const reflection::Field *const field) {
369     return IsUnionType(field->type()->base_type()) &&
370            field->type()->index() >= 0;
371   }
372 
IsValidUnionValue(const reflection::Field * const field,const uint8_t value)373   bool IsValidUnionValue(const reflection::Field *const field,
374                          const uint8_t value) {
375     return IsUnionType(field) &&
376            IsValidUnionValue(field->type()->index(), value);
377   }
378 
IsValidUnionValue(const uint32_t enum_id,const uint8_t value)379   bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
380     if (enum_id >= schema_->enums()->size()) { return false; }
381 
382     const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
383 
384     if (enum_def == nullptr) { return false; }
385 
386     return value < enum_def->values()->size();
387   }
388 
GetElementSize(const reflection::Field * const field)389   uint64_t GetElementSize(const reflection::Field *const field) {
390     if (IsScalar(field->type()->element())) {
391       return GetTypeSize(field->type()->element());
392     }
393 
394     switch (field->type()->element()) {
395       case reflection::BaseType::Obj: {
396         auto obj = schema_->objects()->Get(field->type()->index());
397         return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
398       }
399       default: return sizeof(uint32_t);
400     }
401   }
402 
403   bool ContainsSection(const uint64_t offset);
404 
405   const reflection::Object *RootTable() const;
406 
407   // The schema for the binary file
408   const uint8_t *bfbs_;
409   const uint64_t bfbs_length_;
410   const reflection::Schema *schema_;
411   const std::string root_table_;
412 
413   // The binary data itself.
414   const uint8_t *binary_;
415   const uint64_t binary_length_;
416   const bool is_size_prefixed_;
417 
418   // Map of binary offset to vtables, to dedupe vtables.
419   std::map<uint64_t, std::list<VTable>> vtables_;
420 
421   // The annotated binary sections, index by their absolute offset.
422   std::map<uint64_t, BinarySection> sections_;
423 };
424 
425 }  // namespace flatbuffers
426 
427 #endif  // FLATBUFFERS_BINARY_ANNOTATOR_H_
428