• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2021 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef FLATBUFFERS_BINARY_ANNOTATOR_H_
18 #define FLATBUFFERS_BINARY_ANNOTATOR_H_
19 
20 #include <list>
21 #include <map>
22 #include <string>
23 #include <vector>
24 
25 #include "flatbuffers/base.h"
26 #include "flatbuffers/reflection.h"
27 #include "flatbuffers/stl_emulation.h"
28 #include "flatbuffers/util.h"
29 
30 namespace flatbuffers {
31 
// The underlying datatype of a single BinaryRegion. Every enumerator has an
// explicitly pinned value, so the declaration order below (grouped by kind)
// has no effect on the numeric values.
enum class BinaryRegionType {
  Unknown = 0,

  // Offset kinds.
  UOffset = 1,
  UOffset64 = 18,
  SOffset = 2,
  VOffset = 3,

  // Boolean, byte and character kinds.
  Bool = 4,
  Byte = 5,
  Char = 6,

  // Union type discriminator.
  UType = 17,

  // Fixed-width integer kinds.
  Int8 = 8,
  Uint8 = 7,
  Int16 = 10,
  Uint16 = 9,
  Int32 = 12,
  Uint32 = 11,
  Int64 = 14,
  Uint64 = 13,

  // Floating point kinds.
  Float = 15,
  Double = 16,
};
53 
// Formats `i` as upper-case hexadecimal text, left-padded with '0' up to
// `width` characters. Values needing more than `width` digits are not
// truncated. `width` defaults to sizeof(T).
template<typename T>
static inline std::string ToHex(T i, size_t width = sizeof(T)) {
  std::stringstream hex_stream;
  hex_stream << std::hex << std::uppercase;
  hex_stream << std::setfill('0') << std::setw(static_cast<int>(width));
  hex_stream << i;
  return hex_stream.str();
}

// Overload for uint8_t, which does not work well with std::hex. The byte is
// widened to int before formatting and always rendered with a width of 2.
static inline std::string ToHex(uint8_t i) {
  const int widened = static_cast<int>(i);
  return ToHex<int>(widened, 2);
}
66 
// Status attached to a region comment. Values from 100 are warnings and
// values from 200 are errors; every value is spelled out explicitly.
enum class BinaryRegionStatus {
  OK = 0,

  // Warnings.
  WARN = 100,
  WARN_NO_REFERENCES = 101,
  WARN_CORRUPTED_PADDING = 102,
  WARN_PADDING_LENGTH = 103,

  // Errors.
  ERROR = 200,
  // An offset points outside the bounds of the binary.
  ERROR_OFFSET_OUT_OF_BINARY = 201,
  // N bytes were expected, but fewer remain in the binary.
  ERROR_INCOMPLETE_BINARY = 202,
  // The length of a vtable/vector is longer than possible.
  ERROR_LENGTH_TOO_LONG = 203,
  // The length of a vtable/vector is shorter than possible.
  ERROR_LENGTH_TOO_SHORT = 204,
  // A field marked as required is not present in the vtable.
  ERROR_REQUIRED_FIELD_NOT_PRESENT = 205,
  // A realized union type is not within the enum bounds.
  ERROR_INVALID_UNION_TYPE = 206,
  // A cycle was found in the offsets.
  ERROR_CYCLE_DETECTED = 207,
};
89 
// Identifies what a region comment describes. Values are sequential starting
// at zero and are written out explicitly.
enum class BinaryRegionCommentType {
  Unknown = 0,
  SizePrefix = 1,
  // Offset to the root table.
  RootTableOffset = 2,
  // Optional 4-char file identifier.
  FileIdentifier = 3,
  // Generic zero-filled padding.
  Padding = 4,
  // Size of the vtable.
  VTableSize = 5,
  // Size of the referring table.
  VTableRefferingTableLength = 6,
  // Offset to a vtable field.
  VTableFieldOffset = 7,
  // Offset to an unknown vtable field.
  VTableUnknownFieldOffset = 8,
  // The vtable offset of a table.
  TableVTableOffset = 9,
  // An "inline" table field value.
  TableField = 10,
  // A table field that is unknown.
  TableUnknownField = 11,
  // A table field value that points to another section.
  TableOffsetField = 12,
  // A struct field value.
  StructField = 13,
  // An array field value.
  ArrayField = 14,
  // Length of the string.
  StringLength = 15,
  // Contents of the string.
  StringValue = 16,
  // The explicit string terminator.
  StringTerminator = 17,
  // Length of the vector (number of items).
  VectorLength = 18,
  // An "inline" value of a vector.
  VectorValue = 19,
  // A vector value that points to another section.
  VectorTableValue = 20,
  VectorStringValue = 21,
  VectorUnionValue = 22,
};
134 
135 struct BinaryRegionComment {
136   BinaryRegionStatus status = BinaryRegionStatus::OK;
137 
138   // If status is non OK, this may be filled in with additional details.
139   std::string status_message;
140 
141   BinaryRegionCommentType type = BinaryRegionCommentType::Unknown;
142 
143   std::string name;
144 
145   std::string default_value;
146 
147   size_t index = 0;
148 };
149 
150 struct BinaryRegion {
151   // Offset into the binary where this region begins.
152   uint64_t offset = 0;
153 
154   // The length of this region in bytes.
155   uint64_t length = 0;
156 
157   // The underlying datatype of this region
158   BinaryRegionType type = BinaryRegionType::Unknown;
159 
160   // If `type` is an array/vector, this is the number of those types this region
161   // encompasses.
162   uint64_t array_length = 0;
163 
164   // If the is an offset to some other region, this is what it points to. The
165   // offset is relative to overall binary, not to this region.
166   uint64_t points_to_offset = 0;
167 
168   // The comment on the region.
169   BinaryRegionComment comment;
170 };
171 
// The kind of logical section a group of regions forms. Every enumerator has
// an explicitly pinned value, so the grouping below does not affect the
// numeric values.
enum class BinarySectionType {
  Unknown = 0,
  Header = 1,

  // Table-like sections.
  Table = 2,
  RootTable = 3,
  VTable = 4,
  Struct = 5,
  Union = 8,

  // String and vector sections.
  String = 6,
  Vector = 7,
  Vector64 = 10,

  Padding = 9,
};
185 
186 // A section of the binary that is grouped together in some logical manner, and
187 // often is pointed too by some other offset BinaryRegion. Sections include
188 // `tables`, `vtables`, `strings`, `vectors`, etc..
189 struct BinarySection {
190   // User-specified name of the section, if applicable.
191   std::string name;
192 
193   // The type of this section.
194   BinarySectionType type = BinarySectionType::Unknown;
195 
196   // The binary regions that make up this section, in order of their offsets.
197   std::vector<BinaryRegion> regions;
198 };
199 
GetRegionType(reflection::BaseType base_type)200 inline static BinaryRegionType GetRegionType(reflection::BaseType base_type) {
201   switch (base_type) {
202     case reflection::UType: return BinaryRegionType::UType;
203     case reflection::Bool: return BinaryRegionType::Uint8;
204     case reflection::Byte: return BinaryRegionType::Uint8;
205     case reflection::UByte: return BinaryRegionType::Uint8;
206     case reflection::Short: return BinaryRegionType::Int16;
207     case reflection::UShort: return BinaryRegionType::Uint16;
208     case reflection::Int: return BinaryRegionType::Uint32;
209     case reflection::UInt: return BinaryRegionType::Uint32;
210     case reflection::Long: return BinaryRegionType::Int64;
211     case reflection::ULong: return BinaryRegionType::Uint64;
212     case reflection::Float: return BinaryRegionType::Float;
213     case reflection::Double: return BinaryRegionType::Double;
214     default: return BinaryRegionType::Unknown;
215   }
216 }
217 
ToString(const BinaryRegionType type)218 inline static std::string ToString(const BinaryRegionType type) {
219   switch (type) {
220     case BinaryRegionType::UOffset: return "UOffset32";
221     case BinaryRegionType::UOffset64: return "UOffset64";
222     case BinaryRegionType::SOffset: return "SOffset32";
223     case BinaryRegionType::VOffset: return "VOffset16";
224     case BinaryRegionType::Bool: return "bool";
225     case BinaryRegionType::Char: return "char";
226     case BinaryRegionType::Byte: return "int8_t";
227     case BinaryRegionType::Uint8: return "uint8_t";
228     case BinaryRegionType::Uint16: return "uint16_t";
229     case BinaryRegionType::Uint32: return "uint32_t";
230     case BinaryRegionType::Uint64: return "uint64_t"; ;
231     case BinaryRegionType::Int8: return "int8_t";
232     case BinaryRegionType::Int16: return "int16_t";
233     case BinaryRegionType::Int32: return "int32_t";
234     case BinaryRegionType::Int64: return "int64_t";
235     case BinaryRegionType::Double: return "double";
236     case BinaryRegionType::Float: return "float";
237     case BinaryRegionType::UType: return "UType8";
238     case BinaryRegionType::Unknown: return "?uint8_t";
239     default: return "todo";
240   }
241 }
242 
243 class BinaryAnnotator {
244  public:
BinaryAnnotator(const uint8_t * const bfbs,const uint64_t bfbs_length,const uint8_t * const binary,const uint64_t binary_length,const bool is_size_prefixed)245   explicit BinaryAnnotator(const uint8_t *const bfbs,
246                            const uint64_t bfbs_length,
247                            const uint8_t *const binary,
248                            const uint64_t binary_length,
249                            const bool is_size_prefixed)
250       : bfbs_(bfbs),
251         bfbs_length_(bfbs_length),
252         schema_(reflection::GetSchema(bfbs)),
253         binary_(binary),
254         binary_length_(binary_length),
255         is_size_prefixed_(is_size_prefixed) {}
256 
257   std::map<uint64_t, BinarySection> Annotate();
258 
259  private:
260   struct VTable {
261     struct Entry {
262       const reflection::Field *field = nullptr;
263       uint16_t offset_from_table = 0;
264     };
265 
266     const reflection::Object *referring_table = nullptr;
267 
268     // Field ID -> {field def, offset from table}
269     std::map<uint16_t, Entry> fields;
270 
271     uint16_t vtable_size = 0;
272     uint16_t table_size = 0;
273   };
274 
275   uint64_t BuildHeader(uint64_t offset);
276 
277   // VTables can be shared across instances or even across objects. This
278   // attempts to get an existing vtable given the offset and table type,
279   // otherwise it will built the vtable, memorize it, and return the built
280   // VTable. Returns nullptr if building the VTable fails.
281   VTable *GetOrBuildVTable(uint64_t offset, const reflection::Object *table,
282                            uint64_t offset_of_referring_table);
283 
284   void BuildTable(uint64_t offset, const BinarySectionType type,
285                   const reflection::Object *table);
286 
287   uint64_t BuildStruct(uint64_t offset, std::vector<BinaryRegion> &regions,
288                        const std::string referring_field_name,
289                        const reflection::Object *structure);
290 
291   void BuildString(uint64_t offset, const reflection::Object *table,
292                    const reflection::Field *field);
293 
294   void BuildVector(uint64_t offset, const reflection::Object *table,
295                    const reflection::Field *field, uint64_t parent_table_offset,
296                    const std::map<uint16_t, VTable::Entry> vtable_fields);
297 
298   std::string BuildUnion(uint64_t offset, uint8_t realized_type,
299                          const reflection::Field *field);
300 
301   void FixMissingRegions();
302   void FixMissingSections();
303 
IsValidOffset(const uint64_t offset)304   inline bool IsValidOffset(const uint64_t offset) const {
305     return offset < binary_length_;
306   }
307 
308   // Determines if performing a GetScalar request for `T` at `offset` would read
309   // passed the end of the binary.
IsValidRead(const uint64_t offset)310   template<typename T> inline bool IsValidRead(const uint64_t offset) const {
311     return IsValidRead(offset, sizeof(T));
312   }
313 
IsValidRead(const uint64_t offset,const uint64_t length)314   inline bool IsValidRead(const uint64_t offset, const uint64_t length) const {
315     return length < binary_length_ && IsValidOffset(offset + length - 1);
316   }
317 
318   // Calculate the number of bytes remaining from the given offset. If offset is
319   // > binary_length, 0 is returned.
RemainingBytes(const uint64_t offset)320   uint64_t RemainingBytes(const uint64_t offset) const {
321     return IsValidOffset(offset) ? binary_length_ - offset : 0;
322   }
323 
324   template<typename T>
ReadScalar(const uint64_t offset)325   flatbuffers::Optional<T> ReadScalar(const uint64_t offset) const {
326     if (!IsValidRead<T>(offset)) { return flatbuffers::nullopt; }
327 
328     return flatbuffers::ReadScalar<T>(binary_ + offset);
329   }
330 
331   // Adds the provided `section` keyed by the `offset` it occurs at. If a
332   // section is already added at that offset, it doesn't replace the exisiting
333   // one.
AddSection(const uint64_t offset,const BinarySection & section)334   void AddSection(const uint64_t offset, const BinarySection &section) {
335     sections_.insert(std::make_pair(offset, section));
336   }
337 
IsInlineField(const reflection::Field * const field)338   bool IsInlineField(const reflection::Field *const field) {
339     if (field->type()->base_type() == reflection::BaseType::Obj) {
340       return schema_->objects()->Get(field->type()->index())->is_struct();
341     }
342     return IsScalar(field->type()->base_type());
343   }
344 
IsUnionType(const reflection::BaseType type)345   bool IsUnionType(const reflection::BaseType type) {
346     return (type == reflection::BaseType::UType ||
347             type == reflection::BaseType::Union);
348   }
349 
IsUnionType(const reflection::Field * const field)350   bool IsUnionType(const reflection::Field *const field) {
351     return IsUnionType(field->type()->base_type()) &&
352            field->type()->index() >= 0;
353   }
354 
IsValidUnionValue(const reflection::Field * const field,const uint8_t value)355   bool IsValidUnionValue(const reflection::Field *const field,
356                          const uint8_t value) {
357     return IsUnionType(field) &&
358            IsValidUnionValue(field->type()->index(), value);
359   }
360 
IsValidUnionValue(const uint32_t enum_id,const uint8_t value)361   bool IsValidUnionValue(const uint32_t enum_id, const uint8_t value) {
362     if (enum_id >= schema_->enums()->size()) { return false; }
363 
364     const reflection::Enum *enum_def = schema_->enums()->Get(enum_id);
365 
366     if (enum_def == nullptr) { return false; }
367 
368     return value < enum_def->values()->size();
369   }
370 
GetElementSize(const reflection::Field * const field)371   uint64_t GetElementSize(const reflection::Field *const field) {
372     if (IsScalar(field->type()->element())) {
373       return GetTypeSize(field->type()->element());
374     }
375 
376     switch (field->type()->element()) {
377       case reflection::BaseType::Obj: {
378         auto obj = schema_->objects()->Get(field->type()->index());
379         return obj->is_struct() ? obj->bytesize() : sizeof(uint32_t);
380       }
381       default: return sizeof(uint32_t);
382     }
383   }
384 
385   bool ContainsSection(const uint64_t offset);
386 
387   // The schema for the binary file
388   const uint8_t *bfbs_;
389   const uint64_t bfbs_length_;
390   const reflection::Schema *schema_;
391 
392   // The binary data itself.
393   const uint8_t *binary_;
394   const uint64_t binary_length_;
395   const bool is_size_prefixed_;
396 
397   // Map of binary offset to vtables, to dedupe vtables.
398   std::map<uint64_t, std::list<VTable>> vtables_;
399 
400   // The annotated binary sections, index by their absolute offset.
401   std::map<uint64_t, BinarySection> sections_;
402 };
403 
404 }  // namespace flatbuffers
405 
406 #endif  // FLATBUFFERS_BINARY_ANNOTATOR_H_
407