• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
19 
20 #include <stdint.h>
21 #include <array>
22 #include <memory>
23 #include <vector>
24 
25 #include "perfetto/base/compiler.h"
26 #include "perfetto/base/logging.h"
27 #include "perfetto/protozero/field.h"
28 #include "perfetto/protozero/proto_utils.h"
29 
30 namespace protozero {
31 
32 // A generic protobuf decoder. Doesn't require any knowledge about the proto
33 // schema. It tokenizes fields, retrieves their ID and type and exposes
34 // accessors to retrieve its values.
35 // It does NOT recurse in nested submessages, instead it just computes their
36 // boundaries, recursion is left to the caller.
37 // This class is designed to be used in perf-sensitive contexts. It does not
38 // allocate and does not perform any proto semantic checks (e.g. repeated /
39 // required / optional). It's supposedly safe wrt out-of-bounds memory accesses
40 // (see proto_decoder_fuzzer.cc).
41 // This class serves also as a building block for TypedProtoDecoder, used when
42 // the schema is known at compile time.
43 class PERFETTO_EXPORT_COMPONENT ProtoDecoder {
44  public:
45   // Creates a ProtoDecoder using the given |buffer| with size |length| bytes.
ProtoDecoder(const void * buffer,size_t length)46   ProtoDecoder(const void* buffer, size_t length)
47       : begin_(reinterpret_cast<const uint8_t*>(buffer)),
48         end_(begin_ + length),
49         read_ptr_(begin_) {}
ProtoDecoder(const std::string & str)50   ProtoDecoder(const std::string& str) : ProtoDecoder(str.data(), str.size()) {}
ProtoDecoder(const ConstBytes & cb)51   ProtoDecoder(const ConstBytes& cb) : ProtoDecoder(cb.data, cb.size) {}
52 
53   // Reads the next field from the buffer and advances the read cursor. If a
54   // full field cannot be read, the returned Field will be invalid (i.e.
55   // field.valid() == false).
56   Field ReadField();
57 
58   // Finds the first field with the given id. Doesn't affect the read cursor.
59   Field FindField(uint32_t field_id);
60 
61   // Resets the read cursor to the start of the buffer.
Reset()62   void Reset() { read_ptr_ = begin_; }
63 
64   // Resets the read cursor to the given position (must be within the buffer).
Reset(const uint8_t * pos)65   void Reset(const uint8_t* pos) {
66     PERFETTO_DCHECK(pos >= begin_ && pos < end_);
67     read_ptr_ = pos;
68   }
69 
70   // Returns the position of read cursor, relative to the start of the buffer.
read_offset()71   size_t read_offset() const { return static_cast<size_t>(read_ptr_ - begin_); }
72 
bytes_left()73   size_t bytes_left() const {
74     PERFETTO_DCHECK(read_ptr_ <= end_);
75     return static_cast<size_t>(end_ - read_ptr_);
76   }
77 
begin()78   const uint8_t* begin() const { return begin_; }
end()79   const uint8_t* end() const { return end_; }
80 
81  protected:
82   const uint8_t* const begin_;
83   const uint8_t* const end_;
84   const uint8_t* read_ptr_ = nullptr;
85 };
86 
87 // An iterator-like class used to iterate through repeated fields. Used by
88 // TypedProtoDecoder. The iteration sequence is a bit counter-intuitive due to
89 // the fact that fields_[field_id] holds the *last* value of the field, not the
90 // first, but the remaining storage holds repeated fields in FIFO order.
91 // Assume that we push the 10,11,12 into a repeated field with ID=1.
92 //
93 // Decoder memory layout:  [  fields storage  ] [ repeated fields storage ]
94 // 1st iteration:           10
95 // 2nd iteration:           11                   10
96 // 3rd iteration:           12                   10 11
97 //
98 // We start the iteration @ fields_[num_fields], which is the start of the
99 // repeated fields storage, proceed until the end and lastly jump @ fields_[id].
100 template <typename T>
101 class RepeatedFieldIterator {
102  public:
RepeatedFieldIterator(uint32_t field_id,const Field * begin,const Field * end,const Field * last)103   RepeatedFieldIterator(uint32_t field_id,
104                         const Field* begin,
105                         const Field* end,
106                         const Field* last)
107       : field_id_(field_id), iter_(begin), end_(end), last_(last) {
108     FindNextMatchingId();
109   }
110 
111   // Constructs an invalid iterator.
RepeatedFieldIterator()112   RepeatedFieldIterator()
113       : field_id_(0u), iter_(nullptr), end_(nullptr), last_(nullptr) {}
114 
115   explicit operator bool() const { return iter_ != end_; }
field()116   const Field& field() const { return *iter_; }
117 
118   T operator*() const {
119     T val{};
120     iter_->get(&val);
121     return val;
122   }
123   const Field* operator->() const { return iter_; }
124 
125   RepeatedFieldIterator& operator++() {
126     PERFETTO_DCHECK(iter_ != end_);
127     if (iter_ == last_) {
128       iter_ = end_;
129       return *this;
130     }
131     ++iter_;
132     FindNextMatchingId();
133     return *this;
134   }
135 
136   RepeatedFieldIterator operator++(int) {
137     PERFETTO_DCHECK(iter_ != end_);
138     RepeatedFieldIterator it(*this);
139     ++(*this);
140     return it;
141   }
142 
143  private:
FindNextMatchingId()144   void FindNextMatchingId() {
145     PERFETTO_DCHECK(iter_ != last_);
146     for (; iter_ != end_; ++iter_) {
147       if (iter_->id() == field_id_)
148         return;
149     }
150     iter_ = last_->valid() ? last_ : end_;
151   }
152 
153   uint32_t field_id_;
154 
155   // Initially points to the beginning of the repeated field storage, then is
156   // incremented as we call operator++().
157   const Field* iter_;
158 
159   // Always points to fields_[size_], i.e. past the end of the storage.
160   const Field* end_;
161 
162   // Always points to fields_[field_id].
163   const Field* last_;
164 };
165 
166 // As RepeatedFieldIterator, but allows iterating over a packed repeated field
167 // (which will be initially stored as a single length-delimited field).
168 // See |GetPackedRepeatedField| for details.
169 //
170 // Assumes little endianness, and that the input buffers are well formed -
171 // containing an exact multiple of encoded elements.
172 template <proto_utils::ProtoWireType wire_type, typename CppType>
173 class PackedRepeatedFieldIterator {
174  public:
PackedRepeatedFieldIterator(const uint8_t * data_begin,size_t size,bool * parse_error_ptr)175   PackedRepeatedFieldIterator(const uint8_t* data_begin,
176                               size_t size,
177                               bool* parse_error_ptr)
178       : data_end_(data_begin ? data_begin + size : nullptr),
179         read_ptr_(data_begin),
180         parse_error_(parse_error_ptr) {
181     using proto_utils::ProtoWireType;
182     static_assert(wire_type == ProtoWireType::kVarInt ||
183                       wire_type == ProtoWireType::kFixed32 ||
184                       wire_type == ProtoWireType::kFixed64,
185                   "invalid type");
186 
187     PERFETTO_DCHECK(parse_error_ptr);
188 
189     // Either the field is unset (and there are no data pointer), or the field
190     // is set with a zero length payload. Mark the iterator as invalid in both
191     // cases.
192     if (size == 0) {
193       curr_value_valid_ = false;
194       return;
195     }
196 
197     if ((wire_type == ProtoWireType::kFixed32 && (size % 4) != 0) ||
198         (wire_type == ProtoWireType::kFixed64 && (size % 8) != 0)) {
199       *parse_error_ = true;
200       curr_value_valid_ = false;
201       return;
202     }
203 
204     ++(*this);
205   }
206 
207   const CppType operator*() const { return curr_value_; }
208   explicit operator bool() const { return curr_value_valid_; }
209 
210   PackedRepeatedFieldIterator& operator++() {
211     using proto_utils::ProtoWireType;
212 
213     if (PERFETTO_UNLIKELY(!curr_value_valid_))
214       return *this;
215 
216     if (PERFETTO_UNLIKELY(read_ptr_ == data_end_)) {
217       curr_value_valid_ = false;
218       return *this;
219     }
220 
221     if (wire_type == ProtoWireType::kVarInt) {
222       uint64_t new_value = 0;
223       const uint8_t* new_pos =
224           proto_utils::ParseVarInt(read_ptr_, data_end_, &new_value);
225 
226       if (PERFETTO_UNLIKELY(new_pos == read_ptr_)) {
227         // Failed to decode the varint (probably incomplete buffer).
228         *parse_error_ = true;
229         curr_value_valid_ = false;
230       } else {
231         read_ptr_ = new_pos;
232         curr_value_ = static_cast<CppType>(new_value);
233       }
234     } else {  // kFixed32 or kFixed64
235       constexpr size_t kStep = wire_type == ProtoWireType::kFixed32 ? 4 : 8;
236 
237       // NB: the raw buffer is not guaranteed to be aligned, so neither are
238       // these copies.
239       memcpy(&curr_value_, read_ptr_, sizeof(CppType));
240       read_ptr_ += kStep;
241     }
242 
243     return *this;
244   }
245 
246   PackedRepeatedFieldIterator operator++(int) {
247     PackedRepeatedFieldIterator it(*this);
248     ++(*this);
249     return it;
250   }
251 
252  private:
253   // Might be null if the backing proto field isn't set.
254   const uint8_t* const data_end_;
255 
256   // The iterator looks ahead by an element, so |curr_value| holds the value
257   // to be returned when the caller dereferences the iterator, and |read_ptr_|
258   // points at the start of the next element to be decoded.
259   // |read_ptr_| might be null if the backing proto field isn't set.
260   const uint8_t* read_ptr_;
261   CppType curr_value_ = {};
262 
263   // Set to false once we've exhausted the iterator, or encountered an error.
264   bool curr_value_valid_ = true;
265 
266   // Where to set parsing errors, supplied by the caller.
267   bool* const parse_error_;
268 };
269 
270 // This decoder loads all fields upfront, without recursing in nested messages.
271 // It is used as a base class for typed decoders generated by the pbzero plugin.
272 // The split between TypedProtoDecoderBase and TypedProtoDecoder<> is to have
273 // unique definition of functions like ParseAllFields() and ExpandHeapStorage().
274 // The storage (either on-stack or on-heap) for this class is organized as
275 // follows:
276 // |-------------------------- fields_ ----------------------|
277 // [ field 0 (invalid) ] [ fields 1 .. N ] [ repeated fields ]
278 //                                        ^                  ^
279 //                                        num_fields_        size_
280 // Note that if a message has high field numbers, upon creation |size_| can be
281 // < |num_fields_| (until a heap expansion is hit while inserting).
282 class PERFETTO_EXPORT_COMPONENT TypedProtoDecoderBase : public ProtoDecoder {
283  public:
284   // If the field |id| is known at compile time, prefer the templated
285   // specialization at<kFieldNumber>().
Get(uint32_t id)286   const Field& Get(uint32_t id) const {
287     if (PERFETTO_LIKELY(id < num_fields_ && id < size_))
288       return fields_[id];
289     // If id >= num_fields_, the field id is invalid (was not known in the
290     // .proto) and we return the 0th field, which is always !valid().
291     // If id >= size_ and <= num_fields, the id is valid but the field has not
292     // been seen while decoding (hence the stack storage has not been expanded)
293     // so we return the 0th invalid field.
294     return fields_[0];
295   }
296 
297   // Returns an object that allows to iterate over all instances of a repeated
298   // field given its id. Example usage:
299   //   for (auto it = decoder.GetRepeated<int32_t>(N); it; ++it) { ... }
300   template <typename T>
GetRepeated(uint32_t field_id)301   RepeatedFieldIterator<T> GetRepeated(uint32_t field_id) const {
302     const Field* repeated_begin;
303     // The storage for repeated fields starts after the slot for the highest
304     // field id (refer to the diagram in the class-level comment). However, if
305     // a message has more than INITIAL_STACK_CAPACITY field there will be no
306     // slots available for the repeated fields (if ExpandHeapStorage() was not
307     // called). Imagine a message that has highest field id = 102 and that is
308     // still using the stack:
309     // [ F0 ] [ F1 ] ... [ F100 ] [ F101 ] [ F1012] [ repeated fields ]
310     //                                            ^ num_fields_
311     //                          ^ size (== capacity)
312     if (PERFETTO_LIKELY(num_fields_ < size_)) {
313       repeated_begin = &fields_[num_fields_];
314     } else {
315       // This is the case of not having any storage space for repeated fields.
316       // This makes it so begin == end, so the iterator will just skip @ last.
317       repeated_begin = &fields_[size_];
318     }
319     const Field* repeated_end = &fields_[size_];
320     const Field* last = &Get(field_id);
321     return RepeatedFieldIterator<T>(field_id, repeated_begin, repeated_end,
322                                     last);
323   }
324 
325   // Returns an objects that allows to iterate over all entries of a packed
326   // repeated field given its id and type. The |wire_type| is necessary for
327   // decoding the packed field, the |cpp_type| is for convenience & stronger
328   // typing.
329   //
330   // The caller must also supply a pointer to a bool that is set to true if the
331   // packed buffer is found to be malformed while iterating (so you need to
332   // exhaust the iterator if you want to check the full extent of the buffer).
333   //
334   // Note that unlike standard protobuf parsers, protozero does not allow
335   // treating of packed repeated fields as non-packed and vice-versa (therefore
336   // not making the packed option forwards and backwards compatible). So
337   // the caller needs to use the right accessor for correct results.
338   template <proto_utils::ProtoWireType wire_type, typename cpp_type>
GetPackedRepeated(uint32_t field_id,bool * parse_error_location)339   PackedRepeatedFieldIterator<wire_type, cpp_type> GetPackedRepeated(
340       uint32_t field_id,
341       bool* parse_error_location) const {
342     const Field& field = Get(field_id);
343     if (field.valid() &&
344         field.type() == proto_utils::ProtoWireType::kLengthDelimited) {
345       return PackedRepeatedFieldIterator<wire_type, cpp_type>(
346           field.data(), field.size(), parse_error_location);
347     }
348     return PackedRepeatedFieldIterator<wire_type, cpp_type>(
349         nullptr, 0, parse_error_location);
350   }
351 
352  protected:
TypedProtoDecoderBase(Field * storage,uint32_t num_fields,uint32_t capacity,const uint8_t * buffer,size_t length)353   TypedProtoDecoderBase(Field* storage,
354                         uint32_t num_fields,
355                         uint32_t capacity,
356                         const uint8_t* buffer,
357                         size_t length)
358       : ProtoDecoder(buffer, length),
359         fields_(storage),
360         num_fields_(num_fields),
361         // The reason for "capacity -1" is to avoid hitting the expansion path
362         // in TypedProtoDecoderBase::ParseAllFields() when we are just setting
363         // fields < INITIAL_STACK_CAPACITY (which is the most common case).
364         size_(std::min(num_fields, capacity - 1)),
365         capacity_(capacity) {
366     // The reason why Field needs to be trivially de/constructible is to avoid
367     // implicit initializers on all the ~1000 entries. We need it to initialize
368     // only on the first |max_field_id| fields, the remaining capacity doesn't
369     // require initialization.
370     static_assert(std::is_trivially_constructible<Field>::value &&
371                       std::is_trivially_destructible<Field>::value &&
372                       std::is_trivial<Field>::value,
373                   "Field must be a trivial aggregate type");
374     memset(fields_, 0, sizeof(Field) * capacity_);
375     PERFETTO_DCHECK(capacity > 0);
376   }
377 
378   void ParseAllFields();
379 
380   // Called when the default on-stack storage is exhausted and new repeated
381   // fields need to be pushed.
382   void ExpandHeapStorage();
383 
384   // Used only in presence of a large number of repeated fields, when the
385   // default on-stack storage is exhausted.
386   std::unique_ptr<Field[]> heap_storage_;
387 
388   // Points to the storage, either on-stack (default, provided by the template
389   // specialization) or |heap_storage_| after ExpandHeapStorage() is called, in
390   // case of a large number of repeated fields.
391   Field* fields_;
392 
393   // Number of known fields, without accounting repeated storage. This is equal
394   // to MAX_FIELD_ID + 1 (to account for the invalid 0th field). It never
395   // changes after construction.
396   // This is unrelated with |size_| and |capacity_|. If the highest field id of
397   // a proto message is 131, |num_fields_| will be = 132 but, on initialization,
398   // |size_| = |capacity_| = 100 (INITIAL_STACK_CAPACITY).
399   // One cannot generally assume that |fields_| has enough storage to
400   // dereference every field. That is only true:
401   // - For field ids < INITIAL_STACK_CAPACITY.
402   // - After the first call to ExpandHeapStorage().
403   uint32_t num_fields_;
404 
405   // Number of active |fields_| entries. This is initially equal to
406   // min(num_fields_, INITIAL_STACK_CAPACITY - 1) and after ExpandHeapStorage()
407   // becomes == |num_fields_|. If the message has non-packed repeated fields, it
408   // can grow further, up to |capacity_|.
409   // |size_| is always <= |capacity_|. But |num_fields_| can be > |size_|.
410   uint32_t size_;
411 
412   // Initially equal to kFieldsCapacity of the TypedProtoDecoder
413   // specialization. Can grow when falling back on heap-based storage, in which
414   // case it represents the size (#fields with each entry of a repeated field
415   // counted individually) of the |heap_storage_| array.
416   uint32_t capacity_;
417 };
418 
419 // This constant is a tradeoff between having a larger stack frame and being
420 // able to decode field IDs up to N (or N - num_fields repeated fields) without
421 // falling back on the heap.
422 #define PROTOZERO_DECODER_INITIAL_STACK_CAPACITY 100
423 
424 // Template class instantiated by the auto-generated decoder classes declared in
425 // xxx.pbzero.h files.
426 template <int MAX_FIELD_ID, bool HAS_NONPACKED_REPEATED_FIELDS>
427 class TypedProtoDecoder : public TypedProtoDecoderBase {
428  public:
TypedProtoDecoder(const uint8_t * buffer,size_t length)429   TypedProtoDecoder(const uint8_t* buffer, size_t length)
430       : TypedProtoDecoderBase(on_stack_storage_,
431                               /*num_fields=*/MAX_FIELD_ID + 1,
432                               PROTOZERO_DECODER_INITIAL_STACK_CAPACITY,
433                               buffer,
434                               length) {
435     TypedProtoDecoderBase::ParseAllFields();
436   }
437 
438   template <uint32_t FIELD_ID>
at()439   const Field& at() const {
440     static_assert(FIELD_ID <= MAX_FIELD_ID, "FIELD_ID > MAX_FIELD_ID");
441     // If the field id is < the on-stack capacity, it's safe to always
442     // dereference |fields_|, whether it's still using the stack or it fell
443     // back on the heap. Because both terms of the if () are known at compile
444     // time, the compiler elides the branch for ids < INITIAL_STACK_CAPACITY.
445     if (FIELD_ID < PROTOZERO_DECODER_INITIAL_STACK_CAPACITY) {
446       return fields_[FIELD_ID];
447     } else {
448       // Otherwise use the slowpath Get() which will do a runtime check.
449       return Get(FIELD_ID);
450     }
451   }
452 
TypedProtoDecoder(TypedProtoDecoder && other)453   TypedProtoDecoder(TypedProtoDecoder&& other) noexcept
454       : TypedProtoDecoderBase(std::move(other)) {
455     // If the moved-from decoder was using on-stack storage, we need to update
456     // our pointer to point to this decoder's on-stack storage.
457     if (fields_ == other.on_stack_storage_) {
458       fields_ = on_stack_storage_;
459       memcpy(on_stack_storage_, other.on_stack_storage_,
460              sizeof(on_stack_storage_));
461     }
462   }
463 
464  private:
465   Field on_stack_storage_[PROTOZERO_DECODER_INITIAL_STACK_CAPACITY];
466 };
467 
468 }  // namespace protozero
469 
470 #endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_
471