1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 19 20 #include <stdint.h> 21 #include <array> 22 #include <memory> 23 #include <vector> 24 25 #include "perfetto/base/compiler.h" 26 #include "perfetto/base/logging.h" 27 #include "perfetto/protozero/field.h" 28 #include "perfetto/protozero/proto_utils.h" 29 30 namespace protozero { 31 32 // A generic protobuf decoder. Doesn't require any knowledge about the proto 33 // schema. It tokenizes fields, retrieves their ID and type and exposes 34 // accessors to retrieve its values. 35 // It does NOT recurse in nested submessages, instead it just computes their 36 // boundaries, recursion is left to the caller. 37 // This class is designed to be used in perf-sensitive contexts. It does not 38 // allocate and does not perform any proto semantic checks (e.g. repeated / 39 // required / optional). It's supposedly safe wrt out-of-bounds memory accesses 40 // (see proto_decoder_fuzzer.cc). 41 // This class serves also as a building block for TypedProtoDecoder, used when 42 // the schema is known at compile time. 43 class PERFETTO_EXPORT ProtoDecoder { 44 public: 45 // Creates a ProtoDecoder using the given |buffer| with size |length| bytes. ProtoDecoder(const void * buffer,size_t length)46 ProtoDecoder(const void* buffer, size_t length) 47 : begin_(reinterpret_cast<const uint8_t*>(buffer)), 48 end_(begin_ + length), 49 read_ptr_(begin_) {} ProtoDecoder(const std::string & str)50 ProtoDecoder(const std::string& str) : ProtoDecoder(str.data(), str.size()) {} ProtoDecoder(const ConstBytes & cb)51 ProtoDecoder(const ConstBytes& cb) : ProtoDecoder(cb.data, cb.size) {} 52 53 // Reads the next field from the buffer and advances the read cursor. If a 54 // full field cannot be read, the returned Field will be invalid (i.e. 55 // field.valid() == false). 56 Field ReadField(); 57 58 // Finds the first field with the given id. Doesn't affect the read cursor. 59 Field FindField(uint32_t field_id); 60 61 // Resets the read cursor to the start of the buffer. Reset()62 void Reset() { read_ptr_ = begin_; } 63 64 // Resets the read cursor to the given position (must be within the buffer). Reset(const uint8_t * pos)65 void Reset(const uint8_t* pos) { 66 PERFETTO_DCHECK(pos >= begin_ && pos < end_); 67 read_ptr_ = pos; 68 } 69 70 // Returns the position of read cursor, relative to the start of the buffer. read_offset()71 size_t read_offset() const { return static_cast<size_t>(read_ptr_ - begin_); } 72 bytes_left()73 size_t bytes_left() const { 74 PERFETTO_DCHECK(read_ptr_ <= end_); 75 return static_cast<size_t>(end_ - read_ptr_); 76 } 77 begin()78 const uint8_t* begin() const { return begin_; } end()79 const uint8_t* end() const { return end_; } 80 81 protected: 82 const uint8_t* const begin_; 83 const uint8_t* const end_; 84 const uint8_t* read_ptr_ = nullptr; 85 }; 86 87 // An iterator-like class used to iterate through repeated fields. Used by 88 // TypedProtoDecoder. The iteration sequence is a bit counter-intuitive due to 89 // the fact that fields_[field_id] holds the *last* value of the field, not the 90 // first, but the remaining storage holds repeated fields in FIFO order. 91 // Assume that we push the 10,11,12 into a repeated field with ID=1. 92 // 93 // Decoder memory layout: [ fields storage ] [ repeated fields storage ] 94 // 1st iteration: 10 95 // 2nd iteration: 11 10 96 // 3rd iteration: 12 10 11 97 // 98 // We start the iteration @ fields_[num_fields], which is the start of the 99 // repeated fields storage, proceed until the end and lastly jump @ fields_[id]. 100 template <typename T> 101 class RepeatedFieldIterator { 102 public: RepeatedFieldIterator(uint32_t field_id,const Field * begin,const Field * end,const Field * last)103 RepeatedFieldIterator(uint32_t field_id, 104 const Field* begin, 105 const Field* end, 106 const Field* last) 107 : field_id_(field_id), iter_(begin), end_(end), last_(last) { 108 FindNextMatchingId(); 109 } 110 111 // Constructs an invalid iterator. RepeatedFieldIterator()112 RepeatedFieldIterator() 113 : field_id_(0u), iter_(nullptr), end_(nullptr), last_(nullptr) {} 114 115 explicit operator bool() const { return iter_ != end_; } field()116 const Field& field() const { return *iter_; } 117 118 T operator*() const { 119 T val{}; 120 iter_->get(&val); 121 return val; 122 } 123 const Field* operator->() const { return iter_; } 124 125 RepeatedFieldIterator& operator++() { 126 PERFETTO_DCHECK(iter_ != end_); 127 if (iter_ == last_) { 128 iter_ = end_; 129 return *this; 130 } 131 ++iter_; 132 FindNextMatchingId(); 133 return *this; 134 } 135 136 RepeatedFieldIterator operator++(int) { 137 PERFETTO_DCHECK(iter_ != end_); 138 RepeatedFieldIterator it(*this); 139 ++(*this); 140 return it; 141 } 142 143 private: FindNextMatchingId()144 void FindNextMatchingId() { 145 PERFETTO_DCHECK(iter_ != last_); 146 for (; iter_ != end_; ++iter_) { 147 if (iter_->id() == field_id_) 148 return; 149 } 150 iter_ = last_->valid() ? last_ : end_; 151 } 152 153 uint32_t field_id_; 154 155 // Initially points to the beginning of the repeated field storage, then is 156 // incremented as we call operator++(). 157 const Field* iter_; 158 159 // Always points to fields_[size_], i.e. past the end of the storage. 160 const Field* end_; 161 162 // Always points to fields_[field_id]. 163 const Field* last_; 164 }; 165 166 // As RepeatedFieldIterator, but allows iterating over a packed repeated field 167 // (which will be initially stored as a single length-delimited field). 168 // See |GetPackedRepeatedField| for details. 169 // 170 // Assumes little endianness, and that the input buffers are well formed - 171 // containing an exact multiple of encoded elements. 172 template <proto_utils::ProtoWireType wire_type, typename CppType> 173 class PackedRepeatedFieldIterator { 174 public: PackedRepeatedFieldIterator(const uint8_t * data_begin,size_t size,bool * parse_error_ptr)175 PackedRepeatedFieldIterator(const uint8_t* data_begin, 176 size_t size, 177 bool* parse_error_ptr) 178 : data_end_(data_begin ? data_begin + size : nullptr), 179 read_ptr_(data_begin), 180 parse_error_(parse_error_ptr) { 181 using proto_utils::ProtoWireType; 182 static_assert(wire_type == ProtoWireType::kVarInt || 183 wire_type == ProtoWireType::kFixed32 || 184 wire_type == ProtoWireType::kFixed64, 185 "invalid type"); 186 187 PERFETTO_DCHECK(parse_error_ptr); 188 189 // Either the field is unset (and there are no data pointer), or the field 190 // is set with a zero length payload. Mark the iterator as invalid in both 191 // cases. 192 if (size == 0) { 193 curr_value_valid_ = false; 194 return; 195 } 196 197 if ((wire_type == ProtoWireType::kFixed32 && (size % 4) != 0) || 198 (wire_type == ProtoWireType::kFixed64 && (size % 8) != 0)) { 199 *parse_error_ = true; 200 curr_value_valid_ = false; 201 return; 202 } 203 204 ++(*this); 205 } 206 207 const CppType operator*() const { return curr_value_; } 208 explicit operator bool() const { return curr_value_valid_; } 209 210 PackedRepeatedFieldIterator& operator++() { 211 using proto_utils::ProtoWireType; 212 213 if (PERFETTO_UNLIKELY(!curr_value_valid_)) 214 return *this; 215 216 if (PERFETTO_UNLIKELY(read_ptr_ == data_end_)) { 217 curr_value_valid_ = false; 218 return *this; 219 } 220 221 if (wire_type == ProtoWireType::kVarInt) { 222 uint64_t new_value = 0; 223 const uint8_t* new_pos = 224 proto_utils::ParseVarInt(read_ptr_, data_end_, &new_value); 225 226 if (PERFETTO_UNLIKELY(new_pos == read_ptr_)) { 227 // Failed to decode the varint (probably incomplete buffer). 228 *parse_error_ = true; 229 curr_value_valid_ = false; 230 } else { 231 read_ptr_ = new_pos; 232 curr_value_ = static_cast<CppType>(new_value); 233 } 234 } else { // kFixed32 or kFixed64 235 constexpr size_t kStep = wire_type == ProtoWireType::kFixed32 ? 4 : 8; 236 237 // NB: the raw buffer is not guaranteed to be aligned, so neither are 238 // these copies. 239 memcpy(&curr_value_, read_ptr_, sizeof(CppType)); 240 read_ptr_ += kStep; 241 } 242 243 return *this; 244 } 245 246 PackedRepeatedFieldIterator operator++(int) { 247 PackedRepeatedFieldIterator it(*this); 248 ++(*this); 249 return it; 250 } 251 252 private: 253 // Might be null if the backing proto field isn't set. 254 const uint8_t* const data_end_; 255 256 // The iterator looks ahead by an element, so |curr_value| holds the value 257 // to be returned when the caller dereferences the iterator, and |read_ptr_| 258 // points at the start of the next element to be decoded. 259 // |read_ptr_| might be null if the backing proto field isn't set. 260 const uint8_t* read_ptr_; 261 CppType curr_value_ = 0; 262 263 // Set to false once we've exhausted the iterator, or encountered an error. 264 bool curr_value_valid_ = true; 265 266 // Where to set parsing errors, supplied by the caller. 267 bool* const parse_error_; 268 }; 269 270 // This decoder loads all fields upfront, without recursing in nested messages. 271 // It is used as a base class for typed decoders generated by the pbzero plugin. 272 // The split between TypedProtoDecoderBase and TypedProtoDecoder<> is to have 273 // unique definition of functions like ParseAllFields() and ExpandHeapStorage(). 274 // The storage (either on-stack or on-heap) for this class is organized as 275 // follows: 276 // |-------------------------- fields_ ----------------------| 277 // [ field 0 (invalid) ] [ fields 1 .. N ] [ repeated fields ] 278 // ^ ^ 279 // num_fields_ size_ 280 class PERFETTO_EXPORT TypedProtoDecoderBase : public ProtoDecoder { 281 public: 282 // If the field |id| is known at compile time, prefer the templated 283 // specialization at<kFieldNumber>(). Get(uint32_t id)284 const Field& Get(uint32_t id) const { 285 return PERFETTO_LIKELY(id < num_fields_) ? fields_[id] : fields_[0]; 286 } 287 288 // Returns an object that allows to iterate over all instances of a repeated 289 // field given its id. Example usage: 290 // for (auto it = decoder.GetRepeated<int32_t>(N); it; ++it) { ... } 291 template <typename T> GetRepeated(uint32_t field_id)292 RepeatedFieldIterator<T> GetRepeated(uint32_t field_id) const { 293 return RepeatedFieldIterator<T>(field_id, &fields_[num_fields_], 294 &fields_[size_], &fields_[field_id]); 295 } 296 297 // Returns an objects that allows to iterate over all entries of a packed 298 // repeated field given its id and type. The |wire_type| is necessary for 299 // decoding the packed field, the |cpp_type| is for convenience & stronger 300 // typing. 301 // 302 // The caller must also supply a pointer to a bool that is set to true if the 303 // packed buffer is found to be malformed while iterating (so you need to 304 // exhaust the iterator if you want to check the full extent of the buffer). 305 // 306 // Note that unlike standard protobuf parsers, protozero does not allow 307 // treating of packed repeated fields as non-packed and vice-versa (therefore 308 // not making the packed option forwards and backwards compatible). So 309 // the caller needs to use the right accessor for correct results. 310 template <proto_utils::ProtoWireType wire_type, typename cpp_type> GetPackedRepeated(uint32_t field_id,bool * parse_error_location)311 PackedRepeatedFieldIterator<wire_type, cpp_type> GetPackedRepeated( 312 uint32_t field_id, 313 bool* parse_error_location) const { 314 const Field& field = Get(field_id); 315 if (field.valid()) { 316 return PackedRepeatedFieldIterator<wire_type, cpp_type>( 317 field.data(), field.size(), parse_error_location); 318 } else { 319 return PackedRepeatedFieldIterator<wire_type, cpp_type>( 320 nullptr, 0, parse_error_location); 321 } 322 } 323 324 protected: TypedProtoDecoderBase(Field * storage,uint32_t num_fields,uint32_t capacity,const uint8_t * buffer,size_t length)325 TypedProtoDecoderBase(Field* storage, 326 uint32_t num_fields, 327 uint32_t capacity, 328 const uint8_t* buffer, 329 size_t length) 330 : ProtoDecoder(buffer, length), 331 fields_(storage), 332 num_fields_(num_fields), 333 size_(num_fields), 334 capacity_(capacity) { 335 // The reason why Field needs to be trivially de/constructible is to avoid 336 // implicit initializers on all the ~1000 entries. We need it to initialize 337 // only on the first |max_field_id| fields, the remaining capacity doesn't 338 // require initialization. 339 static_assert(std::is_trivially_constructible<Field>::value && 340 std::is_trivially_destructible<Field>::value && 341 std::is_trivial<Field>::value, 342 "Field must be a trivial aggregate type"); 343 memset(fields_, 0, sizeof(Field) * num_fields_); 344 } 345 346 void ParseAllFields(); 347 348 // Called when the default on-stack storage is exhausted and new repeated 349 // fields need to be pushed. 350 void ExpandHeapStorage(); 351 352 // Used only in presence of a large number of repeated fields, when the 353 // default on-stack storage is exhausted. 354 std::unique_ptr<Field[]> heap_storage_; 355 356 // Points to the storage, either on-stack (default, provided by the template 357 // specialization) or |heap_storage_| after ExpandHeapStorage() is called, in 358 // case of a large number of repeated fields. 359 Field* fields_; 360 361 // Number of fields without accounting repeated storage. This is equal to 362 // MAX_FIELD_ID + 1 (to account for the invalid 0th field). 363 // This value is always <= size_ (and hence <= capacity); 364 uint32_t num_fields_; 365 366 // Number of active |fields_| entries. This is initially equal to the highest 367 // number of fields for the message (num_fields_ == MAX_FIELD_ID + 1) and can 368 // grow up to |capacity_| in the case of repeated fields. 369 uint32_t size_; 370 371 // Initially equal to kFieldsCapacity of the TypedProtoDecoder 372 // specialization. Can grow when falling back on heap-based storage, in which 373 // case it represents the size (#fields with each entry of a repeated field 374 // counted individually) of the |heap_storage_| array. 375 uint32_t capacity_; 376 }; 377 378 // Template class instantiated by the auto-generated decoder classes declared in 379 // xxx.pbzero.h files. 380 template <int MAX_FIELD_ID, bool HAS_NONPACKED_REPEATED_FIELDS> 381 class TypedProtoDecoder : public TypedProtoDecoderBase { 382 public: TypedProtoDecoder(const uint8_t * buffer,size_t length)383 TypedProtoDecoder(const uint8_t* buffer, size_t length) 384 : TypedProtoDecoderBase(on_stack_storage_, 385 /*num_fields=*/MAX_FIELD_ID + 1, 386 kCapacity, 387 buffer, 388 length) { 389 static_assert(MAX_FIELD_ID <= kMaxDecoderFieldId, "Field ordinal too high"); 390 TypedProtoDecoderBase::ParseAllFields(); 391 } 392 393 template <uint32_t FIELD_ID> at()394 const Field& at() const { 395 static_assert(FIELD_ID <= MAX_FIELD_ID, "FIELD_ID > MAX_FIELD_ID"); 396 return fields_[FIELD_ID]; 397 } 398 TypedProtoDecoder(TypedProtoDecoder && other)399 TypedProtoDecoder(TypedProtoDecoder&& other) noexcept 400 : TypedProtoDecoderBase(std::move(other)) { 401 // If the moved-from decoder was using on-stack storage, we need to update 402 // our pointer to point to this decoder's on-stack storage. 403 if (fields_ == other.on_stack_storage_) { 404 fields_ = on_stack_storage_; 405 memcpy(on_stack_storage_, other.on_stack_storage_, 406 sizeof(on_stack_storage_)); 407 } 408 } 409 410 private: 411 // In the case of non-repeated fields, this constant defines the highest field 412 // id we are able to decode. This is to limit the on-stack storage. 413 // In the case of repeated fields, this constant defines the max number of 414 // repeated fields that we'll be able to store before falling back on the 415 // heap. Keep this value in sync with the one in protozero_generator.cc. 416 static constexpr size_t kMaxDecoderFieldId = 999; 417 418 // If we the message has no repeated fields we need at most N Field entries 419 // in the on-stack storage, where N is the highest field id. 420 // Otherwise we need some room to store repeated fields. 421 static constexpr size_t kCapacity = 422 1 + (HAS_NONPACKED_REPEATED_FIELDS ? kMaxDecoderFieldId : MAX_FIELD_ID); 423 424 Field on_stack_storage_[kCapacity]; 425 }; 426 427 } // namespace protozero 428 429 #endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_DECODER_H_ 430