1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the LICENSE file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 #ifndef SRC_BYTE_PARSER_H_ 9 #define SRC_BYTE_PARSER_H_ 10 11 #include <cassert> 12 #include <cstdint> 13 #include <string> 14 #include <utility> 15 #include <vector> 16 17 #include "src/element_parser.h" 18 #include "webm/callback.h" 19 #include "webm/element.h" 20 #include "webm/reader.h" 21 #include "webm/status.h" 22 23 namespace webm { 24 25 // Parses an EBML string (UTF-8 and ASCII) or binary element from a byte stream. 26 // Spec reference for string/binary elements: 27 // http://matroska.org/technical/specs/index.html#EBML_ex 28 // https://github.com/Matroska-Org/ebml-specification/blob/master/specification.markdown#ebml-element-types 29 template <typename T> 30 class ByteParser : public ElementParser { 31 public: 32 static_assert(std::is_same<T, std::vector<std::uint8_t>>::value || 33 std::is_same<T, std::string>::value, 34 "T must be std::vector<std::uint8_t> or std::string"); 35 36 // Constructs a new parser which will use the given default_value as the 37 // value for the element if its size is zero. Defaults to the empty string 38 // or empty binary element (as the EBML spec indicates). 39 explicit ByteParser(T default_value = {}) default_value_(std::move (default_value))40 : default_value_(std::move(default_value)) {} 41 42 ByteParser(ByteParser&&) = default; 43 ByteParser& operator=(ByteParser&&) = default; 44 45 ByteParser(const ByteParser&) = delete; 46 ByteParser& operator=(const ByteParser&) = delete; 47 Init(const ElementMetadata & metadata,std::uint64_t max_size)48 Status Init(const ElementMetadata& metadata, 49 std::uint64_t max_size) override { 50 assert(metadata.size == kUnknownElementSize || metadata.size <= max_size); 51 52 if (metadata.size == kUnknownElementSize) { 53 return Status(Status::kInvalidElementSize); 54 } 55 56 if (metadata.size > std::numeric_limits<std::size_t>::max() || 57 metadata.size > value_.max_size()) { 58 return Status(Status::kNotEnoughMemory); 59 } 60 61 #if WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT 62 // AFL and ASan just kill the process if too much memory is allocated, so 63 // let's cap the maximum size of the element. It's too easy for the fuzzer 64 // to make an element with a ridiculously huge size, and that just creates 65 // uninteresting false positives. 66 if (metadata.size > WEBM_FUZZER_BYTE_ELEMENT_SIZE_LIMIT) { 67 return Status(Status::kNotEnoughMemory); 68 } 69 #endif 70 71 if (metadata.size == 0) { 72 value_ = default_value_; 73 total_read_ = default_value_.size(); 74 } else { 75 value_.resize(static_cast<std::size_t>(metadata.size)); 76 total_read_ = 0; 77 } 78 79 return Status(Status::kOkCompleted); 80 } 81 Feed(Callback * callback,Reader * reader,std::uint64_t * num_bytes_read)82 Status Feed(Callback* callback, Reader* reader, 83 std::uint64_t* num_bytes_read) override { 84 assert(callback != nullptr); 85 assert(reader != nullptr); 86 assert(num_bytes_read != nullptr); 87 88 *num_bytes_read = 0; 89 90 if (total_read_ == value_.size()) { 91 return Status(Status::kOkCompleted); 92 } 93 94 Status status; 95 do { 96 std::uint64_t local_num_bytes_read = 0; 97 std::uint8_t* buffer = 98 reinterpret_cast<std::uint8_t*>(&value_.front()) + total_read_; 99 std::size_t buffer_size = value_.size() - total_read_; 100 status = reader->Read(buffer_size, buffer, &local_num_bytes_read); 101 assert((status.completed_ok() && local_num_bytes_read == buffer_size) || 102 (status.ok() && local_num_bytes_read < buffer_size) || 103 (!status.ok() && local_num_bytes_read == 0)); 104 *num_bytes_read += local_num_bytes_read; 105 total_read_ += static_cast<std::size_t>(local_num_bytes_read); 106 } while (status.code == Status::kOkPartial); 107 108 // UTF-8 and ASCII string elements can be padded with NUL characters at the 109 // end, which should be ignored. 110 if (std::is_same<T, std::string>::value && status.completed_ok()) { 111 while (!value_.empty() && value_.back() == '\0') { 112 value_.pop_back(); 113 } 114 } 115 116 return status; 117 } 118 119 // Gets the parsed value. This must not be called until the parse has been 120 // successfully completed. value()121 const T& value() const { 122 assert(total_read_ >= value_.size()); 123 return value_; 124 } 125 126 // Gets the parsed value. This must not be called until the parse has been 127 // successfully completed. mutable_value()128 T* mutable_value() { 129 assert(total_read_ >= value_.size()); 130 return &value_; 131 } 132 133 private: 134 T value_; 135 T default_value_; 136 std::size_t total_read_; 137 }; 138 139 using StringParser = ByteParser<std::string>; 140 using BinaryParser = ByteParser<std::vector<std::uint8_t>>; 141 142 } // namespace webm 143 144 #endif // SRC_BYTE_PARSER_H_ 145