1 // Copyright (c) 2016 The WebM project authors. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the LICENSE file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 #ifndef SRC_MASTER_PARSER_H_ 9 #define SRC_MASTER_PARSER_H_ 10 11 #include <cassert> 12 #include <cstdint> 13 #include <functional> 14 #include <memory> 15 #include <type_traits> 16 #include <unordered_map> 17 #include <utility> 18 19 #include "src/element_parser.h" 20 #include "src/id_parser.h" 21 #include "src/size_parser.h" 22 #include "src/skip_parser.h" 23 #include "src/unknown_parser.h" 24 #include "src/void_parser.h" 25 #include "webm/callback.h" 26 #include "webm/element.h" 27 #include "webm/id.h" 28 #include "webm/reader.h" 29 #include "webm/status.h" 30 31 namespace webm { 32 33 // A general purpose parser for EBML master elements. 34 // 35 // For example, if a document specification defines a Foo master element that 36 // has two boolean children (Bar and Baz), then a FooParser capable of parsing 37 // the Foo master element could be defined as follows: 38 // 39 // struct FooParser : public MasterParser { 40 // FooParser() 41 // : MasterParser(MakeChild<BoolParser>(Id::kBar), 42 // MakeChild<BoolParser>(Id::kBaz)) {} 43 // }; 44 // 45 // See the MasterValueParser for an alternative class for parsing master 46 // elements into a data structure. 47 class MasterParser : public ElementParser { 48 public: 49 // Constructs a new MasterParser that uses the given 50 // {Id, std::unique_ptr<ElementParser>} pairs to map child IDs to the 51 // appropriate parser/handler. Each argument must be of type 52 // std::pair<Id, std::unique_ptr<ElementParser>>. If a parser is not 53 // explicitly provided for Id::kVoid, a VoidParser will automatically be used 54 // for it. 55 // 56 // Initializer lists don't support move-only types (i.e. std::unique_ptr), so 57 // instead a variadic template is used. 58 template <typename... T> MasterParser(T &&...parser_pairs)59 explicit MasterParser(T&&... parser_pairs) { 60 // Prefer an odd reserve size. This makes libc++ use a prime number for the 61 // bucket count. Otherwise, if it happens to be a power of 2, then libc++ 62 // will use a power-of-2 bucket count (and since Matroska EBML IDs have low 63 // entropy in the low bits, there will be a lot of collisions). libstdc++ 64 // always prefers a prime bucket count. I'm not sure how MSVC or others are 65 // implemented, but this shouldn't adversely affect them even if they are 66 // implemented differently. Add one to the count because we'll likely need 67 // to insert a parser for Id::kVoid. 68 parsers_.reserve((sizeof...(T) + 1) | 1); 69 70 // This dummy initializer list is just used to force the parameter pack to 71 // be expanded, which turns the expression into a for-each "loop" that 72 // inserts each argument into the map. 73 auto dummy = {0, (InsertParser(std::forward<T>(parser_pairs)), 0)...}; 74 (void)dummy; // Silence unused variable warning. 75 76 if (parsers_.find(Id::kVoid) == parsers_.end()) { 77 InsertParser(MakeChild<VoidParser>(Id::kVoid)); 78 } 79 } 80 81 MasterParser(const MasterParser&) = delete; 82 MasterParser& operator=(const MasterParser&) = delete; 83 84 Status Init(const ElementMetadata& metadata, std::uint64_t max_size) override; 85 86 void InitAfterSeek(const Ancestory& child_ancestory, 87 const ElementMetadata& child_metadata) override; 88 89 Status Feed(Callback* callback, Reader* reader, 90 std::uint64_t* num_bytes_read) override; 91 92 bool GetCachedMetadata(ElementMetadata* metadata) override; 93 header_size()94 std::uint32_t header_size() const { return header_size_; } 95 96 // Gets the size of this element. May be called before the parse is fully 97 // complete (but only after Init() has already been called and successfully 98 // returned). size()99 std::uint64_t size() const { return my_size_; } 100 101 // Gets absolute byte position of the start of the element in the byte stream. 102 // May be called before the parse is fully complete (but only after Init() has 103 // already been called and successfully returned). position()104 std::uint64_t position() const { return my_position_; } 105 106 // Gets the metadata for the child that is currently being parsed. This may 107 // only be called while the child's body (not its header information like ID 108 // and size) is being parsed. child_metadata()109 const ElementMetadata& child_metadata() const { 110 assert(state_ == State::kValidatingChildSize || 111 state_ == State::kGettingAction || 112 state_ == State::kInitializingChildParser || 113 state_ == State::kReadingChildBody); 114 return child_metadata_; 115 } 116 117 protected: 118 // Allocates a new parser of type T, forwarding args to the constructor, and 119 // creates a std::pair<Id, std::unique_ptr<ElementParser>> using the given id 120 // and the allocated parser. 121 template <typename T, typename... Args> MakeChild(Id id,Args &&...args)122 static std::pair<Id, std::unique_ptr<ElementParser>> MakeChild( 123 Id id, Args&&... args) { 124 std::unique_ptr<ElementParser> ptr(new T(std::forward<Args>(args)...)); 125 return std::pair<Id, std::unique_ptr<ElementParser>>(id, std::move(ptr)); 126 } 127 128 private: 129 // Parsing states for the finite-state machine. 130 enum class State { 131 /* clang-format off */ 132 // State Transitions to state When 133 kFirstReadOfChildId, // kFinishingReadingChildId size(id) > 1 134 // kReadingChildSize size(id) == 1 135 // kEndReached EOF 136 kFinishingReadingChildId, // kReadingChildSize done 137 kReadingChildSize, // kValidatingChildSize done 138 kValidatingChildSize, // kGettingAction done 139 // kEndReached unknown id & unsized 140 kGettingAction, // kInitializingChildParser done 141 kInitializingChildParser, // kReadingChildBody done 142 kReadingChildBody, // kChildFullyParsed child parse done 143 kChildFullyParsed, // kValidatingChildSize cached metadata 144 // kFirstReadOfChildId read < my_size_ 145 // kEndReached read == my_size_ 146 kEndReached, // No transitions from here (must call Init) 147 /* clang-format on */ 148 }; 149 150 using StdHashId = std::hash<std::underlying_type<Id>::type>; 151 152 // Hash functor for hashing Id enums for storage in std::unordered_map. 153 struct IdHash : StdHashId { 154 // Type aliases for conforming to the std::hash interface. 155 using argument_type = Id; 156 using result_type = StdHashId::result_type; 157 158 // Returns the hash of the given id. operatorIdHash159 result_type operator()(argument_type id) const { 160 return StdHashId::operator()(static_cast<StdHashId::argument_type>(id)); 161 } 162 }; 163 164 // The parser for parsing element Ids. 165 IdParser id_parser_; 166 167 // The parser for parsing element sizes. 168 SizeParser size_parser_; 169 170 // Metadata for the child element that is currently being parsed. 171 ElementMetadata child_metadata_; 172 173 // Maps child IDs to the appropriate parser that can handle that child. 174 std::unordered_map<Id, std::unique_ptr<ElementParser>, IdHash> parsers_; 175 176 // The parser that is used to parse unknown children. 177 UnknownParser unknown_parser_; 178 179 // The parser that is used to skip over children. 180 SkipParser skip_parser_; 181 182 // The parser that is being used to parse the current child. This must be null 183 // or a pointer in parsers_. 184 ElementParser* child_parser_; 185 186 // The current parsing action for the child that is currently being parsed. 187 Action action_ = Action::kRead; 188 189 // The current state of the parser. 190 State state_; 191 192 std::uint32_t header_size_; 193 194 // The size of this element. 195 std::uint64_t my_size_; 196 197 std::uint64_t my_position_; 198 199 std::uint64_t max_size_; 200 201 // The total number of bytes read by this parser. 202 std::uint64_t total_bytes_read_; 203 204 // Set to true if parsing has completed and this parser consumed an extra 205 // element header (ID and size) that wasn't from a child. 206 bool has_cached_metadata_ = false; 207 208 // Inserts the parser into the parsers_ map and asserts it is the only parser 209 // registers to parse the corresponding Id. 210 template <typename T> InsertParser(T && parser)211 void InsertParser(T&& parser) { 212 bool inserted = parsers_.insert(std::forward<T>(parser)).second; 213 (void)inserted; // Silence unused variable warning. 214 assert(inserted); // Make sure there aren't duplicates. 215 } 216 217 // Common initialization logic for Init/InitAfterseek. 218 void InitSetup(std::uint32_t header_size, std::uint64_t size_in_bytes, 219 std::uint64_t position); 220 221 // Resets the internal parsers in preparation for parsing the next child. 222 void PrepareForNextChild(); 223 }; 224 225 } // namespace webm 226 227 #endif // SRC_MASTER_PARSER_H_ 228