1 // Copyright 2015 The Chromium Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef BSSL_DER_PARSER_H_ 6 #define BSSL_DER_PARSER_H_ 7 8 #include <stdint.h> 9 10 #include <optional> 11 12 #include <openssl/base.h> 13 #include <openssl/bytestring.h> 14 15 #include "input.h" 16 17 BSSL_NAMESPACE_BEGIN 18 namespace der { 19 20 class BitString; 21 struct GeneralizedTime; 22 23 // Parses a DER-encoded ASN.1 structure. DER (distinguished encoding rules) 24 // encodes each data value with a tag, length, and value (TLV). The tag 25 // indicates the type of the ASN.1 value. Depending on the type of the value, 26 // it could contain arbitrary bytes, so the length of the value is encoded 27 // after the tag and before the value to indicate how many bytes of value 28 // follow. DER also defines how the values are encoded for particular types. 29 // 30 // This Parser places a few restrictions on the DER encoding it can parse. The 31 // largest restriction is that it only supports tags which have a tag number 32 // no greater than 30 - these are the tags that fit in a single octet. The 33 // second restriction is that the maximum length for a value that can be parsed 34 // is 4GB. Both of these restrictions should be fine for any reasonable input. 35 // 36 // The Parser class is mainly focused on parsing the TLV structure of DER 37 // encoding, and does not directly handle parsing primitive values (other 38 // functions in the bssl::der namespace are provided for this.) When a Parser 39 // is created, it is passed in a reference to the encoded data. Because the 40 // encoded data is not owned by the Parser, the data cannot change during the 41 // lifespan of the Parser. The Parser functions by keeping a pointer to the 42 // current TLV which starts at the beginning of the input and advancing through 43 // the input as each TLV is read. As such, a Parser instance is thread-unsafe. 44 // 45 // Most methods for using the Parser write the current tag and/or value to 46 // the output parameters provided and then advance the input to the next TLV. 47 // None of the methods explicitly expose the length because it is part of the 48 // value. All methods return a boolean indicating whether there was a parsing 49 // error with the current TLV. 50 // 51 // Some methods are provided in the Parser class as convenience to both read 52 // the current TLV from the input and also parse the DER encoded value, 53 // converting it to a corresponding C++ type. These methods simply combine 54 // ReadTag() with the appropriate ParseType() free function. 55 // 56 // The design of DER encoding allows for nested data structures with 57 // constructed values, where the value is a series of TLVs. The Parser class 58 // is not designed to traverse through a nested encoding from a single object, 59 // but it does facilitate parsing nested data structures through the 60 // convenience methods ReadSequence() and the more general ReadConstructed(), 61 // which provide the user with another Parser object to traverse the next 62 // level of TLVs. 63 // 64 // For a brief example of how to use the Parser, suppose we have the following 65 // ASN.1 type definition: 66 // 67 // Foo ::= SEQUENCE { 68 // bar OCTET STRING OPTIONAL, 69 // quux OCTET STRING } 70 // 71 // If we have a DER-encoded Foo in an Input |encoded_value|, the 72 // following code shows an example of how to parse the quux field from the 73 // encoded data. 74 // 75 // bool ReadQuux(Input encoded_value, Input* quux_out) { 76 // Parser parser(encoded_value); 77 // Parser foo_parser; 78 // if (!parser.ReadSequence(&foo_parser)) 79 // return false; 80 // if (!foo_parser->SkipOptionalTag(kOctetString)) 81 // return false; 82 // if (!foo_parser->ReadTag(kOctetString, quux_out)) 83 // return false; 84 // return true; 85 // } 86 class OPENSSL_EXPORT Parser { 87 public: 88 // Default constructor; equivalent to calling Parser(Input()). This only 89 // exists so that a Parser can be stack allocated and passed in to 90 // ReadConstructed() and similar methods. 91 Parser(); 92 93 // Creates a parser to parse over the data represented by input. This class 94 // assumes that the underlying data will not change over the lifetime of 95 // the Parser object. 96 explicit Parser(Input input); 97 98 Parser(const Parser &) = default; 99 Parser &operator=(const Parser &) = default; 100 101 // Returns whether there is any more data left in the input to parse. This 102 // does not guarantee that the data is parseable. 103 bool HasMore(); 104 105 // Reads the current TLV from the input and advances. If the tag or length 106 // encoding for the current value is invalid, this method returns false and 107 // does not advance the input. Otherwise, it returns true, putting the 108 // read tag in |tag| and the value in |out|. 109 [[nodiscard]] bool ReadTagAndValue(CBS_ASN1_TAG *tag, Input *out); 110 111 // Reads the current TLV from the input and advances. Unlike ReadTagAndValue 112 // where only the value is put in |out|, this puts the raw bytes from the 113 // tag, length, and value in |out|. 114 [[nodiscard]] bool ReadRawTLV(Input *out); 115 116 // Basic methods for reading or skipping the current TLV, with an 117 // expectation of what the current tag should be. It should be possible 118 // to parse any structure with these 4 methods; convenience methods are also 119 // provided to make some cases easier. 120 121 // If the current tag in the input is |tag|, it puts the corresponding value 122 // in |out| and advances the input to the next TLV. If the current tag is 123 // something else, then |out| is set to nullopt and the input is not 124 // advanced. Like ReadTagAndValue, it returns false if the encoding is 125 // invalid and does not advance the input. 126 [[nodiscard]] bool ReadOptionalTag(CBS_ASN1_TAG tag, std::optional<Input> *out); 127 128 // If the current tag in the input is |tag|, it puts the corresponding value 129 // in |out|, sets |was_present| to true, and advances the input to the next 130 // TLV. If the current tag is something else, then |was_present| is set to 131 // false and the input is not advanced. Like ReadTagAndValue, it returns 132 // false if the encoding is invalid and does not advance the input. 133 // DEPRECATED: use the std::optional version above in new code. 134 // TODO(mattm): convert the existing callers and remove this override. 135 [[nodiscard]] bool ReadOptionalTag(CBS_ASN1_TAG tag, Input *out, bool *was_present); 136 137 // Like ReadOptionalTag, but the value is discarded. 138 [[nodiscard]] bool SkipOptionalTag(CBS_ASN1_TAG tag, bool *was_present); 139 140 // If the current tag matches |tag|, it puts the current value in |out|, 141 // advances the input, and returns true. Otherwise, it returns false. 142 [[nodiscard]] bool ReadTag(CBS_ASN1_TAG tag, Input *out); 143 144 // Advances the input and returns true if the current tag matches |tag|; 145 // otherwise it returns false. 146 [[nodiscard]] bool SkipTag(CBS_ASN1_TAG tag); 147 148 // Convenience methods to combine parsing the TLV with parsing the DER 149 // encoding for a specific type. 150 151 // Reads the current TLV from the input, checks that the tag matches |tag| 152 // and is a constructed tag, and creates a new Parser from the value. 153 [[nodiscard]] bool ReadConstructed(CBS_ASN1_TAG tag, Parser *out); 154 155 // A more specific form of ReadConstructed that expects the current tag 156 // to be 0x30 (SEQUENCE). 157 [[nodiscard]] bool ReadSequence(Parser *out); 158 159 // Expects the current tag to be kInteger, and calls ParseUint8 on the 160 // current value. Note that DER-encoded integers are arbitrary precision, 161 // so this method will fail for valid input that represents an integer 162 // outside the range of an uint8_t. 163 // 164 // Note that on failure the Parser is left in an undefined state (the 165 // input may or may not have been advanced). 166 [[nodiscard]] bool ReadUint8(uint8_t *out); 167 168 // Expects the current tag to be kInteger, and calls ParseUint64 on the 169 // current value. Note that DER-encoded integers are arbitrary precision, 170 // so this method will fail for valid input that represents an integer 171 // outside the range of an uint64_t. 172 // 173 // Note that on failure the Parser is left in an undefined state (the 174 // input may or may not have been advanced). 175 [[nodiscard]] bool ReadUint64(uint64_t *out); 176 177 // Reads a BIT STRING. On success returns BitString. On failure, returns 178 // std::nullopt. 179 // 180 // Note that on failure the Parser is left in an undefined state (the 181 // input may or may not have been advanced). 182 [[nodiscard]] std::optional<BitString> ReadBitString(); 183 184 // Reads a GeneralizeTime. On success fills |out| and returns true. 185 // 186 // Note that on failure the Parser is left in an undefined state (the 187 // input may or may not have been advanced). 188 [[nodiscard]] bool ReadGeneralizedTime(GeneralizedTime *out); 189 190 // Lower level methods. The previous methods couple reading data from the 191 // input with advancing the Parser's internal pointer to the next TLV; these 192 // lower level methods decouple those two steps into methods that read from 193 // the current TLV and a method that advances the internal pointer to the 194 // next TLV. 195 196 // Reads the current TLV from the input, putting the tag in |tag| and the raw 197 // value in |out|, but does not advance the input. Returns true if the tag 198 // and length are successfully read and the output exists. 199 [[nodiscard]] bool PeekTagAndValue(CBS_ASN1_TAG *tag, Input *out); 200 201 // Advances the input to the next TLV. This method only needs to be called 202 // after PeekTagAndValue; all other methods will advance the input if they 203 // read something. 204 bool Advance(); 205 206 private: 207 CBS cbs_; 208 size_t advance_len_ = 0; 209 }; 210 211 } // namespace der 212 BSSL_NAMESPACE_END 213 214 #endif // BSSL_DER_PARSER_H_ 215