1 #ifndef IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 2 #define IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 3 4 #include <string> 5 6 #include "image_io/base/data_context.h" 7 #include "image_io/base/data_match_result.h" 8 #include "image_io/base/data_range.h" 9 #include "image_io/base/data_segment.h" 10 11 namespace photos_editing_formats { 12 namespace image_io { 13 14 /// Provides a means to scan a textual portion of a data segment for a sequence 15 /// of characters and return the data associated with the resulting match. The 16 /// scanners also maintain state information for repeated calling in case the 17 /// text data is split over multipe data segments. The scanners also maintain 18 /// a data range where the result of the scanner's match can be found. These 19 /// scanners are written to allow copy semantics to make memory management 20 /// easier. Several types of scanners are provided. 21 class DataScanner { 22 public: 23 /// The type of scanner. 24 enum Type { 25 /// A scanner to look for text that matches exactly one or more characters. 26 /// The text to look for is given to the CreateLiteralScanner() function. 27 kLiteral, 28 29 /// A scanner to look for text that matches a name. A name must begin with 30 /// one of the characters in "[A-Z][a-z]:_". Subsequent characters can 31 /// include "[0-9]-.". 32 kName, 33 34 /// A scanner to look for a quoted string. A quoted string is delimited by 35 /// a single (') or double (") quote, and include any character except the 36 /// quote mark. 37 kQuotedString, 38 39 /// A scanner to look for one character from a set of characters. The set of 40 /// characters are given to the CreateSentinelScanner() function. 41 kSentinel, 42 43 /// A scanner to accept all text up to and including a literal text value. 44 /// The text to look for is given to the CreateThroughLiteralScanner() 45 /// function. 46 kThroughLiteral, 47 48 /// A scanner to skip white space characters. At least one whitespace 49 /// character must be scanned. The set of white space characters is given 50 /// by the GetWhitespaceChars() function. 51 kWhitespace, 52 53 /// A scanner to skip white space characters, but unlike the kWhitespace 54 /// scanner, this scanner will not return an error result if there are no 55 /// whitespace characters scanned. 56 kOptionalWhitespace, 57 }; 58 59 /// @return The set of whitespace characters: " \t\n\r". 60 static std::string GetWhitespaceChars(); 61 62 /// @return The characters used for base64 encoding and optionally the pad 63 /// char at the end of the string. 64 /// @param include_pad_char Whether to include the base64 pad char at the end 65 /// of the string. 66 static std::string GetBase64Chars(bool include_pad_char); 67 68 /// @return The character used to pad base64 encoded strings. 69 static std::string GetBase64PadChar(); 70 71 /// @param literal The literal to use for the scanner. 72 /// @return A kLiteral type scanner. 73 static DataScanner CreateLiteralScanner(const std::string& literal); 74 75 /// @return A kName type scanner. 76 static DataScanner CreateNameScanner(); 77 78 /// @return A kQuoteString type scanner. 79 static DataScanner CreateQuotedStringScanner(); 80 81 /// @param sentinels The set of sentinels to scan for. The "~" character is 82 /// used as an "abbreviation" for any of the characters that can make up the 83 /// first character of a kName type sentinel. 84 /// @return a kSentinel type scanner. 85 static DataScanner CreateSentinelScanner(const std::string& sentinels); 86 87 /// @param literal The literal to use for the scanner. 88 /// @return A kThroughLiteral type scanner. 89 static DataScanner CreateThroughLiteralScanner(const std::string& literal); 90 91 /// @return A kWhitespace type scanner; 92 static DataScanner CreateWhitespaceScanner(); 93 94 /// @return A kOptionalWhitespace type scanner; 95 static DataScanner CreateOptionalWhitespaceScanner(); 96 97 /// A function like strspn that accepts the length of string to scan. If the 98 /// return value, ret, is not slen, then s[ret] is not in scanset. 99 /// @param s The string to scan 100 /// @param slen The length of the string to scan 101 /// @param scanset The set of characters to scan/skip over. 102 /// @return The number of scanned characters in s that were in accept. 103 static size_t ScanChars(const char* s, size_t slen, const char* scanset); 104 105 /// @return The type of the scanner. GetType()106 Type GetType() const { return type_; } 107 108 /// @return A description of the scanner, or one that is based on the type. 109 std::string GetDescription() const; 110 111 /// @param The description to use for the scanner instead of an internal one 112 /// that is based on the type of scanner. SetDescription(const std::string & description)113 void SetDescription(const std::string& description) { 114 description_ = description; 115 } 116 117 /// @return The literal value of a kLiteral or kThroughLiteral type scanner, 118 /// or an empty string otherwise. 119 std::string GetLiteral() const; 120 121 /// @return The set of sentinels for a kSentinal type scanner, or an empty 122 /// string otherwise. 123 std::string GetSentenels() const; 124 125 /// @return The sentinel character from the set of characters passed to the 126 /// CreateSentinelScanner() function that was matched by a successful scan 127 /// operation, or 0 otherwise. 128 char GetSentinel() const; 129 130 /// @return The range of characters that the scanner found during one or more 131 /// successful Scan() function operations. GetTokenRange()132 const DataRange& GetTokenRange() const { return token_range_; } 133 134 /// @return The number of tiomes the Scan() function has been called. GetScanCallCount()135 size_t GetScanCallCount() const { return scan_call_count_; } 136 137 /// @param context The data context to use for the scan operation. 138 /// @return The match result of the scan operation. 139 DataMatchResult Scan(const DataContext& context); 140 141 /// Reset the scanner's token range to an invalid value. 142 void ResetTokenRange(); 143 144 /// Reset the scanner state to the value it had when it was first constructed. 145 void Reset(); 146 147 /// @param delta_length The byte count to use to extend the token range end. 148 /// @return The new length of the token range. 149 size_t ExtendTokenLength(size_t delta_length); 150 151 private: DataScanner(Type type)152 explicit DataScanner(Type type) : DataScanner(type, "") {} DataScanner(Type type,const std::string & literal_or_sentinels)153 DataScanner(Type type, const std::string& literal_or_sentinels) 154 : literal_or_sentinels_(literal_or_sentinels), 155 data_(0), 156 scan_call_count_(0), 157 type_(type) {} 158 159 /// The worker functions for scanning each type of literal. 160 /// @param cbytes The pointer value to the buffer at the context's location. 161 /// @param bytes_available The number of bytes available for the scan. 162 /// @param context The data context for message generation purposes. 163 DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available, 164 const DataContext& context); 165 DataMatchResult ScanName(const char* cbytes, size_t bytes_available, 166 const DataContext& context); 167 DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available, 168 const DataContext& context); 169 DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available, 170 const DataContext& context); 171 DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available, 172 const DataContext& context); 173 DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available, 174 const DataContext& context); 175 176 /// Sets the match result to kError and generates an internal error message. 177 /// @param context The data context for message generation purposes. 178 /// @param error_description A description of the type of internal error. 179 /// @param result The result to receive the kError type and message. 180 void SetInternalError(const DataContext& context, 181 const std::string& error_description, 182 DataMatchResult* result); 183 184 /// Sets the match result to kError and generates an syntax error message. 185 /// @param context The data context for message generation purposes. 186 /// @param error_description A description of the type of syntax error. 187 /// @param result The result to receive the kError type and message. 188 void SetSyntaxError(const DataContext& context, 189 const std::string& error_description, 190 DataMatchResult* result); 191 192 /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners. 193 std::string literal_or_sentinels_; 194 195 /// The custom description of the scanner. 196 std::string description_; 197 198 /// The token range built by one or calls to the Scan() function. 199 DataRange token_range_; 200 201 /// State data used in different ways by different scanner types. 202 size_t data_; 203 204 /// The number of times the scanner's Scan function has been called. 205 size_t scan_call_count_; 206 207 /// The type of scanner. 208 Type type_; 209 }; 210 211 } // namespace image_io 212 } // namespace photos_editing_formats 213 214 #endif // IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 215