1 #ifndef IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 2 #define IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 3 4 #include <string> 5 6 #include "image_io/base/data_context.h" 7 #include "image_io/base/data_match_result.h" 8 #include "image_io/base/data_range.h" 9 #include "image_io/base/data_segment.h" 10 11 namespace photos_editing_formats { 12 namespace image_io { 13 14 /// Provides a means to scan a textual portion of a data segment for a sequence 15 /// of characters and return the data associated with the resulting match. The 16 /// scanners also maintain state information for repeated calling in case the 17 /// text data is split over multipe data segments. The scanners also maintain 18 /// a data range where the result of the scanner's match can be found. These 19 /// scanners are written to allow copy semantics to make memory management 20 /// easier. Several types of scanners are provided. 21 class DataScanner { 22 public: 23 /// The type of scanner. 24 enum Type { 25 /// A scanner to look for text that matches exactly one or more characters. 26 /// The text to look for is given to the CreateLiteralScanner() function. 27 kLiteral, 28 29 /// A scanner to look for text that matches a name. A name must begin with 30 /// one of the characters in "[A-Z][a-z]:_". Subsequent characters can 31 /// include "[0-9]-.". 32 kName, 33 34 /// A scanner to look for a quoted string. A quoted string is delimited by 35 /// a single (') or double (") quote, and include any character except the 36 /// quote mark. 37 kQuotedString, 38 39 /// A scanner to look for one character from a set of characters. The set of 40 /// characters are given to the CreateSentinelScanner() function. 41 kSentinel, 42 43 /// A scanner to accept all text up to and including a literal text value. 44 /// The text to look for is given to the CreateThroughLiteralScanner() 45 /// function. 46 kThroughLiteral, 47 48 /// A scanner to skip white space characters. At least one whitespace 49 /// character must be scanned. The set of white space characters is given 50 /// by the GetWhitespaceChars() function. 51 kWhitespace, 52 53 /// A scanner to skip white space characters, but unlike the kWhitespace 54 /// scanner, this scanner will not return an error result if there are no 55 /// whitespace characters scanned. 56 kOptionalWhitespace, 57 }; 58 59 /// @return The set of whitespace characters: " \t\n\r". 60 static std::string GetWhitespaceChars(); 61 62 /// @param literal The literal to use for the scanner. 63 /// @return A kLiteral type scanner. 64 static DataScanner CreateLiteralScanner(const std::string& literal); 65 66 /// @return A kName type scanner. 67 static DataScanner CreateNameScanner(); 68 69 /// @return A kQuoteString type scanner. 70 static DataScanner CreateQuotedStringScanner(); 71 72 /// @param sentinels The set of sentinels to scan for. The "~" character is 73 /// used as an "abbreviation" for any of the characters that can make up the 74 /// first character of a kName type sentinel. 75 /// @return a kSentinel type scanner. 76 static DataScanner CreateSentinelScanner(const std::string& sentinels); 77 78 /// @param literal The literal to use for the scanner. 79 /// @return A kThroughLiteral type scanner. 80 static DataScanner CreateThroughLiteralScanner(const std::string& literal); 81 82 /// @return A kWhitespace type scanner; 83 static DataScanner CreateWhitespaceScanner(); 84 85 /// @return A kOptionalWhitespace type scanner; 86 static DataScanner CreateOptionalWhitespaceScanner(); 87 88 /// @return The type of the scanner. GetType()89 Type GetType() const { return type_; } 90 91 /// @return A description of the scanner, based on the type. 92 std::string GetDescription() const; 93 94 /// @return The literal value of a kLiteral or kThroughLiteral type scanner, 95 /// or an empty string otherwise. 96 std::string GetLiteral() const; 97 98 /// @return The set of sentinels for a kSentinal type scanner, or an empty 99 /// string otherwise. 100 std::string GetSentenels() const; 101 102 /// @return The sentinel character from the set of characters passed to the 103 /// CreateSentinelScanner() function that was matched by a successful scan 104 /// operation, or 0 otherwise. 105 char GetSentinel() const; 106 107 /// @return The range of characters that the scanner found during one or more 108 /// successful Scan() function operations. GetTokenRange()109 const DataRange& GetTokenRange() const { return token_range_; } 110 111 /// @return The number of tiomes the Scan() function has been called. GetScanCallCount()112 size_t GetScanCallCount() const { return scan_call_count_; } 113 114 /// @param context The data context to use for the scan operation. 115 /// @return The match result of the scan operation. 116 DataMatchResult Scan(const DataContext& context); 117 118 /// Reset the scanner's token range to an invalid value. 119 void ResetTokenRange(); 120 121 /// Reset the scanner state to the value it had when it was first constructed. 122 void Reset(); 123 124 private: DataScanner(Type type)125 explicit DataScanner(Type type) : DataScanner(type, "") {} DataScanner(Type type,const std::string & literal_or_sentinels)126 DataScanner(Type type, const std::string& literal_or_sentinels) 127 : literal_or_sentinels_(literal_or_sentinels), 128 data_(0), 129 scan_call_count_(0), 130 type_(type) {} 131 132 /// @param delta_length The byte count to use to extend the token range end. 133 /// @return The new length of the token range. 134 size_t ExtendTokenLength(size_t delta_length); 135 136 /// The worker functions for scanning each type of literal. 137 /// @param cbytes The pointer value to the buffer at the context's location. 138 /// @param bytes_available The number of bytes available for the scan. 139 /// @param context The data context for message generation purposes. 140 DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available, 141 const DataContext& context); 142 DataMatchResult ScanName(const char* cbytes, size_t bytes_available, 143 const DataContext& context); 144 DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available, 145 const DataContext& context); 146 DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available, 147 const DataContext& context); 148 DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available, 149 const DataContext& context); 150 DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available, 151 const DataContext& context); 152 153 /// Sets the match result to kError and generates an internal error message. 154 /// @param context The data context for message generation purposes. 155 /// @param error_description A description of the type of internal error. 156 /// @param result The result to receive the kError type and message. 157 void SetInternalError(const DataContext& context, 158 const std::string& error_description, 159 DataMatchResult* result); 160 161 /// Sets the match result to kError and generates an syntax error message. 162 /// @param context The data context for message generation purposes. 163 /// @param error_description A description of the type of syntax error. 164 /// @param result The result to receive the kError type and message. 165 void SetSyntaxError(const DataContext& context, 166 const std::string& error_description, 167 DataMatchResult* result); 168 169 /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners. 170 std::string literal_or_sentinels_; 171 172 /// The token range built by one or calls to the Scan() function. 173 DataRange token_range_; 174 175 /// State data used in different ways by different scanner types. 176 size_t data_; 177 178 /// The number of times the scanner's Scan function has been called. 179 size_t scan_call_count_; 180 181 /// The type of scanner. 182 Type type_; 183 }; 184 185 } // namespace image_io 186 } // namespace photos_editing_formats 187 188 #endif // IMAGE_IO_BASE_DATA_SCANNER_H_ // NOLINT 189