• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
2 #define IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
3 
4 #include <string>
5 
6 #include "image_io/base/data_context.h"
7 #include "image_io/base/data_match_result.h"
8 #include "image_io/base/data_range.h"
9 #include "image_io/base/data_segment.h"
10 
11 namespace photos_editing_formats {
12 namespace image_io {
13 
14 /// Provides a means to scan a textual portion of a data segment for a sequence
15 /// of characters and return the data associated with the resulting match. The
16 /// scanners also maintain state information for repeated calling in case the
17 /// text data is split over multipe data segments. The scanners also maintain
18 /// a data range where the result of the scanner's match can be found. These
19 /// scanners are written to allow copy semantics to make memory management
20 /// easier. Several types of scanners are provided.
21 class DataScanner {
22  public:
23   /// The type of scanner.
24   enum Type {
25     /// A scanner to look for text that matches exactly one or more characters.
26     /// The text to look for is given to the CreateLiteralScanner() function.
27     kLiteral,
28 
29     /// A scanner to look for text that matches a name. A name must begin with
30     /// one of the characters in "[A-Z][a-z]:_". Subsequent characters can
31     /// include "[0-9]-.".
32     kName,
33 
34     /// A scanner to look for a quoted string. A quoted string is delimited by
35     /// a single (') or double (") quote, and include any character except the
36     /// quote mark.
37     kQuotedString,
38 
39     /// A scanner to look for one character from a set of characters. The set of
40     /// characters are given to the CreateSentinelScanner() function.
41     kSentinel,
42 
43     /// A scanner to accept all text up to and including a literal text value.
44     /// The text to look for is given to the CreateThroughLiteralScanner()
45     /// function.
46     kThroughLiteral,
47 
48     /// A scanner to skip white space characters. At least one whitespace
49     /// character must be scanned. The set of white space characters is given
50     /// by the GetWhitespaceChars() function.
51     kWhitespace,
52 
53     /// A scanner to skip white space characters, but unlike the kWhitespace
54     /// scanner, this scanner will not return an error result if there are no
55     /// whitespace characters scanned.
56     kOptionalWhitespace,
57   };
58 
59   /// @return The set of whitespace characters: " \t\n\r".
60   static std::string GetWhitespaceChars();
61 
62   /// @return The characters used for base64 encoding and optionally the pad
63   /// char at the end of the string.
64   /// @param include_pad_char Whether to include the base64 pad char at the end
65   /// of the string.
66   static std::string GetBase64Chars(bool include_pad_char);
67 
68   /// @return The character used to pad base64 encoded strings.
69   static std::string GetBase64PadChar();
70 
71   /// @param literal The literal to use for the scanner.
72   /// @return A kLiteral type scanner.
73   static DataScanner CreateLiteralScanner(const std::string& literal);
74 
75   /// @return A kName type scanner.
76   static DataScanner CreateNameScanner();
77 
78   /// @return A kQuoteString type scanner.
79   static DataScanner CreateQuotedStringScanner();
80 
81   /// @param sentinels The set of sentinels to scan for. The "~" character is
82   /// used as an "abbreviation" for any of the characters that can make up the
83   /// first character of a kName type sentinel.
84   /// @return a kSentinel type scanner.
85   static DataScanner CreateSentinelScanner(const std::string& sentinels);
86 
87   /// @param literal The literal to use for the scanner.
88   /// @return A kThroughLiteral type scanner.
89   static DataScanner CreateThroughLiteralScanner(const std::string& literal);
90 
91   /// @return A kWhitespace type scanner;
92   static DataScanner CreateWhitespaceScanner();
93 
94   /// @return A kOptionalWhitespace type scanner;
95   static DataScanner CreateOptionalWhitespaceScanner();
96 
97   /// A function like strspn that accepts the length of string to scan. If the
98   /// return value, ret,  is not slen, then s[ret] is not in scanset.
99   /// @param s The string to scan
100   /// @param slen The length of the string to scan
101   /// @param scanset The set of characters to scan/skip over.
102   /// @return The number of scanned characters in s that were in accept.
103   static size_t ScanChars(const char* s, size_t slen, const char* scanset);
104 
105   /// @return The type of the scanner.
GetType()106   Type GetType() const { return type_; }
107 
108   /// @return A description of the scanner, or one that is based on the type.
109   std::string GetDescription() const;
110 
111   /// @param The description to use for the scanner instead of an internal one
112   /// that is based on the type of scanner.
SetDescription(const std::string & description)113   void SetDescription(const std::string& description) {
114     description_ = description;
115   }
116 
117   /// @return The literal value of a kLiteral or kThroughLiteral type scanner,
118   /// or an empty string otherwise.
119   std::string GetLiteral() const;
120 
121   /// @return The set of sentinels for a kSentinal type scanner, or an empty
122   /// string otherwise.
123   std::string GetSentenels() const;
124 
125   /// @return The sentinel character from the set of characters passed to the
126   /// CreateSentinelScanner() function that was matched by a successful scan
127   /// operation, or 0 otherwise.
128   char GetSentinel() const;
129 
130   /// @return The range of characters that the scanner found during one or more
131   /// successful Scan() function operations.
GetTokenRange()132   const DataRange& GetTokenRange() const { return token_range_; }
133 
134   /// @return The number of tiomes the Scan() function has been called.
GetScanCallCount()135   size_t GetScanCallCount() const { return scan_call_count_; }
136 
137   /// @param context The data context to use for the scan operation.
138   /// @return The match result of the scan operation.
139   DataMatchResult Scan(const DataContext& context);
140 
141   /// Reset the scanner's token range to an invalid value.
142   void ResetTokenRange();
143 
144   /// Reset the scanner state to the value it had when it was first constructed.
145   void Reset();
146 
147   /// @param delta_length The byte count to use to extend the token range end.
148   /// @return The new length of the token range.
149   size_t ExtendTokenLength(size_t delta_length);
150 
151  private:
DataScanner(Type type)152   explicit DataScanner(Type type) : DataScanner(type, "") {}
DataScanner(Type type,const std::string & literal_or_sentinels)153   DataScanner(Type type, const std::string& literal_or_sentinels)
154       : literal_or_sentinels_(literal_or_sentinels),
155         data_(0),
156         scan_call_count_(0),
157         type_(type) {}
158 
159   /// The worker functions for scanning each type of literal.
160   /// @param cbytes The pointer value to the buffer at the context's location.
161   /// @param bytes_available The number of bytes available for the scan.
162   /// @param context The data context for message generation purposes.
163   DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available,
164                               const DataContext& context);
165   DataMatchResult ScanName(const char* cbytes, size_t bytes_available,
166                            const DataContext& context);
167   DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available,
168                                    const DataContext& context);
169   DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available,
170                                const DataContext& context);
171   DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available,
172                                      const DataContext& context);
173   DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available,
174                                  const DataContext& context);
175 
176   /// Sets the match result to kError and generates an internal error message.
177   /// @param context The data context for message generation purposes.
178   /// @param error_description A description of the type of internal error.
179   /// @param result The result to receive the kError type and message.
180   void SetInternalError(const DataContext& context,
181                         const std::string& error_description,
182                         DataMatchResult* result);
183 
184   /// Sets the match result to kError and generates an syntax error message.
185   /// @param context The data context for message generation purposes.
186   /// @param error_description A description of the type of syntax error.
187   /// @param result The result to receive the kError type and message.
188   void SetSyntaxError(const DataContext& context,
189                       const std::string& error_description,
190                       DataMatchResult* result);
191 
192   /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners.
193   std::string literal_or_sentinels_;
194 
195   /// The custom description of the scanner.
196   std::string description_;
197 
198   /// The token range built by one or calls to the Scan() function.
199   DataRange token_range_;
200 
201   /// State data used in different ways by different scanner types.
202   size_t data_;
203 
204   /// The number of times the scanner's Scan function has been called.
205   size_t scan_call_count_;
206 
207   /// The type of scanner.
208   Type type_;
209 };
210 
211 }  // namespace image_io
212 }  // namespace photos_editing_formats
213 
214 #endif // IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
215