• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #ifndef IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
2 #define IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
3 
4 #include <string>
5 
6 #include "image_io/base/data_context.h"
7 #include "image_io/base/data_match_result.h"
8 #include "image_io/base/data_range.h"
9 #include "image_io/base/data_segment.h"
10 
11 namespace photos_editing_formats {
12 namespace image_io {
13 
14 /// Provides a means to scan a textual portion of a data segment for a sequence
15 /// of characters and return the data associated with the resulting match. The
16 /// scanners also maintain state information for repeated calling in case the
17 /// text data is split over multipe data segments. The scanners also maintain
18 /// a data range where the result of the scanner's match can be found. These
19 /// scanners are written to allow copy semantics to make memory management
20 /// easier. Several types of scanners are provided.
21 class DataScanner {
22  public:
23   /// The type of scanner.
24   enum Type {
25     /// A scanner to look for text that matches exactly one or more characters.
26     /// The text to look for is given to the CreateLiteralScanner() function.
27     kLiteral,
28 
29     /// A scanner to look for text that matches a name. A name must begin with
30     /// one of the characters in "[A-Z][a-z]:_". Subsequent characters can
31     /// include "[0-9]-.".
32     kName,
33 
34     /// A scanner to look for a quoted string. A quoted string is delimited by
35     /// a single (') or double (") quote, and include any character except the
36     /// quote mark.
37     kQuotedString,
38 
39     /// A scanner to look for one character from a set of characters. The set of
40     /// characters are given to the CreateSentinelScanner() function.
41     kSentinel,
42 
43     /// A scanner to accept all text up to and including a literal text value.
44     /// The text to look for is given to the CreateThroughLiteralScanner()
45     /// function.
46     kThroughLiteral,
47 
48     /// A scanner to skip white space characters. At least one whitespace
49     /// character must be scanned. The set of white space characters is given
50     /// by the GetWhitespaceChars() function.
51     kWhitespace,
52 
53     /// A scanner to skip white space characters, but unlike the kWhitespace
54     /// scanner, this scanner will not return an error result if there are no
55     /// whitespace characters scanned.
56     kOptionalWhitespace,
57   };
58 
59   /// @return The set of whitespace characters: " \t\n\r".
60   static std::string GetWhitespaceChars();
61 
62   /// @param literal The literal to use for the scanner.
63   /// @return A kLiteral type scanner.
64   static DataScanner CreateLiteralScanner(const std::string& literal);
65 
66   /// @return A kName type scanner.
67   static DataScanner CreateNameScanner();
68 
69   /// @return A kQuoteString type scanner.
70   static DataScanner CreateQuotedStringScanner();
71 
72   /// @param sentinels The set of sentinels to scan for. The "~" character is
73   /// used as an "abbreviation" for any of the characters that can make up the
74   /// first character of a kName type sentinel.
75   /// @return a kSentinel type scanner.
76   static DataScanner CreateSentinelScanner(const std::string& sentinels);
77 
78   /// @param literal The literal to use for the scanner.
79   /// @return A kThroughLiteral type scanner.
80   static DataScanner CreateThroughLiteralScanner(const std::string& literal);
81 
82   /// @return A kWhitespace type scanner;
83   static DataScanner CreateWhitespaceScanner();
84 
85   /// @return A kOptionalWhitespace type scanner;
86   static DataScanner CreateOptionalWhitespaceScanner();
87 
88   /// @return The type of the scanner.
GetType()89   Type GetType() const { return type_; }
90 
91   /// @return A description of the scanner, based on the type.
92   std::string GetDescription() const;
93 
94   /// @return The literal value of a kLiteral or kThroughLiteral type scanner,
95   /// or an empty string otherwise.
96   std::string GetLiteral() const;
97 
98   /// @return The set of sentinels for a kSentinal type scanner, or an empty
99   /// string otherwise.
100   std::string GetSentenels() const;
101 
102   /// @return The sentinel character from the set of characters passed to the
103   /// CreateSentinelScanner() function that was matched by a successful scan
104   /// operation, or 0 otherwise.
105   char GetSentinel() const;
106 
107   /// @return The range of characters that the scanner found during one or more
108   /// successful Scan() function operations.
GetTokenRange()109   const DataRange& GetTokenRange() const { return token_range_; }
110 
111   /// @return The number of tiomes the Scan() function has been called.
GetScanCallCount()112   size_t GetScanCallCount() const { return scan_call_count_; }
113 
114   /// @param context The data context to use for the scan operation.
115   /// @return The match result of the scan operation.
116   DataMatchResult Scan(const DataContext& context);
117 
118   /// Reset the scanner's token range to an invalid value.
119   void ResetTokenRange();
120 
121   /// Reset the scanner state to the value it had when it was first constructed.
122   void Reset();
123 
124  private:
DataScanner(Type type)125   explicit DataScanner(Type type) : DataScanner(type, "") {}
DataScanner(Type type,const std::string & literal_or_sentinels)126   DataScanner(Type type, const std::string& literal_or_sentinels)
127       : literal_or_sentinels_(literal_or_sentinels),
128         data_(0),
129         scan_call_count_(0),
130         type_(type) {}
131 
132   /// @param delta_length The byte count to use to extend the token range end.
133   /// @return The new length of the token range.
134   size_t ExtendTokenLength(size_t delta_length);
135 
136   /// The worker functions for scanning each type of literal.
137   /// @param cbytes The pointer value to the buffer at the context's location.
138   /// @param bytes_available The number of bytes available for the scan.
139   /// @param context The data context for message generation purposes.
140   DataMatchResult ScanLiteral(const char* cbytes, size_t bytes_available,
141                               const DataContext& context);
142   DataMatchResult ScanName(const char* cbytes, size_t bytes_available,
143                            const DataContext& context);
144   DataMatchResult ScanQuotedString(const char* cbytes, size_t bytes_available,
145                                    const DataContext& context);
146   DataMatchResult ScanSentinel(const char* cbytes, size_t bytes_available,
147                                const DataContext& context);
148   DataMatchResult ScanThroughLiteral(const char* cbytes, size_t bytes_available,
149                                      const DataContext& context);
150   DataMatchResult ScanWhitespace(const char* cbytes, size_t bytes_available,
151                                  const DataContext& context);
152 
153   /// Sets the match result to kError and generates an internal error message.
154   /// @param context The data context for message generation purposes.
155   /// @param error_description A description of the type of internal error.
156   /// @param result The result to receive the kError type and message.
157   void SetInternalError(const DataContext& context,
158                         const std::string& error_description,
159                         DataMatchResult* result);
160 
161   /// Sets the match result to kError and generates an syntax error message.
162   /// @param context The data context for message generation purposes.
163   /// @param error_description A description of the type of syntax error.
164   /// @param result The result to receive the kError type and message.
165   void SetSyntaxError(const DataContext& context,
166                       const std::string& error_description,
167                       DataMatchResult* result);
168 
169   /// The string used for kLiteral, kThroughLiteral and kSentinel type scanners.
170   std::string literal_or_sentinels_;
171 
172   /// The token range built by one or calls to the Scan() function.
173   DataRange token_range_;
174 
175   /// State data used in different ways by different scanner types.
176   size_t data_;
177 
178   /// The number of times the scanner's Scan function has been called.
179   size_t scan_call_count_;
180 
181   /// The type of scanner.
182   Type type_;
183 };
184 
185 }  // namespace image_io
186 }  // namespace photos_editing_formats
187 
188 #endif // IMAGE_IO_BASE_DATA_SCANNER_H_  // NOLINT
189