• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "image_io/base/data_scanner.h"
2 
3 namespace photos_editing_formats {
4 namespace image_io {
5 
6 namespace {
7 
/// The characters the whitespace scanners accept: space, tab, newline and
/// carriage return.
constexpr char kWhitespaceChars[] = " \t\n\r";
9 
/// Like strspn, but operates on a length-delimited buffer instead of a
/// null-terminated string. No byte at or beyond s[slen] is ever read, so an
/// empty buffer (slen == 0) is never dereferenced.
/// @param s The buffer to scan; it need not be null-terminated.
/// @param slen The number of bytes of s that may be examined.
/// @param accept A null-terminated set of characters to accept.
/// @return The length of the leading run of s that consists only of
///     characters found in accept. A null byte in s always ends the run.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span = 0;
  for (; span < slen; ++span) {
    const char c = s[span];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) {
      ++candidate;
    }
    // Reaching the terminator means c is not in the accept set.
    if (*candidate == '\0') break;
  }
  return span;
}
23 
/// @return Whether value lies within the closed interval [lo, hi].
bool InRange(char value, char lo, char hi) {
  if (value < lo) return false;
  return !(value > hi);
}
28 
29 /// @return Whether the value is the first character of a kName type scanner.
IsFirstNameChar(char value)30 bool IsFirstNameChar(char value) {
31   return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
32          value == ':';
33 }
34 
35 /// Scans the characters in the s string, where the characters can be any legal
36 /// character in the name.
37 /// @return The number of name characters scanned.
ScanOptionalNameChars(const char * s,size_t slen)38 size_t ScanOptionalNameChars(const char* s, size_t slen) {
39   const char* kOptionalChars =
40       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
41   return memspn(s, slen, kOptionalChars);
42 }
43 
44 /// Scans the whitespace characters in the s string.
45 /// @return The number of whitepace characters scanned.
ScanWhitespaceChars(const char * s,size_t slen)46 size_t ScanWhitespaceChars(const char* s, size_t slen) {
47   return memspn(s, slen, kWhitespaceChars);
48 }
49 
50 }  // namespace
51 
GetWhitespaceChars()52 std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }
53 
CreateLiteralScanner(const std::string & literal)54 DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) {
55   return DataScanner(DataScanner::kLiteral, literal);
56 }
57 
CreateNameScanner()58 DataScanner DataScanner::CreateNameScanner() {
59   return DataScanner(DataScanner::kName);
60 }
61 
CreateQuotedStringScanner()62 DataScanner DataScanner::CreateQuotedStringScanner() {
63   return DataScanner(DataScanner::kQuotedString);
64 }
65 
CreateSentinelScanner(const std::string & sentinels)66 DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) {
67   return DataScanner(DataScanner::kSentinel, sentinels);
68 }
69 
CreateThroughLiteralScanner(const std::string & literal)70 DataScanner DataScanner::CreateThroughLiteralScanner(
71     const std::string& literal) {
72   return DataScanner(DataScanner::kThroughLiteral, literal);
73 }
74 
CreateWhitespaceScanner()75 DataScanner DataScanner::CreateWhitespaceScanner() {
76   return DataScanner(DataScanner::kWhitespace);
77 }
78 
CreateOptionalWhitespaceScanner()79 DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
80   return DataScanner(DataScanner::kOptionalWhitespace);
81 }
82 
ExtendTokenLength(size_t delta_length)83 size_t DataScanner::ExtendTokenLength(size_t delta_length) {
84   token_range_ =
85       DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length);
86   return token_range_.GetLength();
87 }
88 
SetInternalError(const DataContext & context,const std::string & error_description,DataMatchResult * result)89 void DataScanner::SetInternalError(const DataContext& context,
90                                    const std::string& error_description,
91                                    DataMatchResult* result) {
92   result->SetType(DataMatchResult::kError);
93   result->SetMessage(
94       Message::kInternalError,
95       context.GetErrorText({}, {GetDescription()}, error_description, ""));
96 }
97 
SetSyntaxError(const DataContext & context,const std::string & error_description,DataMatchResult * result)98 void DataScanner::SetSyntaxError(const DataContext& context,
99                                  const std::string& error_description,
100                                  DataMatchResult* result) {
101   result->SetType(DataMatchResult::kError);
102   result->SetMessage(Message::kSyntaxError,
103                      context.GetErrorText(error_description, GetDescription()));
104 }
105 
ScanLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)106 DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
107                                          size_t bytes_available,
108                                          const DataContext& context) {
109   DataMatchResult result;
110   size_t token_length = token_range_.GetLength();
111   if (token_length >= literal_or_sentinels_.length()) {
112     SetInternalError(context, "Literal already scanned", &result);
113     return result;
114   }
115   size_t bytes_still_needed = literal_or_sentinels_.length() - token_length;
116   size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
117   if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) ==
118       0) {
119     token_length = ExtendTokenLength(bytes_to_compare);
120     result.SetBytesConsumed(bytes_to_compare);
121     result.SetType(token_length == literal_or_sentinels_.length()
122                        ? DataMatchResult::kFull
123                        : DataMatchResult::kPartialOutOfData);
124   } else {
125     SetSyntaxError(context, "Expected literal", &result);
126   }
127   return result;
128 }
129 
ScanName(const char * cbytes,size_t bytes_available,const DataContext & context)130 DataMatchResult DataScanner::ScanName(const char* cbytes,
131                                       size_t bytes_available,
132                                       const DataContext& context) {
133   DataMatchResult result;
134   size_t token_length = token_range_.GetLength();
135   if (token_length == 0) {
136     if (!IsFirstNameChar(*cbytes)) {
137       SetSyntaxError(context, "Expected first character of a name", &result);
138       return result;
139     }
140     token_length = ExtendTokenLength(1);
141     result.SetBytesConsumed(1);
142     bytes_available -= 1;
143     cbytes += 1;
144   }
145   size_t optional_bytes_consumed =
146       ScanOptionalNameChars(cbytes, bytes_available);
147   token_length = ExtendTokenLength(optional_bytes_consumed);
148   result.IncrementBytesConsumed(optional_bytes_consumed);
149   if (result.GetBytesConsumed() == 0 && token_length > 0) {
150     result.SetType(DataMatchResult::kFull);
151   } else if (optional_bytes_consumed < bytes_available) {
152     result.SetType(DataMatchResult::kFull);
153   } else {
154     result.SetType(DataMatchResult::kPartialOutOfData);
155   }
156   return result;
157 }
158 
ScanQuotedString(const char * cbytes,size_t bytes_available,const DataContext & context)159 DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
160                                               size_t bytes_available,
161                                               const DataContext& context) {
162   const size_t kStart = 0;
163   const size_t kDone = '.';
164   const size_t kSquote = '\'';
165   const size_t kDquote = '"';
166   DataMatchResult result;
167   size_t token_length = token_range_.GetLength();
168   if ((data_ == kStart && token_length != 0) ||
169       (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
170     SetInternalError(context, "Inconsistent state", &result);
171     return result;
172   }
173   if (data_ == kStart) {
174     if (*cbytes != kSquote && *cbytes != kDquote) {
175       SetSyntaxError(context, "Expected start of a quoted string", &result);
176       return result;
177     }
178     data_ = *cbytes++;
179     bytes_available--;
180     result.SetBytesConsumed(1);
181     token_length = ExtendTokenLength(1);
182   }
183   const char* ebytes = reinterpret_cast<const char*>(
184       memchr(cbytes, static_cast<int>(data_), bytes_available));
185   size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
186   result.IncrementBytesConsumed(bytes_scanned);
187   token_length = ExtendTokenLength(bytes_scanned);
188   if (bytes_scanned == bytes_available) {
189     result.SetType(DataMatchResult::kPartialOutOfData);
190   } else {
191     result.SetType(DataMatchResult::kFull);
192     result.IncrementBytesConsumed(1);
193     ExtendTokenLength(1);
194     data_ = kDone;
195   }
196   return result;
197 }
198 
ScanSentinel(const char * cbytes,size_t bytes_available,const DataContext & context)199 DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
200                                           size_t bytes_available,
201                                           const DataContext& context) {
202   DataMatchResult result;
203   if (data_ != 0) {
204     SetInternalError(context, "Sentinel already scanned", &result);
205     return result;
206   }
207   char cbyte = *cbytes;
208   for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
209     char sentinel = literal_or_sentinels_[index];
210     if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
211       ExtendTokenLength(1);
212       result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
213       data_ = sentinel;
214       break;
215     }
216   }
217   if (result.GetBytesConsumed() == 0) {
218     SetSyntaxError(context, "Expected sentinal character", &result);
219   }
220   return result;
221 }
222 
ScanThroughLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)223 DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
224                                                 size_t bytes_available,
225                                                 const DataContext& context) {
226   DataMatchResult result;
227   size_t& scanned_literal_length = data_;
228   if (scanned_literal_length >= literal_or_sentinels_.length()) {
229     SetInternalError(context, "Literal already scanned", &result);
230     return result;
231   }
232   while (bytes_available > 0) {
233     if (scanned_literal_length == 0) {
234       // Literal scan not in progress. Find the first char of the literal.
235       auto* matched_byte = reinterpret_cast<const char*>(
236           memchr(cbytes, literal_or_sentinels_[0], bytes_available));
237       if (matched_byte == nullptr) {
238         // first char not found and chars exhausted.
239         ExtendTokenLength(bytes_available);
240         result.IncrementBytesConsumed(bytes_available);
241         result.SetType(DataMatchResult::kPartialOutOfData);
242         break;
243       } else {
244         // found the first char of the literal.
245         size_t bytes_scanned = (matched_byte - cbytes) + 1;
246         result.IncrementBytesConsumed(bytes_scanned);
247         bytes_available -= bytes_scanned;
248         cbytes += bytes_scanned;
249         ExtendTokenLength(bytes_scanned);
250         scanned_literal_length = 1;
251       }
252     }
253     // check if the rest of the literal is there.
254     size_t bytes_still_needed =
255         literal_or_sentinels_.length() - scanned_literal_length;
256     size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
257     if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
258                 bytes_to_compare) == 0) {
259       // Yes, the whole literal is there or chars are exhausted.
260       ExtendTokenLength(bytes_to_compare);
261       scanned_literal_length += bytes_to_compare;
262       result.IncrementBytesConsumed(bytes_to_compare);
263       result.SetType(scanned_literal_length == literal_or_sentinels_.length()
264                          ? DataMatchResult::kFull
265                          : DataMatchResult::kPartialOutOfData);
266       break;
267     }
268     // false alarm, the firsts char of the literal were found, but not the
269     // whole enchilada. Keep searching at one past the first char of the match.
270     scanned_literal_length = 0;
271   }
272   return result;
273 }
274 
ScanWhitespace(const char * cbytes,size_t bytes_available,const DataContext & context)275 DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
276                                             size_t bytes_available,
277                                             const DataContext& context) {
278   DataMatchResult result;
279   size_t token_length = token_range_.GetLength();
280   result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
281   token_length = ExtendTokenLength(result.GetBytesConsumed());
282   if (result.GetBytesConsumed() == 0) {
283     if (token_length == 0 && type_ == kWhitespace) {
284       SetSyntaxError(context, "Expected whitespace", &result);
285     } else {
286       result.SetType(DataMatchResult::kFull);
287     }
288   } else {
289     result.SetType((result.GetBytesConsumed() < bytes_available)
290                        ? DataMatchResult::kFull
291                        : DataMatchResult::kPartialOutOfData);
292   }
293   return result;
294 }
295 
Scan(const DataContext & context)296 DataMatchResult DataScanner::Scan(const DataContext& context) {
297   scan_call_count_ += 1;
298   DataMatchResult result;
299   if (!context.IsValidLocationAndRange()) {
300     SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
301                      &result);
302     return result;
303   }
304   if (!token_range_.IsValid()) {
305     token_range_ = DataRange(context.GetLocation(), context.GetLocation());
306   }
307   size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation();
308   const char* cbytes = context.GetCharBytes();
309   switch (type_) {
310     case kLiteral:
311       result = ScanLiteral(cbytes, bytes_available, context);
312       break;
313     case kName:
314       result = ScanName(cbytes, bytes_available, context);
315       break;
316     case kQuotedString:
317       result = ScanQuotedString(cbytes, bytes_available, context);
318       break;
319     case kSentinel:
320       result = ScanSentinel(cbytes, bytes_available, context);
321       break;
322     case kThroughLiteral:
323       result = ScanThroughLiteral(cbytes, bytes_available, context);
324       break;
325     case kWhitespace:
326     case kOptionalWhitespace:
327       result = ScanWhitespace(cbytes, bytes_available, context);
328       break;
329     default:
330       SetInternalError(context, "Undefined scanner type", &result);
331       break;
332   }
333   return result;
334 }
335 
ResetTokenRange()336 void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }
337 
Reset()338 void DataScanner::Reset() {
339   data_ = 0;
340   scan_call_count_ = 0;
341   ResetTokenRange();
342 }
343 
GetDescription() const344 std::string DataScanner::GetDescription() const {
345   std::string description;
346   switch (type_) {
347     case kLiteral:
348       description = "Literal:'";
349       description += literal_or_sentinels_;
350       description += "'";
351       break;
352     case kName:
353       description = "Name";
354       break;
355     case kQuotedString:
356       description = "QuotedString";
357       break;
358     case kSentinel:
359       description = "OneOf:'";
360       description += literal_or_sentinels_;
361       description += "'";
362       break;
363     case kThroughLiteral:
364       description = "ThruLiteral:'";
365       description += literal_or_sentinels_;
366       description += "'";
367       break;
368     case kWhitespace:
369       description = "Whitespace";
370       break;
371     case kOptionalWhitespace:
372       description = "OptionalWhitespace";
373       break;
374   }
375   return description;
376 }
377 
GetLiteral() const378 std::string DataScanner::GetLiteral() const {
379   return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
380                                                        : "";
381 }
382 
GetSentenels() const383 std::string DataScanner::GetSentenels() const {
384   return type_ == kSentinel ? literal_or_sentinels_ : "";
385 }
386 
GetSentinel() const387 char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; }
388 
389 }  // namespace image_io
390 }  // namespace photos_editing_formats
391