1 #include "image_io/base/data_scanner.h"
2
3 namespace photos_editing_formats {
4 namespace image_io {
5
6 namespace {
7
/// The set of characters treated as whitespace in this file: space, tab,
/// newline and carriage return.
const char kWhitespaceChars[] = " \t\n\r";
9
/// This function is like strspn but does not assume a null-terminated string.
/// Only the first slen bytes of s are ever read; in particular nothing is read
/// when slen is zero.
/// @param s The bytes to examine; need not be null-terminated.
/// @param slen The number of bytes available at s.
/// @param accept A null-terminated string of the characters to span over. A
///     '\0' byte in s never matches, since '\0' only terminates accept.
/// @return The length of the initial run of bytes in s that are all in accept.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span_length = 0;
  for (; span_length < slen; ++span_length) {
    const char c = s[span_length];
    // Walk the accept set until c is found or the terminator is reached.
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) {
      ++candidate;
    }
    if (*candidate == '\0') {
      break;  // c is not in the accept set, so the span ends here.
    }
  }
  return span_length;
}
23
/// Tests whether a character lies within an inclusive range.
/// @param value The character to test.
/// @param lo The lowest character in the range.
/// @param hi The highest character in the range.
/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  if (value < lo) {
    return false;
  }
  return !(hi < value);
}
28
29 /// @return Whether the value is the first character of a kName type scanner.
IsFirstNameChar(char value)30 bool IsFirstNameChar(char value) {
31 return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z') || value == '_' ||
32 value == ':';
33 }
34
35 /// Scans the characters in the s string, where the characters can be any legal
36 /// character in the name.
37 /// @return The number of name characters scanned.
ScanOptionalNameChars(const char * s,size_t slen)38 size_t ScanOptionalNameChars(const char* s, size_t slen) {
39 const char* kOptionalChars =
40 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
41 return memspn(s, slen, kOptionalChars);
42 }
43
44 /// Scans the whitespace characters in the s string.
45 /// @return The number of whitepace characters scanned.
ScanWhitespaceChars(const char * s,size_t slen)46 size_t ScanWhitespaceChars(const char* s, size_t slen) {
47 return memspn(s, slen, kWhitespaceChars);
48 }
49
50 } // namespace
51
GetWhitespaceChars()52 std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; }
53
CreateLiteralScanner(const std::string & literal)54 DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) {
55 return DataScanner(DataScanner::kLiteral, literal);
56 }
57
CreateNameScanner()58 DataScanner DataScanner::CreateNameScanner() {
59 return DataScanner(DataScanner::kName);
60 }
61
CreateQuotedStringScanner()62 DataScanner DataScanner::CreateQuotedStringScanner() {
63 return DataScanner(DataScanner::kQuotedString);
64 }
65
CreateSentinelScanner(const std::string & sentinels)66 DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) {
67 return DataScanner(DataScanner::kSentinel, sentinels);
68 }
69
CreateThroughLiteralScanner(const std::string & literal)70 DataScanner DataScanner::CreateThroughLiteralScanner(
71 const std::string& literal) {
72 return DataScanner(DataScanner::kThroughLiteral, literal);
73 }
74
CreateWhitespaceScanner()75 DataScanner DataScanner::CreateWhitespaceScanner() {
76 return DataScanner(DataScanner::kWhitespace);
77 }
78
CreateOptionalWhitespaceScanner()79 DataScanner DataScanner::CreateOptionalWhitespaceScanner() {
80 return DataScanner(DataScanner::kOptionalWhitespace);
81 }
82
ExtendTokenLength(size_t delta_length)83 size_t DataScanner::ExtendTokenLength(size_t delta_length) {
84 token_range_ =
85 DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length);
86 return token_range_.GetLength();
87 }
88
SetInternalError(const DataContext & context,const std::string & error_description,DataMatchResult * result)89 void DataScanner::SetInternalError(const DataContext& context,
90 const std::string& error_description,
91 DataMatchResult* result) {
92 result->SetType(DataMatchResult::kError);
93 result->SetMessage(
94 Message::kInternalError,
95 context.GetErrorText({}, {GetDescription()}, error_description, ""));
96 }
97
SetSyntaxError(const DataContext & context,const std::string & error_description,DataMatchResult * result)98 void DataScanner::SetSyntaxError(const DataContext& context,
99 const std::string& error_description,
100 DataMatchResult* result) {
101 result->SetType(DataMatchResult::kError);
102 result->SetMessage(Message::kSyntaxError,
103 context.GetErrorText(error_description, GetDescription()));
104 }
105
ScanLiteral(const char * cbytes,size_t bytes_available,const DataContext & context)106 DataMatchResult DataScanner::ScanLiteral(const char* cbytes,
107 size_t bytes_available,
108 const DataContext& context) {
109 DataMatchResult result;
110 size_t token_length = token_range_.GetLength();
111 if (token_length >= literal_or_sentinels_.length()) {
112 SetInternalError(context, "Literal already scanned", &result);
113 return result;
114 }
115 size_t bytes_still_needed = literal_or_sentinels_.length() - token_length;
116 size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
117 if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) ==
118 0) {
119 token_length = ExtendTokenLength(bytes_to_compare);
120 result.SetBytesConsumed(bytes_to_compare);
121 result.SetType(token_length == literal_or_sentinels_.length()
122 ? DataMatchResult::kFull
123 : DataMatchResult::kPartialOutOfData);
124 } else {
125 SetSyntaxError(context, "Expected literal", &result);
126 }
127 return result;
128 }
129
ScanName(const char * cbytes,size_t bytes_available,const DataContext & context)130 DataMatchResult DataScanner::ScanName(const char* cbytes,
131 size_t bytes_available,
132 const DataContext& context) {
133 DataMatchResult result;
134 size_t token_length = token_range_.GetLength();
135 if (token_length == 0) {
136 if (!IsFirstNameChar(*cbytes)) {
137 SetSyntaxError(context, "Expected first character of a name", &result);
138 return result;
139 }
140 token_length = ExtendTokenLength(1);
141 result.SetBytesConsumed(1);
142 bytes_available -= 1;
143 cbytes += 1;
144 }
145 size_t optional_bytes_consumed =
146 ScanOptionalNameChars(cbytes, bytes_available);
147 token_length = ExtendTokenLength(optional_bytes_consumed);
148 result.IncrementBytesConsumed(optional_bytes_consumed);
149 if (result.GetBytesConsumed() == 0 && token_length > 0) {
150 result.SetType(DataMatchResult::kFull);
151 } else if (optional_bytes_consumed < bytes_available) {
152 result.SetType(DataMatchResult::kFull);
153 } else {
154 result.SetType(DataMatchResult::kPartialOutOfData);
155 }
156 return result;
157 }
158
ScanQuotedString(const char * cbytes,size_t bytes_available,const DataContext & context)159 DataMatchResult DataScanner::ScanQuotedString(const char* cbytes,
160 size_t bytes_available,
161 const DataContext& context) {
162 const size_t kStart = 0;
163 const size_t kDone = '.';
164 const size_t kSquote = '\'';
165 const size_t kDquote = '"';
166 DataMatchResult result;
167 size_t token_length = token_range_.GetLength();
168 if ((data_ == kStart && token_length != 0) ||
169 (data_ != kStart && data_ != kSquote && data_ != kDquote)) {
170 SetInternalError(context, "Inconsistent state", &result);
171 return result;
172 }
173 if (data_ == kStart) {
174 if (*cbytes != kSquote && *cbytes != kDquote) {
175 SetSyntaxError(context, "Expected start of a quoted string", &result);
176 return result;
177 }
178 data_ = *cbytes++;
179 bytes_available--;
180 result.SetBytesConsumed(1);
181 token_length = ExtendTokenLength(1);
182 }
183 const char* ebytes = reinterpret_cast<const char*>(
184 memchr(cbytes, static_cast<int>(data_), bytes_available));
185 size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available;
186 result.IncrementBytesConsumed(bytes_scanned);
187 token_length = ExtendTokenLength(bytes_scanned);
188 if (bytes_scanned == bytes_available) {
189 result.SetType(DataMatchResult::kPartialOutOfData);
190 } else {
191 result.SetType(DataMatchResult::kFull);
192 result.IncrementBytesConsumed(1);
193 ExtendTokenLength(1);
194 data_ = kDone;
195 }
196 return result;
197 }
198
ScanSentinel(const char * cbytes,size_t bytes_available,const DataContext & context)199 DataMatchResult DataScanner::ScanSentinel(const char* cbytes,
200 size_t bytes_available,
201 const DataContext& context) {
202 DataMatchResult result;
203 if (data_ != 0) {
204 SetInternalError(context, "Sentinel already scanned", &result);
205 return result;
206 }
207 char cbyte = *cbytes;
208 for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) {
209 char sentinel = literal_or_sentinels_[index];
210 if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) {
211 ExtendTokenLength(1);
212 result.SetBytesConsumed(1).SetType(DataMatchResult::kFull);
213 data_ = sentinel;
214 break;
215 }
216 }
217 if (result.GetBytesConsumed() == 0) {
218 SetSyntaxError(context, "Expected sentinal character", &result);
219 }
220 return result;
221 }
222
/// Scans forward through the available bytes until the literal has been found
/// and consumed. Every byte skipped over, as well as the literal itself, is
/// added to the token and to the bytes consumed. The data_ member records how
/// many leading characters of the literal are currently matched, so a literal
/// that is split across two calls can be completed by the next call.
/// @param cbytes The bytes to scan.
/// @param bytes_available The number of bytes available at cbytes.
/// @param context The context used to build error messages.
/// @return kFull once the whole literal has been consumed, kPartialOutOfData
///     when the bytes ran out first, or an internal error result if the
///     literal was already fully scanned (or is empty).
DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes,
                                                size_t bytes_available,
                                                const DataContext& context) {
  DataMatchResult result;
  // Alias for data_: writes below update the scanner's persistent state.
  size_t& scanned_literal_length = data_;
  if (scanned_literal_length >= literal_or_sentinels_.length()) {
    SetInternalError(context, "Literal already scanned", &result);
    return result;
  }
  while (bytes_available > 0) {
    if (scanned_literal_length == 0) {
      // Literal scan not in progress. Find the first char of the literal.
      auto* matched_byte = reinterpret_cast<const char*>(
          memchr(cbytes, literal_or_sentinels_[0], bytes_available));
      if (matched_byte == nullptr) {
        // First char not found and chars exhausted: consume everything.
        ExtendTokenLength(bytes_available);
        result.IncrementBytesConsumed(bytes_available);
        result.SetType(DataMatchResult::kPartialOutOfData);
        break;
      } else {
        // Found the first char of the literal. Consume the bytes before it
        // and the first char itself.
        size_t bytes_scanned = (matched_byte - cbytes) + 1;
        result.IncrementBytesConsumed(bytes_scanned);
        bytes_available -= bytes_scanned;
        cbytes += bytes_scanned;
        ExtendTokenLength(bytes_scanned);
        scanned_literal_length = 1;
      }
    }
    // Check if the rest of the literal is there, comparing only as many bytes
    // as are available.
    size_t bytes_still_needed =
        literal_or_sentinels_.length() - scanned_literal_length;
    size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available);
    if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes,
                bytes_to_compare) == 0) {
      // Yes, the whole literal is there or chars are exhausted.
      ExtendTokenLength(bytes_to_compare);
      scanned_literal_length += bytes_to_compare;
      result.IncrementBytesConsumed(bytes_to_compare);
      result.SetType(scanned_literal_length == literal_or_sentinels_.length()
                         ? DataMatchResult::kFull
                         : DataMatchResult::kPartialOutOfData);
      break;
    }
    // False alarm: the first chars of the literal were found, but not the
    // whole literal. Restart the search from the current position; the
    // mismatched bytes are not consumed here, so the next iteration looks at
    // them again.
    scanned_literal_length = 0;
  }
  return result;
}
274
ScanWhitespace(const char * cbytes,size_t bytes_available,const DataContext & context)275 DataMatchResult DataScanner::ScanWhitespace(const char* cbytes,
276 size_t bytes_available,
277 const DataContext& context) {
278 DataMatchResult result;
279 size_t token_length = token_range_.GetLength();
280 result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available));
281 token_length = ExtendTokenLength(result.GetBytesConsumed());
282 if (result.GetBytesConsumed() == 0) {
283 if (token_length == 0 && type_ == kWhitespace) {
284 SetSyntaxError(context, "Expected whitespace", &result);
285 } else {
286 result.SetType(DataMatchResult::kFull);
287 }
288 } else {
289 result.SetType((result.GetBytesConsumed() < bytes_available)
290 ? DataMatchResult::kFull
291 : DataMatchResult::kPartialOutOfData);
292 }
293 return result;
294 }
295
Scan(const DataContext & context)296 DataMatchResult DataScanner::Scan(const DataContext& context) {
297 scan_call_count_ += 1;
298 DataMatchResult result;
299 if (!context.IsValidLocationAndRange()) {
300 SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(),
301 &result);
302 return result;
303 }
304 if (!token_range_.IsValid()) {
305 token_range_ = DataRange(context.GetLocation(), context.GetLocation());
306 }
307 size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation();
308 const char* cbytes = context.GetCharBytes();
309 switch (type_) {
310 case kLiteral:
311 result = ScanLiteral(cbytes, bytes_available, context);
312 break;
313 case kName:
314 result = ScanName(cbytes, bytes_available, context);
315 break;
316 case kQuotedString:
317 result = ScanQuotedString(cbytes, bytes_available, context);
318 break;
319 case kSentinel:
320 result = ScanSentinel(cbytes, bytes_available, context);
321 break;
322 case kThroughLiteral:
323 result = ScanThroughLiteral(cbytes, bytes_available, context);
324 break;
325 case kWhitespace:
326 case kOptionalWhitespace:
327 result = ScanWhitespace(cbytes, bytes_available, context);
328 break;
329 default:
330 SetInternalError(context, "Undefined scanner type", &result);
331 break;
332 }
333 return result;
334 }
335
ResetTokenRange()336 void DataScanner::ResetTokenRange() { token_range_ = DataRange(); }
337
Reset()338 void DataScanner::Reset() {
339 data_ = 0;
340 scan_call_count_ = 0;
341 ResetTokenRange();
342 }
343
GetDescription() const344 std::string DataScanner::GetDescription() const {
345 std::string description;
346 switch (type_) {
347 case kLiteral:
348 description = "Literal:'";
349 description += literal_or_sentinels_;
350 description += "'";
351 break;
352 case kName:
353 description = "Name";
354 break;
355 case kQuotedString:
356 description = "QuotedString";
357 break;
358 case kSentinel:
359 description = "OneOf:'";
360 description += literal_or_sentinels_;
361 description += "'";
362 break;
363 case kThroughLiteral:
364 description = "ThruLiteral:'";
365 description += literal_or_sentinels_;
366 description += "'";
367 break;
368 case kWhitespace:
369 description = "Whitespace";
370 break;
371 case kOptionalWhitespace:
372 description = "OptionalWhitespace";
373 break;
374 }
375 return description;
376 }
377
GetLiteral() const378 std::string DataScanner::GetLiteral() const {
379 return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_
380 : "";
381 }
382
GetSentenels() const383 std::string DataScanner::GetSentenels() const {
384 return type_ == kSentinel ? literal_or_sentinels_ : "";
385 }
386
GetSentinel() const387 char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; }
388
389 } // namespace image_io
390 } // namespace photos_editing_formats
391