• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2020 The Tint Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/reader/wgsl/lexer.h"
16 
17 #include <cmath>
18 #include <cstring>
19 #include <limits>
20 #include <utility>
21 
22 #include "src/debug.h"
23 
24 namespace tint {
25 namespace reader {
26 namespace wgsl {
27 namespace {
28 
// Returns true if `c` is classified as whitespace by std::isspace.
bool is_whitespace(char c) {
  // The <cctype> classifiers have undefined behavior when given a negative
  // value other than EOF. `char` may be signed, so bytes >= 0x80 must be
  // converted to unsigned char before the call.
  return std::isspace(static_cast<unsigned char>(c)) != 0;
}
32 
// Converts a decimal digit character to its numeric value.
// Any character outside '0'..'9' yields 0.
uint32_t dec_value(char c) {
  const bool is_decimal_digit = !(c < '0' || c > '9');
  return is_decimal_digit ? static_cast<uint32_t>(c - '0') : 0u;
}
39 
// Converts a hexadecimal digit character (either case) to its numeric value.
// Any character that is not a hexadecimal digit yields 0.
uint32_t hex_value(char c) {
  uint32_t value = 0;
  if ('0' <= c && c <= '9') {
    value = static_cast<uint32_t>(c - '0');
  } else if ('a' <= c && c <= 'f') {
    value = static_cast<uint32_t>(c - 'a') + 0xA;
  } else if ('A' <= c && c <= 'F') {
    value = static_cast<uint32_t>(c - 'A') + 0xA;
  }
  return value;
}
52 
53 }  // namespace
54 
Lexer(const std::string & file_path,const Source::FileContent * content)55 Lexer::Lexer(const std::string& file_path, const Source::FileContent* content)
56     : file_path_(file_path),
57       content_(content),
58       len_(static_cast<uint32_t>(content->data.size())),
59       location_{1, 1} {}
60 
61 Lexer::~Lexer() = default;
62 
next()63 Token Lexer::next() {
64   auto t = skip_whitespace_and_comments();
65   if (!t.IsUninitialized()) {
66     return t;
67   }
68 
69   t = try_hex_float();
70   if (!t.IsUninitialized()) {
71     return t;
72   }
73 
74   t = try_hex_integer();
75   if (!t.IsUninitialized()) {
76     return t;
77   }
78 
79   t = try_float();
80   if (!t.IsUninitialized()) {
81     return t;
82   }
83 
84   t = try_integer();
85   if (!t.IsUninitialized()) {
86     return t;
87   }
88 
89   t = try_ident();
90   if (!t.IsUninitialized()) {
91     return t;
92   }
93 
94   t = try_punctuation();
95   if (!t.IsUninitialized()) {
96     return t;
97   }
98 
99   return {Token::Type::kError, begin_source(), "invalid character found"};
100 }
101 
begin_source() const102 Source Lexer::begin_source() const {
103   Source src{};
104   src.file_path = file_path_;
105   src.file_content = content_;
106   src.range.begin = location_;
107   src.range.end = location_;
108   return src;
109 }
110 
end_source(Source & src) const111 void Lexer::end_source(Source& src) const {
112   src.range.end = location_;
113 }
114 
is_eof() const115 bool Lexer::is_eof() const {
116   return pos_ >= len_;
117 }
118 
is_alpha(char ch) const119 bool Lexer::is_alpha(char ch) const {
120   return std::isalpha(ch);
121 }
122 
is_digit(char ch) const123 bool Lexer::is_digit(char ch) const {
124   return std::isdigit(ch);
125 }
126 
is_alphanum_underscore(char ch) const127 bool Lexer::is_alphanum_underscore(char ch) const {
128   return is_alpha(ch) || is_digit(ch) || ch == '_';
129 }
130 
is_hex(char ch) const131 bool Lexer::is_hex(char ch) const {
132   return std::isxdigit(ch);
133 }
134 
matches(size_t pos,const std::string & substr)135 bool Lexer::matches(size_t pos, const std::string& substr) {
136   if (pos >= len_)
137     return false;
138   return content_->data.substr(pos, substr.size()) == substr;
139 }
140 
skip_whitespace_and_comments()141 Token Lexer::skip_whitespace_and_comments() {
142   for (;;) {
143     auto pos = pos_;
144     while (!is_eof() && is_whitespace(content_->data[pos_])) {
145       if (matches(pos_, "\n")) {
146         pos_++;
147         location_.line++;
148         location_.column = 1;
149         continue;
150       }
151 
152       pos_++;
153       location_.column++;
154     }
155 
156     auto t = skip_comment();
157     if (!t.IsUninitialized()) {
158       return t;
159     }
160 
161     // If the cursor didn't advance we didn't remove any whitespace
162     // so we're done.
163     if (pos == pos_)
164       break;
165   }
166   if (is_eof()) {
167     return {Token::Type::kEOF, begin_source()};
168   }
169 
170   return {};
171 }
172 
skip_comment()173 Token Lexer::skip_comment() {
174   if (matches(pos_, "//")) {
175     // Line comment: ignore everything until the end of line
176     // or end of input.
177     while (!is_eof() && !matches(pos_, "\n")) {
178       pos_++;
179       location_.column++;
180     }
181     return {};
182   }
183 
184   if (matches(pos_, "/*")) {
185     // Block comment: ignore everything until the closing '*/' token.
186 
187     // Record source location of the initial '/*'
188     auto source = begin_source();
189     source.range.end.column += 1;
190 
191     pos_ += 2;
192     location_.column += 2;
193 
194     int depth = 1;
195     while (!is_eof() && depth > 0) {
196       if (matches(pos_, "/*")) {
197         // Start of block comment: increase nesting depth.
198         pos_ += 2;
199         location_.column += 2;
200         depth++;
201       } else if (matches(pos_, "*/")) {
202         // End of block comment: decrease nesting depth.
203         pos_ += 2;
204         location_.column += 2;
205         depth--;
206       } else if (matches(pos_, "\n")) {
207         // Newline: skip and update source location.
208         pos_++;
209         location_.line++;
210         location_.column = 1;
211       } else {
212         // Anything else: skip and update source location.
213         pos_++;
214         location_.column++;
215       }
216     }
217     if (depth > 0) {
218       return {Token::Type::kError, source, "unterminated block comment"};
219     }
220   }
221   return {};
222 }
223 
try_float()224 Token Lexer::try_float() {
225   auto start = pos_;
226   auto end = pos_;
227 
228   auto source = begin_source();
229   bool has_mantissa_digits = false;
230 
231   if (matches(end, "-")) {
232     end++;
233   }
234   while (end < len_ && is_digit(content_->data[end])) {
235     has_mantissa_digits = true;
236     end++;
237   }
238 
239   bool has_point = false;
240   if (end < len_ && matches(end, ".")) {
241     has_point = true;
242     end++;
243   }
244 
245   while (end < len_ && is_digit(content_->data[end])) {
246     has_mantissa_digits = true;
247     end++;
248   }
249 
250   if (!has_mantissa_digits) {
251     return {};
252   }
253 
254   // Parse the exponent if one exists
255   bool has_exponent = false;
256   if (end < len_ && (matches(end, "e") || matches(end, "E"))) {
257     end++;
258     if (end < len_ && (matches(end, "+") || matches(end, "-"))) {
259       end++;
260     }
261 
262     while (end < len_ && isdigit(content_->data[end])) {
263       has_exponent = true;
264       end++;
265     }
266 
267     // If an 'e' or 'E' was present, then the number part must also be present.
268     if (!has_exponent) {
269       const auto str = content_->data.substr(start, end - start);
270       return {Token::Type::kError, source,
271               "incomplete exponent for floating point literal: " + str};
272     }
273   }
274 
275   bool has_f_suffix = false;
276   if (end < len_ && matches(end, "f")) {
277     end++;
278     has_f_suffix = true;
279   }
280 
281   if (!has_point && !has_exponent && !has_f_suffix) {
282     // If it only has digits then it's an integer.
283     return {};
284   }
285 
286   // Save the error string, for use by diagnostics.
287   const auto str = content_->data.substr(start, end - start);
288 
289   pos_ = end;
290   location_.column += (end - start);
291 
292   end_source(source);
293 
294   auto res = strtod(content_->data.c_str() + start, nullptr);
295   // This errors out if a non-zero magnitude is too small to represent in a
296   // float. It can't be represented faithfully in an f32.
297   const auto magnitude = std::fabs(res);
298   if (0.0 < magnitude &&
299       magnitude < static_cast<double>(std::numeric_limits<float>::min())) {
300     return {Token::Type::kError, source,
301             "f32 (" + str + ") magnitude too small, not representable"};
302   }
303   // This handles if the number is really large negative number
304   if (res < static_cast<double>(std::numeric_limits<float>::lowest())) {
305     return {Token::Type::kError, source,
306             "f32 (" + str + ") too large (negative)"};
307   }
308   if (res > static_cast<double>(std::numeric_limits<float>::max())) {
309     return {Token::Type::kError, source,
310             "f32 (" + str + ") too large (positive)"};
311   }
312 
313   return {source, static_cast<float>(res)};
314 }
315 
try_hex_float()316 Token Lexer::try_hex_float() {
317   constexpr uint32_t kTotalBits = 32;
318   constexpr uint32_t kTotalMsb = kTotalBits - 1;
319   constexpr uint32_t kMantissaBits = 23;
320   constexpr uint32_t kMantissaMsb = kMantissaBits - 1;
321   constexpr uint32_t kMantissaShiftRight = kTotalBits - kMantissaBits;
322   constexpr int32_t kExponentBias = 127;
323   constexpr int32_t kExponentMax = 255;
324   constexpr uint32_t kExponentBits = 8;
325   constexpr uint32_t kExponentMask = (1 << kExponentBits) - 1;
326   constexpr uint32_t kExponentLeftShift = kMantissaBits;
327   constexpr uint32_t kSignBit = 31;
328 
329   auto start = pos_;
330   auto end = pos_;
331 
332   auto source = begin_source();
333 
334   // clang-format off
335   // -?0x([0-9a-fA-F]*.?[0-9a-fA-F]+ | [0-9a-fA-F]+.[0-9a-fA-F]*)(p|P)(+|-)?[0-9]+  // NOLINT
336   // clang-format on
337 
338   // -?
339   int32_t sign_bit = 0;
340   if (matches(end, "-")) {
341     sign_bit = 1;
342     end++;
343   }
344   // 0x
345   if (matches(end, "0x")) {
346     end += 2;
347   } else {
348     return {};
349   }
350 
351   uint32_t mantissa = 0;
352   uint32_t exponent = 0;
353 
354   // TODO(dneto): Values in the normal range for the format do not explicitly
355   // store the most significant bit.  The algorithm here works hard to eliminate
356   // that bit in the representation during parsing, and then it backtracks
357   // when it sees it may have to explicitly represent it, and backtracks again
358   // when it sees the number is sub-normal (i.e. the exponent underflows).
359   // I suspect the logic can be clarified by storing it during parsing, and
360   // then removing it later only when needed.
361 
362   // `set_next_mantissa_bit_to` sets next `mantissa` bit starting from msb to
363   // lsb to value 1 if `set` is true, 0 otherwise. Returns true on success, i.e.
364   // when the bit can be accommodated in the available space.
365   uint32_t mantissa_next_bit = kTotalMsb;
366   auto set_next_mantissa_bit_to = [&](bool set, bool integer_part) -> bool {
367     // If adding bits for the integer part, we can overflow whether we set the
368     // bit or not. For the fractional part, we can only overflow when setting
369     // the bit.
370     const bool check_overflow = integer_part || set;
371     // Note: mantissa_next_bit actually decrements, so comparing it as
372     // larger than a positive number relies on wraparound.
373     if (check_overflow && (mantissa_next_bit > kTotalMsb)) {
374       return false;  // Overflowed mantissa
375     }
376     if (set) {
377       mantissa |= (1 << mantissa_next_bit);
378     }
379     --mantissa_next_bit;
380     return true;
381   };
382 
383   // Collect integer range (if any)
384   auto integer_range = std::make_pair(end, end);
385   while (end < len_ && is_hex(content_->data[end])) {
386     integer_range.second = ++end;
387   }
388 
389   // .?
390   bool hex_point = false;
391   if (matches(end, ".")) {
392     hex_point = true;
393     end++;
394   }
395 
396   // Collect fractional range (if any)
397   auto fractional_range = std::make_pair(end, end);
398   while (end < len_ && is_hex(content_->data[end])) {
399     fractional_range.second = ++end;
400   }
401 
402   // Must have at least an integer or fractional part
403   if ((integer_range.first == integer_range.second) &&
404       (fractional_range.first == fractional_range.second)) {
405     return {};
406   }
407 
408   // Is the binary exponent present?  It's optional.
409   const bool has_exponent = (matches(end, "p") || matches(end, "P"));
410   if (has_exponent) {
411     end++;
412   }
413   if (!has_exponent && !hex_point) {
414     // It's not a hex float. At best it's a hex integer.
415     return {};
416   }
417 
418   // At this point, we know for sure our token is a hex float value,
419   // or an invalid token.
420 
421   // Parse integer part
422   // [0-9a-fA-F]*
423 
424   bool has_zero_integer = true;
425   // The magnitude is zero if and only if seen_prior_one_bits is false.
426   bool seen_prior_one_bits = false;
427   for (auto i = integer_range.first; i < integer_range.second; ++i) {
428     const auto nibble = hex_value(content_->data[i]);
429     if (nibble != 0) {
430       has_zero_integer = false;
431     }
432 
433     for (int32_t bit = 3; bit >= 0; --bit) {
434       auto v = 1 & (nibble >> bit);
435 
436       // Skip leading 0s and the first 1
437       if (seen_prior_one_bits) {
438         if (!set_next_mantissa_bit_to(v != 0, true)) {
439           return {Token::Type::kError, source,
440                   "mantissa is too large for hex float"};
441         }
442         ++exponent;
443       } else {
444         if (v == 1) {
445           seen_prior_one_bits = true;
446         }
447       }
448     }
449   }
450 
451   // Parse fractional part
452   // [0-9a-fA-F]*
453   for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
454     auto nibble = hex_value(content_->data[i]);
455     for (int32_t bit = 3; bit >= 0; --bit) {
456       auto v = 1 & (nibble >> bit);
457 
458       if (v == 1) {
459         seen_prior_one_bits = true;
460       }
461 
462       // If integer part is 0, we only start writing bits to the
463       // mantissa once we have a non-zero fractional bit. While the fractional
464       // values are 0, we adjust the exponent to avoid overflowing `mantissa`.
465       if (!seen_prior_one_bits) {
466         --exponent;
467       } else {
468         if (!set_next_mantissa_bit_to(v != 0, false)) {
469           return {Token::Type::kError, source,
470                   "mantissa is too large for hex float"};
471         }
472       }
473     }
474   }
475 
476   // Determine if the value of the mantissa is zero.
477   // Note: it's not enough to check mantissa == 0 as we drop the initial bit,
478   // whether it's in the integer part or the fractional part.
479   const bool is_zero = !seen_prior_one_bits;
480   TINT_ASSERT(Reader, !is_zero || mantissa == 0);
481 
482   // Parse the optional exponent.
483   // ((p|P)(\+|-)?[0-9]+)?
484   uint32_t input_exponent = 0;  // Defaults to 0 if not present
485   int32_t exponent_sign = 1;
486   // If the 'p' part is present, the rest of the exponent must exist.
487   if (has_exponent) {
488     // Parse the rest of the exponent.
489     // (+|-)?
490     if (matches(end, "+")) {
491       end++;
492     } else if (matches(end, "-")) {
493       exponent_sign = -1;
494       end++;
495     }
496 
497     // Parse exponent from input
498     // [0-9]+
499     // Allow overflow (in uint32_t) when the floating point value magnitude is
500     // zero.
501     bool has_exponent_digits = false;
502     while (end < len_ && isdigit(content_->data[end])) {
503       has_exponent_digits = true;
504       auto prev_exponent = input_exponent;
505       input_exponent = (input_exponent * 10) + dec_value(content_->data[end]);
506       // Check if we've overflowed input_exponent. This only matters when
507       // the mantissa is non-zero.
508       if (!is_zero && (prev_exponent > input_exponent)) {
509         return {Token::Type::kError, source,
510                 "exponent is too large for hex float"};
511       }
512       end++;
513     }
514 
515     // Parse optional 'f' suffix.  For a hex float, it can only exist
516     // when the exponent is present. Otherwise it will look like
517     // one of the mantissa digits.
518     if (end < len_ && matches(end, "f")) {
519       end++;
520     }
521 
522     if (!has_exponent_digits) {
523       return {Token::Type::kError, source,
524               "expected an exponent value for hex float"};
525     }
526   }
527 
528   pos_ = end;
529   location_.column += (end - start);
530   end_source(source);
531 
532   if (is_zero) {
533     // If value is zero, then ignore the exponent and produce a zero
534     exponent = 0;
535   } else {
536     // Ensure input exponent is not too large; i.e. that it won't overflow when
537     // adding the exponent bias.
538     const uint32_t kIntMax =
539         static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
540     const uint32_t kMaxInputExponent = kIntMax - kExponentBias;
541     if (input_exponent > kMaxInputExponent) {
542       return {Token::Type::kError, source,
543               "exponent is too large for hex float"};
544     }
545 
546     // Compute exponent so far
547     exponent += static_cast<uint32_t>(static_cast<int32_t>(input_exponent) *
548                                       exponent_sign);
549 
550     // Bias exponent if non-zero
551     // After this, if exponent is <= 0, our value is a denormal
552     exponent += kExponentBias;
553 
554     // We know the number is not zero.  The MSB is 1 (by construction), and
555     // should be eliminated because it becomes the implicit 1 that isn't
556     // explicitly represented in the binary32 format.  We'll bring it back
557     // later if we find the exponent actually underflowed, i.e. the number
558     // is sub-normal.
559     if (has_zero_integer) {
560       mantissa <<= 1;
561       --exponent;
562     }
563   }
564 
565   // We can now safely work with exponent as a signed quantity, as there's no
566   // chance to overflow
567   int32_t signed_exponent = static_cast<int32_t>(exponent);
568 
569   // Shift mantissa to occupy the low 23 bits
570   mantissa >>= kMantissaShiftRight;
571 
572   // If denormal, shift mantissa until our exponent is zero
573   if (!is_zero) {
574     // Denorm has exponent 0 and non-zero mantissa. We set the top bit here,
575     // then shift the mantissa to make exponent zero.
576     if (signed_exponent <= 0) {
577       mantissa >>= 1;
578       mantissa |= (1 << kMantissaMsb);
579     }
580 
581     while (signed_exponent < 0) {
582       mantissa >>= 1;
583       ++signed_exponent;
584 
585       // If underflow, clamp to zero
586       if (mantissa == 0) {
587         signed_exponent = 0;
588       }
589     }
590   }
591 
592   if (signed_exponent > kExponentMax) {
593     // Overflow: set to infinity
594     signed_exponent = kExponentMax;
595     mantissa = 0;
596   } else if (signed_exponent == kExponentMax && mantissa != 0) {
597     // NaN: set to infinity
598     mantissa = 0;
599   }
600 
601   // Combine sign, mantissa, and exponent
602   uint32_t result_u32 = sign_bit << kSignBit;
603   result_u32 |= mantissa;
604   result_u32 |= (static_cast<uint32_t>(signed_exponent) & kExponentMask)
605                 << kExponentLeftShift;
606 
607   // Reinterpret as float and return
608   float result;
609   std::memcpy(&result, &result_u32, sizeof(result));
610   return {source, static_cast<float>(result)};
611 }
612 
build_token_from_int_if_possible(Source source,size_t start,size_t end,int32_t base)613 Token Lexer::build_token_from_int_if_possible(Source source,
614                                               size_t start,
615                                               size_t end,
616                                               int32_t base) {
617   auto res = strtoll(content_->data.c_str() + start, nullptr, base);
618   if (matches(pos_, "u")) {
619     if (static_cast<uint64_t>(res) >
620         static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
621       return {
622           Token::Type::kError, source,
623           "u32 (" + content_->data.substr(start, end - start) + ") too large"};
624     }
625     pos_ += 1;
626     location_.column += 1;
627     end_source(source);
628     return {source, static_cast<uint32_t>(res)};
629   }
630 
631   if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
632     return {
633         Token::Type::kError, source,
634         "i32 (" + content_->data.substr(start, end - start) + ") too small"};
635   }
636   if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
637     return {
638         Token::Type::kError, source,
639         "i32 (" + content_->data.substr(start, end - start) + ") too large"};
640   }
641   end_source(source);
642   return {source, static_cast<int32_t>(res)};
643 }
644 
try_hex_integer()645 Token Lexer::try_hex_integer() {
646   constexpr size_t kMaxDigits = 8;  // Valid for both 32-bit integer types
647   auto start = pos_;
648   auto end = pos_;
649 
650   auto source = begin_source();
651 
652   if (matches(end, "-")) {
653     end++;
654   }
655 
656   if (!matches(end, "0x")) {
657     return {};
658   }
659   end += 2;
660 
661   auto first = end;
662   while (!is_eof() && is_hex(content_->data[end])) {
663     end++;
664 
665     auto digits = end - first;
666     if (digits > kMaxDigits) {
667       return {Token::Type::kError, source,
668               "integer literal (" +
669                   content_->data.substr(start, end - 1 - start) +
670                   "...) has too many digits"};
671     }
672   }
673 
674   pos_ = end;
675   location_.column += (end - start);
676 
677   return build_token_from_int_if_possible(source, start, end, 16);
678 }
679 
try_integer()680 Token Lexer::try_integer() {
681   constexpr size_t kMaxDigits = 10;  // Valid for both 32-bit integer types
682   auto start = pos_;
683   auto end = start;
684 
685   auto source = begin_source();
686 
687   if (matches(end, "-")) {
688     end++;
689   }
690 
691   if (end >= len_ || !is_digit(content_->data[end])) {
692     return {};
693   }
694 
695   auto first = end;
696   // If the first digit is a zero this must only be zero as leading zeros
697   // are not allowed.
698   auto next = first + 1;
699   if (next < len_) {
700     if (content_->data[first] == '0' && is_digit(content_->data[next])) {
701       return {Token::Type::kError, source,
702               "integer literal (" +
703                   content_->data.substr(start, end - 1 - start) +
704                   "...) has leading 0s"};
705     }
706   }
707 
708   while (end < len_ && is_digit(content_->data[end])) {
709     auto digits = end - first;
710     if (digits > kMaxDigits) {
711       return {Token::Type::kError, source,
712               "integer literal (" +
713                   content_->data.substr(start, end - 1 - start) +
714                   "...) has too many digits"};
715     }
716 
717     end++;
718   }
719 
720   pos_ = end;
721   location_.column += (end - start);
722 
723   return build_token_from_int_if_possible(source, start, end, 10);
724 }
725 
try_ident()726 Token Lexer::try_ident() {
727   // Must begin with an a-zA-Z_
728   if (!(is_alpha(content_->data[pos_]) || content_->data[pos_] == '_')) {
729     return {};
730   }
731 
732   auto source = begin_source();
733 
734   auto s = pos_;
735   while (!is_eof() && is_alphanum_underscore(content_->data[pos_])) {
736     pos_++;
737     location_.column++;
738   }
739 
740   if (content_->data[s] == '_') {
741     // Check for an underscore on its own (special token), or a
742     // double-underscore (not allowed).
743     if ((pos_ == s + 1) || (content_->data[s + 1] == '_')) {
744       location_.column -= (pos_ - s);
745       pos_ = s;
746       return {};
747     }
748   }
749 
750   auto str = content_->data.substr(s, pos_ - s);
751   end_source(source);
752 
753   auto t = check_keyword(source, str);
754   if (!t.IsUninitialized()) {
755     return t;
756   }
757 
758   return {Token::Type::kIdentifier, source, str};
759 }
760 
try_punctuation()761 Token Lexer::try_punctuation() {
762   auto source = begin_source();
763   auto type = Token::Type::kUninitialized;
764 
765   if (matches(pos_, "[[")) {
766     type = Token::Type::kAttrLeft;
767     pos_ += 2;
768     location_.column += 2;
769   } else if (matches(pos_, "]]")) {
770     type = Token::Type::kAttrRight;
771     pos_ += 2;
772     location_.column += 2;
773   } else if (matches(pos_, "(")) {
774     type = Token::Type::kParenLeft;
775     pos_ += 1;
776     location_.column += 1;
777   } else if (matches(pos_, ")")) {
778     type = Token::Type::kParenRight;
779     pos_ += 1;
780     location_.column += 1;
781   } else if (matches(pos_, "[")) {
782     type = Token::Type::kBracketLeft;
783     pos_ += 1;
784     location_.column += 1;
785   } else if (matches(pos_, "]")) {
786     type = Token::Type::kBracketRight;
787     pos_ += 1;
788     location_.column += 1;
789   } else if (matches(pos_, "{")) {
790     type = Token::Type::kBraceLeft;
791     pos_ += 1;
792     location_.column += 1;
793   } else if (matches(pos_, "}")) {
794     type = Token::Type::kBraceRight;
795     pos_ += 1;
796     location_.column += 1;
797   } else if (matches(pos_, "&&")) {
798     type = Token::Type::kAndAnd;
799     pos_ += 2;
800     location_.column += 2;
801   } else if (matches(pos_, "&")) {
802     type = Token::Type::kAnd;
803     pos_ += 1;
804     location_.column += 1;
805   } else if (matches(pos_, "/")) {
806     type = Token::Type::kForwardSlash;
807     pos_ += 1;
808     location_.column += 1;
809   } else if (matches(pos_, "!=")) {
810     type = Token::Type::kNotEqual;
811     pos_ += 2;
812     location_.column += 2;
813   } else if (matches(pos_, "!")) {
814     type = Token::Type::kBang;
815     pos_ += 1;
816     location_.column += 1;
817   } else if (matches(pos_, ":")) {
818     type = Token::Type::kColon;
819     pos_ += 1;
820     location_.column += 1;
821   } else if (matches(pos_, ",")) {
822     type = Token::Type::kComma;
823     pos_ += 1;
824     location_.column += 1;
825   } else if (matches(pos_, "==")) {
826     type = Token::Type::kEqualEqual;
827     pos_ += 2;
828     location_.column += 2;
829   } else if (matches(pos_, "=")) {
830     type = Token::Type::kEqual;
831     pos_ += 1;
832     location_.column += 1;
833   } else if (matches(pos_, ">=")) {
834     type = Token::Type::kGreaterThanEqual;
835     pos_ += 2;
836     location_.column += 2;
837   } else if (matches(pos_, ">>")) {
838     type = Token::Type::kShiftRight;
839     pos_ += 2;
840     location_.column += 2;
841   } else if (matches(pos_, ">")) {
842     type = Token::Type::kGreaterThan;
843     pos_ += 1;
844     location_.column += 1;
845   } else if (matches(pos_, "<=")) {
846     type = Token::Type::kLessThanEqual;
847     pos_ += 2;
848     location_.column += 2;
849   } else if (matches(pos_, "<<")) {
850     type = Token::Type::kShiftLeft;
851     pos_ += 2;
852     location_.column += 2;
853   } else if (matches(pos_, "<")) {
854     type = Token::Type::kLessThan;
855     pos_ += 1;
856     location_.column += 1;
857   } else if (matches(pos_, "%")) {
858     type = Token::Type::kMod;
859     pos_ += 1;
860     location_.column += 1;
861   } else if (matches(pos_, "->")) {
862     type = Token::Type::kArrow;
863     pos_ += 2;
864     location_.column += 2;
865   } else if (matches(pos_, "--")) {
866     type = Token::Type::kMinusMinus;
867     pos_ += 2;
868     location_.column += 2;
869   } else if (matches(pos_, "-")) {
870     type = Token::Type::kMinus;
871     pos_ += 1;
872     location_.column += 1;
873   } else if (matches(pos_, ".")) {
874     type = Token::Type::kPeriod;
875     pos_ += 1;
876     location_.column += 1;
877   } else if (matches(pos_, "++")) {
878     type = Token::Type::kPlusPlus;
879     pos_ += 2;
880     location_.column += 2;
881   } else if (matches(pos_, "+")) {
882     type = Token::Type::kPlus;
883     pos_ += 1;
884     location_.column += 1;
885   } else if (matches(pos_, "||")) {
886     type = Token::Type::kOrOr;
887     pos_ += 2;
888     location_.column += 2;
889   } else if (matches(pos_, "|")) {
890     type = Token::Type::kOr;
891     pos_ += 1;
892     location_.column += 1;
893   } else if (matches(pos_, ";")) {
894     type = Token::Type::kSemicolon;
895     pos_ += 1;
896     location_.column += 1;
897   } else if (matches(pos_, "*")) {
898     type = Token::Type::kStar;
899     pos_ += 1;
900     location_.column += 1;
901   } else if (matches(pos_, "~")) {
902     type = Token::Type::kTilde;
903     pos_ += 1;
904     location_.column += 1;
905   } else if (matches(pos_, "_")) {
906     type = Token::Type::kUnderscore;
907     pos_ += 1;
908     location_.column += 1;
909   } else if (matches(pos_, "^")) {
910     type = Token::Type::kXor;
911     pos_ += 1;
912     location_.column += 1;
913   }
914 
915   end_source(source);
916 
917   return {type, source};
918 }
919 
check_keyword(const Source & source,const std::string & str)920 Token Lexer::check_keyword(const Source& source, const std::string& str) {
921   if (str == "array")
922     return {Token::Type::kArray, source, "array"};
923   if (str == "atomic")
924     return {Token::Type::kAtomic, source, "atomic"};
925   if (str == "bitcast")
926     return {Token::Type::kBitcast, source, "bitcast"};
927   if (str == "bool")
928     return {Token::Type::kBool, source, "bool"};
929   if (str == "break")
930     return {Token::Type::kBreak, source, "break"};
931   if (str == "case")
932     return {Token::Type::kCase, source, "case"};
933   if (str == "continue")
934     return {Token::Type::kContinue, source, "continue"};
935   if (str == "continuing")
936     return {Token::Type::kContinuing, source, "continuing"};
937   if (str == "discard")
938     return {Token::Type::kDiscard, source, "discard"};
939   if (str == "default")
940     return {Token::Type::kDefault, source, "default"};
941   if (str == "else")
942     return {Token::Type::kElse, source, "else"};
943   if (str == "elseif")
944     return {Token::Type::kElseIf, source, "elseif"};
945   if (str == "f32")
946     return {Token::Type::kF32, source, "f32"};
947   if (str == "fallthrough")
948     return {Token::Type::kFallthrough, source, "fallthrough"};
949   if (str == "false")
950     return {Token::Type::kFalse, source, "false"};
951   if (str == "fn")
952     return {Token::Type::kFn, source, "fn"};
953   if (str == "for")
954     return {Token::Type::kFor, source, "for"};
955   if (str == "bgra8unorm")
956     return {Token::Type::kFormatBgra8Unorm, source, "bgra8unorm"};
957   if (str == "bgra8unorm_srgb")
958     return {Token::Type::kFormatBgra8UnormSrgb, source, "bgra8unorm_srgb"};
959   if (str == "r16float")
960     return {Token::Type::kFormatR16Float, source, "r16float"};
961   if (str == "r16sint")
962     return {Token::Type::kFormatR16Sint, source, "r16sint"};
963   if (str == "r16uint")
964     return {Token::Type::kFormatR16Uint, source, "r16uint"};
965   if (str == "r32float")
966     return {Token::Type::kFormatR32Float, source, "r32float"};
967   if (str == "r32sint")
968     return {Token::Type::kFormatR32Sint, source, "r32sint"};
969   if (str == "r32uint")
970     return {Token::Type::kFormatR32Uint, source, "r32uint"};
971   if (str == "r8sint")
972     return {Token::Type::kFormatR8Sint, source, "r8sint"};
973   if (str == "r8snorm")
974     return {Token::Type::kFormatR8Snorm, source, "r8snorm"};
975   if (str == "r8uint")
976     return {Token::Type::kFormatR8Uint, source, "r8uint"};
977   if (str == "r8unorm")
978     return {Token::Type::kFormatR8Unorm, source, "r8unorm"};
979   if (str == "rg11b10float")
980     return {Token::Type::kFormatRg11B10Float, source, "rg11b10float"};
981   if (str == "rg16float")
982     return {Token::Type::kFormatRg16Float, source, "rg16float"};
983   if (str == "rg16sint")
984     return {Token::Type::kFormatRg16Sint, source, "rg16sint"};
985   if (str == "rg16uint")
986     return {Token::Type::kFormatRg16Uint, source, "rg16uint"};
987   if (str == "rg32float")
988     return {Token::Type::kFormatRg32Float, source, "rg32float"};
989   if (str == "rg32sint")
990     return {Token::Type::kFormatRg32Sint, source, "rg32sint"};
991   if (str == "rg32uint")
992     return {Token::Type::kFormatRg32Uint, source, "rg32uint"};
993   if (str == "rg8sint")
994     return {Token::Type::kFormatRg8Sint, source, "rg8sint"};
995   if (str == "rg8snorm")
996     return {Token::Type::kFormatRg8Snorm, source, "rg8snorm"};
997   if (str == "rg8uint")
998     return {Token::Type::kFormatRg8Uint, source, "rg8uint"};
999   if (str == "rg8unorm")
1000     return {Token::Type::kFormatRg8Unorm, source, "rg8unorm"};
1001   if (str == "rgb10a2unorm")
1002     return {Token::Type::kFormatRgb10A2Unorm, source, "rgb10a2unorm"};
1003   if (str == "rgba16float")
1004     return {Token::Type::kFormatRgba16Float, source, "rgba16float"};
1005   if (str == "rgba16sint")
1006     return {Token::Type::kFormatRgba16Sint, source, "rgba16sint"};
1007   if (str == "rgba16uint")
1008     return {Token::Type::kFormatRgba16Uint, source, "rgba16uint"};
1009   if (str == "rgba32float")
1010     return {Token::Type::kFormatRgba32Float, source, "rgba32float"};
1011   if (str == "rgba32sint")
1012     return {Token::Type::kFormatRgba32Sint, source, "rgba32sint"};
1013   if (str == "rgba32uint")
1014     return {Token::Type::kFormatRgba32Uint, source, "rgba32uint"};
1015   if (str == "rgba8sint")
1016     return {Token::Type::kFormatRgba8Sint, source, "rgba8sint"};
1017   if (str == "rgba8snorm")
1018     return {Token::Type::kFormatRgba8Snorm, source, "rgba8snorm"};
1019   if (str == "rgba8uint")
1020     return {Token::Type::kFormatRgba8Uint, source, "rgba8uint"};
1021   if (str == "rgba8unorm")
1022     return {Token::Type::kFormatRgba8Unorm, source, "rgba8unorm"};
1023   if (str == "rgba8unorm_srgb")
1024     return {Token::Type::kFormatRgba8UnormSrgb, source, "rgba8unorm_srgb"};
1025   if (str == "function")
1026     return {Token::Type::kFunction, source, "function"};
1027   if (str == "i32")
1028     return {Token::Type::kI32, source, "i32"};
1029   if (str == "if")
1030     return {Token::Type::kIf, source, "if"};
1031   if (str == "image")
1032     return {Token::Type::kImage, source, "image"};
1033   if (str == "import")
1034     return {Token::Type::kImport, source, "import"};
1035   if (str == "let")
1036     return {Token::Type::kLet, source, "let"};
1037   if (str == "loop")
1038     return {Token::Type::kLoop, source, "loop"};
1039   if (str == "mat2x2")
1040     return {Token::Type::kMat2x2, source, "mat2x2"};
1041   if (str == "mat2x3")
1042     return {Token::Type::kMat2x3, source, "mat2x3"};
1043   if (str == "mat2x4")
1044     return {Token::Type::kMat2x4, source, "mat2x4"};
1045   if (str == "mat3x2")
1046     return {Token::Type::kMat3x2, source, "mat3x2"};
1047   if (str == "mat3x3")
1048     return {Token::Type::kMat3x3, source, "mat3x3"};
1049   if (str == "mat3x4")
1050     return {Token::Type::kMat3x4, source, "mat3x4"};
1051   if (str == "mat4x2")
1052     return {Token::Type::kMat4x2, source, "mat4x2"};
1053   if (str == "mat4x3")
1054     return {Token::Type::kMat4x3, source, "mat4x3"};
1055   if (str == "mat4x4")
1056     return {Token::Type::kMat4x4, source, "mat4x4"};
1057   if (str == "private")
1058     return {Token::Type::kPrivate, source, "private"};
1059   if (str == "ptr")
1060     return {Token::Type::kPtr, source, "ptr"};
1061   if (str == "return")
1062     return {Token::Type::kReturn, source, "return"};
1063   if (str == "sampler")
1064     return {Token::Type::kSampler, source, "sampler"};
1065   if (str == "sampler_comparison")
1066     return {Token::Type::kComparisonSampler, source, "sampler_comparison"};
1067   if (str == "storage_buffer" || str == "storage")
1068     return {Token::Type::kStorage, source, "storage"};
1069   if (str == "struct")
1070     return {Token::Type::kStruct, source, "struct"};
1071   if (str == "switch")
1072     return {Token::Type::kSwitch, source, "switch"};
1073   if (str == "texture_1d")
1074     return {Token::Type::kTextureSampled1d, source, "texture_1d"};
1075   if (str == "texture_2d")
1076     return {Token::Type::kTextureSampled2d, source, "texture_2d"};
1077   if (str == "texture_2d_array")
1078     return {Token::Type::kTextureSampled2dArray, source, "texture_2d_array"};
1079   if (str == "texture_3d")
1080     return {Token::Type::kTextureSampled3d, source, "texture_3d"};
1081   if (str == "texture_cube")
1082     return {Token::Type::kTextureSampledCube, source, "texture_cube"};
1083   if (str == "texture_cube_array") {
1084     return {Token::Type::kTextureSampledCubeArray, source,
1085             "texture_cube_array"};
1086   }
1087   if (str == "texture_depth_2d")
1088     return {Token::Type::kTextureDepth2d, source, "texture_depth_2d"};
1089   if (str == "texture_depth_2d_array") {
1090     return {Token::Type::kTextureDepth2dArray, source,
1091             "texture_depth_2d_array"};
1092   }
1093   if (str == "texture_depth_cube")
1094     return {Token::Type::kTextureDepthCube, source, "texture_depth_cube"};
1095   if (str == "texture_depth_cube_array") {
1096     return {Token::Type::kTextureDepthCubeArray, source,
1097             "texture_depth_cube_array"};
1098   }
1099   if (str == "texture_depth_multisampled_2d") {
1100     return {Token::Type::kTextureDepthMultisampled2d, source,
1101             "texture_depth_multisampled_2d"};
1102   }
1103   if (str == "texture_external") {
1104     return {Token::Type::kTextureExternal, source, "texture_external"};
1105   }
1106   if (str == "texture_multisampled_2d") {
1107     return {Token::Type::kTextureMultisampled2d, source,
1108             "texture_multisampled_2d"};
1109   }
1110   if (str == "texture_storage_1d") {
1111     return {Token::Type::kTextureStorage1d, source, "texture_storage_1d"};
1112   }
1113   if (str == "texture_storage_2d") {
1114     return {Token::Type::kTextureStorage2d, source, "texture_storage_2d"};
1115   }
1116   if (str == "texture_storage_2d_array") {
1117     return {Token::Type::kTextureStorage2dArray, source,
1118             "texture_storage_2d_array"};
1119   }
1120   if (str == "texture_storage_3d") {
1121     return {Token::Type::kTextureStorage3d, source, "texture_storage_3d"};
1122   }
1123   if (str == "true")
1124     return {Token::Type::kTrue, source, "true"};
1125   if (str == "type")
1126     return {Token::Type::kType, source, "type"};
1127   if (str == "u32")
1128     return {Token::Type::kU32, source, "u32"};
1129   if (str == "uniform")
1130     return {Token::Type::kUniform, source, "uniform"};
1131   if (str == "var")
1132     return {Token::Type::kVar, source, "var"};
1133   if (str == "vec2")
1134     return {Token::Type::kVec2, source, "vec2"};
1135   if (str == "vec3")
1136     return {Token::Type::kVec3, source, "vec3"};
1137   if (str == "vec4")
1138     return {Token::Type::kVec4, source, "vec4"};
1139   if (str == "workgroup")
1140     return {Token::Type::kWorkgroup, source, "workgroup"};
1141   return {};
1142 }
1143 
1144 }  // namespace wgsl
1145 }  // namespace reader
1146 }  // namespace tint
1147