1 // Copyright 2020 The Tint Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/reader/wgsl/lexer.h"
16
17 #include <cmath>
18 #include <cstring>
19 #include <limits>
20 #include <utility>
21
22 #include "src/debug.h"
23
24 namespace tint {
25 namespace reader {
26 namespace wgsl {
27 namespace {
28
/// @param c the character to classify
/// @returns true if `c` is classified as whitespace by std::isspace
bool is_whitespace(char c) {
  // The <cctype> classifiers require an argument representable as
  // `unsigned char` (or EOF). Passing a negative `char`, such as a byte of a
  // multi-byte UTF-8 sequence, is undefined behaviour, so convert first.
  return std::isspace(static_cast<unsigned char>(c)) != 0;
}
32
/// @param c the character to convert
/// @returns the value of the decimal digit `c`, or 0 if `c` is not in '0'..'9'
uint32_t dec_value(char c) {
  const bool is_decimal_digit = ('0' <= c) && (c <= '9');
  return is_decimal_digit ? static_cast<uint32_t>(c - '0') : 0u;
}
39
/// @param c the character to convert
/// @returns the value of the hexadecimal digit `c` (either case), or 0 if `c`
/// is not a hexadecimal digit
uint32_t hex_value(char c) {
  // Distance of `c` from the first character of its digit group.
  const auto distance_from = [c](char group_start) {
    return static_cast<uint32_t>(c - group_start);
  };

  if ('0' <= c && c <= '9') {
    return distance_from('0');
  }
  if ('a' <= c && c <= 'f') {
    return 0xA + distance_from('a');
  }
  if ('A' <= c && c <= 'F') {
    return 0xA + distance_from('A');
  }
  return 0;
}
52
53 } // namespace
54
/// Constructs a lexer over `content`, with the cursor location at {1, 1}.
/// @param file_path the path copied into every Source built by begin_source()
/// @param content the file content to lex. Only the pointer is stored, and it
/// is dereferenced here to read the data size, so it must not be null.
Lexer::Lexer(const std::string& file_path, const Source::FileContent* content)
    : file_path_(file_path),
      content_(content),
      // The input length is narrowed to 32 bits.
      len_(static_cast<uint32_t>(content->data.size())),
      location_{1, 1} {}
60
// No custom clean-up is performed; the compiler-generated destructor is used.
Lexer::~Lexer() = default;
62
next()63 Token Lexer::next() {
64 auto t = skip_whitespace_and_comments();
65 if (!t.IsUninitialized()) {
66 return t;
67 }
68
69 t = try_hex_float();
70 if (!t.IsUninitialized()) {
71 return t;
72 }
73
74 t = try_hex_integer();
75 if (!t.IsUninitialized()) {
76 return t;
77 }
78
79 t = try_float();
80 if (!t.IsUninitialized()) {
81 return t;
82 }
83
84 t = try_integer();
85 if (!t.IsUninitialized()) {
86 return t;
87 }
88
89 t = try_ident();
90 if (!t.IsUninitialized()) {
91 return t;
92 }
93
94 t = try_punctuation();
95 if (!t.IsUninitialized()) {
96 return t;
97 }
98
99 return {Token::Type::kError, begin_source(), "invalid character found"};
100 }
101
begin_source() const102 Source Lexer::begin_source() const {
103 Source src{};
104 src.file_path = file_path_;
105 src.file_content = content_;
106 src.range.begin = location_;
107 src.range.end = location_;
108 return src;
109 }
110
end_source(Source & src) const111 void Lexer::end_source(Source& src) const {
112 src.range.end = location_;
113 }
114
is_eof() const115 bool Lexer::is_eof() const {
116 return pos_ >= len_;
117 }
118
is_alpha(char ch) const119 bool Lexer::is_alpha(char ch) const {
120 return std::isalpha(ch);
121 }
122
is_digit(char ch) const123 bool Lexer::is_digit(char ch) const {
124 return std::isdigit(ch);
125 }
126
is_alphanum_underscore(char ch) const127 bool Lexer::is_alphanum_underscore(char ch) const {
128 return is_alpha(ch) || is_digit(ch) || ch == '_';
129 }
130
is_hex(char ch) const131 bool Lexer::is_hex(char ch) const {
132 return std::isxdigit(ch);
133 }
134
matches(size_t pos,const std::string & substr)135 bool Lexer::matches(size_t pos, const std::string& substr) {
136 if (pos >= len_)
137 return false;
138 return content_->data.substr(pos, substr.size()) == substr;
139 }
140
skip_whitespace_and_comments()141 Token Lexer::skip_whitespace_and_comments() {
142 for (;;) {
143 auto pos = pos_;
144 while (!is_eof() && is_whitespace(content_->data[pos_])) {
145 if (matches(pos_, "\n")) {
146 pos_++;
147 location_.line++;
148 location_.column = 1;
149 continue;
150 }
151
152 pos_++;
153 location_.column++;
154 }
155
156 auto t = skip_comment();
157 if (!t.IsUninitialized()) {
158 return t;
159 }
160
161 // If the cursor didn't advance we didn't remove any whitespace
162 // so we're done.
163 if (pos == pos_)
164 break;
165 }
166 if (is_eof()) {
167 return {Token::Type::kEOF, begin_source()};
168 }
169
170 return {};
171 }
172
skip_comment()173 Token Lexer::skip_comment() {
174 if (matches(pos_, "//")) {
175 // Line comment: ignore everything until the end of line
176 // or end of input.
177 while (!is_eof() && !matches(pos_, "\n")) {
178 pos_++;
179 location_.column++;
180 }
181 return {};
182 }
183
184 if (matches(pos_, "/*")) {
185 // Block comment: ignore everything until the closing '*/' token.
186
187 // Record source location of the initial '/*'
188 auto source = begin_source();
189 source.range.end.column += 1;
190
191 pos_ += 2;
192 location_.column += 2;
193
194 int depth = 1;
195 while (!is_eof() && depth > 0) {
196 if (matches(pos_, "/*")) {
197 // Start of block comment: increase nesting depth.
198 pos_ += 2;
199 location_.column += 2;
200 depth++;
201 } else if (matches(pos_, "*/")) {
202 // End of block comment: decrease nesting depth.
203 pos_ += 2;
204 location_.column += 2;
205 depth--;
206 } else if (matches(pos_, "\n")) {
207 // Newline: skip and update source location.
208 pos_++;
209 location_.line++;
210 location_.column = 1;
211 } else {
212 // Anything else: skip and update source location.
213 pos_++;
214 location_.column++;
215 }
216 }
217 if (depth > 0) {
218 return {Token::Type::kError, source, "unterminated block comment"};
219 }
220 }
221 return {};
222 }
223
try_float()224 Token Lexer::try_float() {
225 auto start = pos_;
226 auto end = pos_;
227
228 auto source = begin_source();
229 bool has_mantissa_digits = false;
230
231 if (matches(end, "-")) {
232 end++;
233 }
234 while (end < len_ && is_digit(content_->data[end])) {
235 has_mantissa_digits = true;
236 end++;
237 }
238
239 bool has_point = false;
240 if (end < len_ && matches(end, ".")) {
241 has_point = true;
242 end++;
243 }
244
245 while (end < len_ && is_digit(content_->data[end])) {
246 has_mantissa_digits = true;
247 end++;
248 }
249
250 if (!has_mantissa_digits) {
251 return {};
252 }
253
254 // Parse the exponent if one exists
255 bool has_exponent = false;
256 if (end < len_ && (matches(end, "e") || matches(end, "E"))) {
257 end++;
258 if (end < len_ && (matches(end, "+") || matches(end, "-"))) {
259 end++;
260 }
261
262 while (end < len_ && isdigit(content_->data[end])) {
263 has_exponent = true;
264 end++;
265 }
266
267 // If an 'e' or 'E' was present, then the number part must also be present.
268 if (!has_exponent) {
269 const auto str = content_->data.substr(start, end - start);
270 return {Token::Type::kError, source,
271 "incomplete exponent for floating point literal: " + str};
272 }
273 }
274
275 bool has_f_suffix = false;
276 if (end < len_ && matches(end, "f")) {
277 end++;
278 has_f_suffix = true;
279 }
280
281 if (!has_point && !has_exponent && !has_f_suffix) {
282 // If it only has digits then it's an integer.
283 return {};
284 }
285
286 // Save the error string, for use by diagnostics.
287 const auto str = content_->data.substr(start, end - start);
288
289 pos_ = end;
290 location_.column += (end - start);
291
292 end_source(source);
293
294 auto res = strtod(content_->data.c_str() + start, nullptr);
295 // This errors out if a non-zero magnitude is too small to represent in a
296 // float. It can't be represented faithfully in an f32.
297 const auto magnitude = std::fabs(res);
298 if (0.0 < magnitude &&
299 magnitude < static_cast<double>(std::numeric_limits<float>::min())) {
300 return {Token::Type::kError, source,
301 "f32 (" + str + ") magnitude too small, not representable"};
302 }
303 // This handles if the number is really large negative number
304 if (res < static_cast<double>(std::numeric_limits<float>::lowest())) {
305 return {Token::Type::kError, source,
306 "f32 (" + str + ") too large (negative)"};
307 }
308 if (res > static_cast<double>(std::numeric_limits<float>::max())) {
309 return {Token::Type::kError, source,
310 "f32 (" + str + ") too large (positive)"};
311 }
312
313 return {source, static_cast<float>(res)};
314 }
315
try_hex_float()316 Token Lexer::try_hex_float() {
317 constexpr uint32_t kTotalBits = 32;
318 constexpr uint32_t kTotalMsb = kTotalBits - 1;
319 constexpr uint32_t kMantissaBits = 23;
320 constexpr uint32_t kMantissaMsb = kMantissaBits - 1;
321 constexpr uint32_t kMantissaShiftRight = kTotalBits - kMantissaBits;
322 constexpr int32_t kExponentBias = 127;
323 constexpr int32_t kExponentMax = 255;
324 constexpr uint32_t kExponentBits = 8;
325 constexpr uint32_t kExponentMask = (1 << kExponentBits) - 1;
326 constexpr uint32_t kExponentLeftShift = kMantissaBits;
327 constexpr uint32_t kSignBit = 31;
328
329 auto start = pos_;
330 auto end = pos_;
331
332 auto source = begin_source();
333
334 // clang-format off
335 // -?0x([0-9a-fA-F]*.?[0-9a-fA-F]+ | [0-9a-fA-F]+.[0-9a-fA-F]*)(p|P)(+|-)?[0-9]+ // NOLINT
336 // clang-format on
337
338 // -?
339 int32_t sign_bit = 0;
340 if (matches(end, "-")) {
341 sign_bit = 1;
342 end++;
343 }
344 // 0x
345 if (matches(end, "0x")) {
346 end += 2;
347 } else {
348 return {};
349 }
350
351 uint32_t mantissa = 0;
352 uint32_t exponent = 0;
353
354 // TODO(dneto): Values in the normal range for the format do not explicitly
355 // store the most significant bit. The algorithm here works hard to eliminate
356 // that bit in the representation during parsing, and then it backtracks
357 // when it sees it may have to explicitly represent it, and backtracks again
358 // when it sees the number is sub-normal (i.e. the exponent underflows).
359 // I suspect the logic can be clarified by storing it during parsing, and
360 // then removing it later only when needed.
361
362 // `set_next_mantissa_bit_to` sets next `mantissa` bit starting from msb to
363 // lsb to value 1 if `set` is true, 0 otherwise. Returns true on success, i.e.
364 // when the bit can be accommodated in the available space.
365 uint32_t mantissa_next_bit = kTotalMsb;
366 auto set_next_mantissa_bit_to = [&](bool set, bool integer_part) -> bool {
367 // If adding bits for the integer part, we can overflow whether we set the
368 // bit or not. For the fractional part, we can only overflow when setting
369 // the bit.
370 const bool check_overflow = integer_part || set;
371 // Note: mantissa_next_bit actually decrements, so comparing it as
372 // larger than a positive number relies on wraparound.
373 if (check_overflow && (mantissa_next_bit > kTotalMsb)) {
374 return false; // Overflowed mantissa
375 }
376 if (set) {
377 mantissa |= (1 << mantissa_next_bit);
378 }
379 --mantissa_next_bit;
380 return true;
381 };
382
383 // Collect integer range (if any)
384 auto integer_range = std::make_pair(end, end);
385 while (end < len_ && is_hex(content_->data[end])) {
386 integer_range.second = ++end;
387 }
388
389 // .?
390 bool hex_point = false;
391 if (matches(end, ".")) {
392 hex_point = true;
393 end++;
394 }
395
396 // Collect fractional range (if any)
397 auto fractional_range = std::make_pair(end, end);
398 while (end < len_ && is_hex(content_->data[end])) {
399 fractional_range.second = ++end;
400 }
401
402 // Must have at least an integer or fractional part
403 if ((integer_range.first == integer_range.second) &&
404 (fractional_range.first == fractional_range.second)) {
405 return {};
406 }
407
408 // Is the binary exponent present? It's optional.
409 const bool has_exponent = (matches(end, "p") || matches(end, "P"));
410 if (has_exponent) {
411 end++;
412 }
413 if (!has_exponent && !hex_point) {
414 // It's not a hex float. At best it's a hex integer.
415 return {};
416 }
417
418 // At this point, we know for sure our token is a hex float value,
419 // or an invalid token.
420
421 // Parse integer part
422 // [0-9a-fA-F]*
423
424 bool has_zero_integer = true;
425 // The magnitude is zero if and only if seen_prior_one_bits is false.
426 bool seen_prior_one_bits = false;
427 for (auto i = integer_range.first; i < integer_range.second; ++i) {
428 const auto nibble = hex_value(content_->data[i]);
429 if (nibble != 0) {
430 has_zero_integer = false;
431 }
432
433 for (int32_t bit = 3; bit >= 0; --bit) {
434 auto v = 1 & (nibble >> bit);
435
436 // Skip leading 0s and the first 1
437 if (seen_prior_one_bits) {
438 if (!set_next_mantissa_bit_to(v != 0, true)) {
439 return {Token::Type::kError, source,
440 "mantissa is too large for hex float"};
441 }
442 ++exponent;
443 } else {
444 if (v == 1) {
445 seen_prior_one_bits = true;
446 }
447 }
448 }
449 }
450
451 // Parse fractional part
452 // [0-9a-fA-F]*
453 for (auto i = fractional_range.first; i < fractional_range.second; ++i) {
454 auto nibble = hex_value(content_->data[i]);
455 for (int32_t bit = 3; bit >= 0; --bit) {
456 auto v = 1 & (nibble >> bit);
457
458 if (v == 1) {
459 seen_prior_one_bits = true;
460 }
461
462 // If integer part is 0, we only start writing bits to the
463 // mantissa once we have a non-zero fractional bit. While the fractional
464 // values are 0, we adjust the exponent to avoid overflowing `mantissa`.
465 if (!seen_prior_one_bits) {
466 --exponent;
467 } else {
468 if (!set_next_mantissa_bit_to(v != 0, false)) {
469 return {Token::Type::kError, source,
470 "mantissa is too large for hex float"};
471 }
472 }
473 }
474 }
475
476 // Determine if the value of the mantissa is zero.
477 // Note: it's not enough to check mantissa == 0 as we drop the initial bit,
478 // whether it's in the integer part or the fractional part.
479 const bool is_zero = !seen_prior_one_bits;
480 TINT_ASSERT(Reader, !is_zero || mantissa == 0);
481
482 // Parse the optional exponent.
483 // ((p|P)(\+|-)?[0-9]+)?
484 uint32_t input_exponent = 0; // Defaults to 0 if not present
485 int32_t exponent_sign = 1;
486 // If the 'p' part is present, the rest of the exponent must exist.
487 if (has_exponent) {
488 // Parse the rest of the exponent.
489 // (+|-)?
490 if (matches(end, "+")) {
491 end++;
492 } else if (matches(end, "-")) {
493 exponent_sign = -1;
494 end++;
495 }
496
497 // Parse exponent from input
498 // [0-9]+
499 // Allow overflow (in uint32_t) when the floating point value magnitude is
500 // zero.
501 bool has_exponent_digits = false;
502 while (end < len_ && isdigit(content_->data[end])) {
503 has_exponent_digits = true;
504 auto prev_exponent = input_exponent;
505 input_exponent = (input_exponent * 10) + dec_value(content_->data[end]);
506 // Check if we've overflowed input_exponent. This only matters when
507 // the mantissa is non-zero.
508 if (!is_zero && (prev_exponent > input_exponent)) {
509 return {Token::Type::kError, source,
510 "exponent is too large for hex float"};
511 }
512 end++;
513 }
514
515 // Parse optional 'f' suffix. For a hex float, it can only exist
516 // when the exponent is present. Otherwise it will look like
517 // one of the mantissa digits.
518 if (end < len_ && matches(end, "f")) {
519 end++;
520 }
521
522 if (!has_exponent_digits) {
523 return {Token::Type::kError, source,
524 "expected an exponent value for hex float"};
525 }
526 }
527
528 pos_ = end;
529 location_.column += (end - start);
530 end_source(source);
531
532 if (is_zero) {
533 // If value is zero, then ignore the exponent and produce a zero
534 exponent = 0;
535 } else {
536 // Ensure input exponent is not too large; i.e. that it won't overflow when
537 // adding the exponent bias.
538 const uint32_t kIntMax =
539 static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
540 const uint32_t kMaxInputExponent = kIntMax - kExponentBias;
541 if (input_exponent > kMaxInputExponent) {
542 return {Token::Type::kError, source,
543 "exponent is too large for hex float"};
544 }
545
546 // Compute exponent so far
547 exponent += static_cast<uint32_t>(static_cast<int32_t>(input_exponent) *
548 exponent_sign);
549
550 // Bias exponent if non-zero
551 // After this, if exponent is <= 0, our value is a denormal
552 exponent += kExponentBias;
553
554 // We know the number is not zero. The MSB is 1 (by construction), and
555 // should be eliminated because it becomes the implicit 1 that isn't
556 // explicitly represented in the binary32 format. We'll bring it back
557 // later if we find the exponent actually underflowed, i.e. the number
558 // is sub-normal.
559 if (has_zero_integer) {
560 mantissa <<= 1;
561 --exponent;
562 }
563 }
564
565 // We can now safely work with exponent as a signed quantity, as there's no
566 // chance to overflow
567 int32_t signed_exponent = static_cast<int32_t>(exponent);
568
569 // Shift mantissa to occupy the low 23 bits
570 mantissa >>= kMantissaShiftRight;
571
572 // If denormal, shift mantissa until our exponent is zero
573 if (!is_zero) {
574 // Denorm has exponent 0 and non-zero mantissa. We set the top bit here,
575 // then shift the mantissa to make exponent zero.
576 if (signed_exponent <= 0) {
577 mantissa >>= 1;
578 mantissa |= (1 << kMantissaMsb);
579 }
580
581 while (signed_exponent < 0) {
582 mantissa >>= 1;
583 ++signed_exponent;
584
585 // If underflow, clamp to zero
586 if (mantissa == 0) {
587 signed_exponent = 0;
588 }
589 }
590 }
591
592 if (signed_exponent > kExponentMax) {
593 // Overflow: set to infinity
594 signed_exponent = kExponentMax;
595 mantissa = 0;
596 } else if (signed_exponent == kExponentMax && mantissa != 0) {
597 // NaN: set to infinity
598 mantissa = 0;
599 }
600
601 // Combine sign, mantissa, and exponent
602 uint32_t result_u32 = sign_bit << kSignBit;
603 result_u32 |= mantissa;
604 result_u32 |= (static_cast<uint32_t>(signed_exponent) & kExponentMask)
605 << kExponentLeftShift;
606
607 // Reinterpret as float and return
608 float result;
609 std::memcpy(&result, &result_u32, sizeof(result));
610 return {source, static_cast<float>(result)};
611 }
612
build_token_from_int_if_possible(Source source,size_t start,size_t end,int32_t base)613 Token Lexer::build_token_from_int_if_possible(Source source,
614 size_t start,
615 size_t end,
616 int32_t base) {
617 auto res = strtoll(content_->data.c_str() + start, nullptr, base);
618 if (matches(pos_, "u")) {
619 if (static_cast<uint64_t>(res) >
620 static_cast<uint64_t>(std::numeric_limits<uint32_t>::max())) {
621 return {
622 Token::Type::kError, source,
623 "u32 (" + content_->data.substr(start, end - start) + ") too large"};
624 }
625 pos_ += 1;
626 location_.column += 1;
627 end_source(source);
628 return {source, static_cast<uint32_t>(res)};
629 }
630
631 if (res < static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
632 return {
633 Token::Type::kError, source,
634 "i32 (" + content_->data.substr(start, end - start) + ") too small"};
635 }
636 if (res > static_cast<int64_t>(std::numeric_limits<int32_t>::max())) {
637 return {
638 Token::Type::kError, source,
639 "i32 (" + content_->data.substr(start, end - start) + ") too large"};
640 }
641 end_source(source);
642 return {source, static_cast<int32_t>(res)};
643 }
644
try_hex_integer()645 Token Lexer::try_hex_integer() {
646 constexpr size_t kMaxDigits = 8; // Valid for both 32-bit integer types
647 auto start = pos_;
648 auto end = pos_;
649
650 auto source = begin_source();
651
652 if (matches(end, "-")) {
653 end++;
654 }
655
656 if (!matches(end, "0x")) {
657 return {};
658 }
659 end += 2;
660
661 auto first = end;
662 while (!is_eof() && is_hex(content_->data[end])) {
663 end++;
664
665 auto digits = end - first;
666 if (digits > kMaxDigits) {
667 return {Token::Type::kError, source,
668 "integer literal (" +
669 content_->data.substr(start, end - 1 - start) +
670 "...) has too many digits"};
671 }
672 }
673
674 pos_ = end;
675 location_.column += (end - start);
676
677 return build_token_from_int_if_possible(source, start, end, 16);
678 }
679
try_integer()680 Token Lexer::try_integer() {
681 constexpr size_t kMaxDigits = 10; // Valid for both 32-bit integer types
682 auto start = pos_;
683 auto end = start;
684
685 auto source = begin_source();
686
687 if (matches(end, "-")) {
688 end++;
689 }
690
691 if (end >= len_ || !is_digit(content_->data[end])) {
692 return {};
693 }
694
695 auto first = end;
696 // If the first digit is a zero this must only be zero as leading zeros
697 // are not allowed.
698 auto next = first + 1;
699 if (next < len_) {
700 if (content_->data[first] == '0' && is_digit(content_->data[next])) {
701 return {Token::Type::kError, source,
702 "integer literal (" +
703 content_->data.substr(start, end - 1 - start) +
704 "...) has leading 0s"};
705 }
706 }
707
708 while (end < len_ && is_digit(content_->data[end])) {
709 auto digits = end - first;
710 if (digits > kMaxDigits) {
711 return {Token::Type::kError, source,
712 "integer literal (" +
713 content_->data.substr(start, end - 1 - start) +
714 "...) has too many digits"};
715 }
716
717 end++;
718 }
719
720 pos_ = end;
721 location_.column += (end - start);
722
723 return build_token_from_int_if_possible(source, start, end, 10);
724 }
725
try_ident()726 Token Lexer::try_ident() {
727 // Must begin with an a-zA-Z_
728 if (!(is_alpha(content_->data[pos_]) || content_->data[pos_] == '_')) {
729 return {};
730 }
731
732 auto source = begin_source();
733
734 auto s = pos_;
735 while (!is_eof() && is_alphanum_underscore(content_->data[pos_])) {
736 pos_++;
737 location_.column++;
738 }
739
740 if (content_->data[s] == '_') {
741 // Check for an underscore on its own (special token), or a
742 // double-underscore (not allowed).
743 if ((pos_ == s + 1) || (content_->data[s + 1] == '_')) {
744 location_.column -= (pos_ - s);
745 pos_ = s;
746 return {};
747 }
748 }
749
750 auto str = content_->data.substr(s, pos_ - s);
751 end_source(source);
752
753 auto t = check_keyword(source, str);
754 if (!t.IsUninitialized()) {
755 return t;
756 }
757
758 return {Token::Type::kIdentifier, source, str};
759 }
760
try_punctuation()761 Token Lexer::try_punctuation() {
762 auto source = begin_source();
763 auto type = Token::Type::kUninitialized;
764
765 if (matches(pos_, "[[")) {
766 type = Token::Type::kAttrLeft;
767 pos_ += 2;
768 location_.column += 2;
769 } else if (matches(pos_, "]]")) {
770 type = Token::Type::kAttrRight;
771 pos_ += 2;
772 location_.column += 2;
773 } else if (matches(pos_, "(")) {
774 type = Token::Type::kParenLeft;
775 pos_ += 1;
776 location_.column += 1;
777 } else if (matches(pos_, ")")) {
778 type = Token::Type::kParenRight;
779 pos_ += 1;
780 location_.column += 1;
781 } else if (matches(pos_, "[")) {
782 type = Token::Type::kBracketLeft;
783 pos_ += 1;
784 location_.column += 1;
785 } else if (matches(pos_, "]")) {
786 type = Token::Type::kBracketRight;
787 pos_ += 1;
788 location_.column += 1;
789 } else if (matches(pos_, "{")) {
790 type = Token::Type::kBraceLeft;
791 pos_ += 1;
792 location_.column += 1;
793 } else if (matches(pos_, "}")) {
794 type = Token::Type::kBraceRight;
795 pos_ += 1;
796 location_.column += 1;
797 } else if (matches(pos_, "&&")) {
798 type = Token::Type::kAndAnd;
799 pos_ += 2;
800 location_.column += 2;
801 } else if (matches(pos_, "&")) {
802 type = Token::Type::kAnd;
803 pos_ += 1;
804 location_.column += 1;
805 } else if (matches(pos_, "/")) {
806 type = Token::Type::kForwardSlash;
807 pos_ += 1;
808 location_.column += 1;
809 } else if (matches(pos_, "!=")) {
810 type = Token::Type::kNotEqual;
811 pos_ += 2;
812 location_.column += 2;
813 } else if (matches(pos_, "!")) {
814 type = Token::Type::kBang;
815 pos_ += 1;
816 location_.column += 1;
817 } else if (matches(pos_, ":")) {
818 type = Token::Type::kColon;
819 pos_ += 1;
820 location_.column += 1;
821 } else if (matches(pos_, ",")) {
822 type = Token::Type::kComma;
823 pos_ += 1;
824 location_.column += 1;
825 } else if (matches(pos_, "==")) {
826 type = Token::Type::kEqualEqual;
827 pos_ += 2;
828 location_.column += 2;
829 } else if (matches(pos_, "=")) {
830 type = Token::Type::kEqual;
831 pos_ += 1;
832 location_.column += 1;
833 } else if (matches(pos_, ">=")) {
834 type = Token::Type::kGreaterThanEqual;
835 pos_ += 2;
836 location_.column += 2;
837 } else if (matches(pos_, ">>")) {
838 type = Token::Type::kShiftRight;
839 pos_ += 2;
840 location_.column += 2;
841 } else if (matches(pos_, ">")) {
842 type = Token::Type::kGreaterThan;
843 pos_ += 1;
844 location_.column += 1;
845 } else if (matches(pos_, "<=")) {
846 type = Token::Type::kLessThanEqual;
847 pos_ += 2;
848 location_.column += 2;
849 } else if (matches(pos_, "<<")) {
850 type = Token::Type::kShiftLeft;
851 pos_ += 2;
852 location_.column += 2;
853 } else if (matches(pos_, "<")) {
854 type = Token::Type::kLessThan;
855 pos_ += 1;
856 location_.column += 1;
857 } else if (matches(pos_, "%")) {
858 type = Token::Type::kMod;
859 pos_ += 1;
860 location_.column += 1;
861 } else if (matches(pos_, "->")) {
862 type = Token::Type::kArrow;
863 pos_ += 2;
864 location_.column += 2;
865 } else if (matches(pos_, "--")) {
866 type = Token::Type::kMinusMinus;
867 pos_ += 2;
868 location_.column += 2;
869 } else if (matches(pos_, "-")) {
870 type = Token::Type::kMinus;
871 pos_ += 1;
872 location_.column += 1;
873 } else if (matches(pos_, ".")) {
874 type = Token::Type::kPeriod;
875 pos_ += 1;
876 location_.column += 1;
877 } else if (matches(pos_, "++")) {
878 type = Token::Type::kPlusPlus;
879 pos_ += 2;
880 location_.column += 2;
881 } else if (matches(pos_, "+")) {
882 type = Token::Type::kPlus;
883 pos_ += 1;
884 location_.column += 1;
885 } else if (matches(pos_, "||")) {
886 type = Token::Type::kOrOr;
887 pos_ += 2;
888 location_.column += 2;
889 } else if (matches(pos_, "|")) {
890 type = Token::Type::kOr;
891 pos_ += 1;
892 location_.column += 1;
893 } else if (matches(pos_, ";")) {
894 type = Token::Type::kSemicolon;
895 pos_ += 1;
896 location_.column += 1;
897 } else if (matches(pos_, "*")) {
898 type = Token::Type::kStar;
899 pos_ += 1;
900 location_.column += 1;
901 } else if (matches(pos_, "~")) {
902 type = Token::Type::kTilde;
903 pos_ += 1;
904 location_.column += 1;
905 } else if (matches(pos_, "_")) {
906 type = Token::Type::kUnderscore;
907 pos_ += 1;
908 location_.column += 1;
909 } else if (matches(pos_, "^")) {
910 type = Token::Type::kXor;
911 pos_ += 1;
912 location_.column += 1;
913 }
914
915 end_source(source);
916
917 return {type, source};
918 }
919
check_keyword(const Source & source,const std::string & str)920 Token Lexer::check_keyword(const Source& source, const std::string& str) {
921 if (str == "array")
922 return {Token::Type::kArray, source, "array"};
923 if (str == "atomic")
924 return {Token::Type::kAtomic, source, "atomic"};
925 if (str == "bitcast")
926 return {Token::Type::kBitcast, source, "bitcast"};
927 if (str == "bool")
928 return {Token::Type::kBool, source, "bool"};
929 if (str == "break")
930 return {Token::Type::kBreak, source, "break"};
931 if (str == "case")
932 return {Token::Type::kCase, source, "case"};
933 if (str == "continue")
934 return {Token::Type::kContinue, source, "continue"};
935 if (str == "continuing")
936 return {Token::Type::kContinuing, source, "continuing"};
937 if (str == "discard")
938 return {Token::Type::kDiscard, source, "discard"};
939 if (str == "default")
940 return {Token::Type::kDefault, source, "default"};
941 if (str == "else")
942 return {Token::Type::kElse, source, "else"};
943 if (str == "elseif")
944 return {Token::Type::kElseIf, source, "elseif"};
945 if (str == "f32")
946 return {Token::Type::kF32, source, "f32"};
947 if (str == "fallthrough")
948 return {Token::Type::kFallthrough, source, "fallthrough"};
949 if (str == "false")
950 return {Token::Type::kFalse, source, "false"};
951 if (str == "fn")
952 return {Token::Type::kFn, source, "fn"};
953 if (str == "for")
954 return {Token::Type::kFor, source, "for"};
955 if (str == "bgra8unorm")
956 return {Token::Type::kFormatBgra8Unorm, source, "bgra8unorm"};
957 if (str == "bgra8unorm_srgb")
958 return {Token::Type::kFormatBgra8UnormSrgb, source, "bgra8unorm_srgb"};
959 if (str == "r16float")
960 return {Token::Type::kFormatR16Float, source, "r16float"};
961 if (str == "r16sint")
962 return {Token::Type::kFormatR16Sint, source, "r16sint"};
963 if (str == "r16uint")
964 return {Token::Type::kFormatR16Uint, source, "r16uint"};
965 if (str == "r32float")
966 return {Token::Type::kFormatR32Float, source, "r32float"};
967 if (str == "r32sint")
968 return {Token::Type::kFormatR32Sint, source, "r32sint"};
969 if (str == "r32uint")
970 return {Token::Type::kFormatR32Uint, source, "r32uint"};
971 if (str == "r8sint")
972 return {Token::Type::kFormatR8Sint, source, "r8sint"};
973 if (str == "r8snorm")
974 return {Token::Type::kFormatR8Snorm, source, "r8snorm"};
975 if (str == "r8uint")
976 return {Token::Type::kFormatR8Uint, source, "r8uint"};
977 if (str == "r8unorm")
978 return {Token::Type::kFormatR8Unorm, source, "r8unorm"};
979 if (str == "rg11b10float")
980 return {Token::Type::kFormatRg11B10Float, source, "rg11b10float"};
981 if (str == "rg16float")
982 return {Token::Type::kFormatRg16Float, source, "rg16float"};
983 if (str == "rg16sint")
984 return {Token::Type::kFormatRg16Sint, source, "rg16sint"};
985 if (str == "rg16uint")
986 return {Token::Type::kFormatRg16Uint, source, "rg16uint"};
987 if (str == "rg32float")
988 return {Token::Type::kFormatRg32Float, source, "rg32float"};
989 if (str == "rg32sint")
990 return {Token::Type::kFormatRg32Sint, source, "rg32sint"};
991 if (str == "rg32uint")
992 return {Token::Type::kFormatRg32Uint, source, "rg32uint"};
993 if (str == "rg8sint")
994 return {Token::Type::kFormatRg8Sint, source, "rg8sint"};
995 if (str == "rg8snorm")
996 return {Token::Type::kFormatRg8Snorm, source, "rg8snorm"};
997 if (str == "rg8uint")
998 return {Token::Type::kFormatRg8Uint, source, "rg8uint"};
999 if (str == "rg8unorm")
1000 return {Token::Type::kFormatRg8Unorm, source, "rg8unorm"};
1001 if (str == "rgb10a2unorm")
1002 return {Token::Type::kFormatRgb10A2Unorm, source, "rgb10a2unorm"};
1003 if (str == "rgba16float")
1004 return {Token::Type::kFormatRgba16Float, source, "rgba16float"};
1005 if (str == "rgba16sint")
1006 return {Token::Type::kFormatRgba16Sint, source, "rgba16sint"};
1007 if (str == "rgba16uint")
1008 return {Token::Type::kFormatRgba16Uint, source, "rgba16uint"};
1009 if (str == "rgba32float")
1010 return {Token::Type::kFormatRgba32Float, source, "rgba32float"};
1011 if (str == "rgba32sint")
1012 return {Token::Type::kFormatRgba32Sint, source, "rgba32sint"};
1013 if (str == "rgba32uint")
1014 return {Token::Type::kFormatRgba32Uint, source, "rgba32uint"};
1015 if (str == "rgba8sint")
1016 return {Token::Type::kFormatRgba8Sint, source, "rgba8sint"};
1017 if (str == "rgba8snorm")
1018 return {Token::Type::kFormatRgba8Snorm, source, "rgba8snorm"};
1019 if (str == "rgba8uint")
1020 return {Token::Type::kFormatRgba8Uint, source, "rgba8uint"};
1021 if (str == "rgba8unorm")
1022 return {Token::Type::kFormatRgba8Unorm, source, "rgba8unorm"};
1023 if (str == "rgba8unorm_srgb")
1024 return {Token::Type::kFormatRgba8UnormSrgb, source, "rgba8unorm_srgb"};
1025 if (str == "function")
1026 return {Token::Type::kFunction, source, "function"};
1027 if (str == "i32")
1028 return {Token::Type::kI32, source, "i32"};
1029 if (str == "if")
1030 return {Token::Type::kIf, source, "if"};
1031 if (str == "image")
1032 return {Token::Type::kImage, source, "image"};
1033 if (str == "import")
1034 return {Token::Type::kImport, source, "import"};
1035 if (str == "let")
1036 return {Token::Type::kLet, source, "let"};
1037 if (str == "loop")
1038 return {Token::Type::kLoop, source, "loop"};
1039 if (str == "mat2x2")
1040 return {Token::Type::kMat2x2, source, "mat2x2"};
1041 if (str == "mat2x3")
1042 return {Token::Type::kMat2x3, source, "mat2x3"};
1043 if (str == "mat2x4")
1044 return {Token::Type::kMat2x4, source, "mat2x4"};
1045 if (str == "mat3x2")
1046 return {Token::Type::kMat3x2, source, "mat3x2"};
1047 if (str == "mat3x3")
1048 return {Token::Type::kMat3x3, source, "mat3x3"};
1049 if (str == "mat3x4")
1050 return {Token::Type::kMat3x4, source, "mat3x4"};
1051 if (str == "mat4x2")
1052 return {Token::Type::kMat4x2, source, "mat4x2"};
1053 if (str == "mat4x3")
1054 return {Token::Type::kMat4x3, source, "mat4x3"};
1055 if (str == "mat4x4")
1056 return {Token::Type::kMat4x4, source, "mat4x4"};
1057 if (str == "private")
1058 return {Token::Type::kPrivate, source, "private"};
1059 if (str == "ptr")
1060 return {Token::Type::kPtr, source, "ptr"};
1061 if (str == "return")
1062 return {Token::Type::kReturn, source, "return"};
1063 if (str == "sampler")
1064 return {Token::Type::kSampler, source, "sampler"};
1065 if (str == "sampler_comparison")
1066 return {Token::Type::kComparisonSampler, source, "sampler_comparison"};
1067 if (str == "storage_buffer" || str == "storage")
1068 return {Token::Type::kStorage, source, "storage"};
1069 if (str == "struct")
1070 return {Token::Type::kStruct, source, "struct"};
1071 if (str == "switch")
1072 return {Token::Type::kSwitch, source, "switch"};
1073 if (str == "texture_1d")
1074 return {Token::Type::kTextureSampled1d, source, "texture_1d"};
1075 if (str == "texture_2d")
1076 return {Token::Type::kTextureSampled2d, source, "texture_2d"};
1077 if (str == "texture_2d_array")
1078 return {Token::Type::kTextureSampled2dArray, source, "texture_2d_array"};
1079 if (str == "texture_3d")
1080 return {Token::Type::kTextureSampled3d, source, "texture_3d"};
1081 if (str == "texture_cube")
1082 return {Token::Type::kTextureSampledCube, source, "texture_cube"};
1083 if (str == "texture_cube_array") {
1084 return {Token::Type::kTextureSampledCubeArray, source,
1085 "texture_cube_array"};
1086 }
1087 if (str == "texture_depth_2d")
1088 return {Token::Type::kTextureDepth2d, source, "texture_depth_2d"};
1089 if (str == "texture_depth_2d_array") {
1090 return {Token::Type::kTextureDepth2dArray, source,
1091 "texture_depth_2d_array"};
1092 }
1093 if (str == "texture_depth_cube")
1094 return {Token::Type::kTextureDepthCube, source, "texture_depth_cube"};
1095 if (str == "texture_depth_cube_array") {
1096 return {Token::Type::kTextureDepthCubeArray, source,
1097 "texture_depth_cube_array"};
1098 }
1099 if (str == "texture_depth_multisampled_2d") {
1100 return {Token::Type::kTextureDepthMultisampled2d, source,
1101 "texture_depth_multisampled_2d"};
1102 }
1103 if (str == "texture_external") {
1104 return {Token::Type::kTextureExternal, source, "texture_external"};
1105 }
1106 if (str == "texture_multisampled_2d") {
1107 return {Token::Type::kTextureMultisampled2d, source,
1108 "texture_multisampled_2d"};
1109 }
1110 if (str == "texture_storage_1d") {
1111 return {Token::Type::kTextureStorage1d, source, "texture_storage_1d"};
1112 }
1113 if (str == "texture_storage_2d") {
1114 return {Token::Type::kTextureStorage2d, source, "texture_storage_2d"};
1115 }
1116 if (str == "texture_storage_2d_array") {
1117 return {Token::Type::kTextureStorage2dArray, source,
1118 "texture_storage_2d_array"};
1119 }
1120 if (str == "texture_storage_3d") {
1121 return {Token::Type::kTextureStorage3d, source, "texture_storage_3d"};
1122 }
1123 if (str == "true")
1124 return {Token::Type::kTrue, source, "true"};
1125 if (str == "type")
1126 return {Token::Type::kType, source, "type"};
1127 if (str == "u32")
1128 return {Token::Type::kU32, source, "u32"};
1129 if (str == "uniform")
1130 return {Token::Type::kUniform, source, "uniform"};
1131 if (str == "var")
1132 return {Token::Type::kVar, source, "var"};
1133 if (str == "vec2")
1134 return {Token::Type::kVec2, source, "vec2"};
1135 if (str == "vec3")
1136 return {Token::Type::kVec3, source, "vec3"};
1137 if (str == "vec4")
1138 return {Token::Type::kVec4, source, "vec4"};
1139 if (str == "workgroup")
1140 return {Token::Type::kWorkgroup, source, "workgroup"};
1141 return {};
1142 }
1143
1144 } // namespace wgsl
1145 } // namespace reader
1146 } // namespace tint
1147