1 //===- Token.cpp - MLIR Token Implementation ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Token class for the MLIR textual form.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "Token.h"
14 #include "llvm/ADT/StringExtras.h"
15 using namespace mlir;
16 using llvm::SMLoc;
17 using llvm::SMRange;
18
getLoc() const19 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
20
getEndLoc() const21 SMLoc Token::getEndLoc() const {
22 return SMLoc::getFromPointer(spelling.data() + spelling.size());
23 }
24
getLocRange() const25 SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }
26
27 /// For an integer token, return its value as an unsigned. If it doesn't fit,
28 /// return None.
getUnsignedIntegerValue() const29 Optional<unsigned> Token::getUnsignedIntegerValue() const {
30 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
31
32 unsigned result = 0;
33 if (spelling.getAsInteger(isHex ? 0 : 10, result))
34 return None;
35 return result;
36 }
37
38 /// For an integer token, return its value as a uint64_t. If it doesn't fit,
39 /// return None.
getUInt64IntegerValue(StringRef spelling)40 Optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
41 bool isHex = spelling.size() > 1 && spelling[1] == 'x';
42
43 uint64_t result = 0;
44 if (spelling.getAsInteger(isHex ? 0 : 10, result))
45 return None;
46 return result;
47 }
48
49 /// For a floatliteral, return its value as a double. Return None if the value
50 /// underflows or overflows.
getFloatingPointValue() const51 Optional<double> Token::getFloatingPointValue() const {
52 double result = 0;
53 if (spelling.getAsDouble(result))
54 return None;
55 return result;
56 }
57
58 /// For an inttype token, return its bitwidth.
getIntTypeBitwidth() const59 Optional<unsigned> Token::getIntTypeBitwidth() const {
60 assert(getKind() == inttype);
61 unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
62 unsigned result = 0;
63 if (spelling[bitwidthStart] == '0' ||
64 spelling.drop_front(bitwidthStart).getAsInteger(10, result) ||
65 result == 0)
66 return None;
67 return result;
68 }
69
getIntTypeSignedness() const70 Optional<bool> Token::getIntTypeSignedness() const {
71 assert(getKind() == inttype);
72 if (spelling[0] == 'i')
73 return llvm::None;
74 if (spelling[0] == 's')
75 return true;
76 assert(spelling[0] == 'u');
77 return false;
78 }
79
80 /// Given a token containing a string literal, return its value, including
81 /// removing the quote characters and unescaping the contents of the string. The
82 /// lexer has already verified that this token is valid.
getStringValue() const83 std::string Token::getStringValue() const {
84 assert(getKind() == string ||
85 (getKind() == at_identifier && getSpelling()[1] == '"'));
86 // Start by dropping the quotes.
87 StringRef bytes = getSpelling().drop_front().drop_back();
88 if (getKind() == at_identifier)
89 bytes = bytes.drop_front();
90
91 std::string result;
92 result.reserve(bytes.size());
93 for (unsigned i = 0, e = bytes.size(); i != e;) {
94 auto c = bytes[i++];
95 if (c != '\\') {
96 result.push_back(c);
97 continue;
98 }
99
100 assert(i + 1 <= e && "invalid string should be caught by lexer");
101 auto c1 = bytes[i++];
102 switch (c1) {
103 case '"':
104 case '\\':
105 result.push_back(c1);
106 continue;
107 case 'n':
108 result.push_back('\n');
109 continue;
110 case 't':
111 result.push_back('\t');
112 continue;
113 default:
114 break;
115 }
116
117 assert(i + 1 <= e && "invalid string should be caught by lexer");
118 auto c2 = bytes[i++];
119
120 assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
121 result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
122 }
123
124 return result;
125 }
126
127 /// Given a token containing a hex string literal, return its value or None if
128 /// the token does not contain a valid hex string.
getHexStringValue() const129 Optional<std::string> Token::getHexStringValue() const {
130 assert(getKind() == string);
131
132 // Get the internal string data, without the quotes.
133 StringRef bytes = getSpelling().drop_front().drop_back();
134
135 // Try to extract the binary data from the hex string.
136 std::string hex;
137 if (!bytes.consume_front("0x") || !llvm::tryGetFromHex(bytes, hex))
138 return llvm::None;
139 return hex;
140 }
141
142 /// Given a token containing a symbol reference, return the unescaped string
143 /// value.
getSymbolReference() const144 std::string Token::getSymbolReference() const {
145 assert(is(Token::at_identifier) && "expected valid @-identifier");
146 StringRef nameStr = getSpelling().drop_front();
147
148 // Check to see if the reference is a string literal, or a bare identifier.
149 if (nameStr.front() == '"')
150 return getStringValue();
151 return std::string(nameStr);
152 }
153
154 /// Given a hash_identifier token like #123, try to parse the number out of
155 /// the identifier, returning None if it is a named identifier like #x or
156 /// if the integer doesn't fit.
getHashIdentifierNumber() const157 Optional<unsigned> Token::getHashIdentifierNumber() const {
158 assert(getKind() == hash_identifier);
159 unsigned result = 0;
160 if (spelling.drop_front().getAsInteger(10, result))
161 return None;
162 return result;
163 }
164
165 /// Given a punctuation or keyword token kind, return the spelling of the
166 /// token as a string. Warning: This will abort on markers, identifiers and
167 /// literal tokens since they have no fixed spelling.
getTokenSpelling(Kind kind)168 StringRef Token::getTokenSpelling(Kind kind) {
169 switch (kind) {
170 default:
171 llvm_unreachable("This token kind has no fixed spelling");
172 #define TOK_PUNCTUATION(NAME, SPELLING) \
173 case NAME: \
174 return SPELLING;
175 #define TOK_KEYWORD(SPELLING) \
176 case kw_##SPELLING: \
177 return #SPELLING;
178 #include "TokenKinds.def"
179 }
180 }
181
182 /// Return true if this is one of the keyword token kinds (e.g. kw_if).
isKeyword() const183 bool Token::isKeyword() const {
184 switch (kind) {
185 default:
186 return false;
187 #define TOK_KEYWORD(SPELLING) \
188 case kw_##SPELLING: \
189 return true;
190 #include "TokenKinds.def"
191 }
192 }
193