//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "token-sequence.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"

namespace Fortran::parser {

TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}

void TokenSequence::clear() {
  start_.clear();
  nextStart_ = 0;
  char_.clear();
  provenances_.clear();
}

void TokenSequence::pop_back() {
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.resize(nextStart_);
  provenances_.RemoveLastBytes(bytes);
}

void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}

void TokenSequence::swap(TokenSequence &that) {
  start_.swap(that.start_);
  std::swap(nextStart_, that.nextStart_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}

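// Returns the index of the first non-blank token at or after `at`,
// or the total token count if there is none.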
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    if (!TokenAt(at).IsBlank()) {
      return at;
    }
  }
  return tokens; // even if at > tokens
}

// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//... and !...) because
// ignoring them is directive-specific.
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    auto tok{TokenAt(at)};
    const char *end{tok.end()};
    for (const char *p{tok.begin()}; p < end; ++p) {
      switch (*p) {
      case '/':
        return p + 1 >= end || p[1] != '/';
      case '!':
        return false;
      case ' ':
        break;
      default:
        return true;
      }
    }
  }
  return false;
}

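// Like pop_back(), but checks that a non-empty token is actually
// present to be removed.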
void TokenSequence::RemoveLastToken() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.erase(char_.begin() + nextStart_, char_.end());
  provenances_.RemoveLastBytes(bytes);
}

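// Appends an entire TokenSequence.  Any partially accumulated token
// (characters added since the last CloseToken()) is closed first so
// that the copied tokens keep their own boundaries.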
void TokenSequence::Put(const TokenSequence &that) {
  if (nextStart_ < char_.size()) {
    start_.push_back(nextStart_);
  }
  int offset = char_.size();
  for (int st : that.start_) {
    start_.push_back(st + offset);
  }
  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
  nextStart_ = char_.size();
  provenances_.Put(that.provenances_);
}

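// Appends the tokens of `that`, attributing them to consecutive offsets
// within `range` instead of their original provenances; the range must
// cover exactly the bytes being copied.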
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
  std::size_t offset{0};
  std::size_t tokens{that.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{that.TokenAt(j)};
    Put(tok, range.OffsetMember(offset));
    offset += tok.size();
  }
  CHECK(offset == range.size());
}

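// Appends `tokens` tokens of `that` starting at token index `at`,
// carrying over their original provenance mappings one contiguous
// run at a time.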
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}

void TokenSequence::Put(
    const char *s, std::size_t bytes, Provenance provenance) {
  for (std::size_t j{0}; j < bytes; ++j) {
    PutNextTokenChar(s[j], provenance + j);
  }
  CloseToken();
}

void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
  Put(&t[0], t.size(), provenance);
}

void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}

void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}

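// Lowercases tokens in place without disturbing the contents of
// character and Hollerith literals; keywords, identifiers, exponent
// letters, BOZ constants, and kind parameter prefixes are folded.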
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}

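// In the blank-handling queries and filters below, blank tokens that
// begin before the character offset `firstChar` are exempt.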
bool TokenSequence::HasBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
      return true;
    }
  }
  return false;
}

bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (isBlank && lastWasBlank && start_[j] >= firstChar) {
      return true;
    }
    lastWasBlank = isBlank;
  }
  return false;
}

TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
      result.Put(*this, j);
    }
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
      result.Put(*this, j);
    }
    lastWasBlank = isBlank;
  }
  swap(result);
  return *this;
}

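// Removes a trailing !-comment from the sequence, if one is present.
// When skipFirst is true, the first '!' token encountered is kept and
// only a later one triggers the clip.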
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}

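// Copies the token characters and their provenance mappings into the
// cooked character stream.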
void TokenSequence::Emit(CookedSource &cooked) const {
  cooked.Put(&char_[0], char_.size());
  cooked.PutProvenanceMappings(provenances_);
}

void TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
    << nextStart_ << '\n';
  for (std::size_t j{0}; j < start_.size(); ++j) {
    o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
      << "'\n";
  }
}

Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.start();
}

ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}

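// Returns the provenance of a run of tokens; the range is extended
// only while each successive token's provenance immediately follows
// that of the token before it.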
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}

ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}

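// Emits an error for each token whose first non-blank character is not
// valid in Fortran source; a leading '!' in the first token is tolerated
// so that compiler directive sentinels (e.g. !dir$) survive.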
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!' && j == 0) {
        // allow in !dir$
      } else if (ch < ' ' || ch >= '\x7f') {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
} // namespace Fortran::parser