• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- lib/Parser/token-sequence.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "token-sequence.h"
10 #include "flang/Parser/characters.h"
11 #include "flang/Parser/message.h"
12 #include "llvm/Support/raw_ostream.h"
13 
14 namespace Fortran::parser {
15 
// Move assignment: discard this sequence's current contents and take
// over those of `that`, leaving `that` empty.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}
21 
clear()22 void TokenSequence::clear() {
23   start_.clear();
24   nextStart_ = 0;
25   char_.clear();
26   provenances_.clear();
27 }
28 
pop_back()29 void TokenSequence::pop_back() {
30   std::size_t bytes{nextStart_ - start_.back()};
31   nextStart_ = start_.back();
32   start_.pop_back();
33   char_.resize(nextStart_);
34   provenances_.RemoveLastBytes(bytes);
35 }
36 
shrink_to_fit()37 void TokenSequence::shrink_to_fit() {
38   start_.shrink_to_fit();
39   char_.shrink_to_fit();
40   provenances_.shrink_to_fit();
41 }
42 
swap(TokenSequence & that)43 void TokenSequence::swap(TokenSequence &that) {
44   start_.swap(that.start_);
45   std::swap(nextStart_, that.nextStart_);
46   char_.swap(that.char_);
47   provenances_.swap(that.provenances_);
48 }
49 
SkipBlanks(std::size_t at) const50 std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
51   std::size_t tokens{start_.size()};
52   for (; at < tokens; ++at) {
53     if (!TokenAt(at).IsBlank()) {
54       return at;
55     }
56   }
57   return tokens; // even if at > tokens
58 }
59 
60 // C-style /*comments*/ are removed from preprocessing directive
61 // token sequences by the prescanner, but not C++ or Fortran
62 // free-form line-ending comments (//...  and !...) because
63 // ignoring them is directive-specific.
IsAnythingLeft(std::size_t at) const64 bool TokenSequence::IsAnythingLeft(std::size_t at) const {
65   std::size_t tokens{start_.size()};
66   for (; at < tokens; ++at) {
67     auto tok{TokenAt(at)};
68     const char *end{tok.end()};
69     for (const char *p{tok.begin()}; p < end; ++p) {
70       switch (*p) {
71       case '/':
72         return p + 1 >= end || p[1] != '/';
73       case '!':
74         return false;
75       case ' ':
76         break;
77       default:
78         return true;
79       }
80     }
81   }
82   return false;
83 }
84 
RemoveLastToken()85 void TokenSequence::RemoveLastToken() {
86   CHECK(!start_.empty());
87   CHECK(nextStart_ > start_.back());
88   std::size_t bytes{nextStart_ - start_.back()};
89   nextStart_ = start_.back();
90   start_.pop_back();
91   char_.erase(char_.begin() + nextStart_, char_.end());
92   provenances_.RemoveLastBytes(bytes);
93 }
94 
Put(const TokenSequence & that)95 void TokenSequence::Put(const TokenSequence &that) {
96   if (nextStart_ < char_.size()) {
97     start_.push_back(nextStart_);
98   }
99   int offset = char_.size();
100   for (int st : that.start_) {
101     start_.push_back(st + offset);
102   }
103   char_.insert(char_.end(), that.char_.begin(), that.char_.end());
104   nextStart_ = char_.size();
105   provenances_.Put(that.provenances_);
106 }
107 
Put(const TokenSequence & that,ProvenanceRange range)108 void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
109   std::size_t offset{0};
110   std::size_t tokens{that.SizeInTokens()};
111   for (std::size_t j{0}; j < tokens; ++j) {
112     CharBlock tok{that.TokenAt(j)};
113     Put(tok, range.OffsetMember(offset));
114     offset += tok.size();
115   }
116   CHECK(offset == range.size());
117 }
118 
// Append `tokens` tokens of `that`, starting at token index `at`,
// preserving their original provenance.  Provenance is handled in
// maximal runs: `provenance` holds the currently mapped range and
// `offset` the position within it; provenances_.Map() is consulted only
// when the current run is exhausted.
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        // Run exhausted (or first byte): map a fresh provenance range.
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}
136 
Put(const char * s,std::size_t bytes,Provenance provenance)137 void TokenSequence::Put(
138     const char *s, std::size_t bytes, Provenance provenance) {
139   for (std::size_t j{0}; j < bytes; ++j) {
140     PutNextTokenChar(s[j], provenance + j);
141   }
142   CloseToken();
143 }
144 
Put(const CharBlock & t,Provenance provenance)145 void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
146   Put(&t[0], t.size(), provenance);
147 }
148 
Put(const std::string & s,Provenance provenance)149 void TokenSequence::Put(const std::string &s, Provenance provenance) {
150   Put(s.data(), s.size(), provenance);
151 }
152 
Put(llvm::raw_string_ostream & ss,Provenance provenance)153 void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
154   Put(ss.str(), provenance);
155 }
156 
// Convert tokens to lower case in place, while leaving the bodies of
// character literals, Hollerith constants, and kind-prefixed literals
// untouched.  Returns *this for call chaining.
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  // Walk the character store token by token: j indexes the current
  // token's first character, nextStart the character after its last.
  for (std::size_t j{0}; j < chars;) {
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      // Token starts with digits: a number, Hollerith, kind-prefixed
      // literal, or real literal with an exponent.
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
        // all digits: nothing to lower
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith (e.g., 3HABC): lower only the marker, not the body
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      // Token ends with a quote character.
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant (e.g., Z"1F"): lower the whole token
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        // Lower only the prefix; stop at the opening quote.
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      // Ordinary token (identifier, keyword, operator): lower it all.
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}
204 
HasBlanks(std::size_t firstChar) const205 bool TokenSequence::HasBlanks(std::size_t firstChar) const {
206   std::size_t tokens{SizeInTokens()};
207   for (std::size_t j{0}; j < tokens; ++j) {
208     if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
209       return true;
210     }
211   }
212   return false;
213 }
214 
HasRedundantBlanks(std::size_t firstChar) const215 bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
216   std::size_t tokens{SizeInTokens()};
217   bool lastWasBlank{false};
218   for (std::size_t j{0}; j < tokens; ++j) {
219     bool isBlank{TokenAt(j).IsBlank()};
220     if (isBlank && lastWasBlank && start_[j] >= firstChar) {
221       return true;
222     }
223     lastWasBlank = isBlank;
224   }
225   return false;
226 }
227 
RemoveBlanks(std::size_t firstChar)228 TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
229   std::size_t tokens{SizeInTokens()};
230   TokenSequence result;
231   for (std::size_t j{0}; j < tokens; ++j) {
232     if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
233       result.Put(*this, j);
234     }
235   }
236   swap(result);
237   return *this;
238 }
239 
RemoveRedundantBlanks(std::size_t firstChar)240 TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
241   std::size_t tokens{SizeInTokens()};
242   TokenSequence result;
243   bool lastWasBlank{false};
244   for (std::size_t j{0}; j < tokens; ++j) {
245     bool isBlank{TokenAt(j).IsBlank()};
246     if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
247       result.Put(*this, j);
248     }
249     lastWasBlank = isBlank;
250   }
251   swap(result);
252   return *this;
253 }
254 
// Truncate the sequence at the first token whose first non-blank
// character is '!'.  When skipFirst is true, the first such token is
// exempted (e.g., the '!' introducing a directive) and clipping applies
// to the next one.  Returns *this for chaining.
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          // NOTE(review): this copies j-1 tokens, i.e. indices
          // [0, j-2], dropping the token immediately before the '!'
          // as well.  Possibly intended to strip a preceding blank,
          // but it looks like an off-by-one -- confirm against callers.
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}
273 
Emit(CookedSource & cooked) const274 void TokenSequence::Emit(CookedSource &cooked) const {
275   cooked.Put(&char_[0], char_.size());
276   cooked.PutProvenanceMappings(provenances_);
277 }
278 
Dump(llvm::raw_ostream & o) const279 void TokenSequence::Dump(llvm::raw_ostream &o) const {
280   o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
281     << nextStart_ << '\n';
282   for (std::size_t j{0}; j < start_.size(); ++j) {
283     o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
284       << "'\n";
285   }
286 }
287 
GetTokenProvenance(std::size_t token,std::size_t offset) const288 Provenance TokenSequence::GetTokenProvenance(
289     std::size_t token, std::size_t offset) const {
290   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
291   return range.start();
292 }
293 
GetTokenProvenanceRange(std::size_t token,std::size_t offset) const294 ProvenanceRange TokenSequence::GetTokenProvenanceRange(
295     std::size_t token, std::size_t offset) const {
296   ProvenanceRange range{provenances_.Map(start_[token] + offset)};
297   return range.Prefix(TokenBytes(token) - offset);
298 }
299 
GetIntervalProvenanceRange(std::size_t token,std::size_t tokens) const300 ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
301     std::size_t token, std::size_t tokens) const {
302   if (tokens == 0) {
303     return {};
304   }
305   ProvenanceRange range{provenances_.Map(start_[token])};
306   while (--tokens > 0 &&
307       range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
308   }
309   return range;
310 }
311 
// Provenance range of the whole sequence (as far as it is contiguous
// from the first token).
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}
315 
// Emit an error for each token whose first non-blank character is not
// valid in Fortran source.  A '!' beginning the first token is
// tolerated (directive lines such as !dir$).  Returns *this.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!' && j == 0) {
        // allow in !dir$
      } else if (ch < ' ' || ch >= '\x7f') {
        // Non-printable or non-ASCII byte: report its code in hex.
        // (On targets where char is signed, bytes >= 0x80 are negative
        // and are caught by the ch < ' ' comparison.)
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
336 } // namespace Fortran::parser
337