//===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "preprocessor.h"
#include "prescan.h"
#include "flang/Common/idioms.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cinttypes>
#include <cstddef>
#include <ctime>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <utility>

namespace Fortran::parser {

Definition::Definition(
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
    : replacement_{Tokenize({}, repl, firstToken, tokens)} {}

Definition::Definition(const std::vector<std::string> &argNames,
    const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
    bool isVariadic)
    : isFunctionLike_{true},
      argumentCount_(argNames.size()), isVariadic_{isVariadic},
      replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}

Definition::Definition(const std::string &predefined, AllSources &sources)
    : isPredefined_{true},
      replacement_{
          predefined, sources.AddCompilerInsertion(predefined).start()} {}

bool Definition::set_isDisabled(bool disable) {
  bool was{isDisabled_};
  isDisabled_ = disable;
  return was;
}

static bool IsLegalIdentifierStart(const CharBlock &cpl) {
  return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
}

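// Convert a macro definition's replacement text into its stored form:
// occurrences of the macro's parameter names are rewritten as the compact
// placeholders "~A", "~B", ..., which Apply() later recognizes when it
// substitutes actual arguments.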
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{token.TokenAt(firstToken + j)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        result.Put(it->second, token.GetTokenProvenance(j));
        continue;
      }
    }
    result.Put(token, firstToken + j, 1);
  }
  return result;
}

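// Implements the # stringification operator: the argument's tokens are
// wrapped in double quotes, with any embedded '"' or '\' characters doubled.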
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence result;
  Provenance quoteProvenance{allSources.CompilerInsertionProvenance('"')};
  result.PutNextTokenChar('"', quoteProvenance);
  for (std::size_t j{0}; j < tokens.SizeInTokens(); ++j) {
    const CharBlock &token{tokens.TokenAt(j)};
    std::size_t bytes{token.size()};
    for (std::size_t k{0}; k < bytes; ++k) {
      char ch{token[k]};
      Provenance from{tokens.GetTokenProvenance(j, k)};
      if (ch == '"' || ch == '\\') {
        result.PutNextTokenChar(ch, from);
      }
      result.PutNextTokenChar(ch, from);
    }
  }
  result.PutNextTokenChar('"', quoteProvenance);
  result.CloseToken();
  return result;
}

constexpr bool IsTokenPasting(CharBlock opr) {
  return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
}

static bool AnyTokenPasting(const TokenSequence &text) {
  std::size_t tokens{text.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (IsTokenPasting(text.TokenAt(j))) {
      return true;
    }
  }
  return false;
}

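// Apply the ## token-pasting operator to a replacement sequence: the blanks
// around each ## are dropped and the token that follows is fused onto the
// last token already emitted.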
static TokenSequence TokenPasting(TokenSequence &&text) {
  if (!AnyTokenPasting(text)) {
    return std::move(text);
  }
  TokenSequence result;
  std::size_t tokens{text.SizeInTokens()};
  bool pasting{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (IsTokenPasting(text.TokenAt(j))) {
      if (!pasting) {
        while (!result.empty() &&
            result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
          result.pop_back();
        }
        if (!result.empty()) {
          result.ReopenLastToken();
          pasting = true;
        }
      }
    } else if (pasting && text.TokenAt(j).IsBlank()) {
    } else {
      result.Put(text, j, 1);
      pasting = false;
    }
  }
  return result;
}

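// Substitute actual arguments into a function-like macro's replacement text,
// handling # stringification, ## token pasting, __VA_ARGS__, and
// __VA_OPT__(...).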
TokenSequence Definition::Apply(
    const std::vector<TokenSequence> &args, Prescanner &prescanner) {
  TokenSequence result;
  bool skipping{false};
  int parenthesesNesting{0};
  std::size_t tokens{replacement_.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{replacement_.TokenAt(j)};
    std::size_t bytes{token.size()};
    if (skipping) {
      if (bytes == 1) {
        if (token[0] == '(') {
          ++parenthesesNesting;
        } else if (token[0] == ')') {
          skipping = --parenthesesNesting > 0;
        }
      }
      continue;
    }
    if (bytes == 2 && token[0] == '~') { // argument substitution
      std::size_t index = token[1] - 'A';
      if (index >= args.size()) {
        continue;
      }
      std::size_t prev{j};
      while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
        --prev;
      }
      if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
          replacement_.TokenAt(prev - 1)[0] ==
              '#') { // stringify argument without macro replacement
        std::size_t resultSize{result.SizeInTokens()};
        while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
          result.pop_back();
        }
        CHECK(resultSize > 0 &&
            result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
        result.pop_back();
        result.Put(Stringify(args[index], prescanner.allSources()));
      } else {
        const TokenSequence *arg{&args[index]};
        std::optional<TokenSequence> replaced;
        // Don't replace macros in the actual argument if it is preceded or
        // followed by the token-pasting operator ## in the replacement text.
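        // e.g., given "#define CAT(a, b) a##b", the arguments of CAT are
        // pasted exactly as written rather than being macro-expanded first.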
        if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
          auto next{replacement_.SkipBlanks(j + 1)};
          if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
            // Apply macro replacement to the actual argument
            replaced =
                prescanner.preprocessor().MacroReplacement(*arg, prescanner);
            if (replaced) {
              arg = &*replaced;
            }
          }
        }
        result.Put(DEREF(arg));
      }
    } else if (bytes == 11 && isVariadic_ &&
        token.ToString() == "__VA_ARGS__") {
      Provenance commaProvenance{
          prescanner.preprocessor().allSources().CompilerInsertionProvenance(
              ',')};
      for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
        if (k > argumentCount_) {
          result.Put(","s, commaProvenance);
        }
        result.Put(args[k]);
      }
    } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
        j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
        parenthesesNesting == 0) {
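      // __VA_OPT__(text): emit the parenthesized text only when variadic
      // arguments were actually supplied; otherwise skip to the matching ')'.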
      parenthesesNesting = 1;
      skipping = args.size() == argumentCount_;
      ++j;
    } else {
      if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
        ++parenthesesNesting;
      } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
        if (--parenthesesNesting == 0) {
          skipping = false;
          continue;
        }
      }
      result.Put(replacement_, j);
    }
  }
  return TokenPasting(std::move(result));
}

static std::string FormatTime(const std::time_t &now, const char *format) {
  char buffer[16];
  return {buffer,
      std::strftime(buffer, sizeof buffer, format, std::localtime(&now))};
}

Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
  // Capture current local date & time once now to avoid having the values
  // of __DATE__ or __TIME__ change during compilation.
  std::time_t now;
  std::time(&now);
  definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
      Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
  definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
      Definition{FormatTime(now, "\"%T\""), allSources});
  // The values of these predefined macros depend on their invocation sites.
  definitions_.emplace(
      SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
  definitions_.emplace(
      SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
}

void Preprocessor::Define(std::string macro, std::string value) {
  definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
}

void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }

std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
      }
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call. Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      result.Put(input, j);
      continue;
    }
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
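      // e.g., FOO() passes no arguments to "#define FOO() ..." but one empty
      // argument to "#define FOO(x) ...".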
      argStart.clear();
    }
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}

TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  if (std::optional<TokenSequence> repl{MacroReplacement(tokens, prescanner)}) {
    return std::move(*repl);
  }
  return tokens;
}

void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
  std::size_t tokens{dir.SizeInTokens()};
  std::size_t j{dir.SkipBlanks(0)};
  if (j == tokens) {
    return;
  }
  if (dir.TokenAt(j).ToString() != "#") {
    prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
    return;
  }
  j = dir.SkipBlanks(j + 1);
  while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
    --tokens;
  }
  if (j == tokens) {
    return;
  }
  if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
    return; // treat like #line, ignore it
  }
  std::size_t dirOffset{j};
  std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
  j = dir.SkipBlanks(j + 1);
  CharBlock nameToken;
  if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
    nameToken = dir.TokenAt(j);
  }
  if (dirName == "line") {
    // #line is ignored
  } else if (dirName == "define") {
    if (nameToken.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#define: missing or invalid name"_err_en_US);
      return;
    }
    nameToken = SaveTokenAsName(nameToken);
    definitions_.erase(nameToken);
    if (++j < tokens && dir.TokenAt(j).size() == 1 &&
        dir.TokenAt(j)[0] == '(') {
      j = dir.SkipBlanks(j + 1);
      std::vector<std::string> argName;
      bool isVariadic{false};
      if (dir.TokenAt(j).ToString() != ")") {
        while (true) {
          std::string an{dir.TokenAt(j).ToString()};
          if (an == "...") {
            isVariadic = true;
          } else {
            if (an.empty() || !IsLegalIdentifierStart(an[0])) {
              prescanner->Say(dir.GetTokenProvenanceRange(j),
                  "#define: missing or invalid argument name"_err_en_US);
              return;
            }
            argName.push_back(an);
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          std::string punc{dir.TokenAt(j).ToString()};
          if (punc == ")") {
            break;
          }
          if (isVariadic || punc != ",") {
            prescanner->Say(dir.GetTokenProvenanceRange(j),
                "#define: malformed argument list"_err_en_US);
            return;
          }
          j = dir.SkipBlanks(j + 1);
          if (j == tokens) {
            prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
                "#define: malformed argument list"_err_en_US);
            return;
          }
        }
        if (std::set<std::string>(argName.begin(), argName.end()).size() !=
            argName.size()) {
          prescanner->Say(dir.GetTokenProvenance(dirOffset),
              "#define: argument names are not distinct"_err_en_US);
          return;
        }
      }
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(std::make_pair(
          nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
    } else {
      j = dir.SkipBlanks(j + 1);
      definitions_.emplace(
          std::make_pair(nameToken, Definition{dir, j, tokens - j}));
    }
  } else if (dirName == "undef") {
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "# missing or invalid name"_err_en_US);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#undef: excess tokens at end of directive"_en_US);
      } else {
        definitions_.erase(nameToken);
      }
    }
  } else if (dirName == "ifdef" || dirName == "ifndef") {
    bool doThen{false};
    if (nameToken.empty()) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#%s: missing name"_err_en_US, dirName);
    } else {
      if (dir.IsAnythingLeft(++j)) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#%s: excess tokens at end of directive"_en_US, dirName);
      }
      doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
    }
    if (doThen) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenance(dirOffset));
    }
  } else if (dirName == "if") {
    if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
      ifStack_.push(CanDeadElseAppear::Yes);
    } else {
      SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "else") {
    if (dir.IsAnythingLeft(j)) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#else: excess tokens at end of directive"_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "elif") {
    if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
    } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
      SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
          dir.GetTokenProvenanceRange(dirOffset));
    }
  } else if (dirName == "endif") {
    if (dir.IsAnythingLeft(j)) {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#endif: excess tokens at end of directive"_en_US);
    } else if (ifStack_.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
    } else {
      ifStack_.pop();
    }
  } else if (dirName == "error") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_err_en_US, dir.ToString());
  } else if (dirName == "warning" || dirName == "comment" ||
      dirName == "note") {
    prescanner->Say(
        dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
        "%s"_en_US, dir.ToString());
  } else if (dirName == "include") {
    if (j == tokens) {
      prescanner->Say(
          dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
          "#include: missing name of file to include"_err_en_US);
      return;
    }
    std::string include;
    if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
      std::size_t k{j + 1};
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: file name missing"_err_en_US);
        return;
      }
      while (k < tokens && dir.TokenAt(k) != ">") {
        ++k;
      }
      if (k >= tokens) {
        prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
            "#include: expected '>' at end of included file"_en_US);
      }
      TokenSequence braced{dir, j + 1, k - j - 1};
      include = ReplaceMacros(braced, *prescanner).ToString();
      j = k;
    } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
        include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
      include = include.substr(1, include.size() - 2);
    } else {
      prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
          "#include: expected name of file to include"_err_en_US);
      return;
    }
    if (include.empty()) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: empty include file name"_err_en_US);
      return;
    }
    j = dir.SkipBlanks(j + 1);
    if (j < tokens && dir.TokenAt(j).ToString() != "!") {
      prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
          "#include: extra stuff ignored after file name"_en_US);
    }
    std::string buf;
    llvm::raw_string_ostream error{buf};
    const SourceFile *included{allSources_.Open(include, error)};
    if (!included) {
      prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
          "#include: %s"_err_en_US, error.str());
    } else if (included->bytes() > 0) {
      ProvenanceRange fileRange{
          allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
      Prescanner{*prescanner}
          .set_encoding(included->encoding())
          .Prescan(fileRange);
    }
  } else {
    prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
        "#%s: unknown or unimplemented directive"_err_en_US, dirName);
  }
}

CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  return {names_.back().data(), names_.back().size()};
}

bool Preprocessor::IsNameDefined(const CharBlock &token) {
  return definitions_.find(token) != definitions_.end();
}

static std::string GetDirectiveName(
    const TokenSequence &line, std::size_t *rest) {
  std::size_t tokens{line.SizeInTokens()};
  std::size_t j{line.SkipBlanks(0)};
  if (j == tokens || line.TokenAt(j).ToString() != "#") {
    *rest = tokens;
    return "";
  }
  j = line.SkipBlanks(j + 1);
  if (j == tokens) {
    *rest = tokens;
    return "";
  }
  *rest = line.SkipBlanks(j + 1);
  return ToLowerCaseLetters(line.TokenAt(j).ToString());
}

void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
    IsElseActive isElseActive, Prescanner *prescanner,
    ProvenanceRange provenanceRange) {
  int nesting{0};
  while (!prescanner->IsAtEnd()) {
    if (!prescanner->IsNextLinePreprocessorDirective()) {
      prescanner->NextLine();
      continue;
    }
    TokenSequence line{prescanner->TokenizePreprocessorDirective()};
    std::size_t rest{0};
    std::string dn{GetDirectiveName(line, &rest)};
    if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
      ++nesting;
    } else if (dn == "endif") {
      if (nesting-- == 0) {
        return;
      }
    } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
      if (dn == "else") {
        ifStack_.push(CanDeadElseAppear::No);
        return;
      }
      if (dn == "elif" &&
          IsIfPredicateTrue(
              line, rest, line.SizeInTokens() - rest, prescanner)) {
        ifStack_.push(CanDeadElseAppear::Yes);
        return;
      }
    }
  }
  prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
}

// Precedence level codes used here to accommodate mixed Fortran and C:
// 15: parentheses and constants, logical !, bitwise ~
// 14: unary + and -
// 13: **
// 12: *, /, % (modulus)
// 11: + and -
// 10: << and >>
//  9: bitwise &
//  8: bitwise ^
//  7: bitwise |
//  6: relations (.EQ., ==, &c.)
//  5: .NOT.
//  4: .AND., &&
//  3: .OR., ||
//  2: .EQV. and .NEQV. / .XOR.
//  1: ? :
//  0: ,
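// For example, "1 << 2 + 3" evaluates as "1 << (2 + 3)" because binary + (11)
// binds more tightly than << (10).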
static std::int64_t ExpressionValue(const TokenSequence &token,
    int minimumPrecedence, std::size_t *atToken,
    std::optional<Message> *error) {
  enum Operator {
    PARENS,
    CONST,
    NOTZERO, // !
    COMPLEMENT, // ~
    UPLUS,
    UMINUS,
    POWER,
    TIMES,
    DIVIDE,
    MODULUS,
    ADD,
    SUBTRACT,
    LEFTSHIFT,
    RIGHTSHIFT,
    BITAND,
    BITXOR,
    BITOR,
    LT,
    LE,
    EQ,
    NE,
    GE,
    GT,
    NOT,
    AND,
    OR,
    EQV,
    NEQV,
    SELECT,
    COMMA
  };
  static const int precedence[]{
      15, 15, 15, 15, // (), constant, !, ~
      14, 14, // unary +, -
      13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
      9, 8, 7, // &, ^, |
      6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
      5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
      1, 0 // ?: and ,
  };
  static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
      11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};

  static std::map<std::string, enum Operator> opNameMap;
  if (opNameMap.empty()) {
    opNameMap["("] = PARENS;
    opNameMap["!"] = NOTZERO;
    opNameMap["~"] = COMPLEMENT;
    opNameMap["**"] = POWER;
    opNameMap["*"] = TIMES;
    opNameMap["/"] = DIVIDE;
    opNameMap["%"] = MODULUS;
    opNameMap["+"] = ADD;
    opNameMap["-"] = SUBTRACT;
    opNameMap["<<"] = LEFTSHIFT;
    opNameMap[">>"] = RIGHTSHIFT;
    opNameMap["&"] = BITAND;
    opNameMap["^"] = BITXOR;
    opNameMap["|"] = BITOR;
    opNameMap[".lt."] = opNameMap["<"] = LT;
    opNameMap[".le."] = opNameMap["<="] = LE;
    opNameMap[".eq."] = opNameMap["=="] = EQ;
    opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
    opNameMap[".ge."] = opNameMap[">="] = GE;
    opNameMap[".gt."] = opNameMap[">"] = GT;
    opNameMap[".not."] = NOT;
    opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
    opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
    opNameMap[".eqv."] = EQV;
    opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
    opNameMap["?"] = SELECT;
    opNameMap[","] = COMMA;
  }

  std::size_t tokens{token.SizeInTokens()};
  CHECK(tokens > 0);
  if (*atToken >= tokens) {
    *error =
        Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
    return 0;
  }

  // Parse and evaluate a primary or a unary operator and its operand.
  std::size_t opAt{*atToken};
  std::string t{token.TokenAt(opAt).ToString()};
  enum Operator op;
  std::int64_t left{0};
  if (t == "(") {
    op = PARENS;
  } else if (IsDecimalDigit(t[0])) {
    op = CONST;
    std::size_t consumed{0};
    left = std::stoll(t, &consumed, 0 /*base to be detected*/);
    if (consumed < t.size()) {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "Uninterpretable numeric constant '%s'"_err_en_US, t};
      return 0;
    }
  } else if (IsLegalIdentifierStart(t[0])) {
    // undefined macro name -> zero
    // TODO: BOZ constants?
    op = CONST;
  } else if (t == "+") {
    op = UPLUS;
  } else if (t == "-") {
    op = UMINUS;
  } else if (t == "." && *atToken + 2 < tokens &&
      ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
      token.TokenAt(*atToken + 2).ToString() == ".") {
    op = NOT;
    *atToken += 2;
  } else {
    auto it{opNameMap.find(t)};
    if (it != opNameMap.end()) {
      op = it->second;
    } else {
      *error = Message{token.GetTokenProvenanceRange(opAt),
          "operand expected in expression"_err_en_US};
      return 0;
    }
  }
  if (precedence[op] < minimumPrecedence) {
    *error = Message{token.GetTokenProvenanceRange(opAt),
        "operator precedence error"_err_en_US};
    return 0;
  }
  ++*atToken;
  if (op != CONST) {
    left = ExpressionValue(token, operandPrecedence[op], atToken, error);
    if (*error) {
      return 0;
    }
    switch (op) {
    case PARENS:
      if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
        ++*atToken;
        break;
      }
      if (*atToken >= tokens) {
        *error = Message{token.GetProvenanceRange(),
            "')' missing from expression"_err_en_US};
      } else {
        *error = Message{
            token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
      }
      return 0;
    case NOTZERO:
      left = !left;
      break;
    case COMPLEMENT:
      left = ~left;
      break;
    case UPLUS:
      break;
    case UMINUS:
      left = -left;
      break;
    case NOT:
      left = -!left;
      break;
    default:
      CRASH_NO_CASE;
    }
  }

  // Parse and evaluate binary operators and their second operands, if present.
  while (*atToken < tokens) {
    int advance{1};
    t = token.TokenAt(*atToken).ToString();
    if (t == "." && *atToken + 2 < tokens &&
        token.TokenAt(*atToken + 2).ToString() == ".") {
      t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
      advance = 3;
    }
    auto it{opNameMap.find(t)};
    if (it == opNameMap.end()) {
      break;
    }
    op = it->second;
    if (op < POWER || precedence[op] < minimumPrecedence) {
      break;
    }
    opAt = *atToken;
    *atToken += advance;

    std::int64_t right{
        ExpressionValue(token, operandPrecedence[op], atToken, error)};
    if (*error) {
      return 0;
    }

    switch (op) {
    case POWER:
      if (left == 0) {
        if (right < 0) {
          *error = Message{token.GetTokenProvenanceRange(opAt),
              "0 ** negative power"_err_en_US};
        }
      } else if (left != 1 && right != 1) {
        if (right <= 0) {
          left = !right;
        } else {
          std::int64_t power{1};
          for (; right > 0; --right) {
            if ((power * left) / left != power) {
              *error = Message{token.GetTokenProvenanceRange(opAt),
                  "overflow in exponentiation"_err_en_US};
              left = 1;
            }
            power *= left;
          }
          left = power;
        }
      }
      break;
    case TIMES:
      if (left != 0 && right != 0 && ((left * right) / left) != right) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in multiplication"_err_en_US};
      }
      left = left * right;
      break;
    case DIVIDE:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
        left = 0;
      } else {
        left = left / right;
      }
      break;
    case MODULUS:
      if (right == 0) {
        *error = Message{
            token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
        left = 0;
      } else {
        left = left % right;
      }
      break;
    case ADD:
      if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in addition"_err_en_US};
      }
      left = left + right;
      break;
    case SUBTRACT:
      if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "overflow in subtraction"_err_en_US};
      }
      left = left - right;
      break;
    case LEFTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad left shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left << right;
      break;
    case RIGHTSHIFT:
      if (right < 0 || right > 64) {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "bad right shift count"_err_en_US};
      }
      left = right >= 64 ? 0 : left >> right;
      break;
    case BITAND:
    case AND:
      left = left & right;
      break;
    case BITXOR:
      left = left ^ right;
      break;
    case BITOR:
    case OR:
      left = left | right;
      break;
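    // Relational and logical results are encoded as -1 (all bits set) for
    // true and 0 for false, so they compose with the bitwise cases above.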
    case LT:
      left = -(left < right);
      break;
    case LE:
      left = -(left <= right);
      break;
    case EQ:
      left = -(left == right);
      break;
    case NE:
      left = -(left != right);
      break;
    case GE:
      left = -(left >= right);
      break;
    case GT:
      left = -(left > right);
      break;
    case EQV:
      left = -(!left == !right);
      break;
    case NEQV:
      left = -(!left != !right);
      break;
    case SELECT:
      if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
        *error = Message{token.GetTokenProvenanceRange(opAt),
            "':' required in selection expression"_err_en_US};
        return 0;
      } else {
        ++*atToken;
        std::int64_t third{
            ExpressionValue(token, operandPrecedence[op], atToken, error)};
        left = left != 0 ? right : third;
      }
      break;
    case COMMA:
      left = right;
      break;
    default:
      CRASH_NO_CASE;
    }
  }
  return left;
}

bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
    std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
  TokenSequence expr1{expr, first, exprTokens};
  if (expr1.HasBlanks()) {
    expr1.RemoveBlanks();
  }
  TokenSequence expr2;
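  // Rewrite "defined(NAME)" and "defined NAME" as '1' or '0' before macro
  // replacement so that the queried names are not themselves expanded.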
  for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
    if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
      CharBlock name;
      if (j + 3 < expr1.SizeInTokens() &&
          expr1.TokenAt(j + 1).ToString() == "(" &&
          expr1.TokenAt(j + 3).ToString() == ")") {
        name = expr1.TokenAt(j + 2);
        j += 3;
      } else if (j + 1 < expr1.SizeInTokens() &&
          IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
        name = expr1.TokenAt(++j);
      }
      if (!name.empty()) {
        char truth{IsNameDefined(name) ? '1' : '0'};
        expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
        continue;
      }
    }
    expr2.Put(expr1, j);
  }
  TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
  if (expr3.HasBlanks()) {
    expr3.RemoveBlanks();
  }
  if (expr3.empty()) {
    prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
    return false;
  }
  std::size_t atToken{0};
  std::optional<Message> error;
  bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
  if (error) {
    prescanner->Say(std::move(*error));
  } else if (atToken < expr3.SizeInTokens() &&
      expr3.TokenAt(atToken).ToString() != "!") {
    prescanner->Say(expr3.GetIntervalProvenanceRange(
                        atToken, expr3.SizeInTokens() - atToken),
        atToken == 0 ? "could not parse any expression"_err_en_US
                     : "excess characters after expression"_err_en_US);
  }
  return result;
}
} // namespace Fortran::parser