• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===-- lib/Parser/preprocessor.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "preprocessor.h"
10 #include "prescan.h"
11 #include "flang/Common/idioms.h"
12 #include "flang/Parser/characters.h"
13 #include "flang/Parser/message.h"
14 #include "llvm/Support/raw_ostream.h"
15 #include <algorithm>
16 #include <cinttypes>
17 #include <cstddef>
18 #include <ctime>
19 #include <map>
20 #include <memory>
21 #include <optional>
22 #include <set>
23 #include <utility>
24 
25 namespace Fortran::parser {
26 
Definition(const TokenSequence & repl,std::size_t firstToken,std::size_t tokens)27 Definition::Definition(
28     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens)
29     : replacement_{Tokenize({}, repl, firstToken, tokens)} {}
30 
Definition(const std::vector<std::string> & argNames,const TokenSequence & repl,std::size_t firstToken,std::size_t tokens,bool isVariadic)31 Definition::Definition(const std::vector<std::string> &argNames,
32     const TokenSequence &repl, std::size_t firstToken, std::size_t tokens,
33     bool isVariadic)
34     : isFunctionLike_{true},
35       argumentCount_(argNames.size()), isVariadic_{isVariadic},
36       replacement_{Tokenize(argNames, repl, firstToken, tokens)} {}
37 
Definition(const std::string & predefined,AllSources & sources)38 Definition::Definition(const std::string &predefined, AllSources &sources)
39     : isPredefined_{true},
40       replacement_{
41           predefined, sources.AddCompilerInsertion(predefined).start()} {}
42 
set_isDisabled(bool disable)43 bool Definition::set_isDisabled(bool disable) {
44   bool was{isDisabled_};
45   isDisabled_ = disable;
46   return was;
47 }
48 
IsLegalIdentifierStart(const CharBlock & cpl)49 static bool IsLegalIdentifierStart(const CharBlock &cpl) {
50   return cpl.size() > 0 && IsLegalIdentifierStart(cpl[0]);
51 }
52 
// Copies `tokens` tokens of `token`, starting at index `firstToken`, into a
// new sequence.  Every identifier that matches one of `argNames` is replaced
// with a two-character placeholder: '~' followed by 'A' plus the position of
// that name in `argNames`.  Definition::Apply() recognizes these placeholders
// when it substitutes actual arguments.  All other tokens are copied as is.
// The names in `argNames` must be distinct (enforced with CHECK).
TokenSequence Definition::Tokenize(const std::vector<std::string> &argNames,
    const TokenSequence &token, std::size_t firstToken, std::size_t tokens) {
  std::map<std::string, std::string> args;
  char argIndex{'A'};
  for (const std::string &arg : argNames) {
    CHECK(args.find(arg) == args.end());
    args[arg] = "~"s + argIndex++;
  }
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    // `j` counts from the start of the copied run; `at` is the matching
    // index into `token`.
    const std::size_t at{firstToken + j};
    CharBlock tok{token.TokenAt(at)};
    if (IsLegalIdentifierStart(tok)) {
      auto it{args.find(tok.ToString())};
      if (it != args.end()) {
        // The placeholder takes the provenance of the formal argument name
        // that it replaces, which lives at `at` (not at the relative `j`).
        result.Put(it->second, token.GetTokenProvenance(at));
        continue;
      }
    }
    result.Put(token, at, 1);
  }
  return result;
}
75 
// Produces a single-token sequence holding the characters of every token of
// `tokens`, wrapped in double quotes.  Each '"' or '\' character from the
// input is emitted twice.  Copied characters keep their own provenance; the
// two surrounding quotes use a compiler-insertion provenance obtained from
// `allSources`.
static TokenSequence Stringify(
    const TokenSequence &tokens, AllSources &allSources) {
  TokenSequence quoted;
  const Provenance quoteProvenance{
      allSources.CompilerInsertionProvenance('"')};
  quoted.PutNextTokenChar('"', quoteProvenance);
  const std::size_t tokenCount{tokens.SizeInTokens()};
  for (std::size_t t{0}; t != tokenCount; ++t) {
    const CharBlock &text{tokens.TokenAt(t)};
    const std::size_t length{text.size()};
    for (std::size_t at{0}; at != length; ++at) {
      const char ch{text[at]};
      const Provenance from{tokens.GetTokenProvenance(t, at)};
      const bool doubled{ch == '"' || ch == '\\'};
      // Quotes and backslashes are written twice, everything else once.
      for (int copies{doubled ? 2 : 1}; copies > 0; --copies) {
        quoted.PutNextTokenChar(ch, from);
      }
    }
  }
  quoted.PutNextTokenChar('"', quoteProvenance);
  quoted.CloseToken();
  return quoted;
}
97 
IsTokenPasting(CharBlock opr)98 constexpr bool IsTokenPasting(CharBlock opr) {
99   return opr.size() == 2 && opr[0] == '#' && opr[1] == '#';
100 }
101 
AnyTokenPasting(const TokenSequence & text)102 static bool AnyTokenPasting(const TokenSequence &text) {
103   std::size_t tokens{text.SizeInTokens()};
104   for (std::size_t j{0}; j < tokens; ++j) {
105     if (IsTokenPasting(text.TokenAt(j))) {
106       return true;
107     }
108   }
109   return false;
110 }
111 
TokenPasting(TokenSequence && text)112 static TokenSequence TokenPasting(TokenSequence &&text) {
113   if (!AnyTokenPasting(text)) {
114     return std::move(text);
115   }
116   TokenSequence result;
117   std::size_t tokens{text.SizeInTokens()};
118   bool pasting{false};
119   for (std::size_t j{0}; j < tokens; ++j) {
120     if (IsTokenPasting(text.TokenAt(j))) {
121       if (!pasting) {
122         while (!result.empty() &&
123             result.TokenAt(result.SizeInTokens() - 1).IsBlank()) {
124           result.pop_back();
125         }
126         if (!result.empty()) {
127           result.ReopenLastToken();
128           pasting = true;
129         }
130       }
131     } else if (pasting && text.TokenAt(j).IsBlank()) {
132     } else {
133       result.Put(text, j, 1);
134       pasting = false;
135     }
136   }
137   return result;
138 }
139 
Apply(const std::vector<TokenSequence> & args,Prescanner & prescanner)140 TokenSequence Definition::Apply(
141     const std::vector<TokenSequence> &args, Prescanner &prescanner) {
142   TokenSequence result;
143   bool skipping{false};
144   int parenthesesNesting{0};
145   std::size_t tokens{replacement_.SizeInTokens()};
146   for (std::size_t j{0}; j < tokens; ++j) {
147     CharBlock token{replacement_.TokenAt(j)};
148     std::size_t bytes{token.size()};
149     if (skipping) {
150       if (bytes == 1) {
151         if (token[0] == '(') {
152           ++parenthesesNesting;
153         } else if (token[0] == ')') {
154           skipping = --parenthesesNesting > 0;
155         }
156       }
157       continue;
158     }
159     if (bytes == 2 && token[0] == '~') { // argument substitution
160       std::size_t index = token[1] - 'A';
161       if (index >= args.size()) {
162         continue;
163       }
164       std::size_t prev{j};
165       while (prev > 0 && replacement_.TokenAt(prev - 1).IsBlank()) {
166         --prev;
167       }
168       if (prev > 0 && replacement_.TokenAt(prev - 1).size() == 1 &&
169           replacement_.TokenAt(prev - 1)[0] ==
170               '#') { // stringify argument without macro replacement
171         std::size_t resultSize{result.SizeInTokens()};
172         while (resultSize > 0 && result.TokenAt(resultSize - 1).empty()) {
173           result.pop_back();
174         }
175         CHECK(resultSize > 0 &&
176             result.TokenAt(resultSize - 1) == replacement_.TokenAt(prev - 1));
177         result.pop_back();
178         result.Put(Stringify(args[index], prescanner.allSources()));
179       } else {
180         const TokenSequence *arg{&args[index]};
181         std::optional<TokenSequence> replaced;
182         // Don't replace macros in the actual argument if it is preceded or
183         // followed by the token-pasting operator ## in the replacement text.
184         if (prev == 0 || !IsTokenPasting(replacement_.TokenAt(prev - 1))) {
185           auto next{replacement_.SkipBlanks(j + 1)};
186           if (next >= tokens || !IsTokenPasting(replacement_.TokenAt(next))) {
187             // Apply macro replacement to the actual argument
188             replaced =
189                 prescanner.preprocessor().MacroReplacement(*arg, prescanner);
190             if (replaced) {
191               arg = &*replaced;
192             }
193           }
194         }
195         result.Put(DEREF(arg));
196       }
197     } else if (bytes == 11 && isVariadic_ &&
198         token.ToString() == "__VA_ARGS__") {
199       Provenance commaProvenance{
200           prescanner.preprocessor().allSources().CompilerInsertionProvenance(
201               ',')};
202       for (std::size_t k{argumentCount_}; k < args.size(); ++k) {
203         if (k > argumentCount_) {
204           result.Put(","s, commaProvenance);
205         }
206         result.Put(args[k]);
207       }
208     } else if (bytes == 10 && isVariadic_ && token.ToString() == "__VA_OPT__" &&
209         j + 2 < tokens && replacement_.TokenAt(j + 1).ToString() == "(" &&
210         parenthesesNesting == 0) {
211       parenthesesNesting = 1;
212       skipping = args.size() == argumentCount_;
213       ++j;
214     } else {
215       if (bytes == 1 && parenthesesNesting > 0 && token[0] == '(') {
216         ++parenthesesNesting;
217       } else if (bytes == 1 && parenthesesNesting > 0 && token[0] == ')') {
218         if (--parenthesesNesting == 0) {
219           skipping = false;
220           continue;
221         }
222       }
223       result.Put(replacement_, j);
224     }
225   }
226   return TokenPasting(std::move(result));
227 }
228 
// Formats the local-time rendering of `now` with the strftime() pattern
// `format`.  The output is limited to a 16-byte buffer (terminator
// included); when strftime() reports 0, the returned string is empty.
static std::string FormatTime(const std::time_t &now, const char *format) {
  char text[16];
  const std::tm *local{std::localtime(&now)};
  const std::size_t length{std::strftime(text, sizeof text, format, local)};
  return std::string(text, length);
}
234 
Preprocessor(AllSources & allSources)235 Preprocessor::Preprocessor(AllSources &allSources) : allSources_{allSources} {
236   // Capture current local date & time once now to avoid having the values
237   // of __DATE__ or __TIME__ change during compilation.
238   std::time_t now;
239   std::time(&now);
240   definitions_.emplace(SaveTokenAsName("__DATE__"s), // e.g., "Jun 16 1904"
241       Definition{FormatTime(now, "\"%h %e %Y\""), allSources});
242   definitions_.emplace(SaveTokenAsName("__TIME__"s), // e.g., "23:59:60"
243       Definition{FormatTime(now, "\"%T\""), allSources});
244   // The values of these predefined macros depend on their invocation sites.
245   definitions_.emplace(
246       SaveTokenAsName("__FILE__"s), Definition{"__FILE__"s, allSources});
247   definitions_.emplace(
248       SaveTokenAsName("__LINE__"s), Definition{"__LINE__"s, allSources});
249 }
250 
Define(std::string macro,std::string value)251 void Preprocessor::Define(std::string macro, std::string value) {
252   definitions_.emplace(SaveTokenAsName(macro), Definition{value, allSources_});
253 }
254 
Undefine(std::string macro)255 void Preprocessor::Undefine(std::string macro) { definitions_.erase(macro); }
256 
// Performs macro replacement on `input`.  Returns std::nullopt when no token
// of `input` is a defined macro name (so the caller can keep using `input`
// itself); otherwise returns a new token sequence with the replacements
// applied.  `prescanner` supplies the current provenance for __FILE__ and
// __LINE__ and is passed along to nested replacements.
std::optional<TokenSequence> Preprocessor::MacroReplacement(
    const TokenSequence &input, Prescanner &prescanner) {
  // Do quick scan for any use of a defined name.
  std::size_t tokens{input.SizeInTokens()};
  std::size_t j;
  for (j = 0; j < tokens; ++j) {
    CharBlock token{input.TokenAt(j)};
    if (!token.empty() && IsLegalIdentifierStart(token[0]) &&
        IsNameDefined(token)) {
      break;
    }
  }
  if (j == tokens) {
    return std::nullopt; // input contains nothing that would be replaced
  }
  // Tokens before the first defined name are copied unchanged.
  TokenSequence result{input, 0, j};
  for (; j < tokens; ++j) {
    const CharBlock &token{input.TokenAt(j)};
    // Blank tokens and tokens that cannot be names pass through.
    if (token.IsBlank() || !IsLegalIdentifierStart(token[0])) {
      result.Put(input, j);
      continue;
    }
    auto it{definitions_.find(token)};
    if (it == definitions_.end()) {
      result.Put(input, j);
      continue;
    }
    Definition &def{it->second};
    // A definition is disabled while its own expansion is in progress
    // (see below), so a name is not replaced inside its own expansion.
    if (def.isDisabled()) {
      result.Put(input, j);
      continue;
    }
    if (!def.isFunctionLike()) {
      if (def.isPredefined()) {
        // __FILE__ and __LINE__ are computed from the prescanner's current
        // provenance rather than taken from stored replacement text.
        std::string name{def.replacement().TokenAt(0).ToString()};
        std::string repl;
        if (name == "__FILE__") {
          repl = "\""s +
              allSources_.GetPath(prescanner.GetCurrentProvenance()) + '"';
        } else if (name == "__LINE__") {
          std::string buf;
          llvm::raw_string_ostream ss{buf};
          ss << allSources_.GetLineNumber(prescanner.GetCurrentProvenance());
          repl = ss.str();
        }
        if (!repl.empty()) {
          ProvenanceRange insert{allSources_.AddCompilerInsertion(repl)};
          ProvenanceRange call{allSources_.AddMacroCall(
              insert, input.GetTokenProvenanceRange(j), repl)};
          result.Put(repl, call.start());
          continue;
        }
        // Any other predefined definition falls through and is expanded
        // like an ordinary object-like macro.
      }
      // Object-like macro: rescan its replacement text for further macros
      // with this definition disabled, then apply token pasting.
      def.set_isDisabled(true);
      TokenSequence replaced{
          TokenPasting(ReplaceMacros(def.replacement(), prescanner))};
      def.set_isDisabled(false);
      if (!replaced.empty()) {
        ProvenanceRange from{def.replacement().GetProvenanceRange()};
        ProvenanceRange use{input.GetTokenProvenanceRange(j)};
        ProvenanceRange newRange{
            allSources_.AddMacroCall(from, use, replaced.ToString())};
        result.Put(replaced, newRange);
      }
      continue;
    }
    // Possible function-like macro call.  Skip spaces and newlines to see
    // whether '(' is next.
    std::size_t k{j};
    bool leftParen{false};
    while (++k < tokens) {
      const CharBlock &lookAhead{input.TokenAt(k)};
      if (!lookAhead.IsBlank() && lookAhead[0] != '\n') {
        leftParen = lookAhead[0] == '(' && lookAhead.size() == 1;
        break;
      }
    }
    if (!leftParen) {
      // The name is not followed by '(', so this is not an invocation.
      result.Put(input, j);
      continue;
    }
    // Record the index of the first token of each actual argument.
    // Arguments are separated by commas that are not nested in parentheses;
    // on exit from the loop, k indexes the closing ')' (or equals `tokens`
    // if the call is unterminated).
    std::vector<std::size_t> argStart{++k};
    for (int nesting{0}; k < tokens; ++k) {
      CharBlock token{input.TokenAt(k)};
      if (token.size() == 1) {
        char ch{token[0]};
        if (ch == '(') {
          ++nesting;
        } else if (ch == ')') {
          if (nesting == 0) {
            break;
          }
          --nesting;
        } else if (ch == ',' && nesting == 0) {
          argStart.push_back(k + 1);
        }
      }
    }
    if (argStart.size() == 1 && k == argStart[0] && def.argumentCount() == 0) {
      // Subtle: () is zero arguments, not one empty argument,
      // unless one argument was expected.
      argStart.clear();
    }
    // An unterminated call, too few arguments, or too many arguments for a
    // non-variadic macro: leave the name unreplaced.
    if (k >= tokens || argStart.size() < def.argumentCount() ||
        (argStart.size() > def.argumentCount() && !def.isVariadic())) {
      result.Put(input, j);
      continue;
    }
    // Slice each actual argument out of the input; an argument ends just
    // before the comma that starts the next one, or before the final ')'.
    std::vector<TokenSequence> args;
    for (std::size_t n{0}; n < argStart.size(); ++n) {
      std::size_t at{argStart[n]};
      std::size_t count{
          (n + 1 == argStart.size() ? k : argStart[n + 1] - 1) - at};
      args.emplace_back(TokenSequence(input, at, count));
    }
    // Expand the macro body with the actual arguments and rescan the result
    // with this definition disabled.
    def.set_isDisabled(true);
    TokenSequence replaced{
        ReplaceMacros(def.Apply(args, prescanner), prescanner)};
    def.set_isDisabled(false);
    if (!replaced.empty()) {
      ProvenanceRange from{def.replacement().GetProvenanceRange()};
      ProvenanceRange use{input.GetIntervalProvenanceRange(j, k - j)};
      ProvenanceRange newRange{
          allSources_.AddMacroCall(from, use, replaced.ToString())};
      result.Put(replaced, newRange);
    }
    j = k; // advance to the terminal ')'
  }
  return result;
}
387 
// Convenience wrapper around MacroReplacement() that always yields a token
// sequence: the replaced text when anything was replaced, otherwise a copy
// of `tokens`.
TokenSequence Preprocessor::ReplaceMacros(
    const TokenSequence &tokens, Prescanner &prescanner) {
  std::optional<TokenSequence> replaced{MacroReplacement(tokens, prescanner)};
  if (!replaced.has_value()) {
    return tokens;
  }
  return std::move(*replaced);
}
395 
Directive(const TokenSequence & dir,Prescanner * prescanner)396 void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) {
397   std::size_t tokens{dir.SizeInTokens()};
398   std::size_t j{dir.SkipBlanks(0)};
399   if (j == tokens) {
400     return;
401   }
402   if (dir.TokenAt(j).ToString() != "#") {
403     prescanner->Say(dir.GetTokenProvenanceRange(j), "missing '#'"_err_en_US);
404     return;
405   }
406   j = dir.SkipBlanks(j + 1);
407   while (tokens > 0 && dir.TokenAt(tokens - 1).IsBlank()) {
408     --tokens;
409   }
410   if (j == tokens) {
411     return;
412   }
413   if (IsDecimalDigit(dir.TokenAt(j)[0]) || dir.TokenAt(j)[0] == '"') {
414     return; // treat like #line, ignore it
415   }
416   std::size_t dirOffset{j};
417   std::string dirName{ToLowerCaseLetters(dir.TokenAt(dirOffset).ToString())};
418   j = dir.SkipBlanks(j + 1);
419   CharBlock nameToken;
420   if (j < tokens && IsLegalIdentifierStart(dir.TokenAt(j)[0])) {
421     nameToken = dir.TokenAt(j);
422   }
423   if (dirName == "line") {
424     // #line is ignored
425   } else if (dirName == "define") {
426     if (nameToken.empty()) {
427       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
428           "#define: missing or invalid name"_err_en_US);
429       return;
430     }
431     nameToken = SaveTokenAsName(nameToken);
432     definitions_.erase(nameToken);
433     if (++j < tokens && dir.TokenAt(j).size() == 1 &&
434         dir.TokenAt(j)[0] == '(') {
435       j = dir.SkipBlanks(j + 1);
436       std::vector<std::string> argName;
437       bool isVariadic{false};
438       if (dir.TokenAt(j).ToString() != ")") {
439         while (true) {
440           std::string an{dir.TokenAt(j).ToString()};
441           if (an == "...") {
442             isVariadic = true;
443           } else {
444             if (an.empty() || !IsLegalIdentifierStart(an[0])) {
445               prescanner->Say(dir.GetTokenProvenanceRange(j),
446                   "#define: missing or invalid argument name"_err_en_US);
447               return;
448             }
449             argName.push_back(an);
450           }
451           j = dir.SkipBlanks(j + 1);
452           if (j == tokens) {
453             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
454                 "#define: malformed argument list"_err_en_US);
455             return;
456           }
457           std::string punc{dir.TokenAt(j).ToString()};
458           if (punc == ")") {
459             break;
460           }
461           if (isVariadic || punc != ",") {
462             prescanner->Say(dir.GetTokenProvenanceRange(j),
463                 "#define: malformed argument list"_err_en_US);
464             return;
465           }
466           j = dir.SkipBlanks(j + 1);
467           if (j == tokens) {
468             prescanner->Say(dir.GetTokenProvenanceRange(tokens - 1),
469                 "#define: malformed argument list"_err_en_US);
470             return;
471           }
472         }
473         if (std::set<std::string>(argName.begin(), argName.end()).size() !=
474             argName.size()) {
475           prescanner->Say(dir.GetTokenProvenance(dirOffset),
476               "#define: argument names are not distinct"_err_en_US);
477           return;
478         }
479       }
480       j = dir.SkipBlanks(j + 1);
481       definitions_.emplace(std::make_pair(
482           nameToken, Definition{argName, dir, j, tokens - j, isVariadic}));
483     } else {
484       j = dir.SkipBlanks(j + 1);
485       definitions_.emplace(
486           std::make_pair(nameToken, Definition{dir, j, tokens - j}));
487     }
488   } else if (dirName == "undef") {
489     if (nameToken.empty()) {
490       prescanner->Say(
491           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
492           "# missing or invalid name"_err_en_US);
493     } else {
494       if (dir.IsAnythingLeft(++j)) {
495         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
496             "#undef: excess tokens at end of directive"_en_US);
497       } else {
498         definitions_.erase(nameToken);
499       }
500     }
501   } else if (dirName == "ifdef" || dirName == "ifndef") {
502     bool doThen{false};
503     if (nameToken.empty()) {
504       prescanner->Say(
505           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
506           "#%s: missing name"_err_en_US, dirName);
507     } else {
508       if (dir.IsAnythingLeft(++j)) {
509         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
510             "#%s: excess tokens at end of directive"_en_US, dirName);
511       }
512       doThen = IsNameDefined(nameToken) == (dirName == "ifdef");
513     }
514     if (doThen) {
515       ifStack_.push(CanDeadElseAppear::Yes);
516     } else {
517       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
518           dir.GetTokenProvenance(dirOffset));
519     }
520   } else if (dirName == "if") {
521     if (IsIfPredicateTrue(dir, j, tokens - j, prescanner)) {
522       ifStack_.push(CanDeadElseAppear::Yes);
523     } else {
524       SkipDisabledConditionalCode(dirName, IsElseActive::Yes, prescanner,
525           dir.GetTokenProvenanceRange(dirOffset));
526     }
527   } else if (dirName == "else") {
528     if (dir.IsAnythingLeft(j)) {
529       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
530           "#else: excess tokens at end of directive"_en_US);
531     } else if (ifStack_.empty()) {
532       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
533           "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US);
534     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
535       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
536           "#else: already appeared within this #if, #ifdef, or #ifndef"_err_en_US);
537     } else {
538       ifStack_.pop();
539       SkipDisabledConditionalCode("else", IsElseActive::No, prescanner,
540           dir.GetTokenProvenanceRange(dirOffset));
541     }
542   } else if (dirName == "elif") {
543     if (ifStack_.empty()) {
544       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
545           "#elif: not nested within #if, #ifdef, or #ifndef"_err_en_US);
546     } else if (ifStack_.top() != CanDeadElseAppear::Yes) {
547       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
548           "#elif: #else previously appeared within this #if, #ifdef, or #ifndef"_err_en_US);
549     } else {
550       ifStack_.pop();
551       SkipDisabledConditionalCode("elif", IsElseActive::No, prescanner,
552           dir.GetTokenProvenanceRange(dirOffset));
553     }
554   } else if (dirName == "endif") {
555     if (dir.IsAnythingLeft(j)) {
556       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
557           "#endif: excess tokens at end of directive"_en_US);
558     } else if (ifStack_.empty()) {
559       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
560           "#endif: no #if, #ifdef, or #ifndef"_err_en_US);
561     } else {
562       ifStack_.pop();
563     }
564   } else if (dirName == "error") {
565     prescanner->Say(
566         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
567         "%s"_err_en_US, dir.ToString());
568   } else if (dirName == "warning" || dirName == "comment" ||
569       dirName == "note") {
570     prescanner->Say(
571         dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
572         "%s"_en_US, dir.ToString());
573   } else if (dirName == "include") {
574     if (j == tokens) {
575       prescanner->Say(
576           dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset),
577           "#include: missing name of file to include"_err_en_US);
578       return;
579     }
580     std::string include;
581     if (dir.TokenAt(j).ToString() == "<") { // #include <foo>
582       std::size_t k{j + 1};
583       if (k >= tokens) {
584         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
585             "#include: file name missing"_err_en_US);
586         return;
587       }
588       while (k < tokens && dir.TokenAt(k) != ">") {
589         ++k;
590       }
591       if (k >= tokens) {
592         prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
593             "#include: expected '>' at end of included file"_en_US);
594       }
595       TokenSequence braced{dir, j + 1, k - j - 1};
596       include = ReplaceMacros(braced, *prescanner).ToString();
597       j = k;
598     } else if ((include = dir.TokenAt(j).ToString()).substr(0, 1) == "\"" &&
599         include.substr(include.size() - 1, 1) == "\"") { // #include "foo"
600       include = include.substr(1, include.size() - 2);
601     } else {
602       prescanner->Say(dir.GetTokenProvenanceRange(j < tokens ? j : tokens - 1),
603           "#include: expected name of file to include"_err_en_US);
604       return;
605     }
606     if (include.empty()) {
607       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
608           "#include: empty include file name"_err_en_US);
609       return;
610     }
611     j = dir.SkipBlanks(j + 1);
612     if (j < tokens && dir.TokenAt(j).ToString() != "!") {
613       prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j),
614           "#include: extra stuff ignored after file name"_en_US);
615     }
616     std::string buf;
617     llvm::raw_string_ostream error{buf};
618     const SourceFile *included{allSources_.Open(include, error)};
619     if (!included) {
620       prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
621           "#include: %s"_err_en_US, error.str());
622     } else if (included->bytes() > 0) {
623       ProvenanceRange fileRange{
624           allSources_.AddIncludedFile(*included, dir.GetProvenanceRange())};
625       Prescanner{*prescanner}
626           .set_encoding(included->encoding())
627           .Prescan(fileRange);
628     }
629   } else {
630     prescanner->Say(dir.GetTokenProvenanceRange(dirOffset),
631         "#%s: unknown or unimplemented directive"_err_en_US, dirName);
632   }
633 }
634 
// Appends a copy of the text of `t` to names_ and returns a CharBlock that
// refers to that stored copy instead of to the caller's buffer.
CharBlock Preprocessor::SaveTokenAsName(const CharBlock &t) {
  names_.push_back(t.ToString());
  const std::string &saved{names_.back()};
  return {saved.data(), saved.size()};
}
639 
IsNameDefined(const CharBlock & token)640 bool Preprocessor::IsNameDefined(const CharBlock &token) {
641   return definitions_.find(token) != definitions_.end();
642 }
643 
GetDirectiveName(const TokenSequence & line,std::size_t * rest)644 static std::string GetDirectiveName(
645     const TokenSequence &line, std::size_t *rest) {
646   std::size_t tokens{line.SizeInTokens()};
647   std::size_t j{line.SkipBlanks(0)};
648   if (j == tokens || line.TokenAt(j).ToString() != "#") {
649     *rest = tokens;
650     return "";
651   }
652   j = line.SkipBlanks(j + 1);
653   if (j == tokens) {
654     *rest = tokens;
655     return "";
656   }
657   *rest = line.SkipBlanks(j + 1);
658   return ToLowerCaseLetters(line.TokenAt(j).ToString());
659 }
660 
SkipDisabledConditionalCode(const std::string & dirName,IsElseActive isElseActive,Prescanner * prescanner,ProvenanceRange provenanceRange)661 void Preprocessor::SkipDisabledConditionalCode(const std::string &dirName,
662     IsElseActive isElseActive, Prescanner *prescanner,
663     ProvenanceRange provenanceRange) {
664   int nesting{0};
665   while (!prescanner->IsAtEnd()) {
666     if (!prescanner->IsNextLinePreprocessorDirective()) {
667       prescanner->NextLine();
668       continue;
669     }
670     TokenSequence line{prescanner->TokenizePreprocessorDirective()};
671     std::size_t rest{0};
672     std::string dn{GetDirectiveName(line, &rest)};
673     if (dn == "ifdef" || dn == "ifndef" || dn == "if") {
674       ++nesting;
675     } else if (dn == "endif") {
676       if (nesting-- == 0) {
677         return;
678       }
679     } else if (isElseActive == IsElseActive::Yes && nesting == 0) {
680       if (dn == "else") {
681         ifStack_.push(CanDeadElseAppear::No);
682         return;
683       }
684       if (dn == "elif" &&
685           IsIfPredicateTrue(
686               line, rest, line.SizeInTokens() - rest, prescanner)) {
687         ifStack_.push(CanDeadElseAppear::Yes);
688         return;
689       }
690     }
691   }
692   prescanner->Say(provenanceRange, "#%s: missing #endif"_err_en_US, dirName);
693 }
694 
695 // Precedence level codes used here to accommodate mixed Fortran and C:
696 // 15: parentheses and constants, logical !, bitwise ~
697 // 14: unary + and -
698 // 13: **
699 // 12: *, /, % (modulus)
700 // 11: + and -
701 // 10: << and >>
702 //  9: bitwise &
703 //  8: bitwise ^
704 //  7: bitwise |
705 //  6: relations (.EQ., ==, &c.)
706 //  5: .NOT.
707 //  4: .AND., &&
708 //  3: .OR., ||
709 //  2: .EQV. and .NEQV. / .XOR.
710 //  1: ? :
711 //  0: ,
ExpressionValue(const TokenSequence & token,int minimumPrecedence,std::size_t * atToken,std::optional<Message> * error)712 static std::int64_t ExpressionValue(const TokenSequence &token,
713     int minimumPrecedence, std::size_t *atToken,
714     std::optional<Message> *error) {
715   enum Operator {
716     PARENS,
717     CONST,
718     NOTZERO, // !
719     COMPLEMENT, // ~
720     UPLUS,
721     UMINUS,
722     POWER,
723     TIMES,
724     DIVIDE,
725     MODULUS,
726     ADD,
727     SUBTRACT,
728     LEFTSHIFT,
729     RIGHTSHIFT,
730     BITAND,
731     BITXOR,
732     BITOR,
733     LT,
734     LE,
735     EQ,
736     NE,
737     GE,
738     GT,
739     NOT,
740     AND,
741     OR,
742     EQV,
743     NEQV,
744     SELECT,
745     COMMA
746   };
747   static const int precedence[]{
748       15, 15, 15, 15, // (), 6, !, ~
749       14, 14, // unary +, -
750       13, 12, 12, 12, 11, 11, 10, 10, // **, *, /, %, +, -, <<, >>
751       9, 8, 7, // &, ^, |
752       6, 6, 6, 6, 6, 6, // relations .LT. to .GT.
753       5, 4, 3, 2, 2, // .NOT., .AND., .OR., .EQV., .NEQV.
754       1, 0 // ?: and ,
755   };
756   static const int operandPrecedence[]{0, -1, 15, 15, 15, 15, 13, 12, 12, 12,
757       11, 11, 11, 11, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 4, 3, 3, 3, 1, 0};
758 
759   static std::map<std::string, enum Operator> opNameMap;
760   if (opNameMap.empty()) {
761     opNameMap["("] = PARENS;
762     opNameMap["!"] = NOTZERO;
763     opNameMap["~"] = COMPLEMENT;
764     opNameMap["**"] = POWER;
765     opNameMap["*"] = TIMES;
766     opNameMap["/"] = DIVIDE;
767     opNameMap["%"] = MODULUS;
768     opNameMap["+"] = ADD;
769     opNameMap["-"] = SUBTRACT;
770     opNameMap["<<"] = LEFTSHIFT;
771     opNameMap[">>"] = RIGHTSHIFT;
772     opNameMap["&"] = BITAND;
773     opNameMap["^"] = BITXOR;
774     opNameMap["|"] = BITOR;
775     opNameMap[".lt."] = opNameMap["<"] = LT;
776     opNameMap[".le."] = opNameMap["<="] = LE;
777     opNameMap[".eq."] = opNameMap["=="] = EQ;
778     opNameMap[".ne."] = opNameMap["/="] = opNameMap["!="] = NE;
779     opNameMap[".ge."] = opNameMap[">="] = GE;
780     opNameMap[".gt."] = opNameMap[">"] = GT;
781     opNameMap[".not."] = NOT;
782     opNameMap[".and."] = opNameMap[".a."] = opNameMap["&&"] = AND;
783     opNameMap[".or."] = opNameMap[".o."] = opNameMap["||"] = OR;
784     opNameMap[".eqv."] = EQV;
785     opNameMap[".neqv."] = opNameMap[".xor."] = opNameMap[".x."] = NEQV;
786     opNameMap["?"] = SELECT;
787     opNameMap[","] = COMMA;
788   }
789 
790   std::size_t tokens{token.SizeInTokens()};
791   CHECK(tokens > 0);
792   if (*atToken >= tokens) {
793     *error =
794         Message{token.GetProvenanceRange(), "incomplete expression"_err_en_US};
795     return 0;
796   }
797 
798   // Parse and evaluate a primary or a unary operator and its operand.
799   std::size_t opAt{*atToken};
800   std::string t{token.TokenAt(opAt).ToString()};
801   enum Operator op;
802   std::int64_t left{0};
803   if (t == "(") {
804     op = PARENS;
805   } else if (IsDecimalDigit(t[0])) {
806     op = CONST;
807     std::size_t consumed{0};
808     left = std::stoll(t, &consumed, 0 /*base to be detected*/);
809     if (consumed < t.size()) {
810       *error = Message{token.GetTokenProvenanceRange(opAt),
811           "Uninterpretable numeric constant '%s'"_err_en_US, t};
812       return 0;
813     }
814   } else if (IsLegalIdentifierStart(t[0])) {
815     // undefined macro name -> zero
816     // TODO: BOZ constants?
817     op = CONST;
818   } else if (t == "+") {
819     op = UPLUS;
820   } else if (t == "-") {
821     op = UMINUS;
822   } else if (t == "." && *atToken + 2 < tokens &&
823       ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) == "not" &&
824       token.TokenAt(*atToken + 2).ToString() == ".") {
825     op = NOT;
826     *atToken += 2;
827   } else {
828     auto it{opNameMap.find(t)};
829     if (it != opNameMap.end()) {
830       op = it->second;
831     } else {
832       *error = Message{token.GetTokenProvenanceRange(opAt),
833           "operand expected in expression"_err_en_US};
834       return 0;
835     }
836   }
837   if (precedence[op] < minimumPrecedence) {
838     *error = Message{token.GetTokenProvenanceRange(opAt),
839         "operator precedence error"_err_en_US};
840     return 0;
841   }
842   ++*atToken;
843   if (op != CONST) {
844     left = ExpressionValue(token, operandPrecedence[op], atToken, error);
845     if (*error) {
846       return 0;
847     }
848     switch (op) {
849     case PARENS:
850       if (*atToken < tokens && token.TokenAt(*atToken).ToString() == ")") {
851         ++*atToken;
852         break;
853       }
854       if (*atToken >= tokens) {
855         *error = Message{token.GetProvenanceRange(),
856             "')' missing from expression"_err_en_US};
857       } else {
858         *error = Message{
859             token.GetTokenProvenanceRange(*atToken), "expected ')'"_err_en_US};
860       }
861       return 0;
862     case NOTZERO:
863       left = !left;
864       break;
865     case COMPLEMENT:
866       left = ~left;
867       break;
868     case UPLUS:
869       break;
870     case UMINUS:
871       left = -left;
872       break;
873     case NOT:
874       left = -!left;
875       break;
876     default:
877       CRASH_NO_CASE;
878     }
879   }
880 
881   // Parse and evaluate binary operators and their second operands, if present.
882   while (*atToken < tokens) {
883     int advance{1};
884     t = token.TokenAt(*atToken).ToString();
885     if (t == "." && *atToken + 2 < tokens &&
886         token.TokenAt(*atToken + 2).ToString() == ".") {
887       t += ToLowerCaseLetters(token.TokenAt(*atToken + 1).ToString()) + '.';
888       advance = 3;
889     }
890     auto it{opNameMap.find(t)};
891     if (it == opNameMap.end()) {
892       break;
893     }
894     op = it->second;
895     if (op < POWER || precedence[op] < minimumPrecedence) {
896       break;
897     }
898     opAt = *atToken;
899     *atToken += advance;
900 
901     std::int64_t right{
902         ExpressionValue(token, operandPrecedence[op], atToken, error)};
903     if (*error) {
904       return 0;
905     }
906 
907     switch (op) {
908     case POWER:
909       if (left == 0) {
910         if (right < 0) {
911           *error = Message{token.GetTokenProvenanceRange(opAt),
912               "0 ** negative power"_err_en_US};
913         }
914       } else if (left != 1 && right != 1) {
915         if (right <= 0) {
916           left = !right;
917         } else {
918           std::int64_t power{1};
919           for (; right > 0; --right) {
920             if ((power * left) / left != power) {
921               *error = Message{token.GetTokenProvenanceRange(opAt),
922                   "overflow in exponentation"_err_en_US};
923               left = 1;
924             }
925             power *= left;
926           }
927           left = power;
928         }
929       }
930       break;
931     case TIMES:
932       if (left != 0 && right != 0 && ((left * right) / left) != right) {
933         *error = Message{token.GetTokenProvenanceRange(opAt),
934             "overflow in multiplication"_err_en_US};
935       }
936       left = left * right;
937       break;
938     case DIVIDE:
939       if (right == 0) {
940         *error = Message{
941             token.GetTokenProvenanceRange(opAt), "division by zero"_err_en_US};
942         left = 0;
943       } else {
944         left = left / right;
945       }
946       break;
947     case MODULUS:
948       if (right == 0) {
949         *error = Message{
950             token.GetTokenProvenanceRange(opAt), "modulus by zero"_err_en_US};
951         left = 0;
952       } else {
953         left = left % right;
954       }
955       break;
956     case ADD:
957       if ((left < 0) == (right < 0) && (left < 0) != (left + right < 0)) {
958         *error = Message{token.GetTokenProvenanceRange(opAt),
959             "overflow in addition"_err_en_US};
960       }
961       left = left + right;
962       break;
963     case SUBTRACT:
964       if ((left < 0) != (right < 0) && (left < 0) == (left - right < 0)) {
965         *error = Message{token.GetTokenProvenanceRange(opAt),
966             "overflow in subtraction"_err_en_US};
967       }
968       left = left - right;
969       break;
970     case LEFTSHIFT:
971       if (right < 0 || right > 64) {
972         *error = Message{token.GetTokenProvenanceRange(opAt),
973             "bad left shift count"_err_en_US};
974       }
975       left = right >= 64 ? 0 : left << right;
976       break;
977     case RIGHTSHIFT:
978       if (right < 0 || right > 64) {
979         *error = Message{token.GetTokenProvenanceRange(opAt),
980             "bad right shift count"_err_en_US};
981       }
982       left = right >= 64 ? 0 : left >> right;
983       break;
984     case BITAND:
985     case AND:
986       left = left & right;
987       break;
988     case BITXOR:
989       left = left ^ right;
990       break;
991     case BITOR:
992     case OR:
993       left = left | right;
994       break;
995     case LT:
996       left = -(left < right);
997       break;
998     case LE:
999       left = -(left <= right);
1000       break;
1001     case EQ:
1002       left = -(left == right);
1003       break;
1004     case NE:
1005       left = -(left != right);
1006       break;
1007     case GE:
1008       left = -(left >= right);
1009       break;
1010     case GT:
1011       left = -(left > right);
1012       break;
1013     case EQV:
1014       left = -(!left == !right);
1015       break;
1016     case NEQV:
1017       left = -(!left != !right);
1018       break;
1019     case SELECT:
1020       if (*atToken >= tokens || token.TokenAt(*atToken).ToString() != ":") {
1021         *error = Message{token.GetTokenProvenanceRange(opAt),
1022             "':' required in selection expression"_err_en_US};
1023         return 0;
1024       } else {
1025         ++*atToken;
1026         std::int64_t third{
1027             ExpressionValue(token, operandPrecedence[op], atToken, error)};
1028         left = left != 0 ? right : third;
1029       }
1030       break;
1031     case COMMA:
1032       left = right;
1033       break;
1034     default:
1035       CRASH_NO_CASE;
1036     }
1037   }
1038   return left;
1039 }
1040 
IsIfPredicateTrue(const TokenSequence & expr,std::size_t first,std::size_t exprTokens,Prescanner * prescanner)1041 bool Preprocessor::IsIfPredicateTrue(const TokenSequence &expr,
1042     std::size_t first, std::size_t exprTokens, Prescanner *prescanner) {
1043   TokenSequence expr1{expr, first, exprTokens};
1044   if (expr1.HasBlanks()) {
1045     expr1.RemoveBlanks();
1046   }
1047   TokenSequence expr2;
1048   for (std::size_t j{0}; j < expr1.SizeInTokens(); ++j) {
1049     if (ToLowerCaseLetters(expr1.TokenAt(j).ToString()) == "defined") {
1050       CharBlock name;
1051       if (j + 3 < expr1.SizeInTokens() &&
1052           expr1.TokenAt(j + 1).ToString() == "(" &&
1053           expr1.TokenAt(j + 3).ToString() == ")") {
1054         name = expr1.TokenAt(j + 2);
1055         j += 3;
1056       } else if (j + 1 < expr1.SizeInTokens() &&
1057           IsLegalIdentifierStart(expr1.TokenAt(j + 1))) {
1058         name = expr1.TokenAt(++j);
1059       }
1060       if (!name.empty()) {
1061         char truth{IsNameDefined(name) ? '1' : '0'};
1062         expr2.Put(&truth, 1, allSources_.CompilerInsertionProvenance(truth));
1063         continue;
1064       }
1065     }
1066     expr2.Put(expr1, j);
1067   }
1068   TokenSequence expr3{ReplaceMacros(expr2, *prescanner)};
1069   if (expr3.HasBlanks()) {
1070     expr3.RemoveBlanks();
1071   }
1072   if (expr3.empty()) {
1073     prescanner->Say(expr.GetProvenanceRange(), "empty expression"_err_en_US);
1074     return false;
1075   }
1076   std::size_t atToken{0};
1077   std::optional<Message> error;
1078   bool result{ExpressionValue(expr3, 0, &atToken, &error) != 0};
1079   if (error) {
1080     prescanner->Say(std::move(*error));
1081   } else if (atToken < expr3.SizeInTokens() &&
1082       expr3.TokenAt(atToken).ToString() != "!") {
1083     prescanner->Say(expr3.GetIntervalProvenanceRange(
1084                         atToken, expr3.SizeInTokens() - atToken),
1085         atToken == 0 ? "could not parse any expression"_err_en_US
1086                      : "excess characters after expression"_err_en_US);
1087   }
1088   return result;
1089 }
1090 } // namespace Fortran::parser
1091