1 //===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PRESCAN_H_ 10 #define FORTRAN_PARSER_PRESCAN_H_ 11 12 // Defines a fast Fortran source prescanning phase that implements some 13 // character-level features of the language that can be inefficient to 14 // support directly in a backtracking parser. This phase handles Fortran 15 // line continuation, comment removal, card image margins, padding out 16 // fixed form character literals on truncated card images, file 17 // inclusion, and driving the Fortran source preprocessor. 18 19 #include "token-sequence.h" 20 #include "flang/Common/Fortran-features.h" 21 #include "flang/Parser/characters.h" 22 #include "flang/Parser/message.h" 23 #include "flang/Parser/provenance.h" 24 #include <bitset> 25 #include <optional> 26 #include <string> 27 #include <unordered_set> 28 29 namespace Fortran::parser { 30 31 class Messages; 32 class Preprocessor; 33 34 class Prescanner { 35 public: 36 Prescanner(Messages &, CookedSource &, Preprocessor &, 37 common::LanguageFeatureControl); 38 Prescanner(const Prescanner &); 39 allSources()40 const AllSources &allSources() const { return allSources_; } allSources()41 AllSources &allSources() { return allSources_; } messages()42 const Messages &messages() const { return messages_; } messages()43 Messages &messages() { return messages_; } preprocessor()44 const Preprocessor &preprocessor() const { return preprocessor_; } preprocessor()45 Preprocessor &preprocessor() { return preprocessor_; } 46 set_fixedForm(bool yes)47 Prescanner &set_fixedForm(bool yes) { 48 inFixedForm_ = yes; 49 return *this; 50 } set_encoding(Encoding code)51 Prescanner &set_encoding(Encoding code) { 52 encoding_ = code; 53 return *this; 54 } set_fixedFormColumnLimit(int limit)55 Prescanner &set_fixedFormColumnLimit(int limit) { 56 fixedFormColumnLimit_ = limit; 57 return *this; 58 } 59 60 Prescanner &AddCompilerDirectiveSentinel(const std::string &); 61 62 void Prescan(ProvenanceRange); 63 void Statement(); 64 void NextLine(); 65 66 // Callbacks for use by Preprocessor. IsAtEnd()67 bool IsAtEnd() const { return nextLine_ >= limit_; } 68 bool IsNextLinePreprocessorDirective() const; 69 TokenSequence TokenizePreprocessorDirective(); GetCurrentProvenance()70 Provenance GetCurrentProvenance() const { return GetProvenance(at_); } 71 Say(A &&...a)72 template <typename... A> Message &Say(A &&...a) { 73 return messages_.Say(std::forward<A>(a)...); 74 } 75 76 private: 77 struct LineClassification { 78 enum class Kind { 79 Comment, 80 ConditionalCompilationDirective, 81 IncludeDirective, // #include 82 DefinitionDirective, // #define & #undef 83 PreprocessorDirective, 84 IncludeLine, // Fortran INCLUDE 85 CompilerDirective, 86 Source 87 }; 88 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr) 89 : kind{k}, payloadOffset{po}, sentinel{s} {} 90 LineClassification(LineClassification &&) = default; 91 Kind kind; 92 std::size_t payloadOffset; // byte offset of content 93 const char *sentinel; // if it's a compiler directive 94 }; 95 BeginSourceLine(const char * at)96 void BeginSourceLine(const char *at) { 97 at_ = at; 98 column_ = 1; 99 tabInCurrentLine_ = false; 100 } 101 BeginSourceLineAndAdvance()102 void BeginSourceLineAndAdvance() { 103 BeginSourceLine(nextLine_); 104 NextLine(); 105 } 106 BeginStatementAndAdvance()107 void BeginStatementAndAdvance() { 108 BeginSourceLineAndAdvance(); 109 slashInCurrentStatement_ = false; 110 preventHollerith_ = false; 111 delimiterNesting_ = 0; 112 } 113 GetProvenance(const char * sourceChar)114 Provenance GetProvenance(const char *sourceChar) const { 115 return startProvenance_ + (sourceChar - start_); 116 } 117 GetProvenanceRange(const char * first,const char * afterLast)118 ProvenanceRange GetProvenanceRange( 119 const char *first, const char *afterLast) const { 120 std::size_t bytes = afterLast - first; 121 return {startProvenance_ + (first - start_), bytes}; 122 } 123 EmitChar(TokenSequence & tokens,char ch)124 void EmitChar(TokenSequence &tokens, char ch) { 125 tokens.PutNextTokenChar(ch, GetCurrentProvenance()); 126 } 127 EmitInsertedChar(TokenSequence & tokens,char ch)128 void EmitInsertedChar(TokenSequence &tokens, char ch) { 129 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)}; 130 tokens.PutNextTokenChar(ch, provenance); 131 } 132 EmitCharAndAdvance(TokenSequence & tokens,char ch)133 char EmitCharAndAdvance(TokenSequence &tokens, char ch) { 134 EmitChar(tokens, ch); 135 NextChar(); 136 return *at_; 137 } 138 InCompilerDirective()139 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; } InFixedFormSource()140 bool InFixedFormSource() const { 141 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective(); 142 } 143 IsCComment(const char * p)144 bool IsCComment(const char *p) const { 145 return p[0] == '/' && p[1] == '*' && 146 (inPreprocessorDirective_ || 147 (!inCharLiteral_ && 148 features_.IsEnabled( 149 common::LanguageFeature::ClassicCComments))); 150 } 151 152 void LabelField(TokenSequence &); 153 void SkipToEndOfLine(); 154 bool MustSkipToEndOfLine() const; 155 void NextChar(); 156 void SkipToNextSignificantCharacter(); 157 void SkipCComments(); 158 void SkipSpaces(); 159 static const char *SkipWhiteSpace(const char *); 160 const char *SkipWhiteSpaceAndCComments(const char *) const; 161 const char *SkipCComment(const char *) const; 162 bool NextToken(TokenSequence &); 163 bool ExponentAndKind(TokenSequence &); 164 void QuotedCharacterLiteral(TokenSequence &, const char *start); 165 void Hollerith(TokenSequence &, int count, const char *start); 166 bool PadOutCharacterLiteral(TokenSequence &); 167 bool SkipCommentLine(bool afterAmpersand); 168 bool IsFixedFormCommentLine(const char *) const; 169 const char *IsFreeFormComment(const char *) const; 170 std::optional<std::size_t> IsIncludeLine(const char *) const; 171 void FortranInclude(const char *quote); 172 const char *IsPreprocessorDirectiveLine(const char *) const; 173 const char *FixedFormContinuationLine(bool mightNeedSpace); 174 const char *FreeFormContinuationLine(bool ampersand); 175 bool IsImplicitContinuation() const; 176 bool FixedFormContinuation(bool mightNeedSpace); 177 bool FreeFormContinuation(); 178 bool Continuation(bool mightNeedFixedFormSpace); 179 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine( 180 const char *) const; 181 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine( 182 const char *) const; 183 const char *IsCompilerDirectiveSentinel(const char *) const; 184 LineClassification ClassifyLine(const char *) const; 185 void SourceFormChange(std::string &&); 186 187 Messages &messages_; 188 CookedSource &cooked_; 189 Preprocessor &preprocessor_; 190 AllSources &allSources_; 191 common::LanguageFeatureControl features_; 192 bool inFixedForm_{false}; 193 int fixedFormColumnLimit_{72}; 194 Encoding encoding_{Encoding::UTF_8}; 195 int delimiterNesting_{0}; 196 int prescannerNesting_{0}; 197 198 Provenance startProvenance_; 199 const char *start_{nullptr}; // beginning of current source file content 200 const char *limit_{nullptr}; // first address after end of current source 201 const char *nextLine_{nullptr}; // next line to process; <= limit_ 202 const char *directiveSentinel_{nullptr}; // current compiler directive 203 204 // This data members are state for processing the source line containing 205 // "at_", which goes to up to the newline character before "nextLine_". 206 const char *at_{nullptr}; // next character to process; < nextLine_ 207 int column_{1}; // card image column position of next character 208 bool tabInCurrentLine_{false}; 209 bool slashInCurrentStatement_{false}; 210 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith 211 bool inCharLiteral_{false}; 212 bool inPreprocessorDirective_{false}; 213 214 // In some edge cases of compiler directive continuation lines, it 215 // is necessary to treat the line break as a space character by 216 // setting this flag, which is cleared by EmitChar(). 217 bool insertASpace_{false}; 218 219 // When a free form continuation marker (&) appears at the end of a line 220 // before a INCLUDE or #include, we delete it and omit the newline, so 221 // that the first line of the included file is truly a continuation of 222 // the line before. Also used when the & appears at the end of the last 223 // line in an include file. 224 bool omitNewline_{false}; 225 bool skipLeadingAmpersand_{false}; 226 227 const Provenance spaceProvenance_{ 228 allSources_.CompilerInsertionProvenance(' ')}; 229 const Provenance backslashProvenance_{ 230 allSources_.CompilerInsertionProvenance('\\')}; 231 232 // To avoid probing the set of active compiler directive sentinel strings 233 // on every comment line, they're checked first with a cheap Bloom filter. 234 static const int prime1{1019}, prime2{1021}; 235 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes 236 std::unordered_set<std::string> compilerDirectiveSentinels_; 237 }; 238 } // namespace Fortran::parser 239 #endif // FORTRAN_PARSER_PRESCAN_H_ 240