1 //===-- include/flang/Parser/provenance.h -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef FORTRAN_PARSER_PROVENANCE_H_ 10 #define FORTRAN_PARSER_PROVENANCE_H_ 11 12 #include "char-block.h" 13 #include "char-buffer.h" 14 #include "characters.h" 15 #include "source.h" 16 #include "flang/Common/idioms.h" 17 #include "flang/Common/interval.h" 18 #include "llvm/Support/raw_ostream.h" 19 #include <cstddef> 20 #include <list> 21 #include <map> 22 #include <memory> 23 #include <optional> 24 #include <string> 25 #include <utility> 26 #include <variant> 27 #include <vector> 28 29 namespace Fortran::parser { 30 31 // Each character in the contiguous source stream built by the 32 // prescanner corresponds to a particular character in a source file, 33 // include file, macro expansion, or compiler-inserted padding. 34 // The location of this original character to which a parsable character 35 // corresponds is its provenance. 36 // 37 // Provenances are offsets into an (unmaterialized) marshaling of the 38 // entire contents of all the original source files, include files, macro 39 // expansions, &c. for each visit to each source. These origins of the 40 // original source characters constitute a forest whose roots are 41 // the original source files named on the compiler's command line. 42 // Given a Provenance, we can find the tree node that contains it in time 43 // O(log(# of origins)), and describe the position precisely by walking 44 // up the tree. (It would be possible, via a time/space trade-off, to 45 // cap the time by the use of an intermediate table that would be indexed 46 // by the upper bits of an offset, but that does not appear to be 47 // necessary.) 48 49 class AllSources; 50 51 class Provenance { 52 public: Provenance()53 Provenance() {} Provenance(std::size_t offset)54 Provenance(std::size_t offset) : offset_{offset} { CHECK(offset > 0); } 55 Provenance(const Provenance &that) = default; 56 Provenance(Provenance &&that) = default; 57 Provenance &operator=(const Provenance &that) = default; 58 Provenance &operator=(Provenance &&that) = default; 59 offset()60 std::size_t offset() const { return offset_; } 61 62 Provenance operator+(ptrdiff_t n) const { 63 CHECK(n > -static_cast<ptrdiff_t>(offset_)); 64 return {offset_ + static_cast<std::size_t>(n)}; 65 } 66 Provenance operator+(std::size_t n) const { return {offset_ + n}; } 67 std::size_t operator-(Provenance that) const { 68 CHECK(that <= *this); 69 return offset_ - that.offset_; 70 } 71 bool operator<(Provenance that) const { return offset_ < that.offset_; } 72 bool operator<=(Provenance that) const { return !(that < *this); } 73 bool operator==(Provenance that) const { return offset_ == that.offset_; } 74 bool operator!=(Provenance that) const { return !(*this == that); } 75 76 private: 77 std::size_t offset_{0}; 78 }; 79 80 using ProvenanceRange = common::Interval<Provenance>; 81 82 // Maps contiguous ranges of byte offsets in original source files to 83 // contiguous ranges in the cooked character stream; essentially a 84 // partial inversion of OffsetToProvenanceMappings (below). 85 // Used for implementing the first step of mapping an identifier 86 // selected in a code editor to one of its declarative statements. 87 class ProvenanceRangeToOffsetMappings { 88 public: 89 ProvenanceRangeToOffsetMappings(); 90 ~ProvenanceRangeToOffsetMappings(); empty()91 bool empty() const { return map_.empty(); } 92 void Put(ProvenanceRange, std::size_t offset); 93 std::optional<std::size_t> Map(ProvenanceRange) const; 94 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 95 96 private: 97 // A comparison function object for use in std::multimap<Compare=>. 98 // Intersecting intervals will effectively compare equal, not being 99 // either < nor >= each other. 100 struct WhollyPrecedes { 101 bool operator()(ProvenanceRange, ProvenanceRange) const; 102 }; 103 104 std::multimap<ProvenanceRange, std::size_t, WhollyPrecedes> map_; 105 }; 106 107 // Maps 0-based local offsets in some contiguous range (e.g., a token 108 // sequence) to their provenances. Lookup time is on the order of 109 // O(log(#of intervals with contiguous provenances)). As mentioned 110 // above, this time could be capped via a time/space trade-off. 111 class OffsetToProvenanceMappings { 112 public: OffsetToProvenanceMappings()113 OffsetToProvenanceMappings() {} 114 void clear(); 115 void swap(OffsetToProvenanceMappings &); 116 void shrink_to_fit(); 117 std::size_t SizeInBytes() const; 118 void Put(ProvenanceRange); 119 void Put(const OffsetToProvenanceMappings &); 120 ProvenanceRange Map(std::size_t at) const; 121 void RemoveLastBytes(std::size_t); 122 ProvenanceRangeToOffsetMappings Invert(const AllSources &) const; 123 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 124 125 private: 126 struct ContiguousProvenanceMapping { 127 std::size_t start; 128 ProvenanceRange range; 129 }; 130 131 // Elements appear in ascending order of distinct .start values; 132 // their .range values are disjoint and not necessarily adjacent. 133 std::vector<ContiguousProvenanceMapping> provenanceMap_; 134 }; 135 136 // A singleton AllSources instance for the whole compilation 137 // is shared by reference. 138 class AllSources { 139 public: 140 AllSources(); 141 ~AllSources(); 142 size()143 std::size_t size() const { return range_.size(); } 144 const char &operator[](Provenance) const; encoding()145 Encoding encoding() const { return encoding_; } set_encoding(Encoding e)146 AllSources &set_encoding(Encoding e) { 147 encoding_ = e; 148 return *this; 149 } 150 151 void PushSearchPathDirectory(std::string); 152 std::string PopSearchPathDirectory(); 153 const SourceFile *Open(std::string path, llvm::raw_ostream &error); 154 const SourceFile *ReadStandardInput(llvm::raw_ostream &error); 155 156 ProvenanceRange AddIncludedFile( 157 const SourceFile &, ProvenanceRange, bool isModule = false); 158 ProvenanceRange AddMacroCall( 159 ProvenanceRange def, ProvenanceRange use, const std::string &expansion); 160 ProvenanceRange AddCompilerInsertion(std::string); 161 IsValid(Provenance at)162 bool IsValid(Provenance at) const { return range_.Contains(at); } IsValid(ProvenanceRange range)163 bool IsValid(ProvenanceRange range) const { 164 return range.size() > 0 && range_.Contains(range); 165 } 166 void EmitMessage(llvm::raw_ostream &, const std::optional<ProvenanceRange> &, 167 const std::string &message, bool echoSourceLine = false) const; 168 const SourceFile *GetSourceFile( 169 Provenance, std::size_t *offset = nullptr) const; 170 const char *GetSource(ProvenanceRange) const; 171 std::optional<SourcePosition> GetSourcePosition(Provenance) const; 172 std::optional<ProvenanceRange> GetFirstFileProvenance() const; 173 std::string GetPath(Provenance) const; // __FILE__ 174 int GetLineNumber(Provenance) const; // __LINE__ 175 Provenance CompilerInsertionProvenance(char ch); 176 Provenance CompilerInsertionProvenance(const char *, std::size_t); 177 ProvenanceRange IntersectionWithSourceFiles(ProvenanceRange) const; 178 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 179 180 private: 181 struct Inclusion { 182 const SourceFile &source; 183 bool isModule{false}; 184 }; 185 struct Macro { 186 ProvenanceRange definition; 187 std::string expansion; 188 }; 189 struct CompilerInsertion { 190 std::string text; 191 }; 192 193 struct Origin { 194 Origin(ProvenanceRange, const SourceFile &); 195 Origin(ProvenanceRange, const SourceFile &, ProvenanceRange, 196 bool isModule = false); 197 Origin(ProvenanceRange, ProvenanceRange def, ProvenanceRange use, 198 const std::string &expansion); 199 Origin(ProvenanceRange, const std::string &); 200 201 const char &operator[](std::size_t) const; 202 203 std::variant<Inclusion, Macro, CompilerInsertion> u; 204 ProvenanceRange covers, replaces; 205 }; 206 207 const Origin &MapToOrigin(Provenance) const; 208 209 // Elements are in ascending & contiguous order of .covers. 210 std::vector<Origin> origin_; 211 ProvenanceRange range_; 212 std::map<char, Provenance> compilerInsertionProvenance_; 213 std::vector<std::unique_ptr<SourceFile>> ownedSourceFiles_; 214 std::vector<std::string> searchPath_; 215 Encoding encoding_{Encoding::UTF_8}; 216 }; 217 218 // Represents the result of preprocessing and prescanning a single source 219 // file (and all its inclusions) or module file. Parsers operate within 220 // single instances of CookedSource. 221 class CookedSource { 222 public: AsCharBlock()223 CharBlock AsCharBlock() const { return CharBlock{data_}; } 224 std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const; 225 std::optional<CharBlock> GetCharBlock(ProvenanceRange) const; 226 227 // The result of a Put() is the offset that the new data 228 // will have in the eventually marshaled contiguous buffer. Put(const char * data,std::size_t bytes)229 std::size_t Put(const char *data, std::size_t bytes) { 230 return buffer_.Put(data, bytes); 231 } Put(const std::string & s)232 std::size_t Put(const std::string &s) { return buffer_.Put(s); } Put(char ch)233 std::size_t Put(char ch) { return buffer_.Put(&ch, 1); } Put(char ch,Provenance p)234 std::size_t Put(char ch, Provenance p) { 235 provenanceMap_.Put(ProvenanceRange{p, 1}); 236 return buffer_.Put(&ch, 1); 237 } 238 PutProvenance(Provenance p)239 void PutProvenance(Provenance p) { provenanceMap_.Put(ProvenanceRange{p}); } PutProvenance(ProvenanceRange pr)240 void PutProvenance(ProvenanceRange pr) { provenanceMap_.Put(pr); } PutProvenanceMappings(const OffsetToProvenanceMappings & pm)241 void PutProvenanceMappings(const OffsetToProvenanceMappings &pm) { 242 provenanceMap_.Put(pm); 243 } 244 245 std::size_t BufferedBytes() const; 246 void Marshal(AllSources &); // marshals text into one contiguous block 247 void CompileProvenanceRangeToOffsetMappings(AllSources &); 248 llvm::raw_ostream &Dump(llvm::raw_ostream &) const; 249 250 private: 251 CharBuffer buffer_; // before Marshal() 252 std::string data_; // all of it, prescanned and preprocessed 253 OffsetToProvenanceMappings provenanceMap_; 254 ProvenanceRangeToOffsetMappings invertedMap_; 255 }; 256 257 class AllCookedSources { 258 public: 259 explicit AllCookedSources(AllSources &); 260 ~AllCookedSources(); 261 allSources()262 AllSources &allSources() { return allSources_; } allSources()263 const AllSources &allSources() const { return allSources_; } 264 265 CookedSource &NewCookedSource(); 266 267 template <typename A> // const char * or CharBlock Find(A x)268 const CookedSource *Find(A x) const { 269 for (const auto &c : cooked_) { 270 if (c.AsCharBlock().Contains(x)) { 271 return &c; 272 } 273 } 274 return nullptr; 275 } 276 IsValid(ProvenanceRange r)277 bool IsValid(ProvenanceRange r) const { return allSources_.IsValid(r); } 278 279 std::optional<ProvenanceRange> GetProvenanceRange(CharBlock) const; 280 std::optional<CharBlock> GetCharBlockFromLineAndColumns( 281 int line, int startColumn, int endColumn) const; 282 std::optional<std::pair<SourcePosition, SourcePosition>> 283 GetSourcePositionRange(CharBlock) const; 284 std::optional<CharBlock> GetCharBlock(ProvenanceRange) const; 285 void Dump(llvm::raw_ostream &) const; 286 287 private: 288 AllSources &allSources_; 289 std::list<CookedSource> cooked_; // owns all CookedSource instances 290 }; 291 } // namespace Fortran::parser 292 #endif // FORTRAN_PARSER_PROVENANCE_H_ 293