1 //===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This library measures the distance between file paths. 10 // It's used for ranking symbols, e.g. in code completion. 11 // |foo/bar.h -> foo/bar.h| = 0. 12 // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. 13 // This is an edit-distance, where edits go up or down the directory tree. 14 // It's not symmetrical, the costs of going up and down may not match. 15 // 16 // Dealing with multiple sources: 17 // In practice we care about the distance from a source file, but files near 18 // its main-header and #included files are considered "close". 19 // So we start with a set of (anchor, cost) pairs, and call the distance to a 20 // path the minimum of `cost + |source -> path|`. 21 // 22 // We allow each source to limit the number of up-traversals paths may start 23 // with. Up-traversals may reach things that are not "semantically near". 24 // 25 // Symbol URI schemes: 26 // Symbol locations may be represented by URIs rather than file paths directly. 27 // In this case we want to perform distance computations in URI space rather 28 // than in file-space, without performing redundant conversions. 29 // Therefore we have a lookup structure that accepts URIs, so that intermediate 30 // calculations for the same scheme can be reused. 31 // 32 // Caveats: 33 // Assuming up and down traversals each have uniform costs is simplistic. 34 // Often there are "semantic roots" whose children are almost unrelated. 35 // (e.g. /usr/include/, or / in an umbrella repository). We ignore this. 36 // 37 //===----------------------------------------------------------------------===// 38 39 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 40 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 41 42 #include "URI.h" 43 #include "llvm/ADT/DenseMap.h" 44 #include "llvm/ADT/DenseMapInfo.h" 45 #include "llvm/ADT/SmallString.h" 46 #include "llvm/ADT/StringMap.h" 47 #include "llvm/ADT/StringRef.h" 48 #include "llvm/Support/Allocator.h" 49 #include "llvm/Support/Path.h" 50 #include "llvm/Support/StringSaver.h" 51 #include <memory> 52 53 namespace clang { 54 namespace clangd { 55 56 struct FileDistanceOptions { 57 unsigned UpCost = 2; // |foo/bar.h -> foo| 58 unsigned DownCost = 1; // |foo -> foo/bar.h| 59 unsigned IncludeCost = 2; // |foo.cc -> included_header.h| 60 bool AllowDownTraversalFromRoot = true; // | / -> /a | 61 }; 62 63 struct SourceParams { 64 // Base cost for paths starting at this source. 65 unsigned Cost = 0; 66 // Limits the number of upwards traversals allowed from this source. 67 unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max(); 68 }; 69 70 // Supports lookups to find the minimum distance to a file from any source. 71 // This object should be reused, it memoizes intermediate computations. 72 class FileDistance { 73 public: 74 static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max(); 75 static const llvm::hash_code RootHash; 76 77 FileDistance(llvm::StringMap<SourceParams> Sources, 78 const FileDistanceOptions &Opts = {}); 79 80 // Computes the minimum distance from any source to the file path. 81 unsigned distance(llvm::StringRef Path); 82 83 private: 84 // Costs computed so far. Always contains sources and their ancestors. 85 // We store hash codes only. Collisions are rare and consequences aren't dire. 86 llvm::DenseMap<llvm::hash_code, unsigned> Cache; 87 FileDistanceOptions Opts; 88 }; 89 90 // Supports lookups like FileDistance, but the lookup keys are URIs. 91 // We convert each of the sources to the scheme of the URI and do a FileDistance 92 // comparison on the bodies. 93 class URIDistance { 94 public: 95 // \p Sources must contain absolute paths, not URIs. 96 URIDistance(llvm::StringMap<SourceParams> Sources, 97 const FileDistanceOptions &Opts = {}) Sources(Sources)98 : Sources(Sources), Opts(Opts) {} 99 100 // Computes the minimum distance from any source to the URI. 101 // Only sources that can be mapped into the URI's scheme are considered. 102 unsigned distance(llvm::StringRef URI); 103 104 private: 105 // Returns the FileDistance for a URI scheme, creating it if needed. 106 FileDistance &forScheme(llvm::StringRef Scheme); 107 108 // We cache the results using the original strings so we can skip URI parsing. 109 llvm::DenseMap<llvm::hash_code, unsigned> Cache; 110 llvm::StringMap<SourceParams> Sources; 111 llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme; 112 FileDistanceOptions Opts; 113 }; 114 115 /// Support lookups like FileDistance, but the lookup keys are symbol scopes. 116 /// For example, a scope "na::nb::" is converted to "/na/nb". 117 class ScopeDistance { 118 public: 119 /// QueryScopes[0] is the preferred scope. 120 ScopeDistance(llvm::ArrayRef<std::string> QueryScopes); 121 122 unsigned distance(llvm::StringRef SymbolScope); 123 124 private: 125 FileDistance Distance; 126 }; 127 128 } // namespace clangd 129 } // namespace clang 130 131 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 132