• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "FuzzySymbolIndex.h"
9 #include "llvm/Support/Regex.h"
10 
11 using clang::find_all_symbols::SymbolAndSignals;
12 using llvm::StringRef;
13 
14 namespace clang {
15 namespace include_fixer {
16 namespace {
17 
18 class MemSymbolIndex : public FuzzySymbolIndex {
19 public:
MemSymbolIndex(std::vector<SymbolAndSignals> Symbols)20   MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) {
21     for (auto &Symbol : Symbols) {
22       auto Tokens = tokenize(Symbol.Symbol.getName());
23       this->Symbols.emplace_back(
24           StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")),
25           std::move(Symbol));
26     }
27   }
28 
search(StringRef Query)29   std::vector<SymbolAndSignals> search(StringRef Query) override {
30     auto Tokens = tokenize(Query);
31     llvm::Regex Pattern("^" + queryRegexp(Tokens));
32     std::vector<SymbolAndSignals> Results;
33     for (const Entry &E : Symbols)
34       if (Pattern.match(E.first))
35         Results.push_back(E.second);
36     return Results;
37   }
38 
39 private:
40   using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>;
41   std::vector<Entry> Symbols;
42 };
43 
44 // Helpers for tokenize state machine.
45 enum TokenizeState {
46   EMPTY,      // No pending characters.
47   ONE_BIG,    // Read one uppercase letter, could be WORD or Word.
48   BIG_WORD,   // Reading an uppercase WORD.
49   SMALL_WORD, // Reading a lowercase word.
50   NUMBER      // Reading a number.
51 };
52 
53 enum CharType { UPPER, LOWER, DIGIT, MISC };
classify(char c)54 CharType classify(char c) {
55   if (isupper(c))
56     return UPPER;
57   if (islower(c))
58     return LOWER;
59   if (isdigit(c))
60     return DIGIT;
61   return MISC;
62 }
63 
64 } // namespace
65 
tokenize(StringRef Text)66 std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) {
67   std::vector<std::string> Result;
68   // State describes the treatment of text from Start to I.
69   // Once text is Flush()ed into Result, we're done with it and advance Start.
70   TokenizeState State = EMPTY;
71   size_t Start = 0;
72   auto Flush = [&](size_t End) {
73     if (State != EMPTY) {
74       Result.push_back(Text.substr(Start, End - Start).lower());
75       State = EMPTY;
76     }
77     Start = End;
78   };
79   for (size_t I = 0; I < Text.size(); ++I) {
80     CharType Type = classify(Text[I]);
81     if (Type == MISC)
82       Flush(I);
83     else if (Type == LOWER)
84       switch (State) {
85       case BIG_WORD:
86         Flush(I - 1); // FOOBar: first token is FOO, not FOOB.
87         LLVM_FALLTHROUGH;
88       case ONE_BIG:
89         State = SMALL_WORD;
90         LLVM_FALLTHROUGH;
91       case SMALL_WORD:
92         break;
93       default:
94         Flush(I);
95         State = SMALL_WORD;
96       }
97     else if (Type == UPPER)
98       switch (State) {
99       case ONE_BIG:
100         State = BIG_WORD;
101         LLVM_FALLTHROUGH;
102       case BIG_WORD:
103         break;
104       default:
105         Flush(I);
106         State = ONE_BIG;
107       }
108     else if (Type == DIGIT && State != NUMBER) {
109       Flush(I);
110       State = NUMBER;
111     }
112   }
113   Flush(Text.size());
114   return Result;
115 }
116 
117 std::string
queryRegexp(const std::vector<std::string> & Tokens)118 FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) {
119   std::string Result;
120   for (size_t I = 0; I < Tokens.size(); ++I) {
121     if (I)
122       Result.append("[[:alnum:]]* ");
123     for (size_t J = 0; J < Tokens[I].size(); ++J) {
124       if (J)
125         Result.append("([[:alnum:]]* )?");
126       Result.push_back(Tokens[I][J]);
127     }
128   }
129   return Result;
130 }
131 
132 llvm::Expected<std::unique_ptr<FuzzySymbolIndex>>
createFromYAML(StringRef FilePath)133 FuzzySymbolIndex::createFromYAML(StringRef FilePath) {
134   auto Buffer = llvm::MemoryBuffer::getFile(FilePath);
135   if (!Buffer)
136     return llvm::errorCodeToError(Buffer.getError());
137   return std::make_unique<MemSymbolIndex>(
138       find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer()));
139 }
140 
141 } // namespace include_fixer
142 } // namespace clang
143