//===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "FuzzySymbolIndex.h" #include "llvm/Support/Regex.h" using clang::find_all_symbols::SymbolAndSignals; using llvm::StringRef; namespace clang { namespace include_fixer { namespace { class MemSymbolIndex : public FuzzySymbolIndex { public: MemSymbolIndex(std::vector Symbols) { for (auto &Symbol : Symbols) { auto Tokens = tokenize(Symbol.Symbol.getName()); this->Symbols.emplace_back( StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")), std::move(Symbol)); } } std::vector search(StringRef Query) override { auto Tokens = tokenize(Query); llvm::Regex Pattern("^" + queryRegexp(Tokens)); std::vector Results; for (const Entry &E : Symbols) if (Pattern.match(E.first)) Results.push_back(E.second); return Results; } private: using Entry = std::pair, SymbolAndSignals>; std::vector Symbols; }; // Helpers for tokenize state machine. enum TokenizeState { EMPTY, // No pending characters. ONE_BIG, // Read one uppercase letter, could be WORD or Word. BIG_WORD, // Reading an uppercase WORD. SMALL_WORD, // Reading a lowercase word. NUMBER // Reading a number. }; enum CharType { UPPER, LOWER, DIGIT, MISC }; CharType classify(char c) { if (isupper(c)) return UPPER; if (islower(c)) return LOWER; if (isdigit(c)) return DIGIT; return MISC; } } // namespace std::vector FuzzySymbolIndex::tokenize(StringRef Text) { std::vector Result; // State describes the treatment of text from Start to I. // Once text is Flush()ed into Result, we're done with it and advance Start. TokenizeState State = EMPTY; size_t Start = 0; auto Flush = [&](size_t End) { if (State != EMPTY) { Result.push_back(Text.substr(Start, End - Start).lower()); State = EMPTY; } Start = End; }; for (size_t I = 0; I < Text.size(); ++I) { CharType Type = classify(Text[I]); if (Type == MISC) Flush(I); else if (Type == LOWER) switch (State) { case BIG_WORD: Flush(I - 1); // FOOBar: first token is FOO, not FOOB. LLVM_FALLTHROUGH; case ONE_BIG: State = SMALL_WORD; LLVM_FALLTHROUGH; case SMALL_WORD: break; default: Flush(I); State = SMALL_WORD; } else if (Type == UPPER) switch (State) { case ONE_BIG: State = BIG_WORD; LLVM_FALLTHROUGH; case BIG_WORD: break; default: Flush(I); State = ONE_BIG; } else if (Type == DIGIT && State != NUMBER) { Flush(I); State = NUMBER; } } Flush(Text.size()); return Result; } std::string FuzzySymbolIndex::queryRegexp(const std::vector &Tokens) { std::string Result; for (size_t I = 0; I < Tokens.size(); ++I) { if (I) Result.append("[[:alnum:]]* "); for (size_t J = 0; J < Tokens[I].size(); ++J) { if (J) Result.append("([[:alnum:]]* )?"); Result.push_back(Tokens[I][J]); } } return Result; } llvm::Expected> FuzzySymbolIndex::createFromYAML(StringRef FilePath) { auto Buffer = llvm::MemoryBuffer::getFile(FilePath); if (!Buffer) return llvm::errorCodeToError(Buffer.getError()); return std::make_unique( find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer())); } } // namespace include_fixer } // namespace clang