1 //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This file contains the implementation of MacroExpander, which handles macro
12 /// configuration and expansion while formatting.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "Macros.h"
17
18 #include "Encoding.h"
19 #include "FormatToken.h"
20 #include "FormatTokenLexer.h"
21 #include "clang/Basic/TokenKinds.h"
22 #include "clang/Format/Format.h"
23 #include "clang/Lex/HeaderSearch.h"
24 #include "clang/Lex/HeaderSearchOptions.h"
25 #include "clang/Lex/Lexer.h"
26 #include "clang/Lex/ModuleLoader.h"
27 #include "clang/Lex/Preprocessor.h"
28 #include "clang/Lex/PreprocessorOptions.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/ErrorHandling.h"
31
32 namespace clang {
33 namespace format {
34
35 struct MacroExpander::Definition {
36 StringRef Name;
37 SmallVector<FormatToken *, 8> Params;
38 SmallVector<FormatToken *, 8> Body;
39
40 // Map from each argument's name to its position in the argument list.
41 // With "M(x, y) x + y":
42 // x -> 0
43 // y -> 1
44 llvm::StringMap<size_t> ArgMap;
45
46 bool ObjectLike = true;
47 };
48
49 class MacroExpander::DefinitionParser {
50 public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)51 DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
52 assert(!Tokens.empty());
53 Current = Tokens[0];
54 }
55
56 // Parse the token stream and return the corresonding Definition object.
57 // Returns an empty definition object with a null-Name on error.
parse()58 MacroExpander::Definition parse() {
59 if (!Current->is(tok::identifier))
60 return {};
61 Def.Name = Current->TokenText;
62 nextToken();
63 if (Current->is(tok::l_paren)) {
64 Def.ObjectLike = false;
65 if (!parseParams())
66 return {};
67 }
68 if (!parseExpansion())
69 return {};
70
71 return Def;
72 }
73
74 private:
parseParams()75 bool parseParams() {
76 assert(Current->is(tok::l_paren));
77 nextToken();
78 while (Current->is(tok::identifier)) {
79 Def.Params.push_back(Current);
80 Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
81 nextToken();
82 if (Current->isNot(tok::comma))
83 break;
84 nextToken();
85 }
86 if (Current->isNot(tok::r_paren))
87 return false;
88 nextToken();
89 return true;
90 }
91
parseExpansion()92 bool parseExpansion() {
93 if (!Current->isOneOf(tok::equal, tok::eof))
94 return false;
95 if (Current->is(tok::equal))
96 nextToken();
97 parseTail();
98 return true;
99 }
100
parseTail()101 void parseTail() {
102 while (Current->isNot(tok::eof)) {
103 Def.Body.push_back(Current);
104 nextToken();
105 }
106 Def.Body.push_back(Current);
107 }
108
nextToken()109 void nextToken() {
110 if (Pos + 1 < Tokens.size())
111 ++Pos;
112 Current = Tokens[Pos];
113 Current->Finalized = true;
114 }
115
116 size_t Pos = 0;
117 FormatToken *Current = nullptr;
118 Definition Def;
119 ArrayRef<FormatToken *> Tokens;
120 };
121
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)122 MacroExpander::MacroExpander(
123 const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
124 const FormatStyle &Style,
125 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
126 IdentifierTable &IdentTable)
127 : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
128 IdentTable(IdentTable) {
129 for (const std::string &Macro : Macros) {
130 parseDefinition(Macro);
131 }
132 }
133
// The destructor is defaulted here, in the implementation file, rather than
// in the class body.
// NOTE(review): presumably because Definition is only a complete type in this
// file - confirm against the class declaration in Macros.h.
134 MacroExpander::~MacroExpander() = default;
135
parseDefinition(const std::string & Macro)136 void MacroExpander::parseDefinition(const std::string &Macro) {
137 Buffers.push_back(
138 llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
139 clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
140 FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
141 Allocator, IdentTable);
142 const auto Tokens = Lex.lex();
143 if (!Tokens.empty()) {
144 DefinitionParser Parser(Tokens);
145 auto Definition = Parser.parse();
146 Definitions[Definition.Name] = std::move(Definition);
147 }
148 }
149
defined(llvm::StringRef Name) const150 bool MacroExpander::defined(llvm::StringRef Name) const {
151 return Definitions.find(Name) != Definitions.end();
152 }
153
objectLike(llvm::StringRef Name) const154 bool MacroExpander::objectLike(llvm::StringRef Name) const {
155 return Definitions.find(Name)->second.ObjectLike;
156 }
157
expand(FormatToken * ID,ArgsList Args) const158 llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
159 ArgsList Args) const {
160 assert(defined(ID->TokenText));
161 SmallVector<FormatToken *, 8> Result;
162 const Definition &Def = Definitions.find(ID->TokenText)->second;
163
164 // Expand each argument at most once.
165 llvm::StringSet<> ExpandedArgs;
166
167 // Adds the given token to Result.
168 auto pushToken = [&](FormatToken *Tok) {
169 Tok->MacroCtx->ExpandedFrom.push_back(ID);
170 Result.push_back(Tok);
171 };
172
173 // If Tok references a parameter, adds the corresponding argument to Result.
174 // Returns false if Tok does not reference a parameter.
175 auto expandArgument = [&](FormatToken *Tok) -> bool {
176 // If the current token references a parameter, expand the corresponding
177 // argument.
178 if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
179 return false;
180 ExpandedArgs.insert(Tok->TokenText);
181 auto I = Def.ArgMap.find(Tok->TokenText);
182 if (I == Def.ArgMap.end())
183 return false;
184 // If there are fewer arguments than referenced parameters, treat the
185 // parameter as empty.
186 // FIXME: Potentially fully abort the expansion instead.
187 if (I->getValue() >= Args.size())
188 return true;
189 for (FormatToken *Arg : Args[I->getValue()]) {
190 // A token can be part of a macro argument at multiple levels.
191 // For example, with "ID(x) x":
192 // in ID(ID(x)), 'x' is expanded first as argument to the inner
193 // ID, then again as argument to the outer ID. We keep the macro
194 // role the token had from the inner expansion.
195 if (!Arg->MacroCtx)
196 Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
197 pushToken(Arg);
198 }
199 return true;
200 };
201
202 // Expand the definition into Result.
203 for (FormatToken *Tok : Def.Body) {
204 if (expandArgument(Tok))
205 continue;
206 // Create a copy of the tokens from the macro body, i.e. were not provided
207 // by user code.
208 FormatToken *New = new (Allocator.Allocate()) FormatToken;
209 New->copyFrom(*Tok);
210 assert(!New->MacroCtx);
211 // Tokens that are not part of the user code are not formatted.
212 New->MacroCtx = MacroExpansion(MR_Hidden);
213 pushToken(New);
214 }
215 assert(Result.size() >= 1 && Result.back()->is(tok::eof));
216 if (Result.size() > 1) {
217 ++Result[0]->MacroCtx->StartOfExpansion;
218 ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
219 }
220 return Result;
221 }
222
223 } // namespace format
224 } // namespace clang
225