1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Windows-specific.
10 // A parser for the module-definition file (.def file).
11 //
// The format of module-definition files is described in this document:
13 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/Object/COFFModuleDefinition.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/COFFImportFile.h"
22 #include "llvm/Object/Error.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26
27 using namespace llvm::COFF;
28 using namespace llvm;
29
30 namespace llvm {
31 namespace object {
32
/// Kinds of token the lexer can hand to the parser.
enum Kind {
  // Special tokens.
  Unknown = 0, // Default kind of a default-constructed token.
  Eof,         // End of input.

  // Bare words, numbers and quoted strings.
  Identifier,

  // Punctuation.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords; these are matched in upper case only.
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
52
53 struct Token {
Tokenllvm::object::Token54 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55 Kind K;
56 StringRef Value;
57 };
58
isDecorated(StringRef Sym,bool MingwDef)59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60 // In def files, the symbols can either be listed decorated or undecorated.
61 //
62 // - For cdecl symbols, only the undecorated form is allowed.
63 // - For fastcall and vectorcall symbols, both fully decorated or
64 // undecorated forms can be present.
65 // - For stdcall symbols in non-MinGW environments, the decorated form is
66 // fully decorated with leading underscore and trailing stack argument
67 // size - like "_Func@0".
68 // - In MinGW def files, a decorated stdcall symbol does not include the
69 // leading underscore though, like "Func@0".
70
71 // This function controls whether a leading underscore should be added to
72 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75 // as decorated, i.e. don't add any more leading underscores.
76 // We can't check for a leading underscore here, since function names
77 // themselves can start with an underscore, while a second one still needs
78 // to be added.
79 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80 (!MingwDef && Sym.contains('@'));
81 }
82
createError(const Twine & Err)83 static Error createError(const Twine &Err) {
84 return make_error<StringError>(StringRef(Err.str()),
85 object_error::parse_failed);
86 }
87
88 class Lexer {
89 public:
Lexer(StringRef S)90 Lexer(StringRef S) : Buf(S) {}
91
lex()92 Token lex() {
93 Buf = Buf.trim();
94 if (Buf.empty())
95 return Token(Eof);
96
97 switch (Buf[0]) {
98 case '\0':
99 return Token(Eof);
100 case ';': {
101 size_t End = Buf.find('\n');
102 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103 return lex();
104 }
105 case '=':
106 Buf = Buf.drop_front();
107 if (Buf.startswith("=")) {
108 Buf = Buf.drop_front();
109 return Token(EqualEqual, "==");
110 }
111 return Token(Equal, "=");
112 case ',':
113 Buf = Buf.drop_front();
114 return Token(Comma, ",");
115 case '"': {
116 StringRef S;
117 std::tie(S, Buf) = Buf.substr(1).split('"');
118 return Token(Identifier, S);
119 }
120 default: {
121 size_t End = Buf.find_first_of("=,;\r\n \t\v");
122 StringRef Word = Buf.substr(0, End);
123 Kind K = llvm::StringSwitch<Kind>(Word)
124 .Case("BASE", KwBase)
125 .Case("CONSTANT", KwConstant)
126 .Case("DATA", KwData)
127 .Case("EXPORTS", KwExports)
128 .Case("HEAPSIZE", KwHeapsize)
129 .Case("LIBRARY", KwLibrary)
130 .Case("NAME", KwName)
131 .Case("NONAME", KwNoname)
132 .Case("PRIVATE", KwPrivate)
133 .Case("STACKSIZE", KwStacksize)
134 .Case("VERSION", KwVersion)
135 .Default(Identifier);
136 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137 return Token(K, Word);
138 }
139 }
140 }
141
142 private:
143 StringRef Buf;
144 };
145
146 class Parser {
147 public:
Parser(StringRef S,MachineTypes M,bool B)148 explicit Parser(StringRef S, MachineTypes M, bool B)
149 : Lex(S), Machine(M), MingwDef(B) {}
150
parse()151 Expected<COFFModuleDefinition> parse() {
152 do {
153 if (Error Err = parseOne())
154 return std::move(Err);
155 } while (Tok.K != Eof);
156 return Info;
157 }
158
159 private:
read()160 void read() {
161 if (Stack.empty()) {
162 Tok = Lex.lex();
163 return;
164 }
165 Tok = Stack.back();
166 Stack.pop_back();
167 }
168
readAsInt(uint64_t * I)169 Error readAsInt(uint64_t *I) {
170 read();
171 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172 return createError("integer expected");
173 return Error::success();
174 }
175
expect(Kind Expected,StringRef Msg)176 Error expect(Kind Expected, StringRef Msg) {
177 read();
178 if (Tok.K != Expected)
179 return createError(Msg);
180 return Error::success();
181 }
182
unget()183 void unget() { Stack.push_back(Tok); }
184
parseOne()185 Error parseOne() {
186 read();
187 switch (Tok.K) {
188 case Eof:
189 return Error::success();
190 case KwExports:
191 for (;;) {
192 read();
193 if (Tok.K != Identifier) {
194 unget();
195 return Error::success();
196 }
197 if (Error Err = parseExport())
198 return Err;
199 }
200 case KwHeapsize:
201 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202 case KwStacksize:
203 return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204 case KwLibrary:
205 case KwName: {
206 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207 std::string Name;
208 if (Error Err = parseName(&Name, &Info.ImageBase))
209 return Err;
210
211 Info.ImportName = Name;
212
213 // Set the output file, but don't override /out if it was already passed.
214 if (Info.OutputFile.empty()) {
215 Info.OutputFile = Name;
216 // Append the appropriate file extension if not already present.
217 if (!sys::path::has_extension(Name))
218 Info.OutputFile += IsDll ? ".dll" : ".exe";
219 }
220
221 return Error::success();
222 }
223 case KwVersion:
224 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225 default:
226 return createError("unknown directive: " + Tok.Value);
227 }
228 }
229
parseExport()230 Error parseExport() {
231 COFFShortExport E;
232 E.Name = std::string(Tok.Value);
233 read();
234 if (Tok.K == Equal) {
235 read();
236 if (Tok.K != Identifier)
237 return createError("identifier expected, but got " + Tok.Value);
238 E.ExtName = E.Name;
239 E.Name = std::string(Tok.Value);
240 } else {
241 unget();
242 }
243
244 if (Machine == IMAGE_FILE_MACHINE_I386) {
245 if (!isDecorated(E.Name, MingwDef))
246 E.Name = (std::string("_").append(E.Name));
247 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248 E.ExtName = (std::string("_").append(E.ExtName));
249 }
250
251 for (;;) {
252 read();
253 if (Tok.K == Identifier && Tok.Value[0] == '@') {
254 if (Tok.Value == "@") {
255 // "foo @ 10"
256 read();
257 Tok.Value.getAsInteger(10, E.Ordinal);
258 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
259 // "foo \n @bar" - Not an ordinal modifier at all, but the next
260 // export (fastcall decorated) - complete the current one.
261 unget();
262 Info.Exports.push_back(E);
263 return Error::success();
264 }
265 // "foo @10"
266 read();
267 if (Tok.K == KwNoname) {
268 E.Noname = true;
269 } else {
270 unget();
271 }
272 continue;
273 }
274 if (Tok.K == KwData) {
275 E.Data = true;
276 continue;
277 }
278 if (Tok.K == KwConstant) {
279 E.Constant = true;
280 continue;
281 }
282 if (Tok.K == KwPrivate) {
283 E.Private = true;
284 continue;
285 }
286 if (Tok.K == EqualEqual) {
287 read();
288 E.AliasTarget = std::string(Tok.Value);
289 if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290 E.AliasTarget = std::string("_").append(E.AliasTarget);
291 continue;
292 }
293 unget();
294 Info.Exports.push_back(E);
295 return Error::success();
296 }
297 }
298
299 // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)300 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301 if (Error Err = readAsInt(Reserve))
302 return Err;
303 read();
304 if (Tok.K != Comma) {
305 unget();
306 Commit = nullptr;
307 return Error::success();
308 }
309 if (Error Err = readAsInt(Commit))
310 return Err;
311 return Error::success();
312 }
313
314 // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)315 Error parseName(std::string *Out, uint64_t *Baseaddr) {
316 read();
317 if (Tok.K == Identifier) {
318 *Out = std::string(Tok.Value);
319 } else {
320 *Out = "";
321 unget();
322 return Error::success();
323 }
324 read();
325 if (Tok.K == KwBase) {
326 if (Error Err = expect(Equal, "'=' expected"))
327 return Err;
328 if (Error Err = readAsInt(Baseaddr))
329 return Err;
330 } else {
331 unget();
332 *Baseaddr = 0;
333 }
334 return Error::success();
335 }
336
337 // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)338 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339 read();
340 if (Tok.K != Identifier)
341 return createError("identifier expected, but got " + Tok.Value);
342 StringRef V1, V2;
343 std::tie(V1, V2) = Tok.Value.split('.');
344 if (V1.getAsInteger(10, *Major))
345 return createError("integer expected, but got " + Tok.Value);
346 if (V2.empty())
347 *Minor = 0;
348 else if (V2.getAsInteger(10, *Minor))
349 return createError("integer expected, but got " + Tok.Value);
350 return Error::success();
351 }
352
353 Lexer Lex;
354 Token Tok;
355 std::vector<Token> Stack;
356 MachineTypes Machine;
357 COFFModuleDefinition Info;
358 bool MingwDef;
359 };
360
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef)361 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
362 MachineTypes Machine,
363 bool MingwDef) {
364 return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365 }
366
367 } // namespace object
368 } // namespace llvm
369