1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Windows-specific.
11 // A parser for the module-definition file (.def file).
12 //
// The format of module-definition files is described in this document:
14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "llvm/Object/COFFModuleDefinition.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/COFF.h"
22 #include "llvm/Object/COFFImportFile.h"
23 #include "llvm/Object/Error.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/raw_ostream.h"
27
28 using namespace llvm::COFF;
29 using namespace llvm;
30
31 namespace llvm {
32 namespace object {
33
// Token categories produced by the lexer for module-definition files.
enum Kind {
  // Bookkeeping tokens.
  Unknown,
  Eof,

  // Names, numbers, quoted strings and punctuation.
  Identifier,
  Comma,
  Equal,
  EqualEqual,

  // Reserved words (recognized in their upper-case spelling).
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
53
54 struct Token {
Tokenllvm::object::Token55 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
56 Kind K;
57 StringRef Value;
58 };
59
isDecorated(StringRef Sym,bool MingwDef)60 static bool isDecorated(StringRef Sym, bool MingwDef) {
61 // In def files, the symbols can either be listed decorated or undecorated.
62 //
63 // - For cdecl symbols, only the undecorated form is allowed.
64 // - For fastcall and vectorcall symbols, both fully decorated or
65 // undecorated forms can be present.
66 // - For stdcall symbols in non-MinGW environments, the decorated form is
67 // fully decorated with leading underscore and trailing stack argument
68 // size - like "_Func@0".
69 // - In MinGW def files, a decorated stdcall symbol does not include the
70 // leading underscore though, like "Func@0".
71
72 // This function controls whether a leading underscore should be added to
73 // the given symbol name or not. For MinGW, treat a stdcall symbol name such
74 // as "Func@0" as undecorated, i.e. a leading underscore must be added.
75 // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
76 // as decorated, i.e. don't add any more leading underscores.
77 // We can't check for a leading underscore here, since function names
78 // themselves can start with an underscore, while a second one still needs
79 // to be added.
80 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
81 (!MingwDef && Sym.contains('@'));
82 }
83
createError(const Twine & Err)84 static Error createError(const Twine &Err) {
85 return make_error<StringError>(StringRef(Err.str()),
86 object_error::parse_failed);
87 }
88
89 class Lexer {
90 public:
Lexer(StringRef S)91 Lexer(StringRef S) : Buf(S) {}
92
lex()93 Token lex() {
94 Buf = Buf.trim();
95 if (Buf.empty())
96 return Token(Eof);
97
98 switch (Buf[0]) {
99 case '\0':
100 return Token(Eof);
101 case ';': {
102 size_t End = Buf.find('\n');
103 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
104 return lex();
105 }
106 case '=':
107 Buf = Buf.drop_front();
108 if (Buf.startswith("=")) {
109 Buf = Buf.drop_front();
110 return Token(EqualEqual, "==");
111 }
112 return Token(Equal, "=");
113 case ',':
114 Buf = Buf.drop_front();
115 return Token(Comma, ",");
116 case '"': {
117 StringRef S;
118 std::tie(S, Buf) = Buf.substr(1).split('"');
119 return Token(Identifier, S);
120 }
121 default: {
122 size_t End = Buf.find_first_of("=,;\r\n \t\v");
123 StringRef Word = Buf.substr(0, End);
124 Kind K = llvm::StringSwitch<Kind>(Word)
125 .Case("BASE", KwBase)
126 .Case("CONSTANT", KwConstant)
127 .Case("DATA", KwData)
128 .Case("EXPORTS", KwExports)
129 .Case("HEAPSIZE", KwHeapsize)
130 .Case("LIBRARY", KwLibrary)
131 .Case("NAME", KwName)
132 .Case("NONAME", KwNoname)
133 .Case("PRIVATE", KwPrivate)
134 .Case("STACKSIZE", KwStacksize)
135 .Case("VERSION", KwVersion)
136 .Default(Identifier);
137 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
138 return Token(K, Word);
139 }
140 }
141 }
142
143 private:
144 StringRef Buf;
145 };
146
147 class Parser {
148 public:
Parser(StringRef S,MachineTypes M,bool B)149 explicit Parser(StringRef S, MachineTypes M, bool B)
150 : Lex(S), Machine(M), MingwDef(B) {}
151
parse()152 Expected<COFFModuleDefinition> parse() {
153 do {
154 if (Error Err = parseOne())
155 return std::move(Err);
156 } while (Tok.K != Eof);
157 return Info;
158 }
159
160 private:
read()161 void read() {
162 if (Stack.empty()) {
163 Tok = Lex.lex();
164 return;
165 }
166 Tok = Stack.back();
167 Stack.pop_back();
168 }
169
readAsInt(uint64_t * I)170 Error readAsInt(uint64_t *I) {
171 read();
172 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
173 return createError("integer expected");
174 return Error::success();
175 }
176
expect(Kind Expected,StringRef Msg)177 Error expect(Kind Expected, StringRef Msg) {
178 read();
179 if (Tok.K != Expected)
180 return createError(Msg);
181 return Error::success();
182 }
183
unget()184 void unget() { Stack.push_back(Tok); }
185
parseOne()186 Error parseOne() {
187 read();
188 switch (Tok.K) {
189 case Eof:
190 return Error::success();
191 case KwExports:
192 for (;;) {
193 read();
194 if (Tok.K != Identifier) {
195 unget();
196 return Error::success();
197 }
198 if (Error Err = parseExport())
199 return Err;
200 }
201 case KwHeapsize:
202 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
203 case KwStacksize:
204 return parseNumbers(&Info.StackReserve, &Info.StackCommit);
205 case KwLibrary:
206 case KwName: {
207 bool IsDll = Tok.K == KwLibrary; // Check before parseName.
208 std::string Name;
209 if (Error Err = parseName(&Name, &Info.ImageBase))
210 return Err;
211
212 Info.ImportName = Name;
213
214 // Set the output file, but don't override /out if it was already passed.
215 if (Info.OutputFile.empty()) {
216 Info.OutputFile = Name;
217 // Append the appropriate file extension if not already present.
218 if (!sys::path::has_extension(Name))
219 Info.OutputFile += IsDll ? ".dll" : ".exe";
220 }
221
222 return Error::success();
223 }
224 case KwVersion:
225 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
226 default:
227 return createError("unknown directive: " + Tok.Value);
228 }
229 }
230
parseExport()231 Error parseExport() {
232 COFFShortExport E;
233 E.Name = Tok.Value;
234 read();
235 if (Tok.K == Equal) {
236 read();
237 if (Tok.K != Identifier)
238 return createError("identifier expected, but got " + Tok.Value);
239 E.ExtName = E.Name;
240 E.Name = Tok.Value;
241 } else {
242 unget();
243 }
244
245 if (Machine == IMAGE_FILE_MACHINE_I386) {
246 if (!isDecorated(E.Name, MingwDef))
247 E.Name = (std::string("_").append(E.Name));
248 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
249 E.ExtName = (std::string("_").append(E.ExtName));
250 }
251
252 for (;;) {
253 read();
254 if (Tok.K == Identifier && Tok.Value[0] == '@') {
255 if (Tok.Value == "@") {
256 // "foo @ 10"
257 read();
258 Tok.Value.getAsInteger(10, E.Ordinal);
259 } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
260 // "foo \n @bar" - Not an ordinal modifier at all, but the next
261 // export (fastcall decorated) - complete the current one.
262 unget();
263 Info.Exports.push_back(E);
264 return Error::success();
265 }
266 // "foo @10"
267 read();
268 if (Tok.K == KwNoname) {
269 E.Noname = true;
270 } else {
271 unget();
272 }
273 continue;
274 }
275 if (Tok.K == KwData) {
276 E.Data = true;
277 continue;
278 }
279 if (Tok.K == KwConstant) {
280 E.Constant = true;
281 continue;
282 }
283 if (Tok.K == KwPrivate) {
284 E.Private = true;
285 continue;
286 }
287 if (Tok.K == EqualEqual) {
288 read();
289 E.AliasTarget = Tok.Value;
290 if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
291 E.AliasTarget = std::string("_").append(E.AliasTarget);
292 continue;
293 }
294 unget();
295 Info.Exports.push_back(E);
296 return Error::success();
297 }
298 }
299
300 // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)301 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
302 if (Error Err = readAsInt(Reserve))
303 return Err;
304 read();
305 if (Tok.K != Comma) {
306 unget();
307 Commit = nullptr;
308 return Error::success();
309 }
310 if (Error Err = readAsInt(Commit))
311 return Err;
312 return Error::success();
313 }
314
315 // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)316 Error parseName(std::string *Out, uint64_t *Baseaddr) {
317 read();
318 if (Tok.K == Identifier) {
319 *Out = Tok.Value;
320 } else {
321 *Out = "";
322 unget();
323 return Error::success();
324 }
325 read();
326 if (Tok.K == KwBase) {
327 if (Error Err = expect(Equal, "'=' expected"))
328 return Err;
329 if (Error Err = readAsInt(Baseaddr))
330 return Err;
331 } else {
332 unget();
333 *Baseaddr = 0;
334 }
335 return Error::success();
336 }
337
338 // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)339 Error parseVersion(uint32_t *Major, uint32_t *Minor) {
340 read();
341 if (Tok.K != Identifier)
342 return createError("identifier expected, but got " + Tok.Value);
343 StringRef V1, V2;
344 std::tie(V1, V2) = Tok.Value.split('.');
345 if (V1.getAsInteger(10, *Major))
346 return createError("integer expected, but got " + Tok.Value);
347 if (V2.empty())
348 *Minor = 0;
349 else if (V2.getAsInteger(10, *Minor))
350 return createError("integer expected, but got " + Tok.Value);
351 return Error::success();
352 }
353
354 Lexer Lex;
355 Token Tok;
356 std::vector<Token> Stack;
357 MachineTypes Machine;
358 COFFModuleDefinition Info;
359 bool MingwDef;
360 };
361
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef)362 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
363 MachineTypes Machine,
364 bool MingwDef) {
365 return Parser(MB.getBuffer(), Machine, MingwDef).parse();
366 }
367
368 } // namespace object
369 } // namespace llvm
370