1 #define AFL_LLVM_PASS
2
3 #include "config.h"
4 #include "debug.h"
5
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <unistd.h>
9 #include <sys/time.h>
10 #include <fnmatch.h>
11
12 #include <list>
13 #include <string>
14 #include <fstream>
15
16 #include <llvm/Support/raw_ostream.h>
17
18 #define IS_EXTERN extern
19 #include "afl-llvm-common.h"
20
21 using namespace llvm;
22
/* Instrumentation filter lists, private to this translation unit.
   Every entry is later used as an fnmatch() pattern by isInInstrumentList(). */

// Source file patterns to instrument (filled by initInstrumentList()).
static std::list<std::string> allowListFiles;

// Function name patterns to instrument (filled by initInstrumentList()).
static std::list<std::string> allowListFunctions;

// Source file patterns to skip (filled by initInstrumentList()).
static std::list<std::string> denyListFiles;

// Function name patterns to skip (filled by initInstrumentList() and
// extended by scanForDangerousFunctions()).
static std::list<std::string> denyListFunctions;
27
getBBName(const llvm::BasicBlock * BB)28 char *getBBName(const llvm::BasicBlock *BB) {
29
30 static char *name;
31
32 if (!BB->getName().empty()) {
33
34 name = strdup(BB->getName().str().c_str());
35 return name;
36
37 }
38
39 std::string Str;
40 raw_string_ostream OS(Str);
41
42 #if LLVM_VERSION_MAJOR >= 4 || \
43 (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
44 BB->printAsOperand(OS, false);
45 #endif
46 name = strdup(OS.str().c_str());
47 return name;
48
49 }
50
51 /* Function that we never instrument or analyze */
52 /* Note: this ignore check is also called in isInInstrumentList() */
isIgnoreFunction(const llvm::Function * F)53 bool isIgnoreFunction(const llvm::Function *F) {
54
55 // Starting from "LLVMFuzzer" these are functions used in libfuzzer based
56 // fuzzing campaign installations, e.g. oss-fuzz
57
58 static constexpr const char *ignoreList[] = {
59
60 "asan.",
61 "llvm.",
62 "sancov.",
63 "__ubsan",
64 "ign.",
65 "__afl",
66 "_fini",
67 "__libc_",
68 "__asan",
69 "__msan",
70 "__cmplog",
71 "__sancov",
72 "__san",
73 "__cxx_",
74 "__decide_deferred",
75 "_GLOBAL",
76 "_ZZN6__asan",
77 "_ZZN6__lsan",
78 "msan.",
79 "LLVMFuzzerM",
80 "LLVMFuzzerC",
81 "LLVMFuzzerI",
82 "maybe_duplicate_stderr",
83 "discard_output",
84 "close_stdout",
85 "dup_and_close_stderr",
86 "maybe_close_fd_mask",
87 "ExecuteFilesOnyByOne"
88
89 };
90
91 for (auto const &ignoreListFunc : ignoreList) {
92
93 if (F->getName().startswith(ignoreListFunc)) { return true; }
94
95 }
96
97 static constexpr const char *ignoreSubstringList[] = {
98
99 "__asan", "__msan", "__ubsan", "__lsan", "__san", "__sanitize",
100 "__cxx", "DebugCounter", "DwarfDebug", "DebugLoc"
101
102 };
103
104 for (auto const &ignoreListFunc : ignoreSubstringList) {
105
106 // hexcoder: F->getName().contains() not avaiilable in llvm 3.8.0
107 if (StringRef::npos != F->getName().find(ignoreListFunc)) { return true; }
108
109 }
110
111 return false;
112
113 }
114
initInstrumentList()115 void initInstrumentList() {
116
117 char *allowlist = getenv("AFL_LLVM_ALLOWLIST");
118 if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE");
119 if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST");
120 char *denylist = getenv("AFL_LLVM_DENYLIST");
121 if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST");
122
123 if (allowlist && denylist)
124 FATAL(
125 "You can only specify either AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST "
126 "but not both!");
127
128 if (allowlist) {
129
130 std::string line;
131 std::ifstream fileStream;
132 fileStream.open(allowlist);
133 if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_ALLOWLIST");
134 getline(fileStream, line);
135
136 while (fileStream) {
137
138 int is_file = -1;
139 std::size_t npos;
140 std::string original_line = line;
141
142 line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
143 line.end());
144
145 // remove # and following
146 if ((npos = line.find("#")) != std::string::npos)
147 line = line.substr(0, npos);
148
149 if (line.compare(0, 4, "fun:") == 0) {
150
151 is_file = 0;
152 line = line.substr(4);
153
154 } else if (line.compare(0, 9, "function:") == 0) {
155
156 is_file = 0;
157 line = line.substr(9);
158
159 } else if (line.compare(0, 4, "src:") == 0) {
160
161 is_file = 1;
162 line = line.substr(4);
163
164 } else if (line.compare(0, 7, "source:") == 0) {
165
166 is_file = 1;
167 line = line.substr(7);
168
169 }
170
171 if (line.find(":") != std::string::npos) {
172
173 FATAL("invalid line in AFL_LLVM_ALLOWLIST: %s", original_line.c_str());
174
175 }
176
177 if (line.length() > 0) {
178
179 // if the entry contains / or . it must be a file
180 if (is_file == -1)
181 if (line.find("/") != std::string::npos ||
182 line.find(".") != std::string::npos)
183 is_file = 1;
184 // otherwise it is a function
185
186 if (is_file == 1)
187 allowListFiles.push_back(line);
188 else
189 allowListFunctions.push_back(line);
190
191 }
192
193 getline(fileStream, line);
194
195 }
196
197 if (debug)
198 DEBUGF("loaded allowlist with %zu file and %zu function entries\n",
199 allowListFiles.size(), allowListFunctions.size());
200
201 }
202
203 if (denylist) {
204
205 std::string line;
206 std::ifstream fileStream;
207 fileStream.open(denylist);
208 if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_DENYLIST");
209 getline(fileStream, line);
210
211 while (fileStream) {
212
213 int is_file = -1;
214 std::size_t npos;
215 std::string original_line = line;
216
217 line.erase(std::remove_if(line.begin(), line.end(), ::isspace),
218 line.end());
219
220 // remove # and following
221 if ((npos = line.find("#")) != std::string::npos)
222 line = line.substr(0, npos);
223
224 if (line.compare(0, 4, "fun:") == 0) {
225
226 is_file = 0;
227 line = line.substr(4);
228
229 } else if (line.compare(0, 9, "function:") == 0) {
230
231 is_file = 0;
232 line = line.substr(9);
233
234 } else if (line.compare(0, 4, "src:") == 0) {
235
236 is_file = 1;
237 line = line.substr(4);
238
239 } else if (line.compare(0, 7, "source:") == 0) {
240
241 is_file = 1;
242 line = line.substr(7);
243
244 }
245
246 if (line.find(":") != std::string::npos) {
247
248 FATAL("invalid line in AFL_LLVM_DENYLIST: %s", original_line.c_str());
249
250 }
251
252 if (line.length() > 0) {
253
254 // if the entry contains / or . it must be a file
255 if (is_file == -1)
256 if (line.find("/") != std::string::npos ||
257 line.find(".") != std::string::npos)
258 is_file = 1;
259 // otherwise it is a function
260
261 if (is_file == 1)
262 denyListFiles.push_back(line);
263 else
264 denyListFunctions.push_back(line);
265
266 }
267
268 getline(fileStream, line);
269
270 }
271
272 if (debug)
273 DEBUGF("loaded denylist with %zu file and %zu function entries\n",
274 denyListFiles.size(), denyListFunctions.size());
275
276 }
277
278 }
279
scanForDangerousFunctions(llvm::Module * M)280 void scanForDangerousFunctions(llvm::Module *M) {
281
282 if (!M) return;
283
284 #if LLVM_VERSION_MAJOR >= 4 || \
285 (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9)
286
287 for (GlobalIFunc &IF : M->ifuncs()) {
288
289 StringRef ifunc_name = IF.getName();
290 Constant *r = IF.getResolver();
291 StringRef r_name = cast<Function>(r->getOperand(0))->getName();
292 if (!be_quiet)
293 fprintf(stderr,
294 "Info: Found an ifunc with name %s that points to resolver "
295 "function %s, we will not instrument this, putting it into the "
296 "block list.\n",
297 ifunc_name.str().c_str(), r_name.str().c_str());
298 denyListFunctions.push_back(r_name.str());
299
300 }
301
302 GlobalVariable *GV = M->getNamedGlobal("llvm.global_ctors");
303 if (GV && !GV->isDeclaration() && !GV->hasLocalLinkage()) {
304
305 ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
306
307 if (InitList) {
308
309 for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
310
311 if (ConstantStruct *CS =
312 dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
313
314 if (CS->getNumOperands() >= 2) {
315
316 if (CS->getOperand(1)->isNullValue())
317 break; // Found a null terminator, stop here.
318
319 ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
320 int Priority = CI ? CI->getSExtValue() : 0;
321
322 Constant *FP = CS->getOperand(1);
323 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
324 if (CE->isCast()) FP = CE->getOperand(0);
325 if (Function *F = dyn_cast<Function>(FP)) {
326
327 if (!F->isDeclaration() &&
328 strncmp(F->getName().str().c_str(), "__afl", 5) != 0) {
329
330 if (!be_quiet)
331 fprintf(stderr,
332 "Info: Found constructor function %s with prio "
333 "%u, we will not instrument this, putting it into a "
334 "block list.\n",
335 F->getName().str().c_str(), Priority);
336 denyListFunctions.push_back(F->getName().str());
337
338 }
339
340 }
341
342 }
343
344 }
345
346 }
347
348 }
349
350 }
351
352 #endif
353
354 }
355
getSourceName(llvm::Function * F)356 static std::string getSourceName(llvm::Function *F) {
357
358 // let's try to get the filename for the function
359 auto bb = &F->getEntryBlock();
360 BasicBlock::iterator IP = bb->getFirstInsertionPt();
361 IRBuilder<> IRB(&(*IP));
362 DebugLoc Loc = IP->getDebugLoc();
363
364 #if LLVM_VERSION_MAJOR >= 4 || \
365 (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7)
366 if (Loc) {
367
368 StringRef instFilename;
369 DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
370
371 if (cDILoc) { instFilename = cDILoc->getFilename(); }
372
373 if (instFilename.str().empty() && cDILoc) {
374
375 /* If the original location is empty, try using the inlined location
376 */
377 DILocation *oDILoc = cDILoc->getInlinedAt();
378 if (oDILoc) { instFilename = oDILoc->getFilename(); }
379
380 }
381
382 return instFilename.str();
383
384 }
385
386 #else
387 if (!Loc.isUnknown()) {
388
389 DILocation cDILoc(Loc.getAsMDNode(F->getContext()));
390
391 StringRef instFilename = cDILoc.getFilename();
392
393 /* Continue only if we know where we actually are */
394 return instFilename.str();
395
396 }
397
398 #endif
399
400 return std::string("");
401
402 }
403
isInInstrumentList(llvm::Function * F,std::string Filename)404 bool isInInstrumentList(llvm::Function *F, std::string Filename) {
405
406 bool return_default = true;
407
408 // is this a function with code? If it is external we don't instrument it
409 // anyway and it can't be in the instrument file list. Or if it is it is
410 // ignored.
411 if (!F->size() || isIgnoreFunction(F)) return false;
412
413 if (!denyListFiles.empty() || !denyListFunctions.empty()) {
414
415 if (!denyListFunctions.empty()) {
416
417 std::string instFunction = F->getName().str();
418
419 for (std::list<std::string>::iterator it = denyListFunctions.begin();
420 it != denyListFunctions.end(); ++it) {
421
422 /* We don't check for filename equality here because
423 * filenames might actually be full paths. Instead we
424 * check that the actual filename ends in the filename
425 * specified in the list. We also allow UNIX-style pattern
426 * matching */
427
428 if (instFunction.length() >= it->length()) {
429
430 if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
431
432 if (debug)
433 DEBUGF(
434 "Function %s is in the deny function list, not instrumenting "
435 "... \n",
436 instFunction.c_str());
437 return false;
438
439 }
440
441 }
442
443 }
444
445 }
446
447 if (!denyListFiles.empty()) {
448
449 std::string source_file = getSourceName(F);
450
451 if (source_file.empty()) { source_file = Filename; }
452
453 if (!source_file.empty()) {
454
455 for (std::list<std::string>::iterator it = denyListFiles.begin();
456 it != denyListFiles.end(); ++it) {
457
458 /* We don't check for filename equality here because
459 * filenames might actually be full paths. Instead we
460 * check that the actual filename ends in the filename
461 * specified in the list. We also allow UNIX-style pattern
462 * matching */
463
464 if (source_file.length() >= it->length()) {
465
466 if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
467
468 return false;
469
470 }
471
472 }
473
474 }
475
476 } else {
477
478 // we could not find out the location. in this case we say it is not
479 // in the instrument file list
480 if (!be_quiet)
481 WARNF(
482 "No debug information found for function %s, will be "
483 "instrumented (recompile with -g -O[1-3] and use a modern llvm).",
484 F->getName().str().c_str());
485
486 }
487
488 }
489
490 }
491
492 // if we do not have a instrument file list return true
493 if (!allowListFiles.empty() || !allowListFunctions.empty()) {
494
495 return_default = false;
496
497 if (!allowListFunctions.empty()) {
498
499 std::string instFunction = F->getName().str();
500
501 for (std::list<std::string>::iterator it = allowListFunctions.begin();
502 it != allowListFunctions.end(); ++it) {
503
504 /* We don't check for filename equality here because
505 * filenames might actually be full paths. Instead we
506 * check that the actual filename ends in the filename
507 * specified in the list. We also allow UNIX-style pattern
508 * matching */
509
510 if (instFunction.length() >= it->length()) {
511
512 if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) {
513
514 if (debug)
515 DEBUGF(
516 "Function %s is in the allow function list, instrumenting "
517 "... \n",
518 instFunction.c_str());
519 return true;
520
521 }
522
523 }
524
525 }
526
527 }
528
529 if (!allowListFiles.empty()) {
530
531 std::string source_file = getSourceName(F);
532
533 if (source_file.empty()) { source_file = Filename; }
534
535 if (!source_file.empty()) {
536
537 for (std::list<std::string>::iterator it = allowListFiles.begin();
538 it != allowListFiles.end(); ++it) {
539
540 /* We don't check for filename equality here because
541 * filenames might actually be full paths. Instead we
542 * check that the actual filename ends in the filename
543 * specified in the list. We also allow UNIX-style pattern
544 * matching */
545
546 if (source_file.length() >= it->length()) {
547
548 if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) {
549
550 if (debug)
551 DEBUGF(
552 "Function %s is in the allowlist (%s), instrumenting ... "
553 "\n",
554 F->getName().str().c_str(), source_file.c_str());
555 return true;
556
557 }
558
559 }
560
561 }
562
563 } else {
564
565 // we could not find out the location. In this case we say it is not
566 // in the instrument file list
567 if (!be_quiet)
568 WARNF(
569 "No debug information found for function %s, will not be "
570 "instrumented (recompile with -g -O[1-3] and use a modern llvm).",
571 F->getName().str().c_str());
572 return false;
573
574 }
575
576 }
577
578 }
579
580 return return_default;
581
582 }
583
584 // Calculate the number of average collisions that would occur if all
585 // location IDs would be assigned randomly (like normal afl/afl++).
586 // This uses the "balls in bins" algorithm.
calculateCollisions(uint32_t edges)587 unsigned long long int calculateCollisions(uint32_t edges) {
588
589 double bins = MAP_SIZE;
590 double balls = edges;
591 double step1 = 1 - (1 / bins);
592 double step2 = pow(step1, balls);
593 double step3 = bins * step2;
594 double step4 = round(step3);
595 unsigned long long int empty = step4;
596 unsigned long long int collisions = edges - (MAP_SIZE - empty);
597 return collisions;
598
599 }
600
601