1 /*
american fuzzy lop++ - LLVM dict2file pass
------------------------------------------
4
5 Written by Marc Heuse <mh@mh-sec.de>
6
7 Copyright 2019-2022 AFLplusplus Project. All rights reserved.
8
9 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at:
12
13 https://www.apache.org/licenses/LICENSE-2.0
14
15 This library is plugged into LLVM when invoking clang through afl-clang-lto.
16
17 */
18
19 #define AFL_LLVM_PASS
20
21 #include "config.h"
22 #include "debug.h"
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <string.h>
28 #include <sys/time.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 #include <ctype.h>
33
34 #include <list>
35 #include <string>
36 #include <fstream>
37 #include <set>
38
39 #include "llvm/Config/llvm-config.h"
40 #include "llvm/ADT/Statistic.h"
41 #include "llvm/IR/IRBuilder.h"
42 #if LLVM_VERSION_MAJOR >= 11 /* use new pass manager */
43 #include "llvm/Passes/PassPlugin.h"
44 #include "llvm/Passes/PassBuilder.h"
45 #include "llvm/IR/PassManager.h"
46 #else
47 #include "llvm/IR/LegacyPassManager.h"
48 #endif
49 #include "llvm/IR/BasicBlock.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/DebugInfo.h"
52 #include "llvm/IR/CFG.h"
53 #include "llvm/IR/Verifier.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
57 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
58 #include "llvm/Analysis/LoopInfo.h"
59 #include "llvm/Analysis/ValueTracking.h"
60 #include "llvm/Pass.h"
61 #include "llvm/IR/Constants.h"
62
63 #include "afl-llvm-common.h"
64
65 #ifndef O_DSYNC
66 #define O_DSYNC O_SYNC
67 #endif
68
69 using namespace llvm;
70
71 namespace {
72
73 #if LLVM_VERSION_MAJOR >= 11 /* use new pass manager */
74 class AFLdict2filePass : public PassInfoMixin<AFLdict2filePass> {
75
76 std::ofstream of;
77 void dict2file(u8 *, u32);
78
79 public:
AFLdict2filePass()80 AFLdict2filePass() {
81
82 #else
83
84 class AFLdict2filePass : public ModulePass {
85
86 std::ofstream of;
87 void dict2file(u8 *, u32);
88
89 public:
90 static char ID;
91
92 AFLdict2filePass() : ModulePass(ID) {
93
94 #endif
95
96 if (getenv("AFL_DEBUG")) debug = 1;
97
98 }
99
100 #if LLVM_VERSION_MAJOR >= 11 /* use new pass manager */
101 PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
102 #else
103 bool runOnModule(Module &M) override;
104 #endif
105
106 };
107
108 } // namespace
109
110 #if LLVM_MAJOR >= 11
111 extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
llvmGetPassPluginInfo()112 llvmGetPassPluginInfo() {
113
114 return {LLVM_PLUGIN_API_VERSION, "AFLdict2filePass", "v0.1",
115 /* lambda to insert our pass into the pass pipeline. */
116 [](PassBuilder &PB) {
117
118 #if LLVM_VERSION_MAJOR <= 13
119 using OptimizationLevel = typename PassBuilder::OptimizationLevel;
120 #endif
121 PB.registerOptimizerLastEPCallback(
122 [](ModulePassManager &MPM, OptimizationLevel OL) {
123
124 MPM.addPass(AFLdict2filePass());
125
126 });
127
128 }};
129
130 }
131
132 #else
133 char AFLdict2filePass::ID = 0;
134 #endif
135
dict2file(u8 * mem,u32 len)136 void AFLdict2filePass::dict2file(u8 *mem, u32 len) {
137
138 u32 i, j, binary = 0;
139 char line[MAX_AUTO_EXTRA * 8], tmp[8];
140
141 strcpy(line, "\"");
142 j = 1;
143 for (i = 0; i < len; i++) {
144
145 if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') {
146
147 line[j++] = mem[i];
148
149 } else {
150
151 if (i + 1 != len || mem[i] != 0 || binary || len == 4 || len == 8) {
152
153 line[j] = 0;
154 sprintf(tmp, "\\x%02x", (u8)mem[i]);
155 strcat(line, tmp);
156 j = strlen(line);
157
158 }
159
160 binary = 1;
161
162 }
163
164 }
165
166 line[j] = 0;
167 strcat(line, "\"\n");
168 of << line;
169 of.flush();
170
171 if (!be_quiet) fprintf(stderr, "Found dictionary token: %s", line);
172
173 }
174
175 #if LLVM_VERSION_MAJOR >= 11 /* use new pass manager */
run(Module & M,ModuleAnalysisManager & MAM)176 PreservedAnalyses AFLdict2filePass::run(Module &M, ModuleAnalysisManager &MAM) {
177
178 #else
179 bool AFLdict2filePass::runOnModule(Module &M) {
180
181 #endif
182
183 DenseMap<Value *, std::string *> valueMap;
184 char * ptr;
185 int found = 0;
186
187 /* Show a banner */
188 setvbuf(stdout, NULL, _IONBF, 0);
189
190 if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
191
192 SAYF(cCYA "afl-llvm-dict2file" VERSION cRST
193 " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
194
195 } else
196
197 be_quiet = 1;
198
199 scanForDangerousFunctions(&M);
200
201 ptr = getenv("AFL_LLVM_DICT2FILE");
202
203 if (!ptr || *ptr != '/')
204 FATAL("AFL_LLVM_DICT2FILE is not set to an absolute path: %s", ptr);
205
206 of.open(ptr, std::ofstream::out | std::ofstream::app);
207 if (!of.is_open()) PFATAL("Could not open/create %s.", ptr);
208
209 /* Instrument all the things! */
210
211 for (auto &F : M) {
212
213 if (isIgnoreFunction(&F)) continue;
214 if (!isInInstrumentList(&F, MNAME) || !F.size()) { continue; }
215
216 /* Some implementation notes.
217 *
218 * We try to handle 3 cases:
219 * - memcmp("foo", arg, 3) <- literal string
220 * - static char globalvar[] = "foo";
221 * memcmp(globalvar, arg, 3) <- global variable
222 * - char localvar[] = "foo";
223 * memcmp(locallvar, arg, 3) <- local variable
224 *
225 * The local variable case is the hardest. We can only detect that
226 * case if there is no reassignment or change in the variable.
227 * And it might not work across llvm version.
228 * What we do is hooking the initializer function for local variables
229 * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
230 * variable. And if that variable is then used in a compare function
231 * we use that noted string.
232 * This seems not to work for tokens that have a size <= 4 :-(
233 *
234 * - if the compared length is smaller than the string length we
235 * save the full string. This is likely better for fuzzing but
236 * might be wrong in a few cases depending on optimizers
237 *
238 * - not using StringRef because there is a bug in the llvm 11
239 * checkout I am using which sometimes points to wrong strings
240 *
241 * Over and out. Took me a full day. damn. mh/vh
242 */
243
244 for (auto &BB : F) {
245
246 for (auto &IN : BB) {
247
248 CallInst *callInst = nullptr;
249 CmpInst * cmpInst = nullptr;
250
251 if ((cmpInst = dyn_cast<CmpInst>(&IN))) {
252
253 Value * op = cmpInst->getOperand(1);
254 ConstantInt *ilen = dyn_cast<ConstantInt>(op);
255
256 /* We skip > 64 bit integers. why? first because their value is
257 difficult to obtain, and second because clang does not support
258 literals > 64 bit (as of llvm 12) */
259
260 if (ilen && ilen->uge(0xffffffffffffffff) == false) {
261
262 u64 val2 = 0, val = ilen->getZExtValue();
263 u32 len = 0;
264 if (val > 0x10000 && val < 0xffffffff) len = 4;
265 if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;
266
267 if (len) {
268
269 auto c = cmpInst->getPredicate();
270
271 switch (c) {
272
273 case CmpInst::FCMP_OGT: // fall through
274 case CmpInst::FCMP_OLE: // fall through
275 case CmpInst::ICMP_SLE: // fall through
276 case CmpInst::ICMP_SGT:
277
278 // signed comparison and it is a negative constant
279 if ((len == 4 && (val & 80000000)) ||
280 (len == 8 && (val & 8000000000000000))) {
281
282 if ((val & 0xffff) != 1) val2 = val - 1;
283 break;
284
285 }
286
287 // fall through
288
289 case CmpInst::FCMP_UGT: // fall through
290 case CmpInst::FCMP_ULE: // fall through
291 case CmpInst::ICMP_UGT: // fall through
292 case CmpInst::ICMP_ULE:
293 if ((val & 0xffff) != 0xfffe) val2 = val + 1;
294 break;
295
296 case CmpInst::FCMP_OLT: // fall through
297 case CmpInst::FCMP_OGE: // fall through
298 case CmpInst::ICMP_SLT: // fall through
299 case CmpInst::ICMP_SGE:
300
301 // signed comparison and it is a negative constant
302 if ((len == 4 && (val & 80000000)) ||
303 (len == 8 && (val & 8000000000000000))) {
304
305 if ((val & 0xffff) != 1) val2 = val - 1;
306 break;
307
308 }
309
310 // fall through
311
312 case CmpInst::FCMP_ULT: // fall through
313 case CmpInst::FCMP_UGE: // fall through
314 case CmpInst::ICMP_ULT: // fall through
315 case CmpInst::ICMP_UGE:
316 if ((val & 0xffff) != 1) val2 = val - 1;
317 break;
318
319 default:
320 val2 = 0;
321
322 }
323
324 dict2file((u8 *)&val, len);
325 found++;
326 if (val2) {
327
328 dict2file((u8 *)&val2, len);
329 found++;
330
331 }
332
333 }
334
335 }
336
337 }
338
339 if ((callInst = dyn_cast<CallInst>(&IN))) {
340
341 bool isStrcmp = true;
342 bool isMemcmp = true;
343 bool isStrncmp = true;
344 bool isStrcasecmp = true;
345 bool isStrncasecmp = true;
346 bool isIntMemcpy = true;
347 bool isStdString = true;
348 bool isStrstr = true;
349 size_t optLen = 0;
350
351 Function *Callee = callInst->getCalledFunction();
352 if (!Callee) continue;
353 if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
354 std::string FuncName = Callee->getName().str();
355 isStrcmp &=
356 (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") ||
357 !FuncName.compare("xmlStrEqual") ||
358 !FuncName.compare("g_strcmp0") ||
359 !FuncName.compare("curl_strequal") ||
360 !FuncName.compare("strcsequal"));
361 isMemcmp &=
362 (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") ||
363 !FuncName.compare("CRYPTO_memcmp") ||
364 !FuncName.compare("OPENSSL_memcmp") ||
365 !FuncName.compare("memcmp_const_time") ||
366 !FuncName.compare("memcmpct"));
367 isStrncmp &= (!FuncName.compare("strncmp") ||
368 !FuncName.compare("xmlStrncmp") ||
369 !FuncName.compare("curl_strnequal"));
370 isStrcasecmp &= (!FuncName.compare("strcasecmp") ||
371 !FuncName.compare("stricmp") ||
372 !FuncName.compare("ap_cstr_casecmp") ||
373 !FuncName.compare("OPENSSL_strcasecmp") ||
374 !FuncName.compare("xmlStrcasecmp") ||
375 !FuncName.compare("g_strcasecmp") ||
376 !FuncName.compare("g_ascii_strcasecmp") ||
377 !FuncName.compare("Curl_strcasecompare") ||
378 !FuncName.compare("Curl_safe_strcasecompare") ||
379 !FuncName.compare("cmsstrcasecmp"));
380 isStrncasecmp &= (!FuncName.compare("strncasecmp") ||
381 !FuncName.compare("strnicmp") ||
382 !FuncName.compare("ap_cstr_casecmpn") ||
383 !FuncName.compare("OPENSSL_strncasecmp") ||
384 !FuncName.compare("xmlStrncasecmp") ||
385 !FuncName.compare("g_ascii_strncasecmp") ||
386 !FuncName.compare("Curl_strncasecompare") ||
387 !FuncName.compare("g_strncasecmp"));
388 isStrstr &= (!FuncName.compare("strstr") ||
389 !FuncName.compare("g_strstr_len") ||
390 !FuncName.compare("ap_strcasestr") ||
391 !FuncName.compare("xmlStrstr") ||
392 !FuncName.compare("xmlStrcasestr") ||
393 !FuncName.compare("g_str_has_prefix") ||
394 !FuncName.compare("g_str_has_suffix"));
395 isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
396 isStdString &= ((FuncName.find("basic_string") != std::string::npos &&
397 FuncName.find("compare") != std::string::npos) ||
398 (FuncName.find("basic_string") != std::string::npos &&
399 FuncName.find("find") != std::string::npos));
400
401 if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
402 !isStrncasecmp && !isIntMemcpy && !isStdString && !isStrstr)
403 continue;
404
405 /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
406 * prototype */
407 FunctionType *FT = Callee->getFunctionType();
408
409 isStrstr &=
410 FT->getNumParams() == 2 &&
411 FT->getParamType(0) == FT->getParamType(1) &&
412 FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
413 isStrcmp &=
414 FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
415 FT->getParamType(0) == FT->getParamType(1) &&
416 FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
417 isStrcasecmp &=
418 FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) &&
419 FT->getParamType(0) == FT->getParamType(1) &&
420 FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext());
421 isMemcmp &= FT->getNumParams() == 3 &&
422 FT->getReturnType()->isIntegerTy(32) &&
423 FT->getParamType(0)->isPointerTy() &&
424 FT->getParamType(1)->isPointerTy() &&
425 FT->getParamType(2)->isIntegerTy();
426 isStrncmp &= FT->getNumParams() == 3 &&
427 FT->getReturnType()->isIntegerTy(32) &&
428 FT->getParamType(0) == FT->getParamType(1) &&
429 FT->getParamType(0) ==
430 IntegerType::getInt8PtrTy(M.getContext()) &&
431 FT->getParamType(2)->isIntegerTy();
432 isStrncasecmp &= FT->getNumParams() == 3 &&
433 FT->getReturnType()->isIntegerTy(32) &&
434 FT->getParamType(0) == FT->getParamType(1) &&
435 FT->getParamType(0) ==
436 IntegerType::getInt8PtrTy(M.getContext()) &&
437 FT->getParamType(2)->isIntegerTy();
438 isStdString &= FT->getNumParams() >= 2 &&
439 FT->getParamType(0)->isPointerTy() &&
440 FT->getParamType(1)->isPointerTy();
441
442 if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
443 !isStrncasecmp && !isIntMemcpy && !isStdString && !isStrstr)
444 continue;
445
446 /* is a str{n,}{case,}cmp/memcmp, check if we have
447 * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
448 * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
449 * memcmp(x, "const", ..) or memcmp("const", x, ..) */
450 Value *Str1P = callInst->getArgOperand(0),
451 *Str2P = callInst->getArgOperand(1);
452 std::string Str1, Str2;
453 StringRef TmpStr;
454 bool HasStr1;
455 getConstantStringInfo(Str1P, TmpStr);
456
457 if (isStrstr || TmpStr.empty()) {
458
459 HasStr1 = false;
460
461 } else {
462
463 HasStr1 = true;
464 Str1 = TmpStr.str();
465
466 }
467
468 bool HasStr2;
469 getConstantStringInfo(Str2P, TmpStr);
470 if (TmpStr.empty()) {
471
472 HasStr2 = false;
473
474 } else {
475
476 HasStr2 = true;
477 Str2 = TmpStr.str();
478
479 }
480
481 if (debug)
482 fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
483 FuncName.c_str(), (void *)Str1P,
484 Str1P->getName().str().c_str(), Str1.c_str(),
485 HasStr1 == true ? "true" : "false", (void *)Str2P,
486 Str2P->getName().str().c_str(), Str2.c_str(),
487 HasStr2 == true ? "true" : "false");
488
489 // we handle the 2nd parameter first because of llvm memcpy
490 if (!HasStr2) {
491
492 auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
493 if (Ptr && Ptr->getOpcode() == Instruction::GetElementPtr) {
494
495 if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
496
497 if (Var->hasInitializer()) {
498
499 if (auto *Array =
500 dyn_cast<ConstantDataArray>(Var->getInitializer())) {
501
502 HasStr2 = true;
503 Str2 = Array->getRawDataValues().str();
504
505 }
506
507 }
508
509 }
510
511 }
512
513 }
514
515 // for the internal memcpy routine we only care for the second
516 // parameter and are not reporting anything.
517 if (isIntMemcpy == true) {
518
519 if (HasStr2 == true) {
520
521 Value * op2 = callInst->getArgOperand(2);
522 ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
523 if (ilen) {
524
525 uint64_t literalLength = Str2.length();
526 uint64_t optLength = ilen->getZExtValue();
527 if (optLength > literalLength + 1) {
528
529 optLength = Str2.length() + 1;
530
531 }
532
533 if (literalLength + 1 == optLength) {
534
535 Str2.append("\0", 1); // add null byte
536
537 }
538
539 }
540
541 valueMap[Str1P] = new std::string(Str2);
542
543 if (debug) {
544
545 fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(),
546 (void *)Str1P);
547
548 }
549
550 continue;
551
552 }
553
554 continue;
555
556 }
557
558 // Neither a literal nor a global variable?
559 // maybe it is a local variable that we saved
560 if (!HasStr2) {
561
562 std::string *strng = valueMap[Str2P];
563 if (strng && !strng->empty()) {
564
565 Str2 = *strng;
566 HasStr2 = true;
567 if (debug)
568 fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
569 (void *)Str2P);
570
571 }
572
573 }
574
575 if (!HasStr1) {
576
577 auto Ptr = dyn_cast<ConstantExpr>(Str1P);
578
579 if (Ptr && Ptr->getOpcode() == Instruction::GetElementPtr) {
580
581 if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
582
583 if (Var->hasInitializer()) {
584
585 if (auto *Array =
586 dyn_cast<ConstantDataArray>(Var->getInitializer())) {
587
588 HasStr1 = true;
589 Str1 = Array->getRawDataValues().str();
590
591 }
592
593 }
594
595 }
596
597 }
598
599 }
600
601 // Neither a literal nor a global variable?
602 // maybe it is a local variable that we saved
603 if (!HasStr1) {
604
605 std::string *strng = valueMap[Str1P];
606 if (strng && !strng->empty()) {
607
608 Str1 = *strng;
609 HasStr1 = true;
610 if (debug)
611 fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
612 (void *)Str1P);
613
614 }
615
616 }
617
618 /* handle cases of one string is const, one string is variable */
619 if (!(HasStr1 ^ HasStr2)) continue;
620
621 std::string thestring;
622
623 if (HasStr1)
624 thestring = Str1;
625 else
626 thestring = Str2;
627
628 optLen = thestring.length();
629
630 if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
631
632 if (isMemcmp || isStrncmp || isStrncasecmp) {
633
634 Value * op2 = callInst->getArgOperand(2);
635 ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
636
637 if (ilen) {
638
639 uint64_t literalLength = optLen;
640 optLen = ilen->getZExtValue();
641 if (optLen > thestring.length() + 1) {
642
643 optLen = thestring.length() + 1;
644
645 }
646
647 if (optLen < 2) { continue; }
648 if (literalLength + 1 == optLen) { // add null byte
649
650 thestring.append("\0", 1);
651
652 }
653
654 }
655
656 }
657
658 // add null byte if this is a string compare function and a null
659 // was not already added
660 if (!isMemcmp) {
661
662 /*
663 if (addedNull == false && thestring[optLen - 1] != '\0')
664 {
665
666 thestring.append("\0", 1); // add null byte
667 optLen++;
668
669 }
670
671 */
672 if (!isStdString && thestring.find('\0', 0) != std::string::npos) {
673
674 // ensure we do not have garbage
675 size_t offset = thestring.find('\0', 0);
676 if (offset + 1 < optLen) optLen = offset + 1;
677 thestring = thestring.substr(0, optLen);
678
679 }
680
681 }
682
683 // we take the longer string, even if the compare was to a
684 // shorter part. Note that depending on the optimizer of the
685 // compiler this can be wrong, but it is more likely that this
686 // is helping the fuzzer
687 if (optLen != thestring.length()) optLen = thestring.length();
688 if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
689 if (optLen < 3) // too short? skip
690 continue;
691
692 ptr = (char *)thestring.c_str();
693
694 dict2file((u8 *)ptr, optLen);
695 found++;
696
697 }
698
699 }
700
701 }
702
703 }
704
705 of.close();
706
707 /* Say something nice. */
708
709 if (!be_quiet) {
710
711 if (!found)
712 OKF("No entries for a dictionary found.");
713 else
714 OKF("Wrote %d entries to the dictionary file.\n", found);
715
716 }
717
718 #if LLVM_VERSION_MAJOR >= 11 /* use new pass manager */
719 auto PA = PreservedAnalyses::all();
720 return PA;
721 #else
722 return true;
723 #endif
724
725 }
726
727 #if LLVM_VERSION_MAJOR < 11 /* use old pass manager */
728 static void registerAFLdict2filePass(const PassManagerBuilder &,
729 legacy::PassManagerBase &PM) {
730
731 PM.add(new AFLdict2filePass());
732
733 }
734
735 static RegisterPass<AFLdict2filePass> X("afl-dict2file",
736 "afl++ dict2file instrumentation pass",
737 false, false);
738
739 static RegisterStandardPasses RegisterAFLdict2filePass(
740 PassManagerBuilder::EP_OptimizerLast, registerAFLdict2filePass);
741
742 static RegisterStandardPasses RegisterAFLdict2filePass0(
743 PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLdict2filePass);
744
745 #endif
746
747