• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm-c/Disassembler.h"
13 #include "llvm-c/Target.h"
14 #include "llvm/MC/SubtargetFeature.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/raw_ostream.h"
17 
18 using namespace llvm;
19 
20 const unsigned AssemblyTextBufSize = 80;
21 
22 static cl::opt<std::string>
23     TripleName("triple", cl::desc("Target triple to assemble for, "
24                                   "see -version for available targets"));
25 
26 static cl::opt<std::string>
27     MCPU("mcpu",
28          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
29          cl::value_desc("cpu-name"), cl::init(""));
30 
31 // This is useful for variable-length instruction sets.
32 static cl::opt<unsigned> InsnLimit(
33     "insn-limit",
34     cl::desc("Limit the number of instructions to process (0 for no limit)"),
35     cl::value_desc("count"), cl::init(0));
36 
37 static cl::list<std::string>
38     MAttrs("mattr", cl::CommaSeparated,
39            cl::desc("Target specific attributes (-mattr=help for details)"),
40            cl::value_desc("a1,+a2,-a3,..."));
41 // The feature string derived from -mattr's values.
42 std::string FeaturesStr;
43 
44 static cl::list<std::string>
45     FuzzerArgs("fuzzer-args", cl::Positional,
46                cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
47                cl::PositionalEatsArgs);
48 static std::vector<char *> ModifiedArgv;
49 
DisassembleOneInput(const uint8_t * Data,size_t Size)50 int DisassembleOneInput(const uint8_t *Data, size_t Size) {
51   char AssemblyText[AssemblyTextBufSize];
52 
53   std::vector<uint8_t> DataCopy(Data, Data + Size);
54 
55   LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
56       TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
57       nullptr, nullptr);
58   assert(Ctx);
59   uint8_t *p = DataCopy.data();
60   unsigned Consumed;
61   unsigned InstructionsProcessed = 0;
62   do {
63     Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
64                                      AssemblyTextBufSize);
65     Size -= Consumed;
66     p += Consumed;
67 
68     InstructionsProcessed ++;
69     if (InsnLimit != 0 && InstructionsProcessed < InsnLimit)
70       break;
71   } while (Consumed != 0);
72   LLVMDisasmDispose(Ctx);
73   return 0;
74 }
75 
LLVMFuzzerTestOneInput(const uint8_t * Data,size_t Size)76 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
77   return DisassembleOneInput(Data, Size);
78 }
79 
LLVMFuzzerInitialize(int * argc,char *** argv)80 extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
81                                                         char ***argv) {
82   // The command line is unusual compared to other fuzzers due to the need to
83   // specify the target. Options like -triple, -mcpu, and -mattr work like
84   // their counterparts in llvm-mc, while -fuzzer-args collects options for the
85   // fuzzer itself.
86   //
87   // Examples:
88   //
89   // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
90   // 4-bytes each and use the contents of ./corpus as the test corpus:
91   //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
92   //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
93   //
94   // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
95   // feature enabled using up to 64-byte inputs:
96   //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
97   //       -disassemble -fuzzer-args ./corpus
98   //
99   // If your aim is to find instructions that are not tested, then it is
100   // advisable to constrain the maximum input size to a single instruction
101   // using -max_len as in the first example. This results in a test corpus of
102   // individual instructions that test unique paths. Without this constraint,
103   // there will be considerable redundancy in the corpus.
104 
105   char **OriginalArgv = *argv;
106 
107   LLVMInitializeAllTargetInfos();
108   LLVMInitializeAllTargetMCs();
109   LLVMInitializeAllDisassemblers();
110 
111   cl::ParseCommandLineOptions(*argc, OriginalArgv);
112 
113   // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
114   // the driver can parse its arguments.
115   //
116   // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
117   // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
118   // non-const buffer to avoid the need to clean up when the fuzzer terminates.
119   ModifiedArgv.push_back(OriginalArgv[0]);
120   for (const auto &FuzzerArg : FuzzerArgs) {
121     for (int i = 1; i < *argc; ++i) {
122       if (FuzzerArg == OriginalArgv[i])
123         ModifiedArgv.push_back(OriginalArgv[i]);
124     }
125   }
126   *argc = ModifiedArgv.size();
127   *argv = ModifiedArgv.data();
128 
129   // Package up features to be passed to target/subtarget
130   // We have to pass it via a global since the callback doesn't
131   // permit any user data.
132   if (MAttrs.size()) {
133     SubtargetFeatures Features;
134     for (unsigned i = 0; i != MAttrs.size(); ++i)
135       Features.AddFeature(MAttrs[i]);
136     FeaturesStr = Features.getString();
137   }
138 
139   if (TripleName.empty())
140     TripleName = sys::getDefaultTargetTriple();
141 
142   return 0;
143 }
144