1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11
12 #include "FuzzerInterface.h"
13 #include "llvm-c/Disassembler.h"
14 #include "llvm-c/Target.h"
15 #include "llvm/MC/SubtargetFeature.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/raw_ostream.h"
18
19 using namespace llvm;
20
// Capacity, in bytes, of the scratch buffer that receives the textual form of
// each disassembled instruction.
static constexpr unsigned AssemblyTextBufSize = 80;
22
// The MC-layer operations this fuzzer can be asked to exercise.
enum ActionType {
  AC_Assemble,   // Feed the input to the assembler.
  AC_Disassemble // Feed the input to the disassembler.
};
27
28 static cl::opt<ActionType>
29 Action(cl::desc("Action to perform:"),
30 cl::init(AC_Assemble),
31 cl::values(clEnumValN(AC_Assemble, "assemble",
32 "Assemble a .s file (default)"),
33 clEnumValN(AC_Disassemble, "disassemble",
34 "Disassemble strings of hex bytes"),
35 clEnumValEnd));
36
37 static cl::opt<std::string>
38 TripleName("triple", cl::desc("Target triple to assemble for, "
39 "see -version for available targets"));
40
41 static cl::opt<std::string>
42 MCPU("mcpu",
43 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
44 cl::value_desc("cpu-name"), cl::init(""));
45
46 // This is useful for variable-length instruction sets.
47 static cl::opt<unsigned> InsnLimit(
48 "insn-limit",
49 cl::desc("Limit the number of instructions to process (0 for no limit)"),
50 cl::value_desc("count"), cl::init(0));
51
52 static cl::list<std::string>
53 MAttrs("mattr", cl::CommaSeparated,
54 cl::desc("Target specific attributes (-mattr=help for details)"),
55 cl::value_desc("a1,+a2,-a3,..."));
56 // The feature string derived from -mattr's values.
57 std::string FeaturesStr;
58
59 static cl::list<std::string>
60 FuzzerArgs("fuzzer-args", cl::Positional,
61 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
62 cl::PositionalEatsArgs);
63 static std::vector<char *> ModifiedArgv;
64
DisassembleOneInput(const uint8_t * Data,size_t Size)65 int DisassembleOneInput(const uint8_t *Data, size_t Size) {
66 char AssemblyText[AssemblyTextBufSize];
67
68 std::vector<uint8_t> DataCopy(Data, Data + Size);
69
70 LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
71 TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
72 nullptr, nullptr);
73 assert(Ctx);
74 uint8_t *p = DataCopy.data();
75 unsigned Consumed;
76 unsigned InstructionsProcessed = 0;
77 do {
78 Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
79 AssemblyTextBufSize);
80 Size -= Consumed;
81 p += Consumed;
82
83 InstructionsProcessed ++;
84 if (InsnLimit != 0 && InstructionsProcessed < InsnLimit)
85 break;
86 } while (Consumed != 0);
87 LLVMDisasmDispose(Ctx);
88 return 0;
89 }
90
LLVMFuzzerTestOneInput(const uint8_t * Data,size_t Size)91 int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
92 if (Action == AC_Assemble)
93 errs() << "error: -assemble is not implemented\n";
94 else if (Action == AC_Disassemble)
95 return DisassembleOneInput(Data, Size);
96
97 llvm_unreachable("Unknown action");
98 return 0;
99 }
100
LLVMFuzzerInitialize(int * argc,char *** argv)101 int LLVMFuzzerInitialize(int *argc, char ***argv) {
102 // The command line is unusual compared to other fuzzers due to the need to
103 // specify the target. Options like -triple, -mcpu, and -mattr work like
104 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
105 // fuzzer itself.
106 //
107 // Examples:
108 //
109 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
110 // 4-bytes each and use the contents of ./corpus as the test corpus:
111 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
112 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
113 //
114 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
115 // feature enabled using up to 64-byte inputs:
116 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
117 // -disassemble -fuzzer-args ./corpus
118 //
119 // If your aim is to find instructions that are not tested, then it is
120 // advisable to constrain the maximum input size to a single instruction
121 // using -max_len as in the first example. This results in a test corpus of
122 // individual instructions that test unique paths. Without this constraint,
123 // there will be considerable redundancy in the corpus.
124
125 char **OriginalArgv = *argv;
126
127 LLVMInitializeAllTargetInfos();
128 LLVMInitializeAllTargetMCs();
129 LLVMInitializeAllDisassemblers();
130
131 cl::ParseCommandLineOptions(*argc, OriginalArgv);
132
133 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
134 // the driver can parse its arguments.
135 //
136 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
137 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
138 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
139 ModifiedArgv.push_back(OriginalArgv[0]);
140 for (const auto &FuzzerArg : FuzzerArgs) {
141 for (int i = 1; i < *argc; ++i) {
142 if (FuzzerArg == OriginalArgv[i])
143 ModifiedArgv.push_back(OriginalArgv[i]);
144 }
145 }
146 *argc = ModifiedArgv.size();
147 *argv = ModifiedArgv.data();
148
149 // Package up features to be passed to target/subtarget
150 // We have to pass it via a global since the callback doesn't
151 // permit any user data.
152 if (MAttrs.size()) {
153 SubtargetFeatures Features;
154 for (unsigned i = 0; i != MAttrs.size(); ++i)
155 Features.AddFeature(MAttrs[i]);
156 FeaturesStr = Features.getString();
157 }
158
159 return 0;
160 }
161