• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm-c/Target.h"
13 #include "llvm/MC/SubtargetFeature.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCCodeEmitter.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCObjectFileInfo.h"
21 #include "llvm/MC/MCParser/AsmLexer.h"
22 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCStreamer.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/MCTargetOptionsCommandFlags.inc"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/FileUtilities.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetSelect.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/ToolOutputFile.h"
36 
37 using namespace llvm;
38 
39 static cl::opt<std::string>
40     TripleName("triple", cl::desc("Target triple to assemble for, "
41                                   "see -version for available targets"));
42 
43 static cl::opt<std::string>
44     MCPU("mcpu",
45          cl::desc("Target a specific cpu type (-mcpu=help for details)"),
46          cl::value_desc("cpu-name"), cl::init(""));
47 
48 // This is useful for variable-length instruction sets.
49 static cl::opt<unsigned> InsnLimit(
50     "insn-limit",
51     cl::desc("Limit the number of instructions to process (0 for no limit)"),
52     cl::value_desc("count"), cl::init(0));
53 
54 static cl::list<std::string>
55     MAttrs("mattr", cl::CommaSeparated,
56            cl::desc("Target specific attributes (-mattr=help for details)"),
57            cl::value_desc("a1,+a2,-a3,..."));
58 // The feature string derived from -mattr's values.
59 std::string FeaturesStr;
60 
61 static cl::list<std::string>
62     FuzzerArgs("fuzzer-args", cl::Positional,
63                cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
64                cl::PositionalEatsArgs);
65 static std::vector<char *> ModifiedArgv;
66 
67 enum OutputFileType {
68   OFT_Null,
69   OFT_AssemblyFile,
70   OFT_ObjectFile
71 };
72 static cl::opt<OutputFileType>
73 FileType("filetype", cl::init(OFT_AssemblyFile),
74   cl::desc("Choose an output file type:"),
75   cl::values(
76        clEnumValN(OFT_AssemblyFile, "asm",
77                   "Emit an assembly ('.s') file"),
78        clEnumValN(OFT_Null, "null",
79                   "Don't emit anything (for timing purposes)"),
80        clEnumValN(OFT_ObjectFile, "obj",
81                   "Emit a native object ('.o') file")));
82 
83 
84 class LLVMFuzzerInputBuffer : public MemoryBuffer
85 {
86   public:
LLVMFuzzerInputBuffer(const uint8_t * data_,size_t size_)87     LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
88       : Data(reinterpret_cast<const char *>(data_)),
89         Size(size_) {
90         init(Data, Data+Size, false);
91       }
92 
93 
getBufferKind() const94     virtual BufferKind getBufferKind() const {
95       return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
96                                   // the intent ... though AFAIK it
97                                   // probably came from an mmap or sbrk
98     }
99 
100   private:
101     const char *Data;
102     size_t Size;
103 };
104 
AssembleInput(const char * ProgName,const Target * TheTarget,SourceMgr & SrcMgr,MCContext & Ctx,MCStreamer & Str,MCAsmInfo & MAI,MCSubtargetInfo & STI,MCInstrInfo & MCII,MCTargetOptions & MCOptions)105 static int AssembleInput(const char *ProgName, const Target *TheTarget,
106                          SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
107                          MCAsmInfo &MAI, MCSubtargetInfo &STI,
108                          MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
109   static const bool NoInitialTextSection = false;
110 
111   std::unique_ptr<MCAsmParser> Parser(
112     createMCAsmParser(SrcMgr, Ctx, Str, MAI));
113 
114   std::unique_ptr<MCTargetAsmParser> TAP(
115     TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
116 
117   if (!TAP) {
118     errs() << ProgName
119            << ": error: this target '" << TripleName
120            << "', does not support assembly parsing.\n";
121     abort();
122   }
123 
124   Parser->setTargetParser(*TAP);
125 
126   return Parser->Run(NoInitialTextSection);
127 }
128 
129 
AssembleOneInput(const uint8_t * Data,size_t Size)130 int AssembleOneInput(const uint8_t *Data, size_t Size) {
131   const bool ShowInst = false;
132   const bool AsmVerbose = false;
133   const bool UseDwarfDirectory = true;
134 
135   Triple TheTriple(Triple::normalize(TripleName));
136 
137   SourceMgr SrcMgr;
138 
139   std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
140 
141   // Tell SrcMgr about this buffer, which is what the parser will pick up.
142   SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
143 
144   static const std::vector<std::string> NoIncludeDirs;
145   SrcMgr.setIncludeDirs(NoIncludeDirs);
146 
147   static std::string ArchName;
148   std::string Error;
149   const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
150       Error);
151   if (!TheTarget) {
152     errs() << "error: this target '" << TheTriple.normalize()
153       << "/" << ArchName << "', was not found: '" << Error << "'\n";
154 
155     abort();
156   }
157 
158   std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
159   if (!MRI) {
160     errs() << "Unable to create target register info!";
161     abort();
162   }
163 
164   std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
165   if (!MAI) {
166     errs() << "Unable to create target asm info!";
167     abort();
168   }
169 
170 
171   MCObjectFileInfo MOFI;
172   MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
173 
174   static const bool UsePIC = false;
175   MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx);
176 
177   const unsigned OutputAsmVariant = 0;
178   std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
179   MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
180       *MAI, *MCII, *MRI);
181   if (!IP) {
182     errs()
183       << "error: unable to create instruction printer for target triple '"
184       << TheTriple.normalize() << "' with assembly variant "
185       << OutputAsmVariant << ".\n";
186 
187     abort();
188   }
189 
190   const char *ProgName = "llvm-mc-fuzzer";
191   std::unique_ptr<MCSubtargetInfo> STI(
192       TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
193   std::unique_ptr<MCCodeEmitter> CE = nullptr;
194   std::unique_ptr<MCAsmBackend> MAB = nullptr;
195 
196   MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
197 
198   std::string OutputString;
199   raw_string_ostream Out(OutputString);
200   auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
201 
202   std::unique_ptr<MCStreamer> Str;
203 
204   if (FileType == OFT_AssemblyFile) {
205     Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose,
206                                            UseDwarfDirectory, IP, std::move(CE),
207                                            std::move(MAB), ShowInst));
208   } else {
209     assert(FileType == OFT_ObjectFile && "Invalid file type!");
210 
211     std::error_code EC;
212     const std::string OutputFilename = "-";
213     auto Out =
214         llvm::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::F_None);
215     if (EC) {
216       errs() << EC.message() << '\n';
217       abort();
218     }
219 
220     // Don't waste memory on names of temp labels.
221     Ctx.setUseNamesOnTempLabels(false);
222 
223     std::unique_ptr<buffer_ostream> BOS;
224     raw_pwrite_stream *OS = &Out->os();
225     if (!Out->os().supportsSeeking()) {
226       BOS = make_unique<buffer_ostream>(Out->os());
227       OS = BOS.get();
228     }
229 
230     MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
231     MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
232     Str.reset(TheTarget->createMCObjectStreamer(
233         TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB), *OS,
234         std::unique_ptr<MCCodeEmitter>(CE), *STI, MCOptions.MCRelaxAll,
235         MCOptions.MCIncrementalLinkerCompatible,
236         /*DWARFMustBeAtTheEnd*/ false));
237   }
238   const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
239       *MCII, MCOptions);
240 
241   (void) Res;
242 
243   return 0;
244 }
245 
LLVMFuzzerTestOneInput(const uint8_t * Data,size_t Size)246 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
247   return AssembleOneInput(Data, Size);
248 }
249 
LLVMFuzzerInitialize(int * argc,char *** argv)250 extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
251                                                         char ***argv) {
252   // The command line is unusual compared to other fuzzers due to the need to
253   // specify the target. Options like -triple, -mcpu, and -mattr work like
254   // their counterparts in llvm-mc, while -fuzzer-args collects options for the
255   // fuzzer itself.
256   //
257   // Examples:
258   //
259   // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
260   // 4-bytes each and use the contents of ./corpus as the test corpus:
261   //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
262   //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
263   //
264   // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
265   // feature enabled using up to 64-byte inputs:
266   //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
267   //       -disassemble -fuzzer-args ./corpus
268   //
269   // If your aim is to find instructions that are not tested, then it is
270   // advisable to constrain the maximum input size to a single instruction
271   // using -max_len as in the first example. This results in a test corpus of
272   // individual instructions that test unique paths. Without this constraint,
273   // there will be considerable redundancy in the corpus.
274 
275   char **OriginalArgv = *argv;
276 
277   LLVMInitializeAllTargetInfos();
278   LLVMInitializeAllTargetMCs();
279   LLVMInitializeAllAsmParsers();
280 
281   cl::ParseCommandLineOptions(*argc, OriginalArgv);
282 
283   // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
284   // the driver can parse its arguments.
285   //
286   // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
287   // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
288   // non-const buffer to avoid the need to clean up when the fuzzer terminates.
289   ModifiedArgv.push_back(OriginalArgv[0]);
290   for (const auto &FuzzerArg : FuzzerArgs) {
291     for (int i = 1; i < *argc; ++i) {
292       if (FuzzerArg == OriginalArgv[i])
293         ModifiedArgv.push_back(OriginalArgv[i]);
294     }
295   }
296   *argc = ModifiedArgv.size();
297   *argv = ModifiedArgv.data();
298 
299   // Package up features to be passed to target/subtarget
300   // We have to pass it via a global since the callback doesn't
301   // permit any user data.
302   if (MAttrs.size()) {
303     SubtargetFeatures Features;
304     for (unsigned i = 0; i != MAttrs.size(); ++i)
305       Features.AddFeature(MAttrs[i]);
306     FeaturesStr = Features.getString();
307   }
308 
309   if (TripleName.empty())
310     TripleName = sys::getDefaultTargetTriple();
311 
312   return 0;
313 }
314