1 //===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // Define the main class fuzzer::Fuzzer and most functions. 10 //===----------------------------------------------------------------------===// 11 12 #ifndef LLVM_FUZZER_INTERNAL_H 13 #define LLVM_FUZZER_INTERNAL_H 14 15 #include <algorithm> 16 #include <atomic> 17 #include <cassert> 18 #include <chrono> 19 #include <climits> 20 #include <cstddef> 21 #include <cstdlib> 22 #include <random> 23 #include <string.h> 24 #include <string> 25 #include <unordered_set> 26 #include <vector> 27 28 #include "FuzzerExtFunctions.h" 29 #include "FuzzerInterface.h" 30 #include "FuzzerTracePC.h" 31 32 // Platform detection. 33 #ifdef __linux__ 34 #define LIBFUZZER_LINUX 1 35 #define LIBFUZZER_APPLE 0 36 #elif __APPLE__ 37 #define LIBFUZZER_LINUX 0 38 #define LIBFUZZER_APPLE 1 39 #else 40 #error "Support for your platform has not been implemented" 41 #endif 42 43 namespace fuzzer { 44 45 typedef int (*UserCallback)(const uint8_t *Data, size_t Size); 46 int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); 47 48 using namespace std::chrono; 49 typedef std::vector<uint8_t> Unit; 50 typedef std::vector<Unit> UnitVector; 51 52 // A simple POD sized array of bytes. 53 template <size_t kMaxSize> class FixedWord { 54 public: FixedWord()55 FixedWord() {} FixedWord(const uint8_t * B,uint8_t S)56 FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } 57 Set(const uint8_t * B,uint8_t S)58 void Set(const uint8_t *B, uint8_t S) { 59 assert(S <= kMaxSize); 60 memcpy(Data, B, S); 61 Size = S; 62 } 63 64 bool operator==(const FixedWord<kMaxSize> &w) const { 65 return Size == w.Size && 0 == memcmp(Data, w.Data, Size); 66 } 67 68 bool operator<(const FixedWord<kMaxSize> &w) const { 69 if (Size != w.Size) 70 return Size < w.Size; 71 return memcmp(Data, w.Data, Size) < 0; 72 } 73 GetMaxSize()74 static size_t GetMaxSize() { return kMaxSize; } data()75 const uint8_t *data() const { return Data; } size()76 uint8_t size() const { return Size; } 77 78 private: 79 uint8_t Size = 0; 80 uint8_t Data[kMaxSize]; 81 }; 82 83 typedef FixedWord<27> Word; // 28 bytes. 84 85 bool IsFile(const std::string &Path); 86 std::string FileToString(const std::string &Path); 87 Unit FileToVector(const std::string &Path, size_t MaxSize = 0); 88 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, 89 long *Epoch, size_t MaxSize); 90 void WriteToFile(const Unit &U, const std::string &Path); 91 void CopyFileToErr(const std::string &Path); 92 // Returns "Dir/FileName" or equivalent for the current OS. 93 std::string DirPlusFile(const std::string &DirPath, 94 const std::string &FileName); 95 96 void DupAndCloseStderr(); 97 void CloseStdout(); 98 void Printf(const char *Fmt, ...); 99 void PrintHexArray(const Unit &U, const char *PrintAfter = ""); 100 void PrintHexArray(const uint8_t *Data, size_t Size, 101 const char *PrintAfter = ""); 102 void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); 103 void PrintASCII(const Unit &U, const char *PrintAfter = ""); 104 void PrintASCII(const Word &W, const char *PrintAfter = ""); 105 std::string Hash(const Unit &U); 106 void SetTimer(int Seconds); 107 void SetSigSegvHandler(); 108 void SetSigBusHandler(); 109 void SetSigAbrtHandler(); 110 void SetSigIllHandler(); 111 void SetSigFpeHandler(); 112 void SetSigIntHandler(); 113 void SetSigTermHandler(); 114 std::string Base64(const Unit &U); 115 int ExecuteCommand(const std::string &Command); 116 size_t GetPeakRSSMb(); 117 118 // Private copy of SHA1 implementation. 119 static const int kSHA1NumBytes = 20; 120 // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. 121 void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); 122 123 // Changes U to contain only ASCII (isprint+isspace) characters. 124 // Returns true iff U has been changed. 125 bool ToASCII(uint8_t *Data, size_t Size); 126 bool IsASCII(const Unit &U); 127 bool IsASCII(const uint8_t *Data, size_t Size); 128 129 int NumberOfCpuCores(); 130 int GetPid(); 131 void SleepSeconds(int Seconds); 132 133 class Random { 134 public: Random(unsigned int seed)135 Random(unsigned int seed) : R(seed) {} Rand()136 size_t Rand() { return R(); } RandBool()137 size_t RandBool() { return Rand() % 2; } operator()138 size_t operator()(size_t n) { return n ? Rand() % n : 0; } Get_mt19937()139 std::mt19937 &Get_mt19937() { return R; } 140 private: 141 std::mt19937 R; 142 }; 143 144 // Dictionary. 145 146 // Parses one dictionary entry. 147 // If successfull, write the enty to Unit and returns true, 148 // otherwise returns false. 149 bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); 150 // Parses the dictionary file, fills Units, returns true iff all lines 151 // were parsed succesfully. 152 bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); 153 154 class DictionaryEntry { 155 public: DictionaryEntry()156 DictionaryEntry() {} DictionaryEntry(Word W)157 DictionaryEntry(Word W) : W(W) {} DictionaryEntry(Word W,size_t PositionHint)158 DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} GetW()159 const Word &GetW() const { return W; } 160 HasPositionHint()161 bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); } GetPositionHint()162 size_t GetPositionHint() const { 163 assert(HasPositionHint()); 164 return PositionHint; 165 } IncUseCount()166 void IncUseCount() { UseCount++; } IncSuccessCount()167 void IncSuccessCount() { SuccessCount++; } GetUseCount()168 size_t GetUseCount() const { return UseCount; } GetSuccessCount()169 size_t GetSuccessCount() const {return SuccessCount; } 170 171 private: 172 Word W; 173 size_t PositionHint = std::numeric_limits<size_t>::max(); 174 size_t UseCount = 0; 175 size_t SuccessCount = 0; 176 }; 177 178 class Dictionary { 179 public: 180 static const size_t kMaxDictSize = 1 << 14; 181 ContainsWord(const Word & W)182 bool ContainsWord(const Word &W) const { 183 return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { 184 return DE.GetW() == W; 185 }); 186 } begin()187 const DictionaryEntry *begin() const { return &DE[0]; } end()188 const DictionaryEntry *end() const { return begin() + Size; } 189 DictionaryEntry & operator[] (size_t Idx) { 190 assert(Idx < Size); 191 return DE[Idx]; 192 } push_back(DictionaryEntry DE)193 void push_back(DictionaryEntry DE) { 194 if (Size < kMaxDictSize) 195 this->DE[Size++] = DE; 196 } clear()197 void clear() { Size = 0; } empty()198 bool empty() const { return Size == 0; } size()199 size_t size() const { return Size; } 200 201 private: 202 DictionaryEntry DE[kMaxDictSize]; 203 size_t Size = 0; 204 }; 205 206 struct FuzzingOptions { 207 int Verbosity = 1; 208 size_t MaxLen = 0; 209 int UnitTimeoutSec = 300; 210 int TimeoutExitCode = 77; 211 int ErrorExitCode = 77; 212 int MaxTotalTimeSec = 0; 213 int RssLimitMb = 0; 214 bool DoCrossOver = true; 215 int MutateDepth = 5; 216 bool UseCounters = false; 217 bool UseIndirCalls = true; 218 bool UseTraces = false; 219 bool UseMemcmp = true; 220 bool UseFullCoverageSet = false; 221 bool Reload = true; 222 bool ShuffleAtStartUp = true; 223 bool PreferSmall = true; 224 size_t MaxNumberOfRuns = ULONG_MAX; 225 int ReportSlowUnits = 10; 226 bool OnlyASCII = false; 227 std::string OutputCorpus; 228 std::string ArtifactPrefix = "./"; 229 std::string ExactArtifactPath; 230 bool SaveArtifacts = true; 231 bool PrintNEW = true; // Print a status line when new units are found; 232 bool OutputCSV = false; 233 bool PrintNewCovPcs = false; 234 bool PrintFinalStats = false; 235 bool DetectLeaks = true; 236 bool TruncateUnits = false; 237 bool PruneCorpus = true; 238 }; 239 240 class MutationDispatcher { 241 public: 242 MutationDispatcher(Random &Rand, const FuzzingOptions &Options); ~MutationDispatcher()243 ~MutationDispatcher() {} 244 /// Indicate that we are about to start a new sequence of mutations. 245 void StartMutationSequence(); 246 /// Print the current sequence of mutations. 247 void PrintMutationSequence(); 248 /// Indicate that the current sequence of mutations was successfull. 249 void RecordSuccessfulMutationSequence(); 250 /// Mutates data by invoking user-provided mutator. 251 size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); 252 /// Mutates data by invoking user-provided crossover. 253 size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); 254 /// Mutates data by shuffling bytes. 255 size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); 256 /// Mutates data by erasing a byte. 257 size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); 258 /// Mutates data by inserting a byte. 259 size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); 260 /// Mutates data by chanding one byte. 261 size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); 262 /// Mutates data by chanding one bit. 263 size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); 264 265 /// Mutates data by adding a word from the manual dictionary. 266 size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, 267 size_t MaxSize); 268 269 /// Mutates data by adding a word from the temporary automatic dictionary. 270 size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, 271 size_t MaxSize); 272 273 /// Mutates data by adding a word from the persistent automatic dictionary. 274 size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, 275 size_t MaxSize); 276 277 /// Tries to find an ASCII integer in Data, changes it to another ASCII int. 278 size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); 279 280 /// CrossOver Data with some other element of the corpus. 281 size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); 282 283 /// Applies one of the configured mutations. 284 /// Returns the new size of data which could be up to MaxSize. 285 size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); 286 /// Applies one of the default mutations. Provided as a service 287 /// to mutation authors. 288 size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); 289 290 /// Creates a cross-over of two pieces of Data, returns its size. 291 size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, 292 size_t Size2, uint8_t *Out, size_t MaxOutSize); 293 294 void AddWordToManualDictionary(const Word &W); 295 296 void AddWordToAutoDictionary(const Word &W, size_t PositionHint); 297 void ClearAutoDictionary(); 298 void PrintRecommendedDictionary(); 299 SetCorpus(const std::vector<Unit> * Corpus)300 void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; } 301 GetRand()302 Random &GetRand() { return Rand; } 303 304 private: 305 306 struct Mutator { 307 size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); 308 const char *Name; 309 }; 310 311 size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, 312 size_t MaxSize); 313 size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, 314 const std::vector<Mutator> &Mutators); 315 316 Random &Rand; 317 const FuzzingOptions Options; 318 319 // Dictionary provided by the user via -dict=DICT_FILE. 320 Dictionary ManualDictionary; 321 // Temporary dictionary modified by the fuzzer itself, 322 // recreated periodically. 323 Dictionary TempAutoDictionary; 324 // Persistent dictionary modified by the fuzzer, consists of 325 // entries that led to successfull discoveries in the past mutations. 326 Dictionary PersistentAutoDictionary; 327 std::vector<Mutator> CurrentMutatorSequence; 328 std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence; 329 const std::vector<Unit> *Corpus = nullptr; 330 std::vector<uint8_t> MutateInPlaceHere; 331 332 std::vector<Mutator> Mutators; 333 std::vector<Mutator> DefaultMutators; 334 }; 335 336 class Fuzzer { 337 public: 338 339 // Aggregates all available coverage measurements. 340 struct Coverage { CoverageCoverage341 Coverage() { Reset(); } 342 ResetCoverage343 void Reset() { 344 BlockCoverage = 0; 345 CallerCalleeCoverage = 0; 346 PcMapBits = 0; 347 CounterBitmapBits = 0; 348 PcBufferLen = 0; 349 CounterBitmap.clear(); 350 PCMap.Reset(); 351 } 352 353 std::string DebugString() const; 354 355 size_t BlockCoverage; 356 size_t CallerCalleeCoverage; 357 358 size_t PcBufferLen; 359 // Precalculated number of bits in CounterBitmap. 360 size_t CounterBitmapBits; 361 std::vector<uint8_t> CounterBitmap; 362 // Precalculated number of bits in PCMap. 363 size_t PcMapBits; 364 PcCoverageMap PCMap; 365 }; 366 367 Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); AddToCorpus(const Unit & U)368 void AddToCorpus(const Unit &U) { 369 Corpus.push_back(U); 370 UpdateCorpusDistribution(); 371 } 372 size_t ChooseUnitIdxToMutate(); ChooseUnitToMutate()373 const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; 374 void TruncateUnits(std::vector<Unit> *NewCorpus); 375 void Loop(); 376 void Drill(); 377 void ShuffleAndMinimize(); 378 void InitializeTraceState(); 379 void AssignTaintLabels(uint8_t *Data, size_t Size); CorpusSize()380 size_t CorpusSize() const { return Corpus.size(); } 381 size_t MaxUnitSizeInCorpus() const; ReadDir(const std::string & Path,long * Epoch,size_t MaxSize)382 void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { 383 Printf("Loading corpus: %s\n", Path.c_str()); 384 ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); 385 } 386 void RereadOutputCorpus(size_t MaxSize); 387 // Save the current corpus to OutputCorpus. 388 void SaveCorpus(); 389 secondsSinceProcessStartUp()390 size_t secondsSinceProcessStartUp() { 391 return duration_cast<seconds>(system_clock::now() - ProcessStartTime) 392 .count(); 393 } execPerSec()394 size_t execPerSec() { 395 size_t Seconds = secondsSinceProcessStartUp(); 396 return Seconds ? TotalNumberOfRuns / Seconds : 0; 397 } 398 getTotalNumberOfRuns()399 size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } 400 401 static void StaticAlarmCallback(); 402 static void StaticCrashSignalCallback(); 403 static void StaticInterruptCallback(); 404 405 void ExecuteCallback(const uint8_t *Data, size_t Size); 406 bool RunOne(const uint8_t *Data, size_t Size); 407 408 // Merge Corpora[1:] into Corpora[0]. 409 void Merge(const std::vector<std::string> &Corpora); 410 // Returns a subset of 'Extra' that adds coverage to 'Initial'. 411 UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); GetMD()412 MutationDispatcher &GetMD() { return MD; } 413 void PrintFinalStats(); 414 void SetMaxLen(size_t MaxLen); 415 void RssLimitCallback(); 416 417 // Public for tests. 418 void ResetCoverage(); 419 InFuzzingThread()420 bool InFuzzingThread() const { return IsMyThread; } 421 size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; 422 423 private: 424 void AlarmCallback(); 425 void CrashCallback(); 426 void InterruptCallback(); 427 void MutateAndTestOne(); 428 void ReportNewCoverage(const Unit &U); RunOne(const Unit & U)429 bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } 430 void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); 431 void WriteToOutputCorpus(const Unit &U); 432 void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); 433 void PrintStats(const char *Where, const char *End = "\n"); 434 void PrintStatusForNewUnit(const Unit &U); 435 void ShuffleCorpus(UnitVector *V); 436 void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, 437 bool DuringInitialCorpusExecution); 438 439 // Updates the probability distribution for the units in the corpus. 440 // Must be called whenever the corpus or unit weights are changed. 441 void UpdateCorpusDistribution(); 442 443 bool UpdateMaxCoverage(); 444 445 // Trace-based fuzzing: we run a unit with some kind of tracing 446 // enabled and record potentially useful mutations. Then 447 // We apply these mutations one by one to the unit and run it again. 448 449 // Start tracing; forget all previously proposed mutations. 450 void StartTraceRecording(); 451 // Stop tracing. 452 void StopTraceRecording(); 453 454 void SetDeathCallback(); 455 static void StaticDeathCallback(); 456 void DumpCurrentUnit(const char *Prefix); 457 void DeathCallback(); 458 459 void LazyAllocateCurrentUnitData(); 460 uint8_t *CurrentUnitData = nullptr; 461 std::atomic<size_t> CurrentUnitSize; 462 463 size_t TotalNumberOfRuns = 0; 464 size_t NumberOfNewUnitsAdded = 0; 465 466 bool HasMoreMallocsThanFrees = false; 467 size_t NumberOfLeakDetectionAttempts = 0; 468 469 std::vector<Unit> Corpus; 470 std::unordered_set<std::string> UnitHashesAddedToCorpus; 471 472 std::piecewise_constant_distribution<double> CorpusDistribution; 473 UserCallback CB; 474 MutationDispatcher &MD; 475 FuzzingOptions Options; 476 system_clock::time_point ProcessStartTime = system_clock::now(); 477 system_clock::time_point UnitStartTime; 478 long TimeOfLongestUnitInSeconds = 0; 479 long EpochOfLastReadOfOutputCorpus = 0; 480 481 // Maximum recorded coverage. 482 Coverage MaxCoverage; 483 484 // Need to know our own thread. 485 static thread_local bool IsMyThread; 486 }; 487 488 // Global interface to functions that may or may not be available. 489 extern ExternalFunctions *EF; 490 491 }; // namespace fuzzer 492 493 #endif // LLVM_FUZZER_INTERNAL_H 494