1 //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // fuzzer::InputCorpus 10 //===----------------------------------------------------------------------===// 11 12 #ifndef LLVM_FUZZER_CORPUS 13 #define LLVM_FUZZER_CORPUS 14 15 #include "FuzzerDefs.h" 16 #include "FuzzerIO.h" 17 #include "FuzzerRandom.h" 18 #include "FuzzerSHA1.h" 19 #include "FuzzerTracePC.h" 20 #include <numeric> 21 #include <random> 22 #include <unordered_set> 23 24 namespace fuzzer { 25 26 struct InputInfo { 27 Unit U; // The actual input data. 28 uint8_t Sha1[kSHA1NumBytes]; // Checksum. 29 // Number of features that this input has and no smaller input has. 30 size_t NumFeatures = 0; 31 size_t Tmp = 0; // Used by ValidateFeatureSet. 32 // Stats. 33 size_t NumExecutedMutations = 0; 34 size_t NumSuccessfullMutations = 0; 35 bool MayDeleteFile = false; 36 }; 37 38 class InputCorpus { 39 public: 40 static const size_t kFeatureSetSize = 1 << 16; InputCorpus(const std::string & OutputCorpus)41 InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { 42 memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); 43 memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); 44 } ~InputCorpus()45 ~InputCorpus() { 46 for (auto II : Inputs) 47 delete II; 48 } size()49 size_t size() const { return Inputs.size(); } SizeInBytes()50 size_t SizeInBytes() const { 51 size_t Res = 0; 52 for (auto II : Inputs) 53 Res += II->U.size(); 54 return Res; 55 } NumActiveUnits()56 size_t NumActiveUnits() const { 57 size_t Res = 0; 58 for (auto II : Inputs) 59 Res += !II->U.empty(); 60 return Res; 61 } empty()62 bool empty() const { return Inputs.empty(); } 63 const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } 64 void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile = false) { 65 assert(!U.empty()); 66 uint8_t Hash[kSHA1NumBytes]; 67 if (FeatureDebug) 68 Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); 69 ComputeSHA1(U.data(), U.size(), Hash); 70 Hashes.insert(Sha1ToString(Hash)); 71 Inputs.push_back(new InputInfo()); 72 InputInfo &II = *Inputs.back(); 73 II.U = U; 74 II.NumFeatures = NumFeatures; 75 II.MayDeleteFile = MayDeleteFile; 76 memcpy(II.Sha1, Hash, kSHA1NumBytes); 77 UpdateCorpusDistribution(); 78 ValidateFeatureSet(); 79 } 80 HasUnit(const Unit & U)81 bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } HasUnit(const std::string & H)82 bool HasUnit(const std::string &H) { return Hashes.count(H); } ChooseUnitToMutate(Random & Rand)83 InputInfo &ChooseUnitToMutate(Random &Rand) { 84 InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; 85 assert(!II.U.empty()); 86 return II; 87 }; 88 89 // Returns an index of random unit from the corpus to mutate. 90 // Hypothesis: units added to the corpus last are more likely to be 91 // interesting. This function gives more weight to the more recent units. ChooseUnitIdxToMutate(Random & Rand)92 size_t ChooseUnitIdxToMutate(Random &Rand) { 93 size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937())); 94 assert(Idx < Inputs.size()); 95 return Idx; 96 } 97 PrintStats()98 void PrintStats() { 99 for (size_t i = 0; i < Inputs.size(); i++) { 100 const auto &II = *Inputs[i]; 101 Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, 102 Sha1ToString(II.Sha1).c_str(), II.U.size(), 103 II.NumExecutedMutations, II.NumSuccessfullMutations); 104 } 105 } 106 PrintFeatureSet()107 void PrintFeatureSet() { 108 for (size_t i = 0; i < kFeatureSetSize; i++) { 109 if(size_t Sz = GetFeature(i)) 110 Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); 111 } 112 Printf("\n\t"); 113 for (size_t i = 0; i < Inputs.size(); i++) 114 if (size_t N = Inputs[i]->NumFeatures) 115 Printf(" %zd=>%zd ", i, N); 116 Printf("\n"); 117 } 118 DeleteInput(size_t Idx)119 void DeleteInput(size_t Idx) { 120 InputInfo &II = *Inputs[Idx]; 121 if (!OutputCorpus.empty() && II.MayDeleteFile) 122 RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); 123 Unit().swap(II.U); 124 if (FeatureDebug) 125 Printf("EVICTED %zd\n", Idx); 126 } 127 AddFeature(size_t Idx,uint32_t NewSize,bool Shrink)128 bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { 129 assert(NewSize); 130 Idx = Idx % kFeatureSetSize; 131 uint32_t OldSize = GetFeature(Idx); 132 if (OldSize == 0 || (Shrink && OldSize > NewSize)) { 133 if (OldSize > 0) { 134 size_t OldIdx = SmallestElementPerFeature[Idx]; 135 InputInfo &II = *Inputs[OldIdx]; 136 assert(II.NumFeatures > 0); 137 II.NumFeatures--; 138 if (II.NumFeatures == 0) 139 DeleteInput(OldIdx); 140 } 141 if (FeatureDebug) 142 Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); 143 SmallestElementPerFeature[Idx] = Inputs.size(); 144 InputSizesPerFeature[Idx] = NewSize; 145 CountingFeatures = true; 146 return true; 147 } 148 return false; 149 } 150 NumFeatures()151 size_t NumFeatures() const { 152 size_t Res = 0; 153 for (size_t i = 0; i < kFeatureSetSize; i++) 154 Res += GetFeature(i) != 0; 155 return Res; 156 } 157 ResetFeatureSet()158 void ResetFeatureSet() { 159 assert(Inputs.empty()); 160 memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); 161 memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); 162 } 163 164 private: 165 166 static const bool FeatureDebug = false; 167 GetFeature(size_t Idx)168 size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } 169 ValidateFeatureSet()170 void ValidateFeatureSet() { 171 if (!CountingFeatures) return; 172 if (FeatureDebug) 173 PrintFeatureSet(); 174 for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) 175 if (GetFeature(Idx)) 176 Inputs[SmallestElementPerFeature[Idx]]->Tmp++; 177 for (auto II: Inputs) { 178 if (II->Tmp != II->NumFeatures) 179 Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); 180 assert(II->Tmp == II->NumFeatures); 181 II->Tmp = 0; 182 } 183 } 184 185 // Updates the probability distribution for the units in the corpus. 186 // Must be called whenever the corpus or unit weights are changed. UpdateCorpusDistribution()187 void UpdateCorpusDistribution() { 188 size_t N = Inputs.size(); 189 Intervals.resize(N + 1); 190 Weights.resize(N); 191 std::iota(Intervals.begin(), Intervals.end(), 0); 192 if (CountingFeatures) 193 for (size_t i = 0; i < N; i++) 194 Weights[i] = Inputs[i]->NumFeatures * (i + 1); 195 else 196 std::iota(Weights.begin(), Weights.end(), 1); 197 CorpusDistribution = std::piecewise_constant_distribution<double>( 198 Intervals.begin(), Intervals.end(), Weights.begin()); 199 } 200 std::piecewise_constant_distribution<double> CorpusDistribution; 201 202 std::vector<double> Intervals; 203 std::vector<double> Weights; 204 205 std::unordered_set<std::string> Hashes; 206 std::vector<InputInfo*> Inputs; 207 208 bool CountingFeatures = false; 209 uint32_t InputSizesPerFeature[kFeatureSetSize]; 210 uint32_t SmallestElementPerFeature[kFeatureSetSize]; 211 212 std::string OutputCorpus; 213 }; 214 215 } // namespace fuzzer 216 217 #endif // LLVM_FUZZER_CORPUS 218