1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #ifndef RE2_TESTING_STRING_GENERATOR_H_ 6 #define RE2_TESTING_STRING_GENERATOR_H_ 7 8 // String generator: generates all possible strings of up to 9 // maxlen letters using the set of letters in alpha. 10 // Fetch strings using a Java-like Next()/HasNext() interface. 11 12 #include <stdint.h> 13 #include <random> 14 #include <string> 15 #include <vector> 16 17 #include "util/util.h" 18 #include "re2/stringpiece.h" 19 20 namespace re2 { 21 22 class StringGenerator { 23 public: 24 StringGenerator(int maxlen, const std::vector<std::string>& alphabet); ~StringGenerator()25 ~StringGenerator() {} 26 27 const StringPiece& Next(); HasNext()28 bool HasNext() { return hasnext_; } 29 30 // Resets generator to start sequence over. 31 void Reset(); 32 33 // Causes generator to emit random strings for next n calls to Next(). 34 void Random(int32_t seed, int n); 35 36 // Causes generator to emit a NULL as the next call. 37 void GenerateNULL(); 38 39 private: 40 bool IncrementDigits(); 41 bool RandomDigits(); 42 43 // Global state. 44 int maxlen_; // Maximum length string to generate. 45 std::vector<std::string> alphabet_; // Alphabet, one string per letter. 46 47 // Iteration state. 48 StringPiece sp_; // Last StringPiece returned by Next(). 49 std::string s_; // String data in last StringPiece returned by Next(). 50 bool hasnext_; // Whether Next() can be called again. 51 std::vector<int> digits_; // Alphabet indices for next string. 52 bool generate_null_; // Whether to generate a NULL StringPiece next. 53 bool random_; // Whether generated strings are random. 54 int nrandom_; // Number of random strings left to generate. 55 std::minstd_rand0 rng_; // Random number generator. 56 57 StringGenerator(const StringGenerator&) = delete; 58 StringGenerator& operator=(const StringGenerator&) = delete; 59 }; 60 61 // Generates and returns a string over binary alphabet {0,1} that contains 62 // all possible binary sequences of length n as subsequences. The obvious 63 // brute force method would generate a string of length n * 2^n, but this 64 // generates a string of length n-1 + 2^n called a De Bruijn cycle. 65 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. 66 // 67 // Such a string is useful for testing a DFA. If you have a DFA 68 // where distinct last n bytes implies distinct states, then running on a 69 // DeBruijn string causes the DFA to need to create a new state at every 70 // position in the input, never reusing any states until it gets to the 71 // end of the string. This is the worst possible case for DFA execution. 72 std::string DeBruijnString(int n); 73 74 } // namespace re2 75 76 #endif // RE2_TESTING_STRING_GENERATOR_H_ 77