1 //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines CachedHashString and CachedHashStringRef. These are owning 11 // and not-owning string types that store their hash in addition to their string 12 // data. 13 // 14 // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap 15 // (because, unlike std::string, CachedHashString lets us have empty and 16 // tombstone values). 17 // 18 //===----------------------------------------------------------------------===// 19 20 #ifndef LLVM_ADT_CACHED_HASH_STRING_H 21 #define LLVM_ADT_CACHED_HASH_STRING_H 22 23 #include "llvm/ADT/DenseMap.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/Support/raw_ostream.h" 26 27 namespace llvm { 28 29 /// A container which contains a StringRef plus a precomputed hash. 30 class CachedHashStringRef { 31 const char *P; 32 uint32_t Size; 33 uint32_t Hash; 34 35 public: 36 // Explicit because hashing a string isn't free. CachedHashStringRef(StringRef S)37 explicit CachedHashStringRef(StringRef S) 38 : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 39 CachedHashStringRef(StringRef S,uint32_t Hash)40 CachedHashStringRef(StringRef S, uint32_t Hash) 41 : P(S.data()), Size(S.size()), Hash(Hash) { 42 assert(S.size() <= std::numeric_limits<uint32_t>::max()); 43 } 44 val()45 StringRef val() const { return StringRef(P, Size); } data()46 const char *data() const { return P; } size()47 uint32_t size() const { return Size; } hash()48 uint32_t hash() const { return Hash; } 49 }; 50 51 template <> struct DenseMapInfo<CachedHashStringRef> { 52 static CachedHashStringRef getEmptyKey() { 53 return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0); 54 } 55 static CachedHashStringRef getTombstoneKey() { 56 return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1); 57 } 58 static unsigned getHashValue(const CachedHashStringRef &S) { 59 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 60 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 61 return S.hash(); 62 } 63 static bool isEqual(const CachedHashStringRef &LHS, 64 const CachedHashStringRef &RHS) { 65 return LHS.hash() == RHS.hash() && 66 DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val()); 67 } 68 }; 69 70 /// A container which contains a string, which it owns, plus a precomputed hash. 71 /// 72 /// We do not null-terminate the string. 73 class CachedHashString { 74 friend struct DenseMapInfo<CachedHashString>; 75 76 char *P; 77 uint32_t Size; 78 uint32_t Hash; 79 80 static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); } 81 static char *getTombstoneKeyPtr() { 82 return DenseMapInfo<char *>::getTombstoneKey(); 83 } 84 85 bool isEmptyOrTombstone() const { 86 return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr(); 87 } 88 89 struct ConstructEmptyOrTombstoneTy {}; 90 91 CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr) 92 : P(EmptyOrTombstonePtr), Size(0), Hash(0) { 93 assert(isEmptyOrTombstone()); 94 } 95 96 // TODO: Use small-string optimization to avoid allocating. 97 98 public: 99 explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {} 100 101 // Explicit because copying and hashing a string isn't free. 102 explicit CachedHashString(StringRef S) 103 : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {} 104 105 CachedHashString(StringRef S, uint32_t Hash) 106 : P(new char[S.size()]), Size(S.size()), Hash(Hash) { 107 memcpy(P, S.data(), S.size()); 108 } 109 110 // Ideally this class would not be copyable. But SetVector requires copyable 111 // keys, and we want this to be usable there. 112 CachedHashString(const CachedHashString &Other) 113 : Size(Other.Size), Hash(Other.Hash) { 114 if (Other.isEmptyOrTombstone()) { 115 P = Other.P; 116 } else { 117 P = new char[Size]; 118 memcpy(P, Other.P, Size); 119 } 120 } 121 122 CachedHashString &operator=(CachedHashString Other) { 123 swap(*this, Other); 124 return *this; 125 } 126 127 CachedHashString(CachedHashString &&Other) noexcept 128 : P(Other.P), Size(Other.Size), Hash(Other.Hash) { 129 Other.P = getEmptyKeyPtr(); 130 } 131 132 ~CachedHashString() { 133 if (!isEmptyOrTombstone()) 134 delete[] P; 135 } 136 137 StringRef val() const { return StringRef(P, Size); } 138 uint32_t size() const { return Size; } 139 uint32_t hash() const { return Hash; } 140 141 operator StringRef() const { return val(); } 142 operator CachedHashStringRef() const { 143 return CachedHashStringRef(val(), Hash); 144 } 145 146 friend void swap(CachedHashString &LHS, CachedHashString &RHS) { 147 using std::swap; 148 swap(LHS.P, RHS.P); 149 swap(LHS.Size, RHS.Size); 150 swap(LHS.Hash, RHS.Hash); 151 } 152 }; 153 154 template <> struct DenseMapInfo<CachedHashString> { 155 static CachedHashString getEmptyKey() { 156 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 157 CachedHashString::getEmptyKeyPtr()); 158 } 159 static CachedHashString getTombstoneKey() { 160 return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(), 161 CachedHashString::getTombstoneKeyPtr()); 162 } 163 static unsigned getHashValue(const CachedHashString &S) { 164 assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!"); 165 assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!"); 166 return S.hash(); 167 } 168 static bool isEqual(const CachedHashString &LHS, 169 const CachedHashString &RHS) { 170 if (LHS.hash() != RHS.hash()) 171 return false; 172 if (LHS.P == CachedHashString::getEmptyKeyPtr()) 173 return RHS.P == CachedHashString::getEmptyKeyPtr(); 174 if (LHS.P == CachedHashString::getTombstoneKeyPtr()) 175 return RHS.P == CachedHashString::getTombstoneKeyPtr(); 176 177 // This is safe because if RHS.P is the empty or tombstone key, it will have 178 // length 0, so we'll never dereference its pointer. 179 return LHS.val() == RHS.val(); 180 } 181 }; 182 183 } // namespace llvm 184 185 #endif 186