1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 11 12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 13 # pragma GCC system_header 14 #endif 15 16 #include <__algorithm/fill_n.h> 17 #include <__config> 18 #include <__functional/hash.h> 19 #include <__functional/operations.h> 20 #include <__iterator/distance.h> 21 #include <__iterator/iterator_traits.h> 22 #include <__memory/shared_ptr.h> 23 #include <__utility/pair.h> 24 #include <array> 25 #include <unordered_map> 26 #include <vector> 27 28 #if _LIBCPP_STD_VER >= 17 29 30 _LIBCPP_PUSH_MACROS 31 #include <__undef_macros> 32 33 _LIBCPP_BEGIN_NAMESPACE_STD 34 35 template <class _Key, 36 class _Value, 37 class _Hash, 38 class _BinaryPredicate, 39 bool /*useArray*/> 40 class _BMSkipTable; 41 42 // General case for BM data searching; use a map 43 template <class _Key, 44 class _Value, 45 class _Hash, 46 class _BinaryPredicate> 47 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { 48 private: 49 using value_type = _Value; 50 using key_type = _Key; 51 52 const value_type __default_value_; 53 unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; 54 55 public: 56 _LIBCPP_HIDE_FROM_ABI _BMSkipTable(size_t __sz,value_type __default_value,_Hash __hash,_BinaryPredicate __pred)57 explicit _BMSkipTable(size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) 58 : __default_value_(__default_value), 59 __table_(__sz, __hash, __pred) {} 60 insert(const key_type & __key,value_type __val)61 _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { 62 __table_[__key] = __val; 63 } 64 65 _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { 66 auto __it = __table_.find(__key); 67 return __it == __table_.end() ? __default_value_ : __it->second; 68 } 69 }; 70 71 // Special case small numeric values; use an array 72 template <class _Key, 73 class _Value, 74 class _Hash, 75 class _BinaryPredicate> 76 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { 77 private: 78 using value_type = _Value; 79 using key_type = _Key; 80 81 using unsigned_key_type = make_unsigned_t<key_type>; 82 std::array<value_type, 256> __table_; 83 static_assert(numeric_limits<unsigned_key_type>::max() < 256); 84 85 public: _BMSkipTable(size_t,value_type __default_value,_Hash,_BinaryPredicate)86 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { 87 std::fill_n(__table_.data(), __table_.size(), __default_value); 88 } 89 insert(key_type __key,value_type __val)90 _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { 91 __table_[static_cast<unsigned_key_type>(__key)] = __val; 92 } 93 94 _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { 95 return __table_[static_cast<unsigned_key_type>(__key)]; 96 } 97 }; 98 99 template <class _RandomAccessIterator1, 100 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 101 class _BinaryPredicate = equal_to<>> 102 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { 103 private: 104 using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; 105 using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; 106 using __skip_table_type = _BMSkipTable<value_type, 107 difference_type, 108 _Hash, 109 _BinaryPredicate, 110 is_integral_v<value_type> 111 && sizeof(value_type) == 1 112 && is_same_v<_Hash, hash<value_type>> 113 && is_same_v<_BinaryPredicate, equal_to<>>>; 114 115 public: 116 _LIBCPP_HIDE_FROM_ABI 117 boyer_moore_searcher(_RandomAccessIterator1 __first, 118 _RandomAccessIterator1 __last, 119 _Hash __hash = _Hash(), 120 _BinaryPredicate __pred = _BinaryPredicate()) __first_(__first)121 : __first_(__first), 122 __last_(__last), 123 __pred_(__pred), 124 __pattern_length_(__last - __first), 125 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), 126 __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( 127 allocator<difference_type>(), __pattern_length_ + 1)) { 128 difference_type __i = 0; 129 while (__first != __last) { 130 __skip_table_->insert(*__first, __i); 131 ++__first; 132 ++__i; 133 } 134 __build_suffix_table(__first_, __last_, __pred_); 135 } 136 137 template <class _RandomAccessIterator2> 138 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()139 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 140 static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, 141 typename iterator_traits<_RandomAccessIterator2>::value_type>::value, 142 "Corpus and Pattern iterators must point to the same type"); 143 if (__first == __last) 144 return std::make_pair(__last, __last); 145 if (__first_ == __last_) 146 return std::make_pair(__first, __first); 147 148 if (__pattern_length_ > (__last - __first)) 149 return std::make_pair(__last, __last); 150 return __search(__first, __last); 151 } 152 153 private: 154 _RandomAccessIterator1 __first_; 155 _RandomAccessIterator1 __last_; 156 _BinaryPredicate __pred_; 157 difference_type __pattern_length_; 158 shared_ptr<__skip_table_type> __skip_table_; 159 shared_ptr<difference_type[]> __suffix_; 160 161 template <class _RandomAccessIterator2> 162 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> __search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)163 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 164 _RandomAccessIterator2 __current = __f; 165 const _RandomAccessIterator2 __last = __l - __pattern_length_; 166 const __skip_table_type& __skip_table = *__skip_table_; 167 168 while (__current <= __last) { 169 difference_type __j = __pattern_length_; 170 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 171 --__j; 172 if (__j == 0) 173 return std::make_pair(__current, __current + __pattern_length_); 174 } 175 176 difference_type __k = __skip_table[__current[__j - 1]]; 177 difference_type __m = __j - __k - 1; 178 if (__k < __j && __m > __suffix_[__j]) 179 __current += __m; 180 else 181 __current += __suffix_[__j]; 182 } 183 return std::make_pair(__l, __l); 184 } 185 186 template <class _Iterator, class _Container> 187 _LIBCPP_HIDE_FROM_ABI void __compute_bm_prefix(_Iterator __first,_Iterator __last,_BinaryPredicate __pred,_Container & __prefix)188 __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { 189 const size_t __count = __last - __first; 190 191 __prefix[0] = 0; 192 size_t __k = 0; 193 194 for (size_t __i = 1; __i != __count; ++__i) { 195 while (__k > 0 && !__pred(__first[__k], __first[__i])) 196 __k = __prefix[__k - 1]; 197 198 if (__pred(__first[__k], __first[__i])) 199 ++__k; 200 __prefix[__i] = __k; 201 } 202 } 203 204 _LIBCPP_HIDE_FROM_ABI void __build_suffix_table(_RandomAccessIterator1 __first,_RandomAccessIterator1 __last,_BinaryPredicate __pred)205 __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { 206 const size_t __count = __last - __first; 207 208 if (__count == 0) 209 return; 210 211 vector<difference_type> __scratch(__count); 212 213 __compute_bm_prefix(__first, __last, __pred, __scratch); 214 for (size_t __i = 0; __i <= __count; ++__i) 215 __suffix_[__i] = __count - __scratch[__count - 1]; 216 217 using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; 218 __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); 219 220 for (size_t __i = 0; __i != __count; ++__i) { 221 const size_t __j = __count - __scratch[__i]; 222 const difference_type __k = __i - __scratch[__i] + 1; 223 224 if (__suffix_[__j] > __k) 225 __suffix_[__j] = __k; 226 } 227 } 228 }; 229 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); 230 231 template <class _RandomAccessIterator1, 232 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 233 class _BinaryPredicate = equal_to<>> 234 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { 235 private: 236 using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; 237 using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; 238 using __skip_table_type = _BMSkipTable<value_type, 239 difference_type, 240 _Hash, 241 _BinaryPredicate, 242 is_integral_v<value_type> 243 && sizeof(value_type) == 1 244 && is_same_v<_Hash, hash<value_type>> 245 && is_same_v<_BinaryPredicate, equal_to<>>>; 246 public: 247 _LIBCPP_HIDE_FROM_ABI 248 boyer_moore_horspool_searcher(_RandomAccessIterator1 __first, 249 _RandomAccessIterator1 __last, 250 _Hash __hash = _Hash(), 251 _BinaryPredicate __pred = _BinaryPredicate()) __first_(__first)252 : __first_(__first), 253 __last_(__last), 254 __pred_(__pred), 255 __pattern_length_(__last - __first), 256 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { 257 if (__first == __last) 258 return; 259 --__last; 260 difference_type __i = 0; 261 while (__first != __last) { 262 __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); 263 ++__first; 264 ++__i; 265 } 266 } 267 268 template <class _RandomAccessIterator2> 269 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> operator()270 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 271 static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, 272 typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, 273 "Corpus and Pattern iterators must point to the same type"); 274 if (__first == __last) 275 return std::make_pair(__last, __last); 276 if (__first_ == __last_) 277 return std::make_pair(__first, __first); 278 279 if (__pattern_length_ > __last - __first) 280 return std::make_pair(__last, __last); 281 282 return __search(__first, __last); 283 } 284 285 private: 286 _RandomAccessIterator1 __first_; 287 _RandomAccessIterator1 __last_; 288 _BinaryPredicate __pred_; 289 difference_type __pattern_length_; 290 shared_ptr<__skip_table_type> __skip_table_; 291 292 template <class _RandomAccessIterator2> 293 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> __search(_RandomAccessIterator2 __f,_RandomAccessIterator2 __l)294 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 295 _RandomAccessIterator2 __current = __f; 296 const _RandomAccessIterator2 __last = __l - __pattern_length_; 297 const __skip_table_type& __skip_table = *__skip_table_; 298 299 while (__current <= __last) { 300 difference_type __j = __pattern_length_; 301 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 302 --__j; 303 if (__j == 0) 304 return std::make_pair(__current, __current + __pattern_length_); 305 } 306 __current += __skip_table[__current[__pattern_length_ - 1]]; 307 } 308 return std::make_pair(__l, __l); 309 } 310 }; 311 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); 312 313 _LIBCPP_END_NAMESPACE_STD 314 315 _LIBCPP_POP_MACROS 316 317 #endif // _LIBCPP_STD_VER >= 17 318 319 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 320