1 /////////////////////////////////////////////////////////////////////////////// 2 /// \file boyer_moore.hpp 3 /// Contains the boyer-moore implementation. Note: this is *not* a general- 4 /// purpose boyer-moore implementation. It truncates the search string at 5 /// 256 characters, but it is sufficient for the needs of xpressive. 6 // 7 // Copyright 2008 Eric Niebler. Distributed under the Boost 8 // Software License, Version 1.0. (See accompanying file 9 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 10 11 #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 12 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005 13 14 // MS compatible compilers support #pragma once 15 #if defined(_MSC_VER) 16 # pragma once 17 # pragma warning(push) 18 # pragma warning(disable : 4100) // unreferenced formal parameter 19 #endif 20 21 #include <climits> // for UCHAR_MAX 22 #include <cstddef> // for std::ptrdiff_t 23 #include <utility> // for std::max 24 #include <vector> 25 #include <boost/mpl/bool.hpp> 26 #include <boost/noncopyable.hpp> 27 #include <boost/iterator/iterator_traits.hpp> 28 #include <boost/type_traits/is_convertible.hpp> 29 #include <boost/xpressive/detail/detail_fwd.hpp> 30 31 namespace boost { namespace xpressive { namespace detail 32 { 33 34 /////////////////////////////////////////////////////////////////////////////// 35 // boyer_moore 36 // 37 template<typename BidiIter, typename Traits> 38 struct boyer_moore 39 : noncopyable 40 { 41 typedef typename iterator_value<BidiIter>::type char_type; 42 typedef Traits traits_type; 43 typedef has_fold_case<Traits> case_fold; 44 typedef typename Traits::string_type string_type; 45 46 // initialize the Boyer-Moore search data structure, using the 47 // search sub-sequence to prime the pump. boyer_mooreboost::xpressive::detail::boyer_moore48 boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase) 49 : begin_(begin) 50 , last_(begin) 51 , fold_() 52 , find_fun_( 53 icase 54 ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_) 55 : &boyer_moore::find_ 56 ) 57 { 58 std::ptrdiff_t const uchar_max = UCHAR_MAX; 59 std::ptrdiff_t diff = std::distance(begin, end); 60 this->length_ = static_cast<unsigned char>((std::min)(diff, uchar_max)); 61 std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_); 62 --this->length_; 63 64 icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_()); 65 } 66 findboost::xpressive::detail::boyer_moore67 BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const 68 { 69 return (this->*this->find_fun_)(begin, end, tr); 70 } 71 72 private: 73 init_boost::xpressive::detail::boyer_moore74 void init_(Traits const &tr, mpl::false_) 75 { 76 for(unsigned char offset = this->length_; offset; --offset, ++this->last_) 77 { 78 this->offsets_[tr.hash(*this->last_)] = offset; 79 } 80 } 81 init_boost::xpressive::detail::boyer_moore82 void init_(Traits const &tr, mpl::true_) 83 { 84 this->fold_.reserve(this->length_ + 1); 85 for(unsigned char offset = this->length_; offset; --offset, ++this->last_) 86 { 87 this->fold_.push_back(tr.fold_case(*this->last_)); 88 for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end(); 89 beg != end; ++beg) 90 { 91 this->offsets_[tr.hash(*beg)] = offset; 92 } 93 } 94 this->fold_.push_back(tr.fold_case(*this->last_)); 95 } 96 97 // case-sensitive Boyer-Moore search find_boost::xpressive::detail::boyer_moore98 BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const 99 { 100 typedef typename boost::iterator_difference<BidiIter>::type diff_type; 101 diff_type const endpos = std::distance(begin, end); 102 diff_type offset = static_cast<diff_type>(this->length_); 103 104 for(diff_type curpos = offset; curpos < endpos; curpos += offset) 105 { 106 std::advance(begin, offset); 107 108 char_type const *pat_tmp = this->last_; 109 BidiIter str_tmp = begin; 110 111 for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) 112 { 113 if(pat_tmp == this->begin_) 114 { 115 return str_tmp; 116 } 117 } 118 119 offset = this->offsets_[tr.hash(tr.translate(*begin))]; 120 } 121 122 return end; 123 } 124 125 // case-insensitive Boyer-Moore search find_nocase_boost::xpressive::detail::boyer_moore126 BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const 127 { 128 typedef typename boost::iterator_difference<BidiIter>::type diff_type; 129 diff_type const endpos = std::distance(begin, end); 130 diff_type offset = static_cast<diff_type>(this->length_); 131 132 for(diff_type curpos = offset; curpos < endpos; curpos += offset) 133 { 134 std::advance(begin, offset); 135 136 char_type const *pat_tmp = this->last_; 137 BidiIter str_tmp = begin; 138 139 for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp) 140 { 141 if(pat_tmp == this->begin_) 142 { 143 return str_tmp; 144 } 145 } 146 147 offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))]; 148 } 149 150 return end; 151 } 152 153 // case-insensitive Boyer-Moore search with case-folding find_nocase_fold_boost::xpressive::detail::boyer_moore154 BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const 155 { 156 typedef typename boost::iterator_difference<BidiIter>::type diff_type; 157 diff_type const endpos = std::distance(begin, end); 158 diff_type offset = static_cast<diff_type>(this->length_); 159 160 for(diff_type curpos = offset; curpos < endpos; curpos += offset) 161 { 162 std::advance(begin, offset); 163 164 string_type const *pat_tmp = &this->fold_.back(); 165 BidiIter str_tmp = begin; 166 167 for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp); 168 --pat_tmp, --str_tmp) 169 { 170 if(pat_tmp == &this->fold_.front()) 171 { 172 return str_tmp; 173 } 174 } 175 176 offset = this->offsets_[tr.hash(*begin)]; 177 } 178 179 return end; 180 } 181 182 private: 183 184 char_type const *begin_; 185 char_type const *last_; 186 std::vector<string_type> fold_; 187 BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const; 188 unsigned char length_; 189 unsigned char offsets_[UCHAR_MAX + 1]; 190 }; 191 192 }}} // namespace boost::xpressive::detail 193 194 #if defined(_MSC_VER) 195 # pragma warning(pop) 196 #endif 197 198 #endif 199