1 /*============================================================================= 2 Copyright (c) 2001-2003 Joel de Guzman 3 Copyright (c) 2011 Daniel James 4 http://spirit.sourceforge.net/ 5 6 Use, modification and distribution is subject to the Boost Software 7 License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at 8 http://www.boost.org/LICENSE_1_0.txt) 9 =============================================================================*/ 10 #ifndef QUICKBOOK_SYMBOLS_IPP 11 #define QUICKBOOK_SYMBOLS_IPP 12 13 /////////////////////////////////////////////////////////////////////////////// 14 15 #include <boost/intrusive_ptr.hpp> 16 #include <boost/scoped_ptr.hpp> 17 #include <boost/spirit/home/classic/symbols.hpp> 18 19 /////////////////////////////////////////////////////////////////////////////// 20 namespace quickbook 21 { 22 23 /////////////////////////////////////////////////////////////////////////////// 24 // 25 // tst class 26 // 27 // This it the Ternary Search Tree from 28 // <boost/spirit/home/classic/symbols/impl/tst.ipp> adapted to be cheap 29 // to copy. 30 // 31 // Ternary Search Tree implementation. The data structure is faster 32 // than 33 // hashing for many typical search problems especially when the search 34 // interface is iterator based. Searching for a string of length k in a 35 // ternary search tree with n strings will require at most O(log n+k) 36 // character comparisons. TSTs are many times faster than hash tables 37 // for unsuccessful searches since mismatches are discovered earlier 38 // after examining only a few characters. Hash tables always examine an 39 // entire key when searching. 40 // 41 // For details see http://www.cs.princeton.edu/~rs/strings/. 42 // 43 // *** This is a low level class and is 44 // not meant for public consumption *** 45 // 46 /////////////////////////////////////////////////////////////////////////////// 47 48 template <typename T, typename CharT> struct tst_node 49 { tst_nodequickbook::tst_node50 tst_node(CharT value_) 51 : reference_count(0) 52 , left() 53 , middle() 54 , right() 55 , data() 56 , value(value_) 57 { 58 } 59 tst_nodequickbook::tst_node60 tst_node(tst_node const& other) 61 : reference_count(0) 62 , left(other.left) 63 , middle(other.middle) 64 , right(other.right) 65 , data(other.data ? new T(*other.data) : 0) 66 , value(other.value) 67 { 68 } 69 70 // If you fancy a slight improvement in memory use, 71 // reference_count + value could probably be packed 72 // in the space for a single int. 73 int reference_count; 74 boost::intrusive_ptr<tst_node> left; 75 boost::intrusive_ptr<tst_node> middle; 76 boost::intrusive_ptr<tst_node> right; 77 boost::scoped_ptr<T> data; 78 CharT value; 79 80 private: 81 tst_node& operator=(tst_node const&); 82 }; 83 84 template <typename T, typename CharT> intrusive_ptr_add_ref(tst_node<T,CharT> * ptr)85 void intrusive_ptr_add_ref(tst_node<T, CharT>* ptr) 86 { 87 ++ptr->reference_count; 88 } 89 90 template <typename T, typename CharT> intrusive_ptr_release(tst_node<T,CharT> * ptr)91 void intrusive_ptr_release(tst_node<T, CharT>* ptr) 92 { 93 if (--ptr->reference_count == 0) delete ptr; 94 } 95 96 /////////////////////////////////////////////////////////////////////////// 97 template <typename T, typename CharT> class tst 98 { 99 typedef tst_node<T, CharT> node_t; 100 typedef boost::intrusive_ptr<node_t> node_ptr; 101 node_ptr root; 102 103 public: 104 struct search_info 105 { 106 T* data; 107 std::size_t length; 108 }; 109 swap(tst & other)110 void swap(tst& other) { root.swap(other.root); } 111 112 // Adds symbol to ternary search tree. 113 // If it already exists, then replace it with new value. 114 // 115 // pre: first != last 116 template <typename IteratorT> add(IteratorT first,IteratorT const & last,T const & data)117 T* add(IteratorT first, IteratorT const& last, T const& data) 118 { 119 assert(first != last); 120 121 node_ptr* np = &root; 122 CharT ch = *first; 123 124 for (;;) { 125 if (!*np) { 126 *np = new node_t(ch); 127 } 128 else if ((*np)->reference_count > 1) { 129 *np = new node_t(**np); 130 } 131 132 if (ch < (*np)->value) { 133 np = &(*np)->left; 134 } 135 else if (ch == (*np)->value) { 136 ++first; 137 if (first == last) break; 138 ch = *first; 139 np = &(*np)->middle; 140 } 141 else { 142 np = &(*np)->right; 143 } 144 } 145 146 boost::scoped_ptr<T> new_data(new T(data)); 147 boost::swap((*np)->data, new_data); 148 return (*np)->data.get(); 149 } 150 151 template <typename ScannerT> find(ScannerT const & scan) const152 search_info find(ScannerT const& scan) const 153 { 154 search_info result = {0, 0}; 155 if (scan.at_end()) { 156 return result; 157 } 158 159 typedef typename ScannerT::iterator_t iterator_t; 160 node_ptr np = root; 161 CharT ch = *scan; 162 iterator_t latest = scan.first; 163 std::size_t length = 0; 164 165 while (np) { 166 if (ch < np->value) // => go left! 167 { 168 np = np->left; 169 } 170 else if (ch == np->value) // => go middle! 171 { 172 ++scan; 173 ++length; 174 175 // Found a potential match. 176 if (np->data.get()) { 177 result.data = np->data.get(); 178 result.length = length; 179 latest = scan.first; 180 } 181 182 if (scan.at_end()) break; 183 ch = *scan; 184 np = np->middle; 185 } 186 else // (ch > np->value) => go right! 187 { 188 np = np->right; 189 } 190 } 191 192 scan.first = latest; 193 return result; 194 } 195 }; 196 197 typedef boost::spirit::classic:: 198 symbols<std::string, char, quickbook::tst<std::string, char> > 199 string_symbols; 200 } // namespace quickbook 201 202 #endif 203