1 #ifndef ANTLR3COLLECTIONS_HPP 2 #define ANTLR3COLLECTIONS_HPP 3 4 // [The "BSD licence"] 5 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 6 7 // 8 // All rights reserved. 9 // 10 // Redistribution and use in source and binary forms, with or without 11 // modification, are permitted provided that the following conditions 12 // are met: 13 // 1. Redistributions of source code must retain the above copyright 14 // notice, this list of conditions and the following disclaimer. 15 // 2. Redistributions in binary form must reproduce the above copyright 16 // notice, this list of conditions and the following disclaimer in the 17 // documentation and/or other materials provided with the distribution. 18 // 3. The name of the author may not be used to endorse or promote products 19 // derived from this software without specific prior written permission. 20 // 21 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 32 #include "antlr3defs.hpp" 33 34 ANTLR_BEGIN_NAMESPACE() 35 36 /* -------------- TRIE Interfaces ---------------- */ 37 38 /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE 39 */ 40 template< class ImplTraits, class DataType > 41 class TrieEntry : public ImplTraits::AllocPolicyType 42 { 43 public: 44 typedef typename ImplTraits::AllocPolicyType AllocPolicy; 45 46 private: 47 DataType m_data; 48 TrieEntry* m_next; /* Allows duplicate entries for same key in insertion order */ 49 50 public: 51 TrieEntry(const DataType& data, TrieEntry* next); 52 DataType& get_data(); 53 const DataType& get_data() const; 54 TrieEntry* get_next() const; 55 void set_next( TrieEntry* next ); 56 }; 57 58 /** Structure that defines an element/node in an ANTLR_INT_TRIE 59 */ 60 template< class ImplTraits, class DataType > 61 class IntTrieNode : public ImplTraits::AllocPolicyType 62 { 63 public: 64 typedef TrieEntry<ImplTraits, DataType> TrieEntryType; 65 typedef TrieEntryType BucketsType; 66 67 private: 68 ANTLR_UINT32 m_bitNum; /**< This is the left/right bit index for traversal along the nodes */ 69 ANTLR_INTKEY m_key; /**< This is the actual key that the entry represents if it is a terminal node */ 70 BucketsType* m_buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */ 71 IntTrieNode* m_leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */ 72 IntTrieNode* m_rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */ 73 74 public: 75 IntTrieNode(); 76 ~IntTrieNode(); 77 78 ANTLR_UINT32 get_bitNum() const; 79 ANTLR_INTKEY get_key() const; 80 BucketsType* get_buckets() const; 81 IntTrieNode* get_leftN() const; 82 IntTrieNode* get_rightN() const; 83 void set_bitNum( ANTLR_UINT32 bitNum ); 84 void set_key( ANTLR_INTKEY key ); 85 void set_buckets( BucketsType* buckets ); 86 void set_leftN( IntTrieNode* leftN ); 87 void set_rightN( IntTrieNode* rightN ); 88 }; 89 90 /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation, 91 * as you might expect, the key is turned into a "string" by looking at bit(key, depth) 92 * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63) 93 * and potentially a huge trie. This is the algorithm for a Patricia Trie. 94 * Note also that this trie [can] accept multiple entries for the same key and is 95 * therefore a kind of elastic bucket patricia trie. 96 * 97 * If you find this code useful, please feel free to 'steal' it for any purpose 98 * as covered by the BSD license under which ANTLR is issued. You can cut the code 99 * but as the ANTLR library is only about 50K (Windows Vista), you might find it 100 * easier to just link the library. Please keep all comments and licenses and so on 101 * in any version of this you create of course. 102 * 103 * Jim Idle. 104 * 105 */ 106 class IntTrieBase 107 { 108 public: 109 static const ANTLR_UINT8* get_bitIndex(); 110 static const ANTLR_UINT64* get_bitMask(); 111 }; 112 113 template< class ImplTraits, class DataType > 114 class IntTrie : public ImplTraits::AllocPolicyType, public IntTrieBase 115 { 116 public: 117 typedef TrieEntry<ImplTraits, DataType> TrieEntryType; 118 typedef IntTrieNode<ImplTraits, DataType> IntTrieNodeType; 119 120 private: 121 IntTrieNodeType* m_root; /* Root node of this integer trie */ 122 IntTrieNodeType* m_current; /* Used to traverse the TRIE with the next() method */ 123 ANTLR_UINT32 m_count; /* Current entry count */ 124 bool m_allowDups; /* Whether this trie accepts duplicate keys */ 125 126 public: 127 /* INT TRIE Implementation of depth 64 bits, being the number of bits 128 * in a 64 bit integer. 129 */ 130 IntTrie( ANTLR_UINT32 depth ); 131 132 /** Search the int Trie and return a pointer to the first bucket indexed 133 * by the key if it is contained in the trie, otherwise NULL. 134 */ 135 TrieEntryType* get( ANTLR_INTKEY key); 136 bool del( ANTLR_INTKEY key); 137 138 /** Add an entry into the INT trie. 139 * Basically we descend the trie as we do when searching it, which will 140 * locate the only node in the trie that can be reached by the bit pattern of the 141 * key. If the key is actually at that node, then if the trie accepts duplicates 142 * we add the supplied data in a new chained bucket to that data node. If it does 143 * not accept duplicates then we merely return FALSE in case the caller wants to know 144 * whether the key was already in the trie. 145 * If the node we locate is not the key we are looking to add, then we insert a new node 146 * into the trie with a bit index of the leftmost differing bit and the left or right 147 * node pointing to itself or the data node we are inserting 'before'. 148 */ 149 bool add( ANTLR_INTKEY key, const DataType& data ); 150 ~IntTrie(); 151 }; 152 153 /** 154 * A topological sort system that given a set of dependencies of a node m on node n, 155 * can sort them in dependency order. This is a generally useful utility object 156 * that does not care what the things are it is sorting. Generally the set 157 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR. 158 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort 159 * the vector entries in place, as well as a sort method that just returns an 160 * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but 161 * some set of your own device. 162 * 163 * Of the two main algorithms that could be used, I chose to use the depth first 164 * search for unvisited nodes as a) This runs in linear time, and b) it is what 165 * we used in the ANTLR Tool to perform a topological sort of the input grammar files 166 * based on their dependencies. 167 */ 168 template<class ImplTraits> 169 class Topo : public ImplTraits::AllocPolicyType 170 { 171 public: 172 typedef typename ImplTraits::BitsetType BitsetType; 173 typedef typename ImplTraits::AllocPolicyType AllocPolicyType; 174 175 private: 176 /** 177 * A vector of vectors of edges, built by calling the addEdge method() 178 * to indicate that node number n depends on node number m. Each entry in the vector 179 * contains a bitset, which has a bit index set for each node upon which the 180 * entry node depends. 181 */ 182 BitsetType** m_edges; 183 184 /** 185 * A vector used to build up the sorted output order. Note that 186 * as the vector contains UINT32 then the maximum node index is 187 * 'limited' to 2^32, as nodes should be zero based. 188 */ 189 ANTLR_UINT32* m_sorted; 190 191 /** 192 * A vector used to detect cycles in the edge dependecies. It is used 193 * as a stack and each time we descend a node to one of its edges we 194 * add the node into this stack. If we find a node that we have already 195 * visited in the stack, then it means there wasa cycle such as 9->8->1->9 196 * as the only way a node can be on the stack is if we are currently 197 * descnding from it as we remove it from the stack as we exit from 198 * descending its dependencies 199 */ 200 ANTLR_UINT32* m_cycle; 201 202 /** 203 * A flag that indicates the algorithm found a cycle in the edges 204 * such as 9->8->1->9 205 * If this flag is set after you have called one of the sort routines 206 * then the detected cycle will be contained in the cycle array and 207 * cycleLimit will point to the one after the last entry in the cycle. 208 */ 209 bool m_hasCycle; 210 211 /** 212 * A watermark used to accumulate potential cycles in the cycle array. 213 * This should be zero when we are done. Check hasCycle after calling one 214 * of the sort methods and if it is true then you can find the cycle 215 * in cycle[0]...cycle[cycleMark-1] 216 */ 217 ANTLR_UINT32 m_cycleMark; 218 219 /** 220 * One more than the largest node index that is contained in edges/sorted. 221 */ 222 ANTLR_UINT32 m_limit; 223 224 /** 225 * The set of visited nodes as determined by a set entry in 226 * the bitmap. 227 */ 228 BitsetType* m_visited; 229 230 public: 231 Topo(); 232 /** 233 * A method that adds an edge from one node to another. An edge 234 * of n -> m indicates that node n is dependent on node m. Note that 235 * while building these edges, it is perfectly OK to add nodes out of 236 * sequence. So, if you have edges: 237 * 238 * 3 -> 0 239 * 2 -> 1 240 * 1 -> 3 241 * 242 * The you can add them in that order and so add node 3 before nodes 2 and 1 243 * 244 */ 245 void addEdge(ANTLR_UINT32 edge, ANTLR_UINT32 dependency); 246 247 248 /** 249 * A method that returns a pointer to an array of sorted node indexes. 250 * The array is sorted in topological sorted order. Note that the array 251 * is only as large as the largest node index you created an edge for. This means 252 * that if you had an input of 32 nodes, but that largest node with an edge 253 * was 16, then the returned array will be the sorted order of the first 16 254 * nodes and the last 16 nodes of your array are basically fine as they are 255 * as they had no dependencies and do not need any particular sort order. 256 * 257 * NB: If the structure that contains the array is freed, then the sorted 258 * array will be freed too so you should use the value of limit to 259 * make a long term copy of this array if you do not want to keep the topo 260 * structure around as well. 261 */ 262 ANTLR_UINT32* sortToArray(); 263 264 /** 265 * A method that sorts the supplied ANTLR3_VECTOR in place based 266 * on the previously supplied edge data. 267 */ 268 template<typename DataType> 269 void sortVector( typename ImplTraits::template VectorType<DataType>& v); 270 271 void DFS(ANTLR_UINT32 node); 272 273 /** 274 * A method to free this structure and any associated memory. 275 */ 276 ~Topo(); 277 }; 278 279 ANTLR_END_NAMESPACE() 280 281 #include "antlr3collections.inl" 282 283 #endif 284 285 286