• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 Adam Barth. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #ifndef SuffixTree_h
27 #define SuffixTree_h
28 
29 #include "PlatformString.h"
30 #include <wtf/Vector.h>
31 
32 namespace WebCore {
33 
34 class UnicodeCodebook {
35 public:
codeWord(UChar c)36     static int codeWord(UChar c) { return c; }
37     enum { codeSize = 1 << 8 * sizeof(UChar) };
38 };
39 
40 class ASCIICodebook {
41 public:
codeWord(UChar c)42     static int codeWord(UChar c) { return c & (codeSize - 1); }
43     enum { codeSize = 1 << (8 * sizeof(char) - 1) };
44 };
45 
46 template<typename Codebook>
47 class SuffixTree {
48 public:
SuffixTree(const String & text,unsigned depth)49     SuffixTree(const String& text, unsigned depth)
50         : m_depth(depth)
51         , m_leaf(true)
52     {
53         build(text);
54     }
55 
mightContain(const String & query)56     bool mightContain(const String& query)
57     {
58         Node* current = &m_root;
59         int limit = std::min(m_depth, query.length());
60         for (int i = 0; i < limit; ++i) {
61             current = current->at(Codebook::codeWord(query[i]));
62             if (!current)
63                 return false;
64         }
65         return true;
66     }
67 
68 private:
69     class Node {
70     public:
71         Node(bool isLeaf = false)
72         {
73             m_children.resize(Codebook::codeSize);
74             m_children.fill(0);
75             m_isLeaf = isLeaf;
76         }
77 
~Node()78         ~Node()
79         {
80             for (unsigned i = 0; i < m_children.size(); ++i) {
81                 Node* child = m_children.at(i);
82                 if (child && !child->m_isLeaf)
83                     delete child;
84             }
85         }
86 
at(int codeWord)87         Node*& at(int codeWord) { return m_children.at(codeWord); }
88 
89     private:
90         typedef Vector<Node*, Codebook::codeSize> ChildrenVector;
91 
92         ChildrenVector m_children;
93         bool m_isLeaf;
94     };
95 
build(const String & text)96     void build(const String& text)
97     {
98         for (unsigned base = 0; base < text.length(); ++base) {
99             Node* current = &m_root;
100             unsigned limit = std::min(base + m_depth, text.length());
101             for (unsigned offset = 0; base + offset < limit; ++offset) {
102                 ASSERT(current != &m_leaf);
103                 Node*& child = current->at(Codebook::codeWord(text[base + offset]));
104                 if (!child)
105                     child = base + offset + 1 == limit ? &m_leaf : new Node();
106                 current = child;
107             }
108         }
109     }
110 
111     Node m_root;
112     unsigned m_depth;
113 
114     // Instead of allocating a fresh empty leaf node for ever leaf in the tree
115     // (there can be a lot of these), we alias all the leaves to this "static"
116     // leaf node.
117     Node m_leaf;
118 };
119 
120 } // namespace WebCore
121 
122 #endif // SuffixTree_h
123