• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // pdt.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Common classes for PDT expansion/traversal.
20 
21 #ifndef FST_EXTENSIONS_PDT_PDT_H__
22 #define FST_EXTENSIONS_PDT_PDT_H__
23 
24 #include <unordered_map>
25 using std::tr1::unordered_map;
26 using std::tr1::unordered_multimap;
27 #include <map>
28 #include <set>
29 
30 #include <fst/state-table.h>
31 #include <fst/fst.h>
32 
33 namespace fst {
34 
35 // Provides bijection between parenthesis stacks and signed integral
36 // stack IDs. Each stack ID is unique to each distinct stack.  The
37 // open-close parenthesis label pairs are passed in 'parens'.
38 template <typename K, typename L>
39 class PdtStack {
40  public:
41   typedef K StackId;
42   typedef L Label;
43 
44   // The stacks are stored in a tree. The nodes are stored in vector
45   // 'nodes_'. Each node represents the top of some stack and is
46   // ID'ed by its position in the vector. Its parent node represents
47   // the stack with the top 'popped' and its children are stored in
48   // 'child_map_' accessed by stack_id and label. The paren_id is
49   // the position in 'parens' of the parenthesis for that node.
50   struct StackNode {
51     StackId parent_id;
52     size_t paren_id;
53 
StackNodeStackNode54     StackNode(StackId p, size_t i) : parent_id(p), paren_id(i) {}
55   };
56 
PdtStack(const vector<pair<Label,Label>> & parens)57   PdtStack(const vector<pair<Label, Label> > &parens)
58       : parens_(parens), min_paren_(kNoLabel), max_paren_(kNoLabel) {
59     for (size_t i = 0; i < parens.size(); ++i) {
60       const pair<Label, Label>  &p = parens[i];
61       paren_map_[p.first] = i;
62       paren_map_[p.second] = i;
63 
64       if (min_paren_ == kNoLabel || p.first < min_paren_)
65         min_paren_ = p.first;
66       if (p.second < min_paren_)
67         min_paren_ = p.second;
68 
69       if (max_paren_ == kNoLabel || p.first > max_paren_)
70         max_paren_ = p.first;
71       if (p.second > max_paren_)
72         max_paren_ = p.second;
73     }
74     nodes_.push_back(StackNode(-1, -1));  // Tree root.
75   }
76 
77   // Returns stack ID given the current stack ID (0 if empty) and
78   // label read. 'Pushes' onto a stack if the label is an open
79   // parenthesis, returning the new stack ID. 'Pops' the stack if the
80   // label is a close parenthesis that matches the top of the stack,
81   // returning the parent stack ID. Returns -1 if label is an
82   // unmatched close parenthesis. Otherwise, returns the current stack
83   // ID.
Find(StackId stack_id,Label label)84   StackId Find(StackId stack_id, Label label) {
85     if (min_paren_ == kNoLabel || label < min_paren_ || label > max_paren_)
86       return stack_id;                       // Non-paren.
87 
88     typename unordered_map<Label, size_t>::const_iterator pit
89         = paren_map_.find(label);
90     if (pit == paren_map_.end())             // Non-paren.
91       return stack_id;
92     ssize_t paren_id = pit->second;
93 
94     if (label == parens_[paren_id].first) {  // Open paren.
95       StackId &child_id = child_map_[make_pair(stack_id, label)];
96       if (child_id == 0) {                   // Child not found, push label.
97         child_id = nodes_.size();
98         nodes_.push_back(StackNode(stack_id, paren_id));
99       }
100       return child_id;
101     }
102 
103     const StackNode &node = nodes_[stack_id];
104     if (paren_id == node.paren_id)           // Matching close paren.
105       return node.parent_id;
106 
107     return -1;                               // Non-matching close paren.
108   }
109 
110   // Returns the stack ID obtained by "popping" the label at the top
111   // of the current stack ID.
Pop(StackId stack_id)112   StackId Pop(StackId stack_id) const {
113     return nodes_[stack_id].parent_id;
114   }
115 
116   // Returns the paren ID at the top of the stack for 'stack_id'
Top(StackId stack_id)117   ssize_t Top(StackId stack_id) const {
118     return nodes_[stack_id].paren_id;
119   }
120 
ParenId(Label label)121   ssize_t ParenId(Label label) const {
122     typename unordered_map<Label, size_t>::const_iterator pit
123         = paren_map_.find(label);
124     if (pit == paren_map_.end())  // Non-paren.
125       return -1;
126     return pit->second;
127   }
128 
129  private:
130   struct ChildHash {
operatorChildHash131     size_t operator()(const pair<StackId, Label> &p) const {
132       return p.first + p.second * kPrime;
133     }
134   };
135 
136   static const size_t kPrime;
137 
138   vector<pair<Label, Label> > parens_;
139   vector<StackNode> nodes_;
140   unordered_map<Label, size_t> paren_map_;
141   unordered_map<pair<StackId, Label>,
142            StackId, ChildHash> child_map_;   // Child of stack node wrt label
143   Label min_paren_;                          // For faster paren. check
144   Label max_paren_;                          // For faster paren. check
145 };
146 
147 template <typename T, typename L>
148 const size_t PdtStack<T, L>::kPrime = 7853;
149 
150 
151 // State tuple for PDT expansion
152 template <typename S, typename K>
153 struct PdtStateTuple {
154   typedef S StateId;
155   typedef K StackId;
156 
157   StateId state_id;
158   StackId stack_id;
159 
PdtStateTuplePdtStateTuple160   PdtStateTuple()
161       : state_id(kNoStateId), stack_id(-1) {}
162 
PdtStateTuplePdtStateTuple163   PdtStateTuple(StateId fs, StackId ss)
164       : state_id(fs), stack_id(ss) {}
165 };
166 
167 // Equality of PDT state tuples.
168 template <typename S, typename K>
169 inline bool operator==(const PdtStateTuple<S, K>& x,
170                        const PdtStateTuple<S, K>& y) {
171   if (&x == &y)
172     return true;
173   return x.state_id == y.state_id && x.stack_id == y.stack_id;
174 }
175 
176 
177 // Hash function object for PDT state tuples
178 template <class T>
179 class PdtStateHash {
180  public:
operator()181   size_t operator()(const T &tuple) const {
182     return tuple.state_id + tuple.stack_id * kPrime;
183   }
184 
185  private:
186   static const size_t kPrime;
187 };
188 
189 template <typename T>
190 const size_t PdtStateHash<T>::kPrime = 7853;
191 
192 
193 // Tuple to PDT state bijection.
194 template <class S, class K>
195 class PdtStateTable
196     : public CompactHashStateTable<PdtStateTuple<S, K>,
197                                    PdtStateHash<PdtStateTuple<S, K> > > {
198  public:
199   typedef S StateId;
200   typedef K StackId;
201 
PdtStateTable()202   PdtStateTable() {}
203 
PdtStateTable(const PdtStateTable<S,K> & table)204   PdtStateTable(const PdtStateTable<S, K> &table) {}
205 
206  private:
207   void operator=(const PdtStateTable<S, K> &table);  // disallow
208 };
209 
210 }  // namespace fst
211 
212 #endif  // FST_EXTENSIONS_PDT_PDT_H__
213