• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // partition.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file Functions and classes to create a partition of states
17 //
18 
19 #ifndef FST_LIB_PARTITION_H__
20 #define FST_LIB_PARTITION_H__
21 
22 #include <algorithm>
23 #include <vector>
24 
25 #include "fst/lib/queue.h"
26 
27 
28 namespace fst {
29 
30 template <typename T> class PartitionIterator;
31 
32 // \class Partition
33 // \brief Defines a partitioning of states. Typically used to represent
34 //        equivalence classes for Fst operations like minimization.
35 //
36 template <typename T>
37 class Partition {
38   friend class PartitionIterator<T>;
39 
40   struct Element {
ElementElement41    Element() : value(0), next(0), prev(0) {}
ElementElement42    Element(T v) : value(v), next(0), prev(0) {}
43 
44    T        value;
45    Element* next;
46    Element* prev;
47   };
48 
49  public:
Partition()50   Partition() {}
51 
Partition(T num_states)52   Partition(T num_states) {
53     Initialize(num_states);
54   }
55 
~Partition()56   ~Partition() {
57     for (size_t i = 0; i < elements_.size(); ++i)
58       delete elements_[i];
59   }
60 
61   // Create an empty partition for num_states. At initialization time
62   // all elements are not assigned to a class (i.e class_index = -1).
63   // Initialize just creates num_states of elements. All element
64   // operations are then done by simply disconnecting the element from
65   // it current class and placing it at the head of the next class.
Initialize(size_t num_states)66   void Initialize(size_t num_states) {
67     for (size_t i = 0; i < elements_.size(); ++i)
68       delete elements_[i];
69     elements_.clear();
70     classes_.clear();
71     class_index_.clear();
72 
73     elements_.resize(num_states);
74     class_index_.resize(num_states, -1);
75     class_size_.reserve(num_states);
76     for (size_t i = 0; i < num_states; ++i)
77       elements_[i] = new Element(i);
78     num_states_ = num_states;
79   }
80 
81   // Add a class, resize classes_ and class_size_ resource by 1.
AddClass()82   size_t AddClass() {
83     size_t num_classes = classes_.size();
84     classes_.resize(num_classes + 1, 0);
85     class_size_.resize(num_classes + 1, 0);
86     class_split_.resize(num_classes + 1, 0);
87     split_size_.resize(num_classes + 1, 0);
88     return num_classes;
89   }
90 
AllocateClasses(T num_classes)91   void AllocateClasses(T num_classes) {
92     size_t n = classes_.size() + num_classes;
93     classes_.resize(n, 0);
94     class_size_.resize(n, 0);
95     class_split_.resize(n, 0);
96     split_size_.resize(n, 0);
97   }
98 
99   // Add element_id to class_id. The Add method is used to initialize
100   // partition. Once elements have been added to a class, you need to
101   // use the Move() method move an element from once class to another.
Add(T element_id,T class_id)102   void Add(T element_id, T class_id) {
103     Element* element = elements_[element_id];
104 
105     if (classes_[class_id])
106       classes_[class_id]->prev = element;
107     element->next = classes_[class_id];
108     element->prev = 0;
109     classes_[class_id] = element;
110 
111     class_index_[element_id] = class_id;
112     class_size_[class_id]++;
113   }
114 
115   // Move and element_id to class_id. Disconnects (removes) element
116   // from it current class and
Move(T element_id,T class_id)117   void Move(T element_id, T class_id) {
118     T old_class_id = class_index_[element_id];
119 
120     Element* element = elements_[element_id];
121     if (element->next) element->next->prev = element->prev;
122     if (element->prev) element->prev->next = element->next;
123     else               classes_[old_class_id] = element->next;
124 
125     Add(element_id, class_id);
126     class_size_[old_class_id]--;
127   }
128 
129   // split class on the element_id
SplitOn(T element_id)130   void SplitOn(T element_id) {
131     T class_id = class_index_[element_id];
132     if (class_size_[class_id] == 1) return;
133 
134     // first time class is split
135     if (split_size_[class_id] == 0)
136       visited_classes_.push_back(class_id);
137 
138     // increment size of split (set of element at head of chain)
139     split_size_[class_id]++;
140 
141     // update split point
142     if (class_split_[class_id] == 0)
143       class_split_[class_id] = classes_[class_id];
144     if (class_split_[class_id] == elements_[element_id])
145       class_split_[class_id] = elements_[element_id]->next;
146 
147     // move to head of chain in same class
148     Move(element_id, class_id);
149   }
150 
151   // Finalize class_id, split if required, and update class_splits,
152   // class indices of the newly created class. Returns the new_class id
153   // or -1 if no new class was created.
SplitRefine(T class_id)154   T SplitRefine(T class_id) {
155     // only split if necessary
156     if (class_size_[class_id] == split_size_[class_id]) {
157       class_split_[class_id] = 0;
158       split_size_[class_id] = 0;
159       return -1;
160     } else {
161 
162       T new_class = AddClass();
163       size_t remainder = class_size_[class_id] - split_size_[class_id];
164       if (remainder < (size_t)split_size_[class_id]) {  // add smaller
165         Element* split_el   = class_split_[class_id];
166         classes_[new_class] = split_el;
167         class_size_[class_id] = split_size_[class_id];
168         class_size_[new_class] = remainder;
169         split_el->prev->next = 0;
170         split_el->prev = 0;
171       } else {
172         Element* split_el   = class_split_[class_id];
173         classes_[new_class] = classes_[class_id];
174         class_size_[class_id] = remainder;
175         class_size_[new_class] = split_size_[class_id];
176         split_el->prev->next = 0;
177         split_el->prev = 0;
178         classes_[class_id] = split_el;
179       }
180 
181       // update class index for element in new class
182       for (Element* el = classes_[new_class]; el; el = el->next)
183         class_index_[el->value] = new_class;
184 
185       class_split_[class_id] = 0;
186       split_size_[class_id] = 0;
187 
188       return new_class;
189     }
190   }
191 
192   // Once all states have been processed for a particular class C, we
193   // can finalize the split. FinalizeSplit() will update each block in the
194   // partition, create new once and update the queue of active classes
195   // that require further refinement.
196   template <class Queue>
FinalizeSplit(Queue * L)197   void FinalizeSplit(Queue* L) {
198     for (size_t i = 0; i < visited_classes_.size(); ++i) {
199       T new_class = SplitRefine(visited_classes_[i]);
200       if (new_class != -1 && L)
201         L->Enqueue(new_class);
202     }
203     visited_classes_.clear();
204   }
205 
206 
class_id(T element_id)207   const T class_id(T element_id) const {
208     return class_index_[element_id];
209   }
210 
class_sizes()211   const vector<T>& class_sizes() const {
212     return class_size_;
213   }
214 
class_size(T class_id)215   size_t class_size(T class_id)  const {
216     return class_size_[class_id];
217   }
218 
num_classes()219   const T num_classes() const {
220     return classes_.size();
221   }
222 
223 
224  private:
225   int num_states_;
226 
227   // container of all elements (owner of ptrs)
228   vector<Element*> elements_;
229 
230   // linked list of elements belonging to class
231   vector<Element*> classes_;
232 
233   // pointer to split point for each class
234   vector<Element*> class_split_;
235 
236   // class index of element
237   vector<T> class_index_;
238 
239   // class sizes
240   vector<T> class_size_;
241 
242   // size of split for each class
243   vector<T> split_size_;
244 
245   // set of visited classes to be used in split refine
246   vector<T> visited_classes_;
247 };
248 
249 
250 // iterate over members of a class in a partition
251 template <typename T>
252 class PartitionIterator {
253   typedef typename Partition<T>::Element Element;
254  public:
PartitionIterator(const Partition<T> & partition,T class_id)255   PartitionIterator(const Partition<T>& partition, T class_id)
256       : p_(partition),
257         element_(p_.classes_[class_id]),
258         class_id_(class_id) {}
259 
Done()260   bool Done() {
261     return (element_ == 0);
262   }
263 
Value()264   const T Value() {
265     return (element_->value);
266   }
267 
Next()268   void Next() {
269     element_ = element_->next;
270   }
271 
Reset()272   void Reset() {
273     element_ = p_.classes_[class_id_];
274   }
275 
276  private:
277   const Partition<T>& p_;
278 
279   const Element* element_;
280 
281   T class_id_;
282 };
283 }
284 
285 #endif  // FST_LIB_PARTITION_H__
286