• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // partition.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: johans@google.com (Johan Schalkwyk)
17 //
18 // \file Functions and classes to create a partition of states
19 //
20 
21 #ifndef FST_LIB_PARTITION_H__
22 #define FST_LIB_PARTITION_H__
23 
24 #include <vector>
25 using std::vector;
26 #include <algorithm>
27 
28 
29 #include <fst/queue.h>
30 
31 
32 
33 namespace fst {
34 
35 template <typename T> class PartitionIterator;
36 
37 // \class Partition
38 // \brief Defines a partitioning of states. Typically used to represent
39 //        equivalence classes for Fst operations like minimization.
40 //
41 template <typename T>
42 class Partition {
43   friend class PartitionIterator<T>;
44 
45   struct Element {
ElementElement46    Element() : value(0), next(0), prev(0) {}
ElementElement47    Element(T v) : value(v), next(0), prev(0) {}
48 
49    T        value;
50    Element* next;
51    Element* prev;
52   };
53 
54  public:
Partition()55   Partition() {}
56 
Partition(T num_states)57   Partition(T num_states) {
58     Initialize(num_states);
59   }
60 
~Partition()61   ~Partition() {
62     for (size_t i = 0; i < elements_.size(); ++i)
63       delete elements_[i];
64   }
65 
66   // Create an empty partition for num_states. At initialization time
67   // all elements are not assigned to a class (i.e class_index = -1).
68   // Initialize just creates num_states of elements. All element
69   // operations are then done by simply disconnecting the element from
70   // it current class and placing it at the head of the next class.
Initialize(size_t num_states)71   void Initialize(size_t num_states) {
72     for (size_t i = 0; i < elements_.size(); ++i)
73       delete elements_[i];
74     elements_.clear();
75     classes_.clear();
76     class_index_.clear();
77 
78     elements_.resize(num_states);
79     class_index_.resize(num_states, -1);
80     class_size_.reserve(num_states);
81     for (size_t i = 0; i < num_states; ++i)
82       elements_[i] = new Element(i);
83     num_states_ = num_states;
84   }
85 
86   // Add a class, resize classes_ and class_size_ resource by 1.
AddClass()87   size_t AddClass() {
88     size_t num_classes = classes_.size();
89     classes_.resize(num_classes + 1, 0);
90     class_size_.resize(num_classes + 1, 0);
91     class_split_.resize(num_classes + 1, 0);
92     split_size_.resize(num_classes + 1, 0);
93     return num_classes;
94   }
95 
AllocateClasses(T num_classes)96   void AllocateClasses(T num_classes) {
97     size_t n = classes_.size() + num_classes;
98     classes_.resize(n, 0);
99     class_size_.resize(n, 0);
100     class_split_.resize(n, 0);
101     split_size_.resize(n, 0);
102   }
103 
104   // Add element_id to class_id. The Add method is used to initialize
105   // partition. Once elements have been added to a class, you need to
106   // use the Move() method move an element from once class to another.
Add(T element_id,T class_id)107   void Add(T element_id, T class_id) {
108     Element* element = elements_[element_id];
109 
110     if (classes_[class_id])
111       classes_[class_id]->prev = element;
112     element->next = classes_[class_id];
113     element->prev = 0;
114     classes_[class_id] = element;
115 
116     class_index_[element_id] = class_id;
117     class_size_[class_id]++;
118   }
119 
120   // Move and element_id to class_id. Disconnects (removes) element
121   // from it current class and
Move(T element_id,T class_id)122   void Move(T element_id, T class_id) {
123     T old_class_id = class_index_[element_id];
124 
125     Element* element = elements_[element_id];
126     if (element->next) element->next->prev = element->prev;
127     if (element->prev) element->prev->next = element->next;
128     else               classes_[old_class_id] = element->next;
129 
130     Add(element_id, class_id);
131     class_size_[old_class_id]--;
132   }
133 
134   // split class on the element_id
SplitOn(T element_id)135   void SplitOn(T element_id) {
136     T class_id = class_index_[element_id];
137     if (class_size_[class_id] == 1) return;
138 
139     // first time class is split
140     if (split_size_[class_id] == 0)
141       visited_classes_.push_back(class_id);
142 
143     // increment size of split (set of element at head of chain)
144     split_size_[class_id]++;
145 
146     // update split point
147     if (class_split_[class_id] == 0)
148       class_split_[class_id] = classes_[class_id];
149     if (class_split_[class_id] == elements_[element_id])
150       class_split_[class_id] = elements_[element_id]->next;
151 
152     // move to head of chain in same class
153     Move(element_id, class_id);
154   }
155 
156   // Finalize class_id, split if required, and update class_splits,
157   // class indices of the newly created class. Returns the new_class id
158   // or -1 if no new class was created.
SplitRefine(T class_id)159   T SplitRefine(T class_id) {
160     // only split if necessary
161     if (class_size_[class_id] == split_size_[class_id]) {
162       class_split_[class_id] = 0;
163       split_size_[class_id] = 0;
164       return -1;
165     } else {
166 
167       T new_class = AddClass();
168       size_t remainder = class_size_[class_id] - split_size_[class_id];
169       if (remainder < split_size_[class_id]) {  // add smaller
170         Element* split_el   = class_split_[class_id];
171         classes_[new_class] = split_el;
172         class_size_[class_id] = split_size_[class_id];
173         class_size_[new_class] = remainder;
174         split_el->prev->next = 0;
175         split_el->prev = 0;
176       } else {
177         Element* split_el   = class_split_[class_id];
178         classes_[new_class] = classes_[class_id];
179         class_size_[class_id] = remainder;
180         class_size_[new_class] = split_size_[class_id];
181         split_el->prev->next = 0;
182         split_el->prev = 0;
183         classes_[class_id] = split_el;
184       }
185 
186       // update class index for element in new class
187       for (Element* el = classes_[new_class]; el; el = el->next)
188         class_index_[el->value] = new_class;
189 
190       class_split_[class_id] = 0;
191       split_size_[class_id] = 0;
192 
193       return new_class;
194     }
195   }
196 
197   // Once all states have been processed for a particular class C, we
198   // can finalize the split. FinalizeSplit() will update each block in the
199   // partition, create new once and update the queue of active classes
200   // that require further refinement.
201   template <class Queue>
FinalizeSplit(Queue * L)202   void FinalizeSplit(Queue* L) {
203     for (size_t i = 0; i < visited_classes_.size(); ++i) {
204       T new_class = SplitRefine(visited_classes_[i]);
205       if (new_class != -1 && L)
206         L->Enqueue(new_class);
207     }
208     visited_classes_.clear();
209   }
210 
211 
class_id(T element_id)212   const T class_id(T element_id) const {
213     return class_index_[element_id];
214   }
215 
class_sizes()216   const vector<T>& class_sizes() const {
217     return class_size_;
218   }
219 
class_size(T class_id)220   const size_t class_size(T class_id)  const {
221     return class_size_[class_id];
222   }
223 
num_classes()224   const T num_classes() const {
225     return classes_.size();
226   }
227 
228 
229  private:
230   int num_states_;
231 
232   // container of all elements (owner of ptrs)
233   vector<Element*> elements_;
234 
235   // linked list of elements belonging to class
236   vector<Element*> classes_;
237 
238   // pointer to split point for each class
239   vector<Element*> class_split_;
240 
241   // class index of element
242   vector<T> class_index_;
243 
244   // class sizes
245   vector<T> class_size_;
246 
247   // size of split for each class
248   vector<T> split_size_;
249 
250   // set of visited classes to be used in split refine
251   vector<T> visited_classes_;
252 };
253 
254 
255 // iterate over members of a class in a partition
256 template <typename T>
257 class PartitionIterator {
258   typedef typename Partition<T>::Element Element;
259  public:
PartitionIterator(const Partition<T> & partition,T class_id)260   PartitionIterator(const Partition<T>& partition, T class_id)
261       : p_(partition),
262         element_(p_.classes_[class_id]),
263         class_id_(class_id) {}
264 
Done()265   bool Done() {
266     return (element_ == 0);
267   }
268 
Value()269   const T Value() {
270     return (element_->value);
271   }
272 
Next()273   void Next() {
274     element_ = element_->next;
275   }
276 
Reset()277   void Reset() {
278     element_ = p_.classes_[class_id_];
279   }
280 
281  private:
282   const Partition<T>& p_;
283 
284   const Element* element_;
285 
286   T class_id_;
287 };
288 }  // namespace fst
289 
290 #endif  // FST_LIB_PARTITION_H__
291