1 // partition.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: johans@google.com (Johan Schalkwyk) 17 // 18 // \file Functions and classes to create a partition of states 19 // 20 21 #ifndef FST_LIB_PARTITION_H__ 22 #define FST_LIB_PARTITION_H__ 23 24 #include <vector> 25 using std::vector; 26 #include <algorithm> 27 28 29 #include <fst/queue.h> 30 31 32 33 namespace fst { 34 35 template <typename T> class PartitionIterator; 36 37 // \class Partition 38 // \brief Defines a partitioning of states. Typically used to represent 39 // equivalence classes for Fst operations like minimization. 40 // 41 template <typename T> 42 class Partition { 43 friend class PartitionIterator<T>; 44 45 struct Element { ElementElement46 Element() : value(0), next(0), prev(0) {} ElementElement47 Element(T v) : value(v), next(0), prev(0) {} 48 49 T value; 50 Element* next; 51 Element* prev; 52 }; 53 54 public: Partition()55 Partition() {} 56 Partition(T num_states)57 Partition(T num_states) { 58 Initialize(num_states); 59 } 60 ~Partition()61 ~Partition() { 62 for (size_t i = 0; i < elements_.size(); ++i) 63 delete elements_[i]; 64 } 65 66 // Create an empty partition for num_states. At initialization time 67 // all elements are not assigned to a class (i.e class_index = -1). 68 // Initialize just creates num_states of elements. All element 69 // operations are then done by simply disconnecting the element from 70 // it current class and placing it at the head of the next class. Initialize(size_t num_states)71 void Initialize(size_t num_states) { 72 for (size_t i = 0; i < elements_.size(); ++i) 73 delete elements_[i]; 74 elements_.clear(); 75 classes_.clear(); 76 class_index_.clear(); 77 78 elements_.resize(num_states); 79 class_index_.resize(num_states, -1); 80 class_size_.reserve(num_states); 81 for (size_t i = 0; i < num_states; ++i) 82 elements_[i] = new Element(i); 83 num_states_ = num_states; 84 } 85 86 // Add a class, resize classes_ and class_size_ resource by 1. AddClass()87 size_t AddClass() { 88 size_t num_classes = classes_.size(); 89 classes_.resize(num_classes + 1, 0); 90 class_size_.resize(num_classes + 1, 0); 91 class_split_.resize(num_classes + 1, 0); 92 split_size_.resize(num_classes + 1, 0); 93 return num_classes; 94 } 95 AllocateClasses(T num_classes)96 void AllocateClasses(T num_classes) { 97 size_t n = classes_.size() + num_classes; 98 classes_.resize(n, 0); 99 class_size_.resize(n, 0); 100 class_split_.resize(n, 0); 101 split_size_.resize(n, 0); 102 } 103 104 // Add element_id to class_id. The Add method is used to initialize 105 // partition. Once elements have been added to a class, you need to 106 // use the Move() method move an element from once class to another. Add(T element_id,T class_id)107 void Add(T element_id, T class_id) { 108 Element* element = elements_[element_id]; 109 110 if (classes_[class_id]) 111 classes_[class_id]->prev = element; 112 element->next = classes_[class_id]; 113 element->prev = 0; 114 classes_[class_id] = element; 115 116 class_index_[element_id] = class_id; 117 class_size_[class_id]++; 118 } 119 120 // Move and element_id to class_id. Disconnects (removes) element 121 // from it current class and Move(T element_id,T class_id)122 void Move(T element_id, T class_id) { 123 T old_class_id = class_index_[element_id]; 124 125 Element* element = elements_[element_id]; 126 if (element->next) element->next->prev = element->prev; 127 if (element->prev) element->prev->next = element->next; 128 else classes_[old_class_id] = element->next; 129 130 Add(element_id, class_id); 131 class_size_[old_class_id]--; 132 } 133 134 // split class on the element_id SplitOn(T element_id)135 void SplitOn(T element_id) { 136 T class_id = class_index_[element_id]; 137 if (class_size_[class_id] == 1) return; 138 139 // first time class is split 140 if (split_size_[class_id] == 0) 141 visited_classes_.push_back(class_id); 142 143 // increment size of split (set of element at head of chain) 144 split_size_[class_id]++; 145 146 // update split point 147 if (class_split_[class_id] == 0) 148 class_split_[class_id] = classes_[class_id]; 149 if (class_split_[class_id] == elements_[element_id]) 150 class_split_[class_id] = elements_[element_id]->next; 151 152 // move to head of chain in same class 153 Move(element_id, class_id); 154 } 155 156 // Finalize class_id, split if required, and update class_splits, 157 // class indices of the newly created class. Returns the new_class id 158 // or -1 if no new class was created. SplitRefine(T class_id)159 T SplitRefine(T class_id) { 160 // only split if necessary 161 if (class_size_[class_id] == split_size_[class_id]) { 162 class_split_[class_id] = 0; 163 split_size_[class_id] = 0; 164 return -1; 165 } else { 166 167 T new_class = AddClass(); 168 size_t remainder = class_size_[class_id] - split_size_[class_id]; 169 if (remainder < split_size_[class_id]) { // add smaller 170 Element* split_el = class_split_[class_id]; 171 classes_[new_class] = split_el; 172 class_size_[class_id] = split_size_[class_id]; 173 class_size_[new_class] = remainder; 174 split_el->prev->next = 0; 175 split_el->prev = 0; 176 } else { 177 Element* split_el = class_split_[class_id]; 178 classes_[new_class] = classes_[class_id]; 179 class_size_[class_id] = remainder; 180 class_size_[new_class] = split_size_[class_id]; 181 split_el->prev->next = 0; 182 split_el->prev = 0; 183 classes_[class_id] = split_el; 184 } 185 186 // update class index for element in new class 187 for (Element* el = classes_[new_class]; el; el = el->next) 188 class_index_[el->value] = new_class; 189 190 class_split_[class_id] = 0; 191 split_size_[class_id] = 0; 192 193 return new_class; 194 } 195 } 196 197 // Once all states have been processed for a particular class C, we 198 // can finalize the split. FinalizeSplit() will update each block in the 199 // partition, create new once and update the queue of active classes 200 // that require further refinement. 201 template <class Queue> FinalizeSplit(Queue * L)202 void FinalizeSplit(Queue* L) { 203 for (size_t i = 0; i < visited_classes_.size(); ++i) { 204 T new_class = SplitRefine(visited_classes_[i]); 205 if (new_class != -1 && L) 206 L->Enqueue(new_class); 207 } 208 visited_classes_.clear(); 209 } 210 211 class_id(T element_id)212 const T class_id(T element_id) const { 213 return class_index_[element_id]; 214 } 215 class_sizes()216 const vector<T>& class_sizes() const { 217 return class_size_; 218 } 219 class_size(T class_id)220 const size_t class_size(T class_id) const { 221 return class_size_[class_id]; 222 } 223 num_classes()224 const T num_classes() const { 225 return classes_.size(); 226 } 227 228 229 private: 230 int num_states_; 231 232 // container of all elements (owner of ptrs) 233 vector<Element*> elements_; 234 235 // linked list of elements belonging to class 236 vector<Element*> classes_; 237 238 // pointer to split point for each class 239 vector<Element*> class_split_; 240 241 // class index of element 242 vector<T> class_index_; 243 244 // class sizes 245 vector<T> class_size_; 246 247 // size of split for each class 248 vector<T> split_size_; 249 250 // set of visited classes to be used in split refine 251 vector<T> visited_classes_; 252 }; 253 254 255 // iterate over members of a class in a partition 256 template <typename T> 257 class PartitionIterator { 258 typedef typename Partition<T>::Element Element; 259 public: PartitionIterator(const Partition<T> & partition,T class_id)260 PartitionIterator(const Partition<T>& partition, T class_id) 261 : p_(partition), 262 element_(p_.classes_[class_id]), 263 class_id_(class_id) {} 264 Done()265 bool Done() { 266 return (element_ == 0); 267 } 268 Value()269 const T Value() { 270 return (element_->value); 271 } 272 Next()273 void Next() { 274 element_ = element_->next; 275 } 276 Reset()277 void Reset() { 278 element_ = p_.classes_[class_id_]; 279 } 280 281 private: 282 const Partition<T>& p_; 283 284 const Element* element_; 285 286 T class_id_; 287 }; 288 } // namespace fst 289 290 #endif // FST_LIB_PARTITION_H__ 291