• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // util.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST utility inline definitions.
20 
21 #ifndef FST_LIB_UTIL_H__
22 #define FST_LIB_UTIL_H__
23 
24 #include <unordered_map>
25 using std::tr1::unordered_map;
26 using std::tr1::unordered_multimap;
27 #include <unordered_set>
28 using std::tr1::unordered_set;
29 using std::tr1::unordered_multiset;
30 #include <list>
31 #include <map>
32 #include <set>
33 #include <sstream>
34 #include <string>
35 #include <vector>
36 using std::vector;
37 
38 
39 #include <fst/compat.h>
40 #include <fst/types.h>
41 
42 #include <iostream>
43 #include <fstream>
44 
45 //
46 // UTILITY FOR ERROR HANDLING
47 //
48 
// Flag consulted by FSTERROR() below. DECLARE_bool is not defined in this
// header; presumably it comes from the project's flag support pulled in via
// <fst/compat.h> and declares FLAGS_fst_error_fatal (TODO confirm).
49 DECLARE_bool(fst_error_fatal);
50 
// Selects the log stream for reporting a library error: LOG(FATAL) when
// FLAGS_fst_error_fatal is true, LOG(ERROR) otherwise. The result is used as
// a stream, e.g. FSTERROR() << "message" (see StrToWeight in this file).
51 #define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
52 
53 namespace fst {
54 
55 //
56 // UTILITIES FOR TYPE I/O
57 //
58 
59 // Read some types from an input stream.
60 
// Generic case: the object deserializes itself through its own
// `Read(istream &)` member; whatever that member returns is handed back.
template <typename T>
inline istream &ReadType(istream &strm, T *t) {
  T &target = *t;
  return target.Read(strm);
}
66 
// Fixed size, contiguous memory read: the object's storage is filled with
// the next sizeof(T) bytes of the stream, exactly as they appear there.
// One non-template overload is generated per built-in type listed below.
#define READ_POD_TYPE(T)                            \
inline istream &ReadType(istream &strm, T *t) {     \
  char *const raw = reinterpret_cast<char *>(t);    \
  return strm.read(raw, sizeof(T));                 \
}

// Boolean and character types.
READ_POD_TYPE(bool);
READ_POD_TYPE(char);
READ_POD_TYPE(signed char);
READ_POD_TYPE(unsigned char);
// Integer types.
READ_POD_TYPE(short);
READ_POD_TYPE(unsigned short);
READ_POD_TYPE(int);
READ_POD_TYPE(unsigned int);
READ_POD_TYPE(long);
READ_POD_TYPE(unsigned long);
READ_POD_TYPE(long long);
READ_POD_TYPE(unsigned long long);
// Floating-point types.
READ_POD_TYPE(float);
READ_POD_TYPE(double);
87 
// String case: a 32-bit length prefix followed by that many raw bytes.
//
// `*s` is cleared first and then receives exactly the bytes that could be
// read. If the stream ends or fails before the announced length is reached,
// the stream's error state is set (by the failed read) and `*s` holds only
// the bytes actually obtained; nothing is appended after a failure. A zero
// or negative length leaves `*s` empty.
inline istream &ReadType(istream &strm, string *s) {
  s->clear();
  int32 ns = 0;
  strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));

  // Pull the payload in bounded chunks: this avoids one stream call per
  // character and avoids trusting a possibly corrupt length for a large
  // up-front allocation.
  const int32 kChunkSize = 4096;
  char chunk[kChunkSize];
  int32 remaining = ns;
  while (strm && remaining > 0) {
    const int32 want = remaining < kChunkSize ? remaining : kChunkSize;
    strm.read(chunk, want);
    // gcount() is the number of bytes really delivered, which is smaller
    // than `want` when the stream ran dry.
    s->append(chunk, static_cast<string::size_type>(strm.gcount()));
    remaining -= want;
  }
  return strm;
}
100 
// Pair case: `first` is read before `second`, each through whichever
// ReadType overload matches its type. The stream passed in is returned.
template <typename S, typename T>
inline istream &ReadType(istream &strm, pair<S, T> *p) {
  pair<S, T> &entry = *p;
  ReadType(strm, &entry.first);
  ReadType(strm, &entry.second);
  return strm;
}
108 
// Pair whose first member is const: the constness of `first` is cast away
// so that it can be filled in place, then `second` is read as usual.
template <typename S, typename T>
inline istream &ReadType(istream &strm, pair<const S, T> *p) {
  S *const writable_first = const_cast<S *>(&p->first);
  ReadType(strm, writable_first);
  ReadType(strm, &p->second);
  return strm;
}
115 
// General case: the size hint is ignored; neither argument is used.
template <typename C>
void StlReserve(C * /* c */, int64 /* n */) {}
119 
// Overload for vectors: passes the element count on to vector::reserve().
template <typename S, typename T>
void StlReserve(vector<S, T> *c, int64 n) {
  vector<S, T> &sequence = *c;
  sequence.reserve(n);
}
125 
126 // STL sequence container.
127 #define READ_STL_SEQ_TYPE(C)                             \
128 template <typename S, typename T>                        \
129 inline istream &ReadType(istream &strm, C<S, T> *c) {    \
130   c->clear();                                            \
131   int64 n = 0;                                           \
132   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
133   StlReserve(c, n);                                      \
134   for (ssize_t i = 0; i < n; ++i) {                      \
135     typename C<S, T>::value_type value;                  \
136     ReadType(strm, &value);                              \
137     c->insert(c->end(), value);                          \
138   }                                                      \
139   return strm;                                           \
140 }
141 
142 READ_STL_SEQ_TYPE(vector);
143 READ_STL_SEQ_TYPE(list);
144 
145 // STL associative container.
146 #define READ_STL_ASSOC_TYPE(C)                           \
147 template <typename S, typename T, typename U>            \
148 inline istream &ReadType(istream &strm, C<S, T, U> *c) { \
149   c->clear();                                            \
150   int64 n = 0;                                           \
151   strm.read(reinterpret_cast<char *>(&n), sizeof(n));    \
152   for (ssize_t i = 0; i < n; ++i) {                      \
153     typename C<S, T, U>::value_type value;               \
154     ReadType(strm, &value);                              \
155     c->insert(value);                                    \
156   }                                                      \
157   return strm;                                           \
158 }
159 
160 READ_STL_ASSOC_TYPE(set);
161 READ_STL_ASSOC_TYPE(unordered_set);
162 READ_STL_ASSOC_TYPE(map);
163 READ_STL_ASSOC_TYPE(unordered_map);
164 
165 // Write some types to an output stream.
166 
// Generic case: the object serializes itself through its own const
// `Write(ostream &)` member; the result of that call is ignored and the
// stream passed in is returned.
//
// The object is taken by const reference so that writing it does not first
// copy it (which also lets non-copyable types be written).
template <typename T>
inline ostream &WriteType(ostream &strm, const T &t) {
  t.Write(strm);
  return strm;
}
173 
// Fixed size, contiguous memory write: the sizeof(T) bytes of the value are
// copied to the stream exactly as they sit in memory. One non-template
// overload is generated per built-in type listed below.
#define WRITE_POD_TYPE(T)                                           \
inline ostream &WriteType(ostream &strm, const T t) {               \
  const char *const raw = reinterpret_cast<const char *>(&t);       \
  return strm.write(raw, sizeof(T));                                \
}

// Boolean and character types.
WRITE_POD_TYPE(bool);
WRITE_POD_TYPE(char);
WRITE_POD_TYPE(signed char);
WRITE_POD_TYPE(unsigned char);
// Integer types.
WRITE_POD_TYPE(short);
WRITE_POD_TYPE(unsigned short);
WRITE_POD_TYPE(int);
WRITE_POD_TYPE(unsigned int);
WRITE_POD_TYPE(long);
WRITE_POD_TYPE(unsigned long);
WRITE_POD_TYPE(long long);
WRITE_POD_TYPE(unsigned long long);
// Floating-point types.
WRITE_POD_TYPE(float);
WRITE_POD_TYPE(double);
194 
// String case: the size is stored as a 32-bit integer, immediately followed
// by that many bytes of the string's data.
inline ostream &WriteType(ostream &strm, const string &s) {
  const int32 length = s.size();
  strm.write(reinterpret_cast<const char *>(&length), sizeof(length));
  strm.write(s.data(), length);
  return strm;
}
201 
// Pair case: `first` is written before `second`, each through whichever
// WriteType overload matches its type. The stream passed in is returned.
template <typename S, typename T>
inline ostream &WriteType(ostream &strm, const pair<S, T> &p) {
  const S &head = p.first;
  const T &tail = p.second;
  WriteType(strm, head);
  WriteType(strm, tail);
  return strm;
}
209 
210 // STL sequence container.
211 #define WRITE_STL_SEQ_TYPE(C)                                                \
212 template <typename S, typename T>                                            \
213 inline ostream &WriteType(ostream &strm, const C<S, T> &c) {                 \
214   int64 n = c.size();                                                        \
215   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
216   for (typename C<S, T>::const_iterator it = c.begin();                      \
217        it != c.end(); ++it)                                                  \
218      WriteType(strm, *it);                                                   \
219   return strm;                                                               \
220 }
221 
222 WRITE_STL_SEQ_TYPE(vector);
223 WRITE_STL_SEQ_TYPE(list);
224 
225 // STL associative container.
226 #define WRITE_STL_ASSOC_TYPE(C)                                              \
227 template <typename S, typename T, typename U>                                \
228 inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) {              \
229   int64 n = c.size();                                                        \
230   strm.write(reinterpret_cast<char *>(&n), sizeof(n));                       \
231   for (typename C<S, T, U>::const_iterator it = c.begin();                   \
232        it != c.end(); ++it)                                                  \
233      WriteType(strm, *it);                                                   \
234   return strm;                                                               \
235 }
236 
237 WRITE_STL_ASSOC_TYPE(set);
238 WRITE_STL_ASSOC_TYPE(unordered_set);
239 WRITE_STL_ASSOC_TYPE(map);
240 WRITE_STL_ASSOC_TYPE(unordered_map);
241 
242 // Utilities for converting between int64 or Weight and string.
243 
// Converts the string `s` to an int64. Declaration only; the definition is
// not in this header, so the notes below are to be confirmed against it.
//   src, nline      presumably the source name and line number reported in
//                   diagnostics, as StrToWeight in this file does.
//   allow_negative  presumably whether a negative value is accepted.
//   error           optional out-flag (null by default). ReadLabelPairs in
//                   this file passes one and treats `true` afterwards as a
//                   failed conversion.
244 int64 StrToInt64(const string &s, const string &src, size_t nline,
245                  bool allow_negative, bool *error = 0);
246 
247 template <typename Weight>
StrToWeight(const string & s,const string & src,size_t nline)248 Weight StrToWeight(const string &s, const string &src, size_t nline) {
249   Weight w;
250   istringstream strm(s);
251   strm >> w;
252   if (!strm) {
253     FSTERROR() << "StrToWeight: Bad weight = \"" << s
254                << "\", source = " << src << ", line = " << nline;
255     return Weight::NoWeight();
256   }
257   return w;
258 }
259 
// Produces the text form of `n` in `*s`. Declaration only; whether `*s` is
// overwritten or appended to (as WeightToStr in this file does) has to be
// checked against the definition.
260 void Int64ToStr(int64 n, string *s);
261 
// Appends the text form of `w` to `*s`, keeping whatever `*s` already
// contains. The weight is formatted with its stream insertion operator on a
// stream whose precision is set to 9.
template <typename Weight>
void WeightToStr(Weight w, string *s) {
  ostringstream formatted;
  formatted.precision(9);
  formatted << w;
  s->append(formatted.str());
}
269 
270 // Utilities for reading/writing label pairs
271 
272 // Returns true on success
273 template <typename Label>
274 bool ReadLabelPairs(const string& filename,
275                     vector<pair<Label, Label> >* pairs,
276                     bool allow_negative = false) {
277   ifstream strm(filename.c_str());
278 
279   if (!strm) {
280     LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
281     return false;
282   }
283 
284   const int kLineLen = 8096;
285   char line[kLineLen];
286   size_t nline = 0;
287 
288   pairs->clear();
289   while (strm.getline(line, kLineLen)) {
290     ++nline;
291     vector<char *> col;
292     SplitToVector(line, "\n\t ", &col, true);
293     if (col.size() == 0 || col[0][0] == '\0')  // empty line
294       continue;
295     if (col.size() != 2) {
296       LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
297                  << "file = " << filename << ", line = " << nline;
298       return false;
299     }
300 
301     bool err;
302     Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
303     if (err) return false;
304     Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
305     if (err) return false;
306     pairs->push_back(make_pair(frmlabel, tolabel));
307   }
308   return true;
309 }
310 
311 // Returns true on success
312 template <typename Label>
WriteLabelPairs(const string & filename,const vector<pair<Label,Label>> & pairs)313 bool WriteLabelPairs(const string& filename,
314                      const vector<pair<Label, Label> >& pairs) {
315   ostream *strm = &std::cout;
316   if (!filename.empty()) {
317     strm = new ofstream(filename.c_str());
318     if (!*strm) {
319       LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
320       return false;
321     }
322   }
323 
324   for (ssize_t n = 0; n < pairs.size(); ++n)
325     *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
326 
327   if (!*strm) {
328     LOG(ERROR) << "WriteLabelPairs: Write failed: "
329                << (filename.empty() ? "standard output" : filename);
330     return false;
331   }
332   if (strm != &std::cout)
333     delete strm;
334   return true;
335 }
336 
337 // Utilities for converting a type name to a legal C symbol.
338 
// Takes the name through `s` and returns nothing, so the converted symbol is
// presumably written back into `*s`. Declaration only; which characters are
// replaced, and with what, is determined by the definition elsewhere.
339 void ConvertToLegalCSymbol(string *s);
340 
341 
342 //
343 // UTILITIES FOR STREAM I/O
344 //
345 
// Declarations only; the definitions are not in this header. Judging by the
// names, these presumably move `strm` forward to a position that is a
// multiple of `align`, and the bool result presumably reports success.
// TODO confirm both, and the unit of `align`, against the definitions.
346 bool AlignInput(istream &strm, int align);
347 bool AlignOutput(ostream &strm, int align);
348 
349 //
350 // UTILITIES FOR PROTOCOL BUFFER I/O
351 //
352 
353 
// A set that also remembers the smallest and largest key inserted so far.
// Lookups for keys outside that interval are answered without searching the
// underlying STL set, which pays off when most non-members fall outside it.
// 'Key' must have ==, != and < defined. 'NoKey' is a reserved value that
// marks a bound as not yet recorded and must not be used as a real key.
// 'Find()' returns an STL const_iterator to the match, or 'End()' if the key
// is absent.
template <class Key, Key NoKey>
class CompactSet {
public:
  typedef typename set<Key>::const_iterator const_iterator;

  // Creates an empty set with both bounds unrecorded.
  CompactSet() : min_key_(NoKey), max_key_(NoKey) { }

  // Copies the elements together with the recorded bounds.
  CompactSet(const CompactSet<Key, NoKey> &compact_set)
    : set_(compact_set.set_),
      min_key_(compact_set.min_key_),
      max_key_(compact_set.max_key_) { }

  // Adds 'key' and widens the recorded interval to include it.
  void Insert(Key key) {
    set_.insert(key);

    if (min_key_ == NoKey) {
      min_key_ = key;
    } else if (key < min_key_) {
      min_key_ = key;
    }

    if (max_key_ == NoKey) {
      max_key_ = key;
    } else if (max_key_ < key) {
      max_key_ = key;
    }
  }

  // Removes every element and forgets the recorded interval.
  void Clear() {
    set_.clear();
    min_key_ = NoKey;
    max_key_ = NoKey;
  }

  // Looks 'key' up; the underlying set is searched only when a bound has
  // been recorded and 'key' lies within [min_key_, max_key_].
  const_iterator Find(Key key) const {
    const bool in_range =
        min_key_ != NoKey && !(key < min_key_) && !(max_key_ < key);
    return in_range ? set_.find(key) : set_.end();
  }

  // Iteration over the elements in the underlying set's order.
  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

private:
  set<Key> set_;   // the elements themselves
  Key min_key_;    // smallest key inserted, or NoKey if none yet
  Key max_key_;    // largest key inserted, or NoKey if none yet

  void operator=(const CompactSet<Key, NoKey> &);  // disallow
};
406 
407 }  // namespace fst
408 
409 #endif  // FST_LIB_UTIL_H__
410