1 // util.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST utility inline definitions.
20
21 #ifndef FST_LIB_UTIL_H__
22 #define FST_LIB_UTIL_H__
23
24 #include <unordered_map>
25 using std::tr1::unordered_map;
26 using std::tr1::unordered_multimap;
27 #include <unordered_set>
28 using std::tr1::unordered_set;
29 using std::tr1::unordered_multiset;
30 #include <list>
31 #include <map>
32 #include <set>
33 #include <sstream>
34 #include <string>
35 #include <vector>
36 using std::vector;
37
38
39 #include <fst/compat.h>
40 #include <fst/types.h>
41
42 #include <iostream>
43 #include <fstream>
44
45 //
46 // UTILITY FOR ERROR HANDLING
47 //
48
// Declares the boolean flag that FSTERROR() reads as FLAGS_fst_error_fatal.
// NOTE(review): the flag's definition and default value are not in this
// header -- presumably they live in the corresponding .cc file; confirm.
DECLARE_bool(fst_error_fatal);

// Error-reporting stream selector: expands to LOG(FATAL) when
// FLAGS_fst_error_fatal is true and to LOG(ERROR) otherwise.
// Used as `FSTERROR() << "message";`.
#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR))
52
53 namespace fst {
54
55 //
56 // UTILITIES FOR TYPE I/O
57 //
58
59 // Read some types from an input stream.
60
// Generic case.
// Fallback used for any type without a more specific overload: the object
// deserializes itself through its own Read(istream &) member, and whatever
// stream reference that member returns is handed back to the caller.
template <typename T>
inline std::istream &ReadType(std::istream &strm, T *t) {
  return (*t).Read(strm);
}
66
// Fixed size, contiguous memory read.
// READ_POD_TYPE(T) defines a ReadType() overload that fills *t with sizeof(T)
// raw bytes taken from the stream. The bytes are copied as they are, with no
// byte-order conversion, and the stream is returned so callers can test it.
#define READ_POD_TYPE(T)                                        \
  inline std::istream &ReadType(std::istream &strm, T *t) {     \
    char *const raw = reinterpret_cast<char *>(t);              \
    strm.read(raw, sizeof(T));                                  \
    return strm;                                                \
  }

READ_POD_TYPE(bool);
READ_POD_TYPE(char);
READ_POD_TYPE(signed char);
READ_POD_TYPE(unsigned char);
READ_POD_TYPE(short);
READ_POD_TYPE(unsigned short);
READ_POD_TYPE(int);
READ_POD_TYPE(unsigned int);
READ_POD_TYPE(long);
READ_POD_TYPE(unsigned long);
READ_POD_TYPE(long long);
READ_POD_TYPE(unsigned long long);
READ_POD_TYPE(float);
READ_POD_TYPE(double);
87
// String case.
// Reads a string stored as a raw int32 byte count followed by that many
// characters. *s is cleared first. A non-positive count yields an empty
// string.
//
// Characters are appended one at a time, and reading stops at the first
// failed read, so a truncated or corrupt stream leaves *s holding only the
// characters that were actually read (and the stream in a failed state)
// instead of being padded with indeterminate bytes.
inline std::istream &ReadType(std::istream &strm, std::string *s) {
  s->clear();
  int32 ns = 0;
  strm.read(reinterpret_cast<char *>(&ns), sizeof(ns));
  for (int32 i = 0; i < ns; ++i) {
    char c;
    // Stop as soon as the stream runs dry; 'c' is not valid after a failure.
    if (!strm.read(&c, 1)) break;
    *s += c;
  }
  return strm;
}
100
// Pair case.
// Reads the two members in order -- first, then second -- each through the
// ReadType() overload selected for its own type.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<S, T> *p) {
  S *first = &p->first;
  T *second = &p->second;
  ReadType(strm, first);
  ReadType(strm, second);
  return strm;
}
108
// Pair whose first member is const (the value_type of map-like containers).
// The const qualifier is cast away so the key can be filled in from the
// stream; the second member is read normally.
template <typename S, typename T>
inline std::istream &ReadType(std::istream &strm, std::pair<const S, T> *p) {
  S *mutable_first = const_cast<S *>(&p->first);
  ReadType(strm, mutable_first);
  ReadType(strm, &p->second);
  return strm;
}
115
// General case - no-op.
// Containers without a dedicated overload are left untouched.
template <typename C>
void StlReserve(C *c, int64 n) {
  (void)c;
  (void)n;
}

// Specialization for vectors.
// Forwards the expected element count to vector::reserve().
template <typename S, typename T>
void StlReserve(std::vector<S, T> *c, int64 n) {
  std::vector<S, T> &target = *c;
  target.reserve(n);
}
125
// STL sequence container.
// READ_STL_SEQ_TYPE(C) defines ReadType() for the two-parameter container
// template C. The stored form is a raw int64 element count followed by the
// elements, each read with the ReadType() overload for the element type and
// appended at the end of the container. The container is cleared first.
//
// Corrupt or truncated input is handled defensively:
//  - if the count itself cannot be read, the (failed) stream is returned;
//  - a negative count sets failbit on the stream and reads nothing;
//  - reading stops at the first element that fails to read, so the container
//    holds only the elements that were read successfully and is never padded
//    with default or indeterminate values.
#define READ_STL_SEQ_TYPE(C)                                            \
  template <typename S, typename T>                                     \
  inline std::istream &ReadType(std::istream &strm, C<S, T> *c) {       \
    c->clear();                                                         \
    int64 n = 0;                                                        \
    strm.read(reinterpret_cast<char *>(&n), sizeof(n));                 \
    if (!strm) return strm;                                             \
    if (n < 0) {                                                        \
      strm.setstate(std::ios_base::failbit);                            \
      return strm;                                                      \
    }                                                                   \
    StlReserve(c, n);                                                   \
    for (int64 i = 0; i < n; ++i) {                                     \
      typename C<S, T>::value_type value;                               \
      if (!ReadType(strm, &value)) break;                               \
      c->insert(c->end(), value);                                       \
    }                                                                   \
    return strm;                                                        \
  }

READ_STL_SEQ_TYPE(vector);
READ_STL_SEQ_TYPE(list);
144
// STL associative container.
// READ_STL_ASSOC_TYPE(C) defines ReadType() for the three-parameter container
// template C. The stored form is a raw int64 element count followed by the
// elements, each read with the ReadType() overload for the container's
// value_type and inserted. The container is cleared first.
//
// Corrupt or truncated input is handled defensively:
//  - if the count itself cannot be read, the (failed) stream is returned;
//  - a negative count sets failbit on the stream and reads nothing;
//  - reading stops at the first element that fails to read, so no default or
//    indeterminate values are inserted after a failure.
#define READ_STL_ASSOC_TYPE(C)                                          \
  template <typename S, typename T, typename U>                         \
  inline std::istream &ReadType(std::istream &strm, C<S, T, U> *c) {    \
    c->clear();                                                         \
    int64 n = 0;                                                        \
    strm.read(reinterpret_cast<char *>(&n), sizeof(n));                 \
    if (!strm) return strm;                                             \
    if (n < 0) {                                                        \
      strm.setstate(std::ios_base::failbit);                            \
      return strm;                                                      \
    }                                                                   \
    for (int64 i = 0; i < n; ++i) {                                     \
      typename C<S, T, U>::value_type value;                            \
      if (!ReadType(strm, &value)) break;                               \
      c->insert(value);                                                 \
    }                                                                   \
    return strm;                                                        \
  }

READ_STL_ASSOC_TYPE(set);
READ_STL_ASSOC_TYPE(unordered_set);
READ_STL_ASSOC_TYPE(map);
READ_STL_ASSOC_TYPE(unordered_map);
164
165 // Write some types to an output stream.
166
// Generic case.
// Fallback used for any type without a more specific overload: the object
// (received by value) serializes itself through its own Write(ostream &)
// member. The result of Write() is ignored; the stream passed in is returned.
template <typename T>
inline std::ostream &WriteType(std::ostream &strm, const T t) {
  std::ostream &out = strm;
  t.Write(out);
  return out;
}
173
// Fixed size, contiguous memory write.
// WRITE_POD_TYPE(T) defines a WriteType() overload that emits the sizeof(T)
// raw bytes of the value. The bytes are written as they are, with no
// byte-order conversion, and the stream is returned so callers can test it.
#define WRITE_POD_TYPE(T)                                             \
  inline std::ostream &WriteType(std::ostream &strm, const T t) {     \
    const char *const raw = reinterpret_cast<const char *>(&t);       \
    strm.write(raw, sizeof(T));                                       \
    return strm;                                                      \
  }

WRITE_POD_TYPE(bool);
WRITE_POD_TYPE(char);
WRITE_POD_TYPE(signed char);
WRITE_POD_TYPE(unsigned char);
WRITE_POD_TYPE(short);
WRITE_POD_TYPE(unsigned short);
WRITE_POD_TYPE(int);
WRITE_POD_TYPE(unsigned int);
WRITE_POD_TYPE(long);
WRITE_POD_TYPE(unsigned long);
WRITE_POD_TYPE(long long);
WRITE_POD_TYPE(unsigned long long);
WRITE_POD_TYPE(float);
WRITE_POD_TYPE(double);
194
// String case.
// Writes the string as a raw int32 byte count followed by the string's
// characters (no terminator). The size is stored in an int32, so it is
// narrowed from the string's size_type.
inline std::ostream &WriteType(std::ostream &strm, const std::string &s) {
  const int32 length = s.size();
  const char *const length_bytes = reinterpret_cast<const char *>(&length);
  strm.write(length_bytes, sizeof(length));
  strm.write(s.data(), length);
  return strm;
}
201
// Pair case.
// Writes the two members in order -- first, then second -- each through the
// WriteType() overload selected for its own type.
template <typename S, typename T>
inline std::ostream &WriteType(std::ostream &strm, const std::pair<S, T> &p) {
  const S &first = p.first;
  const T &second = p.second;
  WriteType(strm, first);
  WriteType(strm, second);
  return strm;
}
209
// STL sequence container.
// WRITE_STL_SEQ_TYPE(C) defines WriteType() for the two-parameter container
// template C: a raw int64 element count, then every element in iteration
// order, each written with the WriteType() overload for the element type.
#define WRITE_STL_SEQ_TYPE(C)                                             \
  template <typename S, typename T>                                       \
  inline std::ostream &WriteType(std::ostream &strm, const C<S, T> &c) {  \
    typedef typename C<S, T>::const_iterator Iter;                        \
    int64 n = c.size();                                                   \
    strm.write(reinterpret_cast<char *>(&n), sizeof(n));                  \
    Iter it = c.begin();                                                  \
    while (it != c.end()) {                                               \
      WriteType(strm, *it);                                               \
      ++it;                                                               \
    }                                                                     \
    return strm;                                                          \
  }

WRITE_STL_SEQ_TYPE(vector);
WRITE_STL_SEQ_TYPE(list);
224
// STL associative container.
// WRITE_STL_ASSOC_TYPE(C) defines WriteType() for the three-parameter
// container template C: a raw int64 element count, then every element in
// iteration order, each written with the WriteType() overload for the
// container's value_type.
#define WRITE_STL_ASSOC_TYPE(C)                                              \
  template <typename S, typename T, typename U>                              \
  inline std::ostream &WriteType(std::ostream &strm, const C<S, T, U> &c) {  \
    typedef typename C<S, T, U>::const_iterator Iter;                        \
    int64 n = c.size();                                                      \
    strm.write(reinterpret_cast<char *>(&n), sizeof(n));                     \
    Iter it = c.begin();                                                     \
    while (it != c.end()) {                                                  \
      WriteType(strm, *it);                                                  \
      ++it;                                                                  \
    }                                                                        \
    return strm;                                                             \
  }

WRITE_STL_ASSOC_TYPE(set);
WRITE_STL_ASSOC_TYPE(unordered_set);
WRITE_STL_ASSOC_TYPE(map);
WRITE_STL_ASSOC_TYPE(unordered_map);
241
242 // Utilities for converting between int64 or Weight and string.
243
// Converts the string 's' to an int64. Defined outside this header.
// NOTE(review): the following is inferred from how ReadLabelPairs() calls
// this function and from the sibling StrToWeight() -- confirm against the
// definition:
//  - 'src' and 'nline' presumably name the source and line number used in
//    error messages;
//  - 'allow_negative' presumably controls whether negative values are
//    accepted;
//  - 'error', when non-null, presumably receives whether the conversion
//    failed (callers test it after the call). It defaults to a null pointer.
int64 StrToInt64(const string &s, const string &src, size_t nline,
                 bool allow_negative, bool *error = 0);
246
247 template <typename Weight>
StrToWeight(const string & s,const string & src,size_t nline)248 Weight StrToWeight(const string &s, const string &src, size_t nline) {
249 Weight w;
250 istringstream strm(s);
251 strm >> w;
252 if (!strm) {
253 FSTERROR() << "StrToWeight: Bad weight = \"" << s
254 << "\", source = " << src << ", line = " << nline;
255 return Weight::NoWeight();
256 }
257 return w;
258 }
259
// Produces the string form of 'n' in '*s'. Defined outside this header.
// NOTE(review): whether '*s' is overwritten or appended to is not visible
// here (the sibling WeightToStr() appends) -- confirm against the definition.
void Int64ToStr(int64 n, string *s);
261
// Appends the textual form of 'w' to '*s'. The weight is formatted with its
// stream insertion operator at a precision of 9; existing contents of '*s'
// are kept.
template <typename Weight>
void WeightToStr(Weight w, std::string *s) {
  std::ostringstream formatted;
  formatted.precision(9);
  formatted << w;
  s->append(formatted.str());
}
269
270 // Utilities for reading/writing label pairs
271
272 // Returns true on success
273 template <typename Label>
274 bool ReadLabelPairs(const string& filename,
275 vector<pair<Label, Label> >* pairs,
276 bool allow_negative = false) {
277 ifstream strm(filename.c_str());
278
279 if (!strm) {
280 LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename;
281 return false;
282 }
283
284 const int kLineLen = 8096;
285 char line[kLineLen];
286 size_t nline = 0;
287
288 pairs->clear();
289 while (strm.getline(line, kLineLen)) {
290 ++nline;
291 vector<char *> col;
292 SplitToVector(line, "\n\t ", &col, true);
293 if (col.size() == 0 || col[0][0] == '\0') // empty line
294 continue;
295 if (col.size() != 2) {
296 LOG(ERROR) << "ReadLabelPairs: Bad number of columns, "
297 << "file = " << filename << ", line = " << nline;
298 return false;
299 }
300
301 bool err;
302 Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err);
303 if (err) return false;
304 Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err);
305 if (err) return false;
306 pairs->push_back(make_pair(frmlabel, tolabel));
307 }
308 return true;
309 }
310
311 // Returns true on success
312 template <typename Label>
WriteLabelPairs(const string & filename,const vector<pair<Label,Label>> & pairs)313 bool WriteLabelPairs(const string& filename,
314 const vector<pair<Label, Label> >& pairs) {
315 ostream *strm = &std::cout;
316 if (!filename.empty()) {
317 strm = new ofstream(filename.c_str());
318 if (!*strm) {
319 LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename;
320 return false;
321 }
322 }
323
324 for (ssize_t n = 0; n < pairs.size(); ++n)
325 *strm << pairs[n].first << "\t" << pairs[n].second << "\n";
326
327 if (!*strm) {
328 LOG(ERROR) << "WriteLabelPairs: Write failed: "
329 << (filename.empty() ? "standard output" : filename);
330 return false;
331 }
332 if (strm != &std::cout)
333 delete strm;
334 return true;
335 }
336
// Utilities for converting a type name to a legal C symbol.

// Takes the type name through '*s'. Defined outside this header.
// NOTE(review): presumably '*s' is rewritten in place so that it becomes a
// legal C symbol; the exact replacement rules are not visible here -- confirm
// against the definition.
void ConvertToLegalCSymbol(string *s);
340
341
342 //
343 // UTILITIES FOR STREAM I/O
344 //
345
// Stream alignment helpers. Defined outside this header.
// NOTE(review): presumably these advance the stream position to a multiple
// of 'align' bytes (skipping input / writing padding respectively) and return
// whether that succeeded -- confirm against the definitions.
bool AlignInput(istream &strm, int align);
bool AlignOutput(ostream &strm, int align);
348
349 //
350 // UTILITIES FOR PROTOCOL BUFFER I/O
351 //
352
353
// A set wrapper that remembers the smallest and largest key inserted so far,
// so that a lookup for a key outside that interval is answered without
// searching the underlying set. This makes membership tests faster than a
// plain STL set when most non-members fall outside the interval.
//
// 'Key' must provide ==, != and <. 'NoKey' is a key value reserved to mean
// "no key recorded yet"; it should not otherwise be used as a member.
// Find() returns a const_iterator to the matching element, or End() when the
// key is absent. Assignment is disallowed.
template <class Key, Key NoKey>
class CompactSet {
 public:
  typedef typename std::set<Key>::const_iterator const_iterator;

  // Starts empty, with no interval recorded.
  CompactSet() : min_key_(NoKey), max_key_(NoKey) {}

  // Copies both the members and the recorded interval.
  CompactSet(const CompactSet<Key, NoKey> &compact_set)
      : set_(compact_set.set_),
        min_key_(compact_set.min_key_),
        max_key_(compact_set.max_key_) {}

  // Adds 'key' and widens the recorded interval to include it.
  void Insert(Key key) {
    set_.insert(key);
    const bool lowers_min = min_key_ == NoKey || key < min_key_;
    if (lowers_min) min_key_ = key;
    const bool raises_max = max_key_ == NoKey || max_key_ < key;
    if (raises_max) max_key_ = key;
  }

  // Removes every member and forgets the recorded interval.
  void Clear() {
    set_.clear();
    max_key_ = NoKey;
    min_key_ = NoKey;
  }

  // Looks up 'key'; keys outside the recorded interval are rejected without
  // searching the set.
  const_iterator Find(Key key) const {
    if (min_key_ == NoKey) return set_.end();  // nothing inserted yet
    if (key < min_key_) return set_.end();     // below the interval
    if (max_key_ < key) return set_.end();     // above the interval
    return set_.find(key);
  }

  const_iterator Begin() const { return set_.begin(); }

  const_iterator End() const { return set_.end(); }

 private:
  std::set<Key> set_;
  Key min_key_;  // smallest key inserted, or NoKey
  Key max_key_;  // largest key inserted, or NoKey

  void operator=(const CompactSet<Key, NoKey> &);  // disallow
};
406
407 } // namespace fst
408
409 #endif // FST_LIB_UTIL_H__
410