• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // symbol-table.cc
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file
17 // Classes to provide symbol-to-integer and integer-to-symbol mappings.
18 
19 #include "fst/lib/symbol-table.h"
20 #include "fst/lib/util.h"
21 
22 #include <string.h>
23 
24 DEFINE_bool(fst_compat_symbols, true,
25             "Require symbol tables to match when appropriate");
26 
27 namespace fst {
28 
29 // Maximum line length in textual symbols file.
30 const int kLineLen = 8096;
31 
32 // Identifies stream data as a symbol table (and its endianness).
33 static const int32 kSymbolTableMagicNumber = 2125658996;
34 
ReadText(const string & filename)35 SymbolTableImpl* SymbolTableImpl::ReadText(const string &filename) {
36   ifstream strm(filename.c_str());
37   if (!strm) {
38     LOG(ERROR) << "SymbolTable::ReadText: Can't open symbol file: "
39                << filename;
40     return 0;
41   }
42 
43   SymbolTableImpl* impl = new SymbolTableImpl(filename);
44 
45   int64 nline = 0;
46   char line[kLineLen];
47   while (strm.getline(line, kLineLen)) {
48     ++nline;
49     vector<char *> col;
50     SplitToVector(line, "\n\t ", &col, true);
51     if (col.size() == 0)  // empty line
52       continue;
53     if (col.size() != 2) {
54       LOG(ERROR) << "SymbolTable::ReadText: Bad number of columns (skipping), "
55                  << "file = " << filename << ", line = " << nline;
56       continue;
57     }
58     const char *symbol = col[0];
59     const char *value = col[1];
60     char *p;
61     int64 key = strtoll(value, &p, 10);
62     if (p < value + strlen(value) || key < 0) {
63       LOG(ERROR) << "SymbolTable::ReadText: Bad non-negative integer \""
64                  << value << "\" (skipping), "
65                  << "file = " << filename << ", line = " << nline;
66       continue;
67     }
68     impl->AddSymbol(symbol, key);
69   }
70 
71   return impl;
72 }
73 
RecomputeCheckSum() const74 void SymbolTableImpl::RecomputeCheckSum() const {
75   check_sum_.Reset();
76   for (size_t i = 0; i < symbols_.size(); ++i) {
77     check_sum_.Update(symbols_[i], strlen(symbols_[i])+1);
78   }
79   check_sum_finalized_ = true;
80 }
81 
AddSymbol(const string & symbol,int64 key)82 int64 SymbolTableImpl::AddSymbol(const string& symbol, int64 key) {
83   hash_map<string, int64>::const_iterator it =
84     symbol_map_.find(symbol);
85   if (it == symbol_map_.end()) {  // only add if not in table
86     check_sum_finalized_ = false;
87 
88     char *csymbol = new char[symbol.size() + 1];
89     strcpy(csymbol, symbol.c_str());
90     symbols_.push_back(csymbol);
91     key_map_[key] = csymbol;
92     symbol_map_[csymbol] = key;
93 
94     if (key >= available_key_) {
95       available_key_ = key + 1;
96     }
97   }
98 
99   return key;
100 }
101 
Read(istream & strm,const string & source)102 SymbolTableImpl* SymbolTableImpl::Read(istream &strm,
103                                        const string &source) {
104   int32 magic_number = 0;
105   ReadType(strm, &magic_number);
106   if (magic_number != kSymbolTableMagicNumber) {
107     LOG(ERROR) << "SymbolTable::Read: read failed";
108     return 0;
109   }
110   string name;
111   ReadType(strm, &name);
112   SymbolTableImpl* impl = new SymbolTableImpl(name);
113   ReadType(strm, &impl->available_key_);
114   int64 size;
115   ReadType(strm, &size);
116   string symbol;
117   int64 key = 0;
118   for (size_t i = 0; i < size; ++i) {
119     ReadType(strm, &symbol);
120     ReadType(strm, &key);
121     impl->AddSymbol(symbol, key);
122   }
123   if (!strm)
124     LOG(ERROR) << "SymbolTable::Read: read failed";
125   return impl;
126 }
127 
Write(ostream & strm) const128 bool SymbolTableImpl::Write(ostream &strm) const {
129   WriteType(strm, kSymbolTableMagicNumber);
130   WriteType(strm, name_);
131   WriteType(strm, available_key_);
132   int64 size = symbols_.size();
133   WriteType(strm, size);
134   for (size_t i = 0; i < symbols_.size(); ++i) {
135     const string symbol = symbols_[i];
136     WriteType(strm, symbol);
137     hash_map<string, int64>::const_iterator it = symbol_map_.find(symbol);
138     WriteType(strm, it->second);
139   }
140   strm.flush();
141   if (!strm)
142     LOG(ERROR) << "SymbolTable::Write: write failed";
143   return strm;
144 }
145 
WriteText(ostream & strm) const146 bool SymbolTableImpl::WriteText(ostream &strm) const {
147   for (size_t i = 0; i < symbols_.size(); ++i) {
148     char line[kLineLen];
149     snprintf(line, kLineLen, "%s\t%lld\n", symbols_[i], Find(symbols_[i]));
150     strm.write(line, strlen(line));
151   }
152   strm.flush();
153   if (!strm)
154     LOG(ERROR) << "SymbolTable::WriteText: write failed";
155   return strm;
156 }
157 
158 }  // namespace fst
159