1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "common/fml-parser.h"
18
19 #include <ctype.h>
20 #include <string>
21
22 #include "util/base/logging.h"
23 #include "util/strings/numbers.h"
24
25 namespace libtextclassifier {
26 namespace nlp_core {
27
28 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// a letter, an underscore or a slash.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isalpha(uc) || (c == '_') || (c == '/');
}
32
// Returns true iff character c can appear inside an identifier: a letter, a
// digit, an underscore, a dash or a slash.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isalnum(uc) || (c == '_') || (c == '-') || (c == '/');
}
37
// Returns true iff character c can appear at the beginning of a number: a
// digit or a sign.
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isdigit(uc) || (c == '+') || (c == '-');
}
42
// Returns true iff character c can appear inside a number: a digit or a
// decimal point.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers require a value representable as unsigned char
  // (or EOF); passing a negative plain char is undefined behavior.
  const unsigned char uc = static_cast<unsigned char>(c);
  return isdigit(uc) || (c == '.');
}
47 } // namespace
48
Initialize(const std::string & source)49 bool FMLParser::Initialize(const std::string &source) {
50 // Initialize parser state.
51 source_ = source;
52 current_ = source_.begin();
53 item_start_ = line_start_ = current_;
54 line_number_ = item_line_number_ = 1;
55
56 // Read first input item.
57 return NextItem();
58 }
59
ReportError(const std::string & error_message)60 void FMLParser::ReportError(const std::string &error_message) {
61 const int position = item_start_ - line_start_ + 1;
62 const std::string line(line_start_, current_);
63
64 TC_LOG(ERROR) << "Error in feature model, line " << item_line_number_
65 << ", position " << position << ": " << error_message
66 << "\n " << line << " <--HERE";
67 }
68
Next()69 void FMLParser::Next() {
70 // Move to the next input character. If we are at a line break update line
71 // number and line start position.
72 if (CurrentChar() == '\n') {
73 ++line_number_;
74 ++current_;
75 line_start_ = current_;
76 } else {
77 ++current_;
78 }
79 }
80
NextItem()81 bool FMLParser::NextItem() {
82 // Skip white space and comments.
83 while (!eos()) {
84 if (CurrentChar() == '#') {
85 // Skip comment.
86 while (!eos() && CurrentChar() != '\n') Next();
87 } else if (isspace(CurrentChar())) {
88 // Skip whitespace.
89 while (!eos() && isspace(CurrentChar())) Next();
90 } else {
91 break;
92 }
93 }
94
95 // Record start position for next item.
96 item_start_ = current_;
97 item_line_number_ = line_number_;
98
99 // Check for end of input.
100 if (eos()) {
101 item_type_ = END;
102 return true;
103 }
104
105 // Parse number.
106 if (IsValidCharAtStartOfNumber(CurrentChar())) {
107 std::string::iterator start = current_;
108 Next();
109 while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
110 item_text_.assign(start, current_);
111 item_type_ = NUMBER;
112 return true;
113 }
114
115 // Parse std::string.
116 if (CurrentChar() == '"') {
117 Next();
118 std::string::iterator start = current_;
119 while (CurrentChar() != '"') {
120 if (eos()) {
121 ReportError("Unterminated string");
122 return false;
123 }
124 Next();
125 }
126 item_text_.assign(start, current_);
127 item_type_ = STRING;
128 Next();
129 return true;
130 }
131
132 // Parse identifier name.
133 if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
134 std::string::iterator start = current_;
135 while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
136 Next();
137 }
138 item_text_.assign(start, current_);
139 item_type_ = NAME;
140 return true;
141 }
142
143 // Single character item.
144 item_type_ = CurrentChar();
145 Next();
146 return true;
147 }
148
Parse(const std::string & source,FeatureExtractorDescriptor * result)149 bool FMLParser::Parse(const std::string &source,
150 FeatureExtractorDescriptor *result) {
151 // Initialize parser.
152 if (!Initialize(source)) {
153 return false;
154 }
155
156 while (item_type_ != END) {
157 // Current item should be a feature name.
158 if (item_type_ != NAME) {
159 ReportError("Feature type name expected");
160 return false;
161 }
162 std::string name = item_text_;
163 if (!NextItem()) {
164 return false;
165 }
166
167 // Parse feature.
168 FeatureFunctionDescriptor *descriptor = result->add_feature();
169 descriptor->set_type(name);
170 if (!ParseFeature(descriptor)) {
171 return false;
172 }
173 }
174
175 return true;
176 }
177
ParseFeature(FeatureFunctionDescriptor * result)178 bool FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
179 // Parse argument and parameters.
180 if (item_type_ == '(') {
181 if (!NextItem()) return false;
182 if (!ParseParameter(result)) return false;
183 while (item_type_ == ',') {
184 if (!NextItem()) return false;
185 if (!ParseParameter(result)) return false;
186 }
187
188 if (item_type_ != ')') {
189 ReportError(") expected");
190 return false;
191 }
192 if (!NextItem()) return false;
193 }
194
195 // Parse feature name.
196 if (item_type_ == ':') {
197 if (!NextItem()) return false;
198 if (item_type_ != NAME && item_type_ != STRING) {
199 ReportError("Feature name expected");
200 return false;
201 }
202 std::string name = item_text_;
203 if (!NextItem()) return false;
204
205 // Set feature name.
206 result->set_name(name);
207 }
208
209 // Parse sub-features.
210 if (item_type_ == '.') {
211 // Parse dotted sub-feature.
212 if (!NextItem()) return false;
213 if (item_type_ != NAME) {
214 ReportError("Feature type name expected");
215 return false;
216 }
217 std::string type = item_text_;
218 if (!NextItem()) return false;
219
220 // Parse sub-feature.
221 FeatureFunctionDescriptor *subfeature = result->add_feature();
222 subfeature->set_type(type);
223 if (!ParseFeature(subfeature)) return false;
224 } else if (item_type_ == '{') {
225 // Parse sub-feature block.
226 if (!NextItem()) return false;
227 while (item_type_ != '}') {
228 if (item_type_ != NAME) {
229 ReportError("Feature type name expected");
230 return false;
231 }
232 std::string type = item_text_;
233 if (!NextItem()) return false;
234
235 // Parse sub-feature.
236 FeatureFunctionDescriptor *subfeature = result->add_feature();
237 subfeature->set_type(type);
238 if (!ParseFeature(subfeature)) return false;
239 }
240 if (!NextItem()) return false;
241 }
242 return true;
243 }
244
ParseParameter(FeatureFunctionDescriptor * result)245 bool FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
246 if (item_type_ == NUMBER) {
247 int32 argument;
248 if (!ParseInt32(item_text_.c_str(), &argument)) {
249 ReportError("Unable to parse number");
250 return false;
251 }
252 if (!NextItem()) return false;
253
254 // Set default argument for feature.
255 result->set_argument(argument);
256 } else if (item_type_ == NAME) {
257 std::string name = item_text_;
258 if (!NextItem()) return false;
259 if (item_type_ != '=') {
260 ReportError("= expected");
261 return false;
262 }
263 if (!NextItem()) return false;
264 if (item_type_ >= END) {
265 ReportError("Parameter value expected");
266 return false;
267 }
268 std::string value = item_text_;
269 if (!NextItem()) return false;
270
271 // Add parameter to feature.
272 Parameter *parameter;
273 parameter = result->add_parameter();
274 parameter->set_name(name);
275 parameter->set_value(value);
276 } else {
277 ReportError("Syntax error in parameter list");
278 return false;
279 }
280 return true;
281 }
282
ToFMLFunction(const FeatureFunctionDescriptor & function,std::string * output)283 void ToFMLFunction(const FeatureFunctionDescriptor &function,
284 std::string *output) {
285 output->append(function.type());
286 if (function.argument() != 0 || function.parameter_size() > 0) {
287 output->append("(");
288 bool first = true;
289 if (function.argument() != 0) {
290 output->append(IntToString(function.argument()));
291 first = false;
292 }
293 for (int i = 0; i < function.parameter_size(); ++i) {
294 if (!first) output->append(",");
295 output->append(function.parameter(i).name());
296 output->append("=");
297 output->append("\"");
298 output->append(function.parameter(i).value());
299 output->append("\"");
300 first = false;
301 }
302 output->append(")");
303 }
304 }
305
ToFML(const FeatureFunctionDescriptor & function,std::string * output)306 void ToFML(const FeatureFunctionDescriptor &function, std::string *output) {
307 ToFMLFunction(function, output);
308 if (function.feature_size() == 1) {
309 output->append(".");
310 ToFML(function.feature(0), output);
311 } else if (function.feature_size() > 1) {
312 output->append(" { ");
313 for (int i = 0; i < function.feature_size(); ++i) {
314 if (i > 0) output->append(" ");
315 ToFML(function.feature(i), output);
316 }
317 output->append(" } ");
318 }
319 }
320
ToFML(const FeatureExtractorDescriptor & extractor,std::string * output)321 void ToFML(const FeatureExtractorDescriptor &extractor, std::string *output) {
322 for (int i = 0; i < extractor.feature_size(); ++i) {
323 ToFML(extractor.feature(i), output);
324 output->append("\n");
325 }
326 }
327
328 } // namespace nlp_core
329 } // namespace libtextclassifier
330