• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "common/fml-parser.h"
18 
19 #include <ctype.h>
20 #include <string>
21 
22 #include "util/base/logging.h"
23 #include "util/strings/numbers.h"
24 
25 namespace libtextclassifier {
26 namespace nlp_core {
27 
28 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// a letter, '_' or '/'.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers have undefined behavior when handed a negative
  // value other than EOF, which a plain (possibly signed) char can be for
  // bytes >= 0x80.  Convert to unsigned char before classifying.
  return isalpha(static_cast<unsigned char>(c)) || (c == '_') || (c == '/');
}
32 
// Returns true iff character c can appear inside an identifier: a letter, a
// digit, '_', '-' or '/'.
inline bool IsValidCharInsideIdentifier(char c) {
  // isalnum() has undefined behavior for negative arguments other than EOF, so
  // a plain (possibly signed) char is converted to unsigned char first.
  return isalnum(static_cast<unsigned char>(c)) || (c == '_') || (c == '-') ||
         (c == '/');
}
37 
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a leading sign ('+' or '-').
inline bool IsValidCharAtStartOfNumber(char c) {
  // isdigit() has undefined behavior for negative arguments other than EOF, so
  // a plain (possibly signed) char is converted to unsigned char first.
  return isdigit(static_cast<unsigned char>(c)) || (c == '+') || (c == '-');
}
42 
// Returns true iff character c can appear inside a number (after its first
// character): a decimal digit or '.'.
inline bool IsValidCharInsideNumber(char c) {
  // isdigit() has undefined behavior for negative arguments other than EOF, so
  // a plain (possibly signed) char is converted to unsigned char first.
  return isdigit(static_cast<unsigned char>(c)) || (c == '.');
}
47 }  // namespace
48 
Initialize(const std::string & source)49 bool FMLParser::Initialize(const std::string &source) {
50   // Initialize parser state.
51   source_ = source;
52   current_ = source_.begin();
53   item_start_ = line_start_ = current_;
54   line_number_ = item_line_number_ = 1;
55 
56   // Read first input item.
57   return NextItem();
58 }
59 
ReportError(const std::string & error_message)60 void FMLParser::ReportError(const std::string &error_message) {
61   const int position = item_start_ - line_start_ + 1;
62   const std::string line(line_start_, current_);
63 
64   TC_LOG(ERROR) << "Error in feature model, line " << item_line_number_
65                 << ", position " << position << ": " << error_message
66                 << "\n    " << line << " <--HERE";
67 }
68 
Next()69 void FMLParser::Next() {
70   // Move to the next input character. If we are at a line break update line
71   // number and line start position.
72   if (CurrentChar() == '\n') {
73     ++line_number_;
74     ++current_;
75     line_start_ = current_;
76   } else {
77     ++current_;
78   }
79 }
80 
NextItem()81 bool FMLParser::NextItem() {
82   // Skip white space and comments.
83   while (!eos()) {
84     if (CurrentChar() == '#') {
85       // Skip comment.
86       while (!eos() && CurrentChar() != '\n') Next();
87     } else if (isspace(CurrentChar())) {
88       // Skip whitespace.
89       while (!eos() && isspace(CurrentChar())) Next();
90     } else {
91       break;
92     }
93   }
94 
95   // Record start position for next item.
96   item_start_ = current_;
97   item_line_number_ = line_number_;
98 
99   // Check for end of input.
100   if (eos()) {
101     item_type_ = END;
102     return true;
103   }
104 
105   // Parse number.
106   if (IsValidCharAtStartOfNumber(CurrentChar())) {
107     std::string::iterator start = current_;
108     Next();
109     while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
110     item_text_.assign(start, current_);
111     item_type_ = NUMBER;
112     return true;
113   }
114 
115   // Parse std::string.
116   if (CurrentChar() == '"') {
117     Next();
118     std::string::iterator start = current_;
119     while (CurrentChar() != '"') {
120       if (eos()) {
121         ReportError("Unterminated string");
122         return false;
123       }
124       Next();
125     }
126     item_text_.assign(start, current_);
127     item_type_ = STRING;
128     Next();
129     return true;
130   }
131 
132   // Parse identifier name.
133   if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
134     std::string::iterator start = current_;
135     while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
136       Next();
137     }
138     item_text_.assign(start, current_);
139     item_type_ = NAME;
140     return true;
141   }
142 
143   // Single character item.
144   item_type_ = CurrentChar();
145   Next();
146   return true;
147 }
148 
Parse(const std::string & source,FeatureExtractorDescriptor * result)149 bool FMLParser::Parse(const std::string &source,
150                       FeatureExtractorDescriptor *result) {
151   // Initialize parser.
152   if (!Initialize(source)) {
153     return false;
154   }
155 
156   while (item_type_ != END) {
157     // Current item should be a feature name.
158     if (item_type_ != NAME) {
159       ReportError("Feature type name expected");
160       return false;
161     }
162     std::string name = item_text_;
163     if (!NextItem()) {
164       return false;
165     }
166 
167     // Parse feature.
168     FeatureFunctionDescriptor *descriptor = result->add_feature();
169     descriptor->set_type(name);
170     if (!ParseFeature(descriptor)) {
171       return false;
172     }
173   }
174 
175   return true;
176 }
177 
ParseFeature(FeatureFunctionDescriptor * result)178 bool FMLParser::ParseFeature(FeatureFunctionDescriptor *result) {
179   // Parse argument and parameters.
180   if (item_type_ == '(') {
181     if (!NextItem()) return false;
182     if (!ParseParameter(result)) return false;
183     while (item_type_ == ',') {
184       if (!NextItem()) return false;
185       if (!ParseParameter(result)) return false;
186     }
187 
188     if (item_type_ != ')') {
189       ReportError(") expected");
190       return false;
191     }
192     if (!NextItem()) return false;
193   }
194 
195   // Parse feature name.
196   if (item_type_ == ':') {
197     if (!NextItem()) return false;
198     if (item_type_ != NAME && item_type_ != STRING) {
199       ReportError("Feature name expected");
200       return false;
201     }
202     std::string name = item_text_;
203     if (!NextItem()) return false;
204 
205     // Set feature name.
206     result->set_name(name);
207   }
208 
209   // Parse sub-features.
210   if (item_type_ == '.') {
211     // Parse dotted sub-feature.
212     if (!NextItem()) return false;
213     if (item_type_ != NAME) {
214       ReportError("Feature type name expected");
215       return false;
216     }
217     std::string type = item_text_;
218     if (!NextItem()) return false;
219 
220     // Parse sub-feature.
221     FeatureFunctionDescriptor *subfeature = result->add_feature();
222     subfeature->set_type(type);
223     if (!ParseFeature(subfeature)) return false;
224   } else if (item_type_ == '{') {
225     // Parse sub-feature block.
226     if (!NextItem()) return false;
227     while (item_type_ != '}') {
228       if (item_type_ != NAME) {
229         ReportError("Feature type name expected");
230         return false;
231       }
232       std::string type = item_text_;
233       if (!NextItem()) return false;
234 
235       // Parse sub-feature.
236       FeatureFunctionDescriptor *subfeature = result->add_feature();
237       subfeature->set_type(type);
238       if (!ParseFeature(subfeature)) return false;
239     }
240     if (!NextItem()) return false;
241   }
242   return true;
243 }
244 
ParseParameter(FeatureFunctionDescriptor * result)245 bool FMLParser::ParseParameter(FeatureFunctionDescriptor *result) {
246   if (item_type_ == NUMBER) {
247     int32 argument;
248     if (!ParseInt32(item_text_.c_str(), &argument)) {
249       ReportError("Unable to parse number");
250       return false;
251     }
252     if (!NextItem()) return false;
253 
254     // Set default argument for feature.
255     result->set_argument(argument);
256   } else if (item_type_ == NAME) {
257     std::string name = item_text_;
258     if (!NextItem()) return false;
259     if (item_type_ != '=') {
260       ReportError("= expected");
261       return false;
262     }
263     if (!NextItem()) return false;
264     if (item_type_ >= END) {
265       ReportError("Parameter value expected");
266       return false;
267     }
268     std::string value = item_text_;
269     if (!NextItem()) return false;
270 
271     // Add parameter to feature.
272     Parameter *parameter;
273     parameter = result->add_parameter();
274     parameter->set_name(name);
275     parameter->set_value(value);
276   } else {
277     ReportError("Syntax error in parameter list");
278     return false;
279   }
280   return true;
281 }
282 
ToFMLFunction(const FeatureFunctionDescriptor & function,std::string * output)283 void ToFMLFunction(const FeatureFunctionDescriptor &function,
284                    std::string *output) {
285   output->append(function.type());
286   if (function.argument() != 0 || function.parameter_size() > 0) {
287     output->append("(");
288     bool first = true;
289     if (function.argument() != 0) {
290       output->append(IntToString(function.argument()));
291       first = false;
292     }
293     for (int i = 0; i < function.parameter_size(); ++i) {
294       if (!first) output->append(",");
295       output->append(function.parameter(i).name());
296       output->append("=");
297       output->append("\"");
298       output->append(function.parameter(i).value());
299       output->append("\"");
300       first = false;
301     }
302     output->append(")");
303   }
304 }
305 
ToFML(const FeatureFunctionDescriptor & function,std::string * output)306 void ToFML(const FeatureFunctionDescriptor &function, std::string *output) {
307   ToFMLFunction(function, output);
308   if (function.feature_size() == 1) {
309     output->append(".");
310     ToFML(function.feature(0), output);
311   } else if (function.feature_size() > 1) {
312     output->append(" { ");
313     for (int i = 0; i < function.feature_size(); ++i) {
314       if (i > 0) output->append(" ");
315       ToFML(function.feature(i), output);
316     }
317     output->append(" } ");
318   }
319 }
320 
ToFML(const FeatureExtractorDescriptor & extractor,std::string * output)321 void ToFML(const FeatureExtractorDescriptor &extractor, std::string *output) {
322   for (int i = 0; i < extractor.feature_size(); ++i) {
323     ToFML(extractor.feature(i), output);
324     output->append("\n");
325   }
326 }
327 
328 }  // namespace nlp_core
329 }  // namespace libtextclassifier
330