1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lang_id/common/fel/fel-parser.h"
18
19 #include <ctype.h>
20
21 #include <string>
22
23 #include "lang_id/common/lite_base/logging.h"
24 #include "lang_id/common/lite_strings/numbers.h"
25
26 namespace libtextclassifier3 {
27 namespace mobile {
28
29 namespace {
// Returns true iff character c can appear at the beginning of an identifier:
// a letter (as classified by isalpha), '_' or '/'.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF); passing a negative plain char is undefined behavior, so
  // convert first.
  return isalpha(static_cast<unsigned char>(c)) || (c == '_') || (c == '/');
}
33
// Returns true iff character c can appear inside an identifier: a letter or
// digit (as classified by isalnum), '_', '-' or '/'.
inline bool IsValidCharInsideIdentifier(char c) {
  // Convert to unsigned char before calling the <ctype.h> classifier: a
  // negative plain char is outside the domain of isalnum().
  return isalnum(static_cast<unsigned char>(c)) || (c == '_') || (c == '-') ||
         (c == '/');
}
38
// Returns true iff character c can appear at the beginning of a number: a
// decimal digit or a sign ('+' or '-').
inline bool IsValidCharAtStartOfNumber(char c) {
  // Convert to unsigned char before calling the <ctype.h> classifier: a
  // negative plain char is outside the domain of isdigit().
  return isdigit(static_cast<unsigned char>(c)) || (c == '+') || (c == '-');
}
43
// Returns true iff character c can appear inside a number: a decimal digit or
// '.'.
inline bool IsValidCharInsideNumber(char c) {
  // Convert to unsigned char before calling the <ctype.h> classifier: a
  // negative plain char is outside the domain of isdigit().
  return isdigit(static_cast<unsigned char>(c)) || (c == '.');
}
48 } // namespace
49
Initialize(const std::string & source)50 bool FELParser::Initialize(const std::string &source) {
51 // Initialize parser state.
52 source_ = source;
53 current_ = source_.begin();
54 item_start_ = line_start_ = current_;
55 line_number_ = item_line_number_ = 1;
56
57 // Read first input item.
58 return NextItem();
59 }
60
ReportError(const std::string & error_message)61 void FELParser::ReportError(const std::string &error_message) {
62 const int position = item_start_ - line_start_ + 1;
63 const std::string line(line_start_, current_);
64
65 SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
66 << ", position " << position << ": " << error_message
67 << "\n " << line << " <--HERE";
68 }
69
Next()70 void FELParser::Next() {
71 // Move to the next input character. If we are at a line break update line
72 // number and line start position.
73 if (CurrentChar() == '\n') {
74 ++line_number_;
75 ++current_;
76 line_start_ = current_;
77 } else {
78 ++current_;
79 }
80 }
81
NextItem()82 bool FELParser::NextItem() {
83 // Skip white space and comments.
84 while (!eos()) {
85 if (CurrentChar() == '#') {
86 // Skip comment.
87 while (!eos() && CurrentChar() != '\n') Next();
88 } else if (isspace(CurrentChar())) {
89 // Skip whitespace.
90 while (!eos() && isspace(CurrentChar())) Next();
91 } else {
92 break;
93 }
94 }
95
96 // Record start position for next item.
97 item_start_ = current_;
98 item_line_number_ = line_number_;
99
100 // Check for end of input.
101 if (eos()) {
102 item_type_ = END;
103 return true;
104 }
105
106 // Parse number.
107 if (IsValidCharAtStartOfNumber(CurrentChar())) {
108 std::string::iterator start = current_;
109 Next();
110 while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
111 item_text_.assign(start, current_);
112 item_type_ = NUMBER;
113 return true;
114 }
115
116 // Parse string.
117 if (CurrentChar() == '"') {
118 Next();
119 std::string::iterator start = current_;
120 while (CurrentChar() != '"') {
121 if (eos()) {
122 ReportError("Unterminated string");
123 return false;
124 }
125 Next();
126 }
127 item_text_.assign(start, current_);
128 item_type_ = STRING;
129 Next();
130 return true;
131 }
132
133 // Parse identifier name.
134 if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
135 std::string::iterator start = current_;
136 while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
137 Next();
138 }
139 item_text_.assign(start, current_);
140 item_type_ = NAME;
141 return true;
142 }
143
144 // Single character item.
145 item_type_ = CurrentChar();
146 Next();
147 return true;
148 }
149
Parse(const std::string & source,FeatureExtractorDescriptor * result)150 bool FELParser::Parse(const std::string &source,
151 FeatureExtractorDescriptor *result) {
152 // Initialize parser.
153 if (!Initialize(source)) {
154 return false;
155 }
156
157 while (item_type_ != END) {
158 // Current item should be a feature name.
159 if (item_type_ != NAME) {
160 ReportError("Feature type name expected");
161 return false;
162 }
163 std::string name = item_text_;
164 if (!NextItem()) {
165 return false;
166 }
167
168 if (item_type_ == '=') {
169 ReportError("Invalid syntax: feature expected");
170 return false;
171 } else {
172 // Parse feature.
173 FeatureFunctionDescriptor *descriptor = result->add_feature();
174 descriptor->set_type(name);
175 if (!ParseFeature(descriptor)) {
176 return false;
177 }
178 }
179 }
180
181 return true;
182 }
183
ParseFeature(FeatureFunctionDescriptor * result)184 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
185 // Parse argument and parameters.
186 if (item_type_ == '(') {
187 if (!NextItem()) return false;
188 if (!ParseParameter(result)) return false;
189 while (item_type_ == ',') {
190 if (!NextItem()) return false;
191 if (!ParseParameter(result)) return false;
192 }
193
194 if (item_type_ != ')') {
195 ReportError(") expected");
196 return false;
197 }
198 if (!NextItem()) return false;
199 }
200
201 // Parse feature name.
202 if (item_type_ == ':') {
203 if (!NextItem()) return false;
204 if (item_type_ != NAME && item_type_ != STRING) {
205 ReportError("Feature name expected");
206 return false;
207 }
208 std::string name = item_text_;
209 if (!NextItem()) return false;
210
211 // Set feature name.
212 result->set_name(name);
213 }
214
215 // Parse sub-features.
216 if (item_type_ == '.') {
217 // Parse dotted sub-feature.
218 if (!NextItem()) return false;
219 if (item_type_ != NAME) {
220 ReportError("Feature type name expected");
221 return false;
222 }
223 std::string type = item_text_;
224 if (!NextItem()) return false;
225
226 // Parse sub-feature.
227 FeatureFunctionDescriptor *subfeature = result->add_feature();
228 subfeature->set_type(type);
229 if (!ParseFeature(subfeature)) return false;
230 } else if (item_type_ == '{') {
231 // Parse sub-feature block.
232 if (!NextItem()) return false;
233 while (item_type_ != '}') {
234 if (item_type_ != NAME) {
235 ReportError("Feature type name expected");
236 return false;
237 }
238 std::string type = item_text_;
239 if (!NextItem()) return false;
240
241 // Parse sub-feature.
242 FeatureFunctionDescriptor *subfeature = result->add_feature();
243 subfeature->set_type(type);
244 if (!ParseFeature(subfeature)) return false;
245 }
246 if (!NextItem()) return false;
247 }
248 return true;
249 }
250
ParseParameter(FeatureFunctionDescriptor * result)251 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
252 if (item_type_ == NUMBER) {
253 int argument;
254 if (!LiteAtoi(item_text_, &argument)) {
255 ReportError("Unable to parse number");
256 return false;
257 }
258 if (!NextItem()) return false;
259
260 // Set default argument for feature.
261 result->set_argument(argument);
262 } else if (item_type_ == NAME) {
263 std::string name = item_text_;
264 if (!NextItem()) return false;
265 if (item_type_ != '=') {
266 ReportError("= expected");
267 return false;
268 }
269 if (!NextItem()) return false;
270 if (item_type_ >= END) {
271 ReportError("Parameter value expected");
272 return false;
273 }
274 std::string value = item_text_;
275 if (!NextItem()) return false;
276
277 // Add parameter to feature.
278 Parameter *parameter;
279 parameter = result->add_parameter();
280 parameter->set_name(name);
281 parameter->set_value(value);
282 } else {
283 ReportError("Syntax error in parameter list");
284 return false;
285 }
286 return true;
287 }
288
289 } // namespace mobile
}  // namespace libtextclassifier3
291