1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lang_id/common/fel/fel-parser.h"
18
19 #include <ctype.h>
20 #include <string>
21
22 #include "lang_id/common/lite_base/logging.h"
23 #include "lang_id/common/lite_strings/numbers.h"
24
25 namespace libtextclassifier3 {
26 namespace mobile {
27
28 namespace {
// Returns true iff character c can appear at the beginning of an identifier,
// i.e., iff c is a letter, an underscore or a forward slash.
inline bool IsValidCharAtStartOfIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  A plain char may be negative for bytes >= 0x80, so convert
  // before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalpha(uc) != 0) || (c == '_') || (c == '/');
}
32
// Returns true iff character c can appear inside an identifier, i.e., iff c is
// a letter, a digit, an underscore, a dash or a forward slash.
inline bool IsValidCharInsideIdentifier(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  A plain char may be negative for bytes >= 0x80, so convert
  // before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isalnum(uc) != 0) || (c == '_') || (c == '-') || (c == '/');
}
37
// Returns true iff character c can appear at the beginning of a number, i.e.,
// iff c is a decimal digit or a sign ('+' or '-').
inline bool IsValidCharAtStartOfNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  A plain char may be negative for bytes >= 0x80, so convert
  // before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '+') || (c == '-');
}
42
// Returns true iff character c can appear inside a number, i.e., iff c is a
// decimal digit or a dot.
inline bool IsValidCharInsideNumber(char c) {
  // The <ctype.h> classifiers require an argument representable as unsigned
  // char (or EOF).  A plain char may be negative for bytes >= 0x80, so convert
  // before classifying.
  const unsigned char uc = static_cast<unsigned char>(c);
  return (isdigit(uc) != 0) || (c == '.');
}
47 } // namespace
48
Initialize(const string & source)49 bool FELParser::Initialize(const string &source) {
50 // Initialize parser state.
51 source_ = source;
52 current_ = source_.begin();
53 item_start_ = line_start_ = current_;
54 line_number_ = item_line_number_ = 1;
55
56 // Read first input item.
57 return NextItem();
58 }
59
ReportError(const string & error_message)60 void FELParser::ReportError(const string &error_message) {
61 const int position = item_start_ - line_start_ + 1;
62 const string line(line_start_, current_);
63
64 SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
65 << ", position " << position << ": " << error_message
66 << "\n " << line << " <--HERE";
67 }
68
Next()69 void FELParser::Next() {
70 // Move to the next input character. If we are at a line break update line
71 // number and line start position.
72 if (CurrentChar() == '\n') {
73 ++line_number_;
74 ++current_;
75 line_start_ = current_;
76 } else {
77 ++current_;
78 }
79 }
80
NextItem()81 bool FELParser::NextItem() {
82 // Skip white space and comments.
83 while (!eos()) {
84 if (CurrentChar() == '#') {
85 // Skip comment.
86 while (!eos() && CurrentChar() != '\n') Next();
87 } else if (isspace(CurrentChar())) {
88 // Skip whitespace.
89 while (!eos() && isspace(CurrentChar())) Next();
90 } else {
91 break;
92 }
93 }
94
95 // Record start position for next item.
96 item_start_ = current_;
97 item_line_number_ = line_number_;
98
99 // Check for end of input.
100 if (eos()) {
101 item_type_ = END;
102 return true;
103 }
104
105 // Parse number.
106 if (IsValidCharAtStartOfNumber(CurrentChar())) {
107 string::iterator start = current_;
108 Next();
109 while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
110 item_text_.assign(start, current_);
111 item_type_ = NUMBER;
112 return true;
113 }
114
115 // Parse string.
116 if (CurrentChar() == '"') {
117 Next();
118 string::iterator start = current_;
119 while (CurrentChar() != '"') {
120 if (eos()) {
121 ReportError("Unterminated string");
122 return false;
123 }
124 Next();
125 }
126 item_text_.assign(start, current_);
127 item_type_ = STRING;
128 Next();
129 return true;
130 }
131
132 // Parse identifier name.
133 if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
134 string::iterator start = current_;
135 while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
136 Next();
137 }
138 item_text_.assign(start, current_);
139 item_type_ = NAME;
140 return true;
141 }
142
143 // Single character item.
144 item_type_ = CurrentChar();
145 Next();
146 return true;
147 }
148
Parse(const string & source,FeatureExtractorDescriptor * result)149 bool FELParser::Parse(const string &source,
150 FeatureExtractorDescriptor *result) {
151 // Initialize parser.
152 if (!Initialize(source)) {
153 return false;
154 }
155
156 while (item_type_ != END) {
157 // Current item should be a feature name.
158 if (item_type_ != NAME) {
159 ReportError("Feature type name expected");
160 return false;
161 }
162 string name = item_text_;
163 if (!NextItem()) {
164 return false;
165 }
166
167 if (item_type_ == '=') {
168 ReportError("Invalid syntax: feature expected");
169 return false;
170 } else {
171 // Parse feature.
172 FeatureFunctionDescriptor *descriptor = result->add_feature();
173 descriptor->set_type(name);
174 if (!ParseFeature(descriptor)) {
175 return false;
176 }
177 }
178 }
179
180 return true;
181 }
182
ParseFeature(FeatureFunctionDescriptor * result)183 bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
184 // Parse argument and parameters.
185 if (item_type_ == '(') {
186 if (!NextItem()) return false;
187 if (!ParseParameter(result)) return false;
188 while (item_type_ == ',') {
189 if (!NextItem()) return false;
190 if (!ParseParameter(result)) return false;
191 }
192
193 if (item_type_ != ')') {
194 ReportError(") expected");
195 return false;
196 }
197 if (!NextItem()) return false;
198 }
199
200 // Parse feature name.
201 if (item_type_ == ':') {
202 if (!NextItem()) return false;
203 if (item_type_ != NAME && item_type_ != STRING) {
204 ReportError("Feature name expected");
205 return false;
206 }
207 string name = item_text_;
208 if (!NextItem()) return false;
209
210 // Set feature name.
211 result->set_name(name);
212 }
213
214 // Parse sub-features.
215 if (item_type_ == '.') {
216 // Parse dotted sub-feature.
217 if (!NextItem()) return false;
218 if (item_type_ != NAME) {
219 ReportError("Feature type name expected");
220 return false;
221 }
222 string type = item_text_;
223 if (!NextItem()) return false;
224
225 // Parse sub-feature.
226 FeatureFunctionDescriptor *subfeature = result->add_feature();
227 subfeature->set_type(type);
228 if (!ParseFeature(subfeature)) return false;
229 } else if (item_type_ == '{') {
230 // Parse sub-feature block.
231 if (!NextItem()) return false;
232 while (item_type_ != '}') {
233 if (item_type_ != NAME) {
234 ReportError("Feature type name expected");
235 return false;
236 }
237 string type = item_text_;
238 if (!NextItem()) return false;
239
240 // Parse sub-feature.
241 FeatureFunctionDescriptor *subfeature = result->add_feature();
242 subfeature->set_type(type);
243 if (!ParseFeature(subfeature)) return false;
244 }
245 if (!NextItem()) return false;
246 }
247 return true;
248 }
249
ParseParameter(FeatureFunctionDescriptor * result)250 bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
251 if (item_type_ == NUMBER) {
252 int argument;
253 if (!LiteAtoi(item_text_, &argument)) {
254 ReportError("Unable to parse number");
255 return false;
256 }
257 if (!NextItem()) return false;
258
259 // Set default argument for feature.
260 result->set_argument(argument);
261 } else if (item_type_ == NAME) {
262 string name = item_text_;
263 if (!NextItem()) return false;
264 if (item_type_ != '=') {
265 ReportError("= expected");
266 return false;
267 }
268 if (!NextItem()) return false;
269 if (item_type_ >= END) {
270 ReportError("Parameter value expected");
271 return false;
272 }
273 string value = item_text_;
274 if (!NextItem()) return false;
275
276 // Add parameter to feature.
277 Parameter *parameter;
278 parameter = result->add_parameter();
279 parameter->set_name(name);
280 parameter->set_value(value);
281 } else {
282 ReportError("Syntax error in parameter list");
283 return false;
284 }
285 return true;
286 }
287
288 } // namespace mobile
289 } // namespace libtextclassifier3
290