/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Feature modeling language (fml) parser. // // BNF grammar for fml: // // ::= { } // // ::= | // '.' | // '{' { } '}' // // ::= // [ '(' ')' ] // [ ':' ] // // = ( | ) { ',' } // // ::= '=' // // ::= NAME // ::= NAME | STRING // ::= NUMBER // ::= NAME // ::= NUMBER | STRING | NAME #ifndef LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_ #define LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_ #include #include #include "common/feature-descriptors.h" #include "util/base/logging.h" namespace libtextclassifier { namespace nlp_core { class FMLParser { public: // Parses fml specification into feature extractor descriptor. // Returns true on success, false on error (e.g., syntax errors). bool Parse(const std::string &source, FeatureExtractorDescriptor *result); private: // Initializes the parser with the source text. // Returns true on success, false on syntax error. bool Initialize(const std::string &source); // Outputs an error message, with context info, and sets error_ to true. void ReportError(const std::string &error_message); // Moves to the next input character. void Next(); // Moves to the next input item. Sets item_text_ and item_type_ accordingly. // Returns true on success, false on syntax error. bool NextItem(); // Parses a feature descriptor. // Returns true on success, false on syntax error. bool ParseFeature(FeatureFunctionDescriptor *result); // Parses a parameter specification. // Returns true on success, false on syntax error. bool ParseParameter(FeatureFunctionDescriptor *result); // Returns true if end of source input has been reached. bool eos() const { return current_ >= source_.end(); } // Returns current character. Other methods should access the current // character through this method (instead of using *current_ directly): this // method performs extra safety checks. // // In case of an unsafe access, returns '\0'. char CurrentChar() const { if ((current_ >= source_.begin()) && (current_ < source_.end())) { return *current_; } else { TC_LOG(ERROR) << "Unsafe char read"; return '\0'; } } // Item types. enum ItemTypes { END = 0, NAME = -1, NUMBER = -2, STRING = -3, }; // Source text. std::string source_; // Current input position. std::string::iterator current_; // Line number for current input position. int line_number_; // Start position for current item. std::string::iterator item_start_; // Start position for current line. std::string::iterator line_start_; // Line number for current item. int item_line_number_; // Item type for current item. If this is positive it is interpreted as a // character. If it is negative it is interpreted as an item type. int item_type_; // Text for current item. std::string item_text_; }; // Converts a FeatureFunctionDescriptor into an FML spec (reverse of parsing). void ToFML(const FeatureFunctionDescriptor &function, std::string *output); // Like ToFML, but doesn't go into the nested functions. Instead, it generates // a string that starts with the name of the feature extraction function and // next, in-between parentheses, the parameters, separated by comma. // Intuitively, the constructed string is the prefix of ToFML, before the "{" // that starts the nested features. void ToFMLFunction(const FeatureFunctionDescriptor &function, std::string *output); } // namespace nlp_core } // namespace libtextclassifier #endif // LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_