1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Feature modeling language (fml) parser. 18 // 19 // BNF grammar for fml: 20 // 21 // <feature model> ::= { <feature extractor> } 22 // 23 // <feature extractor> ::= <extractor spec> | 24 // <extractor spec> '.' <feature extractor> | 25 // <extractor spec> '{' { <feature extractor> } '}' 26 // 27 // <extractor spec> ::= <extractor type> 28 // [ '(' <parameter list> ')' ] 29 // [ ':' <extractor name> ] 30 // 31 // <parameter list> = ( <parameter> | <argument> ) { ',' <parameter> } 32 // 33 // <parameter> ::= <parameter name> '=' <parameter value> 34 // 35 // <extractor type> ::= NAME 36 // <extractor name> ::= NAME | STRING 37 // <argument> ::= NUMBER 38 // <parameter name> ::= NAME 39 // <parameter value> ::= NUMBER | STRING | NAME 40 41 #ifndef LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_ 42 #define LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_ 43 44 #include <string> 45 #include <vector> 46 47 #include "common/feature-descriptors.h" 48 #include "util/base/logging.h" 49 50 namespace libtextclassifier { 51 namespace nlp_core { 52 53 class FMLParser { 54 public: 55 // Parses fml specification into feature extractor descriptor. 56 // Returns true on success, false on error (e.g., syntax errors). 57 bool Parse(const std::string &source, FeatureExtractorDescriptor *result); 58 59 private: 60 // Initializes the parser with the source text. 61 // Returns true on success, false on syntax error. 62 bool Initialize(const std::string &source); 63 64 // Outputs an error message, with context info, and sets error_ to true. 65 void ReportError(const std::string &error_message); 66 67 // Moves to the next input character. 68 void Next(); 69 70 // Moves to the next input item. Sets item_text_ and item_type_ accordingly. 71 // Returns true on success, false on syntax error. 72 bool NextItem(); 73 74 // Parses a feature descriptor. 75 // Returns true on success, false on syntax error. 76 bool ParseFeature(FeatureFunctionDescriptor *result); 77 78 // Parses a parameter specification. 79 // Returns true on success, false on syntax error. 80 bool ParseParameter(FeatureFunctionDescriptor *result); 81 82 // Returns true if end of source input has been reached. eos()83 bool eos() const { return current_ >= source_.end(); } 84 85 // Returns current character. Other methods should access the current 86 // character through this method (instead of using *current_ directly): this 87 // method performs extra safety checks. 88 // 89 // In case of an unsafe access, returns '\0'. CurrentChar()90 char CurrentChar() const { 91 if ((current_ >= source_.begin()) && (current_ < source_.end())) { 92 return *current_; 93 } else { 94 TC_LOG(ERROR) << "Unsafe char read"; 95 return '\0'; 96 } 97 } 98 99 // Item types. 100 enum ItemTypes { 101 END = 0, 102 NAME = -1, 103 NUMBER = -2, 104 STRING = -3, 105 }; 106 107 // Source text. 108 std::string source_; 109 110 // Current input position. 111 std::string::iterator current_; 112 113 // Line number for current input position. 114 int line_number_; 115 116 // Start position for current item. 117 std::string::iterator item_start_; 118 119 // Start position for current line. 120 std::string::iterator line_start_; 121 122 // Line number for current item. 123 int item_line_number_; 124 125 // Item type for current item. If this is positive it is interpreted as a 126 // character. If it is negative it is interpreted as an item type. 127 int item_type_; 128 129 // Text for current item. 130 std::string item_text_; 131 }; 132 133 // Converts a FeatureFunctionDescriptor into an FML spec (reverse of parsing). 134 void ToFML(const FeatureFunctionDescriptor &function, std::string *output); 135 136 // Like ToFML, but doesn't go into the nested functions. Instead, it generates 137 // a string that starts with the name of the feature extraction function and 138 // next, in-between parentheses, the parameters, separated by comma. 139 // Intuitively, the constructed string is the prefix of ToFML, before the "{" 140 // that starts the nested features. 141 void ToFMLFunction(const FeatureFunctionDescriptor &function, 142 std::string *output); 143 144 } // namespace nlp_core 145 } // namespace libtextclassifier 146 147 #endif // LIBTEXTCLASSIFIER_COMMON_FML_PARSER_H_ 148