1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Feature extraction language (FEL) parser. 18 // 19 // BNF grammar for FEL: 20 // 21 // <feature model> ::= { <feature extractor> } 22 // 23 // <feature extractor> ::= <extractor spec> | 24 // <extractor spec> '.' <feature extractor> | 25 // <extractor spec> '{' { <feature extractor> } '}' 26 // 27 // <extractor spec> ::= <extractor type> 28 // [ '(' <parameter list> ')' ] 29 // [ ':' <extractor name> ] 30 // 31 // <parameter list> = ( <parameter> | <argument> ) { ',' <parameter> } 32 // 33 // <parameter> ::= <parameter name> '=' <parameter value> 34 // 35 // <extractor type> ::= NAME 36 // <extractor name> ::= NAME | STRING 37 // <argument> ::= NUMBER 38 // <parameter name> ::= NAME 39 // <parameter value> ::= NUMBER | STRING | NAME 40 41 #ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_ 42 #define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_ 43 44 #include <string> 45 46 #include "lang_id/common/fel/feature-descriptors.h" 47 #include "lang_id/common/lite_base/logging.h" 48 49 namespace libtextclassifier3 { 50 namespace mobile { 51 52 class FELParser { 53 public: 54 // Parses fml specification into feature extractor descriptor. 55 // Returns true on success, false on error (e.g., syntax errors). 56 bool Parse(const std::string &source, FeatureExtractorDescriptor *result); 57 58 private: 59 // Initializes the parser with the source text. 60 // Returns true on success, false on syntax error. 61 bool Initialize(absl::string_view source); 62 63 // Outputs an error message, with context info. 64 void ReportError(const std::string &error_message); 65 66 // Moves to the next input character. 67 void Next(); 68 69 // Moves to the next input item. Sets item_text_ and item_type_ accordingly. 70 // Returns true on success, false on syntax error. 71 bool NextItem(); 72 73 // Parses a feature descriptor. 74 // Returns true on success, false on syntax error. 75 bool ParseFeature(FeatureFunctionDescriptor *result); 76 77 // Parses a parameter specification. 78 // Returns true on success, false on syntax error. 79 bool ParseParameter(FeatureFunctionDescriptor *result); 80 81 // Returns true if end of source input has been reached. eos()82 bool eos() const { return current_ >= source_.end(); } 83 84 // Returns current character. Other methods should access the current 85 // character through this method (instead of using *current_ directly): this 86 // method performs extra safety checks. 87 // 88 // In case of an unsafe access, returns '\0'. CurrentChar()89 char CurrentChar() const { 90 if ((current_ >= source_.begin()) && (current_ < source_.end())) { 91 return *current_; 92 } else { 93 SAFTM_LOG(ERROR) << "Unsafe char read"; 94 return '\0'; 95 } 96 } 97 98 // Item types. 99 enum ItemTypes { 100 END = 0, 101 NAME = -1, 102 NUMBER = -2, 103 STRING = -3, 104 }; 105 106 // Source text. 107 std::string source_; 108 109 // Current input position. 110 std::string::iterator current_; 111 112 // Line number for current input position. 113 int line_number_; 114 115 // Start position for current item. 116 std::string::iterator item_start_; 117 118 // Start position for current line. 119 std::string::iterator line_start_; 120 121 // Line number for current item. 122 int item_line_number_; 123 124 // Item type for current item. If this is positive it is interpreted as a 125 // character. If it is negative it is interpreted as an item type. 126 int item_type_; 127 128 // Text for current item. 129 std::string item_text_; 130 }; 131 132 } // namespace mobile 133 } // namespace nlp_saft 134 135 #endif // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_ 136