1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LIBTEXTCLASSIFIER_UTILS_GRAMMAR_ANALYZER_H_ 18 #define LIBTEXTCLASSIFIER_UTILS_GRAMMAR_ANALYZER_H_ 19 20 #include "utils/base/arena.h" 21 #include "utils/base/statusor.h" 22 #include "utils/grammar/evaluated-derivation.h" 23 #include "utils/grammar/parsing/parser.h" 24 #include "utils/grammar/semantics/composer.h" 25 #include "utils/grammar/text-context.h" 26 #include "utils/i18n/locale.h" 27 #include "utils/tokenizer.h" 28 #include "utils/utf8/unilib.h" 29 30 namespace libtextclassifier3::grammar { 31 32 // An analyzer that parses and semantically evaluates an input text with a 33 // grammar. 34 class Analyzer { 35 public: 36 explicit Analyzer(const UniLib* unilib, const RulesSet* rules_set); 37 explicit Analyzer(const UniLib* unilib, const RulesSet* rules_set, 38 const Tokenizer* tokenizer); 39 40 // Parses and evaluates an input. 41 StatusOr<std::vector<EvaluatedDerivation>> Parse( 42 const TextContext& input, UnsafeArena* arena, 43 bool deduplicate_derivations = true) const; 44 45 StatusOr<std::vector<EvaluatedDerivation>> Parse( 46 const UnicodeText& text, const std::vector<Locale>& locales, 47 UnsafeArena* arena, bool deduplicate_derivations = true) const; 48 49 // Pre-processes an input text for parsing. 50 TextContext BuildTextContextForInput( 51 const UnicodeText& text, const std::vector<Locale>& locales = {}) const; 52 parser()53 const Parser& parser() const { return parser_; } 54 55 private: 56 std::unique_ptr<Tokenizer> owned_tokenizer_; 57 const Tokenizer* tokenizer_; 58 Parser parser_; 59 SemanticComposer semantic_evaluator_; 60 }; 61 62 } // namespace libtextclassifier3::grammar 63 64 #endif // LIBTEXTCLASSIFIER_UTILS_GRAMMAR_ANALYZER_H_ 65