/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_GRAMMAR_GRAMMAR_ANNOTATOR_H_
#define LIBTEXTCLASSIFIER_ANNOTATOR_GRAMMAR_GRAMMAR_ANNOTATOR_H_

#include <vector>

#include "annotator/model_generated.h"
#include "annotator/types.h"
#include "utils/flatbuffers/mutable.h"
#include "utils/grammar/analyzer.h"
#include "utils/grammar/evaluated-derivation.h"
#include "utils/grammar/text-context.h"
#include "utils/i18n/locale.h"
#include "utils/tokenizer.h"
#include "utils/utf8/unicodetext.h"
#include "utils/utf8/unilib.h"

namespace libtextclassifier3 {

// Grammar backed annotator.
//
// Offers three const entry points over a grammar model: annotating a whole
// text, classifying a given span, and suggesting a selection around a span.
// Each entry point reports through its boolean return value whether it
// produced a result, and writes that result through an output pointer.
class GrammarAnnotator {
 public:
  // Creates an annotator from a unicode library, a grammar model and an
  // entity data builder.
  // NOTE(review): the arguments are kept as non-owning members (`unilib_` is
  // even held by reference), so presumably `unilib` must be non-null and all
  // three must outlive this object -- confirm against the constructor
  // definition. Whether `entity_data_builder` may be null is not visible
  // here.
  explicit GrammarAnnotator(
      const UniLib* unilib, const GrammarModel* model,
      const MutableFlatbufferBuilder* entity_data_builder);

  // Annotates a given text.
  //   locales: locales associated with the request.
  //   text:    the text to annotate.
  //   result:  output; receives the annotated spans.
  // Returns true if the text was successfully annotated.
  bool Annotate(const std::vector<Locale>& locales, const UnicodeText& text,
                std::vector<AnnotatedSpan>* result) const;

  // Classifies a span in a text.
  //   locales:               locales associated with the request.
  //   text:                  the text containing the span.
  //   selection:             codepoint span within `text` to classify.
  //   classification_result: output; receives the classification.
  // Returns true if the span was classified by a grammar rule.
  bool ClassifyText(const std::vector<Locale>& locales, const UnicodeText& text,
                    const CodepointSpan& selection,
                    ClassificationResult* classification_result) const;

  // Suggests text selections in a text.
  //   locales:   locales associated with the request.
  //   text:      the text containing the span.
  //   selection: codepoint span within `text` used as the reference span.
  //   result:    output; receives the suggested span.
  // Returns true if a span was suggested by a grammar rule.
  bool SuggestSelection(const std::vector<Locale>& locales,
                        const UnicodeText& text, const CodepointSpan& selection,
                        AnnotatedSpan* result) const;

 private:
  // Filters out derivations that do not overlap with a reference span.
  // Returns the derivations from `derivations` that are kept, by value.
  // NOTE(review): `only_exact_overlap` presumably restricts the result to
  // derivations whose span equals `selection` rather than merely overlapping
  // it -- confirm in the implementation.
  std::vector<grammar::Derivation> OverlappingDerivations(
      const CodepointSpan& selection,
      const std::vector<grammar::Derivation>& derivations,
      const bool only_exact_overlap) const;

  // Fills out an annotated span from a grammar match result.
  //   input_context:  the text context the grammar was matched against.
  //   parse_tree:     parse tree of the matching derivation.
  //   interpretation: rule classification result from the model that is
  //                   associated with the match.
  //   result:         output; the annotated span to fill.
  // Returns a success flag; the failure conditions are defined in the
  // implementation, which is not part of this header.
  bool InstantiateAnnotatedSpanFromDerivation(
      const grammar::TextContext& input_context,
      const grammar::ParseTree* parse_tree,
      const GrammarModel_::RuleClassificationResult* interpretation,
      AnnotatedSpan* result) const;

  // Instantiates a classification result from a rule match.
  //   input_context:  the text context the grammar was matched against.
  //   parse_tree:     parse tree of the matching derivation.
  //   interpretation: rule classification result from the model that is
  //                   associated with the match.
  //   classification: output; the classification result to fill.
  // Returns a success flag; the failure conditions are defined in the
  // implementation, which is not part of this header.
  bool InstantiateClassificationFromDerivation(
      const grammar::TextContext& input_context,
      const grammar::ParseTree* parse_tree,
      const GrammarModel_::RuleClassificationResult* interpretation,
      ClassificationResult* classification) const;

  // NOTE(review): C++ initializes data members in declaration order, and
  // `tokenizer_` / `analyzer_` may be constructed from the members declared
  // before them -- check the constructor before reordering anything below.

  // Unicode library; held by reference, not owned.
  const UniLib& unilib_;
  // Grammar model; non-owning pointer.
  const GrammarModel* model_;
  // Tokenizer owned by this annotator.
  const Tokenizer tokenizer_;
  // Builder for entity data; non-owning pointer.
  const MutableFlatbufferBuilder* entity_data_builder_;
  // Grammar analyzer owned by this annotator.
  const grammar::Analyzer analyzer_;
};

}  // namespace libtextclassifier3

#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_GRAMMAR_GRAMMAR_ANNOTATOR_H_