// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
#define ICING_TOKENIZATION_TOKENIZER_FACTORY_H_

#include <memory>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/proto/schema.pb.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer.h"

namespace icing {
namespace lib {

// Free factory functions that construct Tokenizer instances. Two factories
// are declared: one selected by the schema's indexing tokenizer type and one
// selected by QueryTokenizerType. Both return the new Tokenizer through a
// StatusOr, so a failure is reported as a status rather than a null pointer.
namespace tokenizer_factory {

// Factory function to create an indexing Tokenizer which does not take
// ownership of any input components, and all pointers must refer to valid
// objects that outlive the created Tokenizer instance.
//
// Parameters:
//   type           - the kind of indexing tokenizer to create.
//                    NOTE(review): StringIndexingConfig is presumably declared
//                    in icing/proto/schema.pb.h - confirm.
//   lang_segmenter - borrowed, never owned; must be non-null and must outlive
//                    the returned Tokenizer.
//
// Returns:
//   A tokenizer on success
//   FAILED_PRECONDITION on any null pointer input
//   INVALID_ARGUMENT if tokenizer type is invalid
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
                        const LanguageSegmenter* lang_segmenter);

// All the supported query tokenizer types. RAW_QUERY is the only enumerator
// declared here. This is an unscoped enum, so the enumerator is reachable as
// tokenizer_factory::RAW_QUERY.
enum QueryTokenizerType { RAW_QUERY = 0 };

// Factory function to create a query Tokenizer which does not take ownership of
// any input components, and all pointers must refer to valid objects that
// outlive the created Tokenizer instance.
//
// Parameters:
//   query_tokenizer_type - the kind of query tokenizer to create; see
//                          QueryTokenizerType for the supported values.
//   lang_segmenter       - borrowed, never owned; must be non-null and must
//                          outlive the returned Tokenizer.
//
// Returns:
//   A tokenizer on success
//   FAILED_PRECONDITION on any null pointer input
//   INVALID_ARGUMENT if tokenizer type is invalid
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>> CreateQueryTokenizer(
    QueryTokenizerType query_tokenizer_type,
    const LanguageSegmenter* lang_segmenter);

}  // namespace tokenizer_factory

}  // namespace lib
}  // namespace icing

#endif  // ICING_TOKENIZATION_TOKENIZER_FACTORY_H_