1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "icing/tokenization/tokenizer-factory.h" 16 17 #include <memory> 18 19 #include "icing/text_classifier/lib3/utils/base/statusor.h" 20 #include "icing/absl_ports/canonical_errors.h" 21 #include "icing/proto/schema.pb.h" 22 #include "icing/tokenization/language-segmenter.h" 23 #include "icing/tokenization/plain-tokenizer.h" 24 #include "icing/tokenization/raw-query-tokenizer.h" 25 #include "icing/tokenization/rfc822-tokenizer.h" 26 #include "icing/tokenization/tokenizer.h" 27 28 #ifdef ENABLE_URL_TOKENIZER 29 #include "icing/tokenization/url-tokenizer.h" 30 #endif // ENABLE_URL_TOKENIZER 31 32 #include "icing/tokenization/verbatim-tokenizer.h" 33 #include "icing/util/status-macros.h" 34 35 namespace icing { 36 namespace lib { 37 38 namespace tokenizer_factory { 39 40 libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>> CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,const LanguageSegmenter * lang_segmenter)41CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type, 42 const LanguageSegmenter* lang_segmenter) { 43 ICING_RETURN_ERROR_IF_NULL(lang_segmenter); 44 45 switch (type) { 46 case StringIndexingConfig::TokenizerType::PLAIN: 47 return std::make_unique<PlainTokenizer>(lang_segmenter); 48 case StringIndexingConfig::TokenizerType::VERBATIM: 49 return std::make_unique<VerbatimTokenizer>(); 50 case StringIndexingConfig::TokenizerType::RFC822: 51 return std::make_unique<Rfc822Tokenizer>(); 52 // TODO (b/246964044): remove ifdef guard when url-tokenizer is ready for export 53 // to Android. 54 #ifdef ENABLE_URL_TOKENIZER 55 case StringIndexingConfig::TokenizerType::URL: 56 return std::make_unique<UrlTokenizer>(); 57 #endif // ENABLE_URL_TOKENIZER 58 case StringIndexingConfig::TokenizerType::NONE: 59 [[fallthrough]]; 60 default: 61 // This should never happen. 62 return absl_ports::InvalidArgumentError( 63 "Invalid tokenizer type for an indexed section"); 64 } 65 } 66 CreateQueryTokenizer(QueryTokenizerType query_tokenizer_type,const LanguageSegmenter * lang_segmenter)67libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>> CreateQueryTokenizer( 68 QueryTokenizerType query_tokenizer_type, 69 const LanguageSegmenter* lang_segmenter) { 70 ICING_RETURN_ERROR_IF_NULL(lang_segmenter); 71 72 switch (query_tokenizer_type) { 73 case RAW_QUERY: 74 return std::make_unique<RawQueryTokenizer>(lang_segmenter); 75 default: 76 // This should never happen. 77 return absl_ports::InvalidArgumentError( 78 "Invalid tokenizer type for query"); 79 } 80 } 81 82 } // namespace tokenizer_factory 83 84 } // namespace lib 85 } // namespace icing 86