• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/tokenization/tokenizer-factory.h"
16 
17 #include <memory>
18 
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/proto/schema.pb.h"
22 #include "icing/tokenization/language-segmenter.h"
23 #include "icing/tokenization/plain-tokenizer.h"
24 #include "icing/tokenization/raw-query-tokenizer.h"
25 #include "icing/tokenization/rfc822-tokenizer.h"
26 #include "icing/tokenization/tokenizer.h"
27 
28 #ifdef ENABLE_URL_TOKENIZER
29 #include "icing/tokenization/url-tokenizer.h"
30 #endif  // ENABLE_URL_TOKENIZER
31 
32 #include "icing/tokenization/verbatim-tokenizer.h"
33 #include "icing/util/status-macros.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 namespace tokenizer_factory {
39 
40 libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,const LanguageSegmenter * lang_segmenter)41 CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
42                         const LanguageSegmenter* lang_segmenter) {
43   ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
44 
45   switch (type) {
46     case StringIndexingConfig::TokenizerType::PLAIN:
47       return std::make_unique<PlainTokenizer>(lang_segmenter);
48     case StringIndexingConfig::TokenizerType::VERBATIM:
49       return std::make_unique<VerbatimTokenizer>();
50     case StringIndexingConfig::TokenizerType::RFC822:
51       return std::make_unique<Rfc822Tokenizer>();
52 // TODO (b/246964044): remove ifdef guard when url-tokenizer is ready for export
53 // to Android.
54 #ifdef ENABLE_URL_TOKENIZER
55     case StringIndexingConfig::TokenizerType::URL:
56       return std::make_unique<UrlTokenizer>();
57 #endif  // ENABLE_URL_TOKENIZER
58     case StringIndexingConfig::TokenizerType::NONE:
59       [[fallthrough]];
60     default:
61       // This should never happen.
62       return absl_ports::InvalidArgumentError(
63           "Invalid tokenizer type for an indexed section");
64   }
65 }
66 
CreateQueryTokenizer(QueryTokenizerType query_tokenizer_type,const LanguageSegmenter * lang_segmenter)67 libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>> CreateQueryTokenizer(
68     QueryTokenizerType query_tokenizer_type,
69     const LanguageSegmenter* lang_segmenter) {
70   ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
71 
72   switch (query_tokenizer_type) {
73     case RAW_QUERY:
74       return std::make_unique<RawQueryTokenizer>(lang_segmenter);
75     default:
76       // This should never happen.
77       return absl_ports::InvalidArgumentError(
78           "Invalid tokenizer type for query");
79   }
80 }
81 
82 }  // namespace tokenizer_factory
83 
84 }  // namespace lib
85 }  // namespace icing
86