• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
16 #define ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
17 
#include <memory>
#include <string>
#include <string_view>
#include <utility>
21 
22 #include "icing/text_classifier/lib3/utils/base/statusor.h"
23 #include "icing/jni/jni-cache.h"
24 #include "icing/tokenization/language-segmenter.h"
25 
26 namespace icing {
27 namespace lib {
28 
29 namespace language_segmenter_factory {
30 
31 // TODO: b/332382299 - Avoid using default values in the SegmenterOptions
32 // constructor. This can lead to unexpected behavior.
33 struct SegmenterOptions {
34   explicit SegmenterOptions(std::string locale,
35                             const JniCache* jni_cache = nullptr,
36                             bool enable_icu_segmenter = false)
localeSegmenterOptions37       : locale(std::move(locale)),
38         jni_cache(jni_cache),
39         enable_icu_segmenter(enable_icu_segmenter) {}
40 
41   std::string locale;
42 
43   // Does not hold ownership.
44   const JniCache* jni_cache;
45 
46   // Determines whether to use an ICU based language segmenter
47   // in icu-with-reverse-jni-language-segmenter-factory or not.
48   // The default value is false, which means that the fallback option of a
49   // Reverse JNI based language segmenter will be used.
50   //
51   // This variable is a no-op for all other segmenter factories because they
52   // only support one segmenter type.
53   bool enable_icu_segmenter;
54 };
55 
56 // Creates a language segmenter with the given locale.
57 //
58 // Returns:
59 //   A LanguageSegmenter on success
60 //   INVALID_ARGUMENT_ERROR if locale string is invalid
61 libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
62     SegmenterOptions options);
63 
64 }  // namespace language_segmenter_factory
65 
66 }  // namespace lib
67 }  // namespace icing
68 
69 #endif  // ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_
70