1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_ 16 #define ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_ 17 18 #include <memory> 19 #include <string_view> 20 #include <utility> 21 22 #include "icing/text_classifier/lib3/utils/base/statusor.h" 23 #include "icing/jni/jni-cache.h" 24 #include "icing/tokenization/language-segmenter.h" 25 26 namespace icing { 27 namespace lib { 28 29 namespace language_segmenter_factory { 30 31 // TODO: b/332382299 - Avoid using default values in the SegmenterOptions 32 // constructor. This can lead to unexpected behavior. 33 struct SegmenterOptions { 34 explicit SegmenterOptions(std::string locale, 35 const JniCache* jni_cache = nullptr, 36 bool enable_icu_segmenter = false) localeSegmenterOptions37 : locale(std::move(locale)), 38 jni_cache(jni_cache), 39 enable_icu_segmenter(enable_icu_segmenter) {} 40 41 std::string locale; 42 43 // Does not hold ownership. 44 const JniCache* jni_cache; 45 46 // Determines whether to use an ICU based language segmenter 47 // in icu-with-reverse-jni-language-segmenter-factory or not. 48 // The default value is false, which means that the fallback option of a 49 // Reverse JNI based language segmenter will be used. 50 // 51 // This variable is a no-op for all other segmenter factories because they 52 // only support one segmenter type. 53 bool enable_icu_segmenter; 54 }; 55 56 // Creates a language segmenter with the given locale. 57 // 58 // Returns: 59 // A LanguageSegmenter on success 60 // INVALID_ARGUMENT_ERROR if locale string is invalid 61 libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create( 62 SegmenterOptions options); 63 64 } // namespace language_segmenter_factory 65 66 } // namespace lib 67 } // namespace icing 68 69 #endif // ICING_TOKENIZATION_LANGUAGE_SEGMENTER_FACTORY_H_ 70