1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ 16 #define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ 17 18 #include <jni.h> 19 20 #include <queue> 21 #include <string> 22 23 #include "icing/text_classifier/lib3/utils/java/jni-base.h" 24 #include "icing/jni/jni-cache.h" 25 26 namespace icing { 27 namespace lib { 28 29 // A class that handles the cross-JNI interactions with BreakIteratorBatcher and 30 // hides the batching element to provide an interface akin to 31 // java.text.BreakIterator. 32 // 33 // Example: 34 // std::string text = "我每天走路去上班。"; 35 // ASSERT_THAT(text, SizeIs(27)); 36 // std::unique_ptr<ReverseJniBreakIterator> itr = 37 // ReverseJniBreakIterator::Create(jni_cache, text, locale); 38 // std::vector<int> nexts; 39 // int next = itr->Next(); 40 // while (next != ReverseJniBreakIterator::kDone) { 41 // nexts.push_back(next); 42 // next = itr->Next(); 43 // } 44 // EXPECT_THAT(nexts, ElementsAre(1, 3, 5, 6, 8)); 45 class ReverseJniBreakIterator { 46 public: 47 static constexpr int kDone = -1; 48 49 // Creates a ReverseJniBreakiterator with the given text and locale. 50 // 51 // Returns: 52 // A ReverseJniBreakIterator on success 53 // INVALID_ARGUMENT if jni_cache isn't a valid JniCache pointer 54 // INTERNAL if unable to create any of the required Java objects 55 static libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>> 56 Create(const JniCache* jni_cache, std::string_view text, 57 std::string_view locale); 58 59 // Returns the UTF-16 boundary following the current boundary. If the current 60 // boundary is the last text boundary, it returns 61 // ReverseJniBreakIterator::kDONE. 62 // 63 // NOTE: The 'boundary' refers to the UTF-16 boundary - NOT the UTF-8 64 // boundary. Callers interested in the UTF-8 boundary are required to maintain 65 // whatever state is necessary to translate from UTF-16 to UTF-8 boundaries. 66 int Next(); 67 68 // Returns the first UTF-16 boundary. The iterator's current position is set 69 // to the first text boundary and any cached data is cleared. 70 int First(); 71 72 // Returns the position of the first UTF-16 boundary preceding the UTF-16 73 // offset. If there is no boundary preceding the specified offset, then 74 // ReverseJniBreakIterator::kDone is returned. 75 // 76 // The iterator's current position is set to the segment whose boundary was 77 // returned and any cached data is cleared. 78 int Preceding(int offset); 79 80 // Returns the position of the first UTF-16 boundary following the UTF-16 81 // offset. If there is no boundary following the specified offset, then 82 // ReverseJniBreakIterator::kDone is returned. 83 // 84 // The iterator's current position is set to the segment whose boundary 85 // was returned and any cached data is cleared. 86 int Following(int offset); 87 88 private: 89 ReverseJniBreakIterator( 90 const JniCache* jni_cache, 91 libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher); 92 93 // Fetches the results of up to kBatchSize next calls and stores them in 94 // break_indices_cache_. Returns the number of results or kDone if no more 95 // results could be fetched. 96 int FetchNextBatch(); 97 98 // Empties the cache and sets is_done_ and is_almost_done_ to false. 99 void ClearCache(); 100 101 // Keeps track of references to Java classes and methods. Does NOT own. 102 const JniCache* jni_cache_; 103 104 // The reference to the actual instance of BreakIteratorBatcher that 105 // this class interacts with. 106 libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher_; 107 108 // The cache holding the most recent batch of return values from 109 // BreakIteratorBatcher#next. 110 std::queue<int> break_indices_cache_; 111 112 bool is_done_; 113 114 // The last batch was incomplete (< kBatchSize results were returned). The 115 // next call to BreakIteratorBatcher#next is guaranteed to return an 116 // empty array. Once the results from the last batch are evicted from 117 // break_indices_cache, ReverseJniBreakIterator will transition to is_done_. 118 bool is_almost_done_; 119 }; 120 121 } // namespace lib 122 } // namespace icing 123 124 #endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_ 125