• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
16 #define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
17 
18 #include <jni.h>
19 
20 #include <queue>
21 #include <string>
22 
23 #include "icing/text_classifier/lib3/utils/java/jni-base.h"
24 #include "icing/jni/jni-cache.h"
25 
26 namespace icing {
27 namespace lib {
28 
29 // A class that handles the cross-JNI interactions with BreakIteratorBatcher and
30 // hides the batching element to provide an interface akin to
31 // java.text.BreakIterator.
32 //
33 // Example:
34 // std::string text = "我每天走路去上班。";
35 // ASSERT_THAT(text, SizeIs(27));
36 // std::unique_ptr<ReverseJniBreakIterator> itr =
37 //     ReverseJniBreakIterator::Create(jni_cache, text, locale);
38 // std::vector<int> nexts;
39 // int next = itr->Next();
40 // while (next != ReverseJniBreakIterator::kDone) {
41 //   nexts.push_back(next);
42 //   next = itr->Next();
43 // }
44 // EXPECT_THAT(nexts, ElementsAre(1, 3, 5, 6, 8));
45 class ReverseJniBreakIterator {
46  public:
47   static constexpr int kDone = -1;
48 
49   // Creates a ReverseJniBreakiterator with the given text and locale.
50   //
51   // Returns:
52   //   A ReverseJniBreakIterator on success
53   //   INVALID_ARGUMENT if jni_cache isn't a valid JniCache pointer
54   //   INTERNAL if unable to create any of the required Java objects
55   static libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>>
56   Create(const JniCache* jni_cache, std::string_view text,
57          std::string_view locale);
58 
59   // Returns the UTF-16 boundary following the current boundary. If the current
60   // boundary is the last text boundary, it returns
61   // ReverseJniBreakIterator::kDONE.
62   //
63   // NOTE: The 'boundary' refers to the UTF-16 boundary - NOT the UTF-8
64   // boundary. Callers interested in the UTF-8 boundary are required to maintain
65   // whatever state is necessary to translate from UTF-16 to UTF-8 boundaries.
66   int Next();
67 
68   // Returns the first UTF-16 boundary. The iterator's current position is set
69   // to the first text boundary and any cached data is cleared.
70   int First();
71 
72   // Returns the position of the first UTF-16 boundary preceding the UTF-16
73   // offset. If there is no boundary preceding the specified offset, then
74   // ReverseJniBreakIterator::kDone is returned.
75   //
76   // The iterator's current position is set to the segment whose boundary was
77   // returned and any cached data is cleared.
78   int Preceding(int offset);
79 
80   // Returns the position of the first UTF-16 boundary following the UTF-16
81   // offset. If there is no boundary following the specified offset, then
82   // ReverseJniBreakIterator::kDone is returned.
83   //
84   // The iterator's current position is set to the segment whose boundary
85   // was returned and any cached data is cleared.
86   int Following(int offset);
87 
88  private:
89   ReverseJniBreakIterator(
90       const JniCache* jni_cache,
91       libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher);
92 
93   // Fetches the results of up to kBatchSize next calls and stores them in
94   // break_indices_cache_. Returns the number of results or kDone if no more
95   // results could be fetched.
96   int FetchNextBatch();
97 
98   // Empties the cache and sets is_done_ and is_almost_done_ to false.
99   void ClearCache();
100 
101   // Keeps track of references to Java classes and methods. Does NOT own.
102   const JniCache* jni_cache_;
103 
104   // The reference to the actual instance of BreakIteratorBatcher that
105   // this class interacts with.
106   libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher_;
107 
108   // The cache holding the most recent batch of return values from
109   // BreakIteratorBatcher#next.
110   std::queue<int> break_indices_cache_;
111 
112   bool is_done_;
113 
114   // The last batch was incomplete (< kBatchSize results were returned). The
115   // next call to BreakIteratorBatcher#next is guaranteed to return an
116   // empty array. Once the results from the last batch are evicted from
117   // break_indices_cache, ReverseJniBreakIterator will transition to is_done_.
118   bool is_almost_done_;
119 };
120 
121 }  // namespace lib
122 }  // namespace icing
123 
124 #endif  // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
125