• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Matcher"
18 
19 #include <memory>
20 #include <stdlib.h>
21 
22 #include <android-base/logging.h>
23 #include <nativehelper/JNIHelp.h>
24 #include <nativehelper/JniConstants.h>
25 #include <nativehelper/ScopedPrimitiveArray.h>
26 #include <nativehelper/ScopedStringChars.h>
27 
28 #include "IcuUtilities.h"
29 #include "JniException.h"
30 #include "ScopedJavaUnicodeString.h"
31 #include "jni.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/regex.h"
34 
35 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
36 
37 /**
38  * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
39  * the input it's currently operating on in the native heap.
40  *
41  * Rationale: We choose to make a copy here because it turns out to be a lot
42  * cheaper when a moving GC and/or string compression is enabled. This is
43  * because env->GetStringChars() always copies in this scenario. This becomes
44  * especially bad when the String in question is long and/or contains a large
45  * number of matches.
46  *
47  * Drawbacks: The native allocation associated with this class is no longer
48  * fixed size, so we're effectively lying to the NativeAllocationRegistry about
49  * the size of the object(s) we're allocating on the native heap. The peak
50  * memory usage doesn't change though, given that GetStringChars would have
51  * made an allocation of precisely the same size.
52  */
53 class MatcherState {
54 public:
MatcherState(icu::RegexMatcher * matcher)55     MatcherState(icu::RegexMatcher* matcher) :
56         mMatcher(matcher),
57         mUChars(nullptr),
58         mUText(nullptr),
59         mStatus(U_ZERO_ERROR) {
60     }
61 
updateInput(JNIEnv * env,jstring input)62     bool updateInput(JNIEnv* env, jstring input) {
63         // First, close the UText struct, since we're about to allocate a new one.
64         if (mUText != nullptr) {
65             utext_close(mUText);
66             mUText = nullptr;
67         }
68 
69         // Then delete the UChar* associated with the UText struct..
70         mUChars.reset(nullptr);
71 
72         // TODO: We should investigate whether we can avoid an additional copy
73         // in the native heap when is_copy == JNI_TRUE. The problem with doing
74         // that is that we might call ReleaseStringChars with a different
75         // JNIEnv* on a different downcall. This is currently safe as
76         // implemented in ART, but is unlikely to be portable and the spec stays
77         // silent on the matter.
78         ScopedStringChars inputChars(env, input);
79         if (inputChars.get() == nullptr) {
80             // There will be an exception pending if we get here.
81             return false;
82         }
83 
84         // Make a copy of |input| on the native heap. This copy will be live
85         // until the next call to updateInput or close.
86         mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
87         if (mUChars.get() == nullptr) {
88             env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
89             return false;
90         }
91 
92         static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
93         memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
94 
95         // Reset any errors that might have occurred on previous patches.
96         mStatus = U_ZERO_ERROR;
97         mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
98         if (mUText == nullptr) {
99             CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
100             return false;
101         }
102 
103         // It is an error for ICU to have returned a non-null mUText but to
104         // still have indicated an error.
105         CHECK(U_SUCCESS(mStatus));
106 
107         mMatcher->reset(mUText);
108         return true;
109     }
110 
~MatcherState()111     ~MatcherState() {
112         if (mUText != nullptr) {
113             utext_close(mUText);
114         }
115     }
116 
matcher()117     icu::RegexMatcher* matcher() {
118         return mMatcher.get();
119     }
120 
status()121     UErrorCode& status() {
122         return mStatus;
123     }
124 
updateOffsets(JNIEnv * env,jintArray javaOffsets)125     void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
126         ScopedIntArrayRW offsets(env, javaOffsets);
127         if (offsets.get() == NULL) {
128             return;
129         }
130 
131         for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) {
132             offsets[2*i + 0] = mMatcher->start(i, mStatus);
133             offsets[2*i + 1] = mMatcher->end(i, mStatus);
134         }
135     }
136 
137 private:
138     std::unique_ptr<icu::RegexMatcher> mMatcher;
139     std::unique_ptr<UChar[]> mUChars;
140     UText* mUText;
141     UErrorCode mStatus;
142 
143     // Disallow copy and assignment.
144     MatcherState(const MatcherState&);
145     void operator=(const MatcherState&);
146 };
147 
toMatcherState(jlong address)148 static inline MatcherState* toMatcherState(jlong address) {
149     return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
150 }
151 
Matcher_free(void * address)152 static void Matcher_free(void* address) {
153     MatcherState* state = reinterpret_cast<MatcherState*>(address);
154     delete state;
155 }
156 
Matcher_getNativeFinalizer(JNIEnv *,jclass)157 static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
158     return reinterpret_cast<jlong>(&Matcher_free);
159 }
160 
161 // Return a guess of the amount of native memory to be deallocated by a typical call to
162 // Matcher_free().
Matcher_nativeSize(JNIEnv *,jclass)163 static jint Matcher_nativeSize(JNIEnv*, jclass) {
164     return 200;  // Very rough guess based on a quick look at the implementation.
165 }
166 
Matcher_findImpl(JNIEnv * env,jclass,jlong addr,jint startIndex,jintArray offsets)167 static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
168     MatcherState* state = toMatcherState(addr);
169     UBool result = state->matcher()->find(startIndex, state->status());
170     if (result) {
171         state->updateOffsets(env, offsets);
172     }
173     return result;
174 }
175 
Matcher_findNextImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)176 static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
177     MatcherState* state = toMatcherState(addr);
178     UBool result = state->matcher()->find();
179     if (result) {
180         state->updateOffsets(env, offsets);
181     }
182     return result;
183 }
184 
Matcher_groupCountImpl(JNIEnv *,jclass,jlong addr)185 static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
186     MatcherState* state = toMatcherState(addr);
187     return state->matcher()->groupCount();
188 }
189 
Matcher_hitEndImpl(JNIEnv *,jclass,jlong addr)190 static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
191     MatcherState* state = toMatcherState(addr);
192     return state->matcher()->hitEnd();
193 }
194 
Matcher_lookingAtImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)195 static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
196     MatcherState* state = toMatcherState(addr);
197     UBool result = state->matcher()->lookingAt(state->status());
198     if (result) {
199         state->updateOffsets(env, offsets);
200     }
201     return result;
202 }
203 
Matcher_matchesImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)204 static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
205     MatcherState* state = toMatcherState(addr);
206     UBool result = state->matcher()->matches(state->status());
207     if (result) {
208         state->updateOffsets(env, offsets);
209     }
210     return result;
211 }
212 
Matcher_openImpl(JNIEnv * env,jclass,jlong patternAddr)213 static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) {
214     icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
215     UErrorCode status = U_ZERO_ERROR;
216     icu::RegexMatcher* result = pattern->matcher(status);
217     if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
218         return 0;
219     }
220 
221     return reinterpret_cast<uintptr_t>(new MatcherState(result));
222 }
223 
Matcher_requireEndImpl(JNIEnv *,jclass,jlong addr)224 static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
225     MatcherState* state = toMatcherState(addr);
226     return state->matcher()->requireEnd();
227 }
228 
Matcher_setInputImpl(JNIEnv * env,jclass,jlong addr,jstring javaText,jint start,jint end)229 static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
230     MatcherState* state = toMatcherState(addr);
231     if (state->updateInput(env, javaText)) {
232         state->matcher()->region(start, end, state->status());
233     }
234 }
235 
Matcher_useAnchoringBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)236 static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
237     MatcherState* state = toMatcherState(addr);
238     state->matcher()->useAnchoringBounds(value);
239 }
240 
Matcher_useTransparentBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)241 static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
242     MatcherState* state = toMatcherState(addr);
243     state->matcher()->useTransparentBounds(value);
244 }
245 
Matcher_getMatchedGroupIndex0(JNIEnv * env,jclass,jlong patternAddr,jstring javaGroupName)246 static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
247   icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
248   ScopedJavaUnicodeString groupName(env, javaGroupName);
249   UErrorCode status = U_ZERO_ERROR;
250 
251   jint result = pattern->groupNumberFromName(groupName.unicodeString(), status);
252   if (U_SUCCESS(status)) {
253     return result;
254   }
255   if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) {
256     return -1;
257   }
258   maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status);
259   return -1;
260 }
261 
262 
263 static JNINativeMethod gMethods[] = {
264     NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
265     NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
266     NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
267     NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
268     NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
269     NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
270     NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
271     NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
272     NATIVE_METHOD(Matcher, nativeSize, "()I"),
273     NATIVE_METHOD(Matcher, openImpl, "(J)J"),
274     NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
275     NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"),
276     NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"),
277     NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"),
278 };
register_java_util_regex_Matcher(JNIEnv * env)279 void register_java_util_regex_Matcher(JNIEnv* env) {
280     jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods));
281 }
282