1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Matcher"
18
19 #include <memory>
20 #include <stdlib.h>
21
22 #include <android-base/logging.h>
23 #include <nativehelper/JNIHelp.h>
24 #include <nativehelper/JniConstants.h>
25 #include <nativehelper/ScopedPrimitiveArray.h>
26 #include <nativehelper/ScopedStringChars.h>
27
28 #include "IcuUtilities.h"
29 #include "JniException.h"
30 #include "ScopedJavaUnicodeString.h"
31 #include "jni.h"
32 #include "unicode/parseerr.h"
33 #include "unicode/regex.h"
34
35 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
36
37 /**
38 * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
39 * the input it's currently operating on in the native heap.
40 *
41 * Rationale: We choose to make a copy here because it turns out to be a lot
42 * cheaper when a moving GC and/or string compression is enabled. This is
43 * because env->GetStringChars() always copies in this scenario. This becomes
44 * especially bad when the String in question is long and/or contains a large
45 * number of matches.
46 *
47 * Drawbacks: The native allocation associated with this class is no longer
48 * fixed size, so we're effectively lying to the NativeAllocationRegistry about
49 * the size of the object(s) we're allocating on the native heap. The peak
50 * memory usage doesn't change though, given that GetStringChars would have
51 * made an allocation of precisely the same size.
52 */
53 class MatcherState {
54 public:
MatcherState(icu::RegexMatcher * matcher)55 MatcherState(icu::RegexMatcher* matcher) :
56 mMatcher(matcher),
57 mUChars(nullptr),
58 mUText(nullptr),
59 mStatus(U_ZERO_ERROR) {
60 }
61
updateInput(JNIEnv * env,jstring input)62 bool updateInput(JNIEnv* env, jstring input) {
63 // First, close the UText struct, since we're about to allocate a new one.
64 if (mUText != nullptr) {
65 utext_close(mUText);
66 mUText = nullptr;
67 }
68
69 // Then delete the UChar* associated with the UText struct..
70 mUChars.reset(nullptr);
71
72 // TODO: We should investigate whether we can avoid an additional copy
73 // in the native heap when is_copy == JNI_TRUE. The problem with doing
74 // that is that we might call ReleaseStringChars with a different
75 // JNIEnv* on a different downcall. This is currently safe as
76 // implemented in ART, but is unlikely to be portable and the spec stays
77 // silent on the matter.
78 ScopedStringChars inputChars(env, input);
79 if (inputChars.get() == nullptr) {
80 // There will be an exception pending if we get here.
81 return false;
82 }
83
84 // Make a copy of |input| on the native heap. This copy will be live
85 // until the next call to updateInput or close.
86 mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
87 if (mUChars.get() == nullptr) {
88 env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
89 return false;
90 }
91
92 static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
93 memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
94
95 // Reset any errors that might have occurred on previous patches.
96 mStatus = U_ZERO_ERROR;
97 mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
98 if (mUText == nullptr) {
99 CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
100 return false;
101 }
102
103 // It is an error for ICU to have returned a non-null mUText but to
104 // still have indicated an error.
105 CHECK(U_SUCCESS(mStatus));
106
107 mMatcher->reset(mUText);
108 return true;
109 }
110
~MatcherState()111 ~MatcherState() {
112 if (mUText != nullptr) {
113 utext_close(mUText);
114 }
115 }
116
matcher()117 icu::RegexMatcher* matcher() {
118 return mMatcher.get();
119 }
120
status()121 UErrorCode& status() {
122 return mStatus;
123 }
124
updateOffsets(JNIEnv * env,jintArray javaOffsets)125 void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
126 ScopedIntArrayRW offsets(env, javaOffsets);
127 if (offsets.get() == NULL) {
128 return;
129 }
130
131 for (size_t i = 0, groupCount = mMatcher->groupCount(); i <= groupCount; ++i) {
132 offsets[2*i + 0] = mMatcher->start(i, mStatus);
133 offsets[2*i + 1] = mMatcher->end(i, mStatus);
134 }
135 }
136
137 private:
138 std::unique_ptr<icu::RegexMatcher> mMatcher;
139 std::unique_ptr<UChar[]> mUChars;
140 UText* mUText;
141 UErrorCode mStatus;
142
143 // Disallow copy and assignment.
144 MatcherState(const MatcherState&);
145 void operator=(const MatcherState&);
146 };
147
toMatcherState(jlong address)148 static inline MatcherState* toMatcherState(jlong address) {
149 return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
150 }
151
Matcher_free(void * address)152 static void Matcher_free(void* address) {
153 MatcherState* state = reinterpret_cast<MatcherState*>(address);
154 delete state;
155 }
156
Matcher_getNativeFinalizer(JNIEnv *,jclass)157 static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
158 return reinterpret_cast<jlong>(&Matcher_free);
159 }
160
161 // Return a guess of the amount of native memory to be deallocated by a typical call to
162 // Matcher_free().
Matcher_nativeSize(JNIEnv *,jclass)163 static jint Matcher_nativeSize(JNIEnv*, jclass) {
164 return 200; // Very rough guess based on a quick look at the implementation.
165 }
166
Matcher_findImpl(JNIEnv * env,jclass,jlong addr,jint startIndex,jintArray offsets)167 static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
168 MatcherState* state = toMatcherState(addr);
169 UBool result = state->matcher()->find(startIndex, state->status());
170 if (result) {
171 state->updateOffsets(env, offsets);
172 }
173 return result;
174 }
175
Matcher_findNextImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)176 static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
177 MatcherState* state = toMatcherState(addr);
178 UBool result = state->matcher()->find();
179 if (result) {
180 state->updateOffsets(env, offsets);
181 }
182 return result;
183 }
184
Matcher_groupCountImpl(JNIEnv *,jclass,jlong addr)185 static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
186 MatcherState* state = toMatcherState(addr);
187 return state->matcher()->groupCount();
188 }
189
Matcher_hitEndImpl(JNIEnv *,jclass,jlong addr)190 static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
191 MatcherState* state = toMatcherState(addr);
192 return state->matcher()->hitEnd();
193 }
194
Matcher_lookingAtImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)195 static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
196 MatcherState* state = toMatcherState(addr);
197 UBool result = state->matcher()->lookingAt(state->status());
198 if (result) {
199 state->updateOffsets(env, offsets);
200 }
201 return result;
202 }
203
Matcher_matchesImpl(JNIEnv * env,jclass,jlong addr,jintArray offsets)204 static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
205 MatcherState* state = toMatcherState(addr);
206 UBool result = state->matcher()->matches(state->status());
207 if (result) {
208 state->updateOffsets(env, offsets);
209 }
210 return result;
211 }
212
Matcher_openImpl(JNIEnv * env,jclass,jlong patternAddr)213 static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) {
214 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
215 UErrorCode status = U_ZERO_ERROR;
216 icu::RegexMatcher* result = pattern->matcher(status);
217 if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
218 return 0;
219 }
220
221 return reinterpret_cast<uintptr_t>(new MatcherState(result));
222 }
223
Matcher_requireEndImpl(JNIEnv *,jclass,jlong addr)224 static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
225 MatcherState* state = toMatcherState(addr);
226 return state->matcher()->requireEnd();
227 }
228
Matcher_setInputImpl(JNIEnv * env,jclass,jlong addr,jstring javaText,jint start,jint end)229 static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
230 MatcherState* state = toMatcherState(addr);
231 if (state->updateInput(env, javaText)) {
232 state->matcher()->region(start, end, state->status());
233 }
234 }
235
Matcher_useAnchoringBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)236 static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
237 MatcherState* state = toMatcherState(addr);
238 state->matcher()->useAnchoringBounds(value);
239 }
240
Matcher_useTransparentBoundsImpl(JNIEnv *,jclass,jlong addr,jboolean value)241 static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
242 MatcherState* state = toMatcherState(addr);
243 state->matcher()->useTransparentBounds(value);
244 }
245
Matcher_getMatchedGroupIndex0(JNIEnv * env,jclass,jlong patternAddr,jstring javaGroupName)246 static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
247 icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
248 ScopedJavaUnicodeString groupName(env, javaGroupName);
249 UErrorCode status = U_ZERO_ERROR;
250
251 jint result = pattern->groupNumberFromName(groupName.unicodeString(), status);
252 if (U_SUCCESS(status)) {
253 return result;
254 }
255 if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) {
256 return -1;
257 }
258 maybeThrowIcuException(env, "RegexPattern::groupNumberFromName", status);
259 return -1;
260 }
261
262
263 static JNINativeMethod gMethods[] = {
264 NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
265 NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
266 NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
267 NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
268 NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
269 NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
270 NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
271 NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
272 NATIVE_METHOD(Matcher, nativeSize, "()I"),
273 NATIVE_METHOD(Matcher, openImpl, "(J)J"),
274 NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
275 NATIVE_METHOD(Matcher, setInputImpl, "(JLjava/lang/String;II)V"),
276 NATIVE_METHOD(Matcher, useAnchoringBoundsImpl, "(JZ)V"),
277 NATIVE_METHOD(Matcher, useTransparentBoundsImpl, "(JZ)V"),
278 };
register_java_util_regex_Matcher(JNIEnv * env)279 void register_java_util_regex_Matcher(JNIEnv* env) {
280 jniRegisterNativeMethods(env, "java/util/regex/Matcher", gMethods, NELEM(gMethods));
281 }
282