• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Pattern"
18 
19 #include <stdlib.h>
20 
21 #include <nativehelper/JNIHelp.h>
22 #include <nativehelper/JniConstants.h>
23 #include "ScopedJavaUnicodeString.h"
24 #include "jni.h"
25 #include "unicode/parseerr.h"
26 #include "unicode/regex.h"
27 
28 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexPattern.html
29 
regexDetailMessage(UErrorCode status)30 static const char* regexDetailMessage(UErrorCode status) {
31     // These human-readable error messages were culled from "utypes.h", and then slightly tuned
32     // to make more sense in context.
33     // If we don't have a special-case, we'll just return the textual name of
34     // the enum value (such as U_REGEX_RULE_SYNTAX), which is better than nothing.
35     switch (status) {
36     case U_REGEX_INTERNAL_ERROR: return "An internal error was detected";
37     case U_REGEX_RULE_SYNTAX: return "Syntax error in regexp pattern";
38     case U_REGEX_INVALID_STATE: return "Matcher in invalid state for requested operation";
39     case U_REGEX_BAD_ESCAPE_SEQUENCE: return "Unrecognized backslash escape sequence in pattern";
40     case U_REGEX_PROPERTY_SYNTAX: return "Incorrect Unicode property";
41     case U_REGEX_UNIMPLEMENTED: return "Use of unimplemented feature";
42     case U_REGEX_MISMATCHED_PAREN: return "Incorrectly nested parentheses in regexp pattern";
43     case U_REGEX_NUMBER_TOO_BIG: return "Decimal number is too large";
44     case U_REGEX_BAD_INTERVAL: return "Error in {min,max} interval";
45     case U_REGEX_MAX_LT_MIN: return "In {min,max}, max is less than min";
46     case U_REGEX_INVALID_BACK_REF: return "Back-reference to a non-existent capture group";
47     case U_REGEX_INVALID_FLAG: return "Invalid value for match mode flags";
48     case U_REGEX_LOOK_BEHIND_LIMIT: return "Look-behind pattern matches must have a bounded maximum length";
49     case U_REGEX_SET_CONTAINS_STRING: return "Regular expressions cannot have UnicodeSets containing strings";
50     case U_REGEX_OCTAL_TOO_BIG: return "Octal character constants must be <= 0377.";
51     case U_REGEX_MISSING_CLOSE_BRACKET: return "Missing closing bracket in character class";
52     case U_REGEX_INVALID_RANGE: return "In a character range [x-y], x is greater than y";
53     case U_REGEX_STACK_OVERFLOW: return "Regular expression backtrack stack overflow";
54     case U_REGEX_TIME_OUT: return "Maximum allowed match time exceeded";
55     case U_REGEX_STOPPED_BY_CALLER: return "Matching operation aborted by user callback function";
56     default:
57         return u_errorName(status);
58     }
59 }
60 
throwPatternSyntaxException(JNIEnv * env,UErrorCode status,jstring pattern,UParseError error)61 static void throwPatternSyntaxException(JNIEnv* env, UErrorCode status, jstring pattern, UParseError error) {
62     static jmethodID method = env->GetMethodID(JniConstants::patternSyntaxExceptionClass,
63             "<init>", "(Ljava/lang/String;Ljava/lang/String;I)V");
64     jstring message = env->NewStringUTF(regexDetailMessage(status));
65     jclass exceptionClass = JniConstants::patternSyntaxExceptionClass;
66     jobject exception = env->NewObject(exceptionClass, method, message, pattern, error.offset);
67     env->Throw(reinterpret_cast<jthrowable>(exception));
68 }
69 
Pattern_free(void * addr)70 static void Pattern_free(void* addr) {
71     delete reinterpret_cast<icu::RegexPattern*>(addr);
72 }
73 
Pattern_getNativeFinalizer(JNIEnv *,jclass)74 static jlong Pattern_getNativeFinalizer(JNIEnv*, jclass) {
75     return reinterpret_cast<jlong>(&Pattern_free);
76 }
77 
78 // Return a guess of the amount of native memory to be deallocated by a typical call to
79 // Pattern_free().
Pattern_nativeSize(JNIEnv *,jclass)80 static jint Pattern_nativeSize(JNIEnv*, jclass) {
81     return 500;  // Very rough guess based on a quick look at the implementation.
82 }
83 
Pattern_compileImpl(JNIEnv * env,jclass,jstring javaRegex,jint flags)84 static jlong Pattern_compileImpl(JNIEnv* env, jclass, jstring javaRegex, jint flags) {
85     flags |= UREGEX_ERROR_ON_UNKNOWN_ESCAPES;
86 
87     UErrorCode status = U_ZERO_ERROR;
88     UParseError error;
89     error.offset = -1;
90 
91     ScopedJavaUnicodeString regex(env, javaRegex);
92     if (!regex.valid()) {
93         return 0;
94     }
95     icu::UnicodeString& regexString(regex.unicodeString());
96     icu::RegexPattern* result = icu::RegexPattern::compile(regexString, flags, error, status);
97     if (!U_SUCCESS(status)) {
98         throwPatternSyntaxException(env, status, javaRegex, error);
99     }
100     return static_cast<jlong>(reinterpret_cast<uintptr_t>(result));
101 }
102 
103 static JNINativeMethod gMethods[] = {
104     NATIVE_METHOD(Pattern, compileImpl, "(Ljava/lang/String;I)J"),
105     NATIVE_METHOD(Pattern, getNativeFinalizer, "()J"),
106     NATIVE_METHOD(Pattern, nativeSize, "()I"),
107 };
108 
register_java_util_regex_Pattern(JNIEnv * env)109 void register_java_util_regex_Pattern(JNIEnv* env) {
110     jniRegisterNativeMethods(env, "java/util/regex/Pattern", gMethods, NELEM(gMethods));
111 }
112