• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "String"
18 
19 #include "JNIHelp.h"
20 #include "JniConstants.h"
21 #include "ScopedPrimitiveArray.h"
22 #include "jni.h"
23 #include "unicode/utf16.h"
24 
25 #include <string.h>
26 
27 /**
28  * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
29  * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
30  * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
31  *
32  * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
33  * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
34  * to the garbage collector (nor hide potentially large allocations from it).
35  *
36  * Because a call to append might require an allocation, it might fail. Callers should always
37  * check the return value of append.
38  */
39 class NativeUnsafeByteSequence {
40 public:
NativeUnsafeByteSequence(JNIEnv * env)41     NativeUnsafeByteSequence(JNIEnv* env)
42         : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0)
43     {
44     }
45 
~NativeUnsafeByteSequence()46     ~NativeUnsafeByteSequence() {
47         // Release our pointer to the raw array, copying changes back to the Java heap.
48         if (mRawArray != NULL) {
49             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
50         }
51     }
52 
append(jbyte b)53     bool append(jbyte b) {
54         if (mOffset == mSize && !resize(mSize * 2)) {
55             return false;
56         }
57         mRawArray[mOffset++] = b;
58         return true;
59     }
60 
resize(int newSize)61     bool resize(int newSize) {
62         if (newSize == mSize) {
63             return true;
64         }
65 
66         // Allocate a new array.
67         jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
68         if (newJavaArray == NULL) {
69             return false;
70         }
71         jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL);
72         if (newRawArray == NULL) {
73             return false;
74         }
75 
76         // Copy data out of the old array and then let go of it.
77         // Note that we may be trimming the array.
78         if (mRawArray != NULL) {
79             memcpy(newRawArray, mRawArray, mOffset);
80             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
81             mEnv->DeleteLocalRef(mJavaArray);
82         }
83 
84         // Point ourselves at the new array.
85         mJavaArray = newJavaArray;
86         mRawArray = newRawArray;
87         mSize = newSize;
88         return true;
89     }
90 
toByteArray()91     jbyteArray toByteArray() {
92         // Trim any unused space, if necessary.
93         bool okay = resize(mOffset);
94         return okay ? mJavaArray : NULL;
95     }
96 
97 private:
98     JNIEnv* mEnv;
99     jbyteArray mJavaArray;
100     jbyte* mRawArray;
101     jint mSize;
102     jint mOffset;
103 
104     // Disallow copy and assignment.
105     NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
106     void operator=(const NativeUnsafeByteSequence&);
107 };
108 
Charsets_asciiBytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)109 static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
110     ScopedByteArrayRO bytes(env, javaBytes);
111     if (bytes.get() == NULL) {
112         return;
113     }
114     ScopedCharArrayRW chars(env, javaChars);
115     if (chars.get() == NULL) {
116         return;
117     }
118 
119     const jbyte* src = &bytes[offset];
120     jchar* dst = &chars[0];
121     static const jchar REPLACEMENT_CHAR = 0xfffd;
122     for (int i = length - 1; i >= 0; --i) {
123         jchar ch = static_cast<jchar>(*src++ & 0xff);
124         *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
125     }
126 }
127 
Charsets_isoLatin1BytesToChars(JNIEnv * env,jclass,jbyteArray javaBytes,jint offset,jint length,jcharArray javaChars)128 static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
129     ScopedByteArrayRO bytes(env, javaBytes);
130     if (bytes.get() == NULL) {
131         return;
132     }
133     ScopedCharArrayRW chars(env, javaChars);
134     if (chars.get() == NULL) {
135         return;
136     }
137 
138     const jbyte* src = &bytes[offset];
139     jchar* dst = &chars[0];
140     for (int i = length - 1; i >= 0; --i) {
141         *dst++ = static_cast<jchar>(*src++ & 0xff);
142     }
143 }
144 
145 /**
146  * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
147  * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
148  * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
149  */
charsToBytes(JNIEnv * env,jcharArray javaChars,jint offset,jint length,jchar maxValidChar)150 static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) {
151     ScopedCharArrayRO chars(env, javaChars);
152     if (chars.get() == NULL) {
153         return NULL;
154     }
155 
156     jbyteArray javaBytes = env->NewByteArray(length);
157     ScopedByteArrayRW bytes(env, javaBytes);
158     if (bytes.get() == NULL) {
159         return NULL;
160     }
161 
162     const jchar* src = &chars[offset];
163     jbyte* dst = &bytes[0];
164     for (int i = length - 1; i >= 0; --i) {
165         jchar ch = *src++;
166         if (ch > maxValidChar) {
167             ch = '?';
168         }
169         *dst++ = static_cast<jbyte>(ch);
170     }
171 
172     return javaBytes;
173 }
174 
Charsets_toAsciiBytes(JNIEnv * env,jclass,jcharArray javaChars,jint offset,jint length)175 static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
176     return charsToBytes(env, javaChars, offset, length, 0x7f);
177 }
178 
Charsets_toIsoLatin1Bytes(JNIEnv * env,jclass,jcharArray javaChars,jint offset,jint length)179 static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
180     return charsToBytes(env, javaChars, offset, length, 0xff);
181 }
182 
Charsets_toUtf8Bytes(JNIEnv * env,jclass,jcharArray javaChars,jint offset,jint length)183 static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
184     ScopedCharArrayRO chars(env, javaChars);
185     if (chars.get() == NULL) {
186         return NULL;
187     }
188 
189     NativeUnsafeByteSequence out(env);
190     if (!out.resize(length)) {
191         return NULL;
192     }
193 
194     const int end = offset + length;
195     for (int i = offset; i < end; ++i) {
196         jint ch = chars[i];
197         if (ch < 0x80) {
198             // One byte.
199             if (!out.append(ch)) {
200                 return NULL;
201             }
202         } else if (ch < 0x800) {
203             // Two bytes.
204             if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
205                 return NULL;
206             }
207         } else if (U16_IS_SURROGATE(ch)) {
208             // A supplementary character.
209             jchar high = (jchar) ch;
210             jchar low = (i + 1 != end) ? chars[i + 1] : 0;
211             if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
212                 if (!out.append('?')) {
213                     return NULL;
214                 }
215                 continue;
216             }
217             // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
218             ++i;
219             ch = U16_GET_SUPPLEMENTARY(high, low);
220             // Four bytes.
221             jbyte b1 = (ch >> 18) | 0xf0;
222             jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
223             jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
224             jbyte b4 = (ch & 0x3f) | 0x80;
225             if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
226                 return NULL;
227             }
228         } else {
229             // Three bytes.
230             jbyte b1 = (ch >> 12) | 0xe0;
231             jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
232             jbyte b3 = (ch & 0x3f) | 0x80;
233             if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
234                 return NULL;
235             }
236         }
237     }
238     return out.toByteArray();
239 }
240 
241 static JNINativeMethod gMethods[] = {
242     NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"),
243     NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"),
244     NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"),
245     NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"),
246     NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"),
247 };
register_java_nio_charset_Charsets(JNIEnv * env)248 void register_java_nio_charset_Charsets(JNIEnv* env) {
249     jniRegisterNativeMethods(env, "java/nio/charset/Charsets", gMethods, NELEM(gMethods));
250 }
251