1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 /*
18 * UTF-8 and Unicode string manipulation, plus java/lang/String convenience
19 * functions.
20 *
21 * In most cases we populate the fields in the String object directly,
22 * rather than going through an instance field lookup.
23 */
24 #include "Dalvik.h"
25 #include <stdlib.h>
26
27 /*
28 * Initialize string globals.
29 *
30 * This isn't part of the VM init sequence because it's hard to get the
31 * timing right -- we need it to happen after java/lang/String has been
32 * loaded, but before anybody wants to use a string. It's easiest to
33 * just initialize it on first use.
34 *
35 * In some unusual circumstances (e.g. trying to throw an exception because
36 * String implements java/lang/CharSequence, but CharSequence doesn't exist)
37 * we can try to create an exception string internally before anything has
38 * really tried to use String. In that case we basically self-destruct.
39 *
40 * We're expecting to be essentially single-threaded at this point.
41 * We employ atomics to ensure everything is observed correctly, and also
42 * to guarantee that we do detect a problem if our assumption is wrong.
43 */
stringStartup()44 static bool stringStartup()
45 {
46 if (gDvm.javaLangStringReady < 0) {
47 LOGE("ERROR: reentrant string initialization\n");
48 assert(false);
49 return false;
50 }
51
52 if (android_atomic_acquire_cas(0, -1, &gDvm.javaLangStringReady) != 0) {
53 LOGE("ERROR: initial string-ready state not 0 (%d)\n",
54 gDvm.javaLangStringReady);
55 return false;
56 }
57
58 if (gDvm.classJavaLangString == NULL)
59 gDvm.classJavaLangString =
60 dvmFindSystemClassNoInit("Ljava/lang/String;");
61
62 gDvm.offJavaLangString_value =
63 dvmFindFieldOffset(gDvm.classJavaLangString, "value", "[C");
64 gDvm.offJavaLangString_count =
65 dvmFindFieldOffset(gDvm.classJavaLangString, "count", "I");
66 gDvm.offJavaLangString_offset =
67 dvmFindFieldOffset(gDvm.classJavaLangString, "offset", "I");
68 gDvm.offJavaLangString_hashCode =
69 dvmFindFieldOffset(gDvm.classJavaLangString, "hashCode", "I");
70
71 if (gDvm.offJavaLangString_value < 0 ||
72 gDvm.offJavaLangString_count < 0 ||
73 gDvm.offJavaLangString_offset < 0 ||
74 gDvm.offJavaLangString_hashCode < 0)
75 {
76 LOGE("VM-required field missing from java/lang/String\n");
77 return false;
78 }
79
80 bool badValue = false;
81 if (gDvm.offJavaLangString_value != STRING_FIELDOFF_VALUE) {
82 LOGE("InlineNative: String.value offset = %d, expected %d\n",
83 gDvm.offJavaLangString_value, STRING_FIELDOFF_VALUE);
84 badValue = true;
85 }
86 if (gDvm.offJavaLangString_count != STRING_FIELDOFF_COUNT) {
87 LOGE("InlineNative: String.count offset = %d, expected %d\n",
88 gDvm.offJavaLangString_count, STRING_FIELDOFF_COUNT);
89 badValue = true;
90 }
91 if (gDvm.offJavaLangString_offset != STRING_FIELDOFF_OFFSET) {
92 LOGE("InlineNative: String.offset offset = %d, expected %d\n",
93 gDvm.offJavaLangString_offset, STRING_FIELDOFF_OFFSET);
94 badValue = true;
95 }
96 if (gDvm.offJavaLangString_hashCode != STRING_FIELDOFF_HASHCODE) {
97 LOGE("InlineNative: String.hashCode offset = %d, expected %d\n",
98 gDvm.offJavaLangString_hashCode, STRING_FIELDOFF_HASHCODE);
99 badValue = true;
100 }
101 if (badValue)
102 return false;
103
104 android_atomic_release_store(1, &gDvm.javaLangStringReady);
105
106 return true;
107 }
108
/*
 * Release any heap-allocated storage owned by the string code.
 *
 * There is currently nothing to release, so this is a no-op.
 */
void dvmStringShutdown()
{
    /* nothing to do at present */
}
116
117 /*
118 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
119 *
120 * This may or may not yield the same results as the java/lang/String
121 * computeHashCode() function. (To make sure this doesn't get abused,
122 * I'm initializing the hash code to 1 so they *don't* match up.)
123 *
124 * It would be more correct to invoke dexGetUtf16FromUtf8() here and compute
125 * the hash with the result. That way, if something encoded the same
126 * character in two different ways, the hash value would be the same. For
127 * our purposes that isn't necessary.
128 */
dvmComputeUtf8Hash(const char * utf8Str)129 u4 dvmComputeUtf8Hash(const char* utf8Str)
130 {
131 u4 hash = 1;
132
133 while (*utf8Str != '\0')
134 hash = hash * 31 + *utf8Str++;
135
136 return hash;
137 }
138
/*
 * Like "strlen", but for strings encoded with "modified" UTF-8.
 *
 * The value returned is the number of characters (one per encoded
 * sequence), which may or may not be the same as the number of bytes.
 *
 * A byte with the high bit set starts a two-byte sequence, or a three-byte
 * sequence if bit 0x20 is also set.  If the string ends in the middle of
 * such a sequence, the partial sequence is counted as one character and
 * scanning stops at the terminator instead of running past it.
 *
 * (If this needs optimizing, a table lookup keyed on the lead byte's high
 * bits could replace the branches for well-formed input.)
 */
int dvmUtf8Len(const char* utf8Str)
{
    int len = 0;
    int ic;

    while ((ic = *utf8Str++) != '\0') {
        len++;

        if ((ic & 0x80) == 0)
            continue;               /* one-byte encoding */

        /* two- or three-byte encoding: skip the second byte, if present */
        if (*utf8Str == '\0')
            break;                  /* truncated sequence */
        utf8Str++;

        if ((ic & 0x20) != 0) {
            /* three-byte encoding: skip the third byte, if present */
            if (*utf8Str == '\0')
                break;              /* truncated sequence */
            utf8Str++;
        }
    }

    return len;
}
166
167 /*
168 * Convert a "modified" UTF-8 string to UTF-16.
169 */
dvmConvertUtf8ToUtf16(u2 * utf16Str,const char * utf8Str)170 void dvmConvertUtf8ToUtf16(u2* utf16Str, const char* utf8Str)
171 {
172 while (*utf8Str != '\0')
173 *utf16Str++ = dexGetUtf16FromUtf8(&utf8Str);
174 }
175
176 /*
177 * Given a UTF-16 string, compute the length of the corresponding UTF-8
178 * string in bytes.
179 */
utf16_utf8ByteLen(const u2 * utf16Str,int len)180 static int utf16_utf8ByteLen(const u2* utf16Str, int len)
181 {
182 int utf8Len = 0;
183
184 while (len--) {
185 unsigned int uic = *utf16Str++;
186
187 /*
188 * The most common case is (uic > 0 && uic <= 0x7f).
189 */
190 if (uic == 0 || uic > 0x7f) {
191 if (uic > 0x07ff)
192 utf8Len += 3;
193 else /*(uic > 0x7f || uic == 0) */
194 utf8Len += 2;
195 } else
196 utf8Len++;
197 }
198 return utf8Len;
199 }
200
201 /*
202 * Convert a UTF-16 string to UTF-8.
203 *
204 * Make sure you allocate "utf8Str" with the result of utf16_utf8ByteLen(),
205 * not just "len".
206 */
convertUtf16ToUtf8(char * utf8Str,const u2 * utf16Str,int len)207 static void convertUtf16ToUtf8(char* utf8Str, const u2* utf16Str, int len)
208 {
209 assert(len >= 0);
210
211 while (len--) {
212 unsigned int uic = *utf16Str++;
213
214 /*
215 * The most common case is (uic > 0 && uic <= 0x7f).
216 */
217 if (uic == 0 || uic > 0x7f) {
218 if (uic > 0x07ff) {
219 *utf8Str++ = (uic >> 12) | 0xe0;
220 *utf8Str++ = ((uic >> 6) & 0x3f) | 0x80;
221 *utf8Str++ = (uic & 0x3f) | 0x80;
222 } else /*(uic > 0x7f || uic == 0)*/ {
223 *utf8Str++ = (uic >> 6) | 0xc0;
224 *utf8Str++ = (uic & 0x3f) | 0x80;
225 }
226 } else {
227 *utf8Str++ = uic;
228 }
229 }
230
231 *utf8Str = '\0';
232 }
233
234 /*
235 * Use the java/lang/String.computeHashCode() algorithm.
236 */
dvmComputeUtf16Hash(const u2 * utf16Str,int len)237 static inline u4 dvmComputeUtf16Hash(const u2* utf16Str, int len)
238 {
239 u4 hash = 0;
240
241 while (len--)
242 hash = hash * 31 + *utf16Str++;
243
244 return hash;
245 }
dvmComputeStringHash(const StringObject * strObj)246 u4 dvmComputeStringHash(const StringObject* strObj) {
247 ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj,
248 STRING_FIELDOFF_VALUE);
249 int offset, len;
250
251 len = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_COUNT);
252 offset = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_OFFSET);
253
254 return dvmComputeUtf16Hash((u2*) chars->contents + offset, len);
255 }
256
257 /*
258 * Create a new java/lang/String object, using the string data in "utf8Str".
259 *
260 * The caller must call dvmReleaseTrackedAlloc() on the return value.
261 *
262 * Returns NULL and throws an exception on failure.
263 */
dvmCreateStringFromCstr(const char * utf8Str)264 StringObject* dvmCreateStringFromCstr(const char* utf8Str)
265 {
266 assert(utf8Str != NULL);
267 return dvmCreateStringFromCstrAndLength(utf8Str, dvmUtf8Len(utf8Str));
268 }
269
270 /*
271 * Create a java/lang/String from a C string, given its UTF-16 length
272 * (number of UTF-16 code points).
273 *
274 * The caller must call dvmReleaseTrackedAlloc() on the return value.
275 *
276 * Returns NULL and throws an exception on failure.
277 */
dvmCreateStringFromCstrAndLength(const char * utf8Str,u4 utf16Length)278 StringObject* dvmCreateStringFromCstrAndLength(const char* utf8Str,
279 u4 utf16Length)
280 {
281 StringObject* newObj;
282 ArrayObject* chars;
283 u4 hashCode = 0;
284
285 //LOGV("Creating String from '%s'\n", utf8Str);
286 assert(utf8Str != NULL);
287
288 if (gDvm.javaLangStringReady <= 0) {
289 if (!stringStartup())
290 return NULL;
291 }
292
293 /* init before alloc */
294 if (!dvmIsClassInitialized(gDvm.classJavaLangString) &&
295 !dvmInitClass(gDvm.classJavaLangString))
296 {
297 return NULL;
298 }
299
300 newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString,
301 ALLOC_DEFAULT);
302 if (newObj == NULL)
303 return NULL;
304
305 chars = dvmAllocPrimitiveArray('C', utf16Length, ALLOC_DEFAULT);
306 if (chars == NULL) {
307 dvmReleaseTrackedAlloc((Object*) newObj, NULL);
308 return NULL;
309 }
310 dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str);
311 hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length);
312
313 dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
314 (Object*)chars);
315 dvmReleaseTrackedAlloc((Object*) chars, NULL);
316 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, utf16Length);
317 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
318 /* leave offset set to zero */
319
320 /* debugging stuff */
321 //dvmDumpObject((Object*)newObj);
322 //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, utf16Length * 2,
323 // kHexDumpMem);
324
325 /* caller may need to dvmReleaseTrackedAlloc(newObj) */
326 return newObj;
327 }
328
329 /*
330 * Create a new java/lang/String object, using the Unicode data.
331 */
dvmCreateStringFromUnicode(const u2 * unichars,int len)332 StringObject* dvmCreateStringFromUnicode(const u2* unichars, int len)
333 {
334 StringObject* newObj;
335 ArrayObject* chars;
336 u4 hashCode = 0;
337
338 /* we allow a null pointer if the length is zero */
339 assert(len == 0 || unichars != NULL);
340
341 if (gDvm.javaLangStringReady <= 0) {
342 if (!stringStartup())
343 return NULL;
344 }
345
346 /* init before alloc */
347 if (!dvmIsClassInitialized(gDvm.classJavaLangString) &&
348 !dvmInitClass(gDvm.classJavaLangString))
349 {
350 return NULL;
351 }
352
353 newObj = (StringObject*) dvmAllocObject(gDvm.classJavaLangString,
354 ALLOC_DEFAULT);
355 if (newObj == NULL)
356 return NULL;
357
358 chars = dvmAllocPrimitiveArray('C', len, ALLOC_DEFAULT);
359 if (chars == NULL) {
360 dvmReleaseTrackedAlloc((Object*) newObj, NULL);
361 return NULL;
362 }
363 if (len > 0)
364 memcpy(chars->contents, unichars, len * sizeof(u2));
365 hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len);
366
367 dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
368 (Object*)chars);
369 dvmReleaseTrackedAlloc((Object*) chars, NULL);
370 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, len);
371 dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
372 /* leave offset set to zero */
373
374 /* debugging stuff */
375 //dvmDumpObject((Object*)newObj);
376 //printHexDumpEx(ANDROID_LOG_DEBUG, chars->contents, len*2, kHexDumpMem);
377
378 /* caller must dvmReleaseTrackedAlloc(newObj) */
379 return newObj;
380 }
381
382 /*
383 * Create a new C string from a java/lang/String object.
384 *
385 * Returns NULL if the object is NULL.
386 */
dvmCreateCstrFromString(StringObject * jstr)387 char* dvmCreateCstrFromString(StringObject* jstr)
388 {
389 char* newStr;
390 ArrayObject* chars;
391 int len, byteLen, offset;
392 const u2* data;
393
394 assert(gDvm.javaLangStringReady > 0);
395
396 if (jstr == NULL)
397 return NULL;
398
399 len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
400 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
401 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
402 STRING_FIELDOFF_VALUE);
403 data = (const u2*) chars->contents + offset;
404 assert(offset + len <= (int) chars->length);
405
406 byteLen = utf16_utf8ByteLen(data, len);
407 newStr = (char*) malloc(byteLen+1);
408 if (newStr == NULL)
409 return NULL;
410 convertUtf16ToUtf8(newStr, data, len);
411
412 return newStr;
413 }
414
415 /*
416 * Create a UTF-8 C string from a region of a java/lang/String. (Used by
417 * the JNI GetStringUTFRegion call.)
418 */
dvmCreateCstrFromStringRegion(StringObject * jstr,int start,int len,char * buf)419 void dvmCreateCstrFromStringRegion(StringObject* jstr, int start, int len,
420 char* buf)
421 {
422 const u2* data;
423
424 data = dvmStringChars(jstr) + start;
425 convertUtf16ToUtf8(buf, data, len);
426 }
427
428 /*
429 * Compute the length, in modified UTF-8, of a java/lang/String object.
430 *
431 * Does not include the terminating null byte.
432 */
dvmStringUtf8ByteLen(StringObject * jstr)433 int dvmStringUtf8ByteLen(StringObject* jstr)
434 {
435 ArrayObject* chars;
436 int len, offset;
437 const u2* data;
438
439 assert(gDvm.javaLangStringReady > 0);
440
441 if (jstr == NULL)
442 return 0; // should we throw something? assert?
443
444 len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
445 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
446 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
447 STRING_FIELDOFF_VALUE);
448 data = (const u2*) chars->contents + offset;
449 assert(offset + len <= (int) chars->length);
450
451 return utf16_utf8ByteLen(data, len);
452 }
453
454 /*
455 * Get the string's length.
456 */
dvmStringLen(StringObject * jstr)457 int dvmStringLen(StringObject* jstr)
458 {
459 return dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
460 }
461
462 /*
463 * Get the char[] object from the String.
464 */
dvmStringCharArray(StringObject * jstr)465 ArrayObject* dvmStringCharArray(StringObject* jstr)
466 {
467 return (ArrayObject*) dvmGetFieldObject((Object*) jstr,
468 STRING_FIELDOFF_VALUE);
469 }
470
471 /*
472 * Get the string's data.
473 */
dvmStringChars(StringObject * jstr)474 const u2* dvmStringChars(StringObject* jstr)
475 {
476 ArrayObject* chars;
477 int offset;
478
479 offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
480 chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
481 STRING_FIELDOFF_VALUE);
482 return (const u2*) chars->contents + offset;
483 }
484
485
486 /*
487 * Compare two String objects.
488 *
489 * This is a dvmHashTableLookup() callback. The function has already
490 * compared their hash values; we need to do a full compare to ensure
491 * that the strings really match.
492 */
dvmHashcmpStrings(const void * vstrObj1,const void * vstrObj2)493 int dvmHashcmpStrings(const void* vstrObj1, const void* vstrObj2)
494 {
495 const StringObject* strObj1 = (const StringObject*) vstrObj1;
496 const StringObject* strObj2 = (const StringObject*) vstrObj2;
497 ArrayObject* chars1;
498 ArrayObject* chars2;
499 int len1, len2, offset1, offset2;
500
501 assert(gDvm.javaLangStringReady > 0);
502
503 /* get offset and length into char array; all values are in 16-bit units */
504 len1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_COUNT);
505 offset1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_OFFSET);
506 len2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_COUNT);
507 offset2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_OFFSET);
508 if (len1 != len2)
509 return len1 - len2;
510
511 chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1,
512 STRING_FIELDOFF_VALUE);
513 chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2,
514 STRING_FIELDOFF_VALUE);
515
516 /* damage here actually indicates a broken java/lang/String */
517 assert(offset1 + len1 <= (int) chars1->length);
518 assert(offset2 + len2 <= (int) chars2->length);
519
520 return memcmp((const u2*) chars1->contents + offset1,
521 (const u2*) chars2->contents + offset2,
522 len1 * sizeof(u2));
523 }
524